sunholo 0.140.6__py3-none-any.whl → 0.140.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/agents/flask/base.py +27 -1
- sunholo/agents/flask/vac_routes.py +182 -62
- {sunholo-0.140.6.dist-info → sunholo-0.140.8.dist-info}/METADATA +1 -1
- {sunholo-0.140.6.dist-info → sunholo-0.140.8.dist-info}/RECORD +8 -8
- {sunholo-0.140.6.dist-info → sunholo-0.140.8.dist-info}/WHEEL +0 -0
- {sunholo-0.140.6.dist-info → sunholo-0.140.8.dist-info}/entry_points.txt +0 -0
- {sunholo-0.140.6.dist-info → sunholo-0.140.8.dist-info}/licenses/LICENSE.txt +0 -0
- {sunholo-0.140.6.dist-info → sunholo-0.140.8.dist-info}/top_level.txt +0 -0
sunholo/agents/flask/base.py
CHANGED
```diff
@@ -11,12 +11,38 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from ...custom_logging import log
+import time
+
 def create_app(name):
-    from flask import Flask
+    from flask import Flask, request
 
     app = Flask(name)
 
     app.config['TRAP_HTTP_EXCEPTIONS'] = True
     app.config['PROPAGATE_EXCEPTIONS'] = True
 
+    @app.before_request
+    def start_timer():
+        request.start_time = time.time()
+
+    @app.after_request
+    def log_timing(response):
+        if hasattr(request, 'start_time'):
+            duration = time.time() - request.start_time
+
+            # Log all VAC requests with different detail levels
+            if request.path.startswith('/vac/streaming/'):
+                log.info(f"🚀 STREAMING: {duration:.3f}s - {request.path}")
+            elif request.path.startswith('/vac/'):
+                log.info(f"⚡ VAC: {duration:.3f}s - {request.path}")
+            elif duration > 1.0:  # Log any slow requests
+                log.warning(f"🐌 SLOW REQUEST: {duration:.3f}s - {request.path}")
+
+            # Add response headers with timing info for debugging
+            response.headers['X-Response-Time'] = f"{duration:.3f}s"
+
+        return response
+
     return app
```
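The new `before_request`/`after_request` hooks give every app built by `create_app` per-request timing logs and an `X-Response-Time` header. A minimal sketch of observing that header with Flask's test client; the `/vac/demo` route is hypothetical, added only to exercise the hooks:

```python
# Sketch only: assumes the sunholo package is importable and adds a throwaway route.
from sunholo.agents.flask.base import create_app

app = create_app(__name__)

@app.route("/vac/demo")
def demo():
    return "ok"

with app.test_client() as client:
    resp = client.get("/vac/demo")
    # Set by the new after_request hook in base.py
    print(resp.headers.get("X-Response-Time"))  # e.g. "0.001s"
```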
sunholo/agents/flask/vac_routes.py
CHANGED
```diff
@@ -6,6 +6,10 @@ import random
 from functools import partial
 import inspect
 import asyncio
+import time
+import threading
+from functools import lru_cache
+from concurrent.futures import ThreadPoolExecutor
 
 from ..chat_history import extract_chat_history_with_cache, extract_chat_history_async_cached
 from ...qna.parsers import parse_output
```
```diff
@@ -32,6 +36,11 @@ except ImportError:
 # Cache dictionary to store validated API keys
 api_key_cache = {}
 cache_duration = timedelta(minutes=5) # Cache duration
+# Global caches and thread pool
+_config_cache = {}
+_config_lock = threading.Lock()
+_thread_pool = ThreadPoolExecutor(max_workers=4)
+
 
 class VACRoutes:
     """
```
```diff
@@ -69,8 +78,44 @@ if __name__ == "__main__":
         self.additional_routes = additional_routes if additional_routes is not None else []
         self.async_stream = async_stream
         self.add_langfuse_eval = add_langfuse_eval
+
+        # Pre-warm common configs
+        self._preload_common_configs()
+
         self.register_routes()
+
+    def _preload_common_configs(self):
+        """Pre-load commonly used configurations to cache"""
+        common_vector_names = ["aitana3"]  # Add your common vector names
+        for vector_name in common_vector_names:
+            try:
+                self._get_cached_config(vector_name)
+                log.info(f"Pre-loaded config for {vector_name}")
+            except Exception as e:
+                log.warning(f"Failed to pre-load config for {vector_name}: {e}")
+
+    def _get_cached_config(self, vector_name: str):
+        """Cached config loader with thread safety - CORRECTED VERSION"""
+        # Check cache first (without lock for read)
+        if vector_name in _config_cache:
+            log.debug(f"Using cached config for {vector_name}")
+            return _config_cache[vector_name]
 
+        # Need to load config
+        with _config_lock:
+            # Double-check inside lock (another thread might have loaded it)
+            if vector_name in _config_cache:
+                return _config_cache[vector_name]
+
+            try:
+                log.info(f"Loading fresh config for {vector_name}")
+                config = ConfigManager(vector_name)
+                _config_cache[vector_name] = config
+                log.info(f"Cached config for {vector_name}")
+                return config
+            except Exception as e:
+                log.error(f"Error loading config for {vector_name}: {e}")
+                raise
 
     def vac_interpreter_default(self, question: str, vector_name: str, chat_history=[], **kwargs):
         # Create a callback that does nothing for streaming if you don't want intermediate outputs
```
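`_get_cached_config` uses double-checked locking: an unlocked read serves the hot path, then the check is repeated inside `_config_lock` so concurrent first requests for the same `vector_name` build only one `ConfigManager`. A standalone sketch of the same pattern, with a stand-in loader instead of `ConfigManager`:

```python
import threading

_cache: dict[str, object] = {}
_lock = threading.Lock()

def load_config(name: str) -> object:
    # Stand-in for the expensive ConfigManager(vector_name) construction
    return {"vector_name": name}

def get_cached(name: str) -> object:
    if name in _cache:           # fast path: no lock for reads
        return _cache[name]
    with _lock:                  # slow path: serialize the first load
        if name in _cache:       # double-check: another thread may have loaded it
            return _cache[name]
        _cache[name] = load_config(name)
        return _cache[name]
```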
```diff
@@ -228,22 +273,43 @@ if __name__ == "__main__":
 
         log.info(f"OpenAI response: {openai_response}")
         return jsonify(openai_response)
-
+
+    def _finalize_trace_background(self, trace, span, response, all_input):
+        """Finalize trace operations in background"""
+        try:
+            if span:
+                span.end(output=str(response))
+            if trace:
+                trace.update(output=str(response))
+                self.langfuse_eval_response(trace_id=trace.id, eval_percent=all_input.get('eval_percent'))
+        except Exception as e:
+            log.warning(f"Background trace finalization failed: {e}")
+
     def handle_stream_vac(self, vector_name):
+        request_start = time.time()
         observed_stream_interpreter = self.stream_interpreter
         is_async = inspect.iscoroutinefunction(self.stream_interpreter)
 
         if is_async:
             log.info(f"Stream interpreter is async: {observed_stream_interpreter}")
 
-
-
-
-
+        # Call prep_vac and handle errors properly
+        try:
+            prep = self.prep_vac(request, vector_name)
+        except Exception as e:
+            log.error(f"prep_vac failed: {e}")
+            error_response = {'error': f'Prep error: {str(e)}'}
+            return jsonify(error_response), 500
+
+        log.info(f"Processing prep completed in {time.time() - request_start:.3f}s")
+
+        trace = prep.get("trace")
+        span = prep.get("span")
         vac_config = prep["vac_config"]
         all_input = prep["all_input"]
 
-        log.info(f'
+        log.info(f'Starting stream with: {all_input["user_input"][:100]}...')
+
         if span:
             span.update(
                 name="start_streaming_chat",
```
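`handle_stream_vac` now hands trace finalization to the module-level `_thread_pool` instead of blocking the response. A minimal sketch of that fire-and-forget pattern, using a stand-in finalizer rather than the real Langfuse calls:

```python
from concurrent.futures import ThreadPoolExecutor
import time

_thread_pool = ThreadPoolExecutor(max_workers=4)

def finalize_trace(trace_id: str) -> None:
    # Stand-in for span.end()/trace.update() plus eval sampling
    time.sleep(0.5)
    print(f"trace {trace_id} finalized")

def handle_request() -> str:
    response = "streamed body"
    # Submit and return immediately; the worker thread finishes later
    _thread_pool.submit(finalize_trace, "trace-123")
    return response

print(handle_request())  # returns right away
time.sleep(1)            # give the background worker time to finish
```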
```diff
@@ -254,7 +320,7 @@
         def generate_response_content():
             try:
                 if is_async:
-                    from queue import Queue
+                    from queue import Queue
                     result_queue = Queue()
                     import threading
 
```
```diff
@@ -271,7 +337,7 @@
                                     trace_id=trace.id if trace else None,
                                     **all_input["kwargs"]
                                 )
-
+
                                 async for chunk in async_gen:
                                     if isinstance(chunk, dict) and 'answer' in chunk:
                                         if trace:
```
```diff
@@ -284,9 +350,12 @@
                                     else:
                                         result_queue.put(chunk)
                             except Exception as e:
-
+                                error_msg = f"Streaming Error: {str(e)} {traceback.format_exc()}"
+                                log.error(error_msg)
+                                result_queue.put(error_msg)
                             finally:
                                 result_queue.put(None)  # Sentinel
+
                         asyncio.run(process_async())
 
                     thread = threading.Thread(target=run_async)
```
```diff
@@ -301,7 +370,7 @@
 
                     thread.join()
                 else:
-                    log.info("sync streaming response")
+                    log.info("Starting sync streaming response")
                     for chunk in start_streaming_chat(
                         question=all_input["user_input"],
                         vector_name=vector_name,
```
```diff
@@ -325,17 +394,19 @@
                         yield chunk
 
             except Exception as e:
-
+                error_msg = f"Streaming Error: {str(e)} {traceback.format_exc()}"
+                log.error(error_msg)
+                yield error_msg
 
-        #
+        # Create streaming response
         response = Response(generate_response_content(), content_type='text/plain; charset=utf-8')
         response.headers['Transfer-Encoding'] = 'chunked'
 
-        log.
+        log.info(f"Streaming response created in {time.time() - request_start:.3f}s")
+
+        # Do final trace operations in background (don't block the response)
         if trace:
-
-            trace.update(output=response)
-            self.langfuse_eval_response(trace_id=trace.id, eval_percent=all_input.get('eval_percent'))
+            _thread_pool.submit(self._finalize_trace_background, trace, span, response, all_input)
 
         return response
 
```
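The streaming handler returns a chunked `text/plain` response, so clients can consume partial output as it is produced. A minimal sketch of a streaming client; the URL, port and payload fields are assumptions for illustration, with the route shape following the `/vac/streaming/<vector_name>` prefix that base.py's timing hooks look for:

```python
import requests

url = "http://localhost:8080/vac/streaming/my_vac"   # hypothetical local endpoint
payload = {"user_input": "Hello, what can you do?"}

with requests.post(url, json=payload, stream=True, timeout=120) as resp:
    resp.raise_for_status()
    # Header added by the new after_request hook
    print("X-Response-Time:", resp.headers.get("X-Response-Time"))
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        if chunk:
            print(chunk, end="", flush=True)
```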
```diff
@@ -654,78 +725,127 @@
             tags = tags,
             release = package_version
         )
+
+    def _create_langfuse_trace_background(self, request, vector_name, trace_id):
+        """Create Langfuse trace in background"""
+        try:
+            return self.create_langfuse_trace(request, vector_name, trace_id)
+        except Exception as e:
+            log.warning(f"Background trace creation failed: {e}")
+            return None
 
+    def _handle_file_upload_background(self, file, vector_name):
+        """Handle file upload in background thread"""
+        try:
+            # Save with timestamp to avoid conflicts
+            temp_filename = f"temp_{int(time.time() * 1000)}_{file.filename}"
+            file.save(temp_filename)
+
+            # Upload to GCS
+            image_uri = add_file_to_gcs(temp_filename, vector_name)
+
+            # Clean up
+            os.remove(temp_filename)
+
+            return {"image_uri": image_uri, "mime": file.mimetype}
+        except Exception as e:
+            log.error(f"Background file upload failed: {e}")
+            return {}
+
     def prep_vac(self, request, vector_name):
-
+        start_time = time.time()
+
+        # Fast request parsing - KEEP ORIGINAL ERROR HANDLING STYLE
         if request.content_type.startswith('application/json'):
             data = request.get_json()
         elif request.content_type.startswith('multipart/form-data'):
             data = request.form.to_dict()
+            # Handle file upload in background if present
             if 'file' in request.files:
                 file = request.files['file']
                 if file.filename != '':
-                    log.info(f"Found file: {file.filename}
-
-
-
-                        data["mime"] = mime_type
-                    except Exception as e:
-                        log.error(traceback.format_exc())
-                        return jsonify({'error': str(e), 'traceback': traceback.format_exc()}), 500
-                else:
-                    log.error("No file selected")
-                    return jsonify({"error": "No file selected"}), 400
+                    log.info(f"Found file: {file.filename} - uploading in background")
+                    # Start file upload in background, don't block
+                    upload_future = _thread_pool.submit(self._handle_file_upload_background, file, vector_name)
+                    data["_upload_future"] = upload_future
         else:
-            return
+            # KEEP ORIGINAL STYLE - return the error response directly
+            raise ValueError("Unsupported content type")
 
-        log.info(f"vac/{vector_name} got data: {data}")
+        log.info(f"vac/{vector_name} got data keys: {list(data.keys())}")
 
-
-        span = None
-        if self.add_langfuse_eval:
-            trace_id = data.get('trace_id')
-            trace = self.create_langfuse_trace(request, vector_name, trace_id)
-            log.info(f"Using existing langfuse trace: {trace_id}")
-
-        #config, _ = load_config("config/llm_config.yaml")
+        # Get config from cache first (before processing other data)
         try:
-            vac_config =
+            vac_config = self._get_cached_config(vector_name)
         except Exception as e:
             raise ValueError(f"Unable to find vac_config for {vector_name} - {str(e)}")
 
-
-
-
-
-
-
-
+        # Initialize trace variables
+        trace = None
+        span = None
+        if self.add_langfuse_eval:
+            trace_id = data.get('trace_id')
+            # Create trace in background - don't block
+            trace_future = _thread_pool.submit(self._create_langfuse_trace_background, request, vector_name, trace_id)
 
+        # Extract data (keep original logic)
         user_input = data.pop('user_input').strip()
         stream_wait_time = data.pop('stream_wait_time', 7)
         stream_timeout = data.pop('stream_timeout', 120)
         chat_history = data.pop('chat_history', None)
         eval_percent = data.pop('eval_percent', 0.01)
-
-        data.pop('trace_id', None)
+        vector_name_param = data.pop('vector_name', vector_name)
+        data.pop('trace_id', None)  # to ensure not in kwargs
 
+        # Process chat history with caching
         paired_messages = extract_chat_history_with_cache(chat_history)
 
-
-
-
-
-
-
-
+        # Wait for file upload if it was started (with timeout)
+        if "_upload_future" in data:
+            try:
+                upload_result = data["_upload_future"].result(timeout=3.0)  # 3 sec max wait
+                data.update(upload_result)
+                log.info(f"File upload completed: {upload_result.get('image_uri', 'no uri')}")
+            except Exception as e:
+                log.warning(f"File upload failed or timed out: {e}")
+            finally:
+                data.pop("_upload_future", None)
+
+        # Build final input
+        all_input = {
+            'user_input': user_input,
+            'vector_name': vector_name_param,
+            'chat_history': paired_messages,
+            'stream_wait_time': stream_wait_time,
+            'stream_timeout': stream_timeout,
+            'eval_percent': eval_percent,
+            'kwargs': data
+        }
+
+        # Try to get trace result if available (don't block long)
+        if self.add_langfuse_eval:
+            try:
+                trace = trace_future.result(timeout=0.1)  # Very short timeout
+                if trace:
+                    this_vac_config = vac_config.configs_by_kind.get("vacConfig")
+                    metadata_config = None
+                    if this_vac_config:
+                        metadata_config = this_vac_config.get(vector_name)
+                    trace.update(input=data, metadata=metadata_config)
+
+                    span = trace.span(
+                        name="VAC",
+                        metadata=vac_config.configs_by_kind,
+                        input=all_input
+                    )
+            except Exception as e:
+                log.warning(f"Langfuse trace creation timed out or failed: {e}")
+                trace = None
+                span = None
+
+        prep_time = time.time() - start_time
+        log.info(f"prep_vac completed in {prep_time:.3f}s")
 
-        if trace:
-            span = trace.span(
-                name="VAC",
-                metadata=vac_config.configs_by_kind,
-                input = all_input
-            )
-
         return {
             "trace": trace,
             "span": span,
```
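`prep_vac` no longer blocks on file uploads: the upload is submitted to `_thread_pool` immediately and collected later with a 3-second cap via `Future.result(timeout=...)`. A standalone sketch of that submit-now, collect-later pattern with a stand-in upload function in place of `add_file_to_gcs`:

```python
from concurrent.futures import ThreadPoolExecutor, TimeoutError
import time

pool = ThreadPoolExecutor(max_workers=4)

def upload_to_gcs(filename: str) -> dict:
    # Stand-in for the real GCS upload
    time.sleep(1.0)
    return {"image_uri": f"gs://bucket/{filename}"}

# Kick off the upload without blocking the request path
future = pool.submit(upload_to_gcs, "photo.png")

# ...do other prep work here (parse inputs, load config, etc.)...

try:
    result = future.result(timeout=3.0)   # wait at most 3 seconds
    print("upload finished:", result["image_uri"])
except TimeoutError:
    print("upload still running; continuing without it")
```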
{sunholo-0.140.6.dist-info → sunholo-0.140.8.dist-info}/RECORD
CHANGED
```diff
@@ -13,8 +13,8 @@ sunholo/agents/fastapi/__init__.py,sha256=S_pj4_bTUmDGoq_exaREHlOKThi0zTuGT0VZY0
 sunholo/agents/fastapi/base.py,sha256=W-cyF8ZDUH40rc-c-Apw3-_8IIi2e4Y9qRtnoVnsc1Q,2521
 sunholo/agents/fastapi/qna_routes.py,sha256=lKHkXPmwltu9EH3RMwmD153-J6pE7kWQ4BhBlV3to-s,3864
 sunholo/agents/flask/__init__.py,sha256=dEoByI3gDNUOjpX1uVKP7uPjhfFHJubbiaAv3xLopnk,63
-sunholo/agents/flask/base.py,sha256=
-sunholo/agents/flask/vac_routes.py,sha256=
+sunholo/agents/flask/base.py,sha256=vnpxFEOnCmt9humqj-jYPLfJcdwzsop9NorgkJ-tSaU,1756
+sunholo/agents/flask/vac_routes.py,sha256=YOW64HaRYa0MfMnzwbx2s9IrU6lz-CeqpcfmIo_L3ho,37664
 sunholo/archive/__init__.py,sha256=qNHWm5rGPVOlxZBZCpA1wTYPbalizRT7f8X4rs2t290,31
 sunholo/archive/archive.py,sha256=PxVfDtO2_2ZEEbnhXSCbXLdeoHoQVImo4y3Jr2XkCFY,1204
 sunholo/auth/__init__.py,sha256=TeP-OY0XGxYV_8AQcVGoh35bvyWhNUcMRfhuD5l44Sk,91
```
```diff
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
 sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
 sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
 sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
-sunholo-0.140.
-sunholo-0.140.
-sunholo-0.140.
-sunholo-0.140.
-sunholo-0.140.
-sunholo-0.140.
+sunholo-0.140.8.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+sunholo-0.140.8.dist-info/METADATA,sha256=30zPLVCgeU87lsGIdFxyaAOFvYDuC-EOayXWNgophxI,10067
+sunholo-0.140.8.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+sunholo-0.140.8.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+sunholo-0.140.8.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+sunholo-0.140.8.dist-info/RECORD,,
```