sunholo 0.140.9__py3-none-any.whl → 0.140.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/agents/flask/vac_routes.py +120 -179
- {sunholo-0.140.9.dist-info → sunholo-0.140.11.dist-info}/METADATA +1 -1
- {sunholo-0.140.9.dist-info → sunholo-0.140.11.dist-info}/RECORD +7 -7
- {sunholo-0.140.9.dist-info → sunholo-0.140.11.dist-info}/WHEEL +0 -0
- {sunholo-0.140.9.dist-info → sunholo-0.140.11.dist-info}/entry_points.txt +0 -0
- {sunholo-0.140.9.dist-info → sunholo-0.140.11.dist-info}/licenses/LICENSE.txt +0 -0
- {sunholo-0.140.9.dist-info → sunholo-0.140.11.dist-info}/top_level.txt +0 -0
@@ -6,10 +6,6 @@ import random
|
|
6
6
|
from functools import partial
|
7
7
|
import inspect
|
8
8
|
import asyncio
|
9
|
-
import time
|
10
|
-
import threading
|
11
|
-
from functools import lru_cache
|
12
|
-
from concurrent.futures import ThreadPoolExecutor
|
13
9
|
|
14
10
|
from ..chat_history import extract_chat_history_with_cache, extract_chat_history_async_cached
|
15
11
|
from ...qna.parsers import parse_output
|
@@ -36,11 +32,6 @@ except ImportError:
|
|
36
32
|
# Cache dictionary to store validated API keys
|
37
33
|
api_key_cache = {}
|
38
34
|
cache_duration = timedelta(minutes=5) # Cache duration
|
39
|
-
# Global caches and thread pool
|
40
|
-
_config_cache = {}
|
41
|
-
_config_lock = threading.Lock()
|
42
|
-
_thread_pool = ThreadPoolExecutor(max_workers=4)
|
43
|
-
|
44
35
|
|
45
36
|
class VACRoutes:
|
46
37
|
"""
|
@@ -78,44 +69,8 @@ if __name__ == "__main__":
|
|
78
69
|
self.additional_routes = additional_routes if additional_routes is not None else []
|
79
70
|
self.async_stream = async_stream
|
80
71
|
self.add_langfuse_eval = add_langfuse_eval
|
81
|
-
|
82
|
-
# Pre-warm common configs
|
83
|
-
self._preload_common_configs()
|
84
|
-
|
85
72
|
self.register_routes()
|
86
|
-
|
87
|
-
def _preload_common_configs(self):
|
88
|
-
"""Pre-load commonly used configurations to cache"""
|
89
|
-
common_vector_names = ["aitana3"] # Add your common vector names
|
90
|
-
for vector_name in common_vector_names:
|
91
|
-
try:
|
92
|
-
self._get_cached_config(vector_name)
|
93
|
-
log.info(f"Pre-loaded config for {vector_name}")
|
94
|
-
except Exception as e:
|
95
|
-
log.warning(f"Failed to pre-load config for {vector_name}: {e}")
|
96
|
-
|
97
|
-
def _get_cached_config(self, vector_name: str):
|
98
|
-
"""Cached config loader with thread safety - CORRECTED VERSION"""
|
99
|
-
# Check cache first (without lock for read)
|
100
|
-
if vector_name in _config_cache:
|
101
|
-
log.debug(f"Using cached config for {vector_name}")
|
102
|
-
return _config_cache[vector_name]
|
103
73
|
|
104
|
-
# Need to load config
|
105
|
-
with _config_lock:
|
106
|
-
# Double-check inside lock (another thread might have loaded it)
|
107
|
-
if vector_name in _config_cache:
|
108
|
-
return _config_cache[vector_name]
|
109
|
-
|
110
|
-
try:
|
111
|
-
log.info(f"Loading fresh config for {vector_name}")
|
112
|
-
config = ConfigManager(vector_name)
|
113
|
-
_config_cache[vector_name] = config
|
114
|
-
log.info(f"Cached config for {vector_name}")
|
115
|
-
return config
|
116
|
-
except Exception as e:
|
117
|
-
log.error(f"Error loading config for {vector_name}: {e}")
|
118
|
-
raise
|
119
74
|
|
120
75
|
def vac_interpreter_default(self, question: str, vector_name: str, chat_history=[], **kwargs):
|
121
76
|
# Create a callback that does nothing for streaming if you don't want intermediate outputs
|
@@ -273,43 +228,22 @@ if __name__ == "__main__":
|
|
273
228
|
|
274
229
|
log.info(f"OpenAI response: {openai_response}")
|
275
230
|
return jsonify(openai_response)
|
276
|
-
|
277
|
-
def _finalize_trace_background(self, trace, span, response, all_input):
|
278
|
-
"""Finalize trace operations in background"""
|
279
|
-
try:
|
280
|
-
if span:
|
281
|
-
span.end(output=str(response))
|
282
|
-
if trace:
|
283
|
-
trace.update(output=str(response))
|
284
|
-
self.langfuse_eval_response(trace_id=trace.id, eval_percent=all_input.get('eval_percent'))
|
285
|
-
except Exception as e:
|
286
|
-
log.warning(f"Background trace finalization failed: {e}")
|
287
|
-
|
231
|
+
|
288
232
|
def handle_stream_vac(self, vector_name):
|
289
|
-
request_start = time.time()
|
290
233
|
observed_stream_interpreter = self.stream_interpreter
|
291
234
|
is_async = inspect.iscoroutinefunction(self.stream_interpreter)
|
292
235
|
|
293
236
|
if is_async:
|
294
237
|
log.info(f"Stream interpreter is async: {observed_stream_interpreter}")
|
295
238
|
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
log.error(f"prep_vac failed: {e}")
|
301
|
-
error_response = {'error': f'Prep error: {str(e)}'}
|
302
|
-
return jsonify(error_response), 500
|
303
|
-
|
304
|
-
log.info(f"Processing prep completed in {time.time() - request_start:.3f}s")
|
305
|
-
|
306
|
-
trace = prep.get("trace")
|
307
|
-
span = prep.get("span")
|
239
|
+
prep = self.prep_vac(request, vector_name)
|
240
|
+
log.info(f"Processing prep: {prep}")
|
241
|
+
trace = prep["trace"]
|
242
|
+
span = prep["span"]
|
308
243
|
vac_config = prep["vac_config"]
|
309
244
|
all_input = prep["all_input"]
|
310
245
|
|
311
|
-
log.info(f'
|
312
|
-
|
246
|
+
log.info(f'Streaming data with: {all_input}')
|
313
247
|
if span:
|
314
248
|
span.update(
|
315
249
|
name="start_streaming_chat",
|
@@ -320,7 +254,7 @@ if __name__ == "__main__":
|
|
320
254
|
def generate_response_content():
|
321
255
|
try:
|
322
256
|
if is_async:
|
323
|
-
from queue import Queue
|
257
|
+
from queue import Queue, Empty
|
324
258
|
result_queue = Queue()
|
325
259
|
import threading
|
326
260
|
|
@@ -337,7 +271,7 @@ if __name__ == "__main__":
|
|
337
271
|
trace_id=trace.id if trace else None,
|
338
272
|
**all_input["kwargs"]
|
339
273
|
)
|
340
|
-
|
274
|
+
log.info(f"{async_gen=}")
|
341
275
|
async for chunk in async_gen:
|
342
276
|
if isinstance(chunk, dict) and 'answer' in chunk:
|
343
277
|
if trace:
|
@@ -350,12 +284,9 @@ if __name__ == "__main__":
|
|
350
284
|
else:
|
351
285
|
result_queue.put(chunk)
|
352
286
|
except Exception as e:
|
353
|
-
|
354
|
-
log.error(error_msg)
|
355
|
-
result_queue.put(error_msg)
|
287
|
+
result_queue.put(f"Streaming Error: {str(e)} {traceback.format_exc()}")
|
356
288
|
finally:
|
357
289
|
result_queue.put(None) # Sentinel
|
358
|
-
|
359
290
|
asyncio.run(process_async())
|
360
291
|
|
361
292
|
thread = threading.Thread(target=run_async)
|
@@ -370,7 +301,7 @@ if __name__ == "__main__":
|
|
370
301
|
|
371
302
|
thread.join()
|
372
303
|
else:
|
373
|
-
log.info("
|
304
|
+
log.info("sync streaming response")
|
374
305
|
for chunk in start_streaming_chat(
|
375
306
|
question=all_input["user_input"],
|
376
307
|
vector_name=vector_name,
|
@@ -394,19 +325,17 @@ if __name__ == "__main__":
|
|
394
325
|
yield chunk
|
395
326
|
|
396
327
|
except Exception as e:
|
397
|
-
|
398
|
-
log.error(error_msg)
|
399
|
-
yield error_msg
|
328
|
+
yield f"Streaming Error: {str(e)} {traceback.format_exc()}"
|
400
329
|
|
401
|
-
#
|
330
|
+
# Here, the generator function will handle streaming the content to the client.
|
402
331
|
response = Response(generate_response_content(), content_type='text/plain; charset=utf-8')
|
403
332
|
response.headers['Transfer-Encoding'] = 'chunked'
|
404
333
|
|
405
|
-
log.
|
406
|
-
|
407
|
-
# Do final trace operations in background (don't block the response)
|
334
|
+
log.debug(f"streaming response: {response}")
|
408
335
|
if trace:
|
409
|
-
|
336
|
+
span.end(output=response)
|
337
|
+
trace.update(output=response)
|
338
|
+
self.langfuse_eval_response(trace_id=trace.id, eval_percent=all_input.get('eval_percent'))
|
410
339
|
|
411
340
|
return response
|
412
341
|
|
@@ -725,131 +654,144 @@ if __name__ == "__main__":
|
|
725
654
|
tags = tags,
|
726
655
|
release = package_version
|
727
656
|
)
|
728
|
-
|
729
|
-
def _create_langfuse_trace_background(self, request, vector_name, trace_id):
|
730
|
-
"""Create Langfuse trace in background"""
|
731
|
-
try:
|
732
|
-
return self.create_langfuse_trace(request, vector_name, trace_id)
|
733
|
-
except Exception as e:
|
734
|
-
log.warning(f"Background trace creation failed: {e}")
|
735
|
-
return None
|
736
657
|
|
737
|
-
def _handle_file_upload_background(self, file, vector_name):
|
738
|
-
"""Handle file upload in background thread"""
|
739
|
-
try:
|
740
|
-
# Save with timestamp to avoid conflicts
|
741
|
-
temp_filename = f"temp_{int(time.time() * 1000)}_{file.filename}"
|
742
|
-
file.save(temp_filename)
|
743
|
-
|
744
|
-
# Upload to GCS
|
745
|
-
image_uri = add_file_to_gcs(temp_filename, vector_name)
|
746
|
-
|
747
|
-
# Clean up
|
748
|
-
os.remove(temp_filename)
|
749
|
-
|
750
|
-
return {"image_uri": image_uri, "mime": file.mimetype}
|
751
|
-
except Exception as e:
|
752
|
-
log.error(f"Background file upload failed: {e}")
|
753
|
-
return {}
|
754
|
-
|
755
658
|
def prep_vac(self, request, vector_name):
|
756
|
-
|
757
|
-
|
758
|
-
# Fast request parsing - KEEP ORIGINAL ERROR HANDLING STYLE
|
659
|
+
|
759
660
|
if request.content_type.startswith('application/json'):
|
760
661
|
data = request.get_json()
|
761
662
|
elif request.content_type.startswith('multipart/form-data'):
|
762
663
|
data = request.form.to_dict()
|
763
|
-
# Handle file upload in background if present
|
764
664
|
if 'file' in request.files:
|
765
665
|
file = request.files['file']
|
766
666
|
if file.filename != '':
|
767
|
-
log.info(f"Found file: {file.filename}
|
768
|
-
|
769
|
-
|
770
|
-
|
667
|
+
log.info(f"Found file: {file.filename} to upload to GCS")
|
668
|
+
try:
|
669
|
+
image_uri, mime_type = self.handle_file_upload(file, vector_name)
|
670
|
+
data["image_uri"] = image_uri
|
671
|
+
data["mime"] = mime_type
|
672
|
+
except Exception as e:
|
673
|
+
log.error(traceback.format_exc())
|
674
|
+
return jsonify({'error': str(e), 'traceback': traceback.format_exc()}), 500
|
675
|
+
else:
|
676
|
+
log.error("No file selected")
|
677
|
+
return jsonify({"error": "No file selected"}), 400
|
771
678
|
else:
|
772
|
-
|
773
|
-
raise ValueError("Unsupported content type")
|
679
|
+
return jsonify({"error": "Unsupported content type"}), 400
|
774
680
|
|
775
|
-
log.info(f"vac/{vector_name} got data
|
681
|
+
log.info(f"vac/{vector_name} got data: {data}")
|
776
682
|
|
777
|
-
|
683
|
+
trace = None
|
684
|
+
span = None
|
685
|
+
if self.add_langfuse_eval:
|
686
|
+
trace_id = data.get('trace_id')
|
687
|
+
trace = self.create_langfuse_trace(request, vector_name, trace_id)
|
688
|
+
log.info(f"Using existing langfuse trace: {trace_id}")
|
689
|
+
|
690
|
+
#config, _ = load_config("config/llm_config.yaml")
|
778
691
|
try:
|
779
|
-
vac_config =
|
692
|
+
vac_config = ConfigManager(vector_name)
|
780
693
|
except Exception as e:
|
781
694
|
raise ValueError(f"Unable to find vac_config for {vector_name} - {str(e)}")
|
782
695
|
|
783
|
-
|
696
|
+
if trace:
|
697
|
+
this_vac_config = vac_config.configs_by_kind.get("vacConfig")
|
698
|
+
metadata_config=None
|
699
|
+
if this_vac_config:
|
700
|
+
metadata_config = this_vac_config.get(vector_name)
|
701
|
+
|
702
|
+
trace.update(input=data, metadata=metadata_config)
|
703
|
+
|
784
704
|
user_input = data.pop('user_input').strip()
|
785
705
|
stream_wait_time = data.pop('stream_wait_time', 7)
|
786
706
|
stream_timeout = data.pop('stream_timeout', 120)
|
787
707
|
chat_history = data.pop('chat_history', None)
|
788
708
|
eval_percent = data.pop('eval_percent', 0.01)
|
789
|
-
|
790
|
-
data.pop('trace_id', None)
|
709
|
+
vector_name = data.pop('vector_name', vector_name)
|
710
|
+
data.pop('trace_id', None) # to ensure not in kwargs
|
791
711
|
|
792
|
-
# Process chat history with caching
|
793
712
|
paired_messages = extract_chat_history_with_cache(chat_history)
|
794
713
|
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
714
|
+
all_input = {'user_input': user_input,
|
715
|
+
'vector_name': vector_name,
|
716
|
+
'chat_history': paired_messages,
|
717
|
+
'stream_wait_time': stream_wait_time,
|
718
|
+
'stream_timeout': stream_timeout,
|
719
|
+
'eval_percent': eval_percent,
|
720
|
+
'kwargs': data}
|
721
|
+
|
722
|
+
if trace:
|
723
|
+
span = trace.span(
|
724
|
+
name="VAC",
|
725
|
+
metadata=vac_config.configs_by_kind,
|
726
|
+
input = all_input
|
727
|
+
)
|
728
|
+
|
729
|
+
return {
|
730
|
+
"trace": trace,
|
731
|
+
"span": span,
|
732
|
+
"all_input": all_input,
|
733
|
+
"vac_config": vac_config
|
734
|
+
}
|
735
|
+
|
736
|
+
async def prep_vac_async(self, request, vector_name):
|
737
|
+
"""Async version of prep_vac."""
|
738
|
+
# Parse request data
|
739
|
+
if request.content_type.startswith('application/json'):
|
740
|
+
data = request.get_json()
|
741
|
+
elif request.content_type.startswith('multipart/form-data'):
|
742
|
+
data = request.form.to_dict()
|
743
|
+
if 'file' in request.files:
|
744
|
+
file = request.files['file']
|
745
|
+
if file.filename != '':
|
746
|
+
log.info(f"Found file: {file.filename} to upload to GCS")
|
747
|
+
try:
|
748
|
+
# Make file upload async if possible
|
749
|
+
image_uri, mime_type = await self.handle_file_upload_async(file, vector_name)
|
750
|
+
data["image_uri"] = image_uri
|
751
|
+
data["mime"] = mime_type
|
752
|
+
except Exception as e:
|
753
|
+
log.error(traceback.format_exc())
|
754
|
+
return jsonify({'error': str(e), 'traceback': traceback.format_exc()}), 500
|
755
|
+
else:
|
756
|
+
log.error("No file selected")
|
757
|
+
return jsonify({"error": "No file selected"}), 400
|
758
|
+
else:
|
759
|
+
return jsonify({"error": "Unsupported content type"}), 400
|
760
|
+
|
761
|
+
log.info(f"vac/{vector_name} got data: {data}")
|
762
|
+
|
763
|
+
# Run these operations concurrently
|
764
|
+
tasks = []
|
765
|
+
|
766
|
+
# Extract other data while configs load
|
767
|
+
user_input = data.pop('user_input').strip()
|
768
|
+
stream_wait_time = data.pop('stream_wait_time', 7)
|
769
|
+
stream_timeout = data.pop('stream_timeout', 120)
|
770
|
+
chat_history = data.pop('chat_history', None)
|
771
|
+
vector_name_param = data.pop('vector_name', vector_name)
|
772
|
+
data.pop('trace_id', None) # to ensure not in kwargs
|
773
|
+
|
774
|
+
# Task 3: Process chat history
|
775
|
+
chat_history_task = asyncio.create_task(extract_chat_history_async_cached(chat_history))
|
776
|
+
tasks.append(chat_history_task)
|
777
|
+
|
778
|
+
# Await all tasks concurrently
|
779
|
+
results = await asyncio.gather(*tasks, return_exceptions=True)
|
805
780
|
|
806
|
-
|
781
|
+
paired_messages = results[0] if not isinstance(results[0], Exception) else []
|
782
|
+
|
783
|
+
# Only create span after we have trace
|
807
784
|
all_input = {
|
808
785
|
'user_input': user_input,
|
809
786
|
'vector_name': vector_name_param,
|
810
787
|
'chat_history': paired_messages,
|
811
788
|
'stream_wait_time': stream_wait_time,
|
812
789
|
'stream_timeout': stream_timeout,
|
813
|
-
'eval_percent': eval_percent,
|
814
790
|
'kwargs': data
|
815
791
|
}
|
816
|
-
|
817
|
-
# Initialize trace variables
|
818
|
-
trace = None
|
819
|
-
span = None
|
820
|
-
if self.add_langfuse_eval:
|
821
|
-
trace_id = data.get('trace_id')
|
822
|
-
# Create trace in background - don't block
|
823
|
-
trace_future = _thread_pool.submit(self._create_langfuse_trace_background, request, vector_name, trace_id)
|
824
|
-
|
825
|
-
# Try to get trace result if available (don't block long)
|
826
|
-
try:
|
827
|
-
trace = trace_future.result(timeout=0.1) # Very short timeout
|
828
|
-
if trace:
|
829
|
-
this_vac_config = vac_config.configs_by_kind.get("vacConfig")
|
830
|
-
metadata_config = None
|
831
|
-
if this_vac_config:
|
832
|
-
metadata_config = this_vac_config.get(vector_name)
|
833
|
-
trace.update(input=data, metadata=metadata_config)
|
834
|
-
|
835
|
-
span = trace.span(
|
836
|
-
name="VAC",
|
837
|
-
metadata=vac_config.configs_by_kind,
|
838
|
-
input=all_input
|
839
|
-
)
|
840
|
-
except Exception as e:
|
841
|
-
log.warning(f"Langfuse trace creation timed out or failed: {e}")
|
842
|
-
trace = None
|
843
|
-
span = None
|
844
|
-
|
845
|
-
prep_time = time.time() - start_time
|
846
|
-
log.info(f"prep_vac completed in {prep_time:.3f}s")
|
847
|
-
|
792
|
+
|
848
793
|
return {
|
849
|
-
"
|
850
|
-
"span": span,
|
851
|
-
"all_input": all_input,
|
852
|
-
"vac_config": vac_config
|
794
|
+
"all_input": all_input
|
853
795
|
}
|
854
796
|
|
855
797
|
def handle_file_upload(self, file, vector_name):
|
@@ -861,4 +803,3 @@ if __name__ == "__main__":
|
|
861
803
|
except Exception as e:
|
862
804
|
raise Exception(f'File upload failed: {str(e)}')
|
863
805
|
|
864
|
-
|
@@ -14,7 +14,7 @@ sunholo/agents/fastapi/base.py,sha256=W-cyF8ZDUH40rc-c-Apw3-_8IIi2e4Y9qRtnoVnsc1
|
|
14
14
|
sunholo/agents/fastapi/qna_routes.py,sha256=lKHkXPmwltu9EH3RMwmD153-J6pE7kWQ4BhBlV3to-s,3864
|
15
15
|
sunholo/agents/flask/__init__.py,sha256=dEoByI3gDNUOjpX1uVKP7uPjhfFHJubbiaAv3xLopnk,63
|
16
16
|
sunholo/agents/flask/base.py,sha256=vnpxFEOnCmt9humqj-jYPLfJcdwzsop9NorgkJ-tSaU,1756
|
17
|
-
sunholo/agents/flask/vac_routes.py,sha256=
|
17
|
+
sunholo/agents/flask/vac_routes.py,sha256=TEM0u2vkZC0BSKJABxQVPm4QiUsEFoPOwJZIOxzi1Sk,32621
|
18
18
|
sunholo/archive/__init__.py,sha256=qNHWm5rGPVOlxZBZCpA1wTYPbalizRT7f8X4rs2t290,31
|
19
19
|
sunholo/archive/archive.py,sha256=PxVfDtO2_2ZEEbnhXSCbXLdeoHoQVImo4y3Jr2XkCFY,1204
|
20
20
|
sunholo/auth/__init__.py,sha256=TeP-OY0XGxYV_8AQcVGoh35bvyWhNUcMRfhuD5l44Sk,91
|
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
168
168
|
sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
|
169
169
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
170
170
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
171
|
-
sunholo-0.140.
|
172
|
-
sunholo-0.140.
|
173
|
-
sunholo-0.140.
|
174
|
-
sunholo-0.140.
|
175
|
-
sunholo-0.140.
|
176
|
-
sunholo-0.140.
|
171
|
+
sunholo-0.140.11.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
172
|
+
sunholo-0.140.11.dist-info/METADATA,sha256=5dpzFz8d_yiaY_tXGWGUsAxCHy2TPeQcHcooeZzbIcE,10068
|
173
|
+
sunholo-0.140.11.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
|
174
|
+
sunholo-0.140.11.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
175
|
+
sunholo-0.140.11.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
176
|
+
sunholo-0.140.11.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|