sunholo 0.140.10__py3-none-any.whl → 0.140.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,10 +6,6 @@ import random
6
6
  from functools import partial
7
7
  import inspect
8
8
  import asyncio
9
- import time
10
- import threading
11
- from functools import lru_cache
12
- from concurrent.futures import ThreadPoolExecutor
13
9
 
14
10
  from ..chat_history import extract_chat_history_with_cache, extract_chat_history_async_cached
15
11
  from ...qna.parsers import parse_output
@@ -36,11 +32,6 @@ except ImportError:
36
32
  # Cache dictionary to store validated API keys
37
33
  api_key_cache = {}
38
34
  cache_duration = timedelta(minutes=5) # Cache duration
39
- # Global caches and thread pool
40
- _config_cache = {}
41
- _config_lock = threading.Lock()
42
- _thread_pool = ThreadPoolExecutor(max_workers=4)
43
-
44
35
 
45
36
  class VACRoutes:
46
37
  """
@@ -78,44 +69,8 @@ if __name__ == "__main__":
78
69
  self.additional_routes = additional_routes if additional_routes is not None else []
79
70
  self.async_stream = async_stream
80
71
  self.add_langfuse_eval = add_langfuse_eval
81
-
82
- # Pre-warm common configs
83
- self._preload_common_configs()
84
-
85
72
  self.register_routes()
86
-
87
- def _preload_common_configs(self):
88
- """Pre-load commonly used configurations to cache"""
89
- common_vector_names = ["aitana3"] # Add your common vector names
90
- for vector_name in common_vector_names:
91
- try:
92
- self._get_cached_config(vector_name)
93
- log.info(f"Pre-loaded config for {vector_name}")
94
- except Exception as e:
95
- log.warning(f"Failed to pre-load config for {vector_name}: {e}")
96
-
97
- def _get_cached_config(self, vector_name: str):
98
- """Cached config loader with thread safety - CORRECTED VERSION"""
99
- # Check cache first (without lock for read)
100
- if vector_name in _config_cache:
101
- log.debug(f"Using cached config for {vector_name}")
102
- return _config_cache[vector_name]
103
73
 
104
- # Need to load config
105
- with _config_lock:
106
- # Double-check inside lock (another thread might have loaded it)
107
- if vector_name in _config_cache:
108
- return _config_cache[vector_name]
109
-
110
- try:
111
- log.info(f"Loading fresh config for {vector_name}")
112
- config = ConfigManager(vector_name)
113
- _config_cache[vector_name] = config
114
- log.info(f"Cached config for {vector_name}")
115
- return config
116
- except Exception as e:
117
- log.error(f"Error loading config for {vector_name}: {e}")
118
- raise
119
74
 
120
75
  def vac_interpreter_default(self, question: str, vector_name: str, chat_history=[], **kwargs):
121
76
  # Create a callback that does nothing for streaming if you don't want intermediate outputs
@@ -273,18 +228,7 @@ if __name__ == "__main__":
273
228
 
274
229
  log.info(f"OpenAI response: {openai_response}")
275
230
  return jsonify(openai_response)
276
-
277
- def _finalize_trace_background(self, trace, span, response, all_input):
278
- """Finalize trace operations in background"""
279
- try:
280
- if span:
281
- span.end(output=str(response))
282
- if trace:
283
- trace.update(output=str(response))
284
- self.langfuse_eval_response(trace_id=trace.id, eval_percent=all_input.get('eval_percent'))
285
- except Exception as e:
286
- log.warning(f"Background trace finalization failed: {e}")
287
-
231
+
288
232
  def handle_stream_vac(self, vector_name):
289
233
  observed_stream_interpreter = self.stream_interpreter
290
234
  is_async = inspect.iscoroutinefunction(self.stream_interpreter)
@@ -710,131 +654,144 @@ if __name__ == "__main__":
710
654
  tags = tags,
711
655
  release = package_version
712
656
  )
713
-
714
- def _create_langfuse_trace_background(self, request, vector_name, trace_id):
715
- """Create Langfuse trace in background"""
716
- try:
717
- return self.create_langfuse_trace(request, vector_name, trace_id)
718
- except Exception as e:
719
- log.warning(f"Background trace creation failed: {e}")
720
- return None
721
657
 
722
- def _handle_file_upload_background(self, file, vector_name):
723
- """Handle file upload in background thread"""
724
- try:
725
- # Save with timestamp to avoid conflicts
726
- temp_filename = f"temp_{int(time.time() * 1000)}_{file.filename}"
727
- file.save(temp_filename)
728
-
729
- # Upload to GCS
730
- image_uri = add_file_to_gcs(temp_filename, vector_name)
731
-
732
- # Clean up
733
- os.remove(temp_filename)
734
-
735
- return {"image_uri": image_uri, "mime": file.mimetype}
736
- except Exception as e:
737
- log.error(f"Background file upload failed: {e}")
738
- return {}
739
-
740
658
  def prep_vac(self, request, vector_name):
741
- start_time = time.time()
742
-
743
- # Fast request parsing - KEEP ORIGINAL ERROR HANDLING STYLE
659
+
744
660
  if request.content_type.startswith('application/json'):
745
661
  data = request.get_json()
746
662
  elif request.content_type.startswith('multipart/form-data'):
747
663
  data = request.form.to_dict()
748
- # Handle file upload in background if present
749
664
  if 'file' in request.files:
750
665
  file = request.files['file']
751
666
  if file.filename != '':
752
- log.info(f"Found file: {file.filename} - uploading in background")
753
- # Start file upload in background, don't block
754
- upload_future = _thread_pool.submit(self._handle_file_upload_background, file, vector_name)
755
- data["_upload_future"] = upload_future
667
+ log.info(f"Found file: {file.filename} to upload to GCS")
668
+ try:
669
+ image_uri, mime_type = self.handle_file_upload(file, vector_name)
670
+ data["image_uri"] = image_uri
671
+ data["mime"] = mime_type
672
+ except Exception as e:
673
+ log.error(traceback.format_exc())
674
+ return jsonify({'error': str(e), 'traceback': traceback.format_exc()}), 500
675
+ else:
676
+ log.error("No file selected")
677
+ return jsonify({"error": "No file selected"}), 400
756
678
  else:
757
- # KEEP ORIGINAL STYLE - return the error response directly
758
- raise ValueError("Unsupported content type")
679
+ return jsonify({"error": "Unsupported content type"}), 400
759
680
 
760
- log.info(f"vac/{vector_name} got data keys: {list(data.keys())}")
681
+ log.info(f"vac/{vector_name} got data: {data}")
761
682
 
762
- # Get config from cache first (before processing other data)
683
+ trace = None
684
+ span = None
685
+ if self.add_langfuse_eval:
686
+ trace_id = data.get('trace_id')
687
+ trace = self.create_langfuse_trace(request, vector_name, trace_id)
688
+ log.info(f"Using existing langfuse trace: {trace_id}")
689
+
690
+ #config, _ = load_config("config/llm_config.yaml")
763
691
  try:
764
- vac_config = self._get_cached_config(vector_name)
692
+ vac_config = ConfigManager(vector_name)
765
693
  except Exception as e:
766
694
  raise ValueError(f"Unable to find vac_config for {vector_name} - {str(e)}")
767
695
 
768
- # Extract data (keep original logic)
696
+ if trace:
697
+ this_vac_config = vac_config.configs_by_kind.get("vacConfig")
698
+ metadata_config=None
699
+ if this_vac_config:
700
+ metadata_config = this_vac_config.get(vector_name)
701
+
702
+ trace.update(input=data, metadata=metadata_config)
703
+
769
704
  user_input = data.pop('user_input').strip()
770
705
  stream_wait_time = data.pop('stream_wait_time', 7)
771
706
  stream_timeout = data.pop('stream_timeout', 120)
772
707
  chat_history = data.pop('chat_history', None)
773
708
  eval_percent = data.pop('eval_percent', 0.01)
774
- vector_name_param = data.pop('vector_name', vector_name)
775
- data.pop('trace_id', None) # to ensure not in kwargs
709
+ vector_name = data.pop('vector_name', vector_name)
710
+ data.pop('trace_id', None) # to ensure not in kwargs
776
711
 
777
- # Process chat history with caching
778
712
  paired_messages = extract_chat_history_with_cache(chat_history)
779
713
 
780
- # Wait for file upload if it was started (with timeout)
781
- if "_upload_future" in data:
782
- try:
783
- upload_result = data["_upload_future"].result(timeout=3.0) # 3 sec max wait
784
- data.update(upload_result)
785
- log.info(f"File upload completed: {upload_result.get('image_uri', 'no uri')}")
786
- except Exception as e:
787
- log.warning(f"File upload failed or timed out: {e}")
788
- finally:
789
- data.pop("_upload_future", None)
714
+ all_input = {'user_input': user_input,
715
+ 'vector_name': vector_name,
716
+ 'chat_history': paired_messages,
717
+ 'stream_wait_time': stream_wait_time,
718
+ 'stream_timeout': stream_timeout,
719
+ 'eval_percent': eval_percent,
720
+ 'kwargs': data}
721
+
722
+ if trace:
723
+ span = trace.span(
724
+ name="VAC",
725
+ metadata=vac_config.configs_by_kind,
726
+ input = all_input
727
+ )
728
+
729
+ return {
730
+ "trace": trace,
731
+ "span": span,
732
+ "all_input": all_input,
733
+ "vac_config": vac_config
734
+ }
790
735
 
791
- # BUILD all_input BEFORE trace creation (this was moved inside try/catch by mistake)
736
+ async def prep_vac_async(self, request, vector_name):
737
+ """Async version of prep_vac."""
738
+ # Parse request data
739
+ if request.content_type.startswith('application/json'):
740
+ data = request.get_json()
741
+ elif request.content_type.startswith('multipart/form-data'):
742
+ data = request.form.to_dict()
743
+ if 'file' in request.files:
744
+ file = request.files['file']
745
+ if file.filename != '':
746
+ log.info(f"Found file: {file.filename} to upload to GCS")
747
+ try:
748
+ # Make file upload async if possible
749
+ image_uri, mime_type = await self.handle_file_upload_async(file, vector_name)
750
+ data["image_uri"] = image_uri
751
+ data["mime"] = mime_type
752
+ except Exception as e:
753
+ log.error(traceback.format_exc())
754
+ return jsonify({'error': str(e), 'traceback': traceback.format_exc()}), 500
755
+ else:
756
+ log.error("No file selected")
757
+ return jsonify({"error": "No file selected"}), 400
758
+ else:
759
+ return jsonify({"error": "Unsupported content type"}), 400
760
+
761
+ log.info(f"vac/{vector_name} got data: {data}")
762
+
763
+ # Run these operations concurrently
764
+ tasks = []
765
+
766
+ # Extract other data while configs load
767
+ user_input = data.pop('user_input').strip()
768
+ stream_wait_time = data.pop('stream_wait_time', 7)
769
+ stream_timeout = data.pop('stream_timeout', 120)
770
+ chat_history = data.pop('chat_history', None)
771
+ vector_name_param = data.pop('vector_name', vector_name)
772
+ data.pop('trace_id', None) # to ensure not in kwargs
773
+
774
+ # Task 3: Process chat history
775
+ chat_history_task = asyncio.create_task(extract_chat_history_async_cached(chat_history))
776
+ tasks.append(chat_history_task)
777
+
778
+ # Await all tasks concurrently
779
+ results = await asyncio.gather(*tasks, return_exceptions=True)
780
+
781
+ paired_messages = results[0] if not isinstance(results[0], Exception) else []
782
+
783
+ # Only create span after we have trace
792
784
  all_input = {
793
785
  'user_input': user_input,
794
786
  'vector_name': vector_name_param,
795
787
  'chat_history': paired_messages,
796
788
  'stream_wait_time': stream_wait_time,
797
789
  'stream_timeout': stream_timeout,
798
- 'eval_percent': eval_percent,
799
790
  'kwargs': data
800
791
  }
801
-
802
- # Initialize trace variables
803
- trace = None
804
- span = None
805
- if self.add_langfuse_eval:
806
- trace_id = data.get('trace_id')
807
- # Create trace in background - don't block
808
- trace_future = _thread_pool.submit(self._create_langfuse_trace_background, request, vector_name, trace_id)
809
-
810
- # Try to get trace result if available (don't block long)
811
- try:
812
- trace = trace_future.result(timeout=0.1) # Very short timeout
813
- if trace:
814
- this_vac_config = vac_config.configs_by_kind.get("vacConfig")
815
- metadata_config = None
816
- if this_vac_config:
817
- metadata_config = this_vac_config.get(vector_name)
818
- trace.update(input=data, metadata=metadata_config)
819
-
820
- span = trace.span(
821
- name="VAC",
822
- metadata=vac_config.configs_by_kind,
823
- input=all_input
824
- )
825
- except Exception as e:
826
- log.warning(f"Langfuse trace creation timed out or failed: {e}")
827
- trace = None
828
- span = None
829
-
830
- prep_time = time.time() - start_time
831
- log.info(f"prep_vac completed in {prep_time:.3f}s")
832
-
792
+
833
793
  return {
834
- "trace": trace,
835
- "span": span,
836
- "all_input": all_input,
837
- "vac_config": vac_config
794
+ "all_input": all_input
838
795
  }
839
796
 
840
797
  def handle_file_upload(self, file, vector_name):
@@ -846,4 +803,3 @@ if __name__ == "__main__":
846
803
  except Exception as e:
847
804
  raise Exception(f'File upload failed: {str(e)}')
848
805
 
849
-
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sunholo
3
- Version: 0.140.10
3
+ Version: 0.140.11
4
4
  Summary: AI DevOps - a package to help deploy GenAI to the Cloud.
5
5
  Author-email: Holosun ApS <multivac@sunholo.com>
6
6
  License: Apache License, Version 2.0
@@ -14,7 +14,7 @@ sunholo/agents/fastapi/base.py,sha256=W-cyF8ZDUH40rc-c-Apw3-_8IIi2e4Y9qRtnoVnsc1
14
14
  sunholo/agents/fastapi/qna_routes.py,sha256=lKHkXPmwltu9EH3RMwmD153-J6pE7kWQ4BhBlV3to-s,3864
15
15
  sunholo/agents/flask/__init__.py,sha256=dEoByI3gDNUOjpX1uVKP7uPjhfFHJubbiaAv3xLopnk,63
16
16
  sunholo/agents/flask/base.py,sha256=vnpxFEOnCmt9humqj-jYPLfJcdwzsop9NorgkJ-tSaU,1756
17
- sunholo/agents/flask/vac_routes.py,sha256=QfM1nChWLAXhZ4_YYAsiXMss2CeQgxVI0GPD2fNLkmE,34541
17
+ sunholo/agents/flask/vac_routes.py,sha256=TEM0u2vkZC0BSKJABxQVPm4QiUsEFoPOwJZIOxzi1Sk,32621
18
18
  sunholo/archive/__init__.py,sha256=qNHWm5rGPVOlxZBZCpA1wTYPbalizRT7f8X4rs2t290,31
19
19
  sunholo/archive/archive.py,sha256=PxVfDtO2_2ZEEbnhXSCbXLdeoHoQVImo4y3Jr2XkCFY,1204
20
20
  sunholo/auth/__init__.py,sha256=TeP-OY0XGxYV_8AQcVGoh35bvyWhNUcMRfhuD5l44Sk,91
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
168
168
  sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
169
169
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
170
170
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
171
- sunholo-0.140.10.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
- sunholo-0.140.10.dist-info/METADATA,sha256=EfU0N2go-DI1A6WaL5p_K9TtXwf8PkZCdUWZOoM7j4c,10068
173
- sunholo-0.140.10.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
174
- sunholo-0.140.10.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
- sunholo-0.140.10.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
- sunholo-0.140.10.dist-info/RECORD,,
171
+ sunholo-0.140.11.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
+ sunholo-0.140.11.dist-info/METADATA,sha256=5dpzFz8d_yiaY_tXGWGUsAxCHy2TPeQcHcooeZzbIcE,10068
173
+ sunholo-0.140.11.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
174
+ sunholo-0.140.11.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
+ sunholo-0.140.11.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
+ sunholo-0.140.11.dist-info/RECORD,,