sunholo 0.140.9__py3-none-any.whl → 0.140.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,10 +6,6 @@ import random
6
6
  from functools import partial
7
7
  import inspect
8
8
  import asyncio
9
- import time
10
- import threading
11
- from functools import lru_cache
12
- from concurrent.futures import ThreadPoolExecutor
13
9
 
14
10
  from ..chat_history import extract_chat_history_with_cache, extract_chat_history_async_cached
15
11
  from ...qna.parsers import parse_output
@@ -36,11 +32,6 @@ except ImportError:
36
32
  # Cache dictionary to store validated API keys
37
33
  api_key_cache = {}
38
34
  cache_duration = timedelta(minutes=5) # Cache duration
39
- # Global caches and thread pool
40
- _config_cache = {}
41
- _config_lock = threading.Lock()
42
- _thread_pool = ThreadPoolExecutor(max_workers=4)
43
-
44
35
 
45
36
  class VACRoutes:
46
37
  """
@@ -78,44 +69,8 @@ if __name__ == "__main__":
78
69
  self.additional_routes = additional_routes if additional_routes is not None else []
79
70
  self.async_stream = async_stream
80
71
  self.add_langfuse_eval = add_langfuse_eval
81
-
82
- # Pre-warm common configs
83
- self._preload_common_configs()
84
-
85
72
  self.register_routes()
86
-
87
- def _preload_common_configs(self):
88
- """Pre-load commonly used configurations to cache"""
89
- common_vector_names = ["aitana3"] # Add your common vector names
90
- for vector_name in common_vector_names:
91
- try:
92
- self._get_cached_config(vector_name)
93
- log.info(f"Pre-loaded config for {vector_name}")
94
- except Exception as e:
95
- log.warning(f"Failed to pre-load config for {vector_name}: {e}")
96
-
97
- def _get_cached_config(self, vector_name: str):
98
- """Cached config loader with thread safety - CORRECTED VERSION"""
99
- # Check cache first (without lock for read)
100
- if vector_name in _config_cache:
101
- log.debug(f"Using cached config for {vector_name}")
102
- return _config_cache[vector_name]
103
73
 
104
- # Need to load config
105
- with _config_lock:
106
- # Double-check inside lock (another thread might have loaded it)
107
- if vector_name in _config_cache:
108
- return _config_cache[vector_name]
109
-
110
- try:
111
- log.info(f"Loading fresh config for {vector_name}")
112
- config = ConfigManager(vector_name)
113
- _config_cache[vector_name] = config
114
- log.info(f"Cached config for {vector_name}")
115
- return config
116
- except Exception as e:
117
- log.error(f"Error loading config for {vector_name}: {e}")
118
- raise
119
74
 
120
75
  def vac_interpreter_default(self, question: str, vector_name: str, chat_history=[], **kwargs):
121
76
  # Create a callback that does nothing for streaming if you don't want intermediate outputs
@@ -273,43 +228,22 @@ if __name__ == "__main__":
273
228
 
274
229
  log.info(f"OpenAI response: {openai_response}")
275
230
  return jsonify(openai_response)
276
-
277
- def _finalize_trace_background(self, trace, span, response, all_input):
278
- """Finalize trace operations in background"""
279
- try:
280
- if span:
281
- span.end(output=str(response))
282
- if trace:
283
- trace.update(output=str(response))
284
- self.langfuse_eval_response(trace_id=trace.id, eval_percent=all_input.get('eval_percent'))
285
- except Exception as e:
286
- log.warning(f"Background trace finalization failed: {e}")
287
-
231
+
288
232
  def handle_stream_vac(self, vector_name):
289
- request_start = time.time()
290
233
  observed_stream_interpreter = self.stream_interpreter
291
234
  is_async = inspect.iscoroutinefunction(self.stream_interpreter)
292
235
 
293
236
  if is_async:
294
237
  log.info(f"Stream interpreter is async: {observed_stream_interpreter}")
295
238
 
296
- # Call prep_vac and handle errors properly
297
- try:
298
- prep = self.prep_vac(request, vector_name)
299
- except Exception as e:
300
- log.error(f"prep_vac failed: {e}")
301
- error_response = {'error': f'Prep error: {str(e)}'}
302
- return jsonify(error_response), 500
303
-
304
- log.info(f"Processing prep completed in {time.time() - request_start:.3f}s")
305
-
306
- trace = prep.get("trace")
307
- span = prep.get("span")
239
+ prep = self.prep_vac(request, vector_name)
240
+ log.info(f"Processing prep: {prep}")
241
+ trace = prep["trace"]
242
+ span = prep["span"]
308
243
  vac_config = prep["vac_config"]
309
244
  all_input = prep["all_input"]
310
245
 
311
- log.info(f'Starting stream with: {all_input["user_input"][:100]}...')
312
-
246
+ log.info(f'Streaming data with: {all_input}')
313
247
  if span:
314
248
  span.update(
315
249
  name="start_streaming_chat",
@@ -320,7 +254,7 @@ if __name__ == "__main__":
320
254
  def generate_response_content():
321
255
  try:
322
256
  if is_async:
323
- from queue import Queue
257
+ from queue import Queue, Empty
324
258
  result_queue = Queue()
325
259
  import threading
326
260
 
@@ -337,7 +271,7 @@ if __name__ == "__main__":
337
271
  trace_id=trace.id if trace else None,
338
272
  **all_input["kwargs"]
339
273
  )
340
-
274
+ log.info(f"{async_gen=}")
341
275
  async for chunk in async_gen:
342
276
  if isinstance(chunk, dict) and 'answer' in chunk:
343
277
  if trace:
@@ -350,12 +284,9 @@ if __name__ == "__main__":
350
284
  else:
351
285
  result_queue.put(chunk)
352
286
  except Exception as e:
353
- error_msg = f"Streaming Error: {str(e)} {traceback.format_exc()}"
354
- log.error(error_msg)
355
- result_queue.put(error_msg)
287
+ result_queue.put(f"Streaming Error: {str(e)} {traceback.format_exc()}")
356
288
  finally:
357
289
  result_queue.put(None) # Sentinel
358
-
359
290
  asyncio.run(process_async())
360
291
 
361
292
  thread = threading.Thread(target=run_async)
@@ -370,7 +301,7 @@ if __name__ == "__main__":
370
301
 
371
302
  thread.join()
372
303
  else:
373
- log.info("Starting sync streaming response")
304
+ log.info("sync streaming response")
374
305
  for chunk in start_streaming_chat(
375
306
  question=all_input["user_input"],
376
307
  vector_name=vector_name,
@@ -394,19 +325,17 @@ if __name__ == "__main__":
394
325
  yield chunk
395
326
 
396
327
  except Exception as e:
397
- error_msg = f"Streaming Error: {str(e)} {traceback.format_exc()}"
398
- log.error(error_msg)
399
- yield error_msg
328
+ yield f"Streaming Error: {str(e)} {traceback.format_exc()}"
400
329
 
401
- # Create streaming response
330
+ # Here, the generator function will handle streaming the content to the client.
402
331
  response = Response(generate_response_content(), content_type='text/plain; charset=utf-8')
403
332
  response.headers['Transfer-Encoding'] = 'chunked'
404
333
 
405
- log.info(f"Streaming response created in {time.time() - request_start:.3f}s")
406
-
407
- # Do final trace operations in background (don't block the response)
334
+ log.debug(f"streaming response: {response}")
408
335
  if trace:
409
- _thread_pool.submit(self._finalize_trace_background, trace, span, response, all_input)
336
+ span.end(output=response)
337
+ trace.update(output=response)
338
+ self.langfuse_eval_response(trace_id=trace.id, eval_percent=all_input.get('eval_percent'))
410
339
 
411
340
  return response
412
341
 
@@ -725,131 +654,144 @@ if __name__ == "__main__":
725
654
  tags = tags,
726
655
  release = package_version
727
656
  )
728
-
729
- def _create_langfuse_trace_background(self, request, vector_name, trace_id):
730
- """Create Langfuse trace in background"""
731
- try:
732
- return self.create_langfuse_trace(request, vector_name, trace_id)
733
- except Exception as e:
734
- log.warning(f"Background trace creation failed: {e}")
735
- return None
736
657
 
737
- def _handle_file_upload_background(self, file, vector_name):
738
- """Handle file upload in background thread"""
739
- try:
740
- # Save with timestamp to avoid conflicts
741
- temp_filename = f"temp_{int(time.time() * 1000)}_{file.filename}"
742
- file.save(temp_filename)
743
-
744
- # Upload to GCS
745
- image_uri = add_file_to_gcs(temp_filename, vector_name)
746
-
747
- # Clean up
748
- os.remove(temp_filename)
749
-
750
- return {"image_uri": image_uri, "mime": file.mimetype}
751
- except Exception as e:
752
- log.error(f"Background file upload failed: {e}")
753
- return {}
754
-
755
658
  def prep_vac(self, request, vector_name):
756
- start_time = time.time()
757
-
758
- # Fast request parsing - KEEP ORIGINAL ERROR HANDLING STYLE
659
+
759
660
  if request.content_type.startswith('application/json'):
760
661
  data = request.get_json()
761
662
  elif request.content_type.startswith('multipart/form-data'):
762
663
  data = request.form.to_dict()
763
- # Handle file upload in background if present
764
664
  if 'file' in request.files:
765
665
  file = request.files['file']
766
666
  if file.filename != '':
767
- log.info(f"Found file: {file.filename} - uploading in background")
768
- # Start file upload in background, don't block
769
- upload_future = _thread_pool.submit(self._handle_file_upload_background, file, vector_name)
770
- data["_upload_future"] = upload_future
667
+ log.info(f"Found file: {file.filename} to upload to GCS")
668
+ try:
669
+ image_uri, mime_type = self.handle_file_upload(file, vector_name)
670
+ data["image_uri"] = image_uri
671
+ data["mime"] = mime_type
672
+ except Exception as e:
673
+ log.error(traceback.format_exc())
674
+ return jsonify({'error': str(e), 'traceback': traceback.format_exc()}), 500
675
+ else:
676
+ log.error("No file selected")
677
+ return jsonify({"error": "No file selected"}), 400
771
678
  else:
772
- # KEEP ORIGINAL STYLE - return the error response directly
773
- raise ValueError("Unsupported content type")
679
+ return jsonify({"error": "Unsupported content type"}), 400
774
680
 
775
- log.info(f"vac/{vector_name} got data keys: {list(data.keys())}")
681
+ log.info(f"vac/{vector_name} got data: {data}")
776
682
 
777
- # Get config from cache first (before processing other data)
683
+ trace = None
684
+ span = None
685
+ if self.add_langfuse_eval:
686
+ trace_id = data.get('trace_id')
687
+ trace = self.create_langfuse_trace(request, vector_name, trace_id)
688
+ log.info(f"Using existing langfuse trace: {trace_id}")
689
+
690
+ #config, _ = load_config("config/llm_config.yaml")
778
691
  try:
779
- vac_config = self._get_cached_config(vector_name)
692
+ vac_config = ConfigManager(vector_name)
780
693
  except Exception as e:
781
694
  raise ValueError(f"Unable to find vac_config for {vector_name} - {str(e)}")
782
695
 
783
- # Extract data (keep original logic)
696
+ if trace:
697
+ this_vac_config = vac_config.configs_by_kind.get("vacConfig")
698
+ metadata_config=None
699
+ if this_vac_config:
700
+ metadata_config = this_vac_config.get(vector_name)
701
+
702
+ trace.update(input=data, metadata=metadata_config)
703
+
784
704
  user_input = data.pop('user_input').strip()
785
705
  stream_wait_time = data.pop('stream_wait_time', 7)
786
706
  stream_timeout = data.pop('stream_timeout', 120)
787
707
  chat_history = data.pop('chat_history', None)
788
708
  eval_percent = data.pop('eval_percent', 0.01)
789
- vector_name_param = data.pop('vector_name', vector_name)
790
- data.pop('trace_id', None) # to ensure not in kwargs
709
+ vector_name = data.pop('vector_name', vector_name)
710
+ data.pop('trace_id', None) # to ensure not in kwargs
791
711
 
792
- # Process chat history with caching
793
712
  paired_messages = extract_chat_history_with_cache(chat_history)
794
713
 
795
- # Wait for file upload if it was started (with timeout)
796
- if "_upload_future" in data:
797
- try:
798
- upload_result = data["_upload_future"].result(timeout=3.0) # 3 sec max wait
799
- data.update(upload_result)
800
- log.info(f"File upload completed: {upload_result.get('image_uri', 'no uri')}")
801
- except Exception as e:
802
- log.warning(f"File upload failed or timed out: {e}")
803
- finally:
804
- data.pop("_upload_future", None)
714
+ all_input = {'user_input': user_input,
715
+ 'vector_name': vector_name,
716
+ 'chat_history': paired_messages,
717
+ 'stream_wait_time': stream_wait_time,
718
+ 'stream_timeout': stream_timeout,
719
+ 'eval_percent': eval_percent,
720
+ 'kwargs': data}
721
+
722
+ if trace:
723
+ span = trace.span(
724
+ name="VAC",
725
+ metadata=vac_config.configs_by_kind,
726
+ input = all_input
727
+ )
728
+
729
+ return {
730
+ "trace": trace,
731
+ "span": span,
732
+ "all_input": all_input,
733
+ "vac_config": vac_config
734
+ }
735
+
736
+ async def prep_vac_async(self, request, vector_name):
737
+ """Async version of prep_vac."""
738
+ # Parse request data
739
+ if request.content_type.startswith('application/json'):
740
+ data = request.get_json()
741
+ elif request.content_type.startswith('multipart/form-data'):
742
+ data = request.form.to_dict()
743
+ if 'file' in request.files:
744
+ file = request.files['file']
745
+ if file.filename != '':
746
+ log.info(f"Found file: {file.filename} to upload to GCS")
747
+ try:
748
+ # Make file upload async if possible
749
+ image_uri, mime_type = await self.handle_file_upload_async(file, vector_name)
750
+ data["image_uri"] = image_uri
751
+ data["mime"] = mime_type
752
+ except Exception as e:
753
+ log.error(traceback.format_exc())
754
+ return jsonify({'error': str(e), 'traceback': traceback.format_exc()}), 500
755
+ else:
756
+ log.error("No file selected")
757
+ return jsonify({"error": "No file selected"}), 400
758
+ else:
759
+ return jsonify({"error": "Unsupported content type"}), 400
760
+
761
+ log.info(f"vac/{vector_name} got data: {data}")
762
+
763
+ # Run these operations concurrently
764
+ tasks = []
765
+
766
+ # Extract other data while configs load
767
+ user_input = data.pop('user_input').strip()
768
+ stream_wait_time = data.pop('stream_wait_time', 7)
769
+ stream_timeout = data.pop('stream_timeout', 120)
770
+ chat_history = data.pop('chat_history', None)
771
+ vector_name_param = data.pop('vector_name', vector_name)
772
+ data.pop('trace_id', None) # to ensure not in kwargs
773
+
774
+ # Task 3: Process chat history
775
+ chat_history_task = asyncio.create_task(extract_chat_history_async_cached(chat_history))
776
+ tasks.append(chat_history_task)
777
+
778
+ # Await all tasks concurrently
779
+ results = await asyncio.gather(*tasks, return_exceptions=True)
805
780
 
806
- # BUILD all_input BEFORE trace creation (this was moved inside try/catch by mistake)
781
+ paired_messages = results[0] if not isinstance(results[0], Exception) else []
782
+
783
+ # Only create span after we have trace
807
784
  all_input = {
808
785
  'user_input': user_input,
809
786
  'vector_name': vector_name_param,
810
787
  'chat_history': paired_messages,
811
788
  'stream_wait_time': stream_wait_time,
812
789
  'stream_timeout': stream_timeout,
813
- 'eval_percent': eval_percent,
814
790
  'kwargs': data
815
791
  }
816
-
817
- # Initialize trace variables
818
- trace = None
819
- span = None
820
- if self.add_langfuse_eval:
821
- trace_id = data.get('trace_id')
822
- # Create trace in background - don't block
823
- trace_future = _thread_pool.submit(self._create_langfuse_trace_background, request, vector_name, trace_id)
824
-
825
- # Try to get trace result if available (don't block long)
826
- try:
827
- trace = trace_future.result(timeout=0.1) # Very short timeout
828
- if trace:
829
- this_vac_config = vac_config.configs_by_kind.get("vacConfig")
830
- metadata_config = None
831
- if this_vac_config:
832
- metadata_config = this_vac_config.get(vector_name)
833
- trace.update(input=data, metadata=metadata_config)
834
-
835
- span = trace.span(
836
- name="VAC",
837
- metadata=vac_config.configs_by_kind,
838
- input=all_input
839
- )
840
- except Exception as e:
841
- log.warning(f"Langfuse trace creation timed out or failed: {e}")
842
- trace = None
843
- span = None
844
-
845
- prep_time = time.time() - start_time
846
- log.info(f"prep_vac completed in {prep_time:.3f}s")
847
-
792
+
848
793
  return {
849
- "trace": trace,
850
- "span": span,
851
- "all_input": all_input,
852
- "vac_config": vac_config
794
+ "all_input": all_input
853
795
  }
854
796
 
855
797
  def handle_file_upload(self, file, vector_name):
@@ -861,4 +803,3 @@ if __name__ == "__main__":
861
803
  except Exception as e:
862
804
  raise Exception(f'File upload failed: {str(e)}')
863
805
 
864
-
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sunholo
3
- Version: 0.140.9
3
+ Version: 0.140.11
4
4
  Summary: AI DevOps - a package to help deploy GenAI to the Cloud.
5
5
  Author-email: Holosun ApS <multivac@sunholo.com>
6
6
  License: Apache License, Version 2.0
@@ -14,7 +14,7 @@ sunholo/agents/fastapi/base.py,sha256=W-cyF8ZDUH40rc-c-Apw3-_8IIi2e4Y9qRtnoVnsc1
14
14
  sunholo/agents/fastapi/qna_routes.py,sha256=lKHkXPmwltu9EH3RMwmD153-J6pE7kWQ4BhBlV3to-s,3864
15
15
  sunholo/agents/flask/__init__.py,sha256=dEoByI3gDNUOjpX1uVKP7uPjhfFHJubbiaAv3xLopnk,63
16
16
  sunholo/agents/flask/base.py,sha256=vnpxFEOnCmt9humqj-jYPLfJcdwzsop9NorgkJ-tSaU,1756
17
- sunholo/agents/flask/vac_routes.py,sha256=eafqIudPKAtsOC73bnIXCpreL8AhMz_LQ212HuXqGhc,35101
17
+ sunholo/agents/flask/vac_routes.py,sha256=TEM0u2vkZC0BSKJABxQVPm4QiUsEFoPOwJZIOxzi1Sk,32621
18
18
  sunholo/archive/__init__.py,sha256=qNHWm5rGPVOlxZBZCpA1wTYPbalizRT7f8X4rs2t290,31
19
19
  sunholo/archive/archive.py,sha256=PxVfDtO2_2ZEEbnhXSCbXLdeoHoQVImo4y3Jr2XkCFY,1204
20
20
  sunholo/auth/__init__.py,sha256=TeP-OY0XGxYV_8AQcVGoh35bvyWhNUcMRfhuD5l44Sk,91
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
168
168
  sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
169
169
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
170
170
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
171
- sunholo-0.140.9.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
- sunholo-0.140.9.dist-info/METADATA,sha256=PrTJywV40Kp5vGdiwPaLEzu2pujsY0DuoJSKuhis0MA,10067
173
- sunholo-0.140.9.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
174
- sunholo-0.140.9.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
- sunholo-0.140.9.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
- sunholo-0.140.9.dist-info/RECORD,,
171
+ sunholo-0.140.11.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
+ sunholo-0.140.11.dist-info/METADATA,sha256=5dpzFz8d_yiaY_tXGWGUsAxCHy2TPeQcHcooeZzbIcE,10068
173
+ sunholo-0.140.11.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
174
+ sunholo-0.140.11.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
+ sunholo-0.140.11.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
+ sunholo-0.140.11.dist-info/RECORD,,