sunholo 0.140.6__py3-none-any.whl → 0.140.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sunholo/agents/flask/base.py

@@ -11,12 +11,38 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from ...custom_logging import log
+import time
+
 def create_app(name):
-    from flask import Flask
+    from flask import Flask, request

     app = Flask(name)

     app.config['TRAP_HTTP_EXCEPTIONS'] = True
     app.config['PROPAGATE_EXCEPTIONS'] = True

+    @app.before_request
+    def start_timer():
+        request.start_time = time.time()
+
+    @app.after_request
+    def log_timing(response):
+        if hasattr(request, 'start_time'):
+            duration = time.time() - request.start_time
+
+            # Log all VAC requests with different detail levels
+            if request.path.startswith('/vac/streaming/'):
+                log.info(f"🚀 STREAMING: {duration:.3f}s - {request.path}")
+            elif request.path.startswith('/vac/'):
+                log.info(f"⚡ VAC: {duration:.3f}s - {request.path}")
+            elif duration > 1.0: # Log any slow requests
+                log.warning(f"🐌 SLOW REQUEST: {duration:.3f}s - {request.path}")
+
+            # Add response headers with timing info for debugging
+            response.headers['X-Response-Time'] = f"{duration:.3f}s"
+
+        return response
+
     return app
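
The new timing middleware stamps every response with an X-Response-Time header, so the change can be verified from the client side. A minimal sketch, assuming a server built with create_app() is listening locally; the host, port, and VAC name are hypothetical:

    import requests

    # Hypothetical local server and VAC name; substitute your own deployment.
    resp = requests.post(
        "http://localhost:8080/vac/streaming/demo",
        json={"user_input": "hello"},
    )

    # The after_request hook adds the timing header to every response.
    print(resp.headers.get("X-Response-Time"))  # e.g. "0.412s"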
sunholo/agents/flask/vac_routes.py

@@ -6,6 +6,10 @@ import random
 from functools import partial
 import inspect
 import asyncio
+import time
+import threading
+from functools import lru_cache
+from concurrent.futures import ThreadPoolExecutor

 from ..chat_history import extract_chat_history_with_cache, extract_chat_history_async_cached
 from ...qna.parsers import parse_output
@@ -32,6 +36,11 @@ except ImportError:
 # Cache dictionary to store validated API keys
 api_key_cache = {}
 cache_duration = timedelta(minutes=5) # Cache duration
+# Global caches and thread pool
+_config_cache = {}
+_config_lock = threading.Lock()
+_thread_pool = ThreadPoolExecutor(max_workers=4)
+

 class VACRoutes:
     """
@@ -69,8 +78,44 @@ if __name__ == "__main__":
         self.additional_routes = additional_routes if additional_routes is not None else []
         self.async_stream = async_stream
         self.add_langfuse_eval = add_langfuse_eval
+
+        # Pre-warm common configs
+        self._preload_common_configs()
+
         self.register_routes()
+
+    def _preload_common_configs(self):
+        """Pre-load commonly used configurations to cache"""
+        common_vector_names = ["aitana3"] # Add your common vector names
+        for vector_name in common_vector_names:
+            try:
+                self._get_cached_config(vector_name)
+                log.info(f"Pre-loaded config for {vector_name}")
+            except Exception as e:
+                log.warning(f"Failed to pre-load config for {vector_name}: {e}")
+
+    def _get_cached_config(self, vector_name: str):
+        """Cached config loader with thread safety - CORRECTED VERSION"""
+        # Check cache first (without lock for read)
+        if vector_name in _config_cache:
+            log.debug(f"Using cached config for {vector_name}")
+            return _config_cache[vector_name]

+        # Need to load config
+        with _config_lock:
+            # Double-check inside lock (another thread might have loaded it)
+            if vector_name in _config_cache:
+                return _config_cache[vector_name]
+
+            try:
+                log.info(f"Loading fresh config for {vector_name}")
+                config = ConfigManager(vector_name)
+                _config_cache[vector_name] = config
+                log.info(f"Cached config for {vector_name}")
+                return config
+            except Exception as e:
+                log.error(f"Error loading config for {vector_name}: {e}")
+                raise

     def vac_interpreter_default(self, question: str, vector_name: str, chat_history=[], **kwargs):
         # Create a callback that does nothing for streaming if you don't want intermediate outputs
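
The new _get_cached_config is a double-checked locking cache: the first membership test runs without the lock so cache hits stay cheap, and the second test inside the lock stops two threads from loading the same config twice. A standalone sketch of the same pattern, with a hypothetical load_config standing in for ConfigManager:

    import threading
    import time
    from concurrent.futures import ThreadPoolExecutor

    _cache = {}
    _lock = threading.Lock()

    def load_config(name):
        """Hypothetical slow loader standing in for ConfigManager(name)."""
        time.sleep(0.5)
        return {"name": name}

    def get_cached(name):
        if name in _cache:           # fast path: no lock on cache hits
            return _cache[name]
        with _lock:
            if name in _cache:       # double-check: another thread may have won
                return _cache[name]
            _cache[name] = load_config(name)
            return _cache[name]

    # Ten concurrent callers trigger exactly one load_config call.
    with ThreadPoolExecutor(max_workers=10) as pool:
        results = list(pool.map(get_cached, ["demo"] * 10))
    assert all(r is results[0] for r in results)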
@@ -228,22 +273,43 @@ if __name__ == "__main__":

         log.info(f"OpenAI response: {openai_response}")
         return jsonify(openai_response)
-
+
+    def _finalize_trace_background(self, trace, span, response, all_input):
+        """Finalize trace operations in background"""
+        try:
+            if span:
+                span.end(output=str(response))
+            if trace:
+                trace.update(output=str(response))
+                self.langfuse_eval_response(trace_id=trace.id, eval_percent=all_input.get('eval_percent'))
+        except Exception as e:
+            log.warning(f"Background trace finalization failed: {e}")
+
     def handle_stream_vac(self, vector_name):
+        request_start = time.time()
         observed_stream_interpreter = self.stream_interpreter
         is_async = inspect.iscoroutinefunction(self.stream_interpreter)

         if is_async:
             log.info(f"Stream interpreter is async: {observed_stream_interpreter}")

-        prep = self.prep_vac(request, vector_name)
-        log.info(f"Processing prep: {prep}")
-        trace = prep["trace"]
-        span = prep["span"]
+        # Call prep_vac and handle errors properly
+        try:
+            prep = self.prep_vac(request, vector_name)
+        except Exception as e:
+            log.error(f"prep_vac failed: {e}")
+            error_response = {'error': f'Prep error: {str(e)}'}
+            return jsonify(error_response), 500
+
+        log.info(f"Processing prep completed in {time.time() - request_start:.3f}s")
+
+        trace = prep.get("trace")
+        span = prep.get("span")
         vac_config = prep["vac_config"]
         all_input = prep["all_input"]

-        log.info(f'Streaming data with: {all_input}')
+        log.info(f'Starting stream with: {all_input["user_input"][:100]}...')
+
         if span:
             span.update(
                 name="start_streaming_chat",
@@ -254,7 +320,7 @@
         def generate_response_content():
             try:
                 if is_async:
-                    from queue import Queue, Empty
+                    from queue import Queue
                     result_queue = Queue()
                     import threading
@@ -271,7 +337,7 @@
                                     trace_id=trace.id if trace else None,
                                     **all_input["kwargs"]
                                 )
-                                log.info(f"{async_gen=}")
+
                                 async for chunk in async_gen:
                                     if isinstance(chunk, dict) and 'answer' in chunk:
                                         if trace:
@@ -284,9 +350,12 @@
                                     else:
                                         result_queue.put(chunk)
                             except Exception as e:
-                                result_queue.put(f"Streaming Error: {str(e)} {traceback.format_exc()}")
+                                error_msg = f"Streaming Error: {str(e)} {traceback.format_exc()}"
+                                log.error(error_msg)
+                                result_queue.put(error_msg)
                             finally:
                                 result_queue.put(None) # Sentinel
+
                         asyncio.run(process_async())

                     thread = threading.Thread(target=run_async)
@@ -301,7 +370,7 @@

                     thread.join()
                 else:
-                    log.info("sync streaming response")
+                    log.info("Starting sync streaming response")
                    for chunk in start_streaming_chat(
                        question=all_input["user_input"],
                        vector_name=vector_name,
@@ -325,17 +394,19 @@
                        yield chunk

            except Exception as e:
-                yield f"Streaming Error: {str(e)} {traceback.format_exc()}"
+                error_msg = f"Streaming Error: {str(e)} {traceback.format_exc()}"
+                log.error(error_msg)
+                yield error_msg

-        # Here, the generator function will handle streaming the content to the client.
+        # Create streaming response
         response = Response(generate_response_content(), content_type='text/plain; charset=utf-8')
         response.headers['Transfer-Encoding'] = 'chunked'

-        log.debug(f"streaming response: {response}")
+        log.info(f"Streaming response created in {time.time() - request_start:.3f}s")
+
+        # Do final trace operations in background (don't block the response)
         if trace:
-            span.end(output=response)
-            trace.update(output=response)
-            self.langfuse_eval_response(trace_id=trace.id, eval_percent=all_input.get('eval_percent'))
+            _thread_pool.submit(self._finalize_trace_background, trace, span, response, all_input)

         return response

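The async branch of generate_response_content bridges an async generator into Flask's synchronous streaming model: a worker thread runs the event loop and pushes chunks onto a Queue, with None as the end-of-stream sentinel, while the caller drains the queue synchronously. A minimal, self-contained sketch of that bridge, using a hypothetical fake_stream in place of the real stream interpreter:

    import asyncio
    import threading
    from queue import Queue

    async def fake_stream():
        """Hypothetical async generator standing in for the stream interpreter."""
        for word in ["hello", "from", "the", "event", "loop"]:
            await asyncio.sleep(0.1)
            yield word

    def sync_chunks():
        """Yield chunks synchronously from an async generator."""
        q = Queue()

        def run_async():
            async def pump():
                try:
                    async for chunk in fake_stream():
                        q.put(chunk)
                finally:
                    q.put(None)  # sentinel: no more chunks
            asyncio.run(pump())

        threading.Thread(target=run_async, daemon=True).start()
        while (chunk := q.get()) is not None:
            yield chunk

    for c in sync_chunks():
        print(c)
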
@@ -654,78 +725,127 @@ if __name__ == "__main__":
             tags = tags,
             release = package_version
         )
+
+    def _create_langfuse_trace_background(self, request, vector_name, trace_id):
+        """Create Langfuse trace in background"""
+        try:
+            return self.create_langfuse_trace(request, vector_name, trace_id)
+        except Exception as e:
+            log.warning(f"Background trace creation failed: {e}")
+            return None

+    def _handle_file_upload_background(self, file, vector_name):
+        """Handle file upload in background thread"""
+        try:
+            # Save with timestamp to avoid conflicts
+            temp_filename = f"temp_{int(time.time() * 1000)}_{file.filename}"
+            file.save(temp_filename)
+
+            # Upload to GCS
+            image_uri = add_file_to_gcs(temp_filename, vector_name)
+
+            # Clean up
+            os.remove(temp_filename)
+
+            return {"image_uri": image_uri, "mime": file.mimetype}
+        except Exception as e:
+            log.error(f"Background file upload failed: {e}")
+            return {}
+
     def prep_vac(self, request, vector_name):
-
+        start_time = time.time()
+
+        # Fast request parsing - KEEP ORIGINAL ERROR HANDLING STYLE
         if request.content_type.startswith('application/json'):
             data = request.get_json()
         elif request.content_type.startswith('multipart/form-data'):
             data = request.form.to_dict()
+            # Handle file upload in background if present
             if 'file' in request.files:
                 file = request.files['file']
                 if file.filename != '':
-                    log.info(f"Found file: {file.filename} to upload to GCS")
-                    try:
-                        image_uri, mime_type = self.handle_file_upload(file, vector_name)
-                        data["image_uri"] = image_uri
-                        data["mime"] = mime_type
-                    except Exception as e:
-                        log.error(traceback.format_exc())
-                        return jsonify({'error': str(e), 'traceback': traceback.format_exc()}), 500
-                else:
-                    log.error("No file selected")
-                    return jsonify({"error": "No file selected"}), 400
+                    log.info(f"Found file: {file.filename} - uploading in background")
+                    # Start file upload in background, don't block
+                    upload_future = _thread_pool.submit(self._handle_file_upload_background, file, vector_name)
+                    data["_upload_future"] = upload_future
         else:
-            return jsonify({"error": "Unsupported content type"}), 400
+            # KEEP ORIGINAL STYLE - return the error response directly
+            raise ValueError("Unsupported content type")

-        log.info(f"vac/{vector_name} got data: {data}")
+        log.info(f"vac/{vector_name} got data keys: {list(data.keys())}")

-        trace = None
-        span = None
-        if self.add_langfuse_eval:
-            trace_id = data.get('trace_id')
-            trace = self.create_langfuse_trace(request, vector_name, trace_id)
-            log.info(f"Using existing langfuse trace: {trace_id}")
-
-        #config, _ = load_config("config/llm_config.yaml")
+        # Get config from cache first (before processing other data)
         try:
-            vac_config = ConfigManager(vector_name)
+            vac_config = self._get_cached_config(vector_name)
         except Exception as e:
             raise ValueError(f"Unable to find vac_config for {vector_name} - {str(e)}")

-        if trace:
-            this_vac_config = vac_config.configs_by_kind.get("vacConfig")
-            metadata_config=None
-            if this_vac_config:
-                metadata_config = this_vac_config.get(vector_name)
-
-            trace.update(input=data, metadata=metadata_config)
+        # Initialize trace variables
+        trace = None
+        span = None
+        if self.add_langfuse_eval:
+            trace_id = data.get('trace_id')
+            # Create trace in background - don't block
+            trace_future = _thread_pool.submit(self._create_langfuse_trace_background, request, vector_name, trace_id)

+        # Extract data (keep original logic)
         user_input = data.pop('user_input').strip()
         stream_wait_time = data.pop('stream_wait_time', 7)
         stream_timeout = data.pop('stream_timeout', 120)
         chat_history = data.pop('chat_history', None)
         eval_percent = data.pop('eval_percent', 0.01)
-        vector_name = data.pop('vector_name', vector_name)
-        data.pop('trace_id', None) # to ensure not in kwargs
+        vector_name_param = data.pop('vector_name', vector_name)
+        data.pop('trace_id', None) # to ensure not in kwargs

+        # Process chat history with caching
         paired_messages = extract_chat_history_with_cache(chat_history)

-        all_input = {'user_input': user_input,
-                     'vector_name': vector_name,
-                     'chat_history': paired_messages,
-                     'stream_wait_time': stream_wait_time,
-                     'stream_timeout': stream_timeout,
-                     'eval_percent': eval_percent,
-                     'kwargs': data}
+        # Wait for file upload if it was started (with timeout)
+        if "_upload_future" in data:
+            try:
+                upload_result = data["_upload_future"].result(timeout=3.0) # 3 sec max wait
+                data.update(upload_result)
+                log.info(f"File upload completed: {upload_result.get('image_uri', 'no uri')}")
+            except Exception as e:
+                log.warning(f"File upload failed or timed out: {e}")
+            finally:
+                data.pop("_upload_future", None)
+
+        # Build final input
+        all_input = {
+            'user_input': user_input,
+            'vector_name': vector_name_param,
+            'chat_history': paired_messages,
+            'stream_wait_time': stream_wait_time,
+            'stream_timeout': stream_timeout,
+            'eval_percent': eval_percent,
+            'kwargs': data
+        }
+
+        # Try to get trace result if available (don't block long)
+        if self.add_langfuse_eval:
+            try:
+                trace = trace_future.result(timeout=0.1) # Very short timeout
+                if trace:
+                    this_vac_config = vac_config.configs_by_kind.get("vacConfig")
+                    metadata_config = None
+                    if this_vac_config:
+                        metadata_config = this_vac_config.get(vector_name)
+                    trace.update(input=data, metadata=metadata_config)
+
+                    span = trace.span(
+                        name="VAC",
+                        metadata=vac_config.configs_by_kind,
+                        input=all_input
+                    )
+            except Exception as e:
+                log.warning(f"Langfuse trace creation timed out or failed: {e}")
+                trace = None
+                span = None
+
+        prep_time = time.time() - start_time
+        log.info(f"prep_vac completed in {prep_time:.3f}s")

-        if trace:
-            span = trace.span(
-                name="VAC",
-                metadata=vac_config.configs_by_kind,
-                input = all_input
-            )
-
         return {
             "trace": trace,
             "span": span,
sunholo-0.140.8.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sunholo
-Version: 0.140.6
+Version: 0.140.8
 Summary: AI DevOps - a package to help deploy GenAI to the Cloud.
 Author-email: Holosun ApS <multivac@sunholo.com>
 License: Apache License, Version 2.0
sunholo-0.140.8.dist-info/RECORD

@@ -13,8 +13,8 @@ sunholo/agents/fastapi/__init__.py,sha256=S_pj4_bTUmDGoq_exaREHlOKThi0zTuGT0VZY0
 sunholo/agents/fastapi/base.py,sha256=W-cyF8ZDUH40rc-c-Apw3-_8IIi2e4Y9qRtnoVnsc1Q,2521
 sunholo/agents/fastapi/qna_routes.py,sha256=lKHkXPmwltu9EH3RMwmD153-J6pE7kWQ4BhBlV3to-s,3864
 sunholo/agents/flask/__init__.py,sha256=dEoByI3gDNUOjpX1uVKP7uPjhfFHJubbiaAv3xLopnk,63
-sunholo/agents/flask/base.py,sha256=HLz3Z5efWaewTwSFEM6JH48NA9otoJBoVFJlARGk9L8,788
-sunholo/agents/flask/vac_routes.py,sha256=al4-k-QNKH5bX9Ai8FP7DC1R7yomSO3Lnq_cugnUHcw,32622
+sunholo/agents/flask/base.py,sha256=vnpxFEOnCmt9humqj-jYPLfJcdwzsop9NorgkJ-tSaU,1756
+sunholo/agents/flask/vac_routes.py,sha256=YOW64HaRYa0MfMnzwbx2s9IrU6lz-CeqpcfmIo_L3ho,37664
 sunholo/archive/__init__.py,sha256=qNHWm5rGPVOlxZBZCpA1wTYPbalizRT7f8X4rs2t290,31
 sunholo/archive/archive.py,sha256=PxVfDtO2_2ZEEbnhXSCbXLdeoHoQVImo4y3Jr2XkCFY,1204
 sunholo/auth/__init__.py,sha256=TeP-OY0XGxYV_8AQcVGoh35bvyWhNUcMRfhuD5l44Sk,91
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
 sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
 sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
 sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
-sunholo-0.140.6.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
-sunholo-0.140.6.dist-info/METADATA,sha256=J62v0HZ3NpRqt-zt0jpcA-KgGXyb5aEQyPGt6D4W-B8,10067
-sunholo-0.140.6.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
-sunholo-0.140.6.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
-sunholo-0.140.6.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
-sunholo-0.140.6.dist-info/RECORD,,
+sunholo-0.140.8.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+sunholo-0.140.8.dist-info/METADATA,sha256=30zPLVCgeU87lsGIdFxyaAOFvYDuC-EOayXWNgophxI,10067
+sunholo-0.140.8.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+sunholo-0.140.8.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+sunholo-0.140.8.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+sunholo-0.140.8.dist-info/RECORD,,