sunholo 0.140.6__py3-none-any.whl → 0.140.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sunholo/agents/flask/base.py

@@ -11,12 +11,38 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from ...custom_logging import log
+import time
+
 def create_app(name):
-    from flask import Flask
+    from flask import Flask, request

     app = Flask(name)

     app.config['TRAP_HTTP_EXCEPTIONS'] = True
     app.config['PROPAGATE_EXCEPTIONS'] = True

+    @app.before_request
+    def start_timer():
+        request.start_time = time.time()
+
+    @app.after_request
+    def log_timing(response):
+        if hasattr(request, 'start_time'):
+            duration = time.time() - request.start_time
+
+            # Log all VAC requests with different detail levels
+            if request.path.startswith('/vac/streaming/'):
+                log.info(f"🚀 STREAMING: {duration:.3f}s - {request.path}")
+            elif request.path.startswith('/vac/'):
+                log.info(f"⚡ VAC: {duration:.3f}s - {request.path}")
+            elif duration > 1.0: # Log any slow requests
+                log.warning(f"🐌 SLOW REQUEST: {duration:.3f}s - {request.path}")
+
+            # Add response headers with timing info for debugging
+            response.headers['X-Response-Time'] = f"{duration:.3f}s"
+
+        return response
+
     return app
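
The new timing middleware stamps every response with an X-Response-Time header, so the change can be verified from the client side. A minimal sketch, assuming a server built with create_app() is listening locally; the host, port, and VAC name are hypothetical:

    import requests

    # Hypothetical local server and VAC name; substitute your own deployment.
    resp = requests.post(
        "http://localhost:8080/vac/streaming/demo",
        json={"user_input": "hello"},
    )

    # The after_request hook adds the timing header to every response.
    print(resp.headers.get("X-Response-Time"))  # e.g. "0.412s"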
sunholo/agents/flask/vac_routes.py

@@ -6,6 +6,10 @@ import random
 from functools import partial
 import inspect
 import asyncio
+import time
+import threading
+from functools import lru_cache
+from concurrent.futures import ThreadPoolExecutor

 from ..chat_history import extract_chat_history_with_cache, extract_chat_history_async_cached
 from ...qna.parsers import parse_output
@@ -32,6 +36,11 @@ except ImportError:
 # Cache dictionary to store validated API keys
 api_key_cache = {}
 cache_duration = timedelta(minutes=5) # Cache duration
+# Global caches and thread pool
+_config_cache = {}
+_config_lock = threading.Lock()
+_thread_pool = ThreadPoolExecutor(max_workers=4)
+

 class VACRoutes:
     """
@@ -69,8 +78,44 @@ if __name__ == "__main__":
         self.additional_routes = additional_routes if additional_routes is not None else []
         self.async_stream = async_stream
         self.add_langfuse_eval = add_langfuse_eval
+
+        # Pre-warm common configs
+        self._preload_common_configs()
+
         self.register_routes()
+
+    def _preload_common_configs(self):
+        """Pre-load commonly used configurations to cache"""
+        common_vector_names = ["aitana3"] # Add your common vector names
+        for vector_name in common_vector_names:
+            try:
+                self._get_cached_config(vector_name)
+                log.info(f"Pre-loaded config for {vector_name}")
+            except Exception as e:
+                log.warning(f"Failed to pre-load config for {vector_name}: {e}")
+
+    def _get_cached_config(self, vector_name: str):
+        """Cached config loader with thread safety - CORRECTED VERSION"""
+        # Check cache first (without lock for read)
+        if vector_name in _config_cache:
+            log.debug(f"Using cached config for {vector_name}")
+            return _config_cache[vector_name]

+        # Need to load config
+        with _config_lock:
+            # Double-check inside lock (another thread might have loaded it)
+            if vector_name in _config_cache:
+                return _config_cache[vector_name]
+
+            try:
+                log.info(f"Loading fresh config for {vector_name}")
+                config = ConfigManager(vector_name)
+                _config_cache[vector_name] = config
+                log.info(f"Cached config for {vector_name}")
+                return config
+            except Exception as e:
+                log.error(f"Error loading config for {vector_name}: {e}")
+                raise

     def vac_interpreter_default(self, question: str, vector_name: str, chat_history=[], **kwargs):
         # Create a callback that does nothing for streaming if you don't want intermediate outputs
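
The new _get_cached_config is a double-checked locking cache: the first membership test runs without the lock so cache hits stay cheap, and the second test inside the lock stops two threads from loading the same config twice. A standalone sketch of the same pattern, with a hypothetical load_config standing in for ConfigManager:

    import threading
    import time
    from concurrent.futures import ThreadPoolExecutor

    _cache = {}
    _lock = threading.Lock()

    def load_config(name):
        """Hypothetical slow loader standing in for ConfigManager(name)."""
        time.sleep(0.5)
        return {"name": name}

    def get_cached(name):
        if name in _cache:           # fast path: no lock on cache hits
            return _cache[name]
        with _lock:
            if name in _cache:       # double-check: another thread may have won
                return _cache[name]
            _cache[name] = load_config(name)
            return _cache[name]

    # Ten concurrent callers trigger exactly one load_config call.
    with ThreadPoolExecutor(max_workers=10) as pool:
        results = list(pool.map(get_cached, ["demo"] * 10))
    assert all(r is results[0] for r in results)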
@@ -228,22 +273,43 @@ if __name__ == "__main__":

         log.info(f"OpenAI response: {openai_response}")
         return jsonify(openai_response)
-
+
+    def _finalize_trace_background(self, trace, span, response, all_input):
+        """Finalize trace operations in background"""
+        try:
+            if span:
+                span.end(output=str(response))
+            if trace:
+                trace.update(output=str(response))
+                self.langfuse_eval_response(trace_id=trace.id, eval_percent=all_input.get('eval_percent'))
+        except Exception as e:
+            log.warning(f"Background trace finalization failed: {e}")
+
     def handle_stream_vac(self, vector_name):
+        request_start = time.time()
         observed_stream_interpreter = self.stream_interpreter
         is_async = inspect.iscoroutinefunction(self.stream_interpreter)

         if is_async:
             log.info(f"Stream interpreter is async: {observed_stream_interpreter}")

-        prep = self.prep_vac(request, vector_name)
-        log.info(f"Processing prep: {prep}")
-        trace = prep["trace"]
-        span = prep["span"]
+        # Call prep_vac and handle errors properly
+        try:
+            prep = self.prep_vac(request, vector_name)
+        except Exception as e:
+            log.error(f"prep_vac failed: {e}")
+            error_response = {'error': f'Prep error: {str(e)}'}
+            return jsonify(error_response), 500
+
+        log.info(f"Processing prep completed in {time.time() - request_start:.3f}s")
+
+        trace = prep.get("trace")
+        span = prep.get("span")
         vac_config = prep["vac_config"]
         all_input = prep["all_input"]

-        log.info(f'Streaming data with: {all_input}')
+        log.info(f'Starting stream with: {all_input["user_input"][:100]}...')
+
         if span:
             span.update(
                 name="start_streaming_chat",
@@ -254,7 +320,7 @@
         def generate_response_content():
             try:
                 if is_async:
-                    from queue import Queue, Empty
+                    from queue import Queue
                     result_queue = Queue()
                     import threading
@@ -271,7 +337,7 @@
                                     trace_id=trace.id if trace else None,
                                     **all_input["kwargs"]
                                 )
-                                log.info(f"{async_gen=}")
+
                                 async for chunk in async_gen:
                                     if isinstance(chunk, dict) and 'answer' in chunk:
                                         if trace:
@@ -284,9 +350,12 @@
                                     else:
                                         result_queue.put(chunk)
                             except Exception as e:
-                                result_queue.put(f"Streaming Error: {str(e)} {traceback.format_exc()}")
+                                error_msg = f"Streaming Error: {str(e)} {traceback.format_exc()}"
+                                log.error(error_msg)
+                                result_queue.put(error_msg)
                             finally:
                                 result_queue.put(None) # Sentinel
+
                         asyncio.run(process_async())

                     thread = threading.Thread(target=run_async)
@@ -301,7 +370,7 @@

                     thread.join()
                 else:
-                    log.info("sync streaming response")
+                    log.info("Starting sync streaming response")
                    for chunk in start_streaming_chat(
                        question=all_input["user_input"],
                        vector_name=vector_name,
@@ -325,17 +394,19 @@
                        yield chunk

            except Exception as e:
-                yield f"Streaming Error: {str(e)} {traceback.format_exc()}"
+                error_msg = f"Streaming Error: {str(e)} {traceback.format_exc()}"
+                log.error(error_msg)
+                yield error_msg

-        # Here, the generator function will handle streaming the content to the client.
+        # Create streaming response
         response = Response(generate_response_content(), content_type='text/plain; charset=utf-8')
         response.headers['Transfer-Encoding'] = 'chunked'

-        log.debug(f"streaming response: {response}")
+        log.info(f"Streaming response created in {time.time() - request_start:.3f}s")
+
+        # Do final trace operations in background (don't block the response)
         if trace:
-            span.end(output=response)
-            trace.update(output=response)
-            self.langfuse_eval_response(trace_id=trace.id, eval_percent=all_input.get('eval_percent'))
+            _thread_pool.submit(self._finalize_trace_background, trace, span, response, all_input)

         return response

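The async branch of generate_response_content bridges an async generator into Flask's synchronous streaming model: a worker thread runs the event loop and pushes chunks onto a Queue, with None as the end-of-stream sentinel, while the caller drains the queue synchronously. A minimal, self-contained sketch of that bridge, using a hypothetical fake_stream in place of the real stream interpreter:

    import asyncio
    import threading
    from queue import Queue

    async def fake_stream():
        """Hypothetical async generator standing in for the stream interpreter."""
        for word in ["hello", "from", "the", "event", "loop"]:
            await asyncio.sleep(0.1)
            yield word

    def sync_chunks():
        """Yield chunks synchronously from an async generator."""
        q = Queue()

        def run_async():
            async def pump():
                try:
                    async for chunk in fake_stream():
                        q.put(chunk)
                finally:
                    q.put(None)  # sentinel: no more chunks
            asyncio.run(pump())

        threading.Thread(target=run_async, daemon=True).start()
        while (chunk := q.get()) is not None:
            yield chunk

    for c in sync_chunks():
        print(c)
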
@@ -654,78 +725,127 @@ if __name__ == "__main__":
             tags = tags,
             release = package_version
         )
+
+    def _create_langfuse_trace_background(self, request, vector_name, trace_id):
+        """Create Langfuse trace in background"""
+        try:
+            return self.create_langfuse_trace(request, vector_name, trace_id)
+        except Exception as e:
+            log.warning(f"Background trace creation failed: {e}")
+            return None

+    def _handle_file_upload_background(self, file, vector_name):
+        """Handle file upload in background thread"""
+        try:
+            # Save with timestamp to avoid conflicts
+            temp_filename = f"temp_{int(time.time() * 1000)}_{file.filename}"
+            file.save(temp_filename)
+
+            # Upload to GCS
+            image_uri = add_file_to_gcs(temp_filename, vector_name)
+
+            # Clean up
+            os.remove(temp_filename)
+
+            return {"image_uri": image_uri, "mime": file.mimetype}
+        except Exception as e:
+            log.error(f"Background file upload failed: {e}")
+            return {}
+
     def prep_vac(self, request, vector_name):
-
+        start_time = time.time()
+
+        # Fast request parsing - KEEP ORIGINAL ERROR HANDLING STYLE
         if request.content_type.startswith('application/json'):
             data = request.get_json()
         elif request.content_type.startswith('multipart/form-data'):
             data = request.form.to_dict()
+            # Handle file upload in background if present
             if 'file' in request.files:
                 file = request.files['file']
                 if file.filename != '':
-                    log.info(f"Found file: {file.filename} to upload to GCS")
-                    try:
-                        image_uri, mime_type = self.handle_file_upload(file, vector_name)
-                        data["image_uri"] = image_uri
-                        data["mime"] = mime_type
-                    except Exception as e:
-                        log.error(traceback.format_exc())
-                        return jsonify({'error': str(e), 'traceback': traceback.format_exc()}), 500
-                else:
-                    log.error("No file selected")
-                    return jsonify({"error": "No file selected"}), 400
+                    log.info(f"Found file: {file.filename} - uploading in background")
+                    # Start file upload in background, don't block
+                    upload_future = _thread_pool.submit(self._handle_file_upload_background, file, vector_name)
+                    data["_upload_future"] = upload_future
         else:
-            return jsonify({"error": "Unsupported content type"}), 400
+            # KEEP ORIGINAL STYLE - return the error response directly
+            raise ValueError("Unsupported content type")

-        log.info(f"vac/{vector_name} got data: {data}")
+        log.info(f"vac/{vector_name} got data keys: {list(data.keys())}")

-        trace = None
-        span = None
-        if self.add_langfuse_eval:
-            trace_id = data.get('trace_id')
-            trace = self.create_langfuse_trace(request, vector_name, trace_id)
-            log.info(f"Using existing langfuse trace: {trace_id}")
-
-        #config, _ = load_config("config/llm_config.yaml")
+        # Get config from cache first (before processing other data)
         try:
-            vac_config = ConfigManager(vector_name)
+            vac_config = self._get_cached_config(vector_name)
         except Exception as e:
             raise ValueError(f"Unable to find vac_config for {vector_name} - {str(e)}")

-        if trace:
-            this_vac_config = vac_config.configs_by_kind.get("vacConfig")
-            metadata_config=None
-            if this_vac_config:
-                metadata_config = this_vac_config.get(vector_name)
-
-            trace.update(input=data, metadata=metadata_config)
+        # Initialize trace variables
+        trace = None
+        span = None
+        if self.add_langfuse_eval:
+            trace_id = data.get('trace_id')
+            # Create trace in background - don't block
+            trace_future = _thread_pool.submit(self._create_langfuse_trace_background, request, vector_name, trace_id)

+        # Extract data (keep original logic)
         user_input = data.pop('user_input').strip()
         stream_wait_time = data.pop('stream_wait_time', 7)
         stream_timeout = data.pop('stream_timeout', 120)
         chat_history = data.pop('chat_history', None)
         eval_percent = data.pop('eval_percent', 0.01)
-        vector_name = data.pop('vector_name', vector_name)
-        data.pop('trace_id', None) # to ensure not in kwargs
+        vector_name_param = data.pop('vector_name', vector_name)
+        data.pop('trace_id', None) # to ensure not in kwargs

+        # Process chat history with caching
         paired_messages = extract_chat_history_with_cache(chat_history)

-        all_input = {'user_input': user_input,
-                     'vector_name': vector_name,
-                     'chat_history': paired_messages,
-                     'stream_wait_time': stream_wait_time,
-                     'stream_timeout': stream_timeout,
-                     'eval_percent': eval_percent,
-                     'kwargs': data}
+        # Wait for file upload if it was started (with timeout)
+        if "_upload_future" in data:
+            try:
+                upload_result = data["_upload_future"].result(timeout=3.0) # 3 sec max wait
+                data.update(upload_result)
+                log.info(f"File upload completed: {upload_result.get('image_uri', 'no uri')}")
+            except Exception as e:
+                log.warning(f"File upload failed or timed out: {e}")
+            finally:
+                data.pop("_upload_future", None)
+
+        # Build final input
+        all_input = {
+            'user_input': user_input,
+            'vector_name': vector_name_param,
+            'chat_history': paired_messages,
+            'stream_wait_time': stream_wait_time,
+            'stream_timeout': stream_timeout,
+            'eval_percent': eval_percent,
+            'kwargs': data
+        }
+
+        # Try to get trace result if available (don't block long)
+        if self.add_langfuse_eval:
+            try:
+                trace = trace_future.result(timeout=0.1) # Very short timeout
+                if trace:
+                    this_vac_config = vac_config.configs_by_kind.get("vacConfig")
+                    metadata_config = None
+                    if this_vac_config:
+                        metadata_config = this_vac_config.get(vector_name)
+                    trace.update(input=data, metadata=metadata_config)
+
+                    span = trace.span(
+                        name="VAC",
+                        metadata=vac_config.configs_by_kind,
+                        input=all_input
+                    )
+            except Exception as e:
+                log.warning(f"Langfuse trace creation timed out or failed: {e}")
+                trace = None
+                span = None
+
+        prep_time = time.time() - start_time
+        log.info(f"prep_vac completed in {prep_time:.3f}s")

-        if trace:
-            span = trace.span(
-                name="VAC",
-                metadata=vac_config.configs_by_kind,
-                input = all_input
-            )
-
         return {
             "trace": trace,
             "span": span,
sunholo-0.140.8.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sunholo
-Version: 0.140.6
+Version: 0.140.8
 Summary: AI DevOps - a package to help deploy GenAI to the Cloud.
 Author-email: Holosun ApS <multivac@sunholo.com>
 License: Apache License, Version 2.0
sunholo-0.140.8.dist-info/RECORD

@@ -13,8 +13,8 @@ sunholo/agents/fastapi/__init__.py,sha256=S_pj4_bTUmDGoq_exaREHlOKThi0zTuGT0VZY0
 sunholo/agents/fastapi/base.py,sha256=W-cyF8ZDUH40rc-c-Apw3-_8IIi2e4Y9qRtnoVnsc1Q,2521
 sunholo/agents/fastapi/qna_routes.py,sha256=lKHkXPmwltu9EH3RMwmD153-J6pE7kWQ4BhBlV3to-s,3864
 sunholo/agents/flask/__init__.py,sha256=dEoByI3gDNUOjpX1uVKP7uPjhfFHJubbiaAv3xLopnk,63
-sunholo/agents/flask/base.py,sha256=HLz3Z5efWaewTwSFEM6JH48NA9otoJBoVFJlARGk9L8,788
-sunholo/agents/flask/vac_routes.py,sha256=al4-k-QNKH5bX9Ai8FP7DC1R7yomSO3Lnq_cugnUHcw,32622
+sunholo/agents/flask/base.py,sha256=vnpxFEOnCmt9humqj-jYPLfJcdwzsop9NorgkJ-tSaU,1756
+sunholo/agents/flask/vac_routes.py,sha256=YOW64HaRYa0MfMnzwbx2s9IrU6lz-CeqpcfmIo_L3ho,37664
 sunholo/archive/__init__.py,sha256=qNHWm5rGPVOlxZBZCpA1wTYPbalizRT7f8X4rs2t290,31
 sunholo/archive/archive.py,sha256=PxVfDtO2_2ZEEbnhXSCbXLdeoHoQVImo4y3Jr2XkCFY,1204
 sunholo/auth/__init__.py,sha256=TeP-OY0XGxYV_8AQcVGoh35bvyWhNUcMRfhuD5l44Sk,91
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
 sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
 sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
 sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
-sunholo-0.140.6.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
-sunholo-0.140.6.dist-info/METADATA,sha256=J62v0HZ3NpRqt-zt0jpcA-KgGXyb5aEQyPGt6D4W-B8,10067
-sunholo-0.140.6.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
-sunholo-0.140.6.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
-sunholo-0.140.6.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
-sunholo-0.140.6.dist-info/RECORD,,
+sunholo-0.140.8.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+sunholo-0.140.8.dist-info/METADATA,sha256=30zPLVCgeU87lsGIdFxyaAOFvYDuC-EOayXWNgophxI,10067
+sunholo-0.140.8.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+sunholo-0.140.8.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+sunholo-0.140.8.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+sunholo-0.140.8.dist-info/RECORD,,