sunholo 0.140.4__py3-none-any.whl → 0.140.6__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
--- sunholo/agents/chat_history.py
+++ sunholo/agents/chat_history.py
@@ -1,5 +1,282 @@
  import json
  from ..custom_logging import log
+ import time
+ import hashlib
+ from functools import lru_cache
+ from typing import List, Tuple, Optional
+
+
+ class ChatHistoryCache:
+     """
+     Incremental cache for chat history processing.
+
+     Caches processed message pairs and only processes new messages
+     when the chat history is extended.
+     """
+
+     def __init__(self, max_cache_size: int = 1000):
+         self.cache = {}
+         self.max_cache_size = max_cache_size
+
+     def _get_cache_key(self, chat_history: List[dict]) -> str:
+         """Generate a cache key based on the chat history content."""
+         # Use the hash of the serialized chat history for the key
+         # Only hash the first few and last few messages to balance performance vs accuracy
+         if len(chat_history) <= 10:
+             content = str(chat_history)
+         else:
+             # Hash first 5 and last 5 messages + length
+             content = str(chat_history[:5] + chat_history[-5:] + [len(chat_history)])
+
+         return hashlib.md5(content.encode()).hexdigest()
+
+     def _find_cached_prefix(self, current_history: List[dict]) -> Tuple[Optional[List[Tuple]], int]:
+         """
+         Find the longest cached prefix of the current chat history.
+
+         Returns:
+             Tuple of (cached_pairs, cache_length) or (None, 0) if no cache found
+         """
+         current_length = len(current_history)
+
+         # Check for cached versions of prefixes, starting from longest
+         for cache_length in range(current_length - 1, 0, -1):
+             prefix = current_history[:cache_length]
+             cache_key = self._get_cache_key(prefix)
+
+             if cache_key in self.cache:
+                 cached_data = self.cache[cache_key]
+                 cached_pairs = cached_data['pairs']
+
+                 # Verify the cache is still valid by checking a few messages
+                 if self._verify_cache_validity(prefix, cached_data['original_history']):
+                     return cached_pairs, cache_length
+                 else:
+                     # Cache is stale, remove it
+                     del self.cache[cache_key]
+
+         return None, 0
+
+     def _verify_cache_validity(self, current_prefix: List[dict], cached_prefix: List[dict]) -> bool:
+         """Quick verification that cached data is still valid."""
+         if len(current_prefix) != len(cached_prefix):
+             return False
+
+         # Check first and last few messages for equality
+         check_indices = [0, -1] if len(current_prefix) >= 2 else [0]
+
+         for i in check_indices:
+             if current_prefix[i] != cached_prefix[i]:
+                 return False
+
+         return True
+
+     def extract_chat_history_incremental(self, chat_history: List[dict]) -> List[Tuple]:
+         """
+         Extract chat history with incremental caching.
+
+         Args:
+             chat_history: List of chat message dictionaries
+
+         Returns:
+             List of (human_message, ai_message) tuples
+         """
+         if not chat_history:
+             return []
+
+         # Try to find cached prefix
+         cached_pairs, cache_length = self._find_cached_prefix(chat_history)
+
+         if cached_pairs is not None:
+             log.debug(f"Found cached pairs for {cache_length} messages, processing {len(chat_history) - cache_length} new messages")
+
+             # Process only the new messages
+             new_messages = chat_history[cache_length:]
+             new_pairs = self._process_new_messages(new_messages, cached_pairs)
+
+             # Combine cached and new pairs
+             all_pairs = cached_pairs + new_pairs
+         else:
+             log.debug(f"No cache found, processing all {len(chat_history)} messages")
+             # Process all messages from scratch
+             all_pairs = self._extract_chat_history_full(chat_history)
+
+         # Cache the result
+         self._update_cache(chat_history, all_pairs)
+
+         return all_pairs
+
+     def _process_new_messages(self, new_messages: List[dict], cached_pairs: List[Tuple]) -> List[Tuple]:
+         """
+         Process only the new messages, considering the state from cached pairs.
+
+         Args:
+             new_messages: New messages to process
+             cached_pairs: Previously processed message pairs
+
+         Returns:
+             List of new message pairs
+         """
+         if not new_messages:
+             return []
+
+         new_pairs = []
+
+         # Determine if we're waiting for a bot response based on cached pairs
+         waiting_for_bot = True
+         if cached_pairs:
+             last_pair = cached_pairs[-1]
+             # If last pair has both human and AI message, we're ready for a new human message
+             waiting_for_bot = not (last_pair[0] and last_pair[1])
+
+         # If we ended with an unpaired human message, get it
+         last_human_message = ""
+         if cached_pairs and waiting_for_bot:
+             last_human_message = cached_pairs[-1][0]
+
+         # Process new messages
+         for message in new_messages:
+             try:
+                 is_human_msg = is_human(message)
+                 content = create_message_element(message)
+
+                 if is_human_msg:
+                     last_human_message = content
+                     waiting_for_bot = True
+                 else: # Bot message
+                     if waiting_for_bot and last_human_message:
+                         new_pairs.append((last_human_message, content))
+                         last_human_message = ""
+                         waiting_for_bot = False
+                     # If not waiting for bot or no human message, this is an orphaned bot message
+
+             except (KeyError, TypeError) as e:
+                 log.warning(f"Error processing new message: {e}")
+                 continue
+
+         return new_pairs
+
+     def _extract_chat_history_full(self, chat_history: List[dict]) -> List[Tuple]:
+         """Full extraction when no cache is available."""
+         # Use the optimized version from before
+         paired_messages = []
+
+         # Handle initial bot message
+         start_idx = 0
+         if chat_history and is_bot(chat_history[0]):
+             try:
+                 first_message = chat_history[0]
+                 blank_element = ""
+                 bot_element = create_message_element(first_message)
+                 paired_messages.append((blank_element, bot_element))
+                 start_idx = 1
+             except (KeyError, TypeError):
+                 pass
+
+         # Process remaining messages
+         last_human_message = ""
+         for i in range(start_idx, len(chat_history)):
+             message = chat_history[i]
+
+             try:
+                 is_human_msg = is_human(message)
+                 content = create_message_element(message)
+
+                 if is_human_msg:
+                     last_human_message = content
+                 else: # Bot message
+                     if last_human_message:
+                         paired_messages.append((last_human_message, content))
+                         last_human_message = ""
+
+             except (KeyError, TypeError) as e:
+                 log.warning(f"Error processing message {i}: {e}")
+                 continue
+
+         return paired_messages
+
+     def _update_cache(self, chat_history: List[dict], pairs: List[Tuple]):
+         """Update cache with new result."""
+         # Only cache if the history is of reasonable size
+         if len(chat_history) < 2:
+             return
+
+         cache_key = self._get_cache_key(chat_history)
+
+         # Implement simple LRU by removing oldest entries
+         if len(self.cache) >= self.max_cache_size:
+             # Remove 20% of oldest entries
+             remove_count = self.max_cache_size // 5
+             oldest_keys = list(self.cache.keys())[:remove_count]
+             for key in oldest_keys:
+                 del self.cache[key]
+
+         self.cache[cache_key] = {
+             'pairs': pairs,
+             'original_history': chat_history.copy(), # Store copy for validation
+             'timestamp': time.time()
+         }
+
+         log.debug(f"Cached {len(pairs)} pairs for history of length {len(chat_history)}")
+
+     def clear_cache(self):
+         """Clear the entire cache."""
+         self.cache.clear()
+         log.info("Chat history cache cleared")
+
+
+ # Global cache instance
+ _chat_history_cache = ChatHistoryCache()
+
+
+ def extract_chat_history_with_cache(chat_history: List[dict] = None) -> List[Tuple]:
+     """
+     Main function to replace the original extract_chat_history.
+
+     Uses incremental caching for better performance with growing chat histories.
+     """
+     if not chat_history:
+         log.debug("No chat history found")
+         return []
+
+     return _chat_history_cache.extract_chat_history_incremental(chat_history)
+
+
+ # Async version that wraps the cached version
+ async def extract_chat_history_async_cached(chat_history: List[dict] = None) -> List[Tuple]:
+     """
+     Async version that uses the cache and runs in a thread pool if needed.
+     """
+     import asyncio
+
+     if not chat_history:
+         return []
+
+     # For very large histories, run in thread pool to avoid blocking
+     if len(chat_history) > 1000:
+         loop = asyncio.get_event_loop()
+         return await loop.run_in_executor(
+             None,
+             extract_chat_history_with_cache,
+             chat_history
+         )
+     else:
+         # For smaller histories, just run directly
+         return extract_chat_history_with_cache(chat_history)
+
+
+ # Utility function to warm up the cache
+ def warm_up_cache(chat_histories: List[List[dict]]):
+     """
+     Pre-populate cache with common chat histories.
+
+     Args:
+         chat_histories: List of chat history lists to cache
+     """
+     for history in chat_histories:
+         extract_chat_history_with_cache(history)
+
+     log.info(f"Warmed up cache with {len(chat_histories)} chat histories")
 
 
  async def extract_chat_history_async(chat_history=None):
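
A minimal usage sketch of the new cached extractor (editor's illustration, not part of the diff). It assumes the OpenAI-style message schema that `is_human`/`is_ai` in this module check for (`{'role': 'user' | 'assistant', ...}`); the exact content field read by `create_message_element` is not shown in this hunk.

```python
# Illustrative sketch only; the message fields are assumptions (see note above).
from sunholo.agents.chat_history import extract_chat_history_with_cache

history = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi, how can I help?"},
]

# First call: no cached prefix exists, so every message is processed
# and the resulting (human, ai) pairs are cached.
pairs = extract_chat_history_with_cache(history)

# Extend the conversation and call again: _find_cached_prefix() locates
# the cached two-message prefix, so only the new suffix is paired up.
history += [
    {"role": "user", "content": "What changed in 0.140.6?"},
    {"role": "assistant", "content": "Chat history extraction is now cached."},
]
pairs = extract_chat_history_with_cache(history)
```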
@@ -243,3 +520,4 @@ def is_ai(message: dict):
          return message['role'] == 'assistant'
      else:
          return 'bot_id' in message # Slack
+
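
Reviewer's note on the keying scheme: for histories longer than ten messages, `_get_cache_key` hashes only the first five messages, the last five, and the total length, trading exactness for speed. A standalone restatement of the logic (editor's sketch, not a new API):

```python
# Editor's restatement of _get_cache_key() above, for clarity only.
import hashlib

def cache_key(chat_history: list) -> str:
    if len(chat_history) <= 10:
        content = str(chat_history)
    else:
        # Two histories sharing their first five messages, last five
        # messages, and total length collide onto the same key, and
        # _verify_cache_validity() then compares only the first and last
        # entries, so validation is heuristic rather than exact.
        content = str(chat_history[:5] + chat_history[-5:] + [len(chat_history)])
    return hashlib.md5(content.encode()).hexdigest()
```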
--- sunholo/agents/flask/vac_routes.py
+++ sunholo/agents/flask/vac_routes.py
@@ -7,8 +7,7 @@ from functools import partial
  import inspect
  import asyncio
 
- from ...agents import extract_chat_history, handle_special_commands
- from ..chat_history import extract_chat_history_async
+ from ..chat_history import extract_chat_history_with_cache, extract_chat_history_async_cached
  from ...qna.parsers import parse_output
  from ...streaming import start_streaming_chat, start_streaming_chat_async
  from ...archive import archive_qa
@@ -58,12 +57,18 @@ if __name__ == "__main__":
      ```
 
      """
-     def __init__(self, app, stream_interpreter: callable, vac_interpreter:callable=None, additional_routes:dict=None, async_stream:bool=False):
+     def __init__(self, app,
+                  stream_interpreter: callable,
+                  vac_interpreter:callable=None,
+                  additional_routes:dict=None,
+                  async_stream:bool=False,
+                  add_langfuse_eval:bool=True):
          self.app = app
          self.stream_interpreter = stream_interpreter
          self.vac_interpreter = vac_interpreter or partial(self.vac_interpreter_default)
          self.additional_routes = additional_routes if additional_routes is not None else []
          self.async_stream = async_stream
+         self.add_langfuse_eval = add_langfuse_eval
          self.register_routes()
 
 
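The new `add_langfuse_eval` flag makes Langfuse tracing opt-out. A hedged construction example (the class name `VACRoutes` is assumed from the published module `sunholo/agents/flask/vac_routes.py`; the interpreter below is a placeholder, not a sunholo API):

```python
# Sketch under assumptions: VACRoutes is the class this __init__ belongs to;
# my_stream_interpreter is a stand-in for your own streaming callable.
from flask import Flask
from sunholo.agents.flask.vac_routes import VACRoutes

app = Flask(__name__)

def my_stream_interpreter(question, vector_name, chat_history, **kwargs):
    ...  # your streaming VAC logic

# add_langfuse_eval=False skips the `if self.add_langfuse_eval:` trace
# setup shown in a later hunk of this diff.
routes = VACRoutes(app, my_stream_interpreter, add_langfuse_eval=False)
```
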
@@ -235,13 +240,9 @@ if __name__ == "__main__":
          log.info(f"Processing prep: {prep}")
          trace = prep["trace"]
          span = prep["span"]
-         command_response = prep["command_response"]
          vac_config = prep["vac_config"]
          all_input = prep["all_input"]
 
-         if command_response:
-             return jsonify(command_response)
-
          log.info(f'Streaming data with: {all_input}')
          if span:
              span.update(
@@ -426,13 +427,9 @@ if __name__ == "__main__":
          log.debug(f"Processing prep: {prep}")
          trace = prep["trace"]
          span = prep["span"]
-         command_response = prep["command_response"]
          vac_config: ConfigManager = prep["vac_config"]
          all_input = prep["all_input"]
 
-         if command_response:
-             return jsonify(command_response)
-
          try:
              if span:
                  gen = span.generation(
@@ -530,8 +527,6 @@ if __name__ == "__main__":
          image_uri = None
          mime_type = None
 
-
-
          for msg in reversed(messages):
              if msg['role'] == 'user':
                  if isinstance(msg['content'], list):
@@ -550,13 +545,6 @@ if __name__ == "__main__":
          else:
              log.info(f"User message: {user_message}")
 
-         paired_messages = extract_chat_history(chat_history)
-         command_response = handle_special_commands(user_message, vector_name, paired_messages)
-
-         if command_response is not None:
-
-             return self.make_openai_response(user_message, vector_name, command_response)
-
          if image_uri:
              data["image_uri"] = image_uri
              data["mime"] = mime_type
@@ -694,10 +682,10 @@ if __name__ == "__main__":
 
          trace = None
          span = None
-
-         trace_id = data.get('trace_id')
-         trace = self.create_langfuse_trace(request, vector_name, trace_id)
-         log.info(f"Using existing langfuse trace: {trace_id}")
+         if self.add_langfuse_eval:
+             trace_id = data.get('trace_id')
+             trace = self.create_langfuse_trace(request, vector_name, trace_id)
+             log.info(f"Using existing langfuse trace: {trace_id}")
 
          #config, _ = load_config("config/llm_config.yaml")
          try:
@@ -721,7 +709,7 @@ if __name__ == "__main__":
          vector_name = data.pop('vector_name', vector_name)
          data.pop('trace_id', None) # to ensure not in kwargs
 
-         paired_messages = extract_chat_history(chat_history)
+         paired_messages = extract_chat_history_with_cache(chat_history)
 
          all_input = {'user_input': user_input,
                       'vector_name': vector_name,
@@ -737,15 +725,10 @@ if __name__ == "__main__":
                  metadata=vac_config.configs_by_kind,
                  input = all_input
              )
-         command_response = handle_special_commands(user_input, vector_name, paired_messages)
-         if command_response is not None:
-             if trace:
-                 trace.update(output=jsonify(command_response))
 
          return {
              "trace": trace,
              "span": span,
-             "command_response": command_response,
              "all_input": all_input,
              "vac_config": vac_config
          }
@@ -789,7 +772,7 @@ if __name__ == "__main__":
          data.pop('trace_id', None) # to ensure not in kwargs
 
          # Task 3: Process chat history
-         chat_history_task = asyncio.create_task(extract_chat_history_async(chat_history))
+         chat_history_task = asyncio.create_task(extract_chat_history_async_cached(chat_history))
          tasks.append(chat_history_task)
 
          # Await all tasks concurrently
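
For completeness, a sketch of calling the async cached variant wired in above; per the first hunk, it shares the same global `ChatHistoryCache` and only offloads to a thread pool for histories longer than 1000 messages (message schema assumed as in the earlier sketch):

```python
# Illustrative only; output shape depends on create_message_element().
import asyncio
from sunholo.agents.chat_history import extract_chat_history_async_cached

async def main():
    history = [
        {"role": "user", "content": "ping"},
        {"role": "assistant", "content": "pong"},
    ]
    # Short history: runs inline on the event loop, hitting the same
    # cache instance as the sync entry point.
    pairs = await extract_chat_history_async_cached(history)
    print(pairs)

asyncio.run(main())
```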
--- sunholo-0.140.4.dist-info/METADATA
+++ sunholo-0.140.6.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sunholo
- Version: 0.140.4
+ Version: 0.140.6
  Summary: AI DevOps - a package to help deploy GenAI to the Cloud.
  Author-email: Holosun ApS <multivac@sunholo.com>
  License: Apache License, Version 2.0
--- sunholo-0.140.4.dist-info/RECORD
+++ sunholo-0.140.6.dist-info/RECORD
@@ -2,7 +2,7 @@ sunholo/__init__.py,sha256=InRbX4V0-qdNHo9zYH3GEye7ASLR6LX8-SMvPV4Jsaw,1212
  sunholo/custom_logging.py,sha256=JXZTnXp_DixP3jwYfKw4LYRDS9IuTq7ctCgfZbI2rxA,22023
  sunholo/langchain_types.py,sha256=uZ4zvgej_f7pLqjtu4YP7qMC_eZD5ym_5x4pyvA1Ih4,1834
  sunholo/agents/__init__.py,sha256=AauG3l0y4r5Fzx1zJfZ634M4o-0o7B7J5T8k_gPvNqE,370
- sunholo/agents/chat_history.py,sha256=4jGCHBP8dZfUjSJPxgKyh6nOqhnHRn1x9U3CnGb0I5E,7624
+ sunholo/agents/chat_history.py,sha256=e2NmiooaRUxKGr_aoU05rzhHi3VsKjbZZmzeDr2yJJE,17780
  sunholo/agents/dispatch_to_qa.py,sha256=NHihwAoCJ5_Lk11e_jZnucVUGQyZHCB-YpkfMHBCpQk,8882
  sunholo/agents/langserve.py,sha256=C46ph2mnygr6bdHijYWYyfQDI9ylAF0_9Kx2PfcCJpU,4414
  sunholo/agents/pubsub.py,sha256=TscZN_6am6DfaQkC-Yl18ZIBOoLE-0nDSiil6GpQEh4,1344
@@ -14,7 +14,7 @@ sunholo/agents/fastapi/base.py,sha256=W-cyF8ZDUH40rc-c-Apw3-_8IIi2e4Y9qRtnoVnsc1
  sunholo/agents/fastapi/qna_routes.py,sha256=lKHkXPmwltu9EH3RMwmD153-J6pE7kWQ4BhBlV3to-s,3864
  sunholo/agents/flask/__init__.py,sha256=dEoByI3gDNUOjpX1uVKP7uPjhfFHJubbiaAv3xLopnk,63
  sunholo/agents/flask/base.py,sha256=HLz3Z5efWaewTwSFEM6JH48NA9otoJBoVFJlARGk9L8,788
- sunholo/agents/flask/vac_routes.py,sha256=Dk9QrPvXNRzAWxaTWsYgHVxmK-Rjrvgd6-sAuvqt9P8,33236
+ sunholo/agents/flask/vac_routes.py,sha256=al4-k-QNKH5bX9Ai8FP7DC1R7yomSO3Lnq_cugnUHcw,32622
  sunholo/archive/__init__.py,sha256=qNHWm5rGPVOlxZBZCpA1wTYPbalizRT7f8X4rs2t290,31
  sunholo/archive/archive.py,sha256=PxVfDtO2_2ZEEbnhXSCbXLdeoHoQVImo4y3Jr2XkCFY,1204
  sunholo/auth/__init__.py,sha256=TeP-OY0XGxYV_8AQcVGoh35bvyWhNUcMRfhuD5l44Sk,91
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
  sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
- sunholo-0.140.4.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
- sunholo-0.140.4.dist-info/METADATA,sha256=oKjtRKqFPtwaoV177G0nRWfv3P9xfGB3U4fonaffJrk,10067
- sunholo-0.140.4.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
- sunholo-0.140.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
- sunholo-0.140.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
- sunholo-0.140.4.dist-info/RECORD,,
+ sunholo-0.140.6.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+ sunholo-0.140.6.dist-info/METADATA,sha256=J62v0HZ3NpRqt-zt0jpcA-KgGXyb5aEQyPGt6D4W-B8,10067
+ sunholo-0.140.6.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+ sunholo-0.140.6.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+ sunholo-0.140.6.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+ sunholo-0.140.6.dist-info/RECORD,,