sunholo 0.140.4__py3-none-any.whl → 0.140.6__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
--- sunholo/agents/chat_history.py
+++ sunholo/agents/chat_history.py
@@ -1,5 +1,282 @@
  import json
  from ..custom_logging import log
+ import time
+ import hashlib
+ from functools import lru_cache
+ from typing import List, Tuple, Optional
+
+
+ class ChatHistoryCache:
+     """
+     Incremental cache for chat history processing.
+
+     Caches processed message pairs and only processes new messages
+     when the chat history is extended.
+     """
+
+     def __init__(self, max_cache_size: int = 1000):
+         self.cache = {}
+         self.max_cache_size = max_cache_size
+
+     def _get_cache_key(self, chat_history: List[dict]) -> str:
+         """Generate a cache key based on the chat history content."""
+         # Use the hash of the serialized chat history for the key
+         # Only hash the first few and last few messages to balance performance vs accuracy
+         if len(chat_history) <= 10:
+             content = str(chat_history)
+         else:
+             # Hash first 5 and last 5 messages + length
+             content = str(chat_history[:5] + chat_history[-5:] + [len(chat_history)])
+
+         return hashlib.md5(content.encode()).hexdigest()
+
+     def _find_cached_prefix(self, current_history: List[dict]) -> Tuple[Optional[List[Tuple]], int]:
+         """
+         Find the longest cached prefix of the current chat history.
+
+         Returns:
+             Tuple of (cached_pairs, cache_length) or (None, 0) if no cache found
+         """
+         current_length = len(current_history)
+
+         # Check for cached versions of prefixes, starting from longest
+         for cache_length in range(current_length - 1, 0, -1):
+             prefix = current_history[:cache_length]
+             cache_key = self._get_cache_key(prefix)
+
+             if cache_key in self.cache:
+                 cached_data = self.cache[cache_key]
+                 cached_pairs = cached_data['pairs']
+
+                 # Verify the cache is still valid by checking a few messages
+                 if self._verify_cache_validity(prefix, cached_data['original_history']):
+                     return cached_pairs, cache_length
+                 else:
+                     # Cache is stale, remove it
+                     del self.cache[cache_key]
+
+         return None, 0
+
+     def _verify_cache_validity(self, current_prefix: List[dict], cached_prefix: List[dict]) -> bool:
+         """Quick verification that cached data is still valid."""
+         if len(current_prefix) != len(cached_prefix):
+             return False
+
+         # Check first and last few messages for equality
+         check_indices = [0, -1] if len(current_prefix) >= 2 else [0]
+
+         for i in check_indices:
+             if current_prefix[i] != cached_prefix[i]:
+                 return False
+
+         return True
+
+     def extract_chat_history_incremental(self, chat_history: List[dict]) -> List[Tuple]:
+         """
+         Extract chat history with incremental caching.
+
+         Args:
+             chat_history: List of chat message dictionaries
+
+         Returns:
+             List of (human_message, ai_message) tuples
+         """
+         if not chat_history:
+             return []
+
+         # Try to find cached prefix
+         cached_pairs, cache_length = self._find_cached_prefix(chat_history)
+
+         if cached_pairs is not None:
+             log.debug(f"Found cached pairs for {cache_length} messages, processing {len(chat_history) - cache_length} new messages")
+
+             # Process only the new messages
+             new_messages = chat_history[cache_length:]
+             new_pairs = self._process_new_messages(new_messages, cached_pairs)
+
+             # Combine cached and new pairs
+             all_pairs = cached_pairs + new_pairs
+         else:
+             log.debug(f"No cache found, processing all {len(chat_history)} messages")
+             # Process all messages from scratch
+             all_pairs = self._extract_chat_history_full(chat_history)
+
+         # Cache the result
+         self._update_cache(chat_history, all_pairs)
+
+         return all_pairs
+
+     def _process_new_messages(self, new_messages: List[dict], cached_pairs: List[Tuple]) -> List[Tuple]:
+         """
+         Process only the new messages, considering the state from cached pairs.
+
+         Args:
+             new_messages: New messages to process
+             cached_pairs: Previously processed message pairs
+
+         Returns:
+             List of new message pairs
+         """
+         if not new_messages:
+             return []
+
+         new_pairs = []
+
+         # Determine if we're waiting for a bot response based on cached pairs
+         waiting_for_bot = True
+         if cached_pairs:
+             last_pair = cached_pairs[-1]
+             # If last pair has both human and AI message, we're ready for a new human message
+             waiting_for_bot = not (last_pair[0] and last_pair[1])
+
+         # If we ended with an unpaired human message, get it
+         last_human_message = ""
+         if cached_pairs and waiting_for_bot:
+             last_human_message = cached_pairs[-1][0]
+
+         # Process new messages
+         for message in new_messages:
+             try:
+                 is_human_msg = is_human(message)
+                 content = create_message_element(message)
+
+                 if is_human_msg:
+                     last_human_message = content
+                     waiting_for_bot = True
+                 else: # Bot message
+                     if waiting_for_bot and last_human_message:
+                         new_pairs.append((last_human_message, content))
+                         last_human_message = ""
+                         waiting_for_bot = False
+                     # If not waiting for bot or no human message, this is an orphaned bot message
+
+             except (KeyError, TypeError) as e:
+                 log.warning(f"Error processing new message: {e}")
+                 continue
+
+         return new_pairs
+
+     def _extract_chat_history_full(self, chat_history: List[dict]) -> List[Tuple]:
+         """Full extraction when no cache is available."""
+         # Use the optimized version from before
+         paired_messages = []
+
+         # Handle initial bot message
+         start_idx = 0
+         if chat_history and is_bot(chat_history[0]):
+             try:
+                 first_message = chat_history[0]
+                 blank_element = ""
+                 bot_element = create_message_element(first_message)
+                 paired_messages.append((blank_element, bot_element))
+                 start_idx = 1
+             except (KeyError, TypeError):
+                 pass
+
+         # Process remaining messages
+         last_human_message = ""
+         for i in range(start_idx, len(chat_history)):
+             message = chat_history[i]
+
+             try:
+                 is_human_msg = is_human(message)
+                 content = create_message_element(message)
+
+                 if is_human_msg:
+                     last_human_message = content
+                 else: # Bot message
+                     if last_human_message:
+                         paired_messages.append((last_human_message, content))
+                         last_human_message = ""
+
+             except (KeyError, TypeError) as e:
+                 log.warning(f"Error processing message {i}: {e}")
+                 continue
+
+         return paired_messages
+
+     def _update_cache(self, chat_history: List[dict], pairs: List[Tuple]):
+         """Update cache with new result."""
+         # Only cache if the history is of reasonable size
+         if len(chat_history) < 2:
+             return
+
+         cache_key = self._get_cache_key(chat_history)
+
+         # Implement simple LRU by removing oldest entries
+         if len(self.cache) >= self.max_cache_size:
+             # Remove 20% of oldest entries
+             remove_count = self.max_cache_size // 5
+             oldest_keys = list(self.cache.keys())[:remove_count]
+             for key in oldest_keys:
+                 del self.cache[key]
+
+         self.cache[cache_key] = {
+             'pairs': pairs,
+             'original_history': chat_history.copy(), # Store copy for validation
+             'timestamp': time.time()
+         }
+
+         log.debug(f"Cached {len(pairs)} pairs for history of length {len(chat_history)}")
+
+     def clear_cache(self):
+         """Clear the entire cache."""
+         self.cache.clear()
+         log.info("Chat history cache cleared")
+
+
+ # Global cache instance
+ _chat_history_cache = ChatHistoryCache()
+
+
+ def extract_chat_history_with_cache(chat_history: List[dict] = None) -> List[Tuple]:
+     """
+     Main function to replace the original extract_chat_history.
+
+     Uses incremental caching for better performance with growing chat histories.
+     """
+     if not chat_history:
+         log.debug("No chat history found")
+         return []
+
+     return _chat_history_cache.extract_chat_history_incremental(chat_history)
+
+
+ # Async version that wraps the cached version
+ async def extract_chat_history_async_cached(chat_history: List[dict] = None) -> List[Tuple]:
+     """
+     Async version that uses the cache and runs in a thread pool if needed.
+     """
+     import asyncio
+
+     if not chat_history:
+         return []
+
+     # For very large histories, run in thread pool to avoid blocking
+     if len(chat_history) > 1000:
+         loop = asyncio.get_event_loop()
+         return await loop.run_in_executor(
+             None,
+             extract_chat_history_with_cache,
+             chat_history
+         )
+     else:
+         # For smaller histories, just run directly
+         return extract_chat_history_with_cache(chat_history)
+
+
+ # Utility function to warm up the cache
+ def warm_up_cache(chat_histories: List[List[dict]]):
+     """
+     Pre-populate cache with common chat histories.
+
+     Args:
+         chat_histories: List of chat history lists to cache
+     """
+     for history in chat_histories:
+         extract_chat_history_with_cache(history)
+
+     log.info(f"Warmed up cache with {len(chat_histories)} chat histories")
 
 
  async def extract_chat_history_async(chat_history=None):
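
A minimal usage sketch of the new cached extractor (editor's illustration, not part of the diff). It assumes the OpenAI-style message schema that `is_human`/`is_ai` in this module check for (`{'role': 'user' | 'assistant', ...}`); the exact content field read by `create_message_element` is not shown in this hunk.

```python
# Illustrative sketch only; the message fields are assumptions (see note above).
from sunholo.agents.chat_history import extract_chat_history_with_cache

history = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi, how can I help?"},
]

# First call: no cached prefix exists, so every message is processed
# and the resulting (human, ai) pairs are cached.
pairs = extract_chat_history_with_cache(history)

# Extend the conversation and call again: _find_cached_prefix() locates
# the cached two-message prefix, so only the new suffix is paired up.
history += [
    {"role": "user", "content": "What changed in 0.140.6?"},
    {"role": "assistant", "content": "Chat history extraction is now cached."},
]
pairs = extract_chat_history_with_cache(history)
```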
@@ -243,3 +520,4 @@ def is_ai(message: dict):
          return message['role'] == 'assistant'
      else:
          return 'bot_id' in message # Slack
+
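
Reviewer's note on the keying scheme: for histories longer than ten messages, `_get_cache_key` hashes only the first five messages, the last five, and the total length, trading exactness for speed. A standalone restatement of the logic (editor's sketch, not a new API):

```python
# Editor's restatement of _get_cache_key() above, for clarity only.
import hashlib

def cache_key(chat_history: list) -> str:
    if len(chat_history) <= 10:
        content = str(chat_history)
    else:
        # Two histories sharing their first five messages, last five
        # messages, and total length collide onto the same key, and
        # _verify_cache_validity() then compares only the first and last
        # entries, so validation is heuristic rather than exact.
        content = str(chat_history[:5] + chat_history[-5:] + [len(chat_history)])
    return hashlib.md5(content.encode()).hexdigest()
```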
--- sunholo/agents/flask/vac_routes.py
+++ sunholo/agents/flask/vac_routes.py
@@ -7,8 +7,7 @@ from functools import partial
  import inspect
  import asyncio
 
- from ...agents import extract_chat_history, handle_special_commands
- from ..chat_history import extract_chat_history_async
+ from ..chat_history import extract_chat_history_with_cache, extract_chat_history_async_cached
  from ...qna.parsers import parse_output
  from ...streaming import start_streaming_chat, start_streaming_chat_async
  from ...archive import archive_qa
@@ -58,12 +57,18 @@ if __name__ == "__main__":
      ```
 
      """
-     def __init__(self, app, stream_interpreter: callable, vac_interpreter:callable=None, additional_routes:dict=None, async_stream:bool=False):
+     def __init__(self, app,
+                  stream_interpreter: callable,
+                  vac_interpreter:callable=None,
+                  additional_routes:dict=None,
+                  async_stream:bool=False,
+                  add_langfuse_eval:bool=True):
          self.app = app
          self.stream_interpreter = stream_interpreter
          self.vac_interpreter = vac_interpreter or partial(self.vac_interpreter_default)
          self.additional_routes = additional_routes if additional_routes is not None else []
          self.async_stream = async_stream
+         self.add_langfuse_eval = add_langfuse_eval
          self.register_routes()
 
 
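The new `add_langfuse_eval` flag makes Langfuse tracing opt-out. A hedged construction example (the class name `VACRoutes` is assumed from the published module `sunholo/agents/flask/vac_routes.py`; the interpreter below is a placeholder, not a sunholo API):

```python
# Sketch under assumptions: VACRoutes is the class this __init__ belongs to;
# my_stream_interpreter is a stand-in for your own streaming callable.
from flask import Flask
from sunholo.agents.flask.vac_routes import VACRoutes

app = Flask(__name__)

def my_stream_interpreter(question, vector_name, chat_history, **kwargs):
    ...  # your streaming VAC logic

# add_langfuse_eval=False skips the `if self.add_langfuse_eval:` trace
# setup shown in a later hunk of this diff.
routes = VACRoutes(app, my_stream_interpreter, add_langfuse_eval=False)
```
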
@@ -235,13 +240,9 @@ if __name__ == "__main__":
          log.info(f"Processing prep: {prep}")
          trace = prep["trace"]
          span = prep["span"]
-         command_response = prep["command_response"]
          vac_config = prep["vac_config"]
          all_input = prep["all_input"]
 
-         if command_response:
-             return jsonify(command_response)
-
          log.info(f'Streaming data with: {all_input}')
          if span:
              span.update(
@@ -426,13 +427,9 @@ if __name__ == "__main__":
          log.debug(f"Processing prep: {prep}")
          trace = prep["trace"]
          span = prep["span"]
-         command_response = prep["command_response"]
          vac_config: ConfigManager = prep["vac_config"]
          all_input = prep["all_input"]
 
-         if command_response:
-             return jsonify(command_response)
-
          try:
              if span:
                  gen = span.generation(
@@ -530,8 +527,6 @@ if __name__ == "__main__":
          image_uri = None
          mime_type = None
 
-
-
          for msg in reversed(messages):
              if msg['role'] == 'user':
                  if isinstance(msg['content'], list):
@@ -550,13 +545,6 @@ if __name__ == "__main__":
          else:
              log.info(f"User message: {user_message}")
 
-         paired_messages = extract_chat_history(chat_history)
-         command_response = handle_special_commands(user_message, vector_name, paired_messages)
-
-         if command_response is not None:
-
-             return self.make_openai_response(user_message, vector_name, command_response)
-
          if image_uri:
              data["image_uri"] = image_uri
              data["mime"] = mime_type
@@ -694,10 +682,10 @@ if __name__ == "__main__":
 
          trace = None
          span = None
-
-         trace_id = data.get('trace_id')
-         trace = self.create_langfuse_trace(request, vector_name, trace_id)
-         log.info(f"Using existing langfuse trace: {trace_id}")
+         if self.add_langfuse_eval:
+             trace_id = data.get('trace_id')
+             trace = self.create_langfuse_trace(request, vector_name, trace_id)
+             log.info(f"Using existing langfuse trace: {trace_id}")
 
          #config, _ = load_config("config/llm_config.yaml")
          try:
@@ -721,7 +709,7 @@ if __name__ == "__main__":
          vector_name = data.pop('vector_name', vector_name)
          data.pop('trace_id', None) # to ensure not in kwargs
 
-         paired_messages = extract_chat_history(chat_history)
+         paired_messages = extract_chat_history_with_cache(chat_history)
 
          all_input = {'user_input': user_input,
                       'vector_name': vector_name,
@@ -737,15 +725,10 @@ if __name__ == "__main__":
                  metadata=vac_config.configs_by_kind,
                  input = all_input
              )
-         command_response = handle_special_commands(user_input, vector_name, paired_messages)
-         if command_response is not None:
-             if trace:
-                 trace.update(output=jsonify(command_response))
 
          return {
              "trace": trace,
              "span": span,
-             "command_response": command_response,
              "all_input": all_input,
              "vac_config": vac_config
          }
@@ -789,7 +772,7 @@ if __name__ == "__main__":
          data.pop('trace_id', None) # to ensure not in kwargs
 
          # Task 3: Process chat history
-         chat_history_task = asyncio.create_task(extract_chat_history_async(chat_history))
+         chat_history_task = asyncio.create_task(extract_chat_history_async_cached(chat_history))
          tasks.append(chat_history_task)
 
          # Await all tasks concurrently
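
For completeness, a sketch of calling the async cached variant wired in above; per the first hunk, it shares the same global `ChatHistoryCache` and only offloads to a thread pool for histories longer than 1000 messages (message schema assumed as in the earlier sketch):

```python
# Illustrative only; output shape depends on create_message_element().
import asyncio
from sunholo.agents.chat_history import extract_chat_history_async_cached

async def main():
    history = [
        {"role": "user", "content": "ping"},
        {"role": "assistant", "content": "pong"},
    ]
    # Short history: runs inline on the event loop, hitting the same
    # cache instance as the sync entry point.
    pairs = await extract_chat_history_async_cached(history)
    print(pairs)

asyncio.run(main())
```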
--- sunholo-0.140.4.dist-info/METADATA
+++ sunholo-0.140.6.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sunholo
- Version: 0.140.4
+ Version: 0.140.6
  Summary: AI DevOps - a package to help deploy GenAI to the Cloud.
  Author-email: Holosun ApS <multivac@sunholo.com>
  License: Apache License, Version 2.0
--- sunholo-0.140.4.dist-info/RECORD
+++ sunholo-0.140.6.dist-info/RECORD
@@ -2,7 +2,7 @@ sunholo/__init__.py,sha256=InRbX4V0-qdNHo9zYH3GEye7ASLR6LX8-SMvPV4Jsaw,1212
  sunholo/custom_logging.py,sha256=JXZTnXp_DixP3jwYfKw4LYRDS9IuTq7ctCgfZbI2rxA,22023
  sunholo/langchain_types.py,sha256=uZ4zvgej_f7pLqjtu4YP7qMC_eZD5ym_5x4pyvA1Ih4,1834
  sunholo/agents/__init__.py,sha256=AauG3l0y4r5Fzx1zJfZ634M4o-0o7B7J5T8k_gPvNqE,370
- sunholo/agents/chat_history.py,sha256=4jGCHBP8dZfUjSJPxgKyh6nOqhnHRn1x9U3CnGb0I5E,7624
+ sunholo/agents/chat_history.py,sha256=e2NmiooaRUxKGr_aoU05rzhHi3VsKjbZZmzeDr2yJJE,17780
  sunholo/agents/dispatch_to_qa.py,sha256=NHihwAoCJ5_Lk11e_jZnucVUGQyZHCB-YpkfMHBCpQk,8882
  sunholo/agents/langserve.py,sha256=C46ph2mnygr6bdHijYWYyfQDI9ylAF0_9Kx2PfcCJpU,4414
  sunholo/agents/pubsub.py,sha256=TscZN_6am6DfaQkC-Yl18ZIBOoLE-0nDSiil6GpQEh4,1344
@@ -14,7 +14,7 @@ sunholo/agents/fastapi/base.py,sha256=W-cyF8ZDUH40rc-c-Apw3-_8IIi2e4Y9qRtnoVnsc1
  sunholo/agents/fastapi/qna_routes.py,sha256=lKHkXPmwltu9EH3RMwmD153-J6pE7kWQ4BhBlV3to-s,3864
  sunholo/agents/flask/__init__.py,sha256=dEoByI3gDNUOjpX1uVKP7uPjhfFHJubbiaAv3xLopnk,63
  sunholo/agents/flask/base.py,sha256=HLz3Z5efWaewTwSFEM6JH48NA9otoJBoVFJlARGk9L8,788
- sunholo/agents/flask/vac_routes.py,sha256=Dk9QrPvXNRzAWxaTWsYgHVxmK-Rjrvgd6-sAuvqt9P8,33236
+ sunholo/agents/flask/vac_routes.py,sha256=al4-k-QNKH5bX9Ai8FP7DC1R7yomSO3Lnq_cugnUHcw,32622
  sunholo/archive/__init__.py,sha256=qNHWm5rGPVOlxZBZCpA1wTYPbalizRT7f8X4rs2t290,31
  sunholo/archive/archive.py,sha256=PxVfDtO2_2ZEEbnhXSCbXLdeoHoQVImo4y3Jr2XkCFY,1204
  sunholo/auth/__init__.py,sha256=TeP-OY0XGxYV_8AQcVGoh35bvyWhNUcMRfhuD5l44Sk,91
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
  sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
- sunholo-0.140.4.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
- sunholo-0.140.4.dist-info/METADATA,sha256=oKjtRKqFPtwaoV177G0nRWfv3P9xfGB3U4fonaffJrk,10067
- sunholo-0.140.4.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
- sunholo-0.140.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
- sunholo-0.140.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
- sunholo-0.140.4.dist-info/RECORD,,
+ sunholo-0.140.6.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+ sunholo-0.140.6.dist-info/METADATA,sha256=J62v0HZ3NpRqt-zt0jpcA-KgGXyb5aEQyPGt6D4W-B8,10067
+ sunholo-0.140.6.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+ sunholo-0.140.6.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+ sunholo-0.140.6.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+ sunholo-0.140.6.dist-info/RECORD,,