ziya 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ziya might be problematic. Click here for more details.

Files changed (73) hide show
  1. app/agents/agent.py +71 -73
  2. app/agents/direct_streaming.py +1 -1
  3. app/agents/prompts.py +1 -1
  4. app/agents/prompts_manager.py +14 -10
  5. app/agents/wrappers/google_direct.py +31 -1
  6. app/agents/wrappers/nova_tool_execution.py +2 -2
  7. app/agents/wrappers/nova_wrapper.py +1 -1
  8. app/agents/wrappers/ziya_bedrock.py +53 -31
  9. app/config/models_config.py +61 -20
  10. app/config/shell_config.py +5 -1
  11. app/extensions/prompt_extensions/claude_extensions.py +27 -5
  12. app/extensions/prompt_extensions/mcp_prompt_extensions.py +82 -56
  13. app/main.py +5 -3
  14. app/mcp/client.py +19 -10
  15. app/mcp/manager.py +68 -10
  16. app/mcp/tools.py +8 -9
  17. app/mcp_servers/shell_server.py +3 -3
  18. app/middleware/streaming.py +29 -41
  19. app/routes/file_validation.py +35 -0
  20. app/routes/mcp_routes.py +54 -8
  21. app/server.py +525 -614
  22. app/streaming_tool_executor.py +748 -137
  23. app/templates/asset-manifest.json +20 -20
  24. app/templates/index.html +1 -1
  25. app/templates/static/css/{main.0297bfee.css → main.e7109b49.css} +2 -2
  26. app/templates/static/css/main.e7109b49.css.map +1 -0
  27. app/templates/static/js/14386.65fcfe53.chunk.js +2 -0
  28. app/templates/static/js/14386.65fcfe53.chunk.js.map +1 -0
  29. app/templates/static/js/35589.0368973a.chunk.js +2 -0
  30. app/templates/static/js/35589.0368973a.chunk.js.map +1 -0
  31. app/templates/static/js/{50295.ab92f61b.chunk.js → 50295.90aca393.chunk.js} +3 -3
  32. app/templates/static/js/50295.90aca393.chunk.js.map +1 -0
  33. app/templates/static/js/55734.5f0fd567.chunk.js +2 -0
  34. app/templates/static/js/55734.5f0fd567.chunk.js.map +1 -0
  35. app/templates/static/js/58542.57fed736.chunk.js +2 -0
  36. app/templates/static/js/58542.57fed736.chunk.js.map +1 -0
  37. app/templates/static/js/{68418.2554bb1e.chunk.js → 68418.f7b4d2d9.chunk.js} +3 -3
  38. app/templates/static/js/68418.f7b4d2d9.chunk.js.map +1 -0
  39. app/templates/static/js/99948.b280eda0.chunk.js +2 -0
  40. app/templates/static/js/99948.b280eda0.chunk.js.map +1 -0
  41. app/templates/static/js/main.e075582c.js +3 -0
  42. app/templates/static/js/main.e075582c.js.map +1 -0
  43. app/utils/code_util.py +5 -2
  44. app/utils/context_cache.py +11 -0
  45. app/utils/conversation_filter.py +90 -0
  46. app/utils/custom_bedrock.py +43 -1
  47. app/utils/diff_utils/validation/validators.py +32 -22
  48. app/utils/file_cache.py +5 -3
  49. app/utils/precision_prompt_system.py +116 -0
  50. app/utils/streaming_optimizer.py +100 -0
  51. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/METADATA +3 -2
  52. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/RECORD +59 -55
  53. app/templates/static/css/main.0297bfee.css.map +0 -1
  54. app/templates/static/js/14386.567bf803.chunk.js +0 -2
  55. app/templates/static/js/14386.567bf803.chunk.js.map +0 -1
  56. app/templates/static/js/35589.278ecda2.chunk.js +0 -2
  57. app/templates/static/js/35589.278ecda2.chunk.js.map +0 -1
  58. app/templates/static/js/50295.ab92f61b.chunk.js.map +0 -1
  59. app/templates/static/js/55734.90d8bd52.chunk.js +0 -2
  60. app/templates/static/js/55734.90d8bd52.chunk.js.map +0 -1
  61. app/templates/static/js/58542.08fb5cf4.chunk.js +0 -2
  62. app/templates/static/js/58542.08fb5cf4.chunk.js.map +0 -1
  63. app/templates/static/js/68418.2554bb1e.chunk.js.map +0 -1
  64. app/templates/static/js/99948.71670e91.chunk.js +0 -2
  65. app/templates/static/js/99948.71670e91.chunk.js.map +0 -1
  66. app/templates/static/js/main.1d79eac2.js +0 -3
  67. app/templates/static/js/main.1d79eac2.js.map +0 -1
  68. /app/templates/static/js/{50295.ab92f61b.chunk.js.LICENSE.txt → 50295.90aca393.chunk.js.LICENSE.txt} +0 -0
  69. /app/templates/static/js/{68418.2554bb1e.chunk.js.LICENSE.txt → 68418.f7b4d2d9.chunk.js.LICENSE.txt} +0 -0
  70. /app/templates/static/js/{main.1d79eac2.js.LICENSE.txt → main.e075582c.js.LICENSE.txt} +0 -0
  71. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/WHEEL +0 -0
  72. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/entry_points.txt +0 -0
  73. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/licenses/LICENSE +0 -0
app/utils/code_util.py CHANGED
@@ -3,8 +3,11 @@ Legacy module for backward compatibility.
3
3
  This module re-exports all functionality from the diff_utils package.
4
4
  """
5
5
 
6
- # Re-export everything from diff_utils
7
- from app.utils.diff_utils import *
6
+ from typing import List, Dict, Any, Optional
7
+ from app.utils.diff_utils.pipeline import apply_diff_pipeline
8
+ from app.utils.diff_utils.application.git_diff import use_git_to_apply_code_diff
9
+ from app.utils.diff_utils.parsing.diff_parser import parse_unified_diff
10
+ # Import only what's needed, not everything
8
11
 
9
12
  # For backward compatibility
10
13
  from app.utils.diff_utils.core.exceptions import PatchApplicationError
@@ -44,6 +44,8 @@ class ContextCacheManager:
44
44
  def __init__(self):
45
45
  self.cache_store: Dict[str, CachedContext] = {}
46
46
  self.default_ttl = 3600 # 1 hour default TTL
47
+ self.max_cache_entries = 1000 # Prevent unbounded growth
48
+ self.max_cache_memory_mb = 500 # Memory limit
47
49
  self.min_cache_size = 10000 # Minimum tokens to cache
48
50
  self.file_state_manager = FileStateManager()
49
51
  self.cache_stats = {"hits": 0, "misses": 0, "splits": 0, "tokens_cached": 0}
@@ -316,6 +318,15 @@ class ContextCacheManager:
316
318
  )
317
319
 
318
320
  self.cache_store[cache_key] = cached_context
321
+
322
+ # Enforce cache limits
323
+ if len(self.cache_store) > self.max_cache_entries:
324
+ # Remove oldest entries
325
+ oldest_keys = sorted(self.cache_store.keys(),
326
+ key=lambda k: self.cache_store[k].created_at)[:100]
327
+ for key in oldest_keys:
328
+ del self.cache_store[key]
329
+
319
330
  logger.info(f"Cached context for conversation {conversation_id}: {token_count} tokens, TTL {ttl_seconds}s")
320
331
 
321
332
  return cached_context
@@ -0,0 +1,90 @@
1
+ """
2
+ Conversation filtering utilities to prevent tool result contamination.
3
+
4
+ This module ensures that frontend display artifacts never reach the model,
5
+ preventing the model from learning to hallucinate fake tool results.
6
+ """
7
+
8
+ from typing import List, Dict, Any, Union
9
+ from app.utils.logging_utils import logger
10
+
11
+
12
+ def filter_conversation_for_model(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
13
+ """
14
+ Filter out frontend-only messages that should never reach the model.
15
+
16
+ This prevents tool result contamination by excluding:
17
+ - tool_execution (old contaminating format)
18
+ - tool_display (new frontend-only format)
19
+ - tool_start (frontend progress indicators)
20
+
21
+ Args:
22
+ messages: Raw conversation including frontend artifacts
23
+
24
+ Returns:
25
+ Clean conversation suitable for model consumption
26
+ """
27
+ filtered = []
28
+ filtered_count = 0
29
+
30
+ for msg in messages:
31
+ if isinstance(msg, dict):
32
+ msg_type = msg.get('type')
33
+
34
+ # Skip all frontend display messages
35
+ if msg_type in ['tool_execution', 'tool_display', 'tool_start']:
36
+ filtered_count += 1
37
+ logger.debug(f"Filtered out frontend artifact: {msg_type}")
38
+ continue
39
+
40
+ # Convert clean tool results for model
41
+ elif msg_type == 'tool_result_for_model':
42
+ filtered.append({
43
+ 'role': 'user',
44
+ 'content': [
45
+ {
46
+ 'type': 'tool_result',
47
+ 'tool_use_id': msg.get('tool_use_id'),
48
+ 'content': msg.get('content', '')
49
+ }
50
+ ]
51
+ })
52
+ logger.debug(f"Converted tool result for model: {msg.get('tool_use_id')}")
53
+
54
+ else:
55
+ # Include regular conversation messages
56
+ filtered.append(msg)
57
+ else:
58
+ # Include non-dict messages as-is
59
+ filtered.append(msg)
60
+
61
+ if filtered_count > 0:
62
+ logger.info(f"Filtered {filtered_count} frontend artifacts from conversation")
63
+
64
+ return filtered
65
+
66
+
67
+ def is_contaminating_message(msg: Union[Dict[str, Any], Any]) -> bool:
68
+ """
69
+ Check if a message would contaminate the model's understanding.
70
+
71
+ Args:
72
+ msg: Message to check
73
+
74
+ Returns:
75
+ True if message should be filtered out
76
+ """
77
+ if not isinstance(msg, dict):
78
+ return False
79
+
80
+ msg_type = msg.get('type')
81
+ contaminating_types = [
82
+ 'tool_execution', # Old contaminating format
83
+ 'tool_display', # New frontend-only format
84
+ 'tool_start', # Progress indicators
85
+ 'tool_progress', # Progress updates
86
+ 'stream_end', # Stream control
87
+ 'error' # Error displays
88
+ ]
89
+
90
+ return msg_type in contaminating_types
@@ -49,6 +49,7 @@ class CustomBedrockClient:
49
49
  self.last_extended_context_notification = None
50
50
  self.model_config = model_config or {}
51
51
  self.extended_context_manager = get_extended_context_manager()
52
+ self.throttled = False # Track if we've hit throttling
52
53
 
53
54
  # Get the region from the client
54
55
  self.region = self.client.meta.region_name if hasattr(self.client, 'meta') else None
@@ -219,9 +220,50 @@ class CustomBedrockClient:
219
220
  except Exception as e:
220
221
  error_message = str(e)
221
222
  self.last_error = error_message
223
+ logger.warning(f"🔄 INITIAL_ERROR: {error_message}")
224
+
225
+ # Check for throttling errors - let higher level retry handle it
226
+ # But keep timeout retries here since they need immediate retry
227
+ if ("ThrottlingException" in error_message or
228
+ "Too many tokens" in error_message or
229
+ "rate limit" in error_message.lower()):
230
+ # Mark as throttled and recreate client without boto3 retries
231
+ if not self.throttled:
232
+ self.throttled = True
233
+ logger.info("🔄 THROTTLE_DETECTED: Disabling boto3 retries for subsequent attempts")
234
+ from botocore.config import Config
235
+ import boto3
236
+ retry_config = Config(retries={'max_attempts': 1, 'mode': 'standard'})
237
+ new_client = boto3.client('bedrock-runtime', region_name=self.region, config=retry_config)
238
+ self.client = new_client
239
+ self.original_invoke = new_client.invoke_model_with_response_stream
240
+ if hasattr(new_client, 'invoke_model'):
241
+ self.original_invoke_model = new_client.invoke_model
242
+ # Don't retry here, let streaming_tool_executor handle throttling retries
243
+ raise
244
+
245
+ if "timeout" in error_message.lower():
246
+ # Retry timeouts immediately with short delays
247
+ max_retries = 2
248
+ for retry_attempt in range(max_retries):
249
+ delays = [1, 2]
250
+ delay = delays[retry_attempt]
251
+ logger.warning(f"🔄 TIMEOUT_RETRY: Attempt {retry_attempt + 1}/{max_retries} after {delay}s delay")
252
+
253
+ import time
254
+ time.sleep(delay)
255
+
256
+ try:
257
+ return self.original_invoke(**kwargs)
258
+ except Exception as retry_error:
259
+ if retry_attempt == max_retries - 1:
260
+ logger.error(f"🔄 TIMEOUT_RETRY: All {max_retries} retry attempts failed")
261
+ raise retry_error
262
+ elif "timeout" not in str(retry_error).lower():
263
+ raise retry_error
222
264
 
223
265
  # Check if it's a context limit error
224
- if ("input length and `max_tokens` exceed context limit" in error_message or
266
+ elif ("input length and `max_tokens` exceed context limit" in error_message or
225
267
  "Input is too long" in error_message):
226
268
  logger.warning(f"Context limit error detected: {error_message}")
227
269
 
@@ -291,9 +291,9 @@ def is_hunk_already_applied(file_lines: List[str], hunk: Dict[str, Any], pos: in
291
291
  if not removed_lines and not added_lines:
292
292
  return True
293
293
 
294
- # For pure additions, check if content already exists in file
294
+ # For pure additions, check if content already exists at the expected position
295
295
  if len(removed_lines) == 0 and len(added_lines) > 0:
296
- return _check_pure_addition_already_applied(file_lines, added_lines)
296
+ return _check_pure_addition_already_applied(file_lines, added_lines, hunk, pos)
297
297
 
298
298
  # CRITICAL: For hunks with removals, validate that the content to be removed matches
299
299
  # If removal validation fails, the hunk cannot be already applied
@@ -314,38 +314,48 @@ def _is_valid_hunk_header(hunk: Dict[str, Any]) -> bool:
314
314
  return True
315
315
 
316
316
 
317
- def _check_pure_addition_already_applied(file_lines: List[str], added_lines: List[str]) -> bool:
318
- """Check if a pure addition (no removals) is already applied."""
317
+ def _check_pure_addition_already_applied(file_lines: List[str], added_lines: List[str], hunk: Dict[str, Any], pos: int) -> bool:
318
+ """Check if a pure addition (no removals) is already applied with context validation."""
319
319
 
320
320
  logger.debug(f"Checking pure addition - added_lines: {added_lines}")
321
321
 
322
- # Check if the added lines exist as a contiguous block anywhere in the file
323
- # This is more precise than checking individual lines scattered throughout
324
322
  if not added_lines:
325
323
  return True
326
324
 
327
- # CRITICAL FIX: For very common patterns like closing braces, be more conservative
328
- # Don't mark as already applied if the added content consists only of common syntax elements
329
- normalized_added = [normalize_line_for_comparison(line).strip() for line in added_lines]
325
+ # Get context lines from the hunk
326
+ hunk_lines = hunk.get('lines', [])
327
+ context_lines = [line[1:] for line in hunk_lines if line.startswith(' ')]
330
328
 
331
- # Check if all added lines are just common syntax elements (braces, semicolons, etc.)
332
- common_syntax_patterns = {'}', '};', '{', ')', '(', ']', '[', ',', ';'}
333
- if all(line in common_syntax_patterns for line in normalized_added):
334
- logger.debug("Added lines contain only common syntax elements, being conservative")
329
+ if not context_lines:
330
+ logger.debug("No context lines in hunk - cannot validate if pure addition is already applied")
335
331
  return False
336
332
 
337
333
  added_block = [normalize_line_for_comparison(line) for line in added_lines]
338
-
339
- # Look for the exact sequence of added lines in the file
340
- for start_pos in range(len(file_lines) - len(added_lines) + 1):
341
- file_block = [normalize_line_for_comparison(file_lines[start_pos + i])
342
- for i in range(len(added_lines))]
334
+ context_normalized = [normalize_line_for_comparison(line) for line in context_lines]
335
+ first_context_line = context_normalized[0]
336
+
337
+ # Search for context lines in the file, then check if added lines follow
338
+ for context_pos in range(len(file_lines) - len(context_lines) + 1):
339
+ # Quick check: skip if first context line doesn't match
340
+ if normalize_line_for_comparison(file_lines[context_pos]) != first_context_line:
341
+ continue
342
+
343
+ file_context = [normalize_line_for_comparison(file_lines[context_pos + i])
344
+ for i in range(len(context_lines))]
343
345
 
344
- if file_block == added_block:
345
- logger.debug(f"Found contiguous block of added lines at position {start_pos}")
346
- return True
346
+ if file_context == context_normalized:
347
+ # Found matching context, check if added lines are right after
348
+ check_pos = context_pos + len(context_lines)
349
+
350
+ if check_pos + len(added_lines) <= len(file_lines):
351
+ file_block = [normalize_line_for_comparison(file_lines[check_pos + i])
352
+ for i in range(len(added_lines))]
353
+
354
+ if file_block == added_block:
355
+ logger.debug(f"Found added lines at position {check_pos} with matching context at {context_pos}")
356
+ return True
347
357
 
348
- logger.debug("Added lines not found as contiguous block in file")
358
+ logger.debug("Added lines not found at expected position after context")
349
359
  return False
350
360
 
351
361
 
app/utils/file_cache.py CHANGED
@@ -22,9 +22,11 @@ class ThreadStateManager:
22
22
  def __new__(cls):
23
23
  with cls._lock:
24
24
  if cls._instance is None:
25
- instance = super().__new__(cls)
26
- instance.thread_states = {}
27
- cls._instance = instance
25
+ cls._instance = super().__new__(cls)
26
+ cls._instance.thread_states = {}
27
+ cls._instance._initialized = True
28
+ elif not hasattr(cls._instance, '_initialized'):
29
+ cls._instance.thread_states = {}
28
30
  return cls._instance
29
31
 
30
32
 
@@ -0,0 +1,116 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Precision prompt system that achieves 100% equivalence with original Ziya prompts.
4
+ """
5
+
6
+ import os
7
+ import sys
8
+ from typing import Dict, List, Any
9
+
10
+ class PrecisionPromptSystem:
11
+ """
12
+ Production-ready system that achieves 100% equivalence with the original Ziya prompts
13
+ while eliminating the file loss issue caused by regex stripping.
14
+ """
15
+
16
+ def build_messages(self,
17
+ request_path: str,
18
+ model_info: Dict[str, Any],
19
+ files: List[str],
20
+ question: str,
21
+ chat_history: List[Dict[str, Any]] = None) -> List:
22
+ """
23
+ Drop-in replacement for the original build_messages function.
24
+
25
+ Achieves 100% equivalence with original Ziya prompts while preserving all files.
26
+ No regex stripping needed - clean templates prevent contamination.
27
+ """
28
+
29
+ if chat_history is None:
30
+ chat_history = []
31
+
32
+ try:
33
+ # Use extended prompt system with native tools context
34
+ from app.agents.prompts_manager import get_extended_prompt
35
+ from app.agents.agent import extract_codebase
36
+
37
+ print(f"🎯 PRECISION_DEBUG: Calling extract_codebase with {len(files)} files")
38
+ file_context = extract_codebase({
39
+ "config": {"files": files},
40
+ "conversation_id": f"precision_{hash(str(files))}"
41
+ })
42
+
43
+ print(f"🎯 PRECISION_DEBUG: Got file_context length: {len(file_context) if file_context else 0}")
44
+
45
+ # Build context with native_tools_available for Bedrock
46
+ context = {
47
+ "model_id": model_info.get("model_id", ""),
48
+ "endpoint": model_info.get("endpoint", "bedrock"),
49
+ "native_tools_available": model_info.get("endpoint", "bedrock") == "bedrock"
50
+ }
51
+
52
+ # Get extended prompt with proper context
53
+ extended_prompt = get_extended_prompt(
54
+ model_name=model_info.get("model_name", "sonnet4.0"),
55
+ model_family=model_info.get("model_family", "claude"),
56
+ endpoint=model_info.get("endpoint", "bedrock"),
57
+ context=context
58
+ )
59
+
60
+ # Format the prompt with file context
61
+ formatted_messages = extended_prompt.format_messages(
62
+ codebase=file_context,
63
+ tools="", # Tools are handled natively
64
+ question=question
65
+ )
66
+
67
+ # Convert to dict format
68
+ messages = []
69
+ for msg in formatted_messages:
70
+ if hasattr(msg, 'type') and hasattr(msg, 'content'):
71
+ role = 'system' if msg.type == 'system' else ('user' if msg.type == 'human' else 'assistant')
72
+ messages.append({"role": role, "content": msg.content})
73
+
74
+ # Add chat history before the question
75
+ if chat_history:
76
+ # Insert chat history before the last message (the question)
77
+ question_msg = messages.pop() if messages else None
78
+ for msg in chat_history:
79
+ if isinstance(msg, dict):
80
+ if 'type' in msg:
81
+ role = 'user' if msg['type'] in ['human', 'user'] else 'assistant'
82
+ messages.append({"role": role, "content": msg.get('content', '')})
83
+ elif 'role' in msg:
84
+ messages.append(msg)
85
+ if question_msg:
86
+ messages.append(question_msg)
87
+
88
+ print(f"🎯 PRECISION_DEBUG: Generated {len(messages)} messages")
89
+
90
+ return messages
91
+
92
+ except Exception as e:
93
+ import logging
94
+ logger = logging.getLogger(__name__)
95
+ logger.warning(f"Error in precision system: {e}")
96
+ # Fallback to minimal system
97
+ return self._fallback_build_messages(question, chat_history)
98
+
99
+ def _fallback_build_messages(self, question, chat_history):
100
+ """Fallback to minimal system if needed"""
101
+ messages = [
102
+ {"role": "system", "content": "You are an excellent coder. Help the user with their coding tasks."}
103
+ ]
104
+
105
+ # Add chat history
106
+ for msg in chat_history:
107
+ messages.append(msg)
108
+
109
+ # Add current question
110
+ if question:
111
+ messages.append({"role": "user", "content": question})
112
+
113
+ return messages
114
+
115
+ # Global instance
116
+ precision_system = PrecisionPromptSystem()
@@ -0,0 +1,100 @@
1
+ """
2
+ Streaming Response Optimizer
3
+ Fixes chunking issues identified in streaming analysis
4
+ """
5
+
6
+ import re
7
+ import time
8
+ from typing import Generator, Optional
9
+
10
+ class StreamingContentOptimizer:
11
+ """Optimizes content streaming to prevent mid-word splits"""
12
+
13
+ def __init__(self, min_chunk_size: int = 15, max_buffer_size: int = 100):
14
+ self.buffer = ""
15
+ self.min_chunk_size = min_chunk_size
16
+ self.max_buffer_size = max_buffer_size
17
+ self.word_boundary = re.compile(r'(\s+)')
18
+
19
+ def add_content(self, content: str) -> Generator[str, None, None]:
20
+ """Add content and yield optimized chunks"""
21
+ self.buffer += content
22
+
23
+ # Force flush if buffer gets too large
24
+ if len(self.buffer) > self.max_buffer_size:
25
+ yield from self._flush_complete_words()
26
+
27
+ # Check if we have enough content to send
28
+ elif len(self.buffer) >= self.min_chunk_size:
29
+ yield from self._flush_complete_words()
30
+
31
+ def _flush_complete_words(self) -> Generator[str, None, None]:
32
+ """Flush complete words from buffer"""
33
+ if not self.buffer.strip():
34
+ return
35
+
36
+ # Split on word boundaries but keep delimiters
37
+ parts = self.word_boundary.split(self.buffer)
38
+
39
+ if len(parts) > 2: # We have at least one complete word
40
+ # Send all but the last part (which might be incomplete)
41
+ complete_parts = parts[:-1]
42
+ chunk_to_send = ''.join(complete_parts)
43
+
44
+ if chunk_to_send.strip():
45
+ yield chunk_to_send
46
+ self.buffer = parts[-1] # Keep the last part
47
+
48
+ def flush_remaining(self) -> Optional[str]:
49
+ """Flush any remaining content"""
50
+ if self.buffer.strip():
51
+ content = self.buffer
52
+ self.buffer = ""
53
+ return content
54
+ return None
55
+
56
+ def is_in_code_block(self, accumulated_text: str) -> tuple[bool, str]:
57
+ """
58
+ Check if the accumulated text ends in the middle of a code block.
59
+
60
+ Returns:
61
+ (is_in_block, block_type): True if in block, with block type
62
+ """
63
+ lines = accumulated_text.split('\n')
64
+ open_blocks = []
65
+
66
+ for line in lines:
67
+ stripped = line.strip()
68
+ if stripped.startswith('```'):
69
+ if stripped == '```':
70
+ # Closing block
71
+ if open_blocks:
72
+ open_blocks.pop()
73
+ else:
74
+ # Opening block
75
+ block_type = stripped[3:].strip() or 'text'
76
+ open_blocks.append(block_type)
77
+
78
+ if open_blocks:
79
+ return True, open_blocks[-1] # Return the most recent open block type
80
+ return False, None
81
+
82
+ def count_code_block_markers(self, text: str) -> int:
83
+ """Count the number of ``` markers in text."""
84
+ return text.count('```')
85
+
86
+ def has_incomplete_code_block(self, text: str) -> bool:
87
+ """Check if text has an odd number of code block markers (incomplete)."""
88
+ return self.count_code_block_markers(text) % 2 == 1
89
+ def optimize_streaming_chunk(content: str, optimizer: StreamingContentOptimizer) -> Generator[dict, None, None]:
90
+ """
91
+ Optimize a streaming content chunk to prevent mid-word splits
92
+ """
93
+ chunks = list(optimizer.add_content(content))
94
+
95
+ for chunk in chunks:
96
+ yield {
97
+ 'type': 'text',
98
+ 'content': chunk,
99
+ 'timestamp': f"{int(time.time() * 1000)}ms"
100
+ }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ziya
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary:
5
5
  License-File: LICENSE
6
6
  Author: Vishnu Krishnaprasad
@@ -17,6 +17,7 @@ Requires-Dist: PyPDF2 (>=3.0.1,<4.0.0)
17
17
  Requires-Dist: boto3 (>=1.34.88,<2.0.0)
18
18
  Requires-Dist: cryptography (>=3.4.8,<43.0.0)
19
19
  Requires-Dist: cssutils (>=2.6.0)
20
+ Requires-Dist: google-generativeai (>=0.8.5)
20
21
  Requires-Dist: html5lib (>=1.1)
21
22
  Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
22
23
  Requires-Dist: langchain (>=0.3,<0.4)
@@ -24,7 +25,7 @@ Requires-Dist: langchain-anthropic (>=0.2,<0.3)
24
25
  Requires-Dist: langchain-aws (>=0.2,<0.3)
25
26
  Requires-Dist: langchain-cli (>=0.0.15)
26
27
  Requires-Dist: langchain-community (>=0.3.1,<0.4.0)
27
- Requires-Dist: langchain-google-genai (>=2.1.0,<3.0.0)
28
+ Requires-Dist: langchain-google-genai (==2.0.4)
28
29
  Requires-Dist: langchainhub (>=0.1.15)
29
30
  Requires-Dist: langgraph (>=0.2,<0.3)
30
31
  Requires-Dist: numpy (>=1.21.0,<2.0.0)