ziya: ziya-0.3.0-py3-none-any.whl → ziya-0.3.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ziya might be problematic. Click here for more details.

Files changed (73)
  1. app/agents/agent.py +71 -73
  2. app/agents/direct_streaming.py +1 -1
  3. app/agents/prompts.py +1 -1
  4. app/agents/prompts_manager.py +14 -10
  5. app/agents/wrappers/google_direct.py +31 -1
  6. app/agents/wrappers/nova_tool_execution.py +2 -2
  7. app/agents/wrappers/nova_wrapper.py +1 -1
  8. app/agents/wrappers/ziya_bedrock.py +53 -31
  9. app/config/models_config.py +61 -20
  10. app/config/shell_config.py +5 -1
  11. app/extensions/prompt_extensions/claude_extensions.py +27 -5
  12. app/extensions/prompt_extensions/mcp_prompt_extensions.py +82 -56
  13. app/main.py +5 -3
  14. app/mcp/client.py +19 -10
  15. app/mcp/manager.py +68 -10
  16. app/mcp/tools.py +8 -9
  17. app/mcp_servers/shell_server.py +3 -3
  18. app/middleware/streaming.py +29 -41
  19. app/routes/file_validation.py +35 -0
  20. app/routes/mcp_routes.py +54 -8
  21. app/server.py +525 -614
  22. app/streaming_tool_executor.py +748 -137
  23. app/templates/asset-manifest.json +20 -20
  24. app/templates/index.html +1 -1
  25. app/templates/static/css/{main.0297bfee.css → main.e7109b49.css} +2 -2
  26. app/templates/static/css/main.e7109b49.css.map +1 -0
  27. app/templates/static/js/14386.65fcfe53.chunk.js +2 -0
  28. app/templates/static/js/14386.65fcfe53.chunk.js.map +1 -0
  29. app/templates/static/js/35589.0368973a.chunk.js +2 -0
  30. app/templates/static/js/35589.0368973a.chunk.js.map +1 -0
  31. app/templates/static/js/{50295.ab92f61b.chunk.js → 50295.90aca393.chunk.js} +3 -3
  32. app/templates/static/js/50295.90aca393.chunk.js.map +1 -0
  33. app/templates/static/js/55734.5f0fd567.chunk.js +2 -0
  34. app/templates/static/js/55734.5f0fd567.chunk.js.map +1 -0
  35. app/templates/static/js/58542.57fed736.chunk.js +2 -0
  36. app/templates/static/js/58542.57fed736.chunk.js.map +1 -0
  37. app/templates/static/js/{68418.2554bb1e.chunk.js → 68418.f7b4d2d9.chunk.js} +3 -3
  38. app/templates/static/js/68418.f7b4d2d9.chunk.js.map +1 -0
  39. app/templates/static/js/99948.b280eda0.chunk.js +2 -0
  40. app/templates/static/js/99948.b280eda0.chunk.js.map +1 -0
  41. app/templates/static/js/main.e075582c.js +3 -0
  42. app/templates/static/js/main.e075582c.js.map +1 -0
  43. app/utils/code_util.py +5 -2
  44. app/utils/context_cache.py +11 -0
  45. app/utils/conversation_filter.py +90 -0
  46. app/utils/custom_bedrock.py +43 -1
  47. app/utils/diff_utils/validation/validators.py +32 -22
  48. app/utils/file_cache.py +5 -3
  49. app/utils/precision_prompt_system.py +116 -0
  50. app/utils/streaming_optimizer.py +100 -0
  51. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/METADATA +3 -2
  52. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/RECORD +59 -55
  53. app/templates/static/css/main.0297bfee.css.map +0 -1
  54. app/templates/static/js/14386.567bf803.chunk.js +0 -2
  55. app/templates/static/js/14386.567bf803.chunk.js.map +0 -1
  56. app/templates/static/js/35589.278ecda2.chunk.js +0 -2
  57. app/templates/static/js/35589.278ecda2.chunk.js.map +0 -1
  58. app/templates/static/js/50295.ab92f61b.chunk.js.map +0 -1
  59. app/templates/static/js/55734.90d8bd52.chunk.js +0 -2
  60. app/templates/static/js/55734.90d8bd52.chunk.js.map +0 -1
  61. app/templates/static/js/58542.08fb5cf4.chunk.js +0 -2
  62. app/templates/static/js/58542.08fb5cf4.chunk.js.map +0 -1
  63. app/templates/static/js/68418.2554bb1e.chunk.js.map +0 -1
  64. app/templates/static/js/99948.71670e91.chunk.js +0 -2
  65. app/templates/static/js/99948.71670e91.chunk.js.map +0 -1
  66. app/templates/static/js/main.1d79eac2.js +0 -3
  67. app/templates/static/js/main.1d79eac2.js.map +0 -1
  68. /app/templates/static/js/{50295.ab92f61b.chunk.js.LICENSE.txt → 50295.90aca393.chunk.js.LICENSE.txt} +0 -0
  69. /app/templates/static/js/{68418.2554bb1e.chunk.js.LICENSE.txt → 68418.f7b4d2d9.chunk.js.LICENSE.txt} +0 -0
  70. /app/templates/static/js/{main.1d79eac2.js.LICENSE.txt → main.e075582c.js.LICENSE.txt} +0 -0
  71. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/WHEEL +0 -0
  72. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/entry_points.txt +0 -0
  73. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/licenses/LICENSE +0 -0
@@ -3,15 +3,42 @@ import asyncio
3
3
  import json
4
4
  import boto3
5
5
  import logging
6
+ import os
7
+ import time
6
8
  from typing import Dict, Any, List, AsyncGenerator, Optional
9
+ from app.utils.conversation_filter import filter_conversation_for_model
7
10
 
8
11
  logger = logging.getLogger(__name__)
9
12
 
10
13
  class StreamingToolExecutor:
11
- def __init__(self, profile_name: str = 'ziya', region: str = 'us-west-2'):
12
- session = boto3.Session(profile_name=profile_name)
13
- self.bedrock = session.client('bedrock-runtime', region_name=region)
14
- self.model_id = 'us.anthropic.claude-sonnet-4-20250514-v1:0'
14
+ def __init__(self, profile_name: str = 'ziya', region: str = 'us-west-2', model_id: str = None):
15
+ self.model_id = model_id or os.environ.get('DEFAULT_MODEL_ID', 'us.anthropic.claude-sonnet-4-20250514-v1:0')
16
+
17
+ # Only initialize Bedrock client for Bedrock endpoints
18
+ from app.agents.models import ModelManager
19
+ endpoint = os.environ.get("ZIYA_ENDPOINT", "bedrock")
20
+ model_name = os.environ.get("ZIYA_MODEL")
21
+ self.model_config = ModelManager.get_model_config(endpoint, model_name)
22
+
23
+ if endpoint == "bedrock":
24
+ # Use ModelManager's wrapped bedrock client for proper extended context handling
25
+ try:
26
+ self.bedrock = ModelManager._get_persistent_bedrock_client(
27
+ aws_profile=profile_name,
28
+ region=region,
29
+ model_id=self.model_id,
30
+ model_config=self.model_config
31
+ )
32
+ logger.info(f"🔍 Using ModelManager's wrapped bedrock client with extended context support")
33
+ except Exception as e:
34
+ logger.warning(f"🔍 Could not get wrapped client, falling back to direct client: {e}")
35
+ # Fallback to direct client creation
36
+ session = boto3.Session(profile_name=profile_name)
37
+ self.bedrock = session.client('bedrock-runtime', region_name=region)
38
+ else:
39
+ # Non-Bedrock endpoints don't need a bedrock client
40
+ self.bedrock = None
41
+ logger.info(f"🔍 Skipping Bedrock client initialization for endpoint: {endpoint}")
15
42
 
16
43
  def _convert_tool_schema(self, tool):
17
44
  """Convert tool schema to JSON-serializable format"""
@@ -19,20 +46,19 @@ class StreamingToolExecutor:
19
46
  # Already a dict, but check input_schema
20
47
  result = tool.copy()
21
48
  input_schema = result.get('input_schema')
22
- if hasattr(input_schema, 'model_json_schema'):
49
+ if isinstance(input_schema, dict):
50
+ # Already a dict, use as-is
51
+ pass
52
+ elif hasattr(input_schema, 'model_json_schema'):
23
53
  # Pydantic class - convert to JSON schema
24
54
  result['input_schema'] = input_schema.model_json_schema()
25
- elif hasattr(input_schema, '__dict__') and not isinstance(input_schema, dict):
26
- # Some other class object - try to convert
55
+ elif input_schema is not None:
56
+ # Some other object - try to convert
27
57
  try:
28
58
  result['input_schema'] = input_schema.model_json_schema()
29
59
  except:
30
- # Fallback to basic schema
31
- result['input_schema'] = {
32
- "type": "object",
33
- "properties": {"command": {"type": "string"}},
34
- "required": ["command"]
35
- }
60
+ logger.warning(f"🔍 TOOL_SCHEMA: Could not convert input_schema, using fallback")
61
+ result['input_schema'] = {"type": "object", "properties": {}}
36
62
  return result
37
63
  else:
38
64
  # Tool object - extract properties
@@ -40,25 +66,35 @@ class StreamingToolExecutor:
40
66
  description = getattr(tool, 'description', 'No description')
41
67
  input_schema = getattr(tool, 'input_schema', getattr(tool, 'inputSchema', {}))
42
68
 
43
- # Convert input_schema if it's a Pydantic class
44
- if hasattr(input_schema, 'model_json_schema'):
69
+ logger.info(f"🔍 TOOL_SCHEMA: Converting tool '{name}', input_schema type: {type(input_schema)}")
70
+
71
+ # Handle different input_schema types
72
+ if isinstance(input_schema, dict):
73
+ # Already a dict, use as-is
74
+ logger.info(f"🔍 TOOL_SCHEMA: Tool '{name}' has dict schema with keys: {list(input_schema.keys())}")
75
+ elif hasattr(input_schema, 'model_json_schema'):
76
+ # Pydantic class - convert to JSON schema
45
77
  input_schema = input_schema.model_json_schema()
46
- elif hasattr(input_schema, '__dict__') and not isinstance(input_schema, dict):
47
- # Some other class object
78
+ logger.info(f"🔍 TOOL_SCHEMA: Converted Pydantic schema for '{name}'")
79
+ elif input_schema:
80
+ # Some other object - try to convert
48
81
  try:
49
82
  input_schema = input_schema.model_json_schema()
83
+ logger.info(f"🔍 TOOL_SCHEMA: Converted object schema for '{name}'")
50
84
  except:
51
- input_schema = {
52
- "type": "object",
53
- "properties": {"command": {"type": "string"}},
54
- "required": ["command"]
55
- }
85
+ logger.warning(f"🔍 TOOL_SCHEMA: Failed to convert schema for '{name}', using empty schema")
86
+ input_schema = {"type": "object", "properties": {}}
87
+ else:
88
+ logger.warning(f"🔍 TOOL_SCHEMA: Tool '{name}' has no input_schema, using empty schema")
89
+ input_schema = {"type": "object", "properties": {}}
56
90
 
57
- return {
91
+ result = {
58
92
  'name': name,
59
93
  'description': description,
60
94
  'input_schema': input_schema
61
95
  }
96
+ logger.info(f"🔍 TOOL_SCHEMA: Final schema for '{name}': {json.dumps(result, indent=2)}")
97
+ return result
62
98
 
63
99
  def _commands_similar(self, cmd1: str, cmd2: str) -> bool:
64
100
  """Check if two shell commands are functionally similar"""
@@ -95,7 +131,7 @@ class StreamingToolExecutor:
95
131
  })
96
132
 
97
133
  return {
98
- 'type': 'tool_execution',
134
+ 'type': 'tool_display',
99
135
  'tool_id': f'fake_{len(tool_results)}',
100
136
  'tool_name': tool_name,
101
137
  'result': result_text
@@ -104,7 +140,39 @@ class StreamingToolExecutor:
104
140
  logger.error(f"Error executing intercepted tool call: {e}")
105
141
  return None
106
142
 
107
- async def stream_with_tools(self, messages: List[Dict[str, Any]], tools: Optional[List] = None) -> AsyncGenerator[Dict[str, Any], None]:
143
+ async def stream_with_tools(self, messages: List[Dict[str, Any]], tools: Optional[List] = None, conversation_id: Optional[str] = None) -> AsyncGenerator[Dict[str, Any], None]:
144
+ # Initialize streaming metrics
145
+ stream_metrics = {
146
+ 'events_sent': 0,
147
+ 'bytes_sent': 0,
148
+ 'chunk_sizes': [],
149
+ 'start_time': time.time()
150
+ }
151
+
152
+ def track_yield(event_data):
153
+ """Track metrics for yielded events"""
154
+ chunk_size = len(json.dumps(event_data))
155
+ stream_metrics['events_sent'] += 1
156
+ stream_metrics['bytes_sent'] += chunk_size
157
+ stream_metrics['chunk_sizes'].append(chunk_size)
158
+
159
+ if stream_metrics['events_sent'] % 100 == 0:
160
+ logger.info(f"📊 Stream metrics: {stream_metrics['events_sent']} events, "
161
+ f"{stream_metrics['bytes_sent']} bytes, "
162
+ f"avg={stream_metrics['bytes_sent']/stream_metrics['events_sent']:.2f}")
163
+ return event_data
164
+
165
+ # Extended context handling for sonnet4.5
166
+ if conversation_id:
167
+ logger.info(f"🔍 EXTENDED_CONTEXT: Processing conversation_id = {conversation_id}")
168
+ # Set conversation_id in custom_bedrock module global so CustomBedrockClient can use it
169
+ try:
170
+ import app.utils.custom_bedrock as custom_bedrock_module
171
+ custom_bedrock_module._current_conversation_id = conversation_id
172
+ logger.info(f"🔍 EXTENDED_CONTEXT: Set module global conversation_id")
173
+ except Exception as e:
174
+ logger.warning(f"🔍 EXTENDED_CONTEXT: Could not set conversation_id: {e}")
175
+
108
176
  # Get MCP tools
109
177
  from app.mcp.manager import get_mcp_manager
110
178
  mcp_manager = get_mcp_manager()
@@ -130,20 +198,51 @@ class StreamingToolExecutor:
130
198
  conversation = []
131
199
  system_content = None
132
200
 
133
- for msg in messages:
134
- role = msg.get('role', '')
135
- content = msg.get('content', '')
201
+ logger.info(f"🔍 STREAMING_TOOL_EXECUTOR: Received {len(messages)} messages")
202
+ for i, msg in enumerate(messages):
203
+ # Handle both dict format and LangChain message objects
204
+ if hasattr(msg, 'type') and hasattr(msg, 'content'):
205
+ # LangChain message object
206
+ role = msg.type if msg.type != 'human' else 'user'
207
+ content = msg.content
208
+ elif isinstance(msg, str):
209
+ # String format - treat as user message
210
+ role = 'user'
211
+ content = msg
212
+ else:
213
+ # Dict format
214
+ role = msg.get('role', '')
215
+ content = msg.get('content', '')
216
+
217
+ logger.info(f"🔍 STREAMING_TOOL_EXECUTOR: Message {i}: role={role}, content_length={len(content)}")
136
218
  if role == 'system':
137
219
  system_content = content
138
- elif role in ['user', 'assistant']:
139
- conversation.append({"role": role, "content": content})
220
+ logger.info(f"🔍 STREAMING_TOOL_EXECUTOR: Found system message with {len(content)} characters")
221
+ elif role in ['user', 'assistant', 'ai']:
222
+ # Normalize ai role to assistant for Bedrock
223
+ bedrock_role = 'assistant' if role == 'ai' else role
224
+ conversation.append({"role": bedrock_role, "content": content})
140
225
 
141
226
  # Iterative execution with proper tool result handling
142
227
  recent_commands = [] # Track recent commands to prevent duplicates
228
+ using_extended_context = False # Track if we've enabled extended context
229
+ consecutive_empty_tool_calls = 0 # Track empty tool calls to break loops
143
230
 
144
231
  for iteration in range(50): # Increased from 20 to support more complex tasks
145
232
  logger.info(f"🔍 ITERATION_START: Beginning iteration {iteration}")
233
+
234
+ # Log last 2 messages to debug conversation state
235
+ if len(conversation) >= 2:
236
+ for i, msg in enumerate(conversation[-2:]):
237
+ role = msg.get('role', msg.get('type', 'unknown'))
238
+ content = msg.get('content', '')
239
+ content_preview = str(content)[:150] if content else 'empty'
240
+ logger.info(f"🔍 CONV_DEBUG: Message -{2-i}: role={role}, content_preview={content_preview}")
241
+
146
242
  tools_executed_this_iteration = False # Track if tools were executed in this iteration
243
+ blocked_tools_this_iteration = 0 # Track blocked tools to prevent runaway loops
244
+ commands_this_iteration = [] # Track commands executed in this specific iteration
245
+ empty_tool_calls_this_iteration = 0 # Track empty tool calls in this iteration
147
246
 
148
247
  body = {
149
248
  "anthropic_version": "bedrock-2023-05-31",
@@ -152,46 +251,90 @@ class StreamingToolExecutor:
152
251
  }
153
252
 
154
253
  if system_content:
155
- # Remove ALL tool instructions to prevent confusion
156
- import re
157
- system_content = re.sub(r'## MCP Tool Usage.*?(?=##|$)', '', system_content, flags=re.DOTALL)
158
- system_content = re.sub(r'<TOOL_SENTINEL>.*?</TOOL_SENTINEL>', '', system_content, flags=re.DOTALL)
159
- system_content = re.sub(r'<invoke.*?</invoke>', '', system_content, flags=re.DOTALL)
160
- system_content = re.sub(r'<tool_input.*?</tool_input>', '', system_content, flags=re.DOTALL)
161
- system_content = re.sub(r'<mcp_tool.*?</mcp_tool>', '', system_content, flags=re.DOTALL)
162
- # Remove any remaining XML-like tool patterns
163
- system_content = re.sub(r'<[^>]*tool[^>]*>.*?</[^>]*>', '', system_content, flags=re.DOTALL | re.IGNORECASE)
164
- # Remove markdown tool patterns that cause hallucinations
165
- system_content = re.sub(r'```tool:.*?```', '', system_content, flags=re.DOTALL)
166
- system_content = re.sub(r'```.*?mcp_.*?```', '', system_content, flags=re.DOTALL)
167
- body["system"] = system_content + "\n\nCRITICAL: Use ONLY native tool calling. Never generate markdown like ```tool:mcp_run_shell_command or ```bash. Use the provided tools directly."
254
+ # With precision prompts, system content is already clean - no regex needed
255
+ logger.info(f"🔍 SYSTEM_DEBUG: Using clean system content length: {len(system_content)}")
256
+ logger.info(f"🔍 SYSTEM_DEBUG: File count in system content: {system_content.count('File:')}")
257
+
258
+ system_text = system_content + "\n\nCRITICAL: Use ONLY native tool calling. Never generate markdown like ```tool:mcp_run_shell_command or ```bash. Use the provided tools directly.\n\nIMPORTANT: Only use tools when you need to interact with the system (run commands, check time, etc). If you can answer from the provided context or your reasoning, do so directly without using tools. Don't use echo commands just to show your thinking - just answer directly."
259
+
260
+ # Use prompt caching for large system prompts to speed up iterations
261
+ if len(system_text) > 1024:
262
+ body["system"] = [
263
+ {
264
+ "type": "text",
265
+ "text": system_text,
266
+ "cache_control": {"type": "ephemeral"}
267
+ }
268
+ ]
269
+ logger.info(f"🔍 CACHE: Enabled prompt caching for {len(system_text)} char system prompt")
270
+ else:
271
+ body["system"] = system_text
272
+
273
+ logger.info(f"🔍 SYSTEM_DEBUG: Final system prompt length: {len(system_text)}")
274
+ logger.info(f"🔍 SYSTEM_CONTENT_DEBUG: First 500 chars of system prompt: {system_text[:500]}")
275
+ logger.info(f"🔍 SYSTEM_CONTENT_DEBUG: System prompt contains 'File:' count: {system_text.count('File:')}")
276
+ logger.info(f"🔍 SYSTEM_CONTENT_DEBUG: Last 500 chars of system prompt: {system_text[-500:]}")
277
+
278
+ # If we've already enabled extended context, keep using it
279
+ if using_extended_context and self.model_config:
280
+ header_value = self.model_config.get('extended_context_header')
281
+ if header_value:
282
+ body['anthropic_beta'] = [header_value]
283
+ logger.info(f"🔍 EXTENDED_CONTEXT: Continuing with extended context header")
168
284
 
169
285
  if bedrock_tools:
170
- body["tools"] = bedrock_tools
171
- # Use "auto" to allow model to decide when to stop
172
- body["tool_choice"] = {"type": "auto"}
286
+ # Don't send tools if we've had too many consecutive empty calls
287
+ if consecutive_empty_tool_calls >= 5:
288
+ logger.warning(f"🔍 TOOL_SUPPRESSION: Suppressing tools due to {consecutive_empty_tool_calls} consecutive empty calls")
289
+ # Don't add tools to body - force model to respond without them
290
+ else:
291
+ body["tools"] = bedrock_tools
292
+ # Use "auto" to allow model to decide when to stop
293
+ body["tool_choice"] = {"type": "auto"}
294
+ logger.info(f"🔍 TOOL_DEBUG: Sending {len(bedrock_tools)} tools to model: {[t['name'] for t in bedrock_tools]}")
173
295
 
174
296
  try:
175
297
  # Exponential backoff for rate limiting
176
298
  max_retries = 4
177
299
  base_delay = 2 # Start with 2 seconds
300
+ iteration_start_time = time.time()
178
301
 
179
302
  for retry_attempt in range(max_retries + 1):
180
303
  try:
181
- response = self.bedrock.invoke_model_with_response_stream(
182
- modelId=self.model_id,
183
- body=json.dumps(body)
184
- )
304
+ api_params = {
305
+ 'modelId': self.model_id,
306
+ 'body': json.dumps(body)
307
+ }
308
+
309
+ response = self.bedrock.invoke_model_with_response_stream(**api_params)
185
310
  break # Success, exit retry loop
186
311
  except Exception as e:
187
312
  error_str = str(e)
188
313
  is_rate_limit = ("Too many tokens" in error_str or
189
314
  "ThrottlingException" in error_str or
190
315
  "Too many requests" in error_str)
316
+ is_context_limit = "Input is too long" in error_str or "too large" in error_str
317
+
318
+ # On context limit error, enable extended context and retry
319
+ if is_context_limit and not using_extended_context and self.model_config:
320
+ if self.model_config.get('supports_extended_context'):
321
+ header_value = self.model_config.get('extended_context_header')
322
+ if header_value:
323
+ logger.info(f"🔍 EXTENDED_CONTEXT: Context limit hit, enabling extended context with header {header_value}")
324
+ body['anthropic_beta'] = [header_value]
325
+ api_params['body'] = json.dumps(body)
326
+ using_extended_context = True # Set flag to keep using it
327
+ try:
328
+ response = self.bedrock.invoke_model_with_response_stream(**api_params)
329
+ break
330
+ except Exception as retry_error:
331
+ logger.error(f"🔍 EXTENDED_CONTEXT: Retry with extended context failed: {retry_error}")
332
+ raise
191
333
 
192
334
  if is_rate_limit and retry_attempt < max_retries:
193
- # Exponential backoff: 2s, 4s, 8s, 16s, 32s (max >20s)
194
- delay = base_delay * (2 ** retry_attempt)
335
+ # Exponential backoff with longer delays to allow token bucket refill
336
+ # boto3 already did fast retries, so we need longer waits
337
+ delay = base_delay * (2 ** retry_attempt) + 4 # Add 4s base to account for boto3 retries
195
338
  logger.warning(f"Rate limit hit, retrying in {delay}s (attempt {retry_attempt + 1}/{max_retries + 1})")
196
339
  await asyncio.sleep(delay)
197
340
  else:
@@ -200,6 +343,7 @@ class StreamingToolExecutor:
200
343
  # Process this iteration's stream - collect ALL tool calls first
201
344
  assistant_text = ""
202
345
  tool_results = []
346
+ tool_use_blocks = [] # Store actual tool_use blocks from Bedrock
203
347
  yielded_text_length = 0 # Track how much text we've yielded
204
348
  all_tool_calls = [] # Collect all tool calls from this response
205
349
 
@@ -209,17 +353,34 @@ class StreamingToolExecutor:
209
353
  skipped_tools = set() # Track tools we're skipping due to limits
210
354
  executed_tool_signatures = set() # Track tool name + args to prevent duplicates
211
355
 
212
- # Timeout protection
213
- import time
214
- start_time = time.time()
215
- chunk_timeout = 30
356
+ # Timeout protection - use configured timeout from shell config
357
+ last_activity_time = time.time()
358
+ from app.config.shell_config import DEFAULT_SHELL_CONFIG
359
+ chunk_timeout = int(os.environ.get('COMMAND_TIMEOUT', DEFAULT_SHELL_CONFIG["timeout"]))
216
360
 
361
+ # Initialize content buffer and visualization detector
362
+ content_buffer = ""
363
+ viz_buffer = "" # Track potential visualization blocks
364
+ in_viz_block = False
365
+
366
+ # Code block continuation tracking
367
+ code_block_tracker = {
368
+ 'in_block': False,
369
+ 'block_type': None,
370
+ 'accumulated_content': ''
371
+ }
372
+
217
373
  for event in response['body']:
218
- # Timeout protection
219
- if time.time() - start_time > chunk_timeout:
220
- logger.warning(f"🚨 STREAM TIMEOUT after {chunk_timeout}s")
221
- yield {'type': 'stream_end'}
222
- break
374
+ # Timeout protection - only timeout if NO activity for chunk_timeout seconds
375
+ if time.time() - last_activity_time > chunk_timeout:
376
+ logger.warning(f"🚨 STREAM TIMEOUT after {chunk_timeout}s of inactivity - ending this iteration")
377
+ # Add timeout message to assistant text so model knows what happened
378
+ if not assistant_text.strip():
379
+ assistant_text = f"[Stream timeout after {chunk_timeout}s - no response received]"
380
+ break # Break from chunk loop, but continue to next iteration
381
+
382
+ # Reset activity timer on any event
383
+ last_activity_time = time.time()
223
384
 
224
385
  chunk = json.loads(event['chunk']['bytes'])
225
386
 
@@ -227,9 +388,39 @@ class StreamingToolExecutor:
227
388
  content_block = chunk.get('content_block', {})
228
389
  logger.info(f"🔍 CHUNK_DEBUG: content_block_start - type: {content_block.get('type')}, id: {content_block.get('id')}")
229
390
  if content_block.get('type') == 'tool_use':
391
+ # FLUSH any buffered content before tool starts
392
+ if hasattr(self, '_content_optimizer'):
393
+ remaining = self._content_optimizer.flush_remaining()
394
+ if remaining:
395
+ yield track_yield({
396
+ 'type': 'text',
397
+ 'content': remaining,
398
+ 'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
399
+ })
400
+ if content_buffer.strip():
401
+ yield track_yield({
402
+ 'type': 'text',
403
+ 'content': content_buffer,
404
+ 'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
405
+ })
406
+ content_buffer = ""
407
+
230
408
  tool_id = content_block.get('id')
231
409
  tool_name = content_block.get('name')
232
410
  if tool_id and tool_name:
411
+ # Check for duplicates FIRST
412
+ tool_signature = f"{tool_name}_{tool_id}"
413
+ if tool_signature in executed_tool_signatures:
414
+ logger.info(f"🔍 DUPLICATE_SKIP: Tool {tool_signature} already executed")
415
+ skipped_tools.add(chunk.get('index'))
416
+ continue
417
+
418
+ # Send tool_start event to frontend only (not to model)
419
+ # This prevents contamination of model training data
420
+
421
+ # Mark as executed to prevent duplicates
422
+ executed_tool_signatures.add(tool_signature)
423
+
233
424
  # Collect tool call instead of executing immediately
234
425
  all_tool_calls.append({
235
426
  'id': tool_id,
@@ -257,14 +448,16 @@ class StreamingToolExecutor:
257
448
  assistant_text += text
258
449
 
259
450
  # Check for fake tool calls in the text and intercept them
260
- if ('```tool:' in assistant_text or 'run_shell_command\n$' in assistant_text or
261
- ':mcp_run_shell_command\n$' in assistant_text):
451
+ # DISABLED: This was causing premature execution of incomplete commands
452
+ if False and (('```tool:' in assistant_text and '```' in assistant_text[assistant_text.find('```tool:') + 8:]) or \
453
+ ('run_shell_command\n$' in assistant_text and '\n' in assistant_text[assistant_text.find('run_shell_command\n$') + 20:]) or \
454
+ (':mcp_run_shell_command\n$' in assistant_text and '\n' in assistant_text[assistant_text.find(':mcp_run_shell_command\n$') + 23:])):
262
455
  # Extract and execute fake tool calls with multiple patterns
263
456
  import re
264
457
  patterns = [
265
- r'```tool:(mcp_\w+)\n\$\s*([^`]+)```', # Full markdown blocks
266
- r'run_shell_command\n\$\s*([^\n]+)', # Partial patterns
267
- r':mcp_run_shell_command\n\$\s*([^\n]+)' # Alternative patterns
458
+ r'```tool:(mcp_\w+)\n\$\s*([^`]+)```', # Full markdown blocks only
459
+ r'run_shell_command\n\$\s*([^\n]+)\n', # Complete lines only
460
+ r':mcp_run_shell_command\n\$\s*([^\n]+)\n' # Complete lines only
268
461
  ]
269
462
 
270
463
  for pattern in patterns:
@@ -280,11 +473,72 @@ class StreamingToolExecutor:
280
473
  result = await self._execute_fake_tool('mcp_run_shell_command', command, assistant_text, tool_results, mcp_manager)
281
474
  if result:
282
475
  yield result
476
+
477
+ for pattern in patterns:
478
+ if re.search(pattern, text):
479
+ logger.warning(f"🚫 Intercepted fake tool call: {pattern}")
480
+ if 'tool:' in text:
481
+ # Skip fake tool patterns
482
+ continue
283
483
 
284
- # Only yield text if it doesn't contain fake tool calls
285
- if not ('```tool:' in text):
286
- yield {'type': 'text', 'content': text}
287
- yielded_text_length += len(text) # Track yielded text
484
+ # Initialize content optimizer if not exists
485
+ if not hasattr(self, '_content_optimizer'):
486
+ from app.utils.streaming_optimizer import StreamingContentOptimizer
487
+ self._content_optimizer = StreamingContentOptimizer()
488
+
489
+ # Skip fake tool patterns
490
+ if 'tool:' in text:
491
+ continue
492
+
493
+ # Check for visualization block boundaries - ensure proper markdown format
494
+ viz_patterns = ['```vega-lite', '```mermaid', '```graphviz', '```d3']
495
+ if any(pattern in text for pattern in viz_patterns):
496
+ in_viz_block = True
497
+ viz_buffer = text
498
+ continue
499
+ elif in_viz_block:
500
+ viz_buffer += text
501
+ # Check for closing ``` - ensure complete block
502
+ if '```' in text and viz_buffer.count('```') >= 2:
503
+ # Complete visualization block - ensure it ends with newline for proper markdown
504
+ if not viz_buffer.endswith('\n'):
505
+ viz_buffer += '\n'
506
+
507
+ # Flush any pending content first
508
+ if hasattr(self, '_content_optimizer'):
509
+ remaining = self._content_optimizer.flush_remaining()
510
+ if remaining:
511
+ yield track_yield({
512
+ 'type': 'text',
513
+ 'content': remaining,
514
+ 'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
515
+ })
516
+ if content_buffer.strip():
517
+ yield track_yield({
518
+ 'type': 'text',
519
+ 'content': content_buffer,
520
+ 'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
521
+ })
522
+ content_buffer = ""
523
+
524
+ # Send complete visualization block
525
+ yield track_yield({
526
+ 'type': 'text',
527
+ 'content': viz_buffer,
528
+ 'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
529
+ })
530
+ viz_buffer = ""
531
+ in_viz_block = False
532
+ continue
533
+
534
+ # Use content optimizer to prevent mid-word splits
535
+ for optimized_chunk in self._content_optimizer.add_content(text):
536
+ self._update_code_block_tracker(optimized_chunk, code_block_tracker)
537
+ yield track_yield({
538
+ 'type': 'text',
539
+ 'content': optimized_chunk,
540
+ 'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
541
+ })
288
542
  elif delta.get('type') == 'input_json_delta':
289
543
  # Find tool by index
290
544
  tool_id = None
@@ -307,10 +561,34 @@ class StreamingToolExecutor:
307
561
  tool_data = active_tools[tool_id]
308
562
  tool_name = tool_data['name']
309
563
  args_json = tool_data['partial_json']
564
+
565
+ logger.info(f"🔍 TOOL_ARGS: Tool '{tool_name}' (id: {tool_id}) has args_json: '{args_json}'")
310
566
 
311
567
  try:
312
568
  args = json.loads(args_json) if args_json.strip() else {}
313
569
 
570
+ # Detect empty tool calls for tools that require arguments
571
+ actual_tool_name = tool_name[4:] if tool_name.startswith('mcp_') else tool_name
572
+ if actual_tool_name == 'run_shell_command' and not args.get('command'):
573
+ logger.warning(f"🔍 EMPTY_TOOL_CALL: Model called {tool_name} without required 'command' argument")
574
+ logger.warning(f"🔍 EMPTY_TOOL_CONTEXT: Assistant text before call: '{assistant_text[-200:]}'")
575
+ empty_tool_calls_this_iteration += 1
576
+ consecutive_empty_tool_calls += 1
577
+
578
+ # Return helpful error immediately without executing
579
+ error_result = f"Error: Tool call failed - the 'command' parameter is required but was not provided. You must call run_shell_command with a JSON object containing the command string. Example: {{\"command\": \"ls -la\"}}. Please retry with the correct format."
580
+
581
+ tool_results.append({
582
+ 'tool_id': tool_id,
583
+ 'tool_name': tool_name,
584
+ 'result': error_result
585
+ })
586
+
587
+ completed_tools.add(tool_id)
588
+ tools_executed_this_iteration = True
589
+ logger.info(f"🔍 TOOL_EXECUTED_FLAG: Set tools_executed_this_iteration = True for tool {tool_id}")
590
+ continue
591
+
314
592
  # Update the corresponding entry in all_tool_calls with parsed arguments
315
593
  for tool_call in all_tool_calls:
316
594
  if tool_call['id'] == tool_id:
@@ -326,80 +604,202 @@ class StreamingToolExecutor:
326
604
  # Create signature to detect duplicates
327
605
  tool_signature = f"{actual_tool_name}:{json.dumps(args, sort_keys=True)}"
328
606
 
329
- # Skip if we've already executed this exact tool call
330
- if tool_signature in executed_tool_signatures:
331
- logger.info(f"🔍 DUPLICATE_TOOL_SKIP: Skipping duplicate {actual_tool_name} with args {args}")
332
- completed_tools.add(tool_id)
333
- continue
607
+ # Execute the tool (already checked for duplicates at collection)
608
+ logger.info(f"🔍 EXECUTING_TOOL: {actual_tool_name} with args {args}")
609
+
610
+ # Send tool_start event with complete arguments
611
+ yield {
612
+ 'type': 'tool_start',
613
+ 'tool_id': tool_id,
614
+ 'tool_name': tool_name,
615
+ 'args': args,
616
+ 'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
617
+ }
334
618
 
335
- # For shell commands, check for similar recent commands
336
- if actual_tool_name == 'run_shell_command':
337
- command = args.get('command', '')
338
- # Normalize command for similarity check
339
- normalized_cmd = ' '.join(command.split())
619
+ # Execute the tool immediately
620
+ try:
621
+ result = await mcp_manager.call_tool(actual_tool_name, args)
340
622
 
341
- # Check if a very similar command was run recently
342
- for recent_cmd in recent_commands:
343
- if self._commands_similar(normalized_cmd, recent_cmd):
344
- logger.info(f"🔍 SIMILAR_COMMAND_SKIP: Skipping similar command: {command}")
345
- completed_tools.add(tool_id)
346
- # Skip this tool entirely - don't execute it
347
- break
348
- else:
349
- # Only execute if we didn't break (no similar command found)
350
- recent_commands.append(normalized_cmd)
351
- # Keep only last 5 commands to prevent memory bloat
352
- if len(recent_commands) > 5:
353
- recent_commands.pop(0)
354
-
355
- executed_tool_signatures.add(tool_signature)
356
-
357
- # Execute the tool only if it's not similar to recent commands
358
- result = await mcp_manager.call_tool(actual_tool_name, args)
359
-
360
- if isinstance(result, dict) and 'content' in result:
361
- content = result['content']
362
- if isinstance(content, list) and len(content) > 0:
363
- result_text = content[0].get('text', str(result))
364
- else:
365
- result_text = str(result)
623
+ # Process result
624
+ if isinstance(result, dict) and result.get('error') and result.get('error') != False:
625
+ error_msg = result.get('message', 'Unknown error')
626
+ result_text = f"ERROR: {error_msg}. Please try a different approach or fix the command."
627
+ elif isinstance(result, dict) and 'content' in result:
628
+ content = result['content']
629
+ if isinstance(content, list) and len(content) > 0:
630
+ result_text = content[0].get('text', str(result))
366
631
  else:
367
632
  result_text = str(result)
633
+ else:
634
+ result_text = str(result)
368
635
 
369
- tool_results.append({
370
- 'tool_id': tool_id,
371
- 'tool_name': tool_name,
372
- 'result': result_text
373
- })
636
+ tool_results.append({
637
+ 'tool_id': tool_id,
638
+ 'tool_name': tool_name,
639
+ 'result': result_text
640
+ })
374
641
 
375
- yield {
376
- 'type': 'tool_execution',
377
- 'tool_id': tool_id,
378
- 'tool_name': tool_name,
379
- 'result': result_text
380
- }
381
-
382
- tools_executed_this_iteration = True # Mark that tools were executed
642
+ yield {
643
+ 'type': 'tool_display',
644
+ 'tool_id': tool_id,
645
+ 'tool_name': tool_name,
646
+ 'result': result_text,
647
+ 'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
648
+ }
649
+
650
+ # Add clean tool result for model conversation
651
+ yield {
652
+ 'type': 'tool_result_for_model',
653
+ 'tool_use_id': tool_id,
654
+ 'content': result_text.strip()
655
+ }
656
+
657
+ # Immediate flush to reduce delay
658
+ await asyncio.sleep(0)
659
+
660
+ tools_executed_this_iteration = True
661
+ logger.info(f"🔍 TOOL_EXECUTED_FLAG: Set tools_executed_this_iteration = True for tool {tool_id}")
662
+
663
+ except Exception as e:
664
+ error_msg = f"Tool error: {str(e)}"
665
+ logger.error(f"🔍 TOOL_EXECUTION_ERROR: {error_msg}")
666
+ tool_results.append({
667
+ 'tool_id': tool_id,
668
+ 'tool_name': tool_name,
669
+ 'result': f"ERROR: {error_msg}. Please try a different approach or fix the command."
670
+ })
671
+
672
+ # Frontend error display
673
+ yield {'type': 'tool_display', 'tool_name': tool_name, 'result': f"ERROR: {error_msg}"}
674
+
675
+ # Clean error for model
676
+ yield {
677
+ 'type': 'tool_result_for_model',
678
+ 'tool_use_id': tool_id,
679
+ 'content': f"ERROR: {error_msg}. Please try a different approach or fix the command."
680
+ }
383
681
  completed_tools.add(tool_id)
384
682
 
385
683
  except Exception as e:
386
684
  error_msg = f"Tool error: {str(e)}"
387
- yield {'type': 'error', 'content': error_msg}
685
+
686
+ # Add error to tool_results so it gets fed back to the model
687
+ tool_results.append({
688
+ 'tool_id': tool_id,
689
+ 'tool_name': tool_name,
690
+ 'result': f"ERROR: {error_msg}. Please try a different approach or fix the command."
691
+ })
692
+
693
+ # Frontend error display
694
+ yield {'type': 'tool_display', 'tool_name': 'unknown', 'result': f"ERROR: {error_msg}"}
695
+
696
+ # Clean error for model
697
+ yield {
698
+ 'type': 'tool_result_for_model',
699
+ 'tool_use_id': tool_id or 'unknown',
700
+ 'content': f"ERROR: {error_msg}. Please try a different approach or fix the command."
701
+ }
388
702
 
389
703
  elif chunk['type'] == 'message_stop':
704
+ # Flush any remaining content from buffers before stopping
705
+ if viz_buffer.strip():
706
+ self._update_code_block_tracker(viz_buffer, code_block_tracker)
707
+ yield track_yield({
708
+ 'type': 'text',
709
+ 'content': viz_buffer,
710
+ 'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
711
+ })
712
+ # Flush any remaining content from optimizer
713
+ if hasattr(self, '_content_optimizer'):
714
+ remaining = self._content_optimizer.flush_remaining()
715
+ if remaining:
716
+ self._update_code_block_tracker(remaining, code_block_tracker)
717
+ yield track_yield({
718
+ 'type': 'text',
719
+ 'content': remaining,
720
+ 'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
721
+ })
722
+ if content_buffer.strip():
723
+ self._update_code_block_tracker(content_buffer, code_block_tracker)
724
+ yield track_yield({
725
+ 'type': 'text',
726
+ 'content': content_buffer,
727
+ 'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
728
+ })
729
+
730
+ # Check if we ended mid-code-block and auto-continue
731
+ continuation_count = 0
732
+ max_continuations = 10 # Increased for large diagrams/code blocks
733
+
734
+ # Log tracker state before checking
735
+ backtick_count = assistant_text.count('```')
736
+ logger.info(f"🔍 TRACKER_STATE: in_block={code_block_tracker['in_block']}, block_type={code_block_tracker.get('block_type')}, backtick_count={backtick_count}, last_50_chars='{assistant_text[-50:]}'")
737
+
738
+ while code_block_tracker['in_block'] and continuation_count < max_continuations:
739
+ continuation_count += 1
740
+ logger.info(f"🔄 INCOMPLETE_BLOCK: Detected incomplete {code_block_tracker['block_type']} block, auto-continuing (attempt {continuation_count})")
741
+
742
+ # Send heartbeat before continuation to keep connection alive
743
+ yield {
744
+ 'type': 'heartbeat',
745
+ 'heartbeat': True,
746
+ 'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
747
+ }
748
+
749
+ continuation_had_content = False
750
+ async for continuation_chunk in self._continue_incomplete_code_block(
751
+ conversation, code_block_tracker, mcp_manager, iteration_start_time, assistant_text
752
+ ):
753
+ if continuation_chunk.get('content'):
754
+ continuation_had_content = True
755
+ self._update_code_block_tracker(continuation_chunk['content'], code_block_tracker)
756
+ assistant_text += continuation_chunk['content']
757
+
758
+ if code_block_tracker['in_block']:
759
+ continuation_chunk['code_block_continuation'] = True
760
+ continuation_chunk['block_type'] = code_block_tracker['block_type']
761
+
762
+ yield continuation_chunk
763
+
764
+ if not continuation_had_content:
765
+ logger.info("🔄 CONTINUATION: No content generated, stopping continuation attempts")
766
+ break
767
+
768
+ # Log tracker state after continuation
769
+ logger.info(f"🔄 CONTINUATION_RESULT: After attempt {continuation_count}, in_block={code_block_tracker['in_block']}, had_content={continuation_had_content}")
770
+
390
771
  # Just break out of chunk processing, handle completion logic below
391
772
  break
392
773
 
393
- # Add assistant response to conversation
394
- if assistant_text.strip():
395
- conversation.append({"role": "assistant", "content": assistant_text})
396
-
397
- # Skip duplicate execution - tools are already executed in content_block_stop
398
- # This section was causing duplicate tool execution
399
-
400
- # Add tool results and continue iteration
401
- if tool_results:
402
- logger.info(f"🔍 TOOL_RESULTS_PROCESSING: Adding {len(tool_results)} tool results to conversation for iteration {iteration + 1}")
774
+ # Add assistant response to conversation with proper tool_use blocks
775
+ if assistant_text.strip() or tools_executed_this_iteration:
776
+ # Build content as list with text and tool_use blocks
777
+ content_blocks = []
778
+ if assistant_text.strip():
779
+ content_blocks.append({"type": "text", "text": assistant_text})
780
+
781
+ # Add tool_use blocks for each tool that was executed with actual args
782
+ for tool_result in tool_results:
783
+ # Find the corresponding tool call to get the actual args
784
+ tool_args = {}
785
+ for tool_call in all_tool_calls:
786
+ if tool_call['id'] == tool_result['tool_id']:
787
+ tool_args = tool_call.get('args', {})
788
+ break
789
+
790
+ content_blocks.append({
791
+ "type": "tool_use",
792
+ "id": tool_result['tool_id'],
793
+ "name": tool_result['tool_name'],
794
+ "input": tool_args
795
+ })
796
+
797
+ conversation.append({"role": "assistant", "content": content_blocks})
798
+
799
+ # Add tool results to conversation BEFORE filtering
800
+ logger.info(f"🔍 ITERATION_END_CHECK: tools_executed_this_iteration = {tools_executed_this_iteration}, tool_results count = {len(tool_results)}")
801
+ if tools_executed_this_iteration:
802
+ logger.info(f"🔍 TOOL_RESULTS_PROCESSING: Adding {len(tool_results)} tool results to conversation")
403
803
  for tool_result in tool_results:
404
804
  raw_result = tool_result['result']
405
805
  if isinstance(raw_result, str) and '$ ' in raw_result:
@@ -407,20 +807,80 @@ class StreamingToolExecutor:
407
807
  clean_lines = [line for line in lines if not line.startswith('$ ')]
408
808
  raw_result = '\n'.join(clean_lines).strip()
409
809
 
810
+ # Add in tool_result_for_model format so filter can convert to proper Bedrock format
410
811
  conversation.append({
411
- "role": "user",
412
- "content": f"Tool execution completed. Result: {raw_result}"
812
+ 'type': 'tool_result_for_model',
813
+ 'tool_use_id': tool_result['tool_id'],
814
+ 'content': raw_result
413
815
  })
816
+
817
+ # Filter conversation to convert tool results to proper format
818
+ original_length = len(conversation)
819
+ conversation = filter_conversation_for_model(conversation)
820
+ logger.info(f"🤖 MODEL_RESPONSE: {assistant_text}")
821
+ logger.info(f"Filtered conversation: {original_length} -> {len(conversation)} messages")
822
+
823
+ # Skip duplicate execution - tools are already executed in content_block_stop
824
+ # This section was causing duplicate tool execution
825
+
826
+ # Continue to next iteration if tools were executed
827
+ if tools_executed_this_iteration:
828
+ # Warn about consecutive empty tool calls but don't break
829
+ if consecutive_empty_tool_calls >= 5:
830
+ logger.warning(f"🔍 EMPTY_TOOL_WARNING: {consecutive_empty_tool_calls} consecutive empty tool calls detected")
831
+ # Add a message to guide the model to respond without tools
832
+ conversation.append({
833
+ "role": "user",
834
+ "content": "Please provide your response based on the information available. Do not attempt to use tools."
835
+ })
836
+ elif consecutive_empty_tool_calls >= 3:
837
+ logger.warning(f"🔍 EMPTY_TOOL_WARNING: {consecutive_empty_tool_calls} consecutive empty tool calls detected, adding delay")
838
+ # Add a small delay to slow down the loop
839
+ await asyncio.sleep(0.5)
414
840
 
415
- logger.info(f"🔍 CONTINUING_ITERATION: Moving to iteration {iteration + 1} to let model respond to tool results")
416
- continue
841
+ # Reset consecutive counter if we had successful tool calls
842
+ if empty_tool_calls_this_iteration == 0:
843
+ consecutive_empty_tool_calls = 0
844
+
845
+ logger.info(f"🔍 CONTINUING_ROUND: Tool results added, model will continue in same stream (round {iteration + 1})")
846
+ # Yield heartbeat to flush stream before next iteration
847
+ yield {'type': 'iteration_continue', 'iteration': iteration + 1}
848
+ await asyncio.sleep(0)
849
+ continue # Immediately start next iteration
417
850
  else:
418
- # No tools executed - end the stream
419
- if assistant_text.strip():
420
- logger.info(f"🔍 STREAM_END: Model produced text without tools, ending stream")
851
+ # Check if too many tools were blocked (indicates runaway loop)
852
+ if blocked_tools_this_iteration >= 3:
853
+ logger.warning(f"🔍 RUNAWAY_LOOP_DETECTED: {blocked_tools_this_iteration} tools blocked in iteration {iteration}, ending stream")
421
854
  yield {'type': 'stream_end'}
422
855
  break
423
- elif iteration >= 2: # Safety: end after 2 failed iterations
856
+
857
+ # No tools executed - check if we should end the stream
858
+ if assistant_text.strip():
859
+ # Check if code block is still incomplete
860
+ if code_block_tracker.get('in_block'):
861
+ logger.warning(f"🔍 INCOMPLETE_BLOCK_REMAINING: Code block still incomplete after max continuations, ending stream anyway")
862
+
863
+ # Check if the text suggests the model is about to make a tool call
864
+ # Only check the last 200 characters to avoid issues with long accumulated text
865
+ text_end = assistant_text[-200:].lower().strip()
866
+ suggests_tool_call = text_end.endswith(':')
867
+
868
+ if suggests_tool_call and iteration < 3: # More conservative limit
869
+ logger.info(f"🔍 POTENTIAL_TOOL_CALL: Text suggests model wants to make a tool call, continuing: '{assistant_text[-50:]}'")
870
+ continue
871
+ else:
872
+ logger.info(f"🔍 STREAM_END: Model produced text without tools, ending stream")
873
+ # Log final metrics
874
+ logger.info(f"📊 Final stream metrics: events={stream_metrics['events_sent']}, "
875
+ f"bytes={stream_metrics['bytes_sent']}, "
876
+ f"avg_size={stream_metrics['bytes_sent']/max(stream_metrics['events_sent'],1):.2f}, "
877
+ f"min={min(stream_metrics['chunk_sizes']) if stream_metrics['chunk_sizes'] else 0}, "
878
+ f"max={max(stream_metrics['chunk_sizes']) if stream_metrics['chunk_sizes'] else 0}, "
879
+ f"duration={time.time()-stream_metrics['start_time']:.2f}s")
880
+ yield {'type': 'stream_end'}
881
+ break
882
+ elif iteration >= 5: # Safety: end after 5 iterations total
883
+ logger.info(f"🔍 MAX_ITERATIONS: Reached maximum iterations ({iteration}), ending stream")
424
884
  yield {'type': 'stream_end'}
425
885
  break
426
886
  else:
@@ -429,3 +889,154 @@ class StreamingToolExecutor:
429
889
  except Exception as e:
430
890
  yield {'type': 'error', 'content': f'Error: {e}'}
431
891
  return
892
+
893
+ def _update_code_block_tracker(self, text: str, tracker: Dict[str, Any]) -> None:
894
+ """Update code block tracking state based on text content."""
895
+ if not text:
896
+ return
897
+
898
+ lines = text.split('\n')
899
+ for line in lines:
900
+ stripped = line.strip()
901
+ if stripped.startswith('```'):
902
+ if not tracker['in_block']:
903
+ # Opening a new block
904
+ block_type = stripped[3:].strip() or 'code'
905
+ tracker['in_block'] = True
906
+ tracker['block_type'] = block_type
907
+ tracker['accumulated_content'] = line + '\n'
908
+ logger.debug(f"🔍 TRACKER: Opened {block_type} block")
909
+ else:
910
+ # Closing the current block - any ``` closes it
911
+ # Don't require type to match since closing ``` often has no type
912
+ tracker['in_block'] = False
913
+ tracker['block_type'] = None
914
+ tracker['accumulated_content'] = ''
915
+ logger.debug(f"🔍 TRACKER: Closed block")
916
+ elif tracker['in_block']:
917
+ tracker['accumulated_content'] += line + '\n'
918
+
919
async def _continue_incomplete_code_block(
    self,
    conversation: List[Dict[str, Any]],
    code_block_tracker: Dict[str, Any],
    mcp_manager,
    start_time: float,
    assistant_text: str
) -> AsyncGenerator[Dict[str, Any], None]:
    """Continue an incomplete code block by making a new API call.

    Builds a follow-up request from a copy of ``conversation`` plus the
    partial assistant text and a "continue the block" user prompt, sends it
    through ``self.bedrock.invoke_model_with_response_stream`` and re-emits
    the streamed text. A fence header repeated at the start of the
    continuation is filtered out so the block is not re-opened.

    Args:
        conversation: Messages sent so far. Neither the list nor its message
            dicts are modified.
        code_block_tracker: Tracker state; only ``block_type`` is read.
        mcp_manager: Not used by this method; kept for interface compatibility.
        start_time: ``time.time()`` reference used for event timestamps.
        assistant_text: Assistant text produced so far, ending inside the
            unfinished block.

    Yields:
        ``heartbeat`` events (before the call, after it, and every 10 stream
        events) and ``text`` events flagged with ``'continuation': True``.

    Any exception is logged and swallowed; the caller then sees a
    continuation that produced no content.

    NOTE(review): the Bedrock call and the iteration over ``response['body']``
    are not awaited. If ``self.bedrock`` is a synchronous client this blocks
    the event loop while streaming - confirm.
    """

    def elapsed() -> str:
        # Milliseconds since start_time, formatted like every other event.
        return f"{int((time.time() - start_time) * 1000)}ms"

    def heartbeat() -> Dict[str, Any]:
        return {'type': 'heartbeat', 'heartbeat': True, 'timestamp': elapsed()}

    def text_event(content: str) -> Dict[str, Any]:
        return {
            'type': 'text',
            'content': content,
            'timestamp': elapsed(),
            'continuation': True
        }

    try:
        block_type = code_block_tracker['block_type']
        continuation_prompt = f"Continue the incomplete {block_type} code block from where it left off and close it with ```. Output ONLY the continuation of the code block, no explanations."

        continuation_conversation = conversation.copy()

        if assistant_text.strip():
            # Default to the full text so single-line input is handled too
            # (previously cleaned_text was only bound for multi-line text).
            cleaned_text = assistant_text

            # Remove incomplete last line
            lines = assistant_text.split('\n')
            if len(lines) > 1:
                last_line = lines[-1].strip()
                if not last_line or ('```' in last_line and not last_line.endswith('```')):
                    cleaned_text = '\n'.join(lines[:-1])
                    logger.info(f"🔄 CONTEXT_CLEANUP: Removed incomplete last line: '{last_line}'")

            assistant_content = [{"type": "text", "text": cleaned_text}]
            if continuation_conversation and continuation_conversation[-1].get('role') == 'assistant':
                # conversation.copy() is shallow: replace the last message with
                # a new dict instead of mutating the one the caller still holds.
                continuation_conversation[-1] = {
                    **continuation_conversation[-1],
                    'content': assistant_content
                }
            else:
                continuation_conversation.append({"role": "assistant", "content": assistant_content})

        continuation_conversation.append({"role": "user", "content": continuation_prompt})

        body = {
            "messages": continuation_conversation,
            "max_tokens": 2000,
            "temperature": 0.1,
            "anthropic_version": "bedrock-2023-05-31"
        }

        logger.info(f"🔄 CONTINUATION: Making API call to continue {block_type} block")

        # Yield initial heartbeat
        yield heartbeat()

        # Make the Bedrock call; the response body is consumed as a stream below
        response = self.bedrock.invoke_model_with_response_stream(
            modelId=self.model_id,
            body=json.dumps(body)
        )

        # Send heartbeat after getting response object (before first chunk)
        yield heartbeat()

        accumulated_start = ""
        header_filtered = False
        chunk_count = 0

        for event in response['body']:
            # Send heartbeat every 10 chunks to keep connection alive
            chunk_count += 1
            if chunk_count % 10 == 0:
                yield heartbeat()

            chunk = json.loads(event['chunk']['bytes'])
            if chunk['type'] != 'content_block_delta':
                continue

            delta = chunk.get('delta', {})
            if delta.get('type') != 'text_delta':
                continue

            text = delta.get('text', '')

            if header_filtered:
                if text:
                    yield text_event(text)
                continue

            # Buffer the start of the continuation until there is enough text
            # to decide whether it begins with a repeated fence header.
            accumulated_start += text
            candidate = accumulated_start.lstrip()
            if '\n' not in candidate and len(candidate) <= 20:
                continue

            if candidate.startswith('```'):
                # Split the whitespace-stripped text so leading blank lines
                # cannot hide the header from the filter.
                header, newline, rest = candidate.partition('\n')
                if newline:
                    logger.info(f"🔄 FILTERED: Removed redundant {header.strip()} from continuation")
                    yield text_event('\n' + rest)  # Preserve the newline
            else:
                yield text_event(accumulated_start)

            header_filtered = True

        # The stream ended before the header decision was reached: flush the
        # buffered text instead of silently dropping a short continuation.
        pending = accumulated_start.strip()
        if not header_filtered and pending:
            if pending.startswith('```') and pending != '```':
                # A fence carrying a type with nothing after it is a repeated header.
                logger.info(f"🔄 FILTERED: Removed redundant {pending} from continuation")
            else:
                # Plain text, or a bare ``` which closes the open block.
                yield text_event(accumulated_start)

    except Exception as e:
        # Best effort: a failed continuation must not abort the outer stream.
        logger.error(f"🔄 CONTINUATION: Error in continuation: {e}")