ziya 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ziya might be problematic.
- app/agents/agent.py +71 -73
- app/agents/direct_streaming.py +1 -1
- app/agents/prompts.py +1 -1
- app/agents/prompts_manager.py +14 -10
- app/agents/wrappers/google_direct.py +31 -1
- app/agents/wrappers/nova_tool_execution.py +2 -2
- app/agents/wrappers/nova_wrapper.py +1 -1
- app/agents/wrappers/ziya_bedrock.py +53 -31
- app/config/models_config.py +61 -20
- app/config/shell_config.py +5 -1
- app/extensions/prompt_extensions/claude_extensions.py +27 -5
- app/extensions/prompt_extensions/mcp_prompt_extensions.py +82 -56
- app/main.py +5 -3
- app/mcp/client.py +19 -10
- app/mcp/manager.py +68 -10
- app/mcp/tools.py +8 -9
- app/mcp_servers/shell_server.py +3 -3
- app/middleware/streaming.py +29 -41
- app/routes/file_validation.py +35 -0
- app/routes/mcp_routes.py +54 -8
- app/server.py +525 -614
- app/streaming_tool_executor.py +748 -137
- app/templates/asset-manifest.json +20 -20
- app/templates/index.html +1 -1
- app/templates/static/css/{main.0297bfee.css → main.e7109b49.css} +2 -2
- app/templates/static/css/main.e7109b49.css.map +1 -0
- app/templates/static/js/14386.65fcfe53.chunk.js +2 -0
- app/templates/static/js/14386.65fcfe53.chunk.js.map +1 -0
- app/templates/static/js/35589.0368973a.chunk.js +2 -0
- app/templates/static/js/35589.0368973a.chunk.js.map +1 -0
- app/templates/static/js/{50295.ab92f61b.chunk.js → 50295.90aca393.chunk.js} +3 -3
- app/templates/static/js/50295.90aca393.chunk.js.map +1 -0
- app/templates/static/js/55734.5f0fd567.chunk.js +2 -0
- app/templates/static/js/55734.5f0fd567.chunk.js.map +1 -0
- app/templates/static/js/58542.57fed736.chunk.js +2 -0
- app/templates/static/js/58542.57fed736.chunk.js.map +1 -0
- app/templates/static/js/{68418.2554bb1e.chunk.js → 68418.f7b4d2d9.chunk.js} +3 -3
- app/templates/static/js/68418.f7b4d2d9.chunk.js.map +1 -0
- app/templates/static/js/99948.b280eda0.chunk.js +2 -0
- app/templates/static/js/99948.b280eda0.chunk.js.map +1 -0
- app/templates/static/js/main.e075582c.js +3 -0
- app/templates/static/js/main.e075582c.js.map +1 -0
- app/utils/code_util.py +5 -2
- app/utils/context_cache.py +11 -0
- app/utils/conversation_filter.py +90 -0
- app/utils/custom_bedrock.py +43 -1
- app/utils/diff_utils/validation/validators.py +32 -22
- app/utils/file_cache.py +5 -3
- app/utils/precision_prompt_system.py +116 -0
- app/utils/streaming_optimizer.py +100 -0
- {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/METADATA +3 -2
- {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/RECORD +59 -55
- app/templates/static/css/main.0297bfee.css.map +0 -1
- app/templates/static/js/14386.567bf803.chunk.js +0 -2
- app/templates/static/js/14386.567bf803.chunk.js.map +0 -1
- app/templates/static/js/35589.278ecda2.chunk.js +0 -2
- app/templates/static/js/35589.278ecda2.chunk.js.map +0 -1
- app/templates/static/js/50295.ab92f61b.chunk.js.map +0 -1
- app/templates/static/js/55734.90d8bd52.chunk.js +0 -2
- app/templates/static/js/55734.90d8bd52.chunk.js.map +0 -1
- app/templates/static/js/58542.08fb5cf4.chunk.js +0 -2
- app/templates/static/js/58542.08fb5cf4.chunk.js.map +0 -1
- app/templates/static/js/68418.2554bb1e.chunk.js.map +0 -1
- app/templates/static/js/99948.71670e91.chunk.js +0 -2
- app/templates/static/js/99948.71670e91.chunk.js.map +0 -1
- app/templates/static/js/main.1d79eac2.js +0 -3
- app/templates/static/js/main.1d79eac2.js.map +0 -1
- /app/templates/static/js/{50295.ab92f61b.chunk.js.LICENSE.txt → 50295.90aca393.chunk.js.LICENSE.txt} +0 -0
- /app/templates/static/js/{68418.2554bb1e.chunk.js.LICENSE.txt → 68418.f7b4d2d9.chunk.js.LICENSE.txt} +0 -0
- /app/templates/static/js/{main.1d79eac2.js.LICENSE.txt → main.e075582c.js.LICENSE.txt} +0 -0
- {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/WHEEL +0 -0
- {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/entry_points.txt +0 -0
- {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/licenses/LICENSE +0 -0
app/streaming_tool_executor.py
CHANGED
@@ -3,15 +3,42 @@ import asyncio
 import json
 import boto3
 import logging
+import os
+import time
 from typing import Dict, Any, List, AsyncGenerator, Optional
+from app.utils.conversation_filter import filter_conversation_for_model
 
 logger = logging.getLogger(__name__)
 
 class StreamingToolExecutor:
-    def __init__(self, profile_name: str = 'ziya', region: str = 'us-west-2'):
-
-
-
+    def __init__(self, profile_name: str = 'ziya', region: str = 'us-west-2', model_id: str = None):
+        self.model_id = model_id or os.environ.get('DEFAULT_MODEL_ID', 'us.anthropic.claude-sonnet-4-20250514-v1:0')
+
+        # Only initialize Bedrock client for Bedrock endpoints
+        from app.agents.models import ModelManager
+        endpoint = os.environ.get("ZIYA_ENDPOINT", "bedrock")
+        model_name = os.environ.get("ZIYA_MODEL")
+        self.model_config = ModelManager.get_model_config(endpoint, model_name)
+
+        if endpoint == "bedrock":
+            # Use ModelManager's wrapped bedrock client for proper extended context handling
+            try:
+                self.bedrock = ModelManager._get_persistent_bedrock_client(
+                    aws_profile=profile_name,
+                    region=region,
+                    model_id=self.model_id,
+                    model_config=self.model_config
+                )
+                logger.info(f"🔍 Using ModelManager's wrapped bedrock client with extended context support")
+            except Exception as e:
+                logger.warning(f"🔍 Could not get wrapped client, falling back to direct client: {e}")
+                # Fallback to direct client creation
+                session = boto3.Session(profile_name=profile_name)
+                self.bedrock = session.client('bedrock-runtime', region_name=region)
+        else:
+            # Non-Bedrock endpoints don't need a bedrock client
+            self.bedrock = None
+            logger.info(f"🔍 Skipping Bedrock client initialization for endpoint: {endpoint}")
 
     def _convert_tool_schema(self, tool):
         """Convert tool schema to JSON-serializable format"""
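The rewritten constructor decides up front whether a Bedrock client is needed at all: the endpoint and model come from `ZIYA_ENDPOINT` / `ZIYA_MODEL`, only the `bedrock` endpoint triggers client creation, and a plain boto3 client is the fallback path. A minimal sketch of that selection logic, with the `ModelManager` wrapper omitted since its internals are outside this diff:

```python
# Sketch of the endpoint selection added to __init__; illustrative only.
import os
import boto3

def make_bedrock_client(profile_name: str = "ziya", region: str = "us-west-2"):
    """Return a bedrock-runtime client for the 'bedrock' endpoint, else None."""
    endpoint = os.environ.get("ZIYA_ENDPOINT", "bedrock")
    if endpoint != "bedrock":
        return None  # non-Bedrock endpoints never touch boto3
    # This is the diff's fallback path; the preferred path goes through
    # ModelManager._get_persistent_bedrock_client for extended-context support.
    session = boto3.Session(profile_name=profile_name)
    return session.client("bedrock-runtime", region_name=region)
```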
@@ -19,20 +46,19 @@
             # Already a dict, but check input_schema
             result = tool.copy()
             input_schema = result.get('input_schema')
-            if
+            if isinstance(input_schema, dict):
+                # Already a dict, use as-is
+                pass
+            elif hasattr(input_schema, 'model_json_schema'):
                 # Pydantic class - convert to JSON schema
                 result['input_schema'] = input_schema.model_json_schema()
-            elif
-                # Some other
+            elif input_schema is not None:
+                # Some other object - try to convert
                 try:
                     result['input_schema'] = input_schema.model_json_schema()
                 except:
-
-                    result['input_schema'] = {
-                        "type": "object",
-                        "properties": {"command": {"type": "string"}},
-                        "required": ["command"]
-                    }
+                    logger.warning(f"🔍 TOOL_SCHEMA: Could not convert input_schema, using fallback")
+                    result['input_schema'] = {"type": "object", "properties": {}}
             return result
         else:
             # Tool object - extract properties
@@ -40,25 +66,35 @@
             description = getattr(tool, 'description', 'No description')
             input_schema = getattr(tool, 'input_schema', getattr(tool, 'inputSchema', {}))
 
-
-
+            logger.info(f"🔍 TOOL_SCHEMA: Converting tool '{name}', input_schema type: {type(input_schema)}")
+
+            # Handle different input_schema types
+            if isinstance(input_schema, dict):
+                # Already a dict, use as-is
+                logger.info(f"🔍 TOOL_SCHEMA: Tool '{name}' has dict schema with keys: {list(input_schema.keys())}")
+            elif hasattr(input_schema, 'model_json_schema'):
+                # Pydantic class - convert to JSON schema
                 input_schema = input_schema.model_json_schema()
-
-
+                logger.info(f"🔍 TOOL_SCHEMA: Converted Pydantic schema for '{name}'")
+            elif input_schema:
+                # Some other object - try to convert
                 try:
                     input_schema = input_schema.model_json_schema()
+                    logger.info(f"🔍 TOOL_SCHEMA: Converted object schema for '{name}'")
                 except:
-
-
-
-
-
+                    logger.warning(f"🔍 TOOL_SCHEMA: Failed to convert schema for '{name}', using empty schema")
+                    input_schema = {"type": "object", "properties": {}}
+            else:
+                logger.warning(f"🔍 TOOL_SCHEMA: Tool '{name}' has no input_schema, using empty schema")
+                input_schema = {"type": "object", "properties": {}}
 
-
+            result = {
                 'name': name,
                 'description': description,
                 'input_schema': input_schema
             }
+            logger.info(f"🔍 TOOL_SCHEMA: Final schema for '{name}': {json.dumps(result, indent=2)}")
+            return result
 
     def _commands_similar(self, cmd1: str, cmd2: str) -> bool:
         """Check if two shell commands are functionally similar"""
@@ -95,7 +131,7 @@
             })
 
             return {
-                'type': '
+                'type': 'tool_display',
                 'tool_id': f'fake_{len(tool_results)}',
                 'tool_name': tool_name,
                 'result': result_text
@@ -104,7 +140,39 @@
             logger.error(f"Error executing intercepted tool call: {e}")
             return None
 
-    async def stream_with_tools(self, messages: List[Dict[str, Any]], tools: Optional[List] = None) -> AsyncGenerator[Dict[str, Any], None]:
+    async def stream_with_tools(self, messages: List[Dict[str, Any]], tools: Optional[List] = None, conversation_id: Optional[str] = None) -> AsyncGenerator[Dict[str, Any], None]:
+        # Initialize streaming metrics
+        stream_metrics = {
+            'events_sent': 0,
+            'bytes_sent': 0,
+            'chunk_sizes': [],
+            'start_time': time.time()
+        }
+
+        def track_yield(event_data):
+            """Track metrics for yielded events"""
+            chunk_size = len(json.dumps(event_data))
+            stream_metrics['events_sent'] += 1
+            stream_metrics['bytes_sent'] += chunk_size
+            stream_metrics['chunk_sizes'].append(chunk_size)
+
+            if stream_metrics['events_sent'] % 100 == 0:
+                logger.info(f"📊 Stream metrics: {stream_metrics['events_sent']} events, "
+                            f"{stream_metrics['bytes_sent']} bytes, "
+                            f"avg={stream_metrics['bytes_sent']/stream_metrics['events_sent']:.2f}")
+            return event_data
+
+        # Extended context handling for sonnet4.5
+        if conversation_id:
+            logger.info(f"🔍 EXTENDED_CONTEXT: Processing conversation_id = {conversation_id}")
+            # Set conversation_id in custom_bedrock module global so CustomBedrockClient can use it
+            try:
+                import app.utils.custom_bedrock as custom_bedrock_module
+                custom_bedrock_module._current_conversation_id = conversation_id
+                logger.info(f"🔍 EXTENDED_CONTEXT: Set module global conversation_id")
+            except Exception as e:
+                logger.warning(f"🔍 EXTENDED_CONTEXT: Could not set conversation_id: {e}")
+
         # Get MCP tools
         from app.mcp.manager import get_mcp_manager
         mcp_manager = get_mcp_manager()
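`track_yield` is a pass-through accounting hook: each event is JSON-serialized once to measure its size, three counters are updated, and the event is returned unchanged so it can be yielded inline. A standalone sketch of the same pattern; note that `len(json.dumps(...))` counts characters rather than encoded bytes, so the "bytes_sent" figure is an approximation:

```python
# Illustrative sketch of the track_yield pattern, not the shipped code.
import json
import logging
import time

logger = logging.getLogger(__name__)

def make_tracker(log_every: int = 100):
    """Build a pass-through event tracker plus its shared metrics dict."""
    metrics = {"events_sent": 0, "bytes_sent": 0, "chunk_sizes": [], "start_time": time.time()}

    def track(event: dict) -> dict:
        size = len(json.dumps(event))  # character count, not wire bytes
        metrics["events_sent"] += 1
        metrics["bytes_sent"] += size
        metrics["chunk_sizes"].append(size)
        if metrics["events_sent"] % log_every == 0:
            avg = metrics["bytes_sent"] / metrics["events_sent"]
            logger.info("%d events, %d bytes, avg=%.2f",
                        metrics["events_sent"], metrics["bytes_sent"], avg)
        return event  # unchanged, so it can be yielded inline

    return track, metrics

track, metrics = make_tracker()
assert track({"type": "text", "content": "hi"})["type"] == "text"
```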
@@ -130,20 +198,51 @@
         conversation = []
         system_content = None
 
-
-
-
+        logger.info(f"🔍 STREAMING_TOOL_EXECUTOR: Received {len(messages)} messages")
+        for i, msg in enumerate(messages):
+            # Handle both dict format and LangChain message objects
+            if hasattr(msg, 'type') and hasattr(msg, 'content'):
+                # LangChain message object
+                role = msg.type if msg.type != 'human' else 'user'
+                content = msg.content
+            elif isinstance(msg, str):
+                # String format - treat as user message
+                role = 'user'
+                content = msg
+            else:
+                # Dict format
+                role = msg.get('role', '')
+                content = msg.get('content', '')
+
+            logger.info(f"🔍 STREAMING_TOOL_EXECUTOR: Message {i}: role={role}, content_length={len(content)}")
             if role == 'system':
                 system_content = content
-
-
+                logger.info(f"🔍 STREAMING_TOOL_EXECUTOR: Found system message with {len(content)} characters")
+            elif role in ['user', 'assistant', 'ai']:
+                # Normalize ai role to assistant for Bedrock
+                bedrock_role = 'assistant' if role == 'ai' else role
+                conversation.append({"role": bedrock_role, "content": content})
 
         # Iterative execution with proper tool result handling
         recent_commands = []  # Track recent commands to prevent duplicates
+        using_extended_context = False  # Track if we've enabled extended context
+        consecutive_empty_tool_calls = 0  # Track empty tool calls to break loops
 
         for iteration in range(50):  # Increased from 20 to support more complex tasks
             logger.info(f"🔍 ITERATION_START: Beginning iteration {iteration}")
+
+            # Log last 2 messages to debug conversation state
+            if len(conversation) >= 2:
+                for i, msg in enumerate(conversation[-2:]):
+                    role = msg.get('role', msg.get('type', 'unknown'))
+                    content = msg.get('content', '')
+                    content_preview = str(content)[:150] if content else 'empty'
+                    logger.info(f"🔍 CONV_DEBUG: Message -{2-i}: role={role}, content_preview={content_preview}")
+
             tools_executed_this_iteration = False  # Track if tools were executed in this iteration
+            blocked_tools_this_iteration = 0  # Track blocked tools to prevent runaway loops
+            commands_this_iteration = []  # Track commands executed in this specific iteration
+            empty_tool_calls_this_iteration = 0  # Track empty tool calls in this iteration
 
             body = {
                 "anthropic_version": "bedrock-2023-05-31",
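The new message loop accepts three input shapes (LangChain message objects, bare strings, and plain dicts) and maps them onto the two roles Bedrock accepts. A condensed sketch of that normalization, independent of LangChain:

```python
def normalize_message(msg):
    """Map a LangChain-style object, str, or dict onto a Bedrock role/content pair.

    Illustrative only; mirrors the branches added in this hunk.
    """
    if hasattr(msg, "type") and hasattr(msg, "content"):
        # LangChain message object: 'human' becomes 'user'
        role = "user" if msg.type == "human" else msg.type
        content = msg.content
    elif isinstance(msg, str):
        role, content = "user", msg  # bare string -> user turn
    else:
        role, content = msg.get("role", ""), msg.get("content", "")
    if role == "ai":
        role = "assistant"  # Bedrock only knows 'user' and 'assistant'
    return role, content

assert normalize_message("hello") == ("user", "hello")
assert normalize_message({"role": "ai", "content": "hi"}) == ("assistant", "hi")
```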
@@ -152,46 +251,90 @@
             }
 
             if system_content:
-                #
-
-
-
-
-
-
-
-
-
-
-
+                # With precision prompts, system content is already clean - no regex needed
+                logger.info(f"🔍 SYSTEM_DEBUG: Using clean system content length: {len(system_content)}")
+                logger.info(f"🔍 SYSTEM_DEBUG: File count in system content: {system_content.count('File:')}")
+
+                system_text = system_content + "\n\nCRITICAL: Use ONLY native tool calling. Never generate markdown like ```tool:mcp_run_shell_command or ```bash. Use the provided tools directly.\n\nIMPORTANT: Only use tools when you need to interact with the system (run commands, check time, etc). If you can answer from the provided context or your reasoning, do so directly without using tools. Don't use echo commands just to show your thinking - just answer directly."
+
+                # Use prompt caching for large system prompts to speed up iterations
+                if len(system_text) > 1024:
+                    body["system"] = [
+                        {
+                            "type": "text",
+                            "text": system_text,
+                            "cache_control": {"type": "ephemeral"}
+                        }
+                    ]
+                    logger.info(f"🔍 CACHE: Enabled prompt caching for {len(system_text)} char system prompt")
+                else:
+                    body["system"] = system_text
+
+                logger.info(f"🔍 SYSTEM_DEBUG: Final system prompt length: {len(system_text)}")
+                logger.info(f"🔍 SYSTEM_CONTENT_DEBUG: First 500 chars of system prompt: {system_text[:500]}")
+                logger.info(f"🔍 SYSTEM_CONTENT_DEBUG: System prompt contains 'File:' count: {system_text.count('File:')}")
+                logger.info(f"🔍 SYSTEM_CONTENT_DEBUG: Last 500 chars of system prompt: {system_text[-500:]}")
+
+            # If we've already enabled extended context, keep using it
+            if using_extended_context and self.model_config:
+                header_value = self.model_config.get('extended_context_header')
+                if header_value:
+                    body['anthropic_beta'] = [header_value]
+                    logger.info(f"🔍 EXTENDED_CONTEXT: Continuing with extended context header")
 
             if bedrock_tools:
-
-
-
+                # Don't send tools if we've had too many consecutive empty calls
+                if consecutive_empty_tool_calls >= 5:
+                    logger.warning(f"🔍 TOOL_SUPPRESSION: Suppressing tools due to {consecutive_empty_tool_calls} consecutive empty calls")
+                    # Don't add tools to body - force model to respond without them
+                else:
+                    body["tools"] = bedrock_tools
+                    # Use "auto" to allow model to decide when to stop
+                    body["tool_choice"] = {"type": "auto"}
+                    logger.info(f"🔍 TOOL_DEBUG: Sending {len(bedrock_tools)} tools to model: {[t['name'] for t in bedrock_tools]}")
 
             try:
                 # Exponential backoff for rate limiting
                 max_retries = 4
                 base_delay = 2  # Start with 2 seconds
+                iteration_start_time = time.time()
 
                 for retry_attempt in range(max_retries + 1):
                     try:
-
-                        modelId
-                        body
-
+                        api_params = {
+                            'modelId': self.model_id,
+                            'body': json.dumps(body)
+                        }
+
+                        response = self.bedrock.invoke_model_with_response_stream(**api_params)
                         break  # Success, exit retry loop
                     except Exception as e:
                         error_str = str(e)
                         is_rate_limit = ("Too many tokens" in error_str or
                                          "ThrottlingException" in error_str or
                                          "Too many requests" in error_str)
+                        is_context_limit = "Input is too long" in error_str or "too large" in error_str
+
+                        # On context limit error, enable extended context and retry
+                        if is_context_limit and not using_extended_context and self.model_config:
+                            if self.model_config.get('supports_extended_context'):
+                                header_value = self.model_config.get('extended_context_header')
+                                if header_value:
+                                    logger.info(f"🔍 EXTENDED_CONTEXT: Context limit hit, enabling extended context with header {header_value}")
+                                    body['anthropic_beta'] = [header_value]
+                                    api_params['body'] = json.dumps(body)
+                                    using_extended_context = True  # Set flag to keep using it
+                                    try:
+                                        response = self.bedrock.invoke_model_with_response_stream(**api_params)
+                                        break
+                                    except Exception as retry_error:
+                                        logger.error(f"🔍 EXTENDED_CONTEXT: Retry with extended context failed: {retry_error}")
+                                        raise
 
                         if is_rate_limit and retry_attempt < max_retries:
-                            # Exponential backoff
-
+                            # Exponential backoff with longer delays to allow token bucket refill
+                            # boto3 already did fast retries, so we need longer waits
+                            delay = base_delay * (2 ** retry_attempt) + 4  # Add 4s base to account for boto3 retries
                             logger.warning(f"Rate limit hit, retrying in {delay}s (attempt {retry_attempt + 1}/{max_retries + 1})")
                             await asyncio.sleep(delay)
                         else:
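Two request-level changes land in this hunk: system prompts longer than 1024 characters are sent as a content block carrying a `cache_control: {"type": "ephemeral"}` marker so repeated loop iterations can reuse the cached prefix, and the rate-limit retry delay becomes `base_delay * 2**attempt + 4`, i.e. 6s, 8s, 12s, 20s across the four retries. A sketch of the body construction under those assumptions (`max_tokens` here is a placeholder; the real value comes from the surrounding code):

```python
import json

def build_request_body(system_text: str, messages: list,
                       max_tokens: int = 4096, cache_threshold: int = 1024) -> str:
    """Sketch of the request body assembled in this hunk."""
    body = {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": max_tokens,
        "messages": messages,
    }
    if len(system_text) > cache_threshold:
        # Large prompts become a cacheable content block; Bedrock can then
        # reuse the processed prefix across the loop's iterations.
        body["system"] = [{
            "type": "text",
            "text": system_text,
            "cache_control": {"type": "ephemeral"},
        }]
    else:
        body["system"] = system_text
    return json.dumps(body)
```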
@@ -200,6 +343,7 @@
                 # Process this iteration's stream - collect ALL tool calls first
                 assistant_text = ""
                 tool_results = []
+                tool_use_blocks = []  # Store actual tool_use blocks from Bedrock
                 yielded_text_length = 0  # Track how much text we've yielded
                 all_tool_calls = []  # Collect all tool calls from this response
 
@@ -209,17 +353,34 @@
                 skipped_tools = set()  # Track tools we're skipping due to limits
                 executed_tool_signatures = set()  # Track tool name + args to prevent duplicates
 
-                # Timeout protection
-
-
-                chunk_timeout =
+                # Timeout protection - use configured timeout from shell config
+                last_activity_time = time.time()
+                from app.config.shell_config import DEFAULT_SHELL_CONFIG
+                chunk_timeout = int(os.environ.get('COMMAND_TIMEOUT', DEFAULT_SHELL_CONFIG["timeout"]))
 
+                # Initialize content buffer and visualization detector
+                content_buffer = ""
+                viz_buffer = ""  # Track potential visualization blocks
+                in_viz_block = False
+
+                # Code block continuation tracking
+                code_block_tracker = {
+                    'in_block': False,
+                    'block_type': None,
+                    'accumulated_content': ''
+                }
+
                 for event in response['body']:
-                    # Timeout protection
-                    if time.time() -
-                        logger.warning(f"🚨 STREAM TIMEOUT after {chunk_timeout}s")
-
-
+                    # Timeout protection - only timeout if NO activity for chunk_timeout seconds
+                    if time.time() - last_activity_time > chunk_timeout:
+                        logger.warning(f"🚨 STREAM TIMEOUT after {chunk_timeout}s of inactivity - ending this iteration")
+                        # Add timeout message to assistant text so model knows what happened
+                        if not assistant_text.strip():
+                            assistant_text = f"[Stream timeout after {chunk_timeout}s - no response received]"
+                        break  # Break from chunk loop, but continue to next iteration
+
+                    # Reset activity timer on any event
+                    last_activity_time = time.time()
 
                     chunk = json.loads(event['chunk']['bytes'])
 
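The timeout change is subtle but important: the old code measured elapsed time from a fixed point, so long but healthy streams could be killed, while the new code measures time since the last event and resets the clock on every chunk. A sketch of the same inactivity-based guard around any event iterable:

```python
import time

def iter_with_inactivity_timeout(events, timeout_s: float):
    """Yield events until a gap of timeout_s passes with no event at all.

    Sketch of the guard added here; `events` stands in for response['body'].
    """
    last_activity = time.time()
    for event in events:
        if time.time() - last_activity > timeout_s:
            break  # stale stream: end this iteration only
        last_activity = time.time()  # any event resets the clock
        yield event
```

As in the diff, the check only runs once an event actually arrives, so a fully hung iterator remains bounded by the underlying client's read timeout rather than by this guard.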
@@ -227,9 +388,39 @@
                         content_block = chunk.get('content_block', {})
                         logger.info(f"🔍 CHUNK_DEBUG: content_block_start - type: {content_block.get('type')}, id: {content_block.get('id')}")
                         if content_block.get('type') == 'tool_use':
+                            # FLUSH any buffered content before tool starts
+                            if hasattr(self, '_content_optimizer'):
+                                remaining = self._content_optimizer.flush_remaining()
+                                if remaining:
+                                    yield track_yield({
+                                        'type': 'text',
+                                        'content': remaining,
+                                        'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
+                                    })
+                            if content_buffer.strip():
+                                yield track_yield({
+                                    'type': 'text',
+                                    'content': content_buffer,
+                                    'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
+                                })
+                                content_buffer = ""
+
                             tool_id = content_block.get('id')
                             tool_name = content_block.get('name')
                             if tool_id and tool_name:
+                                # Check for duplicates FIRST
+                                tool_signature = f"{tool_name}_{tool_id}"
+                                if tool_signature in executed_tool_signatures:
+                                    logger.info(f"🔍 DUPLICATE_SKIP: Tool {tool_signature} already executed")
+                                    skipped_tools.add(chunk.get('index'))
+                                    continue
+
+                                # Send tool_start event to frontend only (not to model)
+                                # This prevents contamination of model training data
+
+                                # Mark as executed to prevent duplicates
+                                executed_tool_signatures.add(tool_signature)
+
                                 # Collect tool call instead of executing immediately
                                 all_tool_calls.append({
                                     'id': tool_id,
@@ -257,14 +448,16 @@
                             assistant_text += text
 
                             # Check for fake tool calls in the text and intercept them
-
-
+                            # DISABLED: This was causing premature execution of incomplete commands
+                            if False and (('```tool:' in assistant_text and '```' in assistant_text[assistant_text.find('```tool:') + 8:]) or \
+                               ('run_shell_command\n$' in assistant_text and '\n' in assistant_text[assistant_text.find('run_shell_command\n$') + 20:]) or \
+                               (':mcp_run_shell_command\n$' in assistant_text and '\n' in assistant_text[assistant_text.find(':mcp_run_shell_command\n$') + 23:])):
                                 # Extract and execute fake tool calls with multiple patterns
                                 import re
                                 patterns = [
-                                    r'```tool:(mcp_\w+)\n\$\s*([^`]+)```',  # Full markdown blocks
-                                    r'run_shell_command\n\$\s*([^\n]+)',  #
-                                    r':mcp_run_shell_command\n\$\s*([^\n]+)'  #
+                                    r'```tool:(mcp_\w+)\n\$\s*([^`]+)```',  # Full markdown blocks only
+                                    r'run_shell_command\n\$\s*([^\n]+)\n',  # Complete lines only
+                                    r':mcp_run_shell_command\n\$\s*([^\n]+)\n'  # Complete lines only
                                 ]
 
                                 for pattern in patterns:
@@ -280,11 +473,72 @@
                                             result = await self._execute_fake_tool('mcp_run_shell_command', command, assistant_text, tool_results, mcp_manager)
                                             if result:
                                                 yield result
+
+                            for pattern in patterns:
+                                if re.search(pattern, text):
+                                    logger.warning(f"🚫 Intercepted fake tool call: {pattern}")
+                                    if 'tool:' in text:
+                                        # Skip fake tool patterns
+                                        continue
 
-                            #
-                            if not ('
-
-
+                            # Initialize content optimizer if not exists
+                            if not hasattr(self, '_content_optimizer'):
+                                from app.utils.streaming_optimizer import StreamingContentOptimizer
+                                self._content_optimizer = StreamingContentOptimizer()
+
+                            # Skip fake tool patterns
+                            if 'tool:' in text:
+                                continue
+
+                            # Check for visualization block boundaries - ensure proper markdown format
+                            viz_patterns = ['```vega-lite', '```mermaid', '```graphviz', '```d3']
+                            if any(pattern in text for pattern in viz_patterns):
+                                in_viz_block = True
+                                viz_buffer = text
+                                continue
+                            elif in_viz_block:
+                                viz_buffer += text
+                                # Check for closing ``` - ensure complete block
+                                if '```' in text and viz_buffer.count('```') >= 2:
+                                    # Complete visualization block - ensure it ends with newline for proper markdown
+                                    if not viz_buffer.endswith('\n'):
+                                        viz_buffer += '\n'
+
+                                    # Flush any pending content first
+                                    if hasattr(self, '_content_optimizer'):
+                                        remaining = self._content_optimizer.flush_remaining()
+                                        if remaining:
+                                            yield track_yield({
+                                                'type': 'text',
+                                                'content': remaining,
+                                                'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
+                                            })
+                                    if content_buffer.strip():
+                                        yield track_yield({
+                                            'type': 'text',
+                                            'content': content_buffer,
+                                            'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
+                                        })
+                                        content_buffer = ""
+
+                                    # Send complete visualization block
+                                    yield track_yield({
+                                        'type': 'text',
+                                        'content': viz_buffer,
+                                        'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
+                                    })
+                                    viz_buffer = ""
+                                    in_viz_block = False
+                                continue
+
+                            # Use content optimizer to prevent mid-word splits
+                            for optimized_chunk in self._content_optimizer.add_content(text):
+                                self._update_code_block_tracker(optimized_chunk, code_block_tracker)
+                                yield track_yield({
+                                    'type': 'text',
+                                    'content': optimized_chunk,
+                                    'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
+                                })
                         elif delta.get('type') == 'input_json_delta':
                             # Find tool by index
                             tool_id = None
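Visualization fences (vega-lite, mermaid, graphviz, d3 blocks) are buffered whole so the frontend never renders a half-complete diagram. A self-contained sketch of that buffering state machine; the fence list mirrors `viz_patterns` from the diff:

```python
VIZ_FENCES = ("```vega-lite", "```mermaid", "```graphviz", "```d3")

class VizBuffer:
    """Hold visualization blocks until the closing fence arrives (sketch)."""

    def __init__(self):
        self.buf = ""
        self.active = False

    def feed(self, text: str):
        """Return text that is safe to emit now, or None while buffering."""
        if not self.active and any(f in text for f in VIZ_FENCES):
            self.active, self.buf = True, text  # opening fence: start buffering
            return None
        if self.active:
            self.buf += text
            # Two fences means the block is complete and renderable.
            if "```" in text and self.buf.count("```") >= 2:
                out, self.buf, self.active = self.buf, "", False
                return out if out.endswith("\n") else out + "\n"
            return None
        return text  # ordinary text passes straight through
```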
@@ -307,10 +561,34 @@
                             tool_data = active_tools[tool_id]
                             tool_name = tool_data['name']
                             args_json = tool_data['partial_json']
+
+                            logger.info(f"🔍 TOOL_ARGS: Tool '{tool_name}' (id: {tool_id}) has args_json: '{args_json}'")
 
                             try:
                                 args = json.loads(args_json) if args_json.strip() else {}
 
+                                # Detect empty tool calls for tools that require arguments
+                                actual_tool_name = tool_name[4:] if tool_name.startswith('mcp_') else tool_name
+                                if actual_tool_name == 'run_shell_command' and not args.get('command'):
+                                    logger.warning(f"🔍 EMPTY_TOOL_CALL: Model called {tool_name} without required 'command' argument")
+                                    logger.warning(f"🔍 EMPTY_TOOL_CONTEXT: Assistant text before call: '{assistant_text[-200:]}'")
+                                    empty_tool_calls_this_iteration += 1
+                                    consecutive_empty_tool_calls += 1
+
+                                    # Return helpful error immediately without executing
+                                    error_result = f"Error: Tool call failed - the 'command' parameter is required but was not provided. You must call run_shell_command with a JSON object containing the command string. Example: {{\"command\": \"ls -la\"}}. Please retry with the correct format."
+
+                                    tool_results.append({
+                                        'tool_id': tool_id,
+                                        'tool_name': tool_name,
+                                        'result': error_result
+                                    })
+
+                                    completed_tools.add(tool_id)
+                                    tools_executed_this_iteration = True
+                                    logger.info(f"🔍 TOOL_EXECUTED_FLAG: Set tools_executed_this_iteration = True for tool {tool_id}")
+                                    continue
+
                                 # Update the corresponding entry in all_tool_calls with parsed arguments
                                 for tool_call in all_tool_calls:
                                     if tool_call['id'] == tool_id:
@@ -326,80 +604,202 @@
                                 # Create signature to detect duplicates
                                 tool_signature = f"{actual_tool_name}:{json.dumps(args, sort_keys=True)}"
 
-                                #
-
-
-
-
+                                # Execute the tool (already checked for duplicates at collection)
+                                logger.info(f"🔍 EXECUTING_TOOL: {actual_tool_name} with args {args}")
+
+                                # Send tool_start event with complete arguments
+                                yield {
+                                    'type': 'tool_start',
+                                    'tool_id': tool_id,
+                                    'tool_name': tool_name,
+                                    'args': args,
+                                    'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
+                                }
 
-                                #
-
-
-                                    # Normalize command for similarity check
-                                    normalized_cmd = ' '.join(command.split())
+                                # Execute the tool immediately
+                                try:
+                                    result = await mcp_manager.call_tool(actual_tool_name, args)
 
-                                #
-
-
-
-
-
-
-                                    # Only execute if we didn't break (no similar command found)
-                                    recent_commands.append(normalized_cmd)
-                                    # Keep only last 5 commands to prevent memory bloat
-                                    if len(recent_commands) > 5:
-                                        recent_commands.pop(0)
-
-                                    executed_tool_signatures.add(tool_signature)
-
-                                    # Execute the tool only if it's not similar to recent commands
-                                    result = await mcp_manager.call_tool(actual_tool_name, args)
-
-                                    if isinstance(result, dict) and 'content' in result:
-                                        content = result['content']
-                                        if isinstance(content, list) and len(content) > 0:
-                                            result_text = content[0].get('text', str(result))
+                                    # Process result
+                                    if isinstance(result, dict) and result.get('error') and result.get('error') != False:
+                                        error_msg = result.get('message', 'Unknown error')
+                                        result_text = f"ERROR: {error_msg}. Please try a different approach or fix the command."
+                                    elif isinstance(result, dict) and 'content' in result:
+                                        content = result['content']
+                                        if isinstance(content, list) and len(content) > 0:
+                                            result_text = content[0].get('text', str(result))
                                         else:
                                             result_text = str(result)
+                                    else:
+                                        result_text = str(result)
 
-
-
-
-
-
+                                    tool_results.append({
+                                        'tool_id': tool_id,
+                                        'tool_name': tool_name,
+                                        'result': result_text
+                                    })
 
-
-
-
-
-
-                                    }
-
-
+                                    yield {
+                                        'type': 'tool_display',
+                                        'tool_id': tool_id,
+                                        'tool_name': tool_name,
+                                        'result': result_text,
+                                        'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
+                                    }
+
+                                    # Add clean tool result for model conversation
+                                    yield {
+                                        'type': 'tool_result_for_model',
+                                        'tool_use_id': tool_id,
+                                        'content': result_text.strip()
+                                    }
+
+                                    # Immediate flush to reduce delay
+                                    await asyncio.sleep(0)
+
+                                    tools_executed_this_iteration = True
+                                    logger.info(f"🔍 TOOL_EXECUTED_FLAG: Set tools_executed_this_iteration = True for tool {tool_id}")
+
+                                except Exception as e:
+                                    error_msg = f"Tool error: {str(e)}"
+                                    logger.error(f"🔍 TOOL_EXECUTION_ERROR: {error_msg}")
+                                    tool_results.append({
+                                        'tool_id': tool_id,
+                                        'tool_name': tool_name,
+                                        'result': f"ERROR: {error_msg}. Please try a different approach or fix the command."
+                                    })
+
+                                    # Frontend error display
+                                    yield {'type': 'tool_display', 'tool_name': tool_name, 'result': f"ERROR: {error_msg}"}
+
+                                    # Clean error for model
+                                    yield {
+                                        'type': 'tool_result_for_model',
+                                        'tool_use_id': tool_id,
+                                        'content': f"ERROR: {error_msg}. Please try a different approach or fix the command."
+                                    }
                                 completed_tools.add(tool_id)
 
                             except Exception as e:
                                 error_msg = f"Tool error: {str(e)}"
-
+
+                                # Add error to tool_results so it gets fed back to the model
+                                tool_results.append({
+                                    'tool_id': tool_id,
+                                    'tool_name': tool_name,
+                                    'result': f"ERROR: {error_msg}. Please try a different approach or fix the command."
+                                })
+
+                                # Frontend error display
+                                yield {'type': 'tool_display', 'tool_name': 'unknown', 'result': f"ERROR: {error_msg}"}
+
+                                # Clean error for model
+                                yield {
+                                    'type': 'tool_result_for_model',
+                                    'tool_use_id': tool_id or 'unknown',
+                                    'content': f"ERROR: {error_msg}. Please try a different approach or fix the command."
+                                }
 
                         elif chunk['type'] == 'message_stop':
+                            # Flush any remaining content from buffers before stopping
+                            if viz_buffer.strip():
+                                self._update_code_block_tracker(viz_buffer, code_block_tracker)
+                                yield track_yield({
+                                    'type': 'text',
+                                    'content': viz_buffer,
+                                    'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
+                                })
+                            # Flush any remaining content from optimizer
+                            if hasattr(self, '_content_optimizer'):
+                                remaining = self._content_optimizer.flush_remaining()
+                                if remaining:
+                                    self._update_code_block_tracker(remaining, code_block_tracker)
+                                    yield track_yield({
+                                        'type': 'text',
+                                        'content': remaining,
+                                        'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
+                                    })
+                            if content_buffer.strip():
+                                self._update_code_block_tracker(content_buffer, code_block_tracker)
+                                yield track_yield({
+                                    'type': 'text',
+                                    'content': content_buffer,
+                                    'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
+                                })
+
+                            # Check if we ended mid-code-block and auto-continue
+                            continuation_count = 0
+                            max_continuations = 10  # Increased for large diagrams/code blocks
+
+                            # Log tracker state before checking
+                            backtick_count = assistant_text.count('```')
+                            logger.info(f"🔍 TRACKER_STATE: in_block={code_block_tracker['in_block']}, block_type={code_block_tracker.get('block_type')}, backtick_count={backtick_count}, last_50_chars='{assistant_text[-50:]}'")
+
+                            while code_block_tracker['in_block'] and continuation_count < max_continuations:
+                                continuation_count += 1
+                                logger.info(f"🔄 INCOMPLETE_BLOCK: Detected incomplete {code_block_tracker['block_type']} block, auto-continuing (attempt {continuation_count})")
+
+                                # Send heartbeat before continuation to keep connection alive
+                                yield {
+                                    'type': 'heartbeat',
+                                    'heartbeat': True,
+                                    'timestamp': f"{int((time.time() - iteration_start_time) * 1000)}ms"
+                                }
+
+                                continuation_had_content = False
+                                async for continuation_chunk in self._continue_incomplete_code_block(
+                                    conversation, code_block_tracker, mcp_manager, iteration_start_time, assistant_text
+                                ):
+                                    if continuation_chunk.get('content'):
+                                        continuation_had_content = True
+                                        self._update_code_block_tracker(continuation_chunk['content'], code_block_tracker)
+                                        assistant_text += continuation_chunk['content']
+
+                                    if code_block_tracker['in_block']:
+                                        continuation_chunk['code_block_continuation'] = True
+                                        continuation_chunk['block_type'] = code_block_tracker['block_type']
+
+                                    yield continuation_chunk
+
+                                if not continuation_had_content:
+                                    logger.info("🔄 CONTINUATION: No content generated, stopping continuation attempts")
+                                    break
+
+                                # Log tracker state after continuation
+                                logger.info(f"🔄 CONTINUATION_RESULT: After attempt {continuation_count}, in_block={code_block_tracker['in_block']}, had_content={continuation_had_content}")
+
                             # Just break out of chunk processing, handle completion logic below
                             break
 
-                # Add assistant response to conversation
-                if assistant_text.strip():
-
-
-
-
-
-
-
+                # Add assistant response to conversation with proper tool_use blocks
+                if assistant_text.strip() or tools_executed_this_iteration:
+                    # Build content as list with text and tool_use blocks
+                    content_blocks = []
+                    if assistant_text.strip():
+                        content_blocks.append({"type": "text", "text": assistant_text})
+
+                    # Add tool_use blocks for each tool that was executed with actual args
+                    for tool_result in tool_results:
+                        # Find the corresponding tool call to get the actual args
+                        tool_args = {}
+                        for tool_call in all_tool_calls:
+                            if tool_call['id'] == tool_result['tool_id']:
+                                tool_args = tool_call.get('args', {})
+                                break
+
+                        content_blocks.append({
+                            "type": "tool_use",
+                            "id": tool_result['tool_id'],
+                            "name": tool_result['tool_name'],
+                            "input": tool_args
+                        })
+
+                    conversation.append({"role": "assistant", "content": content_blocks})
+
+                # Add tool results to conversation BEFORE filtering
+                logger.info(f"🔍 ITERATION_END_CHECK: tools_executed_this_iteration = {tools_executed_this_iteration}, tool_results count = {len(tool_results)}")
+                if tools_executed_this_iteration:
+                    logger.info(f"🔍 TOOL_RESULTS_PROCESSING: Adding {len(tool_results)} tool results to conversation")
                     for tool_result in tool_results:
                         raw_result = tool_result['result']
                         if isinstance(raw_result, str) and '$ ' in raw_result:
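At the end of each round the assistant turn is rebuilt as a list of content blocks (text plus one `tool_use` block per executed call), and the matching results are queued for `filter_conversation_for_model` to convert. A sketch of the target Anthropic/Bedrock message shapes, assuming the id fields line up the way the diff's `all_tool_calls` and `tool_results` entries do:

```python
def build_tool_round(assistant_text, tool_calls, tool_results):
    """Sketch of the Anthropic/Bedrock block format this hunk targets.

    tool_calls:   [{'id': ..., 'name': ..., 'args': {...}}]
    tool_results: [{'tool_id': ..., 'result': ...}] with matching ids.
    """
    blocks = []
    if assistant_text.strip():
        blocks.append({"type": "text", "text": assistant_text})
    for call in tool_calls:
        # One tool_use block per executed call, echoing the parsed arguments.
        blocks.append({"type": "tool_use", "id": call["id"],
                       "name": call["name"], "input": call.get("args", {})})
    assistant_turn = {"role": "assistant", "content": blocks}
    # Results come back as a user turn whose tool_use_id ties each result to
    # its block above; in the diff this conversion is delegated to
    # filter_conversation_for_model rather than done inline.
    user_turn = {"role": "user", "content": [
        {"type": "tool_result", "tool_use_id": r["tool_id"], "content": r["result"]}
        for r in tool_results
    ]}
    return [assistant_turn, user_turn]
```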
@@ -407,20 +807,80 @@
                             clean_lines = [line for line in lines if not line.startswith('$ ')]
                             raw_result = '\n'.join(clean_lines).strip()
 
+                        # Add in tool_result_for_model format so filter can convert to proper Bedrock format
                         conversation.append({
-
-
+                            'type': 'tool_result_for_model',
+                            'tool_use_id': tool_result['tool_id'],
+                            'content': raw_result
                         })
+
+                # Filter conversation to convert tool results to proper format
+                original_length = len(conversation)
+                conversation = filter_conversation_for_model(conversation)
+                logger.info(f"🤖 MODEL_RESPONSE: {assistant_text}")
+                logger.info(f"Filtered conversation: {original_length} -> {len(conversation)} messages")
+
+                # Skip duplicate execution - tools are already executed in content_block_stop
+                # This section was causing duplicate tool execution
+
+                # Continue to next iteration if tools were executed
+                if tools_executed_this_iteration:
+                    # Warn about consecutive empty tool calls but don't break
+                    if consecutive_empty_tool_calls >= 5:
+                        logger.warning(f"🔍 EMPTY_TOOL_WARNING: {consecutive_empty_tool_calls} consecutive empty tool calls detected")
+                        # Add a message to guide the model to respond without tools
+                        conversation.append({
+                            "role": "user",
+                            "content": "Please provide your response based on the information available. Do not attempt to use tools."
+                        })
+                    elif consecutive_empty_tool_calls >= 3:
+                        logger.warning(f"🔍 EMPTY_TOOL_WARNING: {consecutive_empty_tool_calls} consecutive empty tool calls detected, adding delay")
+                        # Add a small delay to slow down the loop
+                        await asyncio.sleep(0.5)
 
-
-
+                    # Reset consecutive counter if we had successful tool calls
+                    if empty_tool_calls_this_iteration == 0:
+                        consecutive_empty_tool_calls = 0
+
+                    logger.info(f"🔍 CONTINUING_ROUND: Tool results added, model will continue in same stream (round {iteration + 1})")
+                    # Yield heartbeat to flush stream before next iteration
+                    yield {'type': 'iteration_continue', 'iteration': iteration + 1}
+                    await asyncio.sleep(0)
+                    continue  # Immediately start next iteration
                 else:
-                    #
-                    if
-                        logger.
+                    # Check if too many tools were blocked (indicates runaway loop)
+                    if blocked_tools_this_iteration >= 3:
+                        logger.warning(f"🔍 RUNAWAY_LOOP_DETECTED: {blocked_tools_this_iteration} tools blocked in iteration {iteration}, ending stream")
                         yield {'type': 'stream_end'}
                         break
-
+
+                    # No tools executed - check if we should end the stream
+                    if assistant_text.strip():
+                        # Check if code block is still incomplete
+                        if code_block_tracker.get('in_block'):
+                            logger.warning(f"🔍 INCOMPLETE_BLOCK_REMAINING: Code block still incomplete after max continuations, ending stream anyway")
+
+                        # Check if the text suggests the model is about to make a tool call
+                        # Only check the last 200 characters to avoid issues with long accumulated text
+                        text_end = assistant_text[-200:].lower().strip()
+                        suggests_tool_call = text_end.endswith(':')
+
+                        if suggests_tool_call and iteration < 3:  # More conservative limit
+                            logger.info(f"🔍 POTENTIAL_TOOL_CALL: Text suggests model wants to make a tool call, continuing: '{assistant_text[-50:]}'")
+                            continue
+                        else:
+                            logger.info(f"🔍 STREAM_END: Model produced text without tools, ending stream")
+                            # Log final metrics
+                            logger.info(f"📊 Final stream metrics: events={stream_metrics['events_sent']}, "
+                                        f"bytes={stream_metrics['bytes_sent']}, "
+                                        f"avg_size={stream_metrics['bytes_sent']/max(stream_metrics['events_sent'],1):.2f}, "
+                                        f"min={min(stream_metrics['chunk_sizes']) if stream_metrics['chunk_sizes'] else 0}, "
+                                        f"max={max(stream_metrics['chunk_sizes']) if stream_metrics['chunk_sizes'] else 0}, "
+                                        f"duration={time.time()-stream_metrics['start_time']:.2f}s")
+                            yield {'type': 'stream_end'}
+                            break
+                    elif iteration >= 5:  # Safety: end after 5 iterations total
+                        logger.info(f"🔍 MAX_ITERATIONS: Reached maximum iterations ({iteration}), ending stream")
                         yield {'type': 'stream_end'}
                         break
                     else:
@@ -429,3 +889,154 @@
             except Exception as e:
                 yield {'type': 'error', 'content': f'Error: {e}'}
                 return
+
+    def _update_code_block_tracker(self, text: str, tracker: Dict[str, Any]) -> None:
+        """Update code block tracking state based on text content."""
+        if not text:
+            return
+
+        lines = text.split('\n')
+        for line in lines:
+            stripped = line.strip()
+            if stripped.startswith('```'):
+                if not tracker['in_block']:
+                    # Opening a new block
+                    block_type = stripped[3:].strip() or 'code'
+                    tracker['in_block'] = True
+                    tracker['block_type'] = block_type
+                    tracker['accumulated_content'] = line + '\n'
+                    logger.debug(f"🔍 TRACKER: Opened {block_type} block")
+                else:
+                    # Closing the current block - any ``` closes it
+                    # Don't require type to match since closing ``` often has no type
+                    tracker['in_block'] = False
+                    tracker['block_type'] = None
+                    tracker['accumulated_content'] = ''
+                    logger.debug(f"🔍 TRACKER: Closed block")
+            elif tracker['in_block']:
+                tracker['accumulated_content'] += line + '\n'
+
+    async def _continue_incomplete_code_block(
+        self,
+        conversation: List[Dict[str, Any]],
+        code_block_tracker: Dict[str, Any],
+        mcp_manager,
+        start_time: float,
+        assistant_text: str
+    ) -> AsyncGenerator[Dict[str, Any], None]:
+        """Continue an incomplete code block by making a new API call."""
+        try:
+            block_type = code_block_tracker['block_type']
+            continuation_prompt = f"Continue the incomplete {block_type} code block from where it left off and close it with ```. Output ONLY the continuation of the code block, no explanations."
+
+            continuation_conversation = conversation.copy()
+
+            # Remove incomplete last line
+            if assistant_text.strip():
+                lines = assistant_text.split('\n')
+                if len(lines) > 1:
+                    last_line = lines[-1].strip()
+                    if not last_line or ('```' in last_line and not last_line.endswith('```')):
+                        cleaned_text = '\n'.join(lines[:-1])
+                        logger.info(f"🔄 CONTEXT_CLEANUP: Removed incomplete last line: '{last_line}'")
+                    else:
+                        cleaned_text = assistant_text
+
+                if continuation_conversation and continuation_conversation[-1].get('role') == 'assistant':
+                    # Update the last assistant message with cleaned text in proper format
+                    continuation_conversation[-1]['content'] = [{"type": "text", "text": cleaned_text}]
+                else:
+                    continuation_conversation.append({"role": "assistant", "content": [{"type": "text", "text": cleaned_text}]})
+
+            continuation_conversation.append({"role": "user", "content": continuation_prompt})
+
+            body = {
+                "messages": continuation_conversation,
+                "max_tokens": 2000,
+                "temperature": 0.1,
+                "anthropic_version": "bedrock-2023-05-31"
+            }
+
+            logger.info(f"🔄 CONTINUATION: Making API call to continue {block_type} block")
+
+            # Yield initial heartbeat
+            yield {
+                'type': 'heartbeat',
+                'heartbeat': True,
+                'timestamp': f"{int((time.time() - start_time) * 1000)}ms"
+            }
+
+            # Make the Bedrock call - this returns immediately with a stream
+            response = self.bedrock.invoke_model_with_response_stream(
+                modelId=self.model_id,
+                body=json.dumps(body)
+            )
+
+            # Send heartbeat after getting response object (before first chunk)
+            yield {
+                'type': 'heartbeat',
+                'heartbeat': True,
+                'timestamp': f"{int((time.time() - start_time) * 1000)}ms"
+            }
+
+            accumulated_start = ""
+            header_filtered = False
+            chunk_count = 0
+
+            for event in response['body']:
+                # Send heartbeat every 10 chunks to keep connection alive
+                chunk_count += 1
+                if chunk_count % 10 == 0:
+                    yield {
+                        'type': 'heartbeat',
+                        'heartbeat': True,
+                        'timestamp': f"{int((time.time() - start_time) * 1000)}ms"
+                    }
+
+                chunk = json.loads(event['chunk']['bytes'])
+
+                if chunk['type'] == 'content_block_delta':
+                    delta = chunk.get('delta', {})
+                    if delta.get('type') == 'text_delta':
+                        text = delta.get('text', '')
+
+                        if not header_filtered:
+                            accumulated_start += text
+
+                            if '\n' in accumulated_start or len(accumulated_start) > 20:
+                                if accumulated_start.strip().startswith('```'):
+                                    lines = accumulated_start.split('\n', 1)
+                                    if len(lines) > 1:
+                                        remaining_text = '\n' + lines[1]  # Preserve the newline
+                                        header_type = lines[0].strip()
+                                        logger.info(f"🔄 FILTERED: Removed redundant {header_type} from continuation")
+                                    else:
+                                        remaining_text = ""
+
+                                    if remaining_text:
+                                        yield {
+                                            'type': 'text',
+                                            'content': remaining_text,
+                                            'timestamp': f"{int((time.time() - start_time) * 1000)}ms",
+                                            'continuation': True
+                                        }
+                                else:
+                                    yield {
+                                        'type': 'text',
+                                        'content': accumulated_start,
+                                        'timestamp': f"{int((time.time() - start_time) * 1000)}ms",
+                                        'continuation': True
+                                    }
+
+                                header_filtered = True
+                        else:
+                            if text:
+                                yield {
+                                    'type': 'text',
+                                    'content': text,
+                                    'timestamp': f"{int((time.time() - start_time) * 1000)}ms",
+                                    'continuation': True
+                                }
+
+        except Exception as e:
+            logger.error(f"🔄 CONTINUATION: Error in continuation: {e}")