ziya 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ziya might be problematic.
- app/agents/agent.py +71 -73
- app/agents/direct_streaming.py +1 -1
- app/agents/prompts.py +1 -1
- app/agents/prompts_manager.py +14 -10
- app/agents/wrappers/google_direct.py +31 -1
- app/agents/wrappers/nova_tool_execution.py +2 -2
- app/agents/wrappers/nova_wrapper.py +1 -1
- app/agents/wrappers/ziya_bedrock.py +53 -31
- app/config/models_config.py +61 -20
- app/config/shell_config.py +5 -1
- app/extensions/prompt_extensions/claude_extensions.py +27 -5
- app/extensions/prompt_extensions/mcp_prompt_extensions.py +82 -56
- app/main.py +5 -3
- app/mcp/client.py +19 -10
- app/mcp/manager.py +68 -10
- app/mcp/tools.py +8 -9
- app/mcp_servers/shell_server.py +3 -3
- app/middleware/streaming.py +29 -41
- app/routes/file_validation.py +35 -0
- app/routes/mcp_routes.py +54 -8
- app/server.py +525 -614
- app/streaming_tool_executor.py +748 -137
- app/templates/asset-manifest.json +20 -20
- app/templates/index.html +1 -1
- app/templates/static/css/{main.0297bfee.css → main.e7109b49.css} +2 -2
- app/templates/static/css/main.e7109b49.css.map +1 -0
- app/templates/static/js/14386.65fcfe53.chunk.js +2 -0
- app/templates/static/js/14386.65fcfe53.chunk.js.map +1 -0
- app/templates/static/js/35589.0368973a.chunk.js +2 -0
- app/templates/static/js/35589.0368973a.chunk.js.map +1 -0
- app/templates/static/js/{50295.ab92f61b.chunk.js → 50295.90aca393.chunk.js} +3 -3
- app/templates/static/js/50295.90aca393.chunk.js.map +1 -0
- app/templates/static/js/55734.5f0fd567.chunk.js +2 -0
- app/templates/static/js/55734.5f0fd567.chunk.js.map +1 -0
- app/templates/static/js/58542.57fed736.chunk.js +2 -0
- app/templates/static/js/58542.57fed736.chunk.js.map +1 -0
- app/templates/static/js/{68418.2554bb1e.chunk.js → 68418.f7b4d2d9.chunk.js} +3 -3
- app/templates/static/js/68418.f7b4d2d9.chunk.js.map +1 -0
- app/templates/static/js/99948.b280eda0.chunk.js +2 -0
- app/templates/static/js/99948.b280eda0.chunk.js.map +1 -0
- app/templates/static/js/main.e075582c.js +3 -0
- app/templates/static/js/main.e075582c.js.map +1 -0
- app/utils/code_util.py +5 -2
- app/utils/context_cache.py +11 -0
- app/utils/conversation_filter.py +90 -0
- app/utils/custom_bedrock.py +43 -1
- app/utils/diff_utils/validation/validators.py +32 -22
- app/utils/file_cache.py +5 -3
- app/utils/precision_prompt_system.py +116 -0
- app/utils/streaming_optimizer.py +100 -0
- {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/METADATA +3 -2
- {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/RECORD +59 -55
- app/templates/static/css/main.0297bfee.css.map +0 -1
- app/templates/static/js/14386.567bf803.chunk.js +0 -2
- app/templates/static/js/14386.567bf803.chunk.js.map +0 -1
- app/templates/static/js/35589.278ecda2.chunk.js +0 -2
- app/templates/static/js/35589.278ecda2.chunk.js.map +0 -1
- app/templates/static/js/50295.ab92f61b.chunk.js.map +0 -1
- app/templates/static/js/55734.90d8bd52.chunk.js +0 -2
- app/templates/static/js/55734.90d8bd52.chunk.js.map +0 -1
- app/templates/static/js/58542.08fb5cf4.chunk.js +0 -2
- app/templates/static/js/58542.08fb5cf4.chunk.js.map +0 -1
- app/templates/static/js/68418.2554bb1e.chunk.js.map +0 -1
- app/templates/static/js/99948.71670e91.chunk.js +0 -2
- app/templates/static/js/99948.71670e91.chunk.js.map +0 -1
- app/templates/static/js/main.1d79eac2.js +0 -3
- app/templates/static/js/main.1d79eac2.js.map +0 -1
- /app/templates/static/js/{50295.ab92f61b.chunk.js.LICENSE.txt → 50295.90aca393.chunk.js.LICENSE.txt} +0 -0
- /app/templates/static/js/{68418.2554bb1e.chunk.js.LICENSE.txt → 68418.f7b4d2d9.chunk.js.LICENSE.txt} +0 -0
- /app/templates/static/js/{main.1d79eac2.js.LICENSE.txt → main.e075582c.js.LICENSE.txt} +0 -0
- {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/WHEEL +0 -0
- {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/entry_points.txt +0 -0
- {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/licenses/LICENSE +0 -0
app/agents/agent.py
CHANGED

@@ -820,9 +820,12 @@ class RetryingChatBedrock(Runnable):
         model_config = config.copy() if config else {}
         if conversation_id:
             model_config["conversation_id"] = conversation_id
+
+        # Merge model_config into kwargs for compatibility with all model types
+        merged_kwargs = {**kwargs, **model_config}

-        async for chunk in self.model.astream(messages,
-            logger.
+        async for chunk in self.model.astream(messages, **merged_kwargs):
+            logger.debug(f"🔍 AGENT_MODEL_ASTREAM: Received chunk type: {type(chunk)}, content: {getattr(chunk, 'content', str(chunk))[:100]}")
         # Check if this is an error chunk that should terminate this specific stream
         # If we reach here, we've successfully started streaming

@@ -1078,23 +1081,20 @@ class RetryingChatBedrock(Runnable):


         # Check if this is a throttling error wrapped in another exception
+        logger.error(f"🔍 ACTUAL_ERROR: {error_str}")
+        logger.error(f"🔍 ERROR_TYPE: {type(e)}")
         if "ThrottlingException" in error_str or "Too many requests" in error_str:
             logger.warning("Detected throttling error in exception")
-            #
+            # Simple error message for frontend
+            error_message = {
+                "error": "⚠️ AWS rate limit exceeded. Please wait a moment and try again.",
+                "type": "throttling"
+            }
+        else:
+            # Show the actual error instead of masking it
             error_message = {
-                "error": "
-                "
-                "status_code": 429,
-                "stream_id": stream_id,
-                "retry_after": "60",
-                "throttle_info": {
-                    "auto_attempts_exhausted": True,
-                    "total_auto_attempts": max_retries,
-                    "can_user_retry": True,
-                    "backoff_used": [5, 10, 20, 40][:attempt + 1]
-                },
-                "ui_action": "show_retry_button",
-                "user_message": "Click 'Retry' to attempt again, or wait a few minutes for better success rate."
+                "error": f"⚠️ Error: {error_str}",
+                "type": "general"
             }

         # Include pre-streaming work in preservation

@@ -1412,19 +1412,10 @@ class RetryingChatBedrock(Runnable):
                 time.sleep(retry_delay)
                 continue
             else:
-                #
+                # Simple error response for frontend
                 error_message = {
-                    "error": "
-                    "
-                    "status_code": 429,
-                    "throttle_info": {
-                        "auto_attempts_exhausted": True,
-                        "total_auto_attempts": max_retries,
-                        "can_user_retry": True,
-                        "backoff_used": [5.0, 10.0, 20.0, 40.0][:attempt + 1]
-                    },
-                    "ui_action": "show_retry_button",
-                    "user_message": "Click 'Retry' to attempt again, or wait a few minutes for better success rate."
+                    "error": "⚠️ AWS rate limit exceeded. Please wait a moment and try again.",
+                    "type": "throttling"
                 }
                 # Let this fall through to the final error handling

@@ -1808,31 +1799,34 @@ def create_agent_chain(chat_model: BaseChatModel):
     # Create cache key based on model configuration
     model_id = ModelManager.get_model_id() or getattr(chat_model, 'model_id', 'unknown')
     ast_enabled = os.environ.get("ZIYA_ENABLE_AST") == "true"
-    mcp_enabled = os.environ.get("ZIYA_ENABLE_MCP")
+    mcp_enabled = os.environ.get("ZIYA_ENABLE_MCP", "true").lower() in ("true", "1", "yes")

     # Get MCP tools first to include in cache key
     mcp_tools = []
-
-
-
-
-
-
-
-
-
-
-
-
+    if mcp_enabled:
+        try:
+            from app.mcp.manager import get_mcp_manager
+            from app.mcp.enhanced_tools import create_secure_mcp_tools
+            mcp_manager = get_mcp_manager()
+            # Ensure MCP is initialized before creating tools
+            if not mcp_manager.is_initialized:
+                # Don't initialize during startup - let server startup handle it
+                logger.info("MCP manager not yet initialized, will use available tools when ready")
+                mcp_tools = []
+            else:
+                mcp_tools = create_secure_mcp_tools()
+                logger.info(f"Created {len(mcp_tools)} MCP tools for agent chain: {[tool.name for tool in mcp_tools]}")
+
+            if mcp_manager.is_initialized:
+                mcp_tools = create_secure_mcp_tools()
+                logger.info(f"Created {len(mcp_tools)} MCP tools for agent chain: {[tool.name for tool in mcp_tools]}")
+            else:
+                logger.warning("MCP manager not initialized, no MCP tools available")

-
-
-
-
-            logger.warning("MCP manager not initialized, no MCP tools available")
-
-        except Exception as e:
-            logger.warning(f"Failed to get MCP tools for agent: {str(e)}")
+        except Exception as e:
+            logger.warning(f"Failed to get MCP tools for agent: {str(e)}")
+    else:
+        logger.debug("MCP is disabled, no tools will be created for agent chain")

     # Include MCP tool count in cache key to ensure different chains for different tool availability
     cache_key = f"{model_id}_{ast_enabled}_{mcp_enabled}_{len(mcp_tools)}"

@@ -2099,22 +2093,26 @@ def create_agent_executor(agent_chain: Runnable):

     # Get MCP tools for the executor
     mcp_tools = []
-
-
-
-
-
-
-
-
-
-
-    logger.info(f"
-
-
-
-
-
+    # Check if MCP is enabled before creating tools
+    if os.environ.get("ZIYA_ENABLE_MCP", "true").lower() in ("true", "1", "yes"):
+        try:
+            logger.info("Attempting to get MCP tools for agent executor...")
+
+            from app.mcp.manager import get_mcp_manager
+            mcp_manager = get_mcp_manager()
+
+            if mcp_manager.is_initialized:
+                mcp_tools = create_mcp_tools()
+                logger.info(f"Created agent executor with {len(mcp_tools)} MCP tools")
+                for tool in mcp_tools:
+                    logger.info(f"  - {tool.name}: {tool.description}")
+            else:
+                logger.info("MCP not initialized, no MCP tools available")
+        except Exception as e:
+            logger.warning(f"Failed to initialize MCP tools: {str(e)}", exc_info=True)
+            from app.mcp.manager import get_mcp_manager
+    else:
+        logger.debug("MCP is disabled, no tools will be created for agent executor")
     mcp_manager = get_mcp_manager()

     logger.info(f"AGENT_EXECUTOR: Tools being passed to AgentExecutor: {[tool.name for tool in mcp_tools] if mcp_tools else 'No tools'}")

@@ -2363,15 +2361,15 @@ def initialize_langserve(app, executor):
         new_app.routes.append(route)

     # Add LangServe routes for non-Bedrock models (Gemini, Nova, etc.)
-    #
-    add_routes(
-
-
-
-
-    )
+    # DISABLED: LangServe /ziya routes cause duplicate execution with /api/chat
+    # add_routes(
+    #     new_app,
+    #     executor,
+    #     disabled_endpoints=["playground"],  # Keep stream and invoke for non-Bedrock models
+    #     path="/ziya"
+    # )

-    logger.info("
+    logger.info("DISABLED LangServe /ziya routes - using /api/chat only to prevent duplicate execution")

     # Clear all routes from original app
     while app.routes:
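Note: the agent.py changes above switch ZIYA_ENABLE_MCP from a raw os.environ.get() read to a truthy-string check that defaults to enabled. A minimal standalone sketch of that parsing pattern, using a hypothetical env_flag helper that is not part of ziya:

    import os

    def env_flag(name: str, default: str = "true") -> bool:
        # Mirrors the check used in the diff: unset, "true", "1", or "yes" enables the feature.
        return os.environ.get(name, default).lower() in ("true", "1", "yes")

    os.environ["ZIYA_ENABLE_MCP"] = "0"
    print(env_flag("ZIYA_ENABLE_MCP"))  # False
    del os.environ["ZIYA_ENABLE_MCP"]
    print(env_flag("ZIYA_ENABLE_MCP"))  # True (feature defaults to enabled)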
app/agents/direct_streaming.py
CHANGED

@@ -141,7 +141,7 @@ class DirectStreamingAgent:
         chunk_count = 0
         tool_results_sent = 0
         largest_chunk = 0
-        async for chunk in self.executor.stream_with_tools(openai_messages, tools):
+        async for chunk in self.executor.stream_with_tools(openai_messages, tools, conversation_id=conversation_id):
             chunk_count += 1
             chunk_size = len(str(chunk))
             largest_chunk = max(largest_chunk, chunk_size)
app/agents/prompts.py
CHANGED

@@ -248,7 +248,7 @@ Do not include any explanatory text within the diff blocks. If you need to provi
 AVAILABLE TOOLS:
 You have access to the following tools:

-
+{tools}

 The codebase is provided at the end of this prompt in a specific format.
 The code that the user has given to you for context is in the format like below where first line has the File path and then the content follows.
app/agents/prompts_manager.py
CHANGED

@@ -79,16 +79,20 @@ def get_extended_prompt(model_name: Optional[str] = None,
     logger.info(f"PROMPT_MANAGER: Template was modified: {len(extended_template) != len(original_template)}")

     # Create a new prompt template with the extended template
-
-
-
-
-
-
-
-
-
-
+    # Build messages list dynamically
+    messages = [
+        ("system", extended_template),
+        MessagesPlaceholder(variable_name="chat_history", optional=True),
+        ("user", "{question}"),
+    ]
+
+    # Only add AST context system message if AST is enabled
+    if os.environ.get("ZIYA_ENABLE_AST", "false").lower() in ("true", "1", "yes"):
+        messages.append(("system", "{ast_context}"))
+
+    messages.append(MessagesPlaceholder(variable_name="agent_scratchpad", optional=True))
+
+    extended_prompt = ChatPromptTemplate.from_messages(messages)

     # Cache the result
     _prompt_cache[cache_key] = extended_prompt
app/agents/wrappers/google_direct.py
CHANGED

@@ -19,6 +19,17 @@ class DirectGoogleModel:
        self.temperature = temperature
        self.max_output_tokens = max_output_tokens
        self.mcp_manager = get_mcp_manager()
+
+        logger.info(f"DirectGoogleModel initialized: model={model_name}, temp={temperature}, max_output_tokens={max_output_tokens}")
+
+        # Get API key from environment and configure genai
+        import os
+        api_key = os.getenv('GOOGLE_API_KEY')
+        if api_key:
+            genai.configure(api_key=api_key)
+            logger.info("Configured Google GenAI with API key from environment")
+        else:
+            logger.info("No GOOGLE_API_KEY found, will attempt to use Application Default Credentials")

    def _extract_text_from_mcp_result(self, result: Any) -> str:
        """Extracts the text content from a structured MCP tool result."""

@@ -123,8 +134,25 @@ class DirectGoogleModel:

        tool_calls = []
        model_response_parts = []
+        finish_reason = None
+        finish_reason_name = None

        async for chunk in response:
+            # Log finish reason if present
+            if hasattr(chunk, 'candidates') and chunk.candidates:
+                for candidate in chunk.candidates:
+                    if hasattr(candidate, 'finish_reason') and candidate.finish_reason:
+                        finish_reason = candidate.finish_reason
+                        # Decode finish reason
+                        try:
+                            from google.ai.generativelanguage_v1beta.types import Candidate
+                            finish_reason_name = Candidate.FinishReason(finish_reason).name
+                        except:
+                            finish_reason_name = str(finish_reason)
+                        logger.info(f"Google model finish_reason: {finish_reason_name} ({finish_reason})")
+                    if hasattr(candidate, 'safety_ratings') and candidate.safety_ratings:
+                        logger.info(f"Google model safety_ratings: {candidate.safety_ratings}")
+
            if chunk.parts:
                for part in chunk.parts:
                    if part.text:

@@ -136,6 +164,8 @@ class DirectGoogleModel:
            for candidate in chunk.candidates:
                if candidate.content and candidate.content.parts:
                    model_response_parts.extend(candidate.content.parts)
+
+        logger.info(f"Stream ended. Tool calls: {len(tool_calls)}, Finish reason: {finish_reason_name or finish_reason}")

        if not tool_calls:
            logger.info("No tool calls from model. Ending loop.")

@@ -155,7 +185,7 @@ class DirectGoogleModel:
            tool_result_obj = await self.mcp_manager.call_tool(tool_name, tool_args)
            tool_result_str = self._extract_text_from_mcp_result(tool_result_obj)

-            yield {"type": "
+            yield {"type": "tool_display", "tool_name": tool_name, "result": tool_result_str}

            tool_results.append(
                {"function_response": {"name": tool_name, "response": {"content": tool_result_str}}}
app/agents/wrappers/nova_tool_execution.py
CHANGED

@@ -136,7 +136,7 @@ async def execute_nova_tools_properly(bedrock_client, converse_params, formatted

            # Yield for frontend display in the format it expects
            yield {
-                'type': '
+                'type': 'tool_display',
                'tool_name': tool_use['name'],
                'result': result_text
            }

@@ -150,7 +150,7 @@ async def execute_nova_tools_properly(bedrock_client, converse_params, formatted
        except Exception as e:
            logger.error(f"Nova: Tool execution failed: {e}")
            yield {
-                'type': '
+                'type': 'tool_display',
                'tool_name': tool_use['name'],
                'result': f"Error: {str(e)}"
            }
app/agents/wrappers/nova_wrapper.py
CHANGED

@@ -554,7 +554,7 @@ class NovaWrapper(BaseChatModel):

            if result:
                results.append({
-                    'type': '
+                    'type': 'tool_display',
                    'tool_id': f'nova_text_{hash(f"{tool_name}_{command}") % 10000}',
                    'tool_name': f'mcp_{tool_name.replace("mcp_", "")}',
                    'result': result
app/agents/wrappers/ziya_bedrock.py
CHANGED

@@ -318,7 +318,8 @@ class ZiyaBedrock(Runnable):
        # Ensure system messages are properly ordered after caching
        messages = self._ensure_system_message_ordering(messages)

-
+        # Use much higher default if not set
+        kwargs["max_tokens"] = int(os.environ.get("ZIYA_MAX_OUTPUT_TOKENS", self.ziya_max_tokens or 32768))
        if self.ziya_max_tokens is not None and "max_tokens" not in kwargs:
            kwargs["max_tokens"] = self.ziya_max_tokens
            logger.debug(f"Added max_tokens={self.ziya_max_tokens} to _generate kwargs")

@@ -580,39 +581,60 @@ class ZiyaBedrock(Runnable):
        # Set streaming to True for this call
        self.bedrock_model.streaming = True

-        # Call the underlying model's stream method
-
-
-
-
-
-
-
-
-
-
-
-
+        # Call the underlying model's stream method with retry logic
+        stream_retries = 0
+        max_stream_retries = 2
+
+        while stream_retries <= max_stream_retries:
+            try:
+                for chunk in self.bedrock_model.stream(lc_messages, **kwargs):
+                    if hasattr(chunk, 'content') and chunk.content:
+                        # Check for repetitive lines
+                        content = chunk.content
+                        lines = content.split('\n')
+
+                        for line in lines:
+                            if line.strip():  # Only track non-empty lines
+                                self._recent_lines.append(line)
+                                # Keep only recent lines
+                                if len(self._recent_lines) > 100:
+                                    self._recent_lines.pop(0)
+
+                        # Check if any line repeats too many times
+                        if any(self._recent_lines.count(line) > self._max_repetitions for line in set(self._recent_lines)):
+                            yield "\n\n**Warning: Response was interrupted because repetitive content was detected.**"
+
+                            # Log the repetitive content for debugging
+                            repetitive_lines = [line for line in set(self._recent_lines)
+                                                if self._recent_lines.count(line) > self._max_repetitions]
+                            logger.warning(f"Repetitive content detected. Repetitive lines: {repetitive_lines}")
+
+                            # Send a special marker to indicate the stream should end
+                            yield "\n\n[STREAM_END_REPETITION_DETECTED]"
+
+                            # Break the streaming loop
+                            logger.warning("Streaming response interrupted due to repetitive content")
+                            return
+
+                        yield chunk.content
+                    elif hasattr(chunk, 'message') and hasattr(chunk.message, 'content'):
+                        yield chunk.message.content
+                return  # Success, exit retry loop

-
-
-
-
-                # Log the repetitive content for debugging
-                repetitive_lines = [line for line in set(self._recent_lines)
-                                    if self._recent_lines.count(line) > self._max_repetitions]
-                logger.warning(f"Repetitive content detected. Repetitive lines: {repetitive_lines}")
+            except Exception as e:
+                error_str = str(e)
+                if ("ThrottlingException" in error_str or "rate limit" in error_str.lower() or
+                        "timeout" in error_str.lower()) and stream_retries < max_stream_retries:

-
-
+                    stream_retries += 1
+                    delay = 2 if stream_retries == 1 else 5  # 2s, 5s
+                    logger.warning(f"🔄 STREAM_RETRY: Attempt {stream_retries}/{max_stream_retries} after {delay}s delay")

-
-
-
-
-
-                elif hasattr(chunk, 'message') and hasattr(chunk.message, 'content'):
-                    yield chunk.message.content
+                    import time
+                    time.sleep(delay)
+                    continue
+                else:
+                    raise  # Re-raise for higher-level retry or final failure

    async def astream(self, messages: List[Dict[str, Any]], system: Optional[str] = None, **kwargs) -> AsyncIterator[str]:
        """
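Note: the stream() rewrite above buffers the last ~100 non-empty lines and aborts once any single line repeats more than _max_repetitions times. A minimal standalone sketch of that sliding-window repetition guard (the class name and limits below are illustrative, not ziya's API):

    from collections import deque

    class RepetitionGuard:
        def __init__(self, window=100, max_repetitions=10):
            self._recent = deque(maxlen=window)  # rolling window of recent non-empty lines
            self._max = max_repetitions

        def feed(self, text):
            """Track streamed text; return True once any line repeats too often."""
            for line in text.split("\n"):
                if line.strip():
                    self._recent.append(line)
            return any(self._recent.count(line) > self._max for line in set(self._recent))

    guard = RepetitionGuard(max_repetitions=3)
    for chunk in ["a\n", "b\n"] + ["same line\n"] * 5:
        if guard.feed(chunk):
            print("repetitive content detected; stop streaming")
            break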
app/config/models_config.py
CHANGED

@@ -9,7 +9,7 @@ import os
 # Model configuration
 DEFAULT_ENDPOINT = "bedrock"
 DEFAULT_MODELS = {
-    "bedrock": "sonnet4.
+    "bedrock": "sonnet4.5",
     "google": "gemini-pro"
 }

@@ -29,7 +29,7 @@ GLOBAL_MODEL_DEFAULTS = {
     "temperature": 0.3,
     "supports_thinking": False,
     "supports_max_input_tokens": False,
-    "default_max_output_tokens":
+    "default_max_output_tokens": 32768,  # Default value for max_output_tokens
     "parameter_mappings": {
         "max_output_tokens": ["max_tokens"],  # Some APIs use max_tokens instead
         "temperature": ["temperature"],

@@ -169,19 +169,6 @@ ENDPOINT_DEFAULTS = {
 # Model-specific configs that override endpoint defaults
 MODEL_CONFIGS = {
     "bedrock": {
-        "opus4.1": {
-            "model_id": {
-                "us": "us.anthropic.claude-opus-4-1-20250805-v1:0"
-            },
-            "token_limit": 200000,  # Total context window size
-            "max_output_tokens": 64000,  # Maximum output tokens
-            "default_max_output_tokens": 10000,  # Default value for max_output_tokens
-            "supports_max_input_tokens": True,
-            "supports_thinking": True,  # Override global default
-            "family": "claude",
-            "supports_context_caching": True,
-            "region": "us-east-1"  # Model-specific region preference
-        },
         "sonnet4.0": {
             "model_id": {
                 "us": "us.anthropic.claude-sonnet-4-20250514-v1:0",

@@ -196,7 +183,7 @@ MODEL_CONFIGS = {
             "preferred_region": "us-east-1",  # Default preference but not restricted
             "token_limit": 200000,  # Total context window size
             "max_output_tokens": 64000,  # Maximum output tokens
-            "default_max_output_tokens":
+            "default_max_output_tokens": 36000,  # Default value for max_output_tokens
             "supports_max_input_tokens": True,
             "supports_thinking": True,  # Override global default
             "family": "claude",

@@ -205,6 +192,26 @@ MODEL_CONFIGS = {
             "extended_context_limit": 1000000,  # Extended context window size
             "extended_context_header": "context-1m-2025-08-07"  # Beta header for extended context
         },
+        "sonnet4.5": {
+            "model_id": {
+                "us": "anthropic.claude-sonnet-4-5-20250929-v1:0",
+                "eu": "anthropic.claude-sonnet-4-5-20250929-v1:0"
+            },
+            "available_regions": [
+                "us-east-1", "us-west-2", "eu-west-1", "eu-central-1", "ap-southeast-1"
+            ],
+            "preferred_region": "us-east-1",  # Default preference
+            "token_limit": 200000,  # Total context window size
+            "max_output_tokens": 64000,  # Maximum output tokens
+            "default_max_output_tokens": 36000,  # Default value for max_output_tokens
+            "supports_max_input_tokens": True,
+            "supports_thinking": True,  # Override global default
+            "family": "claude",
+            "supports_context_caching": True,
+            "supports_extended_context": True,  # Supports 1M token context window
+            "extended_context_limit": 1000000,  # Extended context window size
+            "extended_context_header": "context-1m-2025-08-07"  # Same header as sonnet4.0
+        },
         "sonnet3.7": {
             "model_id": "eu.anthropic.claude-3-7-sonnet-20250219-v1:0",
             "available_regions": ["eu-west-1", "eu-central-1"],

@@ -243,9 +250,16 @@ MODEL_CONFIGS = {
             "region_restricted": True,  # Only available in US regions
             "preferred_region": "us-east-1",
             "family": "claude",
-            "
-        },
+            "region": "us-east-1"  # Model-specific region preference
+        },
         "opus4": {
+            "max_output_tokens": 64000,  # Add explicit output token limits
+            "default_max_output_tokens": 32000,  # Higher default for opus4
+            "max_iterations": 8,  # Higher iteration limit for advanced model
+            "timeout_multiplier": 6,  # Longer timeouts for complex responses
+            "is_advanced_model": True,  # Flag for 4.0+ capabilities
+            "token_limit": 200000,  # Add context window
+            "supports_max_input_tokens": True,
             "model_id": {
                 "us": "us.anthropic.claude-opus-4-20250514-v1:0"
             },

@@ -264,7 +278,10 @@ MODEL_CONFIGS = {
             "preferred_region": "us-east-1",
             "token_limit": 200000,  # Total context window size
             "max_output_tokens": 64000,  # Maximum output tokens
-            "default_max_output_tokens":
+            "default_max_output_tokens": 32000,  # Increased from 10k to 32k for longer responses
+            "max_iterations": 8,
+            "timeout_multiplier": 6,
+            "is_advanced_model": True,
             "supports_max_input_tokens": True,
             "supports_thinking": True,  # Override global default
             "family": "claude",

@@ -352,7 +369,7 @@ MODEL_CONFIGS = {
         "model_id": "gemini-2.5-pro",
         "token_limit": 1048576,
         "family": "gemini-pro",
-        "max_output_tokens":
+        "max_output_tokens": 65536,  # Gemini 2.5 Pro supports up to 65K output tokens
         "convert_system_message_to_human": False,
         "supports_function_calling": True,
         "native_function_calling": True,

@@ -426,6 +443,30 @@ TOOL_SENTINEL_TAG = os.environ.get("ZIYA_TOOL_SENTINEL", "TOOL_SENTINEL")
 TOOL_SENTINEL_OPEN = f"<{TOOL_SENTINEL_TAG}>"
 TOOL_SENTINEL_CLOSE = f"</{TOOL_SENTINEL_TAG}>"

+# Shell command configuration
+DEFAULT_SHELL_CONFIG = {
+    "enabled": True,
+    "allowedCommands": [
+        "ls", "cat", "pwd", "grep", "wc", "touch", "find", "date", "od", "df",
+        "netstat", "lsof", "ps", "sed", "awk", "cut", "sort", "which", "hexdump",
+        "xxd", "tail", "head", "echo", "printf", "tr", "uniq", "column", "nl",
+        "tee", "base64", "md5sum", "sha1sum", "sha256sum", "bc", "expr", "seq",
+        "paste", "join", "fold", "expand", "cd", "tree", "less", "xargs", "curl",
+        "ping", "du", "file"
+    ],
+    "gitOperationsEnabled": True,
+    "safeGitOperations": [
+        "status", "log", "show", "diff", "branch", "remote", "config --get",
+        "ls-files", "ls-tree", "blame", "tag", "stash list", "reflog",
+        "rev-parse", "describe", "shortlog", "whatchanged"
+    ],
+    "timeout": 90  # Increased base timeout to support longer operations
+}
+
+def get_default_shell_config():
+    """Get the default shell configuration."""
+    return DEFAULT_SHELL_CONFIG.copy()
+
 # Helper functions for model parameter validation

 def get_supported_parameters(endpoint, model_name):
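Note: MODEL_CONFIGS entries such as the new "sonnet4.5" block only list keys that differ from GLOBAL_MODEL_DEFAULTS. A sketch of how such a layered lookup is typically resolved (the resolve_model_config helper below is illustrative, not ziya's actual accessor):

    GLOBAL_MODEL_DEFAULTS = {"temperature": 0.3, "supports_thinking": False, "default_max_output_tokens": 32768}
    MODEL_CONFIGS = {"bedrock": {"sonnet4.5": {"default_max_output_tokens": 36000, "supports_thinking": True, "token_limit": 200000}}}

    def resolve_model_config(endpoint, model_name):
        # Model-specific keys override the global defaults.
        merged = dict(GLOBAL_MODEL_DEFAULTS)
        merged.update(MODEL_CONFIGS.get(endpoint, {}).get(model_name, {}))
        return merged

    print(resolve_model_config("bedrock", "sonnet4.5")["default_max_output_tokens"])  # 36000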
app/config/shell_config.py
CHANGED

@@ -1,5 +1,9 @@
 """
 Single source of truth for shell command configuration.
+
+IMPORTANT: All commands must be complete, non-interactive operations.
+Do not use tools in interactive mode (e.g., 'bc' without expression, 'python' REPL).
+Always provide complete command with all arguments needed for one-shot execution.
 """

 # SINGLE SOURCE OF TRUTH for shell command configuration

@@ -19,7 +23,7 @@ DEFAULT_SHELL_CONFIG = {
         "ls-files", "ls-tree", "blame", "tag", "stash list", "reflog",
         "rev-parse", "describe", "shortlog", "whatchanged"
     ],
-    "timeout":
+    "timeout": 30
 }

 def get_default_shell_config():