ziya 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ziya might be problematic.

Files changed (73)
  1. app/agents/agent.py +71 -73
  2. app/agents/direct_streaming.py +1 -1
  3. app/agents/prompts.py +1 -1
  4. app/agents/prompts_manager.py +14 -10
  5. app/agents/wrappers/google_direct.py +31 -1
  6. app/agents/wrappers/nova_tool_execution.py +2 -2
  7. app/agents/wrappers/nova_wrapper.py +1 -1
  8. app/agents/wrappers/ziya_bedrock.py +53 -31
  9. app/config/models_config.py +61 -20
  10. app/config/shell_config.py +5 -1
  11. app/extensions/prompt_extensions/claude_extensions.py +27 -5
  12. app/extensions/prompt_extensions/mcp_prompt_extensions.py +82 -56
  13. app/main.py +5 -3
  14. app/mcp/client.py +19 -10
  15. app/mcp/manager.py +68 -10
  16. app/mcp/tools.py +8 -9
  17. app/mcp_servers/shell_server.py +3 -3
  18. app/middleware/streaming.py +29 -41
  19. app/routes/file_validation.py +35 -0
  20. app/routes/mcp_routes.py +54 -8
  21. app/server.py +525 -614
  22. app/streaming_tool_executor.py +748 -137
  23. app/templates/asset-manifest.json +20 -20
  24. app/templates/index.html +1 -1
  25. app/templates/static/css/{main.0297bfee.css → main.e7109b49.css} +2 -2
  26. app/templates/static/css/main.e7109b49.css.map +1 -0
  27. app/templates/static/js/14386.65fcfe53.chunk.js +2 -0
  28. app/templates/static/js/14386.65fcfe53.chunk.js.map +1 -0
  29. app/templates/static/js/35589.0368973a.chunk.js +2 -0
  30. app/templates/static/js/35589.0368973a.chunk.js.map +1 -0
  31. app/templates/static/js/{50295.ab92f61b.chunk.js → 50295.90aca393.chunk.js} +3 -3
  32. app/templates/static/js/50295.90aca393.chunk.js.map +1 -0
  33. app/templates/static/js/55734.5f0fd567.chunk.js +2 -0
  34. app/templates/static/js/55734.5f0fd567.chunk.js.map +1 -0
  35. app/templates/static/js/58542.57fed736.chunk.js +2 -0
  36. app/templates/static/js/58542.57fed736.chunk.js.map +1 -0
  37. app/templates/static/js/{68418.2554bb1e.chunk.js → 68418.f7b4d2d9.chunk.js} +3 -3
  38. app/templates/static/js/68418.f7b4d2d9.chunk.js.map +1 -0
  39. app/templates/static/js/99948.b280eda0.chunk.js +2 -0
  40. app/templates/static/js/99948.b280eda0.chunk.js.map +1 -0
  41. app/templates/static/js/main.e075582c.js +3 -0
  42. app/templates/static/js/main.e075582c.js.map +1 -0
  43. app/utils/code_util.py +5 -2
  44. app/utils/context_cache.py +11 -0
  45. app/utils/conversation_filter.py +90 -0
  46. app/utils/custom_bedrock.py +43 -1
  47. app/utils/diff_utils/validation/validators.py +32 -22
  48. app/utils/file_cache.py +5 -3
  49. app/utils/precision_prompt_system.py +116 -0
  50. app/utils/streaming_optimizer.py +100 -0
  51. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/METADATA +3 -2
  52. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/RECORD +59 -55
  53. app/templates/static/css/main.0297bfee.css.map +0 -1
  54. app/templates/static/js/14386.567bf803.chunk.js +0 -2
  55. app/templates/static/js/14386.567bf803.chunk.js.map +0 -1
  56. app/templates/static/js/35589.278ecda2.chunk.js +0 -2
  57. app/templates/static/js/35589.278ecda2.chunk.js.map +0 -1
  58. app/templates/static/js/50295.ab92f61b.chunk.js.map +0 -1
  59. app/templates/static/js/55734.90d8bd52.chunk.js +0 -2
  60. app/templates/static/js/55734.90d8bd52.chunk.js.map +0 -1
  61. app/templates/static/js/58542.08fb5cf4.chunk.js +0 -2
  62. app/templates/static/js/58542.08fb5cf4.chunk.js.map +0 -1
  63. app/templates/static/js/68418.2554bb1e.chunk.js.map +0 -1
  64. app/templates/static/js/99948.71670e91.chunk.js +0 -2
  65. app/templates/static/js/99948.71670e91.chunk.js.map +0 -1
  66. app/templates/static/js/main.1d79eac2.js +0 -3
  67. app/templates/static/js/main.1d79eac2.js.map +0 -1
  68. /app/templates/static/js/{50295.ab92f61b.chunk.js.LICENSE.txt → 50295.90aca393.chunk.js.LICENSE.txt} +0 -0
  69. /app/templates/static/js/{68418.2554bb1e.chunk.js.LICENSE.txt → 68418.f7b4d2d9.chunk.js.LICENSE.txt} +0 -0
  70. /app/templates/static/js/{main.1d79eac2.js.LICENSE.txt → main.e075582c.js.LICENSE.txt} +0 -0
  71. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/WHEEL +0 -0
  72. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/entry_points.txt +0 -0
  73. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/licenses/LICENSE +0 -0
app/agents/agent.py CHANGED
@@ -820,9 +820,12 @@ class RetryingChatBedrock(Runnable):
         model_config = config.copy() if config else {}
         if conversation_id:
             model_config["conversation_id"] = conversation_id
+
+        # Merge model_config into kwargs for compatibility with all model types
+        merged_kwargs = {**kwargs, **model_config}
 
-        async for chunk in self.model.astream(messages, model_config, **kwargs):
-            logger.error(f"🔍 AGENT_MODEL_ASTREAM: Received chunk type: {type(chunk)}, content: {getattr(chunk, 'content', str(chunk))[:100]}")
+        async for chunk in self.model.astream(messages, **merged_kwargs):
+            logger.debug(f"🔍 AGENT_MODEL_ASTREAM: Received chunk type: {type(chunk)}, content: {getattr(chunk, 'content', str(chunk))[:100]}")
             # Check if this is an error chunk that should terminate this specific stream
             # If we reach here, we've successfully started streaming
 
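Since model_config is spread last in the merge, its values override any overlapping keys already present in kwargs. A standalone illustration of that precedence (the example values are made up):

    kwargs = {"temperature": 0.2, "max_tokens": 1024}
    model_config = {"conversation_id": "abc123", "temperature": 0.4}

    # Later unpacking wins on conflicts, so model_config takes precedence.
    merged_kwargs = {**kwargs, **model_config}
    print(merged_kwargs)
    # {'temperature': 0.4, 'max_tokens': 1024, 'conversation_id': 'abc123'}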
@@ -1078,23 +1081,20 @@ class RetryingChatBedrock(Runnable):
 
 
             # Check if this is a throttling error wrapped in another exception
+            logger.error(f"🔍 ACTUAL_ERROR: {error_str}")
+            logger.error(f"🔍 ERROR_TYPE: {type(e)}")
             if "ThrottlingException" in error_str or "Too many requests" in error_str:
                 logger.warning("Detected throttling error in exception")
-                # Format error message for throttling
+                # Simple error message for frontend
+                error_message = {
+                    "error": "⚠️ AWS rate limit exceeded. Please wait a moment and try again.",
+                    "type": "throttling"
+                }
+            else:
+                # Show the actual error instead of masking it
                 error_message = {
-                    "error": "throttling_error",
-                    "detail": "AWS Bedrock rate limit exceeded. All automatic retries have been exhausted.",
-                    "status_code": 429,
-                    "stream_id": stream_id,
-                    "retry_after": "60",
-                    "throttle_info": {
-                        "auto_attempts_exhausted": True,
-                        "total_auto_attempts": max_retries,
-                        "can_user_retry": True,
-                        "backoff_used": [5, 10, 20, 40][:attempt + 1]
-                    },
-                    "ui_action": "show_retry_button",
-                    "user_message": "Click 'Retry' to attempt again, or wait a few minutes for better success rate."
+                    "error": f"⚠️ Error: {error_str}",
+                    "type": "general"
                 }
 
             # Include pre-streaming work in preservation
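For reference, the simplified payload can be consumed by branching on its type field; a minimal, illustrative sketch (the handler name and surrounding transport are not part of ziya):

    import json

    def summarize_error_event(raw_event: str) -> str:
        """Illustrative consumer of the simplified {"error", "type"} payload."""
        payload = json.loads(raw_event)
        if payload.get("type") == "throttling":
            # Transient rate-limit condition: worth retrying after a short wait.
            return payload["error"]
        # "general" errors now carry the original error text instead of a generic message.
        return payload.get("error", "Unknown error")

    print(summarize_error_event('{"error": "⚠️ Error: boom", "type": "general"}'))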
@@ -1412,19 +1412,10 @@ class RetryingChatBedrock(Runnable):
                 time.sleep(retry_delay)
                 continue
             else:
-                # Final attempt failed - enhance error response for frontend
+                # Simple error response for frontend
                 error_message = {
-                    "error": "throttling_error",
-                    "detail": "AWS Bedrock rate limit exceeded. All automatic retries have been exhausted.",
-                    "status_code": 429,
-                    "throttle_info": {
-                        "auto_attempts_exhausted": True,
-                        "total_auto_attempts": max_retries,
-                        "can_user_retry": True,
-                        "backoff_used": [5.0, 10.0, 20.0, 40.0][:attempt + 1]
-                    },
-                    "ui_action": "show_retry_button",
-                    "user_message": "Click 'Retry' to attempt again, or wait a few minutes for better success rate."
+                    "error": "⚠️ AWS rate limit exceeded. Please wait a moment and try again.",
+                    "type": "throttling"
                 }
                 # Let this fall through to the final error handling
 
@@ -1808,31 +1799,34 @@ def create_agent_chain(chat_model: BaseChatModel):
     # Create cache key based on model configuration
     model_id = ModelManager.get_model_id() or getattr(chat_model, 'model_id', 'unknown')
     ast_enabled = os.environ.get("ZIYA_ENABLE_AST") == "true"
-    mcp_enabled = os.environ.get("ZIYA_ENABLE_MCP") != "false"
+    mcp_enabled = os.environ.get("ZIYA_ENABLE_MCP", "true").lower() in ("true", "1", "yes")
 
     # Get MCP tools first to include in cache key
     mcp_tools = []
-    try:
-        from app.mcp.manager import get_mcp_manager
-        from app.mcp.enhanced_tools import create_secure_mcp_tools
-        mcp_manager = get_mcp_manager()
-        # Ensure MCP is initialized before creating tools
-        if not mcp_manager.is_initialized:
-            # Don't initialize during startup - let server startup handle it
-            logger.info("MCP manager not yet initialized, will use available tools when ready")
-            mcp_tools = []
-        else:
-            mcp_tools = create_secure_mcp_tools()
-            logger.info(f"Created {len(mcp_tools)} MCP tools for agent chain: {[tool.name for tool in mcp_tools]}")
+    if mcp_enabled:
+        try:
+            from app.mcp.manager import get_mcp_manager
+            from app.mcp.enhanced_tools import create_secure_mcp_tools
+            mcp_manager = get_mcp_manager()
+            # Ensure MCP is initialized before creating tools
+            if not mcp_manager.is_initialized:
+                # Don't initialize during startup - let server startup handle it
+                logger.info("MCP manager not yet initialized, will use available tools when ready")
+                mcp_tools = []
+            else:
+                mcp_tools = create_secure_mcp_tools()
+                logger.info(f"Created {len(mcp_tools)} MCP tools for agent chain: {[tool.name for tool in mcp_tools]}")
+
+            if mcp_manager.is_initialized:
+                mcp_tools = create_secure_mcp_tools()
+                logger.info(f"Created {len(mcp_tools)} MCP tools for agent chain: {[tool.name for tool in mcp_tools]}")
+            else:
+                logger.warning("MCP manager not initialized, no MCP tools available")
 
-        if mcp_manager.is_initialized:
-            mcp_tools = create_secure_mcp_tools()
-            logger.info(f"Created {len(mcp_tools)} MCP tools for agent chain: {[tool.name for tool in mcp_tools]}")
-        else:
-            logger.warning("MCP manager not initialized, no MCP tools available")
-
-    except Exception as e:
-        logger.warning(f"Failed to get MCP tools for agent: {str(e)}")
+        except Exception as e:
+            logger.warning(f"Failed to get MCP tools for agent: {str(e)}")
+    else:
+        logger.debug("MCP is disabled, no tools will be created for agent chain")
 
     # Include MCP tool count in cache key to ensure different chains for different tool availability
     cache_key = f"{model_id}_{ast_enabled}_{mcp_enabled}_{len(mcp_tools)}"
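The new check treats only explicit truthy spellings as enabled, whereas the old != "false" comparison counted values like "0" or "no" as enabled. A small standalone illustration (the helper name is made up; agent.py inlines the expression):

    import os

    def env_flag(name: str, default: str = "true") -> bool:
        # Only "true", "1", or "yes" (any case) count as enabled.
        return os.environ.get(name, default).lower() in ("true", "1", "yes")

    os.environ["ZIYA_ENABLE_MCP"] = "0"
    print(os.environ.get("ZIYA_ENABLE_MCP") != "false")  # True  (old check: still enabled)
    print(env_flag("ZIYA_ENABLE_MCP"))                   # False (new check: disabled)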
@@ -2099,22 +2093,26 @@ def create_agent_executor(agent_chain: Runnable):
 
     # Get MCP tools for the executor
     mcp_tools = []
-    try:
-        logger.info("Attempting to get MCP tools for agent executor...")
-
-        from app.mcp.manager import get_mcp_manager
-        mcp_manager = get_mcp_manager()
-
-        if mcp_manager.is_initialized:
-            mcp_tools = create_mcp_tools()
-            logger.info(f"Created agent executor with {len(mcp_tools)} MCP tools")
-            for tool in mcp_tools:
-                logger.info(f" - {tool.name}: {tool.description}")
-        else:
-            logger.info("MCP not initialized, no MCP tools available")
-    except Exception as e:
-        logger.warning(f"Failed to initialize MCP tools: {str(e)}", exc_info=True)
-    from app.mcp.manager import get_mcp_manager
+    # Check if MCP is enabled before creating tools
+    if os.environ.get("ZIYA_ENABLE_MCP", "true").lower() in ("true", "1", "yes"):
+        try:
+            logger.info("Attempting to get MCP tools for agent executor...")
+
+            from app.mcp.manager import get_mcp_manager
+            mcp_manager = get_mcp_manager()
+
+            if mcp_manager.is_initialized:
+                mcp_tools = create_mcp_tools()
+                logger.info(f"Created agent executor with {len(mcp_tools)} MCP tools")
+                for tool in mcp_tools:
+                    logger.info(f" - {tool.name}: {tool.description}")
+            else:
+                logger.info("MCP not initialized, no MCP tools available")
+        except Exception as e:
+            logger.warning(f"Failed to initialize MCP tools: {str(e)}", exc_info=True)
+            from app.mcp.manager import get_mcp_manager
+    else:
+        logger.debug("MCP is disabled, no tools will be created for agent executor")
     mcp_manager = get_mcp_manager()
 
     logger.info(f"AGENT_EXECUTOR: Tools being passed to AgentExecutor: {[tool.name for tool in mcp_tools] if mcp_tools else 'No tools'}")
@@ -2363,15 +2361,15 @@ def initialize_langserve(app, executor):
         new_app.routes.append(route)
 
     # Add LangServe routes for non-Bedrock models (Gemini, Nova, etc.)
-    # The priority /api/chat endpoint will intercept Bedrock requests
-    add_routes(
-        new_app,
-        executor,
-        disabled_endpoints=["playground"],  # Keep stream and invoke for non-Bedrock models
-        path="/ziya"
-    )
+    # DISABLED: LangServe /ziya routes cause duplicate execution with /api/chat
+    # add_routes(
+    #     new_app,
+    #     executor,
+    #     disabled_endpoints=["playground"],  # Keep stream and invoke for non-Bedrock models
+    #     path="/ziya"
+    # )
 
-    logger.info("Added LangServe routes - priority /api/chat will handle Bedrock routing")
+    logger.info("DISABLED LangServe /ziya routes - using /api/chat only to prevent duplicate execution")
 
     # Clear all routes from original app
     while app.routes:
app/agents/direct_streaming.py CHANGED
@@ -141,7 +141,7 @@ class DirectStreamingAgent:
         chunk_count = 0
         tool_results_sent = 0
         largest_chunk = 0
-        async for chunk in self.executor.stream_with_tools(openai_messages, tools):
+        async for chunk in self.executor.stream_with_tools(openai_messages, tools, conversation_id=conversation_id):
             chunk_count += 1
             chunk_size = len(str(chunk))
             largest_chunk = max(largest_chunk, chunk_size)
app/agents/prompts.py CHANGED
@@ -248,7 +248,7 @@ Do not include any explanatory text within the diff blocks. If you need to provi
 AVAILABLE TOOLS:
 You have access to the following tools:
 
-MCP tools available
+{tools}
 
 The codebase is provided at the end of this prompt in a specific format.
 The code that the user has given to you for context is in the format like below where first line has the File path and then the content follows.
app/agents/prompts_manager.py CHANGED
@@ -79,16 +79,20 @@ def get_extended_prompt(model_name: Optional[str] = None,
     logger.info(f"PROMPT_MANAGER: Template was modified: {len(extended_template) != len(original_template)}")
 
     # Create a new prompt template with the extended template
-    extended_prompt = ChatPromptTemplate.from_messages(
-        [
-            ("system", extended_template),
-            MessagesPlaceholder(variable_name="chat_history", optional=True),
-            ("user", "{question}"),
-            # Add AST context if available
-            ("system", "{ast_context}"),
-            MessagesPlaceholder(variable_name="agent_scratchpad", optional=True),
-        ]
-    )
+    # Build messages list dynamically
+    messages = [
+        ("system", extended_template),
+        MessagesPlaceholder(variable_name="chat_history", optional=True),
+        ("user", "{question}"),
+    ]
+
+    # Only add AST context system message if AST is enabled
+    if os.environ.get("ZIYA_ENABLE_AST", "false").lower() in ("true", "1", "yes"):
+        messages.append(("system", "{ast_context}"))
+
+    messages.append(MessagesPlaceholder(variable_name="agent_scratchpad", optional=True))
+
+    extended_prompt = ChatPromptTemplate.from_messages(messages)
 
     # Cache the result
     _prompt_cache[cache_key] = extended_prompt
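Making the {ast_context} system message conditional means the template no longer declares an input variable that has no value when AST is disabled. A minimal sketch of the same pattern with langchain-core (the prompt text and flag are illustrative):

    from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

    ast_enabled = False  # stands in for the ZIYA_ENABLE_AST check above

    messages = [
        ("system", "Base system prompt"),
        MessagesPlaceholder(variable_name="chat_history", optional=True),
        ("user", "{question}"),
    ]
    if ast_enabled:
        messages.append(("system", "{ast_context}"))
    messages.append(MessagesPlaceholder(variable_name="agent_scratchpad", optional=True))

    prompt = ChatPromptTemplate.from_messages(messages)
    # With ast_enabled=False, formatting succeeds without supplying ast_context.
    print(prompt.invoke({"question": "hello"}))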
app/agents/wrappers/google_direct.py CHANGED
@@ -19,6 +19,17 @@ class DirectGoogleModel:
         self.temperature = temperature
         self.max_output_tokens = max_output_tokens
         self.mcp_manager = get_mcp_manager()
+
+        logger.info(f"DirectGoogleModel initialized: model={model_name}, temp={temperature}, max_output_tokens={max_output_tokens}")
+
+        # Get API key from environment and configure genai
+        import os
+        api_key = os.getenv('GOOGLE_API_KEY')
+        if api_key:
+            genai.configure(api_key=api_key)
+            logger.info("Configured Google GenAI with API key from environment")
+        else:
+            logger.info("No GOOGLE_API_KEY found, will attempt to use Application Default Credentials")
 
     def _extract_text_from_mcp_result(self, result: Any) -> str:
         """Extracts the text content from a structured MCP tool result."""
@@ -123,8 +134,25 @@
 
         tool_calls = []
         model_response_parts = []
+        finish_reason = None
+        finish_reason_name = None
 
         async for chunk in response:
+            # Log finish reason if present
+            if hasattr(chunk, 'candidates') and chunk.candidates:
+                for candidate in chunk.candidates:
+                    if hasattr(candidate, 'finish_reason') and candidate.finish_reason:
+                        finish_reason = candidate.finish_reason
+                        # Decode finish reason
+                        try:
+                            from google.ai.generativelanguage_v1beta.types import Candidate
+                            finish_reason_name = Candidate.FinishReason(finish_reason).name
+                        except:
+                            finish_reason_name = str(finish_reason)
+                        logger.info(f"Google model finish_reason: {finish_reason_name} ({finish_reason})")
+                    if hasattr(candidate, 'safety_ratings') and candidate.safety_ratings:
+                        logger.info(f"Google model safety_ratings: {candidate.safety_ratings}")
+
             if chunk.parts:
                 for part in chunk.parts:
                     if part.text:
@@ -136,6 +164,8 @@
             for candidate in chunk.candidates:
                 if candidate.content and candidate.content.parts:
                     model_response_parts.extend(candidate.content.parts)
+
+        logger.info(f"Stream ended. Tool calls: {len(tool_calls)}, Finish reason: {finish_reason_name or finish_reason}")
 
         if not tool_calls:
             logger.info("No tool calls from model. Ending loop.")
@@ -155,7 +185,7 @@
             tool_result_obj = await self.mcp_manager.call_tool(tool_name, tool_args)
             tool_result_str = self._extract_text_from_mcp_result(tool_result_obj)
 
-            yield {"type": "tool_execution", "tool_name": tool_name, "result": tool_result_str}
+            yield {"type": "tool_display", "tool_name": tool_name, "result": tool_result_str}
 
             tool_results.append(
                 {"function_response": {"name": tool_name, "response": {"content": tool_result_str}}}
app/agents/wrappers/nova_tool_execution.py CHANGED
@@ -136,7 +136,7 @@ async def execute_nova_tools_properly(bedrock_client, converse_params, formatted
 
                 # Yield for frontend display in the format it expects
                 yield {
-                    'type': 'tool_execution',
+                    'type': 'tool_display',
                     'tool_name': tool_use['name'],
                     'result': result_text
                 }
@@ -150,7 +150,7 @@ async def execute_nova_tools_properly(bedrock_client, converse_params, formatted
             except Exception as e:
                 logger.error(f"Nova: Tool execution failed: {e}")
                 yield {
-                    'type': 'tool_execution',
+                    'type': 'tool_display',
                     'tool_name': tool_use['name'],
                     'result': f"Error: {str(e)}"
                 }
app/agents/wrappers/nova_wrapper.py CHANGED
@@ -554,7 +554,7 @@ class NovaWrapper(BaseChatModel):
 
                 if result:
                     results.append({
-                        'type': 'tool_execution',
+                        'type': 'tool_display',
                         'tool_id': f'nova_text_{hash(f"{tool_name}_{command}") % 10000}',
                         'tool_name': f'mcp_{tool_name.replace("mcp_", "")}',
                         'result': result
app/agents/wrappers/ziya_bedrock.py CHANGED
@@ -318,7 +318,8 @@ class ZiyaBedrock(Runnable):
         # Ensure system messages are properly ordered after caching
         messages = self._ensure_system_message_ordering(messages)
 
-        kwargs["max_tokens"] = int(os.environ.get("ZIYA_MAX_OUTPUT_TOKENS", self.ziya_max_tokens))  # Use environment variable if available
+        # Use much higher default if not set
+        kwargs["max_tokens"] = int(os.environ.get("ZIYA_MAX_OUTPUT_TOKENS", self.ziya_max_tokens or 32768))
         if self.ziya_max_tokens is not None and "max_tokens" not in kwargs:
             kwargs["max_tokens"] = self.ziya_max_tokens
             logger.debug(f"Added max_tokens={self.ziya_max_tokens} to _generate kwargs")
@@ -580,39 +581,60 @@ class ZiyaBedrock(Runnable):
         # Set streaming to True for this call
         self.bedrock_model.streaming = True
 
-        # Call the underlying model's stream method
-        for chunk in self.bedrock_model.stream(lc_messages, **kwargs):
-            if hasattr(chunk, 'content') and chunk.content:
-                # Check for repetitive lines
-                content = chunk.content
-                lines = content.split('\n')
-
-                for line in lines:
-                    if line.strip():  # Only track non-empty lines
-                        self._recent_lines.append(line)
-                        # Keep only recent lines
-                        if len(self._recent_lines) > 100:
-                            self._recent_lines.pop(0)
+        # Call the underlying model's stream method with retry logic
+        stream_retries = 0
+        max_stream_retries = 2
+
+        while stream_retries <= max_stream_retries:
+            try:
+                for chunk in self.bedrock_model.stream(lc_messages, **kwargs):
+                    if hasattr(chunk, 'content') and chunk.content:
+                        # Check for repetitive lines
+                        content = chunk.content
+                        lines = content.split('\n')
+
+                        for line in lines:
+                            if line.strip():  # Only track non-empty lines
+                                self._recent_lines.append(line)
+                                # Keep only recent lines
+                                if len(self._recent_lines) > 100:
+                                    self._recent_lines.pop(0)
+
+                        # Check if any line repeats too many times
+                        if any(self._recent_lines.count(line) > self._max_repetitions for line in set(self._recent_lines)):
+                            yield "\n\n**Warning: Response was interrupted because repetitive content was detected.**"
+
+                            # Log the repetitive content for debugging
+                            repetitive_lines = [line for line in set(self._recent_lines)
+                                                if self._recent_lines.count(line) > self._max_repetitions]
+                            logger.warning(f"Repetitive content detected. Repetitive lines: {repetitive_lines}")
+
+                            # Send a special marker to indicate the stream should end
+                            yield "\n\n[STREAM_END_REPETITION_DETECTED]"
+
+                            # Break the streaming loop
+                            logger.warning("Streaming response interrupted due to repetitive content")
+                            return
+
+                        yield chunk.content
+                    elif hasattr(chunk, 'message') and hasattr(chunk.message, 'content'):
+                        yield chunk.message.content
+                return  # Success, exit retry loop
 
-                # Check if any line repeats too many times
-                if any(self._recent_lines.count(line) > self._max_repetitions for line in set(self._recent_lines)):
-                    yield "\n\n**Warning: Response was interrupted because repetitive content was detected.**"
-
-                    # Log the repetitive content for debugging
-                    repetitive_lines = [line for line in set(self._recent_lines)
-                                        if self._recent_lines.count(line) > self._max_repetitions]
-                    logger.warning(f"Repetitive content detected. Repetitive lines: {repetitive_lines}")
+            except Exception as e:
+                error_str = str(e)
+                if ("ThrottlingException" in error_str or "rate limit" in error_str.lower() or
+                        "timeout" in error_str.lower()) and stream_retries < max_stream_retries:
 
-                    # Send a special marker to indicate the stream should end
-                    yield "\n\n[STREAM_END_REPETITION_DETECTED]"
+                    stream_retries += 1
+                    delay = 2 if stream_retries == 1 else 5  # 2s, 5s
+                    logger.warning(f"🔄 STREAM_RETRY: Attempt {stream_retries}/{max_stream_retries} after {delay}s delay")
 
-                    # Break the streaming loop
-                    logger.warning("Streaming response interrupted due to repetitive content")
-                    break
-
-                yield chunk.content
-            elif hasattr(chunk, 'message') and hasattr(chunk.message, 'content'):
-                yield chunk.message.content
+                    import time
+                    time.sleep(delay)
+                    continue
+                else:
+                    raise  # Re-raise for higher-level retry or final failure
 
     async def astream(self, messages: List[Dict[str, Any]], system: Optional[str] = None, **kwargs) -> AsyncIterator[str]:
         """
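Stripped of the repetition guard, the retry shape introduced above looks roughly like this (stream_fn and the transient-error matching are simplified stand-ins, not ziya's actual API):

    import time

    def stream_with_retry(stream_fn, max_retries: int = 2):
        """Sketch: retry a whole stream on transient errors, then re-raise."""
        retries = 0
        while retries <= max_retries:
            try:
                for chunk in stream_fn():
                    yield chunk
                return  # stream finished cleanly
            except Exception as exc:
                msg = str(exc).lower()
                transient = any(s in msg for s in ("throttlingexception", "rate limit", "timeout"))
                if transient and retries < max_retries:
                    retries += 1
                    time.sleep(2 if retries == 1 else 5)  # 2s, then 5s, as above
                    continue
                raise

In this shape a retry replays the stream from the start, so it is most useful when the failure occurs before any chunks have been yielded.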
app/config/models_config.py CHANGED
@@ -9,7 +9,7 @@ import os
 # Model configuration
 DEFAULT_ENDPOINT = "bedrock"
 DEFAULT_MODELS = {
-    "bedrock": "sonnet4.0",
+    "bedrock": "sonnet4.5",
     "google": "gemini-pro"
 }
 
@@ -29,7 +29,7 @@ GLOBAL_MODEL_DEFAULTS = {
     "temperature": 0.3,
     "supports_thinking": False,
     "supports_max_input_tokens": False,
-    "default_max_output_tokens": 4096,  # Default value for max_output_tokens
+    "default_max_output_tokens": 32768,  # Default value for max_output_tokens
     "parameter_mappings": {
         "max_output_tokens": ["max_tokens"],  # Some APIs use max_tokens instead
         "temperature": ["temperature"],
@@ -169,19 +169,6 @@ ENDPOINT_DEFAULTS = {
 # Model-specific configs that override endpoint defaults
 MODEL_CONFIGS = {
     "bedrock": {
-        "opus4.1": {
-            "model_id": {
-                "us": "us.anthropic.claude-opus-4-1-20250805-v1:0"
-            },
-            "token_limit": 200000,  # Total context window size
-            "max_output_tokens": 64000,  # Maximum output tokens
-            "default_max_output_tokens": 10000,  # Default value for max_output_tokens
-            "supports_max_input_tokens": True,
-            "supports_thinking": True,  # Override global default
-            "family": "claude",
-            "supports_context_caching": True,
-            "region": "us-east-1"  # Model-specific region preference
-        },
         "sonnet4.0": {
             "model_id": {
                 "us": "us.anthropic.claude-sonnet-4-20250514-v1:0",
@@ -196,7 +183,7 @@ MODEL_CONFIGS = {
             "preferred_region": "us-east-1",  # Default preference but not restricted
             "token_limit": 200000,  # Total context window size
             "max_output_tokens": 64000,  # Maximum output tokens
-            "default_max_output_tokens": 10000,  # Default value for max_output_tokens
+            "default_max_output_tokens": 36000,  # Default value for max_output_tokens
             "supports_max_input_tokens": True,
             "supports_thinking": True,  # Override global default
             "family": "claude",
@@ -205,6 +192,26 @@ MODEL_CONFIGS = {
             "extended_context_limit": 1000000,  # Extended context window size
             "extended_context_header": "context-1m-2025-08-07"  # Beta header for extended context
         },
+        "sonnet4.5": {
+            "model_id": {
+                "us": "anthropic.claude-sonnet-4-5-20250929-v1:0",
+                "eu": "anthropic.claude-sonnet-4-5-20250929-v1:0"
+            },
+            "available_regions": [
+                "us-east-1", "us-west-2", "eu-west-1", "eu-central-1", "ap-southeast-1"
+            ],
+            "preferred_region": "us-east-1",  # Default preference
+            "token_limit": 200000,  # Total context window size
+            "max_output_tokens": 64000,  # Maximum output tokens
+            "default_max_output_tokens": 36000,  # Default value for max_output_tokens
+            "supports_max_input_tokens": True,
+            "supports_thinking": True,  # Override global default
+            "family": "claude",
+            "supports_context_caching": True,
+            "supports_extended_context": True,  # Supports 1M token context window
+            "extended_context_limit": 1000000,  # Extended context window size
+            "extended_context_header": "context-1m-2025-08-07"  # Same header as sonnet4.0
+        },
         "sonnet3.7": {
             "model_id": "eu.anthropic.claude-3-7-sonnet-20250219-v1:0",
             "available_regions": ["eu-west-1", "eu-central-1"],
@@ -243,9 +250,16 @@ MODEL_CONFIGS = {
             "region_restricted": True,  # Only available in US regions
             "preferred_region": "us-east-1",
             "family": "claude",
-            "supports_context_caching": True,
-        },
+            "region": "us-east-1"  # Model-specific region preference
+        },
         "opus4": {
+            "max_output_tokens": 64000,  # Add explicit output token limits
+            "default_max_output_tokens": 32000,  # Higher default for opus4
+            "max_iterations": 8,  # Higher iteration limit for advanced model
+            "timeout_multiplier": 6,  # Longer timeouts for complex responses
+            "is_advanced_model": True,  # Flag for 4.0+ capabilities
+            "token_limit": 200000,  # Add context window
+            "supports_max_input_tokens": True,
             "model_id": {
                 "us": "us.anthropic.claude-opus-4-20250514-v1:0"
             },
@@ -264,7 +278,10 @@ MODEL_CONFIGS = {
             "preferred_region": "us-east-1",
             "token_limit": 200000,  # Total context window size
             "max_output_tokens": 64000,  # Maximum output tokens
-            "default_max_output_tokens": 10000,  # Default value for max_output_tokens
+            "default_max_output_tokens": 32000,  # Increased from 10k to 32k for longer responses
+            "max_iterations": 8,
+            "timeout_multiplier": 6,
+            "is_advanced_model": True,
             "supports_max_input_tokens": True,
             "supports_thinking": True,  # Override global default
             "family": "claude",
@@ -352,7 +369,7 @@ MODEL_CONFIGS = {
             "model_id": "gemini-2.5-pro",
             "token_limit": 1048576,
             "family": "gemini-pro",
-            "max_output_tokens": 8192,
+            "max_output_tokens": 65536,  # Gemini 2.5 Pro supports up to 65K output tokens
             "convert_system_message_to_human": False,
             "supports_function_calling": True,
             "native_function_calling": True,
@@ -426,6 +443,30 @@ TOOL_SENTINEL_TAG = os.environ.get("ZIYA_TOOL_SENTINEL", "TOOL_SENTINEL")
 TOOL_SENTINEL_OPEN = f"<{TOOL_SENTINEL_TAG}>"
 TOOL_SENTINEL_CLOSE = f"</{TOOL_SENTINEL_TAG}>"
 
+# Shell command configuration
+DEFAULT_SHELL_CONFIG = {
+    "enabled": True,
+    "allowedCommands": [
+        "ls", "cat", "pwd", "grep", "wc", "touch", "find", "date", "od", "df",
+        "netstat", "lsof", "ps", "sed", "awk", "cut", "sort", "which", "hexdump",
+        "xxd", "tail", "head", "echo", "printf", "tr", "uniq", "column", "nl",
+        "tee", "base64", "md5sum", "sha1sum", "sha256sum", "bc", "expr", "seq",
+        "paste", "join", "fold", "expand", "cd", "tree", "less", "xargs", "curl",
+        "ping", "du", "file"
+    ],
+    "gitOperationsEnabled": True,
+    "safeGitOperations": [
+        "status", "log", "show", "diff", "branch", "remote", "config --get",
+        "ls-files", "ls-tree", "blame", "tag", "stash list", "reflog",
+        "rev-parse", "describe", "shortlog", "whatchanged"
+    ],
+    "timeout": 90  # Increased base timeout to support longer operations
+}
+
+def get_default_shell_config():
+    """Get the default shell configuration."""
+    return DEFAULT_SHELL_CONFIG.copy()
+
 # Helper functions for model parameter validation
 
 def get_supported_parameters(endpoint, model_name):
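For context on how these entries are typically consumed, model-specific keys layer over GLOBAL_MODEL_DEFAULTS; the merge below is an illustrative guess at that resolution, not ziya's actual helper (which lives alongside get_supported_parameters):

    GLOBAL_MODEL_DEFAULTS = {"temperature": 0.3, "default_max_output_tokens": 32768}
    MODEL_CONFIGS = {
        "bedrock": {
            "sonnet4.5": {"default_max_output_tokens": 36000, "token_limit": 200000},
        },
    }

    def resolve_model_config(endpoint: str, model_name: str) -> dict:
        # Per-model keys override the global defaults; unlisted keys fall through.
        return {**GLOBAL_MODEL_DEFAULTS, **MODEL_CONFIGS[endpoint][model_name]}

    print(resolve_model_config("bedrock", "sonnet4.5"))
    # {'temperature': 0.3, 'default_max_output_tokens': 36000, 'token_limit': 200000}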
app/config/shell_config.py CHANGED
@@ -1,5 +1,9 @@
 """
 Single source of truth for shell command configuration.
+
+IMPORTANT: All commands must be complete, non-interactive operations.
+Do not use tools in interactive mode (e.g., 'bc' without expression, 'python' REPL).
+Always provide complete command with all arguments needed for one-shot execution.
 """
 
 # SINGLE SOURCE OF TRUTH for shell command configuration
@@ -19,7 +23,7 @@ DEFAULT_SHELL_CONFIG = {
         "ls-files", "ls-tree", "blame", "tag", "stash list", "reflog",
         "rev-parse", "describe", "shortlog", "whatchanged"
    ],
-    "timeout": 10
+    "timeout": 30
 }
 
 def get_default_shell_config():
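One property of the copy()-based helper worth keeping in mind: dict.copy() is shallow, so nested lists are shared between the returned config and the module-level default. A trimmed illustration (config contents reduced to the relevant keys):

    DEFAULT_SHELL_CONFIG = {"timeout": 30, "allowedCommands": ["ls", "cat"]}

    def get_default_shell_config():
        return DEFAULT_SHELL_CONFIG.copy()

    cfg = get_default_shell_config()
    cfg["timeout"] = 90                    # safe: only the copy changes
    cfg["allowedCommands"].append("curl")  # shared: the default list changes too
    print(DEFAULT_SHELL_CONFIG["allowedCommands"])  # ['ls', 'cat', 'curl']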