ziya 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ziya might be problematic.

Files changed (73)
  1. app/agents/agent.py +71 -73
  2. app/agents/direct_streaming.py +1 -1
  3. app/agents/prompts.py +1 -1
  4. app/agents/prompts_manager.py +14 -10
  5. app/agents/wrappers/google_direct.py +31 -1
  6. app/agents/wrappers/nova_tool_execution.py +2 -2
  7. app/agents/wrappers/nova_wrapper.py +1 -1
  8. app/agents/wrappers/ziya_bedrock.py +53 -31
  9. app/config/models_config.py +61 -20
  10. app/config/shell_config.py +5 -1
  11. app/extensions/prompt_extensions/claude_extensions.py +27 -5
  12. app/extensions/prompt_extensions/mcp_prompt_extensions.py +82 -56
  13. app/main.py +5 -3
  14. app/mcp/client.py +19 -10
  15. app/mcp/manager.py +68 -10
  16. app/mcp/tools.py +8 -9
  17. app/mcp_servers/shell_server.py +3 -3
  18. app/middleware/streaming.py +29 -41
  19. app/routes/file_validation.py +35 -0
  20. app/routes/mcp_routes.py +54 -8
  21. app/server.py +525 -614
  22. app/streaming_tool_executor.py +748 -137
  23. app/templates/asset-manifest.json +20 -20
  24. app/templates/index.html +1 -1
  25. app/templates/static/css/{main.0297bfee.css → main.e7109b49.css} +2 -2
  26. app/templates/static/css/main.e7109b49.css.map +1 -0
  27. app/templates/static/js/14386.65fcfe53.chunk.js +2 -0
  28. app/templates/static/js/14386.65fcfe53.chunk.js.map +1 -0
  29. app/templates/static/js/35589.0368973a.chunk.js +2 -0
  30. app/templates/static/js/35589.0368973a.chunk.js.map +1 -0
  31. app/templates/static/js/{50295.ab92f61b.chunk.js → 50295.90aca393.chunk.js} +3 -3
  32. app/templates/static/js/50295.90aca393.chunk.js.map +1 -0
  33. app/templates/static/js/55734.5f0fd567.chunk.js +2 -0
  34. app/templates/static/js/55734.5f0fd567.chunk.js.map +1 -0
  35. app/templates/static/js/58542.57fed736.chunk.js +2 -0
  36. app/templates/static/js/58542.57fed736.chunk.js.map +1 -0
  37. app/templates/static/js/{68418.2554bb1e.chunk.js → 68418.f7b4d2d9.chunk.js} +3 -3
  38. app/templates/static/js/68418.f7b4d2d9.chunk.js.map +1 -0
  39. app/templates/static/js/99948.b280eda0.chunk.js +2 -0
  40. app/templates/static/js/99948.b280eda0.chunk.js.map +1 -0
  41. app/templates/static/js/main.e075582c.js +3 -0
  42. app/templates/static/js/main.e075582c.js.map +1 -0
  43. app/utils/code_util.py +5 -2
  44. app/utils/context_cache.py +11 -0
  45. app/utils/conversation_filter.py +90 -0
  46. app/utils/custom_bedrock.py +43 -1
  47. app/utils/diff_utils/validation/validators.py +32 -22
  48. app/utils/file_cache.py +5 -3
  49. app/utils/precision_prompt_system.py +116 -0
  50. app/utils/streaming_optimizer.py +100 -0
  51. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/METADATA +3 -2
  52. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/RECORD +59 -55
  53. app/templates/static/css/main.0297bfee.css.map +0 -1
  54. app/templates/static/js/14386.567bf803.chunk.js +0 -2
  55. app/templates/static/js/14386.567bf803.chunk.js.map +0 -1
  56. app/templates/static/js/35589.278ecda2.chunk.js +0 -2
  57. app/templates/static/js/35589.278ecda2.chunk.js.map +0 -1
  58. app/templates/static/js/50295.ab92f61b.chunk.js.map +0 -1
  59. app/templates/static/js/55734.90d8bd52.chunk.js +0 -2
  60. app/templates/static/js/55734.90d8bd52.chunk.js.map +0 -1
  61. app/templates/static/js/58542.08fb5cf4.chunk.js +0 -2
  62. app/templates/static/js/58542.08fb5cf4.chunk.js.map +0 -1
  63. app/templates/static/js/68418.2554bb1e.chunk.js.map +0 -1
  64. app/templates/static/js/99948.71670e91.chunk.js +0 -2
  65. app/templates/static/js/99948.71670e91.chunk.js.map +0 -1
  66. app/templates/static/js/main.1d79eac2.js +0 -3
  67. app/templates/static/js/main.1d79eac2.js.map +0 -1
  68. /app/templates/static/js/{50295.ab92f61b.chunk.js.LICENSE.txt → 50295.90aca393.chunk.js.LICENSE.txt} +0 -0
  69. /app/templates/static/js/{68418.2554bb1e.chunk.js.LICENSE.txt → 68418.f7b4d2d9.chunk.js.LICENSE.txt} +0 -0
  70. /app/templates/static/js/{main.1d79eac2.js.LICENSE.txt → main.e075582c.js.LICENSE.txt} +0 -0
  71. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/WHEEL +0 -0
  72. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/entry_points.txt +0 -0
  73. {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/licenses/LICENSE +0 -0
app/agents/agent.py CHANGED
@@ -820,9 +820,12 @@ class RetryingChatBedrock(Runnable):
         model_config = config.copy() if config else {}
         if conversation_id:
             model_config["conversation_id"] = conversation_id
+
+        # Merge model_config into kwargs for compatibility with all model types
+        merged_kwargs = {**kwargs, **model_config}
 
-        async for chunk in self.model.astream(messages, model_config, **kwargs):
-            logger.error(f"🔍 AGENT_MODEL_ASTREAM: Received chunk type: {type(chunk)}, content: {getattr(chunk, 'content', str(chunk))[:100]}")
+        async for chunk in self.model.astream(messages, **merged_kwargs):
+            logger.debug(f"🔍 AGENT_MODEL_ASTREAM: Received chunk type: {type(chunk)}, content: {getattr(chunk, 'content', str(chunk))[:100]}")
             # Check if this is an error chunk that should terminate this specific stream
             # If we reach here, we've successfully started streaming
 
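Since model_config is spread last in the merge, its values override any overlapping keys already present in kwargs. A standalone illustration of that precedence (the example values are made up):

    kwargs = {"temperature": 0.2, "max_tokens": 1024}
    model_config = {"conversation_id": "abc123", "temperature": 0.4}

    # Later unpacking wins on conflicts, so model_config takes precedence.
    merged_kwargs = {**kwargs, **model_config}
    print(merged_kwargs)
    # {'temperature': 0.4, 'max_tokens': 1024, 'conversation_id': 'abc123'}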
@@ -1078,23 +1081,20 @@ class RetryingChatBedrock(Runnable):
 
 
             # Check if this is a throttling error wrapped in another exception
+            logger.error(f"🔍 ACTUAL_ERROR: {error_str}")
+            logger.error(f"🔍 ERROR_TYPE: {type(e)}")
             if "ThrottlingException" in error_str or "Too many requests" in error_str:
                 logger.warning("Detected throttling error in exception")
-                # Format error message for throttling
+                # Simple error message for frontend
+                error_message = {
+                    "error": "⚠️ AWS rate limit exceeded. Please wait a moment and try again.",
+                    "type": "throttling"
+                }
+            else:
+                # Show the actual error instead of masking it
                 error_message = {
-                    "error": "throttling_error",
-                    "detail": "AWS Bedrock rate limit exceeded. All automatic retries have been exhausted.",
-                    "status_code": 429,
-                    "stream_id": stream_id,
-                    "retry_after": "60",
-                    "throttle_info": {
-                        "auto_attempts_exhausted": True,
-                        "total_auto_attempts": max_retries,
-                        "can_user_retry": True,
-                        "backoff_used": [5, 10, 20, 40][:attempt + 1]
-                    },
-                    "ui_action": "show_retry_button",
-                    "user_message": "Click 'Retry' to attempt again, or wait a few minutes for better success rate."
+                    "error": f"⚠️ Error: {error_str}",
+                    "type": "general"
                 }
 
             # Include pre-streaming work in preservation
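For reference, the simplified payload can be consumed by branching on its type field; a minimal, illustrative sketch (the handler name and surrounding transport are not part of ziya):

    import json

    def summarize_error_event(raw_event: str) -> str:
        """Illustrative consumer of the simplified {"error", "type"} payload."""
        payload = json.loads(raw_event)
        if payload.get("type") == "throttling":
            # Transient rate-limit condition: worth retrying after a short wait.
            return payload["error"]
        # "general" errors now carry the original error text instead of a generic message.
        return payload.get("error", "Unknown error")

    print(summarize_error_event('{"error": "⚠️ Error: boom", "type": "general"}'))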
@@ -1412,19 +1412,10 @@ class RetryingChatBedrock(Runnable):
                 time.sleep(retry_delay)
                 continue
             else:
-                # Final attempt failed - enhance error response for frontend
+                # Simple error response for frontend
                 error_message = {
-                    "error": "throttling_error",
-                    "detail": "AWS Bedrock rate limit exceeded. All automatic retries have been exhausted.",
-                    "status_code": 429,
-                    "throttle_info": {
-                        "auto_attempts_exhausted": True,
-                        "total_auto_attempts": max_retries,
-                        "can_user_retry": True,
-                        "backoff_used": [5.0, 10.0, 20.0, 40.0][:attempt + 1]
-                    },
-                    "ui_action": "show_retry_button",
-                    "user_message": "Click 'Retry' to attempt again, or wait a few minutes for better success rate."
+                    "error": "⚠️ AWS rate limit exceeded. Please wait a moment and try again.",
+                    "type": "throttling"
                 }
                 # Let this fall through to the final error handling
 
@@ -1808,31 +1799,34 @@ def create_agent_chain(chat_model: BaseChatModel):
     # Create cache key based on model configuration
     model_id = ModelManager.get_model_id() or getattr(chat_model, 'model_id', 'unknown')
     ast_enabled = os.environ.get("ZIYA_ENABLE_AST") == "true"
-    mcp_enabled = os.environ.get("ZIYA_ENABLE_MCP") != "false"
+    mcp_enabled = os.environ.get("ZIYA_ENABLE_MCP", "true").lower() in ("true", "1", "yes")
 
     # Get MCP tools first to include in cache key
     mcp_tools = []
-    try:
-        from app.mcp.manager import get_mcp_manager
-        from app.mcp.enhanced_tools import create_secure_mcp_tools
-        mcp_manager = get_mcp_manager()
-        # Ensure MCP is initialized before creating tools
-        if not mcp_manager.is_initialized:
-            # Don't initialize during startup - let server startup handle it
-            logger.info("MCP manager not yet initialized, will use available tools when ready")
-            mcp_tools = []
-        else:
-            mcp_tools = create_secure_mcp_tools()
-            logger.info(f"Created {len(mcp_tools)} MCP tools for agent chain: {[tool.name for tool in mcp_tools]}")
+    if mcp_enabled:
+        try:
+            from app.mcp.manager import get_mcp_manager
+            from app.mcp.enhanced_tools import create_secure_mcp_tools
+            mcp_manager = get_mcp_manager()
+            # Ensure MCP is initialized before creating tools
+            if not mcp_manager.is_initialized:
+                # Don't initialize during startup - let server startup handle it
+                logger.info("MCP manager not yet initialized, will use available tools when ready")
+                mcp_tools = []
+            else:
+                mcp_tools = create_secure_mcp_tools()
+                logger.info(f"Created {len(mcp_tools)} MCP tools for agent chain: {[tool.name for tool in mcp_tools]}")
+
+            if mcp_manager.is_initialized:
+                mcp_tools = create_secure_mcp_tools()
+                logger.info(f"Created {len(mcp_tools)} MCP tools for agent chain: {[tool.name for tool in mcp_tools]}")
+            else:
+                logger.warning("MCP manager not initialized, no MCP tools available")
 
-        if mcp_manager.is_initialized:
-            mcp_tools = create_secure_mcp_tools()
-            logger.info(f"Created {len(mcp_tools)} MCP tools for agent chain: {[tool.name for tool in mcp_tools]}")
-        else:
-            logger.warning("MCP manager not initialized, no MCP tools available")
-
-    except Exception as e:
-        logger.warning(f"Failed to get MCP tools for agent: {str(e)}")
+        except Exception as e:
+            logger.warning(f"Failed to get MCP tools for agent: {str(e)}")
+    else:
+        logger.debug("MCP is disabled, no tools will be created for agent chain")
 
     # Include MCP tool count in cache key to ensure different chains for different tool availability
     cache_key = f"{model_id}_{ast_enabled}_{mcp_enabled}_{len(mcp_tools)}"
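The new check treats only explicit truthy spellings as enabled, whereas the old != "false" comparison counted values like "0" or "no" as enabled. A small standalone illustration (the helper name is made up; agent.py inlines the expression):

    import os

    def env_flag(name: str, default: str = "true") -> bool:
        # Only "true", "1", or "yes" (any case) count as enabled.
        return os.environ.get(name, default).lower() in ("true", "1", "yes")

    os.environ["ZIYA_ENABLE_MCP"] = "0"
    print(os.environ.get("ZIYA_ENABLE_MCP") != "false")  # True  (old check: still enabled)
    print(env_flag("ZIYA_ENABLE_MCP"))                   # False (new check: disabled)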
@@ -2099,22 +2093,26 @@ def create_agent_executor(agent_chain: Runnable):
 
     # Get MCP tools for the executor
     mcp_tools = []
-    try:
-        logger.info("Attempting to get MCP tools for agent executor...")
-
-        from app.mcp.manager import get_mcp_manager
-        mcp_manager = get_mcp_manager()
-
-        if mcp_manager.is_initialized:
-            mcp_tools = create_mcp_tools()
-            logger.info(f"Created agent executor with {len(mcp_tools)} MCP tools")
-            for tool in mcp_tools:
-                logger.info(f" - {tool.name}: {tool.description}")
-        else:
-            logger.info("MCP not initialized, no MCP tools available")
-    except Exception as e:
-        logger.warning(f"Failed to initialize MCP tools: {str(e)}", exc_info=True)
-    from app.mcp.manager import get_mcp_manager
+    # Check if MCP is enabled before creating tools
+    if os.environ.get("ZIYA_ENABLE_MCP", "true").lower() in ("true", "1", "yes"):
+        try:
+            logger.info("Attempting to get MCP tools for agent executor...")
+
+            from app.mcp.manager import get_mcp_manager
+            mcp_manager = get_mcp_manager()
+
+            if mcp_manager.is_initialized:
+                mcp_tools = create_mcp_tools()
+                logger.info(f"Created agent executor with {len(mcp_tools)} MCP tools")
+                for tool in mcp_tools:
+                    logger.info(f" - {tool.name}: {tool.description}")
+            else:
+                logger.info("MCP not initialized, no MCP tools available")
+        except Exception as e:
+            logger.warning(f"Failed to initialize MCP tools: {str(e)}", exc_info=True)
+            from app.mcp.manager import get_mcp_manager
+    else:
+        logger.debug("MCP is disabled, no tools will be created for agent executor")
     mcp_manager = get_mcp_manager()
 
     logger.info(f"AGENT_EXECUTOR: Tools being passed to AgentExecutor: {[tool.name for tool in mcp_tools] if mcp_tools else 'No tools'}")
@@ -2363,15 +2361,15 @@ def initialize_langserve(app, executor):
         new_app.routes.append(route)
 
     # Add LangServe routes for non-Bedrock models (Gemini, Nova, etc.)
-    # The priority /api/chat endpoint will intercept Bedrock requests
-    add_routes(
-        new_app,
-        executor,
-        disabled_endpoints=["playground"],  # Keep stream and invoke for non-Bedrock models
-        path="/ziya"
-    )
+    # DISABLED: LangServe /ziya routes cause duplicate execution with /api/chat
+    # add_routes(
+    #     new_app,
+    #     executor,
+    #     disabled_endpoints=["playground"],  # Keep stream and invoke for non-Bedrock models
+    #     path="/ziya"
+    # )
 
-    logger.info("Added LangServe routes - priority /api/chat will handle Bedrock routing")
+    logger.info("DISABLED LangServe /ziya routes - using /api/chat only to prevent duplicate execution")
 
     # Clear all routes from original app
     while app.routes:
app/agents/direct_streaming.py CHANGED
@@ -141,7 +141,7 @@ class DirectStreamingAgent:
         chunk_count = 0
         tool_results_sent = 0
         largest_chunk = 0
-        async for chunk in self.executor.stream_with_tools(openai_messages, tools):
+        async for chunk in self.executor.stream_with_tools(openai_messages, tools, conversation_id=conversation_id):
             chunk_count += 1
             chunk_size = len(str(chunk))
             largest_chunk = max(largest_chunk, chunk_size)
app/agents/prompts.py CHANGED
@@ -248,7 +248,7 @@ Do not include any explanatory text within the diff blocks. If you need to provi
 AVAILABLE TOOLS:
 You have access to the following tools:
 
-MCP tools available
+{tools}
 
 The codebase is provided at the end of this prompt in a specific format.
 The code that the user has given to you for context is in the format like below where first line has the File path and then the content follows.
app/agents/prompts_manager.py CHANGED
@@ -79,16 +79,20 @@ def get_extended_prompt(model_name: Optional[str] = None,
     logger.info(f"PROMPT_MANAGER: Template was modified: {len(extended_template) != len(original_template)}")
 
     # Create a new prompt template with the extended template
-    extended_prompt = ChatPromptTemplate.from_messages(
-        [
-            ("system", extended_template),
-            MessagesPlaceholder(variable_name="chat_history", optional=True),
-            ("user", "{question}"),
-            # Add AST context if available
-            ("system", "{ast_context}"),
-            MessagesPlaceholder(variable_name="agent_scratchpad", optional=True),
-        ]
-    )
+    # Build messages list dynamically
+    messages = [
+        ("system", extended_template),
+        MessagesPlaceholder(variable_name="chat_history", optional=True),
+        ("user", "{question}"),
+    ]
+
+    # Only add AST context system message if AST is enabled
+    if os.environ.get("ZIYA_ENABLE_AST", "false").lower() in ("true", "1", "yes"):
+        messages.append(("system", "{ast_context}"))
+
+    messages.append(MessagesPlaceholder(variable_name="agent_scratchpad", optional=True))
+
+    extended_prompt = ChatPromptTemplate.from_messages(messages)
 
     # Cache the result
     _prompt_cache[cache_key] = extended_prompt
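Making the {ast_context} system message conditional means the template no longer declares an input variable that has no value when AST is disabled. A minimal sketch of the same pattern with langchain-core (the prompt text and flag are illustrative):

    from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

    ast_enabled = False  # stands in for the ZIYA_ENABLE_AST check above

    messages = [
        ("system", "Base system prompt"),
        MessagesPlaceholder(variable_name="chat_history", optional=True),
        ("user", "{question}"),
    ]
    if ast_enabled:
        messages.append(("system", "{ast_context}"))
    messages.append(MessagesPlaceholder(variable_name="agent_scratchpad", optional=True))

    prompt = ChatPromptTemplate.from_messages(messages)
    # With ast_enabled=False, formatting succeeds without supplying ast_context.
    print(prompt.invoke({"question": "hello"}))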
app/agents/wrappers/google_direct.py CHANGED
@@ -19,6 +19,17 @@ class DirectGoogleModel:
         self.temperature = temperature
         self.max_output_tokens = max_output_tokens
         self.mcp_manager = get_mcp_manager()
+
+        logger.info(f"DirectGoogleModel initialized: model={model_name}, temp={temperature}, max_output_tokens={max_output_tokens}")
+
+        # Get API key from environment and configure genai
+        import os
+        api_key = os.getenv('GOOGLE_API_KEY')
+        if api_key:
+            genai.configure(api_key=api_key)
+            logger.info("Configured Google GenAI with API key from environment")
+        else:
+            logger.info("No GOOGLE_API_KEY found, will attempt to use Application Default Credentials")
 
     def _extract_text_from_mcp_result(self, result: Any) -> str:
         """Extracts the text content from a structured MCP tool result."""
@@ -123,8 +134,25 @@
 
         tool_calls = []
         model_response_parts = []
+        finish_reason = None
+        finish_reason_name = None
 
         async for chunk in response:
+            # Log finish reason if present
+            if hasattr(chunk, 'candidates') and chunk.candidates:
+                for candidate in chunk.candidates:
+                    if hasattr(candidate, 'finish_reason') and candidate.finish_reason:
+                        finish_reason = candidate.finish_reason
+                        # Decode finish reason
+                        try:
+                            from google.ai.generativelanguage_v1beta.types import Candidate
+                            finish_reason_name = Candidate.FinishReason(finish_reason).name
+                        except:
+                            finish_reason_name = str(finish_reason)
+                        logger.info(f"Google model finish_reason: {finish_reason_name} ({finish_reason})")
+                    if hasattr(candidate, 'safety_ratings') and candidate.safety_ratings:
+                        logger.info(f"Google model safety_ratings: {candidate.safety_ratings}")
+
             if chunk.parts:
                 for part in chunk.parts:
                     if part.text:
@@ -136,6 +164,8 @@
             for candidate in chunk.candidates:
                 if candidate.content and candidate.content.parts:
                     model_response_parts.extend(candidate.content.parts)
+
+        logger.info(f"Stream ended. Tool calls: {len(tool_calls)}, Finish reason: {finish_reason_name or finish_reason}")
 
         if not tool_calls:
             logger.info("No tool calls from model. Ending loop.")
@@ -155,7 +185,7 @@
             tool_result_obj = await self.mcp_manager.call_tool(tool_name, tool_args)
             tool_result_str = self._extract_text_from_mcp_result(tool_result_obj)
 
-            yield {"type": "tool_execution", "tool_name": tool_name, "result": tool_result_str}
+            yield {"type": "tool_display", "tool_name": tool_name, "result": tool_result_str}
 
             tool_results.append(
                 {"function_response": {"name": tool_name, "response": {"content": tool_result_str}}}
app/agents/wrappers/nova_tool_execution.py CHANGED
@@ -136,7 +136,7 @@ async def execute_nova_tools_properly(bedrock_client, converse_params, formatted
 
                 # Yield for frontend display in the format it expects
                 yield {
-                    'type': 'tool_execution',
+                    'type': 'tool_display',
                     'tool_name': tool_use['name'],
                     'result': result_text
                 }
@@ -150,7 +150,7 @@ async def execute_nova_tools_properly(bedrock_client, converse_params, formatted
             except Exception as e:
                 logger.error(f"Nova: Tool execution failed: {e}")
                 yield {
-                    'type': 'tool_execution',
+                    'type': 'tool_display',
                     'tool_name': tool_use['name'],
                     'result': f"Error: {str(e)}"
                 }
app/agents/wrappers/nova_wrapper.py CHANGED
@@ -554,7 +554,7 @@ class NovaWrapper(BaseChatModel):
 
                 if result:
                     results.append({
-                        'type': 'tool_execution',
+                        'type': 'tool_display',
                         'tool_id': f'nova_text_{hash(f"{tool_name}_{command}") % 10000}',
                         'tool_name': f'mcp_{tool_name.replace("mcp_", "")}',
                         'result': result
app/agents/wrappers/ziya_bedrock.py CHANGED
@@ -318,7 +318,8 @@ class ZiyaBedrock(Runnable):
         # Ensure system messages are properly ordered after caching
         messages = self._ensure_system_message_ordering(messages)
 
-        kwargs["max_tokens"] = int(os.environ.get("ZIYA_MAX_OUTPUT_TOKENS", self.ziya_max_tokens))  # Use environment variable if available
+        # Use much higher default if not set
+        kwargs["max_tokens"] = int(os.environ.get("ZIYA_MAX_OUTPUT_TOKENS", self.ziya_max_tokens or 32768))
         if self.ziya_max_tokens is not None and "max_tokens" not in kwargs:
             kwargs["max_tokens"] = self.ziya_max_tokens
             logger.debug(f"Added max_tokens={self.ziya_max_tokens} to _generate kwargs")
@@ -580,39 +581,60 @@ class ZiyaBedrock(Runnable):
         # Set streaming to True for this call
         self.bedrock_model.streaming = True
 
-        # Call the underlying model's stream method
-        for chunk in self.bedrock_model.stream(lc_messages, **kwargs):
-            if hasattr(chunk, 'content') and chunk.content:
-                # Check for repetitive lines
-                content = chunk.content
-                lines = content.split('\n')
-
-                for line in lines:
-                    if line.strip():  # Only track non-empty lines
-                        self._recent_lines.append(line)
-                        # Keep only recent lines
-                        if len(self._recent_lines) > 100:
-                            self._recent_lines.pop(0)
+        # Call the underlying model's stream method with retry logic
+        stream_retries = 0
+        max_stream_retries = 2
+
+        while stream_retries <= max_stream_retries:
+            try:
+                for chunk in self.bedrock_model.stream(lc_messages, **kwargs):
+                    if hasattr(chunk, 'content') and chunk.content:
+                        # Check for repetitive lines
+                        content = chunk.content
+                        lines = content.split('\n')
+
+                        for line in lines:
+                            if line.strip():  # Only track non-empty lines
+                                self._recent_lines.append(line)
+                                # Keep only recent lines
+                                if len(self._recent_lines) > 100:
+                                    self._recent_lines.pop(0)
+
+                        # Check if any line repeats too many times
+                        if any(self._recent_lines.count(line) > self._max_repetitions for line in set(self._recent_lines)):
+                            yield "\n\n**Warning: Response was interrupted because repetitive content was detected.**"
+
+                            # Log the repetitive content for debugging
+                            repetitive_lines = [line for line in set(self._recent_lines)
+                                                if self._recent_lines.count(line) > self._max_repetitions]
+                            logger.warning(f"Repetitive content detected. Repetitive lines: {repetitive_lines}")
+
+                            # Send a special marker to indicate the stream should end
+                            yield "\n\n[STREAM_END_REPETITION_DETECTED]"
+
+                            # Break the streaming loop
+                            logger.warning("Streaming response interrupted due to repetitive content")
+                            return
+
+                        yield chunk.content
+                    elif hasattr(chunk, 'message') and hasattr(chunk.message, 'content'):
+                        yield chunk.message.content
+                return  # Success, exit retry loop
 
-                # Check if any line repeats too many times
-                if any(self._recent_lines.count(line) > self._max_repetitions for line in set(self._recent_lines)):
-                    yield "\n\n**Warning: Response was interrupted because repetitive content was detected.**"
-
-                    # Log the repetitive content for debugging
-                    repetitive_lines = [line for line in set(self._recent_lines)
-                                        if self._recent_lines.count(line) > self._max_repetitions]
-                    logger.warning(f"Repetitive content detected. Repetitive lines: {repetitive_lines}")
+            except Exception as e:
+                error_str = str(e)
+                if ("ThrottlingException" in error_str or "rate limit" in error_str.lower() or
+                        "timeout" in error_str.lower()) and stream_retries < max_stream_retries:
 
-                    # Send a special marker to indicate the stream should end
-                    yield "\n\n[STREAM_END_REPETITION_DETECTED]"
+                    stream_retries += 1
+                    delay = 2 if stream_retries == 1 else 5  # 2s, 5s
+                    logger.warning(f"🔄 STREAM_RETRY: Attempt {stream_retries}/{max_stream_retries} after {delay}s delay")
 
-                    # Break the streaming loop
-                    logger.warning("Streaming response interrupted due to repetitive content")
-                    break
-
-                yield chunk.content
-            elif hasattr(chunk, 'message') and hasattr(chunk.message, 'content'):
-                yield chunk.message.content
+                    import time
+                    time.sleep(delay)
+                    continue
+                else:
+                    raise  # Re-raise for higher-level retry or final failure
 
     async def astream(self, messages: List[Dict[str, Any]], system: Optional[str] = None, **kwargs) -> AsyncIterator[str]:
         """
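Stripped of the repetition guard, the retry shape introduced above looks roughly like this (stream_fn and the transient-error matching are simplified stand-ins, not ziya's actual API):

    import time

    def stream_with_retry(stream_fn, max_retries: int = 2):
        """Sketch: retry a whole stream on transient errors, then re-raise."""
        retries = 0
        while retries <= max_retries:
            try:
                for chunk in stream_fn():
                    yield chunk
                return  # stream finished cleanly
            except Exception as exc:
                msg = str(exc).lower()
                transient = any(s in msg for s in ("throttlingexception", "rate limit", "timeout"))
                if transient and retries < max_retries:
                    retries += 1
                    time.sleep(2 if retries == 1 else 5)  # 2s, then 5s, as above
                    continue
                raise

In this shape a retry replays the stream from the start, so it is most useful when the failure occurs before any chunks have been yielded.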
app/config/models_config.py CHANGED
@@ -9,7 +9,7 @@ import os
 # Model configuration
 DEFAULT_ENDPOINT = "bedrock"
 DEFAULT_MODELS = {
-    "bedrock": "sonnet4.0",
+    "bedrock": "sonnet4.5",
     "google": "gemini-pro"
 }
 
@@ -29,7 +29,7 @@ GLOBAL_MODEL_DEFAULTS = {
     "temperature": 0.3,
     "supports_thinking": False,
     "supports_max_input_tokens": False,
-    "default_max_output_tokens": 4096,  # Default value for max_output_tokens
+    "default_max_output_tokens": 32768,  # Default value for max_output_tokens
     "parameter_mappings": {
         "max_output_tokens": ["max_tokens"],  # Some APIs use max_tokens instead
         "temperature": ["temperature"],
@@ -169,19 +169,6 @@ ENDPOINT_DEFAULTS = {
 # Model-specific configs that override endpoint defaults
 MODEL_CONFIGS = {
     "bedrock": {
-        "opus4.1": {
-            "model_id": {
-                "us": "us.anthropic.claude-opus-4-1-20250805-v1:0"
-            },
-            "token_limit": 200000,  # Total context window size
-            "max_output_tokens": 64000,  # Maximum output tokens
-            "default_max_output_tokens": 10000,  # Default value for max_output_tokens
-            "supports_max_input_tokens": True,
-            "supports_thinking": True,  # Override global default
-            "family": "claude",
-            "supports_context_caching": True,
-            "region": "us-east-1"  # Model-specific region preference
-        },
         "sonnet4.0": {
             "model_id": {
                 "us": "us.anthropic.claude-sonnet-4-20250514-v1:0",
@@ -196,7 +183,7 @@ MODEL_CONFIGS = {
             "preferred_region": "us-east-1",  # Default preference but not restricted
             "token_limit": 200000,  # Total context window size
             "max_output_tokens": 64000,  # Maximum output tokens
-            "default_max_output_tokens": 10000,  # Default value for max_output_tokens
+            "default_max_output_tokens": 36000,  # Default value for max_output_tokens
             "supports_max_input_tokens": True,
             "supports_thinking": True,  # Override global default
             "family": "claude",
@@ -205,6 +192,26 @@ MODEL_CONFIGS = {
             "extended_context_limit": 1000000,  # Extended context window size
             "extended_context_header": "context-1m-2025-08-07"  # Beta header for extended context
         },
+        "sonnet4.5": {
+            "model_id": {
+                "us": "anthropic.claude-sonnet-4-5-20250929-v1:0",
+                "eu": "anthropic.claude-sonnet-4-5-20250929-v1:0"
+            },
+            "available_regions": [
+                "us-east-1", "us-west-2", "eu-west-1", "eu-central-1", "ap-southeast-1"
+            ],
+            "preferred_region": "us-east-1",  # Default preference
+            "token_limit": 200000,  # Total context window size
+            "max_output_tokens": 64000,  # Maximum output tokens
+            "default_max_output_tokens": 36000,  # Default value for max_output_tokens
+            "supports_max_input_tokens": True,
+            "supports_thinking": True,  # Override global default
+            "family": "claude",
+            "supports_context_caching": True,
+            "supports_extended_context": True,  # Supports 1M token context window
+            "extended_context_limit": 1000000,  # Extended context window size
+            "extended_context_header": "context-1m-2025-08-07"  # Same header as sonnet4.0
+        },
         "sonnet3.7": {
             "model_id": "eu.anthropic.claude-3-7-sonnet-20250219-v1:0",
             "available_regions": ["eu-west-1", "eu-central-1"],
@@ -243,9 +250,16 @@ MODEL_CONFIGS = {
             "region_restricted": True,  # Only available in US regions
             "preferred_region": "us-east-1",
             "family": "claude",
-            "supports_context_caching": True,
-        },
+            "region": "us-east-1"  # Model-specific region preference
+        },
         "opus4": {
+            "max_output_tokens": 64000,  # Add explicit output token limits
+            "default_max_output_tokens": 32000,  # Higher default for opus4
+            "max_iterations": 8,  # Higher iteration limit for advanced model
+            "timeout_multiplier": 6,  # Longer timeouts for complex responses
+            "is_advanced_model": True,  # Flag for 4.0+ capabilities
+            "token_limit": 200000,  # Add context window
+            "supports_max_input_tokens": True,
             "model_id": {
                 "us": "us.anthropic.claude-opus-4-20250514-v1:0"
             },
@@ -264,7 +278,10 @@ MODEL_CONFIGS = {
             "preferred_region": "us-east-1",
             "token_limit": 200000,  # Total context window size
             "max_output_tokens": 64000,  # Maximum output tokens
-            "default_max_output_tokens": 10000,  # Default value for max_output_tokens
+            "default_max_output_tokens": 32000,  # Increased from 10k to 32k for longer responses
+            "max_iterations": 8,
+            "timeout_multiplier": 6,
+            "is_advanced_model": True,
             "supports_max_input_tokens": True,
             "supports_thinking": True,  # Override global default
             "family": "claude",
@@ -352,7 +369,7 @@ MODEL_CONFIGS = {
             "model_id": "gemini-2.5-pro",
             "token_limit": 1048576,
             "family": "gemini-pro",
-            "max_output_tokens": 8192,
+            "max_output_tokens": 65536,  # Gemini 2.5 Pro supports up to 65K output tokens
             "convert_system_message_to_human": False,
             "supports_function_calling": True,
             "native_function_calling": True,
@@ -426,6 +443,30 @@ TOOL_SENTINEL_TAG = os.environ.get("ZIYA_TOOL_SENTINEL", "TOOL_SENTINEL")
 TOOL_SENTINEL_OPEN = f"<{TOOL_SENTINEL_TAG}>"
 TOOL_SENTINEL_CLOSE = f"</{TOOL_SENTINEL_TAG}>"
 
+# Shell command configuration
+DEFAULT_SHELL_CONFIG = {
+    "enabled": True,
+    "allowedCommands": [
+        "ls", "cat", "pwd", "grep", "wc", "touch", "find", "date", "od", "df",
+        "netstat", "lsof", "ps", "sed", "awk", "cut", "sort", "which", "hexdump",
+        "xxd", "tail", "head", "echo", "printf", "tr", "uniq", "column", "nl",
+        "tee", "base64", "md5sum", "sha1sum", "sha256sum", "bc", "expr", "seq",
+        "paste", "join", "fold", "expand", "cd", "tree", "less", "xargs", "curl",
+        "ping", "du", "file"
+    ],
+    "gitOperationsEnabled": True,
+    "safeGitOperations": [
+        "status", "log", "show", "diff", "branch", "remote", "config --get",
+        "ls-files", "ls-tree", "blame", "tag", "stash list", "reflog",
+        "rev-parse", "describe", "shortlog", "whatchanged"
+    ],
+    "timeout": 90  # Increased base timeout to support longer operations
+}
+
+def get_default_shell_config():
+    """Get the default shell configuration."""
+    return DEFAULT_SHELL_CONFIG.copy()
+
 # Helper functions for model parameter validation
 
 def get_supported_parameters(endpoint, model_name):
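For context on how these entries are typically consumed, model-specific keys layer over GLOBAL_MODEL_DEFAULTS; the merge below is an illustrative guess at that resolution, not ziya's actual helper (which lives alongside get_supported_parameters):

    GLOBAL_MODEL_DEFAULTS = {"temperature": 0.3, "default_max_output_tokens": 32768}
    MODEL_CONFIGS = {
        "bedrock": {
            "sonnet4.5": {"default_max_output_tokens": 36000, "token_limit": 200000},
        },
    }

    def resolve_model_config(endpoint: str, model_name: str) -> dict:
        # Per-model keys override the global defaults; unlisted keys fall through.
        return {**GLOBAL_MODEL_DEFAULTS, **MODEL_CONFIGS[endpoint][model_name]}

    print(resolve_model_config("bedrock", "sonnet4.5"))
    # {'temperature': 0.3, 'default_max_output_tokens': 36000, 'token_limit': 200000}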
app/config/shell_config.py CHANGED
@@ -1,5 +1,9 @@
 """
 Single source of truth for shell command configuration.
+
+IMPORTANT: All commands must be complete, non-interactive operations.
+Do not use tools in interactive mode (e.g., 'bc' without expression, 'python' REPL).
+Always provide complete command with all arguments needed for one-shot execution.
 """
 
 # SINGLE SOURCE OF TRUTH for shell command configuration
@@ -19,7 +23,7 @@ DEFAULT_SHELL_CONFIG = {
         "ls-files", "ls-tree", "blame", "tag", "stash list", "reflog",
         "rev-parse", "describe", "shortlog", "whatchanged"
    ],
-    "timeout": 10
+    "timeout": 30
 }
 
 def get_default_shell_config():
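One property of the copy()-based helper worth keeping in mind: dict.copy() is shallow, so nested lists are shared between the returned config and the module-level default. A trimmed illustration (config contents reduced to the relevant keys):

    DEFAULT_SHELL_CONFIG = {"timeout": 30, "allowedCommands": ["ls", "cat"]}

    def get_default_shell_config():
        return DEFAULT_SHELL_CONFIG.copy()

    cfg = get_default_shell_config()
    cfg["timeout"] = 90                    # safe: only the copy changes
    cfg["allowedCommands"].append("curl")  # shared: the default list changes too
    print(DEFAULT_SHELL_CONFIG["allowedCommands"])  # ['ls', 'cat', 'curl']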