ziya 0.1.49__py3-none-any.whl → 0.1.51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ziya might be problematic.

Files changed (49)
  1. app/agents/.agent.py.swp +0 -0
  2. app/agents/agent.py +315 -113
  3. app/agents/models.py +439 -0
  4. app/agents/prompts.py +32 -4
  5. app/main.py +70 -7
  6. app/server.py +403 -14
  7. app/utils/code_util.py +641 -215
  8. pyproject.toml +3 -3
  9. templates/asset-manifest.json +18 -20
  10. templates/index.html +1 -1
  11. templates/static/css/{main.87f30840.css → main.2bddf34e.css} +2 -2
  12. templates/static/css/main.2bddf34e.css.map +1 -0
  13. templates/static/js/46907.90c6a4f3.chunk.js +2 -0
  14. templates/static/js/46907.90c6a4f3.chunk.js.map +1 -0
  15. templates/static/js/56122.1d6a5c10.chunk.js +3 -0
  16. templates/static/js/56122.1d6a5c10.chunk.js.LICENSE.txt +9 -0
  17. templates/static/js/56122.1d6a5c10.chunk.js.map +1 -0
  18. templates/static/js/83953.61a908f4.chunk.js +3 -0
  19. templates/static/js/83953.61a908f4.chunk.js.map +1 -0
  20. templates/static/js/88261.1e90079d.chunk.js +3 -0
  21. templates/static/js/88261.1e90079d.chunk.js.map +1 -0
  22. templates/static/js/{96603.863a8f96.chunk.js → 96603.18c5d644.chunk.js} +2 -2
  23. templates/static/js/{96603.863a8f96.chunk.js.map → 96603.18c5d644.chunk.js.map} +1 -1
  24. templates/static/js/{97902.75670155.chunk.js → 97902.d1e262d6.chunk.js} +3 -3
  25. templates/static/js/{97902.75670155.chunk.js.map → 97902.d1e262d6.chunk.js.map} +1 -1
  26. templates/static/js/main.9b2b2b57.js +3 -0
  27. templates/static/js/{main.ee8b3c96.js.LICENSE.txt → main.9b2b2b57.js.LICENSE.txt} +8 -2
  28. templates/static/js/main.9b2b2b57.js.map +1 -0
  29. {ziya-0.1.49.dist-info → ziya-0.1.51.dist-info}/METADATA +5 -5
  30. {ziya-0.1.49.dist-info → ziya-0.1.51.dist-info}/RECORD +36 -35
  31. templates/static/css/main.87f30840.css.map +0 -1
  32. templates/static/js/23416.c33f07ab.chunk.js +0 -3
  33. templates/static/js/23416.c33f07ab.chunk.js.map +0 -1
  34. templates/static/js/3799.fedb612f.chunk.js +0 -2
  35. templates/static/js/3799.fedb612f.chunk.js.map +0 -1
  36. templates/static/js/46907.4a730107.chunk.js +0 -2
  37. templates/static/js/46907.4a730107.chunk.js.map +0 -1
  38. templates/static/js/64754.cf383335.chunk.js +0 -2
  39. templates/static/js/64754.cf383335.chunk.js.map +0 -1
  40. templates/static/js/88261.33450351.chunk.js +0 -3
  41. templates/static/js/88261.33450351.chunk.js.map +0 -1
  42. templates/static/js/main.ee8b3c96.js +0 -3
  43. templates/static/js/main.ee8b3c96.js.map +0 -1
  44. /templates/static/js/{23416.c33f07ab.chunk.js.LICENSE.txt → 83953.61a908f4.chunk.js.LICENSE.txt} +0 -0
  45. /templates/static/js/{88261.33450351.chunk.js.LICENSE.txt → 88261.1e90079d.chunk.js.LICENSE.txt} +0 -0
  46. /templates/static/js/{97902.75670155.chunk.js.LICENSE.txt → 97902.d1e262d6.chunk.js.LICENSE.txt} +0 -0
  47. {ziya-0.1.49.dist-info → ziya-0.1.51.dist-info}/LICENSE +0 -0
  48. {ziya-0.1.49.dist-info → ziya-0.1.51.dist-info}/WHEEL +0 -0
  49. {ziya-0.1.49.dist-info → ziya-0.1.51.dist-info}/entry_points.txt +0 -0
app/server.py CHANGED
@@ -1,7 +1,7 @@
  import os
  import time
  import json
- from typing import Dict, Any, List, Tuple, Optional
+ from typing import Dict, Any, List, Tuple, Optional, Union

  import tiktoken
  from fastapi import FastAPI, Request, HTTPException
@@ -10,15 +10,20 @@ from fastapi.responses import JSONResponse
  from fastapi.staticfiles import StaticFiles
  from fastapi.templating import Jinja2Templates
  from langserve import add_routes
- from app.agents.agent import model
+ from app.agents.agent import model, RetryingChatBedrock
  from app.agents.agent import agent_executor
- from fastapi.responses import FileResponse
- from pydantic import BaseModel
+ from app.agents.agent import update_conversation_state, update_and_return
+ from langchain_google_genai.chat_models import ChatGoogleGenerativeAIError
+ from fastapi.responses import FileResponse, StreamingResponse
+ from pydantic import BaseModel, Field
+ from app.agents.models import ModelManager
  from botocore.exceptions import ClientError, BotoCoreError, CredentialRetrievalError
  from botocore.exceptions import EventStreamError
+ import botocore.errorfactory
  from starlette.responses import StreamingResponse

  # import pydevd_pycharm
+ from google.api_core.exceptions import ResourceExhausted
  import uvicorn

  from app.utils.code_util import use_git_to_apply_code_diff, correct_git_diff, PatchApplicationError
@@ -26,6 +31,13 @@ from app.utils.directory_util import get_ignored_patterns
  from app.utils.logging_utils import logger
  from app.utils.gitignore_parser import parse_gitignore_patterns

+ # Server configuration defaults
+ DEFAULT_PORT = 6969
+ # For model configurations, see app/agents/model.py
+
+ class SetModelRequest(BaseModel):
+     model_id: str
+
  app = FastAPI()

  app.add_middleware(
@@ -74,7 +86,6 @@ async def credential_exception_handler(request: Request, exc: CredentialRetrieva
          headers={"WWW-Authenticate": "Bearer"}
      )

-
  @app.exception_handler(ClientError)
  async def boto_client_exception_handler(request: Request, exc: ClientError):
      error_message = str(exc)
@@ -84,6 +95,13 @@ async def boto_client_exception_handler(request: Request, exc: ClientError):
              content={"detail": "AWS credentials have expired. Please refresh your credentials."},
              headers={"WWW-Authenticate": "Bearer"}
          )
+     elif "ValidationException" in error_message:
+         logger.error(f"Bedrock validation error: {error_message}")
+         return JSONResponse(
+             status_code=400,
+             content={"error": "validation_error",
+                      "detail": "Invalid request format for Bedrock service. Please check your input format.",
+                      "message": error_message})
      elif "ServiceUnavailableException" in error_message:
          return JSONResponse(
              status_code=503,
@@ -94,11 +112,77 @@ async def boto_client_exception_handler(request: Request, exc: ClientError):
          content={"detail": f"AWS Service Error: {str(exc)}"}
      )

+ @app.exception_handler(ResourceExhausted)
+ async def resource_exhausted_handler(request: Request, exc: ResourceExhausted):
+     """Handle Google API quota exceeded errors."""
+     logger.error(f"Google API quota exceeded: {str(exc)}")
+     return JSONResponse(
+         status_code=429, # Too Many Requests
+         content={
+             "error": "quota_exceeded",
+             "detail": "API quota has been exceeded. Please try again in a few minutes.",
+             "original_error": str(exc)
+         }
+     )
+
+ @app.exception_handler(ResourceExhausted)
+ async def resource_exhausted_handler(request: Request, exc: ResourceExhausted):
+     """Handle Google API quota exceeded errors."""
+     logger.error(f"Google API quota exceeded: {str(exc)}")
+     return JSONResponse(
+         status_code=429, # Too Many Requests
+         content={
+             "error": "quota_exceeded",
+             "detail": "API quota has been exceeded. Please try again in a few minutes.",
+             "original_error": str(exc)
+         }
+     )
+
  @app.exception_handler(Exception)
  async def general_exception_handler(request: Request, exc: Exception):
      error_message = str(exc)
      status_code = 500
      error_type = "unknown_error"
+
+     # Check for empty text parameter error from Gemini
+     if "Unable to submit request because it has an empty text parameter" in error_message:
+         logger.error("Caught empty text parameter error from Gemini")
+         return JSONResponse(
+             status_code=400,
+             content={
+                 "error": "validation_error",
+                 "detail": "Empty message content detected. Please provide a question."
+             }
+         )
+
+     # Check for Google API quota exceeded error
+     if "Resource has been exhausted" in error_message and "check quota" in error_message:
+         return JSONResponse(
+             status_code=429, # Too Many Requests
+             content={
+                 "error": "quota_exceeded",
+                 "detail": "API quota has been exceeded. Please try again in a few minutes."
+             })
+
+     # Check for Gemini token limit error
+     if isinstance(exc, ChatGoogleGenerativeAIError) and "token count" in error_message:
+         return JSONResponse(
+             status_code=413,
+             content={
+                 "error": "validation_error",
+                 "detail": "Selected content is too large for the model. Please reduce the number of files."
+             }
+         )
+
+     # Check for Google API quota exceeded error
+     if "Resource has been exhausted" in error_message and "check quota" in error_message:
+         return JSONResponse(
+             status_code=429, # Too Many Requests
+             content={
+                 "error": "quota_exceeded",
+                 "detail": "API quota has been exceeded. Please try again in a few minutes."
+             })
+
      try:
          # Check if this is a streaming error
          if isinstance(exc, EventStreamError):
@@ -124,7 +208,21 @@ async def general_exception_handler(request: Request, exc: Exception):
          logger.error(f"Error in exception handler: {str(e)}", exc_info=True)
          raise

- app.mount("/static", StaticFiles(directory="../templates/static"), name="static")
+ # Get the absolute path to the project root directory
+ project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+ # Define paths relative to project root
+ static_dir = os.path.join(project_root, "templates", "static")
+ testcases_dir = os.path.join(project_root, "tests", "frontend", "testcases")
+ templates_dir = os.path.join(project_root, "templates")
+
+ # Create directories if they don't exist
+ os.makedirs(static_dir, exist_ok=True)
+ os.makedirs(testcases_dir, exist_ok=True)
+ os.makedirs(templates_dir, exist_ok=True)
+
+ # Mount static files and templates
+ app.mount("/static", StaticFiles(directory=static_dir), name="static")

  # Only mount testcases directory if it exists
  testcases_dir = "../tests/frontend/testcases"
@@ -133,27 +231,109 @@ if os.path.exists(testcases_dir):
  else:
      logger.info(f"Testcases directory '{testcases_dir}' does not exist - skipping mount")

- templates = Jinja2Templates(directory="../templates")
+ templates = Jinja2Templates(directory=templates_dir)

  # Add a route for the frontend
  add_routes(app, agent_executor, disabled_endpoints=["playground"], path="/ziya")
  # Override the stream endpoint with our error handling
  @app.post("/ziya/stream")
  async def stream_endpoint(body: dict):
+
+     # Debug logging
+     logger.info("Stream endpoint request body:")
+     logger.info(f"Question: '{body.get('question', 'EMPTY')}'")
+     logger.info(f"Chat history length: {len(body.get('chat_history', []))}")
+     logger.info(f"Files count: {len(body.get('config', {}).get('files', []))}")
+     logger.info(f"Question type: {type(body.get('question', None))}")
+
+     # Log the first few files
+     if 'config' in body and 'files' in body['config']:
+         logger.info(f"First few files: {body['config']['files'][:5]}")
+
+     # Check if the question is empty or missing
+     if not body.get("question") or not body.get("question").strip():
+         logger.warning("Empty question detected, returning error response")
+         error_response = json.dumps({
+             "error": "validation_error",
+             "detail": "Please provide a question to continue."
+         })
+
+         # Return a properly formatted SSE response with the error
+         async def error_stream():
+             # Send the error message
+             yield f"data: {error_response}\n\n"
+             # Wait a moment to ensure the client receives it
+             await asyncio.sleep(0.1)
+             # Send an end message
+             yield "data: [DONE]\n\n"
+
+         return StreamingResponse(
+             error_stream(),
+             media_type="text/event-stream",
+             headers={"Cache-Control": "no-cache"}
+         )
      try:
+         # Check for empty question
+         if not body.get("question") or not body.get("question").strip():
+             logger.warning("Empty question detected in stream request")
+             # Return a friendly error message
+             return StreamingResponse(
+                 iter([f'data: {json.dumps({"error": "validation_error", "detail": "Please enter a question"})}' + '\n\n']),
+                 media_type="text/event-stream",
+                 headers={"Cache-Control": "no-cache"}
+             )
+
+         # Check for empty messages in chat history
+         if "chat_history" in body:
+             cleaned_history = []
+             for pair in body["chat_history"]:
+                 try:
+                     if not isinstance(pair, (list, tuple)) or len(pair) != 2:
+                         logger.warning(f"Invalid chat history pair format: {pair}")
+                         continue
+
+                     human, ai = pair
+                     if not isinstance(human, str) or not isinstance(ai, str):
+                         logger.warning(f"Non-string message in pair: human={type(human)}, ai={type(ai)}")
+                         continue
+
+                     if human.strip() and ai.strip():
+                         cleaned_history.append((human.strip(), ai.strip()))
+                     else:
+                         logger.warning(f"Empty message in pair: {pair}")
+                 except Exception as e:
+                     logger.error(f"Error processing chat history pair: {str(e)}")
+
+             logger.debug(f"Cleaned chat history from {len(body['chat_history'])} to {len(cleaned_history)} pairs")
+             body["chat_history"] = cleaned_history
+             logger.debug(f"Cleaned chat history: {json.dumps(cleaned_history)}")
+
          logger.info("Starting stream endpoint with body size: %d", len(str(body)))
         # Define the streaming response with proper error handling
         async def error_handled_stream():
+             response = None
             try:
+                 # Convert to ChatPromptValue before streaming
+                 if isinstance(body, dict) and "messages" in body:
+                     from langchain_core.prompt_values import ChatPromptValue
+                     from langchain_core.messages import HumanMessage
+                     body["messages"] = [HumanMessage(content=msg) for msg in body["messages"]]
+                     body = ChatPromptValue(messages=body["messages"])
                  # Create the iterator inside the error handling context
-                 iterator = agent_executor.astream_log(body, {})
+                 iterator = agent_executor.astream_log(body)
                  async for chunk in iterator:
                      logger.info("Processing chunk: %s",
                                  chunk if isinstance(chunk, dict) else chunk[:200] + "..." if len(chunk) > 200 else chunk)
                      if isinstance(chunk, dict) and "error" in chunk:
                          # Format error as SSE message
                          yield f"data: {json.dumps(chunk)}\n\n"
-                         logger.info("Sent error message: %s", error_msg)
+                         # Update file state before returning
+                         update_and_return(body)
+                         logger.info(f"Sent error message: {chunk}")
+                         return
+                     elif isinstance(chunk, Generation) and hasattr(chunk, 'text') and "quota_exceeded" in chunk.text:
+                         yield f"data: {chunk.text}\n\n"
+                         update_and_return(body)
                          return
                      else:
                          try:
@@ -166,9 +346,30 @@ async def stream_endpoint(body: dict):
                                  "detail": "Selected content is too large for the model. Please reduce the number of files."
                              }
                              yield f"data: {json.dumps(error_msg)}\n\n"
+                             update_and_return(body)
                              await response.flush()
                              logger.info("Sent EventStreamError message: %s", error_msg)
                              return
+                     except ChatGoogleGenerativeAIError as e:
+                         if "token count" in str(e):
+                             error_msg = {
+                                 "error": "validation_error",
+                                 "detail": "Selected content is too large for the model. Please reduce the number of files."
+                             }
+                             yield f"data: {json.dumps(error_msg)}\n\n"
+                             update_and_return(body)
+                             await response.flush()
+                             logger.info("Sent token limit error message: %s", error_msg)
+                             return
+                     except ResourceExhausted as e:
+                         error_msg = {
+                             "error": "quota_exceeded",
+                             "detail": "API quota has been exceeded. Please try again in a few minutes."
+                         }
+                         yield f"data: {json.dumps(error_msg)}\n\n"
+                         update_and_return(body)
+                         logger.error(f"Caught ResourceExhausted error: {str(e)}")
+                         return
              except EventStreamError as e:
                  if "validationException" in str(e):
                      error_msg = {
@@ -176,15 +377,18 @@ async def stream_endpoint(body: dict):
                      "detail": "Selected content is too large for the model. Please reduce the number of files."
                  }
                  yield f"data: {json.dumps(error_msg)}\n\n"
+                 update_and_return(body)
                  await response.flush()
                  return
              raise
+         finally:
+             update_and_return(body)
          return StreamingResponse(error_handled_stream(), media_type="text/event-stream", headers={"Cache-Control": "no-cache"})
      except Exception as e:
          logger.error(f"Error in stream endpoint: {str(e)}")
          error_msg = {"error": "stream_error", "detail": str(e)}
          logger.error(f"Sending error response: {error_msg}")
-         logger.error(f"Sending error response: {error_msg}")
+         update_and_return(body)
          return StreamingResponse(iter([f"data: {json.dumps(error_msg)}\n\n"]), media_type="text/event-stream", headers={"Cache-Control": "no-cache"})


@@ -287,24 +491,145 @@ async def get_folders():
      return get_cached_folder_structure(user_codebase_dir, ignored_patterns, max_depth)

  @app.get('/api/default-included-folders')
- def get_default_included_folders():
+ def get_model_id():
      return {'defaultIncludedFolders': []}

+ @app.get('/api/current-model')
+ def get_current_model():
+     """Get detailed information about the currently active model."""
+     logger.info(
+         "Current model info request: %s",
+         { 'model_id': model.model_id,
+           'endpoint': os.environ.get("ZIYA_ENDPOINT", "bedrock")
+         })
+
+     # Get actual model settings
+     model_kwargs = {}
+     if hasattr(model, 'model') and hasattr(model.model, 'model_kwargs'):
+         model_kwargs = model.model.model_kwargs
+     elif hasattr(model, 'model_kwargs'):
+         model_kwargs = model.model_kwargs
+
+     logger.info("Current model configuration:")
+     logger.info(f" Model ID: {model.model_id}")
+     logger.info(f" Temperature: {model_kwargs.get('temperature', 'Not set')} (env: {os.environ.get('ZIYA_TEMPERATURE', 'Not set')})")
+     logger.info(f" Top K: {model_kwargs.get('top_k', 'Not set')} (env: {os.environ.get('ZIYA_TOP_K', 'Not set')})")
+     logger.info(f" Max tokens: {model_kwargs.get('max_tokens', 'Not set')} (env: {os.environ.get('ZIYA_MAX_OUTPUT_TOKENS', 'Not set')})")
+     logger.info(f" Thinking mode: {os.environ.get('ZIYA_THINKING_MODE', 'Not set')}")
+
+
+     return {
+         'model_id': model.model_id,
+         'endpoint': os.environ.get("ZIYA_ENDPOINT", "bedrock"),
+         'settings': {
+             'temperature': model_kwargs.get('temperature',
+                                             float(os.environ.get("ZIYA_TEMPERATURE", 0.3))),
+             'max_output_tokens': model_kwargs.get('max_tokens',
+                                                   int(os.environ.get("ZIYA_MAX_OUTPUT_TOKENS", 4096))),
+             'top_k': model_kwargs.get('top_k',
+                                       int(os.environ.get("ZIYA_TOP_K", 15))),
+             'thinking_mode': os.environ.get("ZIYA_THINKING_MODE") == "1"

+         }
+     }
+
  @app.get('/api/model-id')
  def get_model_id():
-     # Get the model ID from the configured Bedrock client
-     return {'model_id': model.model_id.split(':')[0].split('/')[-1]}
+     if os.environ.get("ZIYA_ENDPOINT") == "google":
+         model_name = os.environ.get("ZIYA_MODEL", "gemini-pro")
+         return {'model_id': model_name}
+     elif os.environ.get("ZIYA_MODEL"):
+         return {'model_id': os.environ.get("ZIYA_MODEL")}
+     else:
+         # Bedrock
+         return {'model_id': model.model_id.split(':')[0].split('/')[-1]}
+
+ @app.post('/api/set-model')
+ async def set_model(request: SetModelRequest):
+     """Set the active model for the current endpoint."""
+     try:
+         model_id = request.model_id
+         if not model_id:
+             logger.error("Empty model ID provided")
+             raise HTTPException(status_code=400, detail="Model ID is required")
+
+         # Update environment variable
+         os.environ["ZIYA_MODEL"] = model_id
+         logger.info(f"Setting model to: {model_id}")
+
+         # Reinitialize the model
+         try:
+             logger.info(f"Reinitializing model with ID: {model_id}")
+             new_model = ModelManager.initialize_model(force_reinit=True)
+             new_model.model_id = model_id # Ensure model ID is set correctly
+
+             # Update the global model instance
+             global model
+             model = RetryingChatBedrock(new_model)
+
+             return {"status": "success", "model": model_id}
+         except Exception as e:
+             logger.error(f"Failed to initialize model: {str(e)}")
+             raise HTTPException(status_code=500, detail=f"Failed to initialize model: {str(e)}")
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.get('/api/available-models')
+ def get_available_models():
+     """Get list of available models for the current endpoint."""
+     endpoint = os.environ.get("ZIYA_ENDPOINT", "bedrock")
+     try:
+         models = []
+         for name, config in ModelManager.MODEL_CONFIGS[endpoint].items():
+             models.append({
+                 "id": config["model_id"],
+                 "name": name
+             })
+         return models
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+     return {'model_id': model.model_id.split(':')[0].split('/')[-1]}
+
+ @app.get('/api/model-capabilities')
+ def get_model_capabilities(model: str = None):
+     """Get the capabilities of the current model."""
+     endpoint = os.environ.get("ZIYA_ENDPOINT", "bedrock")
+     # If model parameter is provided, get capabilities for that model
+     # Otherwise use current model
+     model_name = model if model else os.environ.get("ZIYA_MODEL")
+
+     try:
+         model_config = ModelManager.get_model_config(endpoint, model_name)
+         capabilities = {
+             "supports_thinking": model_config.get("supports_thinking", False),
+             "max_output_tokens": model_config.get("max_output_tokens", 4096),
+             "temperature_range": {"min": 0, "max": 1, "default": model_config.get("temperature", 0.3)},
+             "top_k_range": {"min": 0, "max": 500, "default": model_config.get("top_k", 15)} if endpoint == "bedrock" else None
+         }
+         return capabilities
+     except Exception as e:
+         logger.error(f"Error getting model capabilities: {str(e)}")
+         return {"error": str(e)}

  class ApplyChangesRequest(BaseModel):
      diff: str
      filePath: str

+ class ModelSettingsRequest(BaseModel):
+     temperature: float = Field(default=0.3, ge=0, le=1)
+     top_k: int = Field(default=15, ge=0, le=500)
+     max_output_tokens: int = Field(default=4096, ge=1, le=128000)
+     thinking_mode: bool = Field(default=False)
+
+
  class TokenCountRequest(BaseModel):
      text: str

  def count_tokens_fallback(text: str) -> int:
      """Fallback methods for counting tokens when primary method fails."""
      try:
+         os.environ["TOKENIZERS_PARALLELISM"] = "false"
          # First try using tiktoken directly with cl100k_base (used by Claude)
          encoding = tiktoken.get_encoding("cl100k_base")
          return len(encoding.encode(text))
@@ -346,6 +671,70 @@ async def count_tokens(request: TokenCountRequest) -> Dict[str, int]:
          # Return 0 in case of error to avoid breaking the frontend
          return {"token_count": 0}

+ @app.post('/api/model-settings')
+ async def update_model_settings(settings: ModelSettingsRequest):
+     global model
+     try:
+         # Log the requested settings
+         logger.info(f"Requested model settings update:")
+         logger.info(f" Temperature: {settings.temperature}")
+         logger.info(f" Top K: {settings.top_k}")
+         logger.info(f" Max Output Tokens: {settings.max_output_tokens}")
+         logger.info(f" Thinking Mode: {settings.thinking_mode}")
+
+         # Store settings in environment variables for the agent to use
+         os.environ["ZIYA_TEMPERATURE"] = str(settings.temperature)
+         os.environ["ZIYA_TOP_K"] = str(settings.top_k)
+         os.environ["ZIYA_MAX_OUTPUT_TOKENS"] = str(settings.max_output_tokens)
+         os.environ["ZIYA_THINKING_MODE"] = "1" if settings.thinking_mode else "0"
+
+         # Update the model's kwargs directly
+         if hasattr(model, 'model'):
+             # For wrapped models (e.g., RetryingChatBedrock)
+             if hasattr(model.model, 'model_kwargs'):
+                 model.model.model_kwargs.update({
+                     'temperature': settings.temperature,
+                     'top_k': settings.top_k,
+                     'max_tokens': settings.max_output_tokens
+                 })
+         elif hasattr(model, 'model_kwargs'):
+             # For direct model instances
+             model.model_kwargs.update({
+                 'temperature': settings.temperature,
+                 'top_k': settings.top_k,
+                 'max_tokens': settings.max_output_tokens
+             })
+
+         # Force model reinitialization to apply new settings
+         from app.agents.models import ModelManager
+         model = ModelManager.initialize_model(force_reinit=True)
+         model.model_id = os.environ.get("ZIYA_MODEL", model.model_id)
+
+         # Get the model's current settings for verification
+         model_kwargs = {}
+         if hasattr(model, 'model') and hasattr(model.model, 'model_kwargs'):
+             model_kwargs = model.model.model_kwargs
+         elif hasattr(model, 'model_kwargs'):
+             model_kwargs = model.model_kwargs
+
+         logger.info("Current model settings after update:")
+         logger.info(f" Model kwargs temperature: {model_kwargs.get('temperature', 'Not set')}")
+         logger.info(f" Model kwargs top_k: {model_kwargs.get('top_k', 'Not set')}")
+         logger.info(f" Model kwargs max_tokens: {model_kwargs.get('max_tokens', 'Not set')}")
+         logger.info(f" Environment ZIYA_THINKING_MODE: {os.environ.get('ZIYA_THINKING_MODE')}")
+
+         return {
+             'status': 'success',
+             'message': 'Model settings updated',
+             'settings': model_kwargs
+         }
+     except Exception as e:
+         logger.error(f"Error updating model settings: {str(e)}", exc_info=True)
+         raise HTTPException(
+             status_code=500,
+             detail=f"Error updating model settings: {str(e)}"
+         )
+
  @app.post('/api/apply-changes')
  async def apply_changes(request: ApplyChangesRequest):
      try:
@@ -405,4 +794,4 @@ async def apply_changes(request: ApplyChangesRequest):
          )

  if __name__ == "__main__":
-     uvicorn.run(app, host="0.0.0.0", port=8000)
+     uvicorn.run(app, host="0.0.0.0", port=DEFAULT_PORT)