PyPI - stravinsky - Versions diffs - 0.2.52__py3-none-any.whl → 0.4.18__py3-none-any.whl - Mend

stravinsky 0.2.52__py3-none-any.whl → 0.4.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of stravinsky might be problematic. Click here for more details.

Files changed (58) hide show

mcp_bridge/__init__.py +1 -1
mcp_bridge/auth/token_store.py +113 -11
mcp_bridge/cli/__init__.py +6 -0
mcp_bridge/cli/install_hooks.py +1265 -0
mcp_bridge/cli/session_report.py +585 -0
mcp_bridge/config/MANIFEST_SCHEMA.md +305 -0
mcp_bridge/config/README.md +276 -0
mcp_bridge/config/hook_config.py +249 -0
mcp_bridge/config/hooks_manifest.json +138 -0
mcp_bridge/config/rate_limits.py +222 -0
mcp_bridge/config/skills_manifest.json +128 -0
mcp_bridge/hooks/HOOKS_SETTINGS.json +175 -0
mcp_bridge/hooks/README.md +215 -0
mcp_bridge/hooks/__init__.py +119 -60
mcp_bridge/hooks/edit_recovery.py +42 -37
mcp_bridge/hooks/git_noninteractive.py +89 -0
mcp_bridge/hooks/keyword_detector.py +30 -0
mcp_bridge/hooks/manager.py +8 -0
mcp_bridge/hooks/notification_hook.py +103 -0
mcp_bridge/hooks/parallel_execution.py +111 -0
mcp_bridge/hooks/pre_compact.py +82 -183
mcp_bridge/hooks/rules_injector.py +507 -0
mcp_bridge/hooks/session_notifier.py +125 -0
mcp_bridge/{native_hooks → hooks}/stravinsky_mode.py +51 -16
mcp_bridge/hooks/subagent_stop.py +98 -0
mcp_bridge/hooks/task_validator.py +73 -0
mcp_bridge/hooks/tmux_manager.py +141 -0
mcp_bridge/hooks/todo_continuation.py +90 -0
mcp_bridge/hooks/todo_delegation.py +88 -0
mcp_bridge/hooks/tool_messaging.py +267 -0
mcp_bridge/hooks/truncator.py +21 -17
mcp_bridge/notifications.py +151 -0
mcp_bridge/prompts/multimodal.py +24 -3
mcp_bridge/server.py +214 -49
mcp_bridge/server_tools.py +445 -0
mcp_bridge/tools/__init__.py +22 -18
mcp_bridge/tools/agent_manager.py +220 -32
mcp_bridge/tools/code_search.py +97 -11
mcp_bridge/tools/lsp/__init__.py +7 -0
mcp_bridge/tools/lsp/manager.py +448 -0
mcp_bridge/tools/lsp/tools.py +637 -150
mcp_bridge/tools/model_invoke.py +208 -106
mcp_bridge/tools/query_classifier.py +323 -0
mcp_bridge/tools/semantic_search.py +3042 -0
mcp_bridge/tools/templates.py +32 -18
mcp_bridge/update_manager.py +589 -0
mcp_bridge/update_manager_pypi.py +299 -0
stravinsky-0.4.18.dist-info/METADATA +468 -0
stravinsky-0.4.18.dist-info/RECORD +88 -0
stravinsky-0.4.18.dist-info/entry_points.txt +5 -0
mcp_bridge/native_hooks/edit_recovery.py +0 -46
mcp_bridge/native_hooks/todo_delegation.py +0 -54
mcp_bridge/native_hooks/truncator.py +0 -23
stravinsky-0.2.52.dist-info/METADATA +0 -204
stravinsky-0.2.52.dist-info/RECORD +0 -63
stravinsky-0.2.52.dist-info/entry_points.txt +0 -3
/mcp_bridge/{native_hooks → hooks}/context.py +0 -0
{stravinsky-0.2.52.dist-info → stravinsky-0.4.18.dist-info}/WHEEL +0 -0

mcp_bridge/tools/model_invoke.py CHANGED Viewed

@@ -5,6 +5,7 @@ These tools use OAuth tokens from the token store to authenticate
 API requests to external model providers.
 """
+import asyncio
 import logging
 import os
 import time
@@ -134,6 +135,9 @@ _SESSION_CACHE: dict[str, str] = {}
 # Pooled HTTP client for connection reuse
 _HTTP_CLIENT: httpx.AsyncClient | None = None
+# Rate limiting: Max 5 concurrent Gemini requests to prevent burst rate limits
+_GEMINI_SEMAPHORE: asyncio.Semaphore | None = None
 def _get_session_id(conversation_key: str | None = None) -> str:
     """
@@ -174,6 +178,19 @@ async def _get_http_client() -> httpx.AsyncClient:
     return _HTTP_CLIENT
+def _get_gemini_semaphore() -> asyncio.Semaphore:
+    """
+    Get or create semaphore for Gemini API rate limiting.
+    Limits concurrent Gemini requests to prevent burst rate limits (429 errors).
+    Max 5 concurrent requests balances throughput with API quota constraints.
+    """
+    global _GEMINI_SEMAPHORE
+    if _GEMINI_SEMAPHORE is None:
+        _GEMINI_SEMAPHORE = asyncio.Semaphore(5)
+    return _GEMINI_SEMAPHORE
 def _extract_gemini_response(data: dict) -> str:
     """
     Extract text from Gemini response, handling thinking blocks.
@@ -284,18 +301,25 @@ async def _ensure_valid_token(token_store: TokenStore, provider: str) -> str:
 def is_retryable_exception(e: Exception) -> bool:
-    """Check if an exception is retryable (429 or 5xx)."""
+    """
+    Check if an exception is retryable (5xx only, NOT 429).
+    429 (Rate Limit) errors should fail fast - retrying makes the problem worse
+    by adding more requests to an already exhausted quota. The semaphore prevents
+    these in the first place, but if one slips through, we shouldn't retry.
+    """
     if isinstance(e, httpx.HTTPStatusError):
-        return e.response.status_code == 429 or 500 <= e.response.status_code < 600
+        # Only retry server errors (5xx), not rate limits (429)
+        return 500 <= e.response.status_code < 600
     return False
 @retry(
-    stop=stop_after_attempt(5),
-    wait=wait_exponential(multiplier=1, min=4, max=60),
+    stop=stop_after_attempt(2),  # Reduced from 5 to 2 attempts
+    wait=wait_exponential(multiplier=2, min=10, max=120),  # Longer waits: 10s → 20s → 40s
     retry=retry_if_exception(is_retryable_exception),
     before_sleep=lambda retry_state: logger.info(
-        f"Rate limited or server error, retrying in {retry_state.next_action.sleep} seconds..."
+        f"Server error, retrying in {retry_state.next_action.sleep} seconds..."
     ),
 )
 async def invoke_gemini(
@@ -305,11 +329,13 @@ async def invoke_gemini(
     temperature: float = 0.7,
     max_tokens: int = 4096,
     thinking_budget: int = 0,
+    image_path: str | None = None,
 ) -> str:
     """
     Invoke a Gemini model with the given prompt.
     Uses OAuth authentication with Antigravity credentials.
+    Supports vision API for image/PDF analysis when image_path is provided.
     Args:
         token_store: Token store for OAuth credentials
@@ -317,6 +343,8 @@ async def invoke_gemini(
         model: Gemini model to use
         temperature: Sampling temperature (0.0-2.0)
         max_tokens: Maximum tokens in response
+        thinking_budget: Tokens reserved for internal reasoning
+        image_path: Optional path to image/PDF for vision analysis (token optimization)
     Returns:
         The model's response text.
@@ -349,132 +377,198 @@ async def invoke_gemini(
     # Extract agent context for logging (may be passed via params or original call)
     agent_context = params.get("agent_context", {})
     agent_type = agent_context.get("agent_type", "direct")
+    task_id = agent_context.get("task_id", "")
+    description = agent_context.get("description", "")
     prompt_summary = _summarize_prompt(prompt)
     # Log with agent context and prompt summary
     logger.info(f"[{agent_type}] → {model}: {prompt_summary}")
-    access_token = await _ensure_valid_token(token_store, "gemini")
-    # Resolve user-friendly model name to actual API model ID
-    api_model = resolve_gemini_model(model)
+    # USER-VISIBLE NOTIFICATION (stderr) - Shows when Gemini is invoked
+    import sys
+    task_info = f" task={task_id}" if task_id else ""
+    desc_info = f" | {description}" if description else ""
+    print(f"🔮 GEMINI: {model} | agent={agent_type}{task_info}{desc_info}", file=sys.stderr)
-    # Use persistent session ID for thinking signature caching
-    session_id = _get_session_id()
-    project_id = os.getenv("STRAVINSKY_ANTIGRAVITY_PROJECT_ID", ANTIGRAVITY_DEFAULT_PROJECT_ID)
+    # Acquire semaphore to limit concurrent Gemini requests (prevents 429 rate limits)
+    semaphore = _get_gemini_semaphore()
+    async with semaphore:
+        access_token = await _ensure_valid_token(token_store, "gemini")
-    headers = {
-        "Authorization": f"Bearer {access_token}",
-        "Content-Type": "application/json",
-        **ANTIGRAVITY_HEADERS,  # Include Antigravity headers
-    }
+        # Resolve user-friendly model name to actual API model ID
+        api_model = resolve_gemini_model(model)
-    # Build inner request payload
-    # Per API spec: contents must include role ("user" or "model")
-    inner_payload = {
-        "contents": [{"role": "user", "parts": [{"text": prompt}]}],
-        "generationConfig": {
-            "temperature": temperature,
-            "maxOutputTokens": max_tokens,
-        },
-        "sessionId": session_id,
-    }
+        # Use persistent session ID for thinking signature caching
+        session_id = _get_session_id()
+        project_id = os.getenv("STRAVINSKY_ANTIGRAVITY_PROJECT_ID", ANTIGRAVITY_DEFAULT_PROJECT_ID)
-    # Add thinking budget if supported by model/API
-    if thinking_budget > 0:
-        # For Gemini 2.0+ Thinking models
-        # Per Antigravity API: use "thinkingBudget", NOT "tokenLimit"
-        inner_payload["generationConfig"]["thinkingConfig"] = {
-            "includeThoughts": True,
-            "thinkingBudget": thinking_budget,
+        headers = {
+            "Authorization": f"Bearer {access_token}",
+            "Content-Type": "application/json",
+            **ANTIGRAVITY_HEADERS,  # Include Antigravity headers
         }
-    # Wrap request body per reference implementation
-    try:
-        import uuid as uuid_module  # Local import workaround for MCP context issue
+        # Build inner request payload
+        # Per API spec: contents must include role ("user" or "model")
+        # Build parts list - text prompt plus optional image
+        parts = [{"text": prompt}]
+        # Add image data for vision analysis (token optimization for multimodal)
+        if image_path:
+            import base64
+            from pathlib import Path
+            image_file = Path(image_path)
+            if image_file.exists():
+                # Determine MIME type
+                suffix = image_file.suffix.lower()
+                mime_types = {
+                    ".png": "image/png",
+                    ".jpg": "image/jpeg",
+                    ".jpeg": "image/jpeg",
+                    ".gif": "image/gif",
+                    ".webp": "image/webp",
+                    ".pdf": "application/pdf",
+                }
+                mime_type = mime_types.get(suffix, "image/png")
-        request_id = f"invoke-{uuid_module.uuid4()}"
-    except Exception as e:
-        logger.error(f"UUID IMPORT FAILED: {e}")
-        raise RuntimeError(f"CUSTOM ERROR: UUID import failed: {e}")
-    wrapped_payload = {
-        "project": project_id,
-        "model": api_model,
-        "userAgent": "antigravity",
-        "requestId": request_id,
-        "request": inner_payload,
-    }
+                # Read and base64 encode
+                image_data = base64.b64encode(image_file.read_bytes()).decode("utf-8")
-    # Get pooled HTTP client for connection reuse
-    client = await _get_http_client()
+                # Add inline image data for Gemini Vision API
+                parts.append({
+                    "inlineData": {
+                        "mimeType": mime_type,
+                        "data": image_data,
+                    }
+                })
+                logger.info(f"[multimodal] Added vision data: {image_path} ({mime_type})")
-    # Try endpoints in fallback order with thinking recovery
-    response = None
-    last_error = None
-    max_retries = 2  # For thinking recovery
+        inner_payload = {
+            "contents": [{"role": "user", "parts": parts}],
+            "generationConfig": {
+                "temperature": temperature,
+                "maxOutputTokens": max_tokens,
+            },
+            "sessionId": session_id,
+        }
-    for retry_attempt in range(max_retries):
-        for endpoint in ANTIGRAVITY_ENDPOINTS:
-            # Reference uses: {endpoint}/v1internal:generateContent (NOT /models/{model})
-            api_url = f"{endpoint}/v1internal:generateContent"
+        # Add thinking budget if supported by model/API
+        if thinking_budget > 0:
+            # For Gemini 2.0+ Thinking models
+            # Per Antigravity API: use "thinkingBudget", NOT "tokenLimit"
+            inner_payload["generationConfig"]["thinkingConfig"] = {
+                "includeThoughts": True,
+                "thinkingBudget": thinking_budget,
+            }
-            try:
-                response = await client.post(
-                    api_url,
-                    headers=headers,
-                    json=wrapped_payload,
-                    timeout=120.0,
-                )
+        # Wrap request body per reference implementation
+        try:
+            import uuid as uuid_module  # Local import workaround for MCP context issue
-                # 401/403 might be endpoint-specific, try next endpoint
-                if response.status_code in (401, 403):
-                    logger.warning(
-                        f"[Gemini] Endpoint {endpoint} returned {response.status_code}, trying next"
+            request_id = f"invoke-{uuid_module.uuid4()}"
+        except Exception as e:
+            logger.error(f"UUID IMPORT FAILED: {e}")
+            raise RuntimeError(f"CUSTOM ERROR: UUID import failed: {e}")
+        wrapped_payload = {
+            "project": project_id,
+            "model": api_model,
+            "userAgent": "antigravity",
+            "requestId": request_id,
+            "request": inner_payload,
+        }
+        # Get pooled HTTP client for connection reuse
+        client = await _get_http_client()
+        # Try endpoints in fallback order with thinking recovery
+        response = None
+        last_error = None
+        max_retries = 2  # For thinking recovery
+        for retry_attempt in range(max_retries):
+            for endpoint in ANTIGRAVITY_ENDPOINTS:
+                # Reference uses: {endpoint}/v1internal:generateContent (NOT /models/{model})
+                api_url = f"{endpoint}/v1internal:generateContent"
+                try:
+                    response = await client.post(
+                        api_url,
+                        headers=headers,
+                        json=wrapped_payload,
+                        timeout=120.0,
                     )
-                    last_error = Exception(f"{response.status_code} from {endpoint}")
-                    continue
-                # Check for thinking-related errors that need recovery
-                if response.status_code in (400, 500):
-                    error_text = response.text.lower()
-                    if "thinking" in error_text or "signature" in error_text:
+                    # 401/403 might be endpoint-specific, try next endpoint
+                    if response.status_code in (401, 403):
                         logger.warning(
-                            f"[Gemini] Thinking error detected, clearing session cache and retrying"
+                            f"[Gemini] Endpoint {endpoint} returned {response.status_code}, trying next"
                         )
-                        clear_session_cache()
-                        # Update session ID for retry
-                        wrapped_payload["request"]["sessionId"] = _get_session_id()
-                        last_error = Exception(f"Thinking error: {response.text[:200]}")
-                        break  # Break inner loop to retry with new session
-                # If we got a non-retryable response (success or 4xx client error), use it
-                if response.status_code < 500 and response.status_code != 429:
-                    break
+                        last_error = Exception(f"{response.status_code} from {endpoint}")
+                        continue
+                    # Check for thinking-related errors that need recovery
+                    if response.status_code in (400, 500):
+                        error_text = response.text.lower()
+                        if "thinking" in error_text or "signature" in error_text:
+                            logger.warning(
+                                f"[Gemini] Thinking error detected, clearing session cache and retrying"
+                            )
+                            clear_session_cache()
+                            # Update session ID for retry
+                            wrapped_payload["request"]["sessionId"] = _get_session_id()
+                            last_error = Exception(f"Thinking error: {response.text[:200]}")
+                            break  # Break inner loop to retry with new session
+                    # If we got a non-retryable response (success or 4xx client error), use it
+                    if response.status_code < 500 and response.status_code != 429:
+                        break
+                except httpx.TimeoutException as e:
+                    last_error = e
+                    continue
+                except Exception as e:
+                    last_error = e
+                    continue
+            else:
+                # Inner loop completed without break - no thinking recovery needed
+                break
-            except httpx.TimeoutException as e:
-                last_error = e
+            # If we broke out of inner loop for thinking recovery, continue outer retry loop
+            if response and response.status_code in (400, 500):
                 continue
-            except Exception as e:
-                last_error = e
-                continue
-        else:
-            # Inner loop completed without break - no thinking recovery needed
             break
-        # If we broke out of inner loop for thinking recovery, continue outer retry loop
-        if response and response.status_code in (400, 500):
-            continue
-        break
+        if response is None:
+            # FALLBACK: Try Claude sonnet-4.5 for agents that support it
+            agent_context = params.get("agent_context", {})
+            agent_type = agent_context.get("agent_type", "unknown")
+            if agent_type in ("dewey", "explore", "document_writer", "multimodal"):
+                logger.warning(f"[{agent_type}] Gemini failed, falling back to Claude sonnet-4.5")
+                try:
+                    import subprocess
+                    fallback_result = subprocess.run(
+                        ["claude", "-p", prompt, "--model", "sonnet", "--output-format", "text"],
+                        capture_output=True,
+                        text=True,
+                        timeout=120,
+                        cwd=os.getcwd(),
+                    )
+                    if fallback_result.returncode == 0 and fallback_result.stdout.strip():
+                        return fallback_result.stdout.strip()
+                except Exception as fallback_error:
+                    logger.error(f"Fallback to Claude also failed: {fallback_error}")
-    if response is None:
-        raise ValueError(f"All Antigravity endpoints failed: {last_error}")
+            raise ValueError(f"All Antigravity endpoints failed: {last_error}")
-    response.raise_for_status()
-    data = response.json()
+        response.raise_for_status()
+        data = response.json()
-    # Extract text from response using thinking-aware parser
-    return _extract_gemini_response(data)
+        # Extract text from response using thinking-aware parser
+        return _extract_gemini_response(data)
 # ========================
@@ -761,11 +855,11 @@ async def invoke_gemini_agentic(
 @retry(
-    stop=stop_after_attempt(5),
-    wait=wait_exponential(multiplier=1, min=4, max=60),
+    stop=stop_after_attempt(2),  # Reduced from 5 to 2 attempts
+    wait=wait_exponential(multiplier=2, min=10, max=120),  # Longer waits: 10s → 20s → 40s
     retry=retry_if_exception(is_retryable_exception),
     before_sleep=lambda retry_state: logger.info(
-        f"Rate limited or server error, retrying in {retry_state.next_action.sleep} seconds..."
+        f"Server error, retrying in {retry_state.next_action.sleep} seconds..."
     ),
 )
 async def invoke_openai(
@@ -816,11 +910,19 @@ async def invoke_openai(
     # Extract agent context for logging (may be passed via params or original call)
     agent_context = params.get("agent_context", {})
     agent_type = agent_context.get("agent_type", "direct")
+    task_id = agent_context.get("task_id", "")
+    description = agent_context.get("description", "")
     prompt_summary = _summarize_prompt(prompt)
     # Log with agent context and prompt summary
     logger.info(f"[{agent_type}] → {model}: {prompt_summary}")
+    # USER-VISIBLE NOTIFICATION (stderr) - Shows when OpenAI is invoked
+    import sys
+    task_info = f" task={task_id}" if task_id else ""
+    desc_info = f" | {description}" if description else ""
+    print(f"🧠 OPENAI: {model} | agent={agent_type}{task_info}{desc_info}", file=sys.stderr)
     access_token = await _ensure_valid_token(token_store, "openai")
     logger.info(f"[invoke_openai] Got access token")

stravinsky 0.2.52__py3-none-any.whl → 0.4.18__py3-none-any.whl

Potentially problematic release.

stravinsky 0.2.52__py3-none-any.whl → 0.4.18__py3-none-any.whl