stravinsky 0.2.67__py3-none-any.whl → 0.4.66__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of stravinsky might be problematic.

Files changed (190)
  1. mcp_bridge/__init__.py +1 -1
  2. mcp_bridge/auth/__init__.py +16 -6
  3. mcp_bridge/auth/cli.py +202 -11
  4. mcp_bridge/auth/oauth.py +1 -2
  5. mcp_bridge/auth/openai_oauth.py +4 -7
  6. mcp_bridge/auth/token_store.py +112 -11
  7. mcp_bridge/cli/__init__.py +1 -1
  8. mcp_bridge/cli/install_hooks.py +503 -107
  9. mcp_bridge/cli/session_report.py +0 -3
  10. mcp_bridge/config/MANIFEST_SCHEMA.md +305 -0
  11. mcp_bridge/config/README.md +276 -0
  12. mcp_bridge/config/__init__.py +2 -2
  13. mcp_bridge/config/hook_config.py +247 -0
  14. mcp_bridge/config/hooks_manifest.json +138 -0
  15. mcp_bridge/config/rate_limits.py +317 -0
  16. mcp_bridge/config/skills_manifest.json +128 -0
  17. mcp_bridge/hooks/HOOKS_SETTINGS.json +17 -4
  18. mcp_bridge/hooks/__init__.py +19 -4
  19. mcp_bridge/hooks/agent_reminder.py +4 -4
  20. mcp_bridge/hooks/auto_slash_command.py +5 -5
  21. mcp_bridge/hooks/budget_optimizer.py +2 -2
  22. mcp_bridge/hooks/claude_limits_hook.py +114 -0
  23. mcp_bridge/hooks/comment_checker.py +3 -4
  24. mcp_bridge/hooks/compaction.py +2 -2
  25. mcp_bridge/hooks/context.py +2 -1
  26. mcp_bridge/hooks/context_monitor.py +2 -2
  27. mcp_bridge/hooks/delegation_policy.py +85 -0
  28. mcp_bridge/hooks/directory_context.py +3 -3
  29. mcp_bridge/hooks/edit_recovery.py +3 -2
  30. mcp_bridge/hooks/edit_recovery_policy.py +49 -0
  31. mcp_bridge/hooks/empty_message_sanitizer.py +2 -2
  32. mcp_bridge/hooks/events.py +160 -0
  33. mcp_bridge/hooks/git_noninteractive.py +4 -4
  34. mcp_bridge/hooks/keyword_detector.py +8 -10
  35. mcp_bridge/hooks/manager.py +43 -22
  36. mcp_bridge/hooks/notification_hook.py +13 -6
  37. mcp_bridge/hooks/parallel_enforcement_policy.py +67 -0
  38. mcp_bridge/hooks/parallel_enforcer.py +5 -5
  39. mcp_bridge/hooks/parallel_execution.py +22 -10
  40. mcp_bridge/hooks/post_tool/parallel_validation.py +103 -0
  41. mcp_bridge/hooks/pre_compact.py +8 -9
  42. mcp_bridge/hooks/pre_tool/agent_spawn_validator.py +115 -0
  43. mcp_bridge/hooks/preemptive_compaction.py +2 -3
  44. mcp_bridge/hooks/routing_notifications.py +80 -0
  45. mcp_bridge/hooks/rules_injector.py +11 -19
  46. mcp_bridge/hooks/session_idle.py +4 -4
  47. mcp_bridge/hooks/session_notifier.py +4 -4
  48. mcp_bridge/hooks/session_recovery.py +4 -5
  49. mcp_bridge/hooks/stravinsky_mode.py +1 -1
  50. mcp_bridge/hooks/subagent_stop.py +1 -3
  51. mcp_bridge/hooks/task_validator.py +2 -2
  52. mcp_bridge/hooks/tmux_manager.py +7 -8
  53. mcp_bridge/hooks/todo_delegation.py +4 -1
  54. mcp_bridge/hooks/todo_enforcer.py +180 -10
  55. mcp_bridge/hooks/tool_messaging.py +113 -10
  56. mcp_bridge/hooks/truncation_policy.py +37 -0
  57. mcp_bridge/hooks/truncator.py +1 -2
  58. mcp_bridge/metrics/cost_tracker.py +115 -0
  59. mcp_bridge/native_search.py +93 -0
  60. mcp_bridge/native_watcher.py +118 -0
  61. mcp_bridge/notifications.py +150 -0
  62. mcp_bridge/orchestrator/enums.py +11 -0
  63. mcp_bridge/orchestrator/router.py +165 -0
  64. mcp_bridge/orchestrator/state.py +32 -0
  65. mcp_bridge/orchestrator/visualization.py +14 -0
  66. mcp_bridge/orchestrator/wisdom.py +34 -0
  67. mcp_bridge/prompts/__init__.py +1 -8
  68. mcp_bridge/prompts/dewey.py +1 -1
  69. mcp_bridge/prompts/planner.py +2 -4
  70. mcp_bridge/prompts/stravinsky.py +53 -31
  71. mcp_bridge/proxy/__init__.py +0 -0
  72. mcp_bridge/proxy/client.py +70 -0
  73. mcp_bridge/proxy/model_server.py +157 -0
  74. mcp_bridge/routing/__init__.py +43 -0
  75. mcp_bridge/routing/config.py +250 -0
  76. mcp_bridge/routing/model_tiers.py +135 -0
  77. mcp_bridge/routing/provider_state.py +261 -0
  78. mcp_bridge/routing/task_classifier.py +190 -0
  79. mcp_bridge/server.py +542 -59
  80. mcp_bridge/server_tools.py +738 -6
  81. mcp_bridge/tools/__init__.py +40 -25
  82. mcp_bridge/tools/agent_manager.py +616 -697
  83. mcp_bridge/tools/background_tasks.py +13 -17
  84. mcp_bridge/tools/code_search.py +70 -53
  85. mcp_bridge/tools/continuous_loop.py +0 -1
  86. mcp_bridge/tools/dashboard.py +19 -0
  87. mcp_bridge/tools/find_code.py +296 -0
  88. mcp_bridge/tools/init.py +1 -0
  89. mcp_bridge/tools/list_directory.py +42 -0
  90. mcp_bridge/tools/lsp/__init__.py +12 -5
  91. mcp_bridge/tools/lsp/manager.py +471 -0
  92. mcp_bridge/tools/lsp/tools.py +723 -207
  93. mcp_bridge/tools/model_invoke.py +1195 -273
  94. mcp_bridge/tools/mux_client.py +75 -0
  95. mcp_bridge/tools/project_context.py +1 -2
  96. mcp_bridge/tools/query_classifier.py +406 -0
  97. mcp_bridge/tools/read_file.py +84 -0
  98. mcp_bridge/tools/replace.py +45 -0
  99. mcp_bridge/tools/run_shell_command.py +38 -0
  100. mcp_bridge/tools/search_enhancements.py +347 -0
  101. mcp_bridge/tools/semantic_search.py +3627 -0
  102. mcp_bridge/tools/session_manager.py +0 -2
  103. mcp_bridge/tools/skill_loader.py +0 -1
  104. mcp_bridge/tools/task_runner.py +5 -7
  105. mcp_bridge/tools/templates.py +3 -3
  106. mcp_bridge/tools/tool_search.py +331 -0
  107. mcp_bridge/tools/write_file.py +29 -0
  108. mcp_bridge/update_manager.py +585 -0
  109. mcp_bridge/update_manager_pypi.py +297 -0
  110. mcp_bridge/utils/cache.py +82 -0
  111. mcp_bridge/utils/process.py +71 -0
  112. mcp_bridge/utils/session_state.py +51 -0
  113. mcp_bridge/utils/truncation.py +76 -0
  114. stravinsky-0.4.66.dist-info/METADATA +517 -0
  115. stravinsky-0.4.66.dist-info/RECORD +198 -0
  116. {stravinsky-0.2.67.dist-info → stravinsky-0.4.66.dist-info}/entry_points.txt +1 -0
  117. stravinsky_claude_assets/HOOKS_INTEGRATION.md +316 -0
  118. stravinsky_claude_assets/agents/HOOKS.md +437 -0
  119. stravinsky_claude_assets/agents/code-reviewer.md +210 -0
  120. stravinsky_claude_assets/agents/comment_checker.md +580 -0
  121. stravinsky_claude_assets/agents/debugger.md +254 -0
  122. stravinsky_claude_assets/agents/delphi.md +495 -0
  123. stravinsky_claude_assets/agents/dewey.md +248 -0
  124. stravinsky_claude_assets/agents/explore.md +1198 -0
  125. stravinsky_claude_assets/agents/frontend.md +472 -0
  126. stravinsky_claude_assets/agents/implementation-lead.md +164 -0
  127. stravinsky_claude_assets/agents/momus.md +464 -0
  128. stravinsky_claude_assets/agents/research-lead.md +141 -0
  129. stravinsky_claude_assets/agents/stravinsky.md +730 -0
  130. stravinsky_claude_assets/commands/delphi.md +9 -0
  131. stravinsky_claude_assets/commands/dewey.md +54 -0
  132. stravinsky_claude_assets/commands/git-master.md +112 -0
  133. stravinsky_claude_assets/commands/index.md +49 -0
  134. stravinsky_claude_assets/commands/publish.md +86 -0
  135. stravinsky_claude_assets/commands/review.md +73 -0
  136. stravinsky_claude_assets/commands/str/agent_cancel.md +70 -0
  137. stravinsky_claude_assets/commands/str/agent_list.md +56 -0
  138. stravinsky_claude_assets/commands/str/agent_output.md +92 -0
  139. stravinsky_claude_assets/commands/str/agent_progress.md +74 -0
  140. stravinsky_claude_assets/commands/str/agent_retry.md +94 -0
  141. stravinsky_claude_assets/commands/str/cancel.md +51 -0
  142. stravinsky_claude_assets/commands/str/clean.md +97 -0
  143. stravinsky_claude_assets/commands/str/continue.md +38 -0
  144. stravinsky_claude_assets/commands/str/index.md +199 -0
  145. stravinsky_claude_assets/commands/str/list_watchers.md +96 -0
  146. stravinsky_claude_assets/commands/str/search.md +205 -0
  147. stravinsky_claude_assets/commands/str/start_filewatch.md +136 -0
  148. stravinsky_claude_assets/commands/str/stats.md +71 -0
  149. stravinsky_claude_assets/commands/str/stop_filewatch.md +89 -0
  150. stravinsky_claude_assets/commands/str/unwatch.md +42 -0
  151. stravinsky_claude_assets/commands/str/watch.md +45 -0
  152. stravinsky_claude_assets/commands/strav.md +53 -0
  153. stravinsky_claude_assets/commands/stravinsky.md +292 -0
  154. stravinsky_claude_assets/commands/verify.md +60 -0
  155. stravinsky_claude_assets/commands/version.md +5 -0
  156. stravinsky_claude_assets/hooks/README.md +248 -0
  157. stravinsky_claude_assets/hooks/comment_checker.py +193 -0
  158. stravinsky_claude_assets/hooks/context.py +38 -0
  159. stravinsky_claude_assets/hooks/context_monitor.py +153 -0
  160. stravinsky_claude_assets/hooks/dependency_tracker.py +73 -0
  161. stravinsky_claude_assets/hooks/edit_recovery.py +46 -0
  162. stravinsky_claude_assets/hooks/execution_state_tracker.py +68 -0
  163. stravinsky_claude_assets/hooks/notification_hook.py +103 -0
  164. stravinsky_claude_assets/hooks/notification_hook_v2.py +96 -0
  165. stravinsky_claude_assets/hooks/parallel_execution.py +241 -0
  166. stravinsky_claude_assets/hooks/parallel_reinforcement.py +106 -0
  167. stravinsky_claude_assets/hooks/parallel_reinforcement_v2.py +112 -0
  168. stravinsky_claude_assets/hooks/pre_compact.py +123 -0
  169. stravinsky_claude_assets/hooks/ralph_loop.py +173 -0
  170. stravinsky_claude_assets/hooks/session_recovery.py +263 -0
  171. stravinsky_claude_assets/hooks/stop_hook.py +89 -0
  172. stravinsky_claude_assets/hooks/stravinsky_metrics.py +164 -0
  173. stravinsky_claude_assets/hooks/stravinsky_mode.py +146 -0
  174. stravinsky_claude_assets/hooks/subagent_stop.py +98 -0
  175. stravinsky_claude_assets/hooks/todo_continuation.py +111 -0
  176. stravinsky_claude_assets/hooks/todo_delegation.py +96 -0
  177. stravinsky_claude_assets/hooks/tool_messaging.py +281 -0
  178. stravinsky_claude_assets/hooks/truncator.py +23 -0
  179. stravinsky_claude_assets/rules/deployment_safety.md +51 -0
  180. stravinsky_claude_assets/rules/integration_wiring.md +89 -0
  181. stravinsky_claude_assets/rules/pypi_deployment.md +220 -0
  182. stravinsky_claude_assets/rules/stravinsky_orchestrator.md +32 -0
  183. stravinsky_claude_assets/settings.json +152 -0
  184. stravinsky_claude_assets/skills/chrome-devtools/SKILL.md +81 -0
  185. stravinsky_claude_assets/skills/sqlite/SKILL.md +77 -0
  186. stravinsky_claude_assets/skills/supabase/SKILL.md +74 -0
  187. stravinsky_claude_assets/task_dependencies.json +34 -0
  188. stravinsky-0.2.67.dist-info/METADATA +0 -284
  189. stravinsky-0.2.67.dist-info/RECORD +0 -76
  190. {stravinsky-0.2.67.dist-info → stravinsky-0.4.66.dist-info}/WHEEL +0 -0
@@ -5,12 +5,17 @@ These tools use OAuth tokens from the token store to authenticate
 API requests to external model providers.
 """
 
+import asyncio
+import base64
+import json as json_module
 import logging
 import os
 import time
 import uuid
-import base64
-import json as json_module
+
+from mcp_bridge.config.rate_limits import get_rate_limiter, get_gemini_time_limiter
+from mcp_bridge.routing.model_tiers import get_oauth_fallback_chain
+from mcp_bridge.routing.provider_state import get_provider_tracker
 
 logger = logging.getLogger(__name__)
 
@@ -42,6 +47,130 @@ def _summarize_prompt(prompt: str, max_length: int = 120) -> str:
 _CODEX_INSTRUCTIONS_CACHE = {}
 _CODEX_INSTRUCTIONS_RELEASE_TAG = "rust-v0.77.0"  # Update as needed
 
+# ==============================================
+# GEMINI AUTH MODE STATE (OAuth-first with 429 fallback)
+# ==============================================
+# When OAuth gets a 429 rate limit, we switch to API-only mode for 5 minutes.
+# After 5 minutes, we automatically retry OAuth.
+_GEMINI_OAUTH_429_TIMESTAMP: float | None = None  # Timestamp of last 429
+_OAUTH_COOLDOWN_SECONDS = 300  # 5 minutes
+
+# ==============================================
+# OPENAI AUTH MODE STATE (OAuth-first with 429 fallback)
+# ==============================================
+# When OpenAI OAuth gets a 429 rate limit, we fallback to Gemini for 5 minutes.
+# After 5 minutes, we automatically retry OpenAI OAuth.
+_OPENAI_OAUTH_429_TIMESTAMP: float | None = None  # Timestamp of last OpenAI 429
+
+
+def _get_gemini_api_key() -> str | None:
+    """Get Gemini API key from environment (loaded from ~/.stravinsky/.env)."""
+    return os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
+
+
+def _set_api_only_mode(reason: str = "429 rate limit"):
+    """Switch to API-only mode after OAuth rate limit (5-minute cooldown)."""
+    global _GEMINI_OAUTH_429_TIMESTAMP
+    _GEMINI_OAUTH_429_TIMESTAMP = time.time()
+    logger.warning(f"[Gemini] Switching to API-only mode: {reason}")
+    import sys
+
+    print(
+        f"⚠️ GEMINI: OAuth rate-limited (429). "
+        f"Using API key for 5 minutes (will retry OAuth at {time.strftime('%H:%M:%S', time.localtime(_GEMINI_OAUTH_429_TIMESTAMP + _OAUTH_COOLDOWN_SECONDS))}).",
+        file=sys.stderr,
+    )
+
+
+def _is_api_only_mode() -> bool:
+    """
+    Check if we're in API-only mode (5-minute cooldown after 429).
+
+    Returns True if:
+    - 429 occurred AND
+    - Less than 5 minutes have elapsed
+
+    Automatically resets to OAuth mode after 5 minutes.
+    """
+    global _GEMINI_OAUTH_429_TIMESTAMP
+
+    if _GEMINI_OAUTH_429_TIMESTAMP is None:
+        return False
+
+    elapsed = time.time() - _GEMINI_OAUTH_429_TIMESTAMP
+
+    if elapsed >= _OAUTH_COOLDOWN_SECONDS:
+        # Cooldown expired - reset to OAuth mode
+        logger.info(
+            f"[Gemini] 5-minute cooldown expired (elapsed: {elapsed:.0f}s). Retrying OAuth."
+        )
+        _GEMINI_OAUTH_429_TIMESTAMP = None
+        return False
+
+    # Still in cooldown
+    remaining = _OAUTH_COOLDOWN_SECONDS - elapsed
+    logger.debug(f"[Gemini] API-only mode active ({remaining:.0f}s remaining)")
+    return True
+
+
+def reset_gemini_auth_mode():
+    """Reset to OAuth-first mode. Call this to manually reset cooldown."""
+    global _GEMINI_OAUTH_429_TIMESTAMP
+    _GEMINI_OAUTH_429_TIMESTAMP = None
+    logger.info("[Gemini] Reset to OAuth-first mode")
+
+
+def _set_openai_fallback_mode(reason: str = "429 rate limit"):
+    """Switch to Gemini fallback after OpenAI rate limit (5-minute cooldown)."""
+    global _OPENAI_OAUTH_429_TIMESTAMP
+    _OPENAI_OAUTH_429_TIMESTAMP = time.time()
+    logger.warning(f"[OpenAI] Switching to Gemini fallback: {reason}")
+    import sys
+
+    print(
+        f"⚠️ OPENAI: OAuth rate-limited (429). "
+        f"Using Gemini for 5 minutes (will retry OpenAI at {time.strftime('%H:%M:%S', time.localtime(_OPENAI_OAUTH_429_TIMESTAMP + _OAUTH_COOLDOWN_SECONDS))}).",
+        file=sys.stderr,
+    )
+
+
+def _is_openai_fallback_mode() -> bool:
+    """
+    Check if we're in Gemini fallback mode (5-minute cooldown after OpenAI 429).
+
+    Returns True if:
+    - OpenAI 429 occurred AND
+    - Less than 5 minutes have elapsed
+
+    Automatically resets to OpenAI mode after 5 minutes.
+    """
+    global _OPENAI_OAUTH_429_TIMESTAMP
+
+    if _OPENAI_OAUTH_429_TIMESTAMP is None:
+        return False
+
+    elapsed = time.time() - _OPENAI_OAUTH_429_TIMESTAMP
+
+    if elapsed >= _OAUTH_COOLDOWN_SECONDS:
+        # Cooldown expired - reset to OpenAI mode
+        logger.info(
+            f"[OpenAI] 5-minute cooldown expired (elapsed: {elapsed:.0f}s). Retrying OpenAI OAuth."
+        )
+        _OPENAI_OAUTH_429_TIMESTAMP = None
+        return False
+
+    # Still in cooldown
+    remaining = _OAUTH_COOLDOWN_SECONDS - elapsed
+    logger.debug(f"[OpenAI] Gemini fallback mode active ({remaining:.0f}s remaining)")
+    return True
+
+
+def reset_openai_auth_mode():
+    """Reset to OpenAI-first mode. Call this to manually reset cooldown."""
+    global _OPENAI_OAUTH_429_TIMESTAMP
+    _OPENAI_OAUTH_429_TIMESTAMP = None
+    logger.info("[OpenAI] Reset to OAuth-first mode")
+
 
 async def _fetch_codex_instructions(model: str = "gpt-5.2-codex") -> str:
     """
@@ -107,20 +236,21 @@ def resolve_gemini_model(model: str) -> str:
 import httpx
 from tenacity import (
     retry,
+    retry_if_exception,
     stop_after_attempt,
     wait_exponential,
-    retry_if_exception,
 )
 
-from ..auth.token_store import TokenStore
 from ..auth.oauth import (
-    refresh_access_token as gemini_refresh,
-    ANTIGRAVITY_HEADERS,
-    ANTIGRAVITY_ENDPOINTS,
     ANTIGRAVITY_DEFAULT_PROJECT_ID,
-    ANTIGRAVITY_API_VERSION,
+    ANTIGRAVITY_ENDPOINTS,
+    ANTIGRAVITY_HEADERS,
+)
+from ..auth.oauth import (
+    refresh_access_token as gemini_refresh,
 )
 from ..auth.openai_oauth import refresh_access_token as openai_refresh
+from ..auth.token_store import TokenStore
 from ..hooks.manager import get_hook_manager
 
 # ========================
@@ -134,6 +264,53 @@ _SESSION_CACHE: dict[str, str] = {}
 # Pooled HTTP client for connection reuse
 _HTTP_CLIENT: httpx.AsyncClient | None = None
 
+# Per-model semaphores for async rate limiting (uses config from ~/.stravinsky/config.json)
+_GEMINI_SEMAPHORES: dict[str, asyncio.Semaphore] = {}
+
+
+def _get_gemini_rate_limit(model: str) -> int:
+    """
+    Get configured rate limit for a Gemini model.
+
+    Reads from ~/.stravinsky/config.json if available, otherwise uses defaults.
+
+    Args:
+        model: Gemini model name (e.g., "gemini-3-flash", "gemini-3-pro-high")
+
+    Returns:
+        Configured concurrency limit for this model
+    """
+    rate_limiter = get_rate_limiter()
+    # Normalize model name to match config keys
+    normalized = rate_limiter._normalize_model(model)
+    return rate_limiter._limits.get(normalized, rate_limiter._limits.get("_default", 5))
+
+
+def _get_gemini_semaphore(model: str) -> asyncio.Semaphore:
+    """
+    Get or create async semaphore for Gemini model rate limiting.
+
+    Creates one semaphore per model type with limits from config.
+    Limits can be customized in ~/.stravinsky/config.json:
+    {
+        "rate_limits": {
+            "gemini-3-flash": 15,
+            "gemini-3-pro-high": 8
+        }
+    }
+
+    Args:
+        model: Gemini model name
+
+    Returns:
+        asyncio.Semaphore with configured limit for this model
+    """
+    if model not in _GEMINI_SEMAPHORES:
+        limit = _get_gemini_rate_limit(model)
+        _GEMINI_SEMAPHORES[model] = asyncio.Semaphore(limit)
+        logger.info(f"[RateLimit] Created semaphore for {model} with limit {limit}")
+    return _GEMINI_SEMAPHORES[model]
+
 
 def _get_session_id(conversation_key: str | None = None) -> str:
     """
@@ -284,51 +461,180 @@ async def _ensure_valid_token(token_store: TokenStore, provider: str) -> str:
 
 
 def is_retryable_exception(e: Exception) -> bool:
-    """Check if an exception is retryable (429 or 5xx)."""
+    """
+    Check if an exception is retryable (5xx only, NOT 429).
+
+    429 (Rate Limit) errors should fail fast - retrying makes the problem worse
+    by adding more requests to an already exhausted quota. The semaphore prevents
+    these in the first place, but if one slips through, we shouldn't retry.
+    """
     if isinstance(e, httpx.HTTPStatusError):
-        return e.response.status_code == 429 or 500 <= e.response.status_code < 600
+        # Only retry server errors (5xx), not rate limits (429)
+        return 500 <= e.response.status_code < 600
     return False
 
 
-@retry(
-    stop=stop_after_attempt(5),
-    wait=wait_exponential(multiplier=1, min=4, max=60),
-    retry=retry_if_exception(is_retryable_exception),
-    before_sleep=lambda retry_state: logger.info(
-        f"Rate limited or server error, retrying in {retry_state.next_action.sleep} seconds..."
-    ),
-)
-async def invoke_gemini(
-    token_store: TokenStore,
+async def _invoke_gemini_with_api_key(
+    api_key: str,
     prompt: str,
     model: str = "gemini-3-flash",
     temperature: float = 0.7,
     max_tokens: int = 4096,
     thinking_budget: int = 0,
     image_path: str | None = None,
+    agent_context: dict | None = None,
 ) -> str:
     """
-    Invoke a Gemini model with the given prompt.
+    Invoke Gemini using API key authentication (google-genai library).
 
-    Uses OAuth authentication with Antigravity credentials.
-    Supports vision API for image/PDF analysis when image_path is provided.
+    This is an alternative to OAuth authentication that uses the official
+    google-genai Python library with a simple API key.
 
     Args:
-        token_store: Token store for OAuth credentials
+        api_key: Gemini API key (from GEMINI_API_KEY or GOOGLE_API_KEY env var)
         prompt: The prompt to send to Gemini
-        model: Gemini model to use
+        model: Gemini model to use (e.g., "gemini-3-flash-preview")
         temperature: Sampling temperature (0.0-2.0)
         max_tokens: Maximum tokens in response
-        thinking_budget: Tokens reserved for internal reasoning
-        image_path: Optional path to image/PDF for vision analysis (token optimization)
+        thinking_budget: Tokens reserved for internal reasoning (if supported)
+        image_path: Optional path to image/PDF for vision analysis
 
     Returns:
         The model's response text.
 
     Raises:
-        ValueError: If not authenticated with Gemini
-        httpx.HTTPStatusError: If API request fails
+        ImportError: If google-genai library is not installed
+        ValueError: If API request fails
     """
+    try:
+        from google import genai
+    except ImportError:
+        raise ImportError(
+            "google-genai library not installed. Install with: pip install google-genai"
+        )
+
+    # Map stravinsky model names to google-genai model names
+    # Pass through gemini-3-* models directly (Tier 3 benefits)
+    model_map = {
+        "gemini-3-flash": "gemini-3-flash-preview",  # Tier 3 model (not -exp)
+        "gemini-3-flash-preview": "gemini-3-flash-preview",  # Pass through
+        "gemini-3-pro-low": "gemini-3-flash-preview",
+        "gemini-3-pro-high": "gemini-3-pro-preview",  # Tier 3 pro model
+        "gemini-3-pro-preview": "gemini-3-pro-preview",  # Pass through
+        "gemini-flash": "gemini-3-flash-preview",
+        "gemini-pro": "gemini-3-pro-preview",
+        "gemini-3-pro": "gemini-3-pro-preview",
+        "gemini": "gemini-3-flash-preview",
+    }
+    genai_model = model_map.get(model, "gemini-3-flash-preview")  # Default to tier 3 flash
+
+    try:
+        # Initialize client with API key
+        client = genai.Client(api_key=api_key)
+
+        # Build generation config
+        config = {
+            "temperature": temperature,
+            "max_output_tokens": max_tokens,
+        }
+
+        # Add thinking budget if supported (experimental feature)
+        if thinking_budget > 0:
+            config["thinking_config"] = {
+                "thinking_budget": thinking_budget,
+            }
+
+        # Build contents - text prompt plus optional image
+        contents = [prompt]
+
+        # Add image data for vision analysis
+        if image_path:
+            from pathlib import Path
+
+            image_file = Path(image_path)
+            if image_file.exists():
+                # google-genai supports direct file path or base64
+                # For simplicity, use the file path directly
+                contents.append(image_file)
+                logger.info(f"[API_KEY] Added vision data: {image_path}")
+
+        # Generate content
+        response = client.models.generate_content(
+            model=genai_model,
+            contents=contents,
+            config=config,
+        )
+
+        # Track usage
+        try:
+            from mcp_bridge.metrics.cost_tracker import get_cost_tracker
+
+            tracker = get_cost_tracker()
+            if hasattr(response, "usage_metadata"):
+                usage = response.usage_metadata
+                agent_type = (agent_context or {}).get("agent_type", "unknown")
+                task_id = (agent_context or {}).get("task_id", "")
+
+                tracker.track_usage(
+                    model=model,
+                    input_tokens=usage.prompt_token_count,
+                    output_tokens=usage.candidates_token_count,
+                    agent_type=agent_type,
+                    task_id=task_id,
+                )
+        except Exception:
+            pass
+
+        # Extract text from response
+        if hasattr(response, "text"):
+            return response.text
+        elif hasattr(response, "candidates") and response.candidates:
+            # Fallback: extract from candidates
+            candidate = response.candidates[0]
+            if hasattr(candidate, "content"):
+                parts = candidate.content.parts
+                text_parts = [part.text for part in parts if hasattr(part, "text")]
+                return "".join(text_parts) if text_parts else "No response generated"
+
+        return "No response generated"
+
+    except Exception as e:
+        logger.error(f"API key authentication failed: {e}")
+        raise ValueError(f"Gemini API key request failed: {e}")
+
+
+@retry(
+    stop=stop_after_attempt(2),  # Reduced from 5 to 2 attempts
+    wait=wait_exponential(multiplier=2, min=10, max=120),  # Longer waits: 10s → 20s → 40s
+    retry=retry_if_exception(is_retryable_exception),
+    before_sleep=lambda retry_state: logger.info(
+        f"Server error, retrying in {retry_state.next_action.sleep} seconds..."
+    ),
+)
+async def invoke_gemini(
+    token_store: TokenStore,
+    prompt: str,
+    model: str = "gemini-3-flash",
+    temperature: float = 0.7,
+    max_tokens: int = 4096,
+    thinking_budget: int = 0,
+    image_path: str | None = None,
+) -> str:
+    """
+    Invoke a Gemini model with the given prompt.
+    """
+    from mcp_bridge.proxy.client import is_proxy_enabled, proxy_invoke_gemini
+
+    if is_proxy_enabled():
+        return await proxy_invoke_gemini(
+            prompt=prompt,
+            model=model,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            thinking_budget=thinking_budget,
+            image_path=image_path,
+        )
+
     logger.info(f"[DEBUG] invoke_gemini called, uuid module check: {uuid}")
     # Execute pre-model invoke hooks
     params = {
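
Note the retry-policy change above: tenacity now makes at most 2 attempts, waits longer between them, and retries only 5xx responses, so a 429 falls through to the cooldown/fallback path instead of being retried. A self-contained sketch of that predicate-driven retry (tenacity and httpx, as in the diff; the endpoint URL is a placeholder):

    import httpx
    from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential

    def is_retryable(e: Exception) -> bool:
        # Retry server errors (5xx) only; let 429 propagate immediately
        if isinstance(e, httpx.HTTPStatusError):
            return 500 <= e.response.status_code < 600
        return False

    @retry(
        stop=stop_after_attempt(2),
        wait=wait_exponential(multiplier=2, min=10, max=120),
        retry=retry_if_exception(is_retryable),
    )
    def flaky_call() -> str:
        # Hypothetical stand-in for the Antigravity POST
        response = httpx.get("https://httpbin.org/status/503")
        response.raise_for_status()
        return response.text
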
@@ -360,188 +666,380 @@ async def invoke_gemini(
     # Log with agent context and prompt summary
     logger.info(f"[{agent_type}] → {model}: {prompt_summary}")
 
-    # USER-VISIBLE NOTIFICATION (stderr) - Shows when Gemini is invoked
+    # Get API key from environment (loaded from ~/.stravinsky/.env)
+    api_key = _get_gemini_api_key()
     import sys
+
     task_info = f" task={task_id}" if task_id else ""
     desc_info = f" | {description}" if description else ""
-    print(f"🔮 GEMINI: {model} | agent={agent_type}{task_info}{desc_info}", file=sys.stderr)
 
-    access_token = await _ensure_valid_token(token_store, "gemini")
+    # ==============================================
+    # AUTH PRIORITY: OAuth first, API fallback on 429
+    # ==============================================
+    # 1. If API-only mode (after 429), use API key directly
+    # 2. Otherwise, try OAuth first
+    # 3. On 429 from OAuth, switch to API-only mode and retry
 
-    # Resolve user-friendly model name to actual API model ID
-    api_model = resolve_gemini_model(model)
+    # If we're in API-only mode (after a 429), use API key directly
+    if _is_api_only_mode():
+        if not api_key:
+            raise ValueError(
+                "OAuth rate-limited (429) and no API key available. "
+                "Add GEMINI_API_KEY to ~/.stravinsky/.env"
+            )
 
-    # Use persistent session ID for thinking signature caching
-    session_id = _get_session_id()
-    project_id = os.getenv("STRAVINSKY_ANTIGRAVITY_PROJECT_ID", ANTIGRAVITY_DEFAULT_PROJECT_ID)
+        # Calculate remaining cooldown time
+        if _GEMINI_OAUTH_429_TIMESTAMP is not None:
+            remaining = _OAUTH_COOLDOWN_SECONDS - (time.time() - _GEMINI_OAUTH_429_TIMESTAMP)
+            remaining_mins = int(remaining // 60)
+            remaining_secs = int(remaining % 60)
+            cooldown_msg = f" (OAuth retry in {remaining_mins}m {remaining_secs}s)"
+        else:
+            cooldown_msg = ""
+
+        # Check time-window rate limit (30 req/min)
+        time_limiter = get_gemini_time_limiter()
+        wait_time = time_limiter.acquire_visible("GEMINI", "API key")
+        if wait_time > 0:
+            await asyncio.sleep(wait_time)
+            # Re-acquire after sleep
+            wait_time = time_limiter.acquire_visible("GEMINI", "API key")
+
+        print(
+            f"🔑 GEMINI (API-only cooldown{cooldown_msg}): {model} | agent={agent_type}{task_info}{desc_info}",
+            file=sys.stderr,
+        )
+        logger.info(f"[{agent_type}] Using API key (5-min cooldown after OAuth 429)")
+        semaphore = _get_gemini_semaphore(model)
+        async with semaphore:
+            result = await _invoke_gemini_with_api_key(
+                api_key=api_key,
+                prompt=prompt,
+                model=model,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                thinking_budget=thinking_budget,
+                image_path=image_path,
+                agent_context=agent_context,
+            )
+        # Prepend auth header for visibility in logs
+        auth_header = f"[Auth: API key (5-min cooldown) | Model: {model}]\n\n"
+        return auth_header + result
 
-    headers = {
-        "Authorization": f"Bearer {access_token}",
-        "Content-Type": "application/json",
-        **ANTIGRAVITY_HEADERS,  # Include Antigravity headers
-    }
+    provider_tracker = get_provider_tracker()
 
-    # Build inner request payload
-    # Per API spec: contents must include role ("user" or "model")
-
-    # Build parts list - text prompt plus optional image
-    parts = [{"text": prompt}]
-
-    # Add image data for vision analysis (token optimization for multimodal)
-    if image_path:
-        import base64
-        from pathlib import Path
-
-        image_file = Path(image_path)
-        if image_file.exists():
-            # Determine MIME type
-            suffix = image_file.suffix.lower()
-            mime_types = {
-                ".png": "image/png",
-                ".jpg": "image/jpeg",
-                ".jpeg": "image/jpeg",
-                ".gif": "image/gif",
-                ".webp": "image/webp",
-                ".pdf": "application/pdf",
-            }
-            mime_type = mime_types.get(suffix, "image/png")
+    # If Gemini is in cooldown, follow tier-aware fallback chain.
+    if not provider_tracker.is_available("gemini"):
+        for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain("gemini", model):
+            if candidate_provider == "gemini" and use_oauth:
+                continue
+            if use_oauth and not provider_tracker.is_available(candidate_provider):
+                continue
+
+            if candidate_provider == "gemini" and not use_oauth:
+                api_key = _get_gemini_api_key()
+                if not api_key:
+                    continue
+                _set_api_only_mode("Gemini in cooldown; using API key")
+                result = await _invoke_gemini_with_api_key(
+                    api_key=api_key,
+                    prompt=prompt,
+                    model=candidate_model,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    thinking_budget=thinking_budget,
+                    image_path=image_path,
+                    agent_context=agent_context,
+                )
+                auth_header = f"[Auth: API key (cooldown) | Model: {candidate_model}]\n\n"
+                return auth_header + result
+
+            if candidate_provider == "openai" and use_oauth:
+                return await invoke_openai(
+                    token_store=token_store,
+                    prompt=prompt,
+                    model=candidate_model,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    thinking_budget=0,
+                    reasoning_effort="medium",
+                )
 
-            # Read and base64 encode
-            image_data = base64.b64encode(image_file.read_bytes()).decode("utf-8")
+    # DEFAULT: Try OAuth first (Antigravity)
+
+    # Check time-window rate limit (30 req/min)
+    time_limiter = get_gemini_time_limiter()
+    wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
+    if wait_time > 0:
+        await asyncio.sleep(wait_time)
+        # Re-acquire after sleep
+        wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
+
+    print(
+        f"🔮 GEMINI (OAuth): {model} | agent={agent_type}{task_info}{desc_info}",
+        file=sys.stderr,
+    )
+    logger.info(f"[{agent_type}] Using OAuth authentication (Antigravity)")
+    # Rate limit concurrent Gemini requests (configurable via ~/.stravinsky/config.json)
+    semaphore = _get_gemini_semaphore(model)
+    async with semaphore:
+        access_token = await _ensure_valid_token(token_store, "gemini")
+
+        # Resolve user-friendly model name to actual API model ID
+        api_model = resolve_gemini_model(model)
+
+        # Use persistent session ID for thinking signature caching
+        session_id = _get_session_id()
+        project_id = os.getenv("STRAVINSKY_ANTIGRAVITY_PROJECT_ID", ANTIGRAVITY_DEFAULT_PROJECT_ID)
+
+        headers = {
+            "Authorization": f"Bearer {access_token}",
+            "Content-Type": "application/json",
+            **ANTIGRAVITY_HEADERS,  # Include Antigravity headers
+        }
 
-            # Add inline image data for Gemini Vision API
-            parts.append({
-                "inlineData": {
-                    "mimeType": mime_type,
-                    "data": image_data,
-                }
-            })
-            logger.info(f"[multimodal] Added vision data: {image_path} ({mime_type})")
+        # Build inner request payload
+        # Per API spec: contents must include role ("user" or "model")
+
+        # Build parts list - text prompt plus optional image
+        parts = [{"text": prompt}]
+
+        # Add image data for vision analysis (token optimization for multimodal)
+        if image_path:
+            import base64
+            from pathlib import Path
+
+            image_file = Path(image_path)
+            if image_file.exists():
+                # Determine MIME type
+                suffix = image_file.suffix.lower()
+                mime_types = {
+                    ".png": "image/png",
+                    ".jpg": "image/jpeg",
+                    ".jpeg": "image/jpeg",
+                    ".gif": "image/gif",
+                    ".webp": "image/webp",
+                    ".pdf": "application/pdf",
+                }
+                mime_type = mime_types.get(suffix, "image/png")
 
-    inner_payload = {
-        "contents": [{"role": "user", "parts": parts}],
-        "generationConfig": {
-            "temperature": temperature,
-            "maxOutputTokens": max_tokens,
-        },
-        "sessionId": session_id,
-    }
+                # Read and base64 encode
+                image_data = base64.b64encode(image_file.read_bytes()).decode("utf-8")
 
-    # Add thinking budget if supported by model/API
-    if thinking_budget > 0:
-        # For Gemini 2.0+ Thinking models
-        # Per Antigravity API: use "thinkingBudget", NOT "tokenLimit"
-        inner_payload["generationConfig"]["thinkingConfig"] = {
-            "includeThoughts": True,
-            "thinkingBudget": thinking_budget,
-        }
+                # Add inline image data for Gemini Vision API
+                parts.append(
+                    {
+                        "inlineData": {
+                            "mimeType": mime_type,
+                            "data": image_data,
+                        }
+                    }
+                )
+                logger.info(f"[multimodal] Added vision data: {image_path} ({mime_type})")
 
-    # Wrap request body per reference implementation
-    try:
-        import uuid as uuid_module  # Local import workaround for MCP context issue
+        inner_payload = {
+            "contents": [{"role": "user", "parts": parts}],
+            "generationConfig": {
+                "temperature": temperature,
+                "maxOutputTokens": max_tokens,
+            },
+            "sessionId": session_id,
+        }
 
-        request_id = f"invoke-{uuid_module.uuid4()}"
-    except Exception as e:
-        logger.error(f"UUID IMPORT FAILED: {e}")
-        raise RuntimeError(f"CUSTOM ERROR: UUID import failed: {e}")
-
-    wrapped_payload = {
-        "project": project_id,
-        "model": api_model,
-        "userAgent": "antigravity",
-        "requestId": request_id,
-        "request": inner_payload,
-    }
+        # Add thinking budget if supported by model/API
+        if thinking_budget > 0:
+            # For Gemini 2.0+ Thinking models
+            # Per Antigravity API: use "thinkingBudget", NOT "tokenLimit"
+            inner_payload["generationConfig"]["thinkingConfig"] = {
+                "includeThoughts": True,
+                "thinkingBudget": thinking_budget,
+            }
 
-    # Get pooled HTTP client for connection reuse
-    client = await _get_http_client()
+        # Wrap request body per reference implementation
+        try:
+            import uuid as uuid_module  # Local import workaround for MCP context issue
 
-    # Try endpoints in fallback order with thinking recovery
-    response = None
-    last_error = None
-    max_retries = 2  # For thinking recovery
+            request_id = f"invoke-{uuid_module.uuid4()}"
+        except Exception as e:
+            logger.error(f"UUID IMPORT FAILED: {e}")
+            raise RuntimeError(f"CUSTOM ERROR: UUID import failed: {e}")
 
-    for retry_attempt in range(max_retries):
-        for endpoint in ANTIGRAVITY_ENDPOINTS:
-            # Reference uses: {endpoint}/v1internal:generateContent (NOT /models/{model})
-            api_url = f"{endpoint}/v1internal:generateContent"
+        wrapped_payload = {
+            "project": project_id,
+            "model": api_model,
+            "userAgent": "antigravity",
+            "requestId": request_id,
+            "request": inner_payload,
+        }
 
-            try:
-                response = await client.post(
-                    api_url,
-                    headers=headers,
-                    json=wrapped_payload,
-                    timeout=120.0,
-                )
+        # Get pooled HTTP client for connection reuse
+        client = await _get_http_client()
 
-                # 401/403 might be endpoint-specific, try next endpoint
-                if response.status_code in (401, 403):
-                    logger.warning(
-                        f"[Gemini] Endpoint {endpoint} returned {response.status_code}, trying next"
-                    )
-                    last_error = Exception(f"{response.status_code} from {endpoint}")
-                    continue
+        # Try endpoints in fallback order with thinking recovery
+        response = None
+        last_error = None
+        max_retries = 2  # For thinking recovery
+
+        for retry_attempt in range(max_retries):
+            for endpoint in ANTIGRAVITY_ENDPOINTS:
+                # Reference uses: {endpoint}/v1internal:generateContent (NOT /models/{model})
+                api_url = f"{endpoint}/v1internal:generateContent"
+
+                try:
+                    response = await client.post(
+                        api_url,
+                        headers=headers,
+                        json=wrapped_payload,
+                        timeout=120.0,
+                    )
 
-                # Check for thinking-related errors that need recovery
-                if response.status_code in (400, 500):
-                    error_text = response.text.lower()
-                    if "thinking" in error_text or "signature" in error_text:
-                        logger.warning(
-                            f"[Gemini] Thinking error detected, clearing session cache and retrying"
+                    # 401/403 might be endpoint-specific, try next endpoint
+                    if response.status_code in (401, 403):
+                        logger.warning(
+                            f"[Gemini] Endpoint {endpoint} returned {response.status_code}, trying next"
                         )
-                        clear_session_cache()
-                        # Update session ID for retry
-                        wrapped_payload["request"]["sessionId"] = _get_session_id()
-                        last_error = Exception(f"Thinking error: {response.text[:200]}")
-                        break  # Break inner loop to retry with new session
-
-                # If we got a non-retryable response (success or 4xx client error), use it
-                if response.status_code < 500 and response.status_code != 429:
-                    break
+                        last_error = Exception(f"{response.status_code} from {endpoint}")
+                        continue
+
+                    # Check for thinking-related errors that need recovery
+                    if response.status_code in (400, 500):
+                        error_text = response.text.lower()
+                        if "thinking" in error_text or "signature" in error_text:
+                            logger.warning(
+                                "[Gemini] Thinking error detected, clearing session cache and retrying"
+                            )
+                            clear_session_cache()
+                            # Update session ID for retry
+                            wrapped_payload["request"]["sessionId"] = _get_session_id()
+                            last_error = Exception(f"Thinking error: {response.text[:200]}")
+                            break  # Break inner loop to retry with new session
+
+                    # If we got a non-retryable response (success or 4xx client error), use it
+                    if response.status_code < 500 and response.status_code != 429:
+                        break
+
+                except httpx.TimeoutException as e:
+                    last_error = e
+                    continue
+                except Exception as e:
+                    last_error = e
+                    continue
+            else:
+                # Inner loop completed without break - no thinking recovery needed
+                break
 
-            except httpx.TimeoutException as e:
-                last_error = e
+            # If we broke out of inner loop for thinking recovery, continue outer retry loop
+            if response and response.status_code in (400, 500):
                 continue
-            except Exception as e:
-                last_error = e
-                continue
-        else:
-            # Inner loop completed without break - no thinking recovery needed
             break
 
-        # If we broke out of inner loop for thinking recovery, continue outer retry loop
-        if response and response.status_code in (400, 500):
-            continue
-        break
+        # ==============================================
+        # 429 RATE LIMIT DETECTION: Tier-aware fallback chain
+        # ==============================================
+        if response is not None and response.status_code == 429:
+            provider_tracker = get_provider_tracker()
+            provider_tracker.mark_rate_limited(
+                "gemini",
+                duration=_OAUTH_COOLDOWN_SECONDS,
+                reason="Gemini OAuth rate-limited (429)",
+            )
 
-    if response is None:
-        # FALLBACK: Try Claude sonnet-4.5 for agents that support it
-        agent_context = params.get("agent_context", {})
-        agent_type = agent_context.get("agent_type", "unknown")
+            for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain(
+                "gemini", model
+            ):
+                if candidate_provider == "gemini" and use_oauth:
+                    continue
+                if use_oauth and not provider_tracker.is_available(candidate_provider):
+                    continue
 
-        if agent_type in ("dewey", "explore", "document_writer", "multimodal"):
-            logger.warning(f"[{agent_type}] Gemini failed, falling back to Claude sonnet-4.5")
-            try:
-                import subprocess
-                fallback_result = subprocess.run(
-                    ["claude", "-p", prompt, "--model", "sonnet", "--output-format", "text"],
-                    capture_output=True,
-                    text=True,
-                    timeout=120,
-                    cwd=os.getcwd(),
-                )
-                if fallback_result.returncode == 0 and fallback_result.stdout.strip():
-                    return fallback_result.stdout.strip()
-            except Exception as fallback_error:
-                logger.error(f"Fallback to Claude also failed: {fallback_error}")
+                if candidate_provider == "gemini" and not use_oauth:
+                    api_key = _get_gemini_api_key()
+                    if not api_key:
+                        continue
+                    _set_api_only_mode("OAuth rate-limited (429)")
+                    logger.info("[Gemini] Retrying with API key after OAuth 429")
+                    result = await _invoke_gemini_with_api_key(
+                        api_key=api_key,
+                        prompt=prompt,
+                        model=candidate_model,
+                        temperature=temperature,
+                        max_tokens=max_tokens,
+                        thinking_budget=thinking_budget,
+                        image_path=image_path,
+                        agent_context=agent_context,
+                    )
+                    auth_header = (
+                        f"[Auth: API key (OAuth 429 fallback) | Model: {candidate_model}]\n\n"
+                    )
+                    return auth_header + result
+
+                if candidate_provider == "openai" and use_oauth:
+                    return await invoke_openai(
+                        token_store=token_store,
+                        prompt=prompt,
+                        model=candidate_model,
+                        temperature=temperature,
+                        max_tokens=max_tokens,
+                        thinking_budget=0,
+                        reasoning_effort="medium",
+                    )
+
+            raise ValueError(
+                "OAuth rate-limited (429) and no fallback succeeded. "
+                "Add GEMINI_API_KEY to ~/.stravinsky/.env"
+            )
+        if response is None:
+            # FALLBACK: Try Claude sonnet-4.5 for agents that support it
+            agent_context = params.get("agent_context", {})
+            agent_type = agent_context.get("agent_type", "unknown")
+
+            if agent_type in ("dewey", "explore", "document_writer", "multimodal"):
+                logger.warning(f"[{agent_type}] Gemini failed, falling back to Claude sonnet-4.5")
+                try:
+                    from mcp_bridge.utils.process import async_execute
+
+                    result_obj = await async_execute(
+                        ["claude", "-p", prompt, "--model", "sonnet", "--output-format", "text"],
+                        timeout=120,
+                    )
+                    if result_obj.returncode == 0 and result_obj.stdout.strip():
+                        result = result_obj.stdout.strip()
+                        # Prepend auth header for visibility
+                        auth_header = f"[Auth: Claude fallback | Model: sonnet-4.5]\n\n"
+                        return auth_header + result
+                except Exception as fallback_error:
+                    logger.error(f"Fallback to Claude also failed: {fallback_error}")
+
+            raise ValueError(f"All Antigravity endpoints failed: {last_error}")
 
-        raise ValueError(f"All Antigravity endpoints failed: {last_error}")
+        response.raise_for_status()
+        data = response.json()
+
+        # Track usage
+        try:
+            from mcp_bridge.metrics.cost_tracker import get_cost_tracker
+
+            tracker = get_cost_tracker()
+            usage = data.get("usageMetadata", {})
+            input_tokens = usage.get("promptTokenCount", 0)
+            output_tokens = usage.get("candidatesTokenCount", 0)
+
+            tracker.track_usage(
+                model=model,
+                input_tokens=input_tokens,
+                output_tokens=output_tokens,
+                agent_type=agent_type,
+                task_id=task_id,
+            )
+        except Exception as e:
+            logger.warning(f"Failed to track cost: {e}")
 
-    response.raise_for_status()
-    data = response.json()
+        # Extract text from response using thinking-aware parser
+        result = _extract_gemini_response(data)
 
-    # Extract text from response using thinking-aware parser
-    return _extract_gemini_response(data)
+        # Prepend auth header for visibility in logs
+        auth_header = f"[Auth: OAuth | Model: {model}]\n\n"
+        return auth_header + result
 
 
 # ========================
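
The 429 handler above walks candidates from get_oauth_fallback_chain, each a (provider, model, use_oauth) tuple, skipping the provider that just rate-limited and anything still in its own cooldown. A schematic sketch of that walk (the chain contents here are illustrative, not the real tiers from mcp_bridge.routing.model_tiers):

    from typing import Iterable

    # Illustrative chain: API-key Gemini first, then OpenAI OAuth
    FALLBACK_CHAIN: list[tuple[str, str, bool]] = [
        ("gemini", "gemini-3-flash-preview", False),  # use_oauth=False -> API key
        ("openai", "gpt-5.2-codex", True),            # use_oauth=True  -> OAuth
    ]

    def pick_fallback(chain: Iterable[tuple[str, str, bool]],
                      available: set[str], have_api_key: bool) -> tuple[str, str, bool] | None:
        for provider, model, use_oauth in chain:
            if provider == "gemini" and use_oauth:
                continue  # Gemini OAuth is the path that just returned 429
            if use_oauth and provider not in available:
                continue  # that provider is in its own cooldown
            if provider == "gemini" and not use_oauth and not have_api_key:
                continue  # no GEMINI_API_KEY configured
            return provider, model, use_oauth
        return None  # mirrors the final "no fallback succeeded" ValueError
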
@@ -552,9 +1050,57 @@ async def invoke_gemini
 AGENT_TOOLS = [
     {
         "functionDeclarations": [
+            {
+                "name": "semantic_search",
+                "description": "Search codebase with natural language query using semantic embeddings. ALWAYS use this FIRST before grep_search or read_file to find relevant files efficiently. Returns code snippets with file paths and relevance scores.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "Natural language search query (e.g., 'find authentication logic', 'PDF rendering code')",
+                        },
+                        "project_path": {
+                            "type": "string",
+                            "description": "Path to the project root (default: '.')",
+                        },
+                        "n_results": {
+                            "type": "integer",
+                            "description": "Maximum number of results to return (default: 10)",
+                        },
+                    },
+                    "required": ["query"],
+                },
+            },
+            {
+                "name": "hybrid_search",
+                "description": "Hybrid search combining semantic similarity with structural AST pattern matching. Use when you need precise structural patterns (e.g., specific function signatures) combined with semantic relevance.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "Natural language search query (e.g., 'find authentication logic')",
+                        },
+                        "pattern": {
+                            "type": "string",
+                            "description": "Optional ast-grep pattern for structural matching (e.g., 'def $FUNC($$$):', 'async function $NAME($$$)')",
+                        },
+                        "project_path": {
+                            "type": "string",
+                            "description": "Path to the project root (default: '.')",
+                        },
+                        "n_results": {
+                            "type": "integer",
+                            "description": "Maximum number of results to return (default: 10)",
+                        },
+                    },
+                    "required": ["query"],
+                },
+            },
             {
                 "name": "read_file",
-                "description": "Read the contents of a file. Returns the file contents as text.",
+                "description": "Read the contents of a file. Returns the file contents as text. USE ONLY AFTER semantic_search identifies the target file.",
                 "parameters": {
                     "type": "object",
                     "properties": {
@@ -579,7 +1125,7 @@ AGENT_TOOLS = [
             },
             {
                 "name": "grep_search",
-                "description": "Search for a pattern in files using ripgrep. Returns matching lines with file paths and line numbers.",
+                "description": "Search for a pattern in files using ripgrep. Returns matching lines with file paths and line numbers. USE ONLY for precise pattern matching AFTER semantic_search narrows down the search scope.",
                 "parameters": {
                     "type": "object",
                     "properties": {
@@ -609,50 +1155,85 @@ AGENT_TOOLS = [
 ]
 
 
-def _execute_tool(name: str, args: dict) -> str:
+async def _execute_tool(name: str, args: dict) -> str:
     """Execute a tool and return the result."""
-    import os
-    import subprocess
     from pathlib import Path
+    from mcp_bridge.utils.process import async_execute
 
     try:
-        if name == "read_file":
-            path = Path(args["path"])
-            if not path.exists():
-                return f"Error: File not found: {path}"
-            return path.read_text()
+        if name == "semantic_search":
+            # Import semantic_search function from tools
+            from .semantic_search import semantic_search
+
+            # Extract args with defaults
+            query = args.get("query")
+            if not query:
+                return "Error: 'query' parameter is required for semantic_search"
+
+            project_path = args.get("project_path", ".")
+            n_results = args.get("n_results", 10)
+
+            result = await semantic_search(
+                query=query,
+                project_path=project_path,
+                n_results=n_results,
+            )
+            return result
+
+        elif name == "hybrid_search":
+            # Import hybrid_search function from tools
+            from .semantic_search import hybrid_search
+
+            # Extract args with defaults
+            query = args.get("query")
+            if not query:
+                return "Error: 'query' parameter is required for hybrid_search"
+
+            pattern = args.get("pattern")
+            project_path = args.get("project_path", ".")
+            n_results = args.get("n_results", 10)
+
+            result = await hybrid_search(
+                query=query,
+                pattern=pattern,
+                project_path=project_path,
+                n_results=n_results,
+            )
+            return result
+
+        elif name == "read_file":
+            from .read_file import read_file
+
+            path = args["path"]
+            return await read_file(path)
 
         elif name == "list_directory":
-            path = Path(args["path"])
-            if not path.exists():
-                return f"Error: Directory not found: {path}"
-            entries = []
-            for entry in path.iterdir():
-                entry_type = "DIR" if entry.is_dir() else "FILE"
-                entries.append(f"[{entry_type}] {entry.name}")
-            return "\n".join(entries) if entries else "(empty directory)"
+            from .list_directory import list_directory
+
+            path = args["path"]
+            return await list_directory(path)
 
         elif name == "grep_search":
             pattern = args["pattern"]
             search_path = args["path"]
-            result = subprocess.run(
-                ["rg", "--json", "-m", "50", pattern, search_path],
-                capture_output=True,
-                text=True,
-                timeout=30,
+
+            result_obj = await async_execute(
+                ["rg", "--json", "-m", "50", pattern, search_path], timeout=30
             )
-            if result.returncode == 0:
-                return result.stdout[:10000]  # Limit output size
-            elif result.returncode == 1:
+
+            if result_obj.returncode == 0:
+                return result_obj.stdout[:10000]  # Limit output size
+            elif result_obj.returncode == 1:
                 return "No matches found"
             else:
-                return f"Search error: {result.stderr}"
+                return f"Search error: {result_obj.stderr}"
 
         elif name == "write_file":
-            path = Path(args["path"])
-            path.parent.mkdir(parents=True, exist_ok=True)
-            path.write_text(args["content"])
-            return f"Successfully wrote {len(args['content'])} bytes to {path}"
+            from .write_file import write_file
+
+            path = args["path"]
+            content = args["content"]
+            return await write_file(path, content)
 
         else:
             return f"Unknown tool: {name}"
@@ -661,32 +1242,244 @@ def _execute_tool(name: str, args: dict) -> str:
             return f"Tool error: {str(e)}"
 
 
-async def invoke_gemini_agentic(
-    token_store: TokenStore,
+async def _invoke_gemini_agentic_with_api_key(
+    api_key: str,
     prompt: str,
     model: str = "gemini-3-flash",
     max_turns: int = 10,
     timeout: int = 120,
 ) -> str:
     """
-    Invoke Gemini with function calling for agentic tasks.
+    Invoke Gemini with function calling using API key authentication (google-genai library).
 
-    This function implements a multi-turn agentic loop:
+    This implements a multi-turn agentic loop:
     1. Send prompt with tool definitions
     2. If model returns FunctionCall, execute the tool
     3. Send FunctionResponse back to model
     4. Repeat until model returns text or max_turns reached
 
     Args:
-        token_store: Token store for OAuth credentials
+        api_key: Gemini API key (from GEMINI_API_KEY or GOOGLE_API_KEY env var)
         prompt: The task prompt
         model: Gemini model to use
         max_turns: Maximum number of tool-use turns
-        timeout: Request timeout in seconds
+        timeout: Request timeout in seconds (currently unused by google-genai)
 
     Returns:
         Final text response from the model
+
+    Raises:
+        ImportError: If google-genai library is not installed
+        ValueError: If API request fails
     """
+    # USER-VISIBLE NOTIFICATION (stderr) - Shows agentic mode with API key
+    import sys
+
+    print(f"🔮 GEMINI (API/Agentic): {model} | max_turns={max_turns}", file=sys.stderr)
+
+    try:
+        from google import genai
+        from google.genai import types
+    except ImportError:
+        raise ImportError(
+            "google-genai library not installed. Install with: pip install google-genai"
+        )
+
+    # Map stravinsky model names to google-genai model names
+    # Pass through gemini-3-* models directly (Tier 3 benefits)
+    model_map = {
+        "gemini-3-flash": "gemini-3-flash-preview",  # Tier 3 model (not -exp)
+        "gemini-3-flash-preview": "gemini-3-flash-preview",  # Pass through
+        "gemini-3-pro-low": "gemini-3-flash-preview",
+        "gemini-3-pro-high": "gemini-3-pro-preview",  # Tier 3 pro model
+        "gemini-3-pro-preview": "gemini-3-pro-preview",  # Pass through
+        "gemini-flash": "gemini-3-flash-preview",
+        "gemini-pro": "gemini-3-pro-preview",
+        "gemini-3-pro": "gemini-3-pro-preview",
+        "gemini": "gemini-3-flash-preview",
+    }
+    genai_model = model_map.get(model, "gemini-3-flash-preview")  # Default to tier 3 flash
+
+    # Initialize client with API key
+    client = genai.Client(api_key=api_key)
+
+    # Convert AGENT_TOOLS to google-genai format
+    # google-genai expects tools as a list of Tool objects containing function_declarations
+    function_declarations = []
+    for tool_group in AGENT_TOOLS:
+        for func_decl in tool_group.get("functionDeclarations", []):
+            function_declarations.append(
+                types.FunctionDeclaration(
+                    name=func_decl["name"],
+                    description=func_decl["description"],
+                    parameters=func_decl["parameters"],
+                )
+            )
+
+    # Wrap function declarations in a Tool object
+    tools = [types.Tool(function_declarations=function_declarations)]
+
+    # Initialize conversation with user message
+    contents = [types.Content(role="user", parts=[types.Part(text=prompt)])]
+
+    for turn in range(max_turns):
+        try:
+            # Generate content with tools
+            response = client.models.generate_content(
+                model=genai_model,
+                contents=contents,
+                config=types.GenerateContentConfig(
+                    tools=tools,
+                    temperature=0.7,
+                    max_output_tokens=8192,
+                ),
+            )
+
+            # Check if response has function calls
+            if not response.candidates or not response.candidates[0].content.parts:
+                return "No response generated"
+
+            parts = response.candidates[0].content.parts
+            function_calls = []
+            text_parts = []
+
+            for part in parts:
+                if part.function_call:
+                    function_calls.append(part.function_call)
+                elif part.text:
+                    text_parts.append(part.text)
+
+            # If no function calls, return text response
+            if not function_calls:
+                result = "".join(text_parts)
+                return result if result.strip() else "Task completed"
+
+            # Execute function calls and prepare responses
+            function_responses = []
+            for func_call in function_calls:
+                func_name = func_call.name
+                func_args = dict(func_call.args) if func_call.args else {}
+
+                logger.info(f"[AgenticGemini] Turn {turn + 1}: Executing {func_name}")
+                result = await _execute_tool(func_name, func_args)
+
+                function_responses.append(
+                    types.Part(
+                        function_response=types.FunctionResponse(
+                            name=func_name,
+                            response={"result": result},
+                        )
+                    )
+                )
+
+            # Add model's response to conversation
+            contents.append(response.candidates[0].content)
+
+            # Add function responses to conversation
+            contents.append(
+                types.Content(
+                    role="user",
+                    parts=function_responses,
+                )
+            )
+
+        except Exception as e:
+            logger.error(f"[AgenticGemini] Error in turn {turn + 1}: {e}")
+            raise ValueError(f"Gemini API key request failed: {e}")
+
+    return "Max turns reached without final response"
+
+
+async def invoke_gemini_agentic(
+    token_store: TokenStore,
+    prompt: str,
+    model: str = "gemini-3-flash",
+    max_turns: int = 10,
+    timeout: int = 120,
+) -> str:
+    """
+    Invoke Gemini with function calling for agentic tasks.
+    """
+    from mcp_bridge.proxy.client import is_proxy_enabled, PROXY_URL
+
+    if is_proxy_enabled():
+        import httpx
+
+        async with httpx.AsyncClient(timeout=float(timeout) + 10) as client:
+            payload = {"prompt": prompt, "model": model, "max_turns": max_turns, "timeout": timeout}
+            response = await client.post(f"{PROXY_URL}/v1/gemini/agentic", json=payload)
+            response.raise_for_status()
+            return response.json()["response"]
+
+    import sys
+
+    # Get API key from environment (loaded from ~/.stravinsky/.env)
+    api_key = _get_gemini_api_key()
+
+    # ==============================================
+    # AUTH PRIORITY: OAuth first, API fallback on 429
+    # ==============================================
+    # 1. If API-only mode (after 429), use API key directly
+    # 2. Otherwise, try OAuth first
+    # 3. On 429 from OAuth, switch to API-only mode and retry
+
+    # If we're in API-only mode (after a 429), use API key directly
+    if _is_api_only_mode():
+        if not api_key:
+            raise ValueError(
+                "OAuth rate-limited (429) and no API key available. "
+                "Add GEMINI_API_KEY to ~/.stravinsky/.env"
+            )
+
+        # Calculate remaining cooldown time
+        if _GEMINI_OAUTH_429_TIMESTAMP is not None:
+            remaining = _OAUTH_COOLDOWN_SECONDS - (time.time() - _GEMINI_OAUTH_429_TIMESTAMP)
+            remaining_mins = int(remaining // 60)
+            remaining_secs = int(remaining % 60)
+            cooldown_msg = f" (OAuth retry in {remaining_mins}m {remaining_secs}s)"
1440
+ else:
1441
+ cooldown_msg = ""
1442
+
1443
+ # Check time-window rate limit (30 req/min)
1444
+ time_limiter = get_gemini_time_limiter()
1445
+ wait_time = time_limiter.acquire_visible("GEMINI", "API key")
1446
+ if wait_time > 0:
1447
+ await asyncio.sleep(wait_time)
1448
+ # Re-acquire after sleep
1449
+ wait_time = time_limiter.acquire_visible("GEMINI", "API key")
1450
+
1451
+ print(
1452
+ f"🔑 GEMINI (API-only cooldown{cooldown_msg}/Agentic): {model} | max_turns={max_turns}",
1453
+ file=sys.stderr,
1454
+ )
1455
+ logger.info("[AgenticGemini] Using API key (5-min cooldown after OAuth 429)")
1456
+ result = await _invoke_gemini_agentic_with_api_key(
1457
+ api_key=api_key,
1458
+ prompt=prompt,
1459
+ model=model,
1460
+ max_turns=max_turns,
1461
+ timeout=timeout,
1462
+ )
1463
+ # Prepend auth header for visibility in logs
1464
+ auth_header = f"[Auth: API key (5-min cooldown, Agentic) | Model: {model}]\n\n"
1465
+ return auth_header + result
1466
+
1467
+ # DEFAULT: Try OAuth first (Antigravity)
1468
+ logger.info("[AgenticGemini] Using OAuth authentication (Antigravity)")
1469
+
1470
+ # Check time-window rate limit (30 req/min)
1471
+ time_limiter = get_gemini_time_limiter()
1472
+ wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
1473
+ if wait_time > 0:
1474
+ await asyncio.sleep(wait_time)
1475
+ # Re-acquire after sleep
1476
+ wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
1477
+
1478
+ # USER-VISIBLE NOTIFICATION (stderr) - Shows agentic mode with OAuth
1479
+ import sys
1480
+
1481
+ print(f"🔮 GEMINI (OAuth/Agentic): {model} | max_turns={max_turns}", file=sys.stderr)
1482
+
690
1483
  access_token = await _ensure_valid_token(token_store, "gemini")
691
1484
  api_model = resolve_gemini_model(model)
692
1485
 
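The API-only branch above relies on module-level cooldown state (`_is_api_only_mode`, `_set_api_only_mode`, `_GEMINI_OAUTH_429_TIMESTAMP`, `_OAUTH_COOLDOWN_SECONDS`) that is defined elsewhere in this file and does not appear in the hunk. A minimal sketch of how that state plausibly fits together, assuming the five-minute window implied by the "5-min cooldown" log messages; the names mirror the call sites, the bodies are reconstructed:

```python
import time

_OAUTH_COOLDOWN_SECONDS = 300  # assumption: "5-min cooldown" per the log messages above
_GEMINI_OAUTH_429_TIMESTAMP: float | None = None  # set when OAuth returns 429


def _set_api_only_mode(reason: str) -> None:
    """Record an OAuth 429 so subsequent calls skip straight to the API key."""
    global _GEMINI_OAUTH_429_TIMESTAMP
    _GEMINI_OAUTH_429_TIMESTAMP = time.time()


def _is_api_only_mode() -> bool:
    """True while the OAuth cooldown window is still running."""
    if _GEMINI_OAUTH_429_TIMESTAMP is None:
        return False
    return (time.time() - _GEMINI_OAUTH_429_TIMESTAMP) < _OAUTH_COOLDOWN_SECONDS
```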
@@ -773,6 +1566,33 @@ async def invoke_gemini_agentic(
                 logger.warning(f"[AgenticGemini] Endpoint {endpoint} failed: {e}, trying next")
                 continue
 
+    # ==============================================
+    # 429 RATE LIMIT DETECTION: Fallback to API key
+    # ==============================================
+    # If OAuth got rate-limited (429), switch to API-only mode and retry
+    if response is not None and response.status_code == 429:
+        api_key = _get_gemini_api_key()
+        if api_key:
+            _set_api_only_mode("OAuth rate-limited (429) in agentic mode")
+            logger.info("[AgenticGemini] Retrying with API key after OAuth 429")
+            # Retry entire agentic call with API key
+            result = await _invoke_gemini_agentic_with_api_key(
+                api_key=api_key,
+                prompt=prompt,
+                model=model,
+                max_turns=max_turns,
+                timeout=timeout,
+            )
+            # Prepend auth header for visibility
+            auth_header = f"[Auth: API key (OAuth 429 fallback, Agentic) | Model: {model}]\n\n"
+            return auth_header + result
+        else:
+            # No API key available - raise clear error
+            raise ValueError(
+                "OAuth rate-limited (429) and no API key available. "
+                "Add GEMINI_API_KEY to ~/.stravinsky/.env"
+            )
+
     if response is None:
         raise ValueError(f"All Antigravity endpoints failed: {last_error}")
 
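Both auth branches gate requests through `get_gemini_time_limiter().acquire_visible(...)`, whose implementation is not part of this diff. Judging by the acquire, sleep, re-acquire pattern and the "30 req/min" comments, it behaves like a sliding-window limiter that returns the seconds to wait (0 when a slot is free); a sketch under those assumptions:

```python
import time
from collections import deque


class TimeWindowLimiter:
    """Sketch of the assumed contract; the real limiter presumably also
    prints a stderr notice (hence 'visible')."""

    def __init__(self, max_requests: int = 30, window_seconds: float = 60.0):
        self.max_requests = max_requests
        self.window_seconds = window_seconds
        self._timestamps: deque = deque()

    def acquire_visible(self, provider: str, auth_mode: str) -> float:
        """Claim a request slot; return seconds to wait, 0.0 if under the limit."""
        now = time.time()
        # Drop requests that have aged out of the window.
        while self._timestamps and now - self._timestamps[0] > self.window_seconds:
            self._timestamps.popleft()
        if len(self._timestamps) < self.max_requests:
            self._timestamps.append(now)
            return 0.0
        # Caller must wait until the oldest request leaves the window.
        return self.window_seconds - (now - self._timestamps[0])
```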
@@ -783,13 +1603,15 @@ async def invoke_gemini_agentic(
         inner_response = data.get("response", data)
         candidates = inner_response.get("candidates", [])
         if not candidates:
-            return "No response generated"
+            auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+            return auth_header + "No response generated"
 
         content = candidates[0].get("content", {})
         parts = content.get("parts", [])
 
         if not parts:
-            return "No response parts"
+            auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+            return auth_header + "No response parts"
 
         # Check for function call
         function_call = None
@@ -808,7 +1630,7 @@ async def invoke_gemini_agentic(
             func_args = function_call.get("args", {})
 
             logger.info(f"[AgenticGemini] Turn {turn + 1}: Executing {func_name}")
-            result = _execute_tool(func_name, func_args)
+            result = await _execute_tool(func_name, func_args)
 
             # Add model's response and function result to conversation
             contents.append({"role": "model", "parts": [{"functionCall": function_call}]})
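Unlike the API-key path, this OAuth path builds the conversation as raw generateContent JSON rather than google-genai types. As an illustration only (the prompt and tool name are invented), here is how `contents` grows across a single tool-use turn:

```python
# One full turn in the raw REST wire format used above:
# user text -> model functionCall -> user functionResponse.
contents = [
    {"role": "user", "parts": [{"text": "List the Python files in src/"}]},
    # Turn 1: the model asks for a tool call ...
    {"role": "model", "parts": [{"functionCall": {"name": "glob", "args": {"pattern": "src/**/*.py"}}}]},
    # ... and the tool result goes back as a user-role functionResponse part.
    {
        "role": "user",
        "parts": [
            {
                "functionResponse": {
                    "name": "glob",
                    "response": {"result": "src/app.py\nsrc/cli.py"},
                }
            }
        ],
    },
]
```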
@@ -822,17 +1644,20 @@ async def invoke_gemini_agentic(
             )
         else:
             # No function call, return text response
-            return text_response or "Task completed"
+            result = text_response or "Task completed"
+            auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+            return auth_header + result
 
-    return "Max turns reached without final response"
+    auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+    return auth_header + "Max turns reached without final response"
 
 
 @retry(
-    stop=stop_after_attempt(5),
-    wait=wait_exponential(multiplier=1, min=4, max=60),
+    stop=stop_after_attempt(2),  # Reduced from 5 to 2 attempts
+    wait=wait_exponential(multiplier=2, min=10, max=120),  # Single retry waits at least 10s, capped at 120s
     retry=retry_if_exception(is_retryable_exception),
     before_sleep=lambda retry_state: logger.info(
-        f"Rate limited or server error, retrying in {retry_state.next_action.sleep} seconds..."
+        f"Server error, retrying in {retry_state.next_action.sleep} seconds..."
     ),
 )
 async def invoke_openai(
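For reference, this is what the tightened policy does in practice: `stop_after_attempt(2)` allows exactly one retry, and tenacity clamps `wait_exponential(multiplier=2, min=10, max=120)` into the 10-120 second band, so the single wait is about 10s. A self-contained sketch:

```python
from tenacity import retry, stop_after_attempt, wait_exponential

attempt_log = []


@retry(stop=stop_after_attempt(2), wait=wait_exponential(multiplier=2, min=10, max=120))
def flaky() -> str:
    attempt_log.append(len(attempt_log) + 1)
    if len(attempt_log) < 2:
        raise RuntimeError("transient server error")
    return "ok"


# flaky(): the first attempt fails, tenacity sleeps ~10s (the clamp floor,
# since 2 * 2**1 = 4 < 10), the second attempt succeeds; a second failure
# would propagate instead of retrying further.
```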
@@ -842,24 +1667,23 @@ async def invoke_openai(
     temperature: float = 0.7,
     max_tokens: int = 4096,
     thinking_budget: int = 0,
+    reasoning_effort: str = "medium",
 ) -> str:
     """
     Invoke an OpenAI model with the given prompt.
-
-    Args:
-        token_store: Token store for API key
-        prompt: The prompt to send to OpenAI
-        model: OpenAI model to use
-        temperature: Sampling temperature (0.0-2.0)
-        max_tokens: Maximum tokens in response
-
-    Returns:
-        The model's response text.
-
-    Raises:
-        ValueError: If not authenticated with OpenAI
-        httpx.HTTPStatusError: If API request fails
     """
+    from mcp_bridge.proxy.client import is_proxy_enabled, proxy_invoke_openai
+
+    if is_proxy_enabled():
+        return await proxy_invoke_openai(
+            prompt=prompt,
+            model=model,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            thinking_budget=thinking_budget,
+            reasoning_effort=reasoning_effort,
+        )
+
     # Execute pre-model invoke hooks
     params = {
         "prompt": prompt,
@@ -867,6 +1691,7 @@ async def invoke_openai(
         "temperature": temperature,
         "max_tokens": max_tokens,
         "thinking_budget": thinking_budget,
+        "reasoning_effort": reasoning_effort,
         "token_store": token_store,  # Pass for hooks that need model access
         "provider": "openai",  # Identify which provider is being called
     }
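`mcp_bridge.proxy.client` itself is not included in this part of the diff. Based solely on how it is called here and in the Gemini agentic path above, it plausibly looks like the following sketch; the environment variable name and the `/v1/openai/invoke` route are assumptions, not confirmed API:

```python
# Hypothetical reconstruction of mcp_bridge/proxy/client.py (not shown in the diff).
import os

import httpx

PROXY_URL = os.environ.get("STRAVINSKY_PROXY_URL", "http://127.0.0.1:8765")  # assumed env var


def is_proxy_enabled() -> bool:
    # Assumed: proxy mode is on whenever the env var is set.
    return bool(os.environ.get("STRAVINSKY_PROXY_URL"))


async def proxy_invoke_openai(**payload) -> str:
    # Assumed route, by analogy with the /v1/gemini/agentic call above.
    async with httpx.AsyncClient(timeout=130.0) as client:
        response = await client.post(f"{PROXY_URL}/v1/openai/invoke", json=payload)
        response.raise_for_status()
        return response.json()["response"]
```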
@@ -879,6 +1704,7 @@ async def invoke_openai(
     temperature = params["temperature"]
     max_tokens = params["max_tokens"]
     thinking_budget = params["thinking_budget"]
+    reasoning_effort = params.get("reasoning_effort", "medium")
 
     # Extract agent context for logging (may be passed via params or original call)
     agent_context = params.get("agent_context", {})
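The hook pass treats `params` as mutable: pre-model-invoke hooks may rewrite any entry, which is why every value is re-read from the dict afterwards. A hypothetical hook illustrating that contract (the hook and its selection logic are invented for illustration, not taken from the package):

```python
# Sketch of a pre-model-invoke hook under the implied contract:
# receive the params dict, optionally rewrite entries, return it.
async def budget_cap_hook(params: dict) -> dict:
    agent_type = params.get("agent_context", {}).get("agent_type")
    if params.get("provider") == "openai" and agent_type == "explore":
        # Clamp cheap exploratory agents to a smaller budget.
        params["max_tokens"] = min(params["max_tokens"], 2048)
        params["reasoning_effort"] = "low"
    return params
```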
@@ -892,19 +1718,55 @@ async def invoke_openai(
 
     # USER-VISIBLE NOTIFICATION (stderr) - Shows when OpenAI is invoked
     import sys
+
     task_info = f" task={task_id}" if task_id else ""
     desc_info = f" | {description}" if description else ""
+
+    # ==============================================
+    # AUTH PRIORITY: OAuth first, Gemini fallback on 429
+    # ==============================================
+    # 1. If fallback mode (after 429), use Gemini directly
+    # 2. Otherwise, try OpenAI OAuth first
+    # 3. On 429 from OAuth, switch to fallback mode and retry with Gemini
+
+    provider_tracker = get_provider_tracker()
+
+    # If OpenAI is in cooldown, follow tier-aware fallback chain.
+    if not provider_tracker.is_available("openai"):
+        for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain("openai", model):
+            if candidate_provider == "openai":
+                continue
+            if use_oauth and not provider_tracker.is_available(candidate_provider):
+                continue
+
+            if candidate_provider == "gemini":
+                if not use_oauth:
+                    # Force Gemini API-key mode for the cooldown window.
+                    if _get_gemini_api_key() is None:
+                        continue
+                    _set_api_only_mode("OpenAI in cooldown; using Gemini API key")
+
+                return await invoke_gemini(
+                    token_store=token_store,
+                    prompt=prompt,
+                    model=candidate_model,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    thinking_budget=0,
+                    image_path=None,
+                )
+    # DEFAULT: Try OpenAI OAuth first
     print(f"🧠 OPENAI: {model} | agent={agent_type}{task_info}{desc_info}", file=sys.stderr)
 
     access_token = await _ensure_valid_token(token_store, "openai")
-    logger.info(f"[invoke_openai] Got access token")
+    logger.info("[invoke_openai] Got access token")
 
     # ChatGPT Backend API - Uses Codex Responses endpoint
     # Replicates opencode-openai-codex-auth plugin behavior
     api_url = "https://chatgpt.com/backend-api/codex/responses"
 
     # Extract account ID from JWT token
-    logger.info(f"[invoke_openai] Extracting account ID from JWT")
+    logger.info("[invoke_openai] Extracting account ID from JWT")
     try:
         parts = access_token.split(".")
         payload_b64 = parts[1]
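The rest of the JWT handling falls outside this hunk. For orientation, a minimal sketch of the usual base64url decode that would follow; the exact claim path for the ChatGPT account id is an assumption here, not confirmed by the diff:

```python
import base64
import json


def _account_id_from_jwt(access_token: str) -> str | None:
    """Sketch: pull an account id out of a JWT's payload segment."""
    payload_b64 = access_token.split(".")[1]
    payload_b64 += "=" * (-len(payload_b64) % 4)  # restore base64url padding
    claims = json.loads(base64.urlsafe_b64decode(payload_b64))
    # Assumed claim path; the real code may read a different field.
    return claims.get("https://api.openai.com/auth", {}).get("chatgpt_account_id")
```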
@@ -932,6 +1794,10 @@ async def invoke_openai(
     if account_id:
         headers["x-openai-account-id"] = account_id
 
+    # Determine final effort
+    # Legacy: thinking_budget > 0 implies high effort
+    effort = "high" if thinking_budget > 0 else reasoning_effort
+
     # Request body matching opencode transformation
     payload = {
         "model": model,
@@ -939,7 +1805,7 @@ async def invoke_openai(
         "stream": True,  # Always stream (handler converts to non-stream if needed)
         "instructions": instructions,
         "input": [{"role": "user", "content": prompt}],
-        "reasoning": {"effort": "high" if thinking_budget > 0 else "medium", "summary": "auto"},
+        "reasoning": {"effort": effort, "summary": "auto"},
         "text": {"verbosity": "medium"},
         "include": ["reasoning.encrypted_content"],
     }
@@ -952,44 +1818,100 @@ async def invoke_openai(
     logger.info(f"[invoke_openai] Instructions length: {len(instructions)}")
 
     try:
-        async with httpx.AsyncClient() as client:
-            async with client.stream(
+        async with (
+            httpx.AsyncClient() as client,
+            client.stream(
                 "POST", api_url, headers=headers, json=payload, timeout=120.0
-            ) as response:
-                logger.info(f"[invoke_openai] Response status: {response.status_code}")
-                if response.status_code == 401:
-                    raise ValueError(
-                        "OpenAI authentication failed. Run: stravinsky-auth login openai"
-                    )
+            ) as response,
+        ):
+            logger.info(f"[invoke_openai] Response status: {response.status_code}")
+            if response.status_code == 401:
+                raise ValueError("OpenAI authentication failed. Run: stravinsky-auth login openai")
+
+            # ==============================================
+            # 429 RATE LIMIT DETECTION: Tier-aware fallback chain
+            # ==============================================
+            if response.status_code == 429:
+                provider_tracker = get_provider_tracker()
+                provider_tracker.mark_rate_limited(
+                    "openai",
+                    duration=_OAUTH_COOLDOWN_SECONDS,
+                    reason="OpenAI OAuth rate-limited (429)",
+                )
+
+                for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain(
+                    "openai", model
+                ):
+                    if candidate_provider == "openai":
+                        continue
+                    if use_oauth and not provider_tracker.is_available(candidate_provider):
+                        continue
+
+                    if candidate_provider == "gemini":
+                        if not use_oauth:
+                            if _get_gemini_api_key() is None:
+                                continue
+                            _set_api_only_mode("OpenAI OAuth rate-limited (429)")
+
+                        return await invoke_gemini(
+                            token_store=token_store,
+                            prompt=prompt,
+                            model=candidate_model,
+                            temperature=temperature,
+                            max_tokens=max_tokens,
+                            thinking_budget=0,
+                            image_path=None,
+                        )
 
-                if response.status_code >= 400:
-                    error_body = await response.aread()
-                    error_text = error_body.decode("utf-8")
-                    logger.error(f"OpenAI API error {response.status_code}: {error_text}")
-                    logger.error(f"Request payload was: {payload}")
-                    logger.error(f"Request headers were: {headers}")
-                    raise ValueError(f"OpenAI API error {response.status_code}: {error_text}")
-
-                # Parse SSE stream for text deltas
-                async for line in response.aiter_lines():
-                    if line.startswith("data: "):
-                        data_json = line[6:]  # Remove "data: " prefix
-                        try:
-                            data = json_module.loads(data_json)
-                            event_type = data.get("type")
-
-                            # Extract text deltas from SSE stream
-                            if event_type == "response.output_text.delta":
-                                delta = data.get("delta", "")
-                                text_chunks.append(delta)
-
-                        except json_module.JSONDecodeError:
-                            pass  # Skip malformed JSON
-                        except Exception as e:
-                            logger.warning(f"Error processing SSE event: {e}")
+                raise ValueError("OpenAI OAuth rate-limited (429) and no fallback succeeded")
+            if response.status_code >= 400:
+                error_body = await response.aread()
+                error_text = error_body.decode("utf-8")
+                logger.error(f"OpenAI API error {response.status_code}: {error_text}")
+                logger.error(f"Request payload was: {payload}")
+                logger.error(f"Request headers were: {headers}")
+                raise ValueError(f"OpenAI API error {response.status_code}: {error_text}")
+
+            # Parse SSE stream for text deltas
+            async for line in response.aiter_lines():
+                if line.startswith("data: "):
+                    data_json = line[6:]  # Remove "data: " prefix
+                    try:
+                        data = json_module.loads(data_json)
+                        event_type = data.get("type")
+
+                        # Extract text deltas from SSE stream
+                        if event_type == "response.output_text.delta":
+                            delta = data.get("delta", "")
+                            text_chunks.append(delta)
+
+                    except json_module.JSONDecodeError:
+                        pass  # Skip malformed JSON
+                    except Exception as e:
+                        logger.warning(f"Error processing SSE event: {e}")
 
         # Return collected text
         result = "".join(text_chunks)
+
+        # Track estimated usage
+        try:
+            from mcp_bridge.metrics.cost_tracker import get_cost_tracker
+
+            tracker = get_cost_tracker()
+            # Estimate: 4 chars per token
+            input_tokens = len(prompt) // 4
+            output_tokens = len(result) // 4
+
+            tracker.track_usage(
+                model=model,
+                input_tokens=input_tokens,
+                output_tokens=output_tokens,
+                agent_type=agent_type,
+                task_id=task_id,
+            )
+        except Exception as e:
+            logger.warning(f"Failed to track cost: {e}")
+
         if not result:
             return "No response generated"
         return result
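To make the stream handling above concrete, a self-contained example of the SSE lines the parser consumes, and the rough 4-characters-per-token estimate the cost tracker then applies to the result:

```python
import json

# Illustrative SSE lines in the shape the parser above expects; only
# response.output_text.delta events contribute to the returned text.
sample_stream = [
    'data: {"type": "response.created"}',
    'data: {"type": "response.output_text.delta", "delta": "Hel"}',
    'data: {"type": "response.output_text.delta", "delta": "lo"}',
    "data: [DONE]",  # not JSON; skipped by the JSONDecodeError guard
]

text_chunks = []
for line in sample_stream:
    if line.startswith("data: "):
        try:
            event = json.loads(line[6:])
        except json.JSONDecodeError:
            continue
        if event.get("type") == "response.output_text.delta":
            text_chunks.append(event.get("delta", ""))

result = "".join(text_chunks)
assert result == "Hello"
# Cost tracking then estimates tokens as len(text) // 4: here 5 // 4 = 1 token.
```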