stravinsky 0.4.18__py3-none-any.whl → 0.4.66__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of stravinsky might be problematic.
- mcp_bridge/__init__.py +1 -1
- mcp_bridge/auth/__init__.py +16 -6
- mcp_bridge/auth/cli.py +202 -11
- mcp_bridge/auth/oauth.py +1 -2
- mcp_bridge/auth/openai_oauth.py +4 -7
- mcp_bridge/auth/token_store.py +0 -1
- mcp_bridge/cli/__init__.py +1 -1
- mcp_bridge/cli/install_hooks.py +503 -107
- mcp_bridge/cli/session_report.py +0 -3
- mcp_bridge/config/__init__.py +2 -2
- mcp_bridge/config/hook_config.py +3 -5
- mcp_bridge/config/rate_limits.py +108 -13
- mcp_bridge/hooks/HOOKS_SETTINGS.json +17 -4
- mcp_bridge/hooks/__init__.py +14 -4
- mcp_bridge/hooks/agent_reminder.py +4 -4
- mcp_bridge/hooks/auto_slash_command.py +5 -5
- mcp_bridge/hooks/budget_optimizer.py +2 -2
- mcp_bridge/hooks/claude_limits_hook.py +114 -0
- mcp_bridge/hooks/comment_checker.py +3 -4
- mcp_bridge/hooks/compaction.py +2 -2
- mcp_bridge/hooks/context.py +2 -1
- mcp_bridge/hooks/context_monitor.py +2 -2
- mcp_bridge/hooks/delegation_policy.py +85 -0
- mcp_bridge/hooks/directory_context.py +3 -3
- mcp_bridge/hooks/edit_recovery.py +3 -2
- mcp_bridge/hooks/edit_recovery_policy.py +49 -0
- mcp_bridge/hooks/empty_message_sanitizer.py +2 -2
- mcp_bridge/hooks/events.py +160 -0
- mcp_bridge/hooks/git_noninteractive.py +4 -4
- mcp_bridge/hooks/keyword_detector.py +8 -10
- mcp_bridge/hooks/manager.py +35 -22
- mcp_bridge/hooks/notification_hook.py +13 -6
- mcp_bridge/hooks/parallel_enforcement_policy.py +67 -0
- mcp_bridge/hooks/parallel_enforcer.py +5 -5
- mcp_bridge/hooks/parallel_execution.py +22 -10
- mcp_bridge/hooks/post_tool/parallel_validation.py +103 -0
- mcp_bridge/hooks/pre_compact.py +8 -9
- mcp_bridge/hooks/pre_tool/agent_spawn_validator.py +115 -0
- mcp_bridge/hooks/preemptive_compaction.py +2 -3
- mcp_bridge/hooks/routing_notifications.py +80 -0
- mcp_bridge/hooks/rules_injector.py +11 -19
- mcp_bridge/hooks/session_idle.py +4 -4
- mcp_bridge/hooks/session_notifier.py +4 -4
- mcp_bridge/hooks/session_recovery.py +4 -5
- mcp_bridge/hooks/stravinsky_mode.py +1 -1
- mcp_bridge/hooks/subagent_stop.py +1 -3
- mcp_bridge/hooks/task_validator.py +2 -2
- mcp_bridge/hooks/tmux_manager.py +7 -8
- mcp_bridge/hooks/todo_delegation.py +4 -1
- mcp_bridge/hooks/todo_enforcer.py +180 -10
- mcp_bridge/hooks/truncation_policy.py +37 -0
- mcp_bridge/hooks/truncator.py +1 -2
- mcp_bridge/metrics/cost_tracker.py +115 -0
- mcp_bridge/native_search.py +93 -0
- mcp_bridge/native_watcher.py +118 -0
- mcp_bridge/notifications.py +3 -4
- mcp_bridge/orchestrator/enums.py +11 -0
- mcp_bridge/orchestrator/router.py +165 -0
- mcp_bridge/orchestrator/state.py +32 -0
- mcp_bridge/orchestrator/visualization.py +14 -0
- mcp_bridge/orchestrator/wisdom.py +34 -0
- mcp_bridge/prompts/__init__.py +1 -8
- mcp_bridge/prompts/dewey.py +1 -1
- mcp_bridge/prompts/planner.py +2 -4
- mcp_bridge/prompts/stravinsky.py +53 -31
- mcp_bridge/proxy/__init__.py +0 -0
- mcp_bridge/proxy/client.py +70 -0
- mcp_bridge/proxy/model_server.py +157 -0
- mcp_bridge/routing/__init__.py +43 -0
- mcp_bridge/routing/config.py +250 -0
- mcp_bridge/routing/model_tiers.py +135 -0
- mcp_bridge/routing/provider_state.py +261 -0
- mcp_bridge/routing/task_classifier.py +190 -0
- mcp_bridge/server.py +363 -34
- mcp_bridge/server_tools.py +298 -6
- mcp_bridge/tools/__init__.py +19 -8
- mcp_bridge/tools/agent_manager.py +549 -799
- mcp_bridge/tools/background_tasks.py +13 -17
- mcp_bridge/tools/code_search.py +54 -51
- mcp_bridge/tools/continuous_loop.py +0 -1
- mcp_bridge/tools/dashboard.py +19 -0
- mcp_bridge/tools/find_code.py +296 -0
- mcp_bridge/tools/init.py +1 -0
- mcp_bridge/tools/list_directory.py +42 -0
- mcp_bridge/tools/lsp/__init__.py +8 -8
- mcp_bridge/tools/lsp/manager.py +51 -28
- mcp_bridge/tools/lsp/tools.py +98 -65
- mcp_bridge/tools/model_invoke.py +1047 -152
- mcp_bridge/tools/mux_client.py +75 -0
- mcp_bridge/tools/project_context.py +1 -2
- mcp_bridge/tools/query_classifier.py +132 -49
- mcp_bridge/tools/read_file.py +84 -0
- mcp_bridge/tools/replace.py +45 -0
- mcp_bridge/tools/run_shell_command.py +38 -0
- mcp_bridge/tools/search_enhancements.py +347 -0
- mcp_bridge/tools/semantic_search.py +677 -92
- mcp_bridge/tools/session_manager.py +0 -2
- mcp_bridge/tools/skill_loader.py +0 -1
- mcp_bridge/tools/task_runner.py +5 -7
- mcp_bridge/tools/templates.py +3 -3
- mcp_bridge/tools/tool_search.py +331 -0
- mcp_bridge/tools/write_file.py +29 -0
- mcp_bridge/update_manager.py +33 -37
- mcp_bridge/update_manager_pypi.py +6 -8
- mcp_bridge/utils/cache.py +82 -0
- mcp_bridge/utils/process.py +71 -0
- mcp_bridge/utils/session_state.py +51 -0
- mcp_bridge/utils/truncation.py +76 -0
- {stravinsky-0.4.18.dist-info → stravinsky-0.4.66.dist-info}/METADATA +84 -35
- stravinsky-0.4.66.dist-info/RECORD +198 -0
- {stravinsky-0.4.18.dist-info → stravinsky-0.4.66.dist-info}/entry_points.txt +1 -0
- stravinsky_claude_assets/HOOKS_INTEGRATION.md +316 -0
- stravinsky_claude_assets/agents/HOOKS.md +437 -0
- stravinsky_claude_assets/agents/code-reviewer.md +210 -0
- stravinsky_claude_assets/agents/comment_checker.md +580 -0
- stravinsky_claude_assets/agents/debugger.md +254 -0
- stravinsky_claude_assets/agents/delphi.md +495 -0
- stravinsky_claude_assets/agents/dewey.md +248 -0
- stravinsky_claude_assets/agents/explore.md +1198 -0
- stravinsky_claude_assets/agents/frontend.md +472 -0
- stravinsky_claude_assets/agents/implementation-lead.md +164 -0
- stravinsky_claude_assets/agents/momus.md +464 -0
- stravinsky_claude_assets/agents/research-lead.md +141 -0
- stravinsky_claude_assets/agents/stravinsky.md +730 -0
- stravinsky_claude_assets/commands/delphi.md +9 -0
- stravinsky_claude_assets/commands/dewey.md +54 -0
- stravinsky_claude_assets/commands/git-master.md +112 -0
- stravinsky_claude_assets/commands/index.md +49 -0
- stravinsky_claude_assets/commands/publish.md +86 -0
- stravinsky_claude_assets/commands/review.md +73 -0
- stravinsky_claude_assets/commands/str/agent_cancel.md +70 -0
- stravinsky_claude_assets/commands/str/agent_list.md +56 -0
- stravinsky_claude_assets/commands/str/agent_output.md +92 -0
- stravinsky_claude_assets/commands/str/agent_progress.md +74 -0
- stravinsky_claude_assets/commands/str/agent_retry.md +94 -0
- stravinsky_claude_assets/commands/str/cancel.md +51 -0
- stravinsky_claude_assets/commands/str/clean.md +97 -0
- stravinsky_claude_assets/commands/str/continue.md +38 -0
- stravinsky_claude_assets/commands/str/index.md +199 -0
- stravinsky_claude_assets/commands/str/list_watchers.md +96 -0
- stravinsky_claude_assets/commands/str/search.md +205 -0
- stravinsky_claude_assets/commands/str/start_filewatch.md +136 -0
- stravinsky_claude_assets/commands/str/stats.md +71 -0
- stravinsky_claude_assets/commands/str/stop_filewatch.md +89 -0
- stravinsky_claude_assets/commands/str/unwatch.md +42 -0
- stravinsky_claude_assets/commands/str/watch.md +45 -0
- stravinsky_claude_assets/commands/strav.md +53 -0
- stravinsky_claude_assets/commands/stravinsky.md +292 -0
- stravinsky_claude_assets/commands/verify.md +60 -0
- stravinsky_claude_assets/commands/version.md +5 -0
- stravinsky_claude_assets/hooks/README.md +248 -0
- stravinsky_claude_assets/hooks/comment_checker.py +193 -0
- stravinsky_claude_assets/hooks/context.py +38 -0
- stravinsky_claude_assets/hooks/context_monitor.py +153 -0
- stravinsky_claude_assets/hooks/dependency_tracker.py +73 -0
- stravinsky_claude_assets/hooks/edit_recovery.py +46 -0
- stravinsky_claude_assets/hooks/execution_state_tracker.py +68 -0
- stravinsky_claude_assets/hooks/notification_hook.py +103 -0
- stravinsky_claude_assets/hooks/notification_hook_v2.py +96 -0
- stravinsky_claude_assets/hooks/parallel_execution.py +241 -0
- stravinsky_claude_assets/hooks/parallel_reinforcement.py +106 -0
- stravinsky_claude_assets/hooks/parallel_reinforcement_v2.py +112 -0
- stravinsky_claude_assets/hooks/pre_compact.py +123 -0
- stravinsky_claude_assets/hooks/ralph_loop.py +173 -0
- stravinsky_claude_assets/hooks/session_recovery.py +263 -0
- stravinsky_claude_assets/hooks/stop_hook.py +89 -0
- stravinsky_claude_assets/hooks/stravinsky_metrics.py +164 -0
- stravinsky_claude_assets/hooks/stravinsky_mode.py +146 -0
- stravinsky_claude_assets/hooks/subagent_stop.py +98 -0
- stravinsky_claude_assets/hooks/todo_continuation.py +111 -0
- stravinsky_claude_assets/hooks/todo_delegation.py +96 -0
- stravinsky_claude_assets/hooks/tool_messaging.py +281 -0
- stravinsky_claude_assets/hooks/truncator.py +23 -0
- stravinsky_claude_assets/rules/deployment_safety.md +51 -0
- stravinsky_claude_assets/rules/integration_wiring.md +89 -0
- stravinsky_claude_assets/rules/pypi_deployment.md +220 -0
- stravinsky_claude_assets/rules/stravinsky_orchestrator.md +32 -0
- stravinsky_claude_assets/settings.json +152 -0
- stravinsky_claude_assets/skills/chrome-devtools/SKILL.md +81 -0
- stravinsky_claude_assets/skills/sqlite/SKILL.md +77 -0
- stravinsky_claude_assets/skills/supabase/SKILL.md +74 -0
- stravinsky_claude_assets/task_dependencies.json +34 -0
- stravinsky-0.4.18.dist-info/RECORD +0 -88
- {stravinsky-0.4.18.dist-info → stravinsky-0.4.66.dist-info}/WHEEL +0 -0
mcp_bridge/tools/model_invoke.py
CHANGED
|
@@ -6,12 +6,16 @@ API requests to external model providers.
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import asyncio
|
|
9
|
+
import base64
|
|
10
|
+
import json as json_module
|
|
9
11
|
import logging
|
|
10
12
|
import os
|
|
11
13
|
import time
|
|
12
14
|
import uuid
|
|
13
|
-
|
|
14
|
-
import
|
|
15
|
+
|
|
16
|
+
from mcp_bridge.config.rate_limits import get_rate_limiter, get_gemini_time_limiter
|
|
17
|
+
from mcp_bridge.routing.model_tiers import get_oauth_fallback_chain
|
|
18
|
+
from mcp_bridge.routing.provider_state import get_provider_tracker
|
|
15
19
|
|
|
16
20
|
logger = logging.getLogger(__name__)
|
|
17
21
|
|
|
@@ -43,6 +47,130 @@ def _summarize_prompt(prompt: str, max_length: int = 120) -> str:
|
|
|
43
47
|
_CODEX_INSTRUCTIONS_CACHE = {}
|
|
44
48
|
_CODEX_INSTRUCTIONS_RELEASE_TAG = "rust-v0.77.0" # Update as needed
|
|
45
49
|
|
|
50
|
+
# ==============================================
|
|
51
|
+
# GEMINI AUTH MODE STATE (OAuth-first with 429 fallback)
|
|
52
|
+
# ==============================================
|
|
53
|
+
# When OAuth gets a 429 rate limit, we switch to API-only mode for 5 minutes.
|
|
54
|
+
# After 5 minutes, we automatically retry OAuth.
|
|
55
|
+
_GEMINI_OAUTH_429_TIMESTAMP: float | None = None # Timestamp of last 429
|
|
56
|
+
_OAUTH_COOLDOWN_SECONDS = 300 # 5 minutes
|
|
57
|
+
|
|
58
|
+
# ==============================================
|
|
59
|
+
# OPENAI AUTH MODE STATE (OAuth-first with 429 fallback)
|
|
60
|
+
# ==============================================
|
|
61
|
+
# When OpenAI OAuth gets a 429 rate limit, we fallback to Gemini for 5 minutes.
|
|
62
|
+
# After 5 minutes, we automatically retry OpenAI OAuth.
|
|
63
|
+
_OPENAI_OAUTH_429_TIMESTAMP: float | None = None # Timestamp of last OpenAI 429
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _get_gemini_api_key() -> str | None:
|
|
67
|
+
"""Get Gemini API key from environment (loaded from ~/.stravinsky/.env)."""
|
|
68
|
+
return os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _set_api_only_mode(reason: str = "429 rate limit"):
|
|
72
|
+
"""Switch to API-only mode after OAuth rate limit (5-minute cooldown)."""
|
|
73
|
+
global _GEMINI_OAUTH_429_TIMESTAMP
|
|
74
|
+
_GEMINI_OAUTH_429_TIMESTAMP = time.time()
|
|
75
|
+
logger.warning(f"[Gemini] Switching to API-only mode: {reason}")
|
|
76
|
+
import sys
|
|
77
|
+
|
|
78
|
+
print(
|
|
79
|
+
f"⚠️ GEMINI: OAuth rate-limited (429). "
|
|
80
|
+
f"Using API key for 5 minutes (will retry OAuth at {time.strftime('%H:%M:%S', time.localtime(_GEMINI_OAUTH_429_TIMESTAMP + _OAUTH_COOLDOWN_SECONDS))}).",
|
|
81
|
+
file=sys.stderr,
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _is_api_only_mode() -> bool:
|
|
86
|
+
"""
|
|
87
|
+
Check if we're in API-only mode (5-minute cooldown after 429).
|
|
88
|
+
|
|
89
|
+
Returns True if:
|
|
90
|
+
- 429 occurred AND
|
|
91
|
+
- Less than 5 minutes have elapsed
|
|
92
|
+
|
|
93
|
+
Automatically resets to OAuth mode after 5 minutes.
|
|
94
|
+
"""
|
|
95
|
+
global _GEMINI_OAUTH_429_TIMESTAMP
|
|
96
|
+
|
|
97
|
+
if _GEMINI_OAUTH_429_TIMESTAMP is None:
|
|
98
|
+
return False
|
|
99
|
+
|
|
100
|
+
elapsed = time.time() - _GEMINI_OAUTH_429_TIMESTAMP
|
|
101
|
+
|
|
102
|
+
if elapsed >= _OAUTH_COOLDOWN_SECONDS:
|
|
103
|
+
# Cooldown expired - reset to OAuth mode
|
|
104
|
+
logger.info(
|
|
105
|
+
f"[Gemini] 5-minute cooldown expired (elapsed: {elapsed:.0f}s). Retrying OAuth."
|
|
106
|
+
)
|
|
107
|
+
_GEMINI_OAUTH_429_TIMESTAMP = None
|
|
108
|
+
return False
|
|
109
|
+
|
|
110
|
+
# Still in cooldown
|
|
111
|
+
remaining = _OAUTH_COOLDOWN_SECONDS - elapsed
|
|
112
|
+
logger.debug(f"[Gemini] API-only mode active ({remaining:.0f}s remaining)")
|
|
113
|
+
return True
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def reset_gemini_auth_mode():
|
|
117
|
+
"""Reset to OAuth-first mode. Call this to manually reset cooldown."""
|
|
118
|
+
global _GEMINI_OAUTH_429_TIMESTAMP
|
|
119
|
+
_GEMINI_OAUTH_429_TIMESTAMP = None
|
|
120
|
+
logger.info("[Gemini] Reset to OAuth-first mode")
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _set_openai_fallback_mode(reason: str = "429 rate limit"):
|
|
124
|
+
"""Switch to Gemini fallback after OpenAI rate limit (5-minute cooldown)."""
|
|
125
|
+
global _OPENAI_OAUTH_429_TIMESTAMP
|
|
126
|
+
_OPENAI_OAUTH_429_TIMESTAMP = time.time()
|
|
127
|
+
logger.warning(f"[OpenAI] Switching to Gemini fallback: {reason}")
|
|
128
|
+
import sys
|
|
129
|
+
|
|
130
|
+
print(
|
|
131
|
+
f"⚠️ OPENAI: OAuth rate-limited (429). "
|
|
132
|
+
f"Using Gemini for 5 minutes (will retry OpenAI at {time.strftime('%H:%M:%S', time.localtime(_OPENAI_OAUTH_429_TIMESTAMP + _OAUTH_COOLDOWN_SECONDS))}).",
|
|
133
|
+
file=sys.stderr,
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _is_openai_fallback_mode() -> bool:
|
|
138
|
+
"""
|
|
139
|
+
Check if we're in Gemini fallback mode (5-minute cooldown after OpenAI 429).
|
|
140
|
+
|
|
141
|
+
Returns True if:
|
|
142
|
+
- OpenAI 429 occurred AND
|
|
143
|
+
- Less than 5 minutes have elapsed
|
|
144
|
+
|
|
145
|
+
Automatically resets to OpenAI mode after 5 minutes.
|
|
146
|
+
"""
|
|
147
|
+
global _OPENAI_OAUTH_429_TIMESTAMP
|
|
148
|
+
|
|
149
|
+
if _OPENAI_OAUTH_429_TIMESTAMP is None:
|
|
150
|
+
return False
|
|
151
|
+
|
|
152
|
+
elapsed = time.time() - _OPENAI_OAUTH_429_TIMESTAMP
|
|
153
|
+
|
|
154
|
+
if elapsed >= _OAUTH_COOLDOWN_SECONDS:
|
|
155
|
+
# Cooldown expired - reset to OpenAI mode
|
|
156
|
+
logger.info(
|
|
157
|
+
f"[OpenAI] 5-minute cooldown expired (elapsed: {elapsed:.0f}s). Retrying OpenAI OAuth."
|
|
158
|
+
)
|
|
159
|
+
_OPENAI_OAUTH_429_TIMESTAMP = None
|
|
160
|
+
return False
|
|
161
|
+
|
|
162
|
+
# Still in cooldown
|
|
163
|
+
remaining = _OAUTH_COOLDOWN_SECONDS - elapsed
|
|
164
|
+
logger.debug(f"[OpenAI] Gemini fallback mode active ({remaining:.0f}s remaining)")
|
|
165
|
+
return True
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def reset_openai_auth_mode():
|
|
169
|
+
"""Reset to OpenAI-first mode. Call this to manually reset cooldown."""
|
|
170
|
+
global _OPENAI_OAUTH_429_TIMESTAMP
|
|
171
|
+
_OPENAI_OAUTH_429_TIMESTAMP = None
|
|
172
|
+
logger.info("[OpenAI] Reset to OAuth-first mode")
|
|
173
|
+
|
|
46
174
|
|
|
47
175
|
async def _fetch_codex_instructions(model: str = "gpt-5.2-codex") -> str:
|
|
48
176
|
"""
|
|
@@ -108,20 +236,21 @@ def resolve_gemini_model(model: str) -> str:
|
|
|
108
236
|
import httpx
|
|
109
237
|
from tenacity import (
|
|
110
238
|
retry,
|
|
239
|
+
retry_if_exception,
|
|
111
240
|
stop_after_attempt,
|
|
112
241
|
wait_exponential,
|
|
113
|
-
retry_if_exception,
|
|
114
242
|
)
|
|
115
243
|
|
|
116
|
-
from ..auth.token_store import TokenStore
|
|
117
244
|
from ..auth.oauth import (
|
|
118
|
-
refresh_access_token as gemini_refresh,
|
|
119
|
-
ANTIGRAVITY_HEADERS,
|
|
120
|
-
ANTIGRAVITY_ENDPOINTS,
|
|
121
245
|
ANTIGRAVITY_DEFAULT_PROJECT_ID,
|
|
122
|
-
|
|
246
|
+
ANTIGRAVITY_ENDPOINTS,
|
|
247
|
+
ANTIGRAVITY_HEADERS,
|
|
248
|
+
)
|
|
249
|
+
from ..auth.oauth import (
|
|
250
|
+
refresh_access_token as gemini_refresh,
|
|
123
251
|
)
|
|
124
252
|
from ..auth.openai_oauth import refresh_access_token as openai_refresh
|
|
253
|
+
from ..auth.token_store import TokenStore
|
|
125
254
|
from ..hooks.manager import get_hook_manager
|
|
126
255
|
|
|
127
256
|
# ========================
|
|
@@ -135,8 +264,52 @@ _SESSION_CACHE: dict[str, str] = {}
|
|
|
135
264
|
# Pooled HTTP client for connection reuse
|
|
136
265
|
_HTTP_CLIENT: httpx.AsyncClient | None = None
|
|
137
266
|
|
|
138
|
-
#
|
|
139
|
-
|
|
267
|
+
# Per-model semaphores for async rate limiting (uses config from ~/.stravinsky/config.json)
|
|
268
|
+
_GEMINI_SEMAPHORES: dict[str, asyncio.Semaphore] = {}
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _get_gemini_rate_limit(model: str) -> int:
|
|
272
|
+
"""
|
|
273
|
+
Get configured rate limit for a Gemini model.
|
|
274
|
+
|
|
275
|
+
Reads from ~/.stravinsky/config.json if available, otherwise uses defaults.
|
|
276
|
+
|
|
277
|
+
Args:
|
|
278
|
+
model: Gemini model name (e.g., "gemini-3-flash", "gemini-3-pro-high")
|
|
279
|
+
|
|
280
|
+
Returns:
|
|
281
|
+
Configured concurrency limit for this model
|
|
282
|
+
"""
|
|
283
|
+
rate_limiter = get_rate_limiter()
|
|
284
|
+
# Normalize model name to match config keys
|
|
285
|
+
normalized = rate_limiter._normalize_model(model)
|
|
286
|
+
return rate_limiter._limits.get(normalized, rate_limiter._limits.get("_default", 5))
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def _get_gemini_semaphore(model: str) -> asyncio.Semaphore:
|
|
290
|
+
"""
|
|
291
|
+
Get or create async semaphore for Gemini model rate limiting.
|
|
292
|
+
|
|
293
|
+
Creates one semaphore per model type with limits from config.
|
|
294
|
+
Limits can be customized in ~/.stravinsky/config.json:
|
|
295
|
+
{
|
|
296
|
+
"rate_limits": {
|
|
297
|
+
"gemini-3-flash": 15,
|
|
298
|
+
"gemini-3-pro-high": 8
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
Args:
|
|
303
|
+
model: Gemini model name
|
|
304
|
+
|
|
305
|
+
Returns:
|
|
306
|
+
asyncio.Semaphore with configured limit for this model
|
|
307
|
+
"""
|
|
308
|
+
if model not in _GEMINI_SEMAPHORES:
|
|
309
|
+
limit = _get_gemini_rate_limit(model)
|
|
310
|
+
_GEMINI_SEMAPHORES[model] = asyncio.Semaphore(limit)
|
|
311
|
+
logger.info(f"[RateLimit] Created semaphore for {model} with limit {limit}")
|
|
312
|
+
return _GEMINI_SEMAPHORES[model]
|
|
140
313
|
|
|
141
314
|
|
|
142
315
|
def _get_session_id(conversation_key: str | None = None) -> str:
|
|
@@ -178,19 +351,6 @@ async def _get_http_client() -> httpx.AsyncClient:
|
|
|
178
351
|
return _HTTP_CLIENT
|
|
179
352
|
|
|
180
353
|
|
|
181
|
-
def _get_gemini_semaphore() -> asyncio.Semaphore:
|
|
182
|
-
"""
|
|
183
|
-
Get or create semaphore for Gemini API rate limiting.
|
|
184
|
-
|
|
185
|
-
Limits concurrent Gemini requests to prevent burst rate limits (429 errors).
|
|
186
|
-
Max 5 concurrent requests balances throughput with API quota constraints.
|
|
187
|
-
"""
|
|
188
|
-
global _GEMINI_SEMAPHORE
|
|
189
|
-
if _GEMINI_SEMAPHORE is None:
|
|
190
|
-
_GEMINI_SEMAPHORE = asyncio.Semaphore(5)
|
|
191
|
-
return _GEMINI_SEMAPHORE
|
|
192
|
-
|
|
193
|
-
|
|
194
354
|
def _extract_gemini_response(data: dict) -> str:
|
|
195
355
|
"""
|
|
196
356
|
Extract text from Gemini response, handling thinking blocks.
|
|
@@ -314,45 +474,167 @@ def is_retryable_exception(e: Exception) -> bool:
|
|
|
314
474
|
return False
|
|
315
475
|
|
|
316
476
|
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
wait=wait_exponential(multiplier=2, min=10, max=120), # Longer waits: 10s → 20s → 40s
|
|
320
|
-
retry=retry_if_exception(is_retryable_exception),
|
|
321
|
-
before_sleep=lambda retry_state: logger.info(
|
|
322
|
-
f"Server error, retrying in {retry_state.next_action.sleep} seconds..."
|
|
323
|
-
),
|
|
324
|
-
)
|
|
325
|
-
async def invoke_gemini(
|
|
326
|
-
token_store: TokenStore,
|
|
477
|
+
async def _invoke_gemini_with_api_key(
|
|
478
|
+
api_key: str,
|
|
327
479
|
prompt: str,
|
|
328
480
|
model: str = "gemini-3-flash",
|
|
329
481
|
temperature: float = 0.7,
|
|
330
482
|
max_tokens: int = 4096,
|
|
331
483
|
thinking_budget: int = 0,
|
|
332
484
|
image_path: str | None = None,
|
|
485
|
+
agent_context: dict | None = None,
|
|
333
486
|
) -> str:
|
|
334
487
|
"""
|
|
335
|
-
Invoke
|
|
488
|
+
Invoke Gemini using API key authentication (google-genai library).
|
|
336
489
|
|
|
337
|
-
|
|
338
|
-
|
|
490
|
+
This is an alternative to OAuth authentication that uses the official
|
|
491
|
+
google-genai Python library with a simple API key.
|
|
339
492
|
|
|
340
493
|
Args:
|
|
341
|
-
|
|
494
|
+
api_key: Gemini API key (from GEMINI_API_KEY or GOOGLE_API_KEY env var)
|
|
342
495
|
prompt: The prompt to send to Gemini
|
|
343
|
-
model: Gemini model to use
|
|
496
|
+
model: Gemini model to use (e.g., "gemini-3-flash-preview")
|
|
344
497
|
temperature: Sampling temperature (0.0-2.0)
|
|
345
498
|
max_tokens: Maximum tokens in response
|
|
346
|
-
thinking_budget: Tokens reserved for internal reasoning
|
|
347
|
-
image_path: Optional path to image/PDF for vision analysis
|
|
499
|
+
thinking_budget: Tokens reserved for internal reasoning (if supported)
|
|
500
|
+
image_path: Optional path to image/PDF for vision analysis
|
|
348
501
|
|
|
349
502
|
Returns:
|
|
350
503
|
The model's response text.
|
|
351
504
|
|
|
352
505
|
Raises:
|
|
353
|
-
|
|
354
|
-
|
|
506
|
+
ImportError: If google-genai library is not installed
|
|
507
|
+
ValueError: If API request fails
|
|
355
508
|
"""
|
|
509
|
+
try:
|
|
510
|
+
from google import genai
|
|
511
|
+
except ImportError:
|
|
512
|
+
raise ImportError(
|
|
513
|
+
"google-genai library not installed. Install with: pip install google-genai"
|
|
514
|
+
)
|
|
515
|
+
|
|
516
|
+
# Map stravinsky model names to google-genai model names
|
|
517
|
+
# Pass through gemini-3-* models directly (Tier 3 benefits)
|
|
518
|
+
model_map = {
|
|
519
|
+
"gemini-3-flash": "gemini-3-flash-preview", # Tier 3 model (not -exp)
|
|
520
|
+
"gemini-3-flash-preview": "gemini-3-flash-preview", # Pass through
|
|
521
|
+
"gemini-3-pro-low": "gemini-3-flash-preview",
|
|
522
|
+
"gemini-3-pro-high": "gemini-3-pro-preview", # Tier 3 pro model
|
|
523
|
+
"gemini-3-pro-preview": "gemini-3-pro-preview", # Pass through
|
|
524
|
+
"gemini-flash": "gemini-3-flash-preview",
|
|
525
|
+
"gemini-pro": "gemini-3-pro-preview",
|
|
526
|
+
"gemini-3-pro": "gemini-3-pro-preview",
|
|
527
|
+
"gemini": "gemini-3-flash-preview",
|
|
528
|
+
}
|
|
529
|
+
genai_model = model_map.get(model, "gemini-3-flash-preview") # Default to tier 3 flash
|
|
530
|
+
|
|
531
|
+
try:
|
|
532
|
+
# Initialize client with API key
|
|
533
|
+
client = genai.Client(api_key=api_key)
|
|
534
|
+
|
|
535
|
+
# Build generation config
|
|
536
|
+
config = {
|
|
537
|
+
"temperature": temperature,
|
|
538
|
+
"max_output_tokens": max_tokens,
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
# Add thinking budget if supported (experimental feature)
|
|
542
|
+
if thinking_budget > 0:
|
|
543
|
+
config["thinking_config"] = {
|
|
544
|
+
"thinking_budget": thinking_budget,
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
# Build contents - text prompt plus optional image
|
|
548
|
+
contents = [prompt]
|
|
549
|
+
|
|
550
|
+
# Add image data for vision analysis
|
|
551
|
+
if image_path:
|
|
552
|
+
from pathlib import Path
|
|
553
|
+
|
|
554
|
+
image_file = Path(image_path)
|
|
555
|
+
if image_file.exists():
|
|
556
|
+
# google-genai supports direct file path or base64
|
|
557
|
+
# For simplicity, use the file path directly
|
|
558
|
+
contents.append(image_file)
|
|
559
|
+
logger.info(f"[API_KEY] Added vision data: {image_path}")
|
|
560
|
+
|
|
561
|
+
# Generate content
|
|
562
|
+
response = client.models.generate_content(
|
|
563
|
+
model=genai_model,
|
|
564
|
+
contents=contents,
|
|
565
|
+
config=config,
|
|
566
|
+
)
|
|
567
|
+
|
|
568
|
+
# Track usage
|
|
569
|
+
try:
|
|
570
|
+
from mcp_bridge.metrics.cost_tracker import get_cost_tracker
|
|
571
|
+
|
|
572
|
+
tracker = get_cost_tracker()
|
|
573
|
+
if hasattr(response, "usage_metadata"):
|
|
574
|
+
usage = response.usage_metadata
|
|
575
|
+
agent_type = (agent_context or {}).get("agent_type", "unknown")
|
|
576
|
+
task_id = (agent_context or {}).get("task_id", "")
|
|
577
|
+
|
|
578
|
+
tracker.track_usage(
|
|
579
|
+
model=model,
|
|
580
|
+
input_tokens=usage.prompt_token_count,
|
|
581
|
+
output_tokens=usage.candidates_token_count,
|
|
582
|
+
agent_type=agent_type,
|
|
583
|
+
task_id=task_id,
|
|
584
|
+
)
|
|
585
|
+
except Exception:
|
|
586
|
+
pass
|
|
587
|
+
|
|
588
|
+
# Extract text from response
|
|
589
|
+
if hasattr(response, "text"):
|
|
590
|
+
return response.text
|
|
591
|
+
elif hasattr(response, "candidates") and response.candidates:
|
|
592
|
+
# Fallback: extract from candidates
|
|
593
|
+
candidate = response.candidates[0]
|
|
594
|
+
if hasattr(candidate, "content"):
|
|
595
|
+
parts = candidate.content.parts
|
|
596
|
+
text_parts = [part.text for part in parts if hasattr(part, "text")]
|
|
597
|
+
return "".join(text_parts) if text_parts else "No response generated"
|
|
598
|
+
|
|
599
|
+
return "No response generated"
|
|
600
|
+
|
|
601
|
+
except Exception as e:
|
|
602
|
+
logger.error(f"API key authentication failed: {e}")
|
|
603
|
+
raise ValueError(f"Gemini API key request failed: {e}")
|
|
604
|
+
|
|
605
|
+
|
|
606
|
+
@retry(
|
|
607
|
+
stop=stop_after_attempt(2), # Reduced from 5 to 2 attempts
|
|
608
|
+
wait=wait_exponential(multiplier=2, min=10, max=120), # Longer waits: 10s → 20s → 40s
|
|
609
|
+
retry=retry_if_exception(is_retryable_exception),
|
|
610
|
+
before_sleep=lambda retry_state: logger.info(
|
|
611
|
+
f"Server error, retrying in {retry_state.next_action.sleep} seconds..."
|
|
612
|
+
),
|
|
613
|
+
)
|
|
614
|
+
async def invoke_gemini(
|
|
615
|
+
token_store: TokenStore,
|
|
616
|
+
prompt: str,
|
|
617
|
+
model: str = "gemini-3-flash",
|
|
618
|
+
temperature: float = 0.7,
|
|
619
|
+
max_tokens: int = 4096,
|
|
620
|
+
thinking_budget: int = 0,
|
|
621
|
+
image_path: str | None = None,
|
|
622
|
+
) -> str:
|
|
623
|
+
"""
|
|
624
|
+
Invoke a Gemini model with the given prompt.
|
|
625
|
+
"""
|
|
626
|
+
from mcp_bridge.proxy.client import is_proxy_enabled, proxy_invoke_gemini
|
|
627
|
+
|
|
628
|
+
if is_proxy_enabled():
|
|
629
|
+
return await proxy_invoke_gemini(
|
|
630
|
+
prompt=prompt,
|
|
631
|
+
model=model,
|
|
632
|
+
temperature=temperature,
|
|
633
|
+
max_tokens=max_tokens,
|
|
634
|
+
thinking_budget=thinking_budget,
|
|
635
|
+
image_path=image_path,
|
|
636
|
+
)
|
|
637
|
+
|
|
356
638
|
logger.info(f"[DEBUG] invoke_gemini called, uuid module check: {uuid}")
|
|
357
639
|
# Execute pre-model invoke hooks
|
|
358
640
|
params = {
|
|
@@ -384,14 +666,122 @@ async def invoke_gemini(
|
|
|
384
666
|
# Log with agent context and prompt summary
|
|
385
667
|
logger.info(f"[{agent_type}] → {model}: {prompt_summary}")
|
|
386
668
|
|
|
387
|
-
#
|
|
669
|
+
# Get API key from environment (loaded from ~/.stravinsky/.env)
|
|
670
|
+
api_key = _get_gemini_api_key()
|
|
388
671
|
import sys
|
|
672
|
+
|
|
389
673
|
task_info = f" task={task_id}" if task_id else ""
|
|
390
674
|
desc_info = f" | {description}" if description else ""
|
|
391
|
-
print(f"🔮 GEMINI: {model} | agent={agent_type}{task_info}{desc_info}", file=sys.stderr)
|
|
392
675
|
|
|
393
|
-
#
|
|
394
|
-
|
|
676
|
+
# ==============================================
|
|
677
|
+
# AUTH PRIORITY: OAuth first, API fallback on 429
|
|
678
|
+
# ==============================================
|
|
679
|
+
# 1. If API-only mode (after 429), use API key directly
|
|
680
|
+
# 2. Otherwise, try OAuth first
|
|
681
|
+
# 3. On 429 from OAuth, switch to API-only mode and retry
|
|
682
|
+
|
|
683
|
+
# If we're in API-only mode (after a 429), use API key directly
|
|
684
|
+
if _is_api_only_mode():
|
|
685
|
+
if not api_key:
|
|
686
|
+
raise ValueError(
|
|
687
|
+
"OAuth rate-limited (429) and no API key available. "
|
|
688
|
+
"Add GEMINI_API_KEY to ~/.stravinsky/.env"
|
|
689
|
+
)
|
|
690
|
+
|
|
691
|
+
# Calculate remaining cooldown time
|
|
692
|
+
if _GEMINI_OAUTH_429_TIMESTAMP is not None:
|
|
693
|
+
remaining = _OAUTH_COOLDOWN_SECONDS - (time.time() - _GEMINI_OAUTH_429_TIMESTAMP)
|
|
694
|
+
remaining_mins = int(remaining // 60)
|
|
695
|
+
remaining_secs = int(remaining % 60)
|
|
696
|
+
cooldown_msg = f" (OAuth retry in {remaining_mins}m {remaining_secs}s)"
|
|
697
|
+
else:
|
|
698
|
+
cooldown_msg = ""
|
|
699
|
+
|
|
700
|
+
# Check time-window rate limit (30 req/min)
|
|
701
|
+
time_limiter = get_gemini_time_limiter()
|
|
702
|
+
wait_time = time_limiter.acquire_visible("GEMINI", "API key")
|
|
703
|
+
if wait_time > 0:
|
|
704
|
+
await asyncio.sleep(wait_time)
|
|
705
|
+
# Re-acquire after sleep
|
|
706
|
+
wait_time = time_limiter.acquire_visible("GEMINI", "API key")
|
|
707
|
+
|
|
708
|
+
print(
|
|
709
|
+
f"🔑 GEMINI (API-only cooldown{cooldown_msg}): {model} | agent={agent_type}{task_info}{desc_info}",
|
|
710
|
+
file=sys.stderr,
|
|
711
|
+
)
|
|
712
|
+
logger.info(f"[{agent_type}] Using API key (5-min cooldown after OAuth 429)")
|
|
713
|
+
semaphore = _get_gemini_semaphore(model)
|
|
714
|
+
async with semaphore:
|
|
715
|
+
result = await _invoke_gemini_with_api_key(
|
|
716
|
+
api_key=api_key,
|
|
717
|
+
prompt=prompt,
|
|
718
|
+
model=model,
|
|
719
|
+
temperature=temperature,
|
|
720
|
+
max_tokens=max_tokens,
|
|
721
|
+
thinking_budget=thinking_budget,
|
|
722
|
+
image_path=image_path,
|
|
723
|
+
agent_context=agent_context,
|
|
724
|
+
)
|
|
725
|
+
# Prepend auth header for visibility in logs
|
|
726
|
+
auth_header = f"[Auth: API key (5-min cooldown) | Model: {model}]\n\n"
|
|
727
|
+
return auth_header + result
|
|
728
|
+
|
|
729
|
+
provider_tracker = get_provider_tracker()
|
|
730
|
+
|
|
731
|
+
# If Gemini is in cooldown, follow tier-aware fallback chain.
|
|
732
|
+
if not provider_tracker.is_available("gemini"):
|
|
733
|
+
for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain("gemini", model):
|
|
734
|
+
if candidate_provider == "gemini" and use_oauth:
|
|
735
|
+
continue
|
|
736
|
+
if use_oauth and not provider_tracker.is_available(candidate_provider):
|
|
737
|
+
continue
|
|
738
|
+
|
|
739
|
+
if candidate_provider == "gemini" and not use_oauth:
|
|
740
|
+
api_key = _get_gemini_api_key()
|
|
741
|
+
if not api_key:
|
|
742
|
+
continue
|
|
743
|
+
_set_api_only_mode("Gemini in cooldown; using API key")
|
|
744
|
+
result = await _invoke_gemini_with_api_key(
|
|
745
|
+
api_key=api_key,
|
|
746
|
+
prompt=prompt,
|
|
747
|
+
model=candidate_model,
|
|
748
|
+
temperature=temperature,
|
|
749
|
+
max_tokens=max_tokens,
|
|
750
|
+
thinking_budget=thinking_budget,
|
|
751
|
+
image_path=image_path,
|
|
752
|
+
agent_context=agent_context,
|
|
753
|
+
)
|
|
754
|
+
auth_header = f"[Auth: API key (cooldown) | Model: {candidate_model}]\n\n"
|
|
755
|
+
return auth_header + result
|
|
756
|
+
|
|
757
|
+
if candidate_provider == "openai" and use_oauth:
|
|
758
|
+
return await invoke_openai(
|
|
759
|
+
token_store=token_store,
|
|
760
|
+
prompt=prompt,
|
|
761
|
+
model=candidate_model,
|
|
762
|
+
temperature=temperature,
|
|
763
|
+
max_tokens=max_tokens,
|
|
764
|
+
thinking_budget=0,
|
|
765
|
+
reasoning_effort="medium",
|
|
766
|
+
)
|
|
767
|
+
|
|
768
|
+
# DEFAULT: Try OAuth first (Antigravity)
|
|
769
|
+
|
|
770
|
+
# Check time-window rate limit (30 req/min)
|
|
771
|
+
time_limiter = get_gemini_time_limiter()
|
|
772
|
+
wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
|
|
773
|
+
if wait_time > 0:
|
|
774
|
+
await asyncio.sleep(wait_time)
|
|
775
|
+
# Re-acquire after sleep
|
|
776
|
+
wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
|
|
777
|
+
|
|
778
|
+
print(
|
|
779
|
+
f"🔮 GEMINI (OAuth): {model} | agent={agent_type}{task_info}{desc_info}",
|
|
780
|
+
file=sys.stderr,
|
|
781
|
+
)
|
|
782
|
+
logger.info(f"[{agent_type}] Using OAuth authentication (Antigravity)")
|
|
783
|
+
# Rate limit concurrent Gemini requests (configurable via ~/.stravinsky/config.json)
|
|
784
|
+
semaphore = _get_gemini_semaphore(model)
|
|
395
785
|
async with semaphore:
|
|
396
786
|
access_token = await _ensure_valid_token(token_store, "gemini")
|
|
397
787
|
|
|
@@ -437,12 +827,14 @@ async def invoke_gemini(
|
|
|
437
827
|
image_data = base64.b64encode(image_file.read_bytes()).decode("utf-8")
|
|
438
828
|
|
|
439
829
|
# Add inline image data for Gemini Vision API
|
|
440
|
-
parts.append(
|
|
441
|
-
|
|
442
|
-
"
|
|
443
|
-
|
|
830
|
+
parts.append(
|
|
831
|
+
{
|
|
832
|
+
"inlineData": {
|
|
833
|
+
"mimeType": mime_type,
|
|
834
|
+
"data": image_data,
|
|
835
|
+
}
|
|
444
836
|
}
|
|
445
|
-
|
|
837
|
+
)
|
|
446
838
|
logger.info(f"[multimodal] Added vision data: {image_path} ({mime_type})")
|
|
447
839
|
|
|
448
840
|
inner_payload = {
|
|
@@ -514,7 +906,7 @@ async def invoke_gemini(
|
|
|
514
906
|
error_text = response.text.lower()
|
|
515
907
|
if "thinking" in error_text or "signature" in error_text:
|
|
516
908
|
logger.warning(
|
|
517
|
-
|
|
909
|
+
"[Gemini] Thinking error detected, clearing session cache and retrying"
|
|
518
910
|
)
|
|
519
911
|
clear_session_cache()
|
|
520
912
|
# Update session ID for retry
|
|
@@ -541,6 +933,61 @@ async def invoke_gemini(
|
|
|
541
933
|
continue
|
|
542
934
|
break
|
|
543
935
|
|
|
936
|
+
# ==============================================
|
|
937
|
+
# 429 RATE LIMIT DETECTION: Tier-aware fallback chain
|
|
938
|
+
# ==============================================
|
|
939
|
+
if response is not None and response.status_code == 429:
|
|
940
|
+
provider_tracker = get_provider_tracker()
|
|
941
|
+
provider_tracker.mark_rate_limited(
|
|
942
|
+
"gemini",
|
|
943
|
+
duration=_OAUTH_COOLDOWN_SECONDS,
|
|
944
|
+
reason="Gemini OAuth rate-limited (429)",
|
|
945
|
+
)
|
|
946
|
+
|
|
947
|
+
for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain(
|
|
948
|
+
"gemini", model
|
|
949
|
+
):
|
|
950
|
+
if candidate_provider == "gemini" and use_oauth:
|
|
951
|
+
continue
|
|
952
|
+
if use_oauth and not provider_tracker.is_available(candidate_provider):
|
|
953
|
+
continue
|
|
954
|
+
|
|
955
|
+
if candidate_provider == "gemini" and not use_oauth:
|
|
956
|
+
api_key = _get_gemini_api_key()
|
|
957
|
+
if not api_key:
|
|
958
|
+
continue
|
|
959
|
+
_set_api_only_mode("OAuth rate-limited (429)")
|
|
960
|
+
logger.info("[Gemini] Retrying with API key after OAuth 429")
|
|
961
|
+
result = await _invoke_gemini_with_api_key(
|
|
962
|
+
api_key=api_key,
|
|
963
|
+
prompt=prompt,
|
|
964
|
+
model=candidate_model,
|
|
965
|
+
temperature=temperature,
|
|
966
|
+
max_tokens=max_tokens,
|
|
967
|
+
thinking_budget=thinking_budget,
|
|
968
|
+
image_path=image_path,
|
|
969
|
+
agent_context=agent_context,
|
|
970
|
+
)
|
|
971
|
+
auth_header = (
|
|
972
|
+
f"[Auth: API key (OAuth 429 fallback) | Model: {candidate_model}]\n\n"
|
|
973
|
+
)
|
|
974
|
+
return auth_header + result
|
|
975
|
+
|
|
976
|
+
if candidate_provider == "openai" and use_oauth:
|
|
977
|
+
return await invoke_openai(
|
|
978
|
+
token_store=token_store,
|
|
979
|
+
prompt=prompt,
|
|
980
|
+
model=candidate_model,
|
|
981
|
+
temperature=temperature,
|
|
982
|
+
max_tokens=max_tokens,
|
|
983
|
+
thinking_budget=0,
|
|
984
|
+
reasoning_effort="medium",
|
|
985
|
+
)
|
|
986
|
+
|
|
987
|
+
raise ValueError(
|
|
988
|
+
"OAuth rate-limited (429) and no fallback succeeded. "
|
|
989
|
+
"Add GEMINI_API_KEY to ~/.stravinsky/.env"
|
|
990
|
+
)
|
|
544
991
|
if response is None:
|
|
545
992
|
# FALLBACK: Try Claude sonnet-4.5 for agents that support it
|
|
546
993
|
agent_context = params.get("agent_context", {})
|
|
@@ -549,16 +996,17 @@ async def invoke_gemini(
|
|
|
549
996
|
if agent_type in ("dewey", "explore", "document_writer", "multimodal"):
|
|
550
997
|
logger.warning(f"[{agent_type}] Gemini failed, falling back to Claude sonnet-4.5")
|
|
551
998
|
try:
|
|
552
|
-
import
|
|
553
|
-
|
|
999
|
+
from mcp_bridge.utils.process import async_execute
|
|
1000
|
+
|
|
1001
|
+
result_obj = await async_execute(
|
|
554
1002
|
["claude", "-p", prompt, "--model", "sonnet", "--output-format", "text"],
|
|
555
|
-
capture_output=True,
|
|
556
|
-
text=True,
|
|
557
1003
|
timeout=120,
|
|
558
|
-
cwd=os.getcwd(),
|
|
559
1004
|
)
|
|
560
|
-
if
|
|
561
|
-
|
|
1005
|
+
if result_obj.returncode == 0 and result_obj.stdout.strip():
|
|
1006
|
+
result = result_obj.stdout.strip()
|
|
1007
|
+
# Prepend auth header for visibility
|
|
1008
|
+
auth_header = f"[Auth: Claude fallback | Model: sonnet-4.5]\n\n"
|
|
1009
|
+
return auth_header + result
|
|
562
1010
|
except Exception as fallback_error:
|
|
563
1011
|
logger.error(f"Fallback to Claude also failed: {fallback_error}")
|
|
564
1012
|
|
|
@@ -567,8 +1015,31 @@ async def invoke_gemini(
|
|
|
567
1015
|
response.raise_for_status()
|
|
568
1016
|
data = response.json()
|
|
569
1017
|
|
|
1018
|
+
# Track usage
|
|
1019
|
+
try:
|
|
1020
|
+
from mcp_bridge.metrics.cost_tracker import get_cost_tracker
|
|
1021
|
+
|
|
1022
|
+
tracker = get_cost_tracker()
|
|
1023
|
+
usage = data.get("usageMetadata", {})
|
|
1024
|
+
input_tokens = usage.get("promptTokenCount", 0)
|
|
1025
|
+
output_tokens = usage.get("candidatesTokenCount", 0)
|
|
1026
|
+
|
|
1027
|
+
tracker.track_usage(
|
|
1028
|
+
model=model,
|
|
1029
|
+
input_tokens=input_tokens,
|
|
1030
|
+
output_tokens=output_tokens,
|
|
1031
|
+
agent_type=agent_type,
|
|
1032
|
+
task_id=task_id,
|
|
1033
|
+
)
|
|
1034
|
+
except Exception as e:
|
|
1035
|
+
logger.warning(f"Failed to track cost: {e}")
|
|
1036
|
+
|
|
570
1037
|
# Extract text from response using thinking-aware parser
|
|
571
|
-
|
|
1038
|
+
result = _extract_gemini_response(data)
|
|
1039
|
+
|
|
1040
|
+
# Prepend auth header for visibility in logs
|
|
1041
|
+
auth_header = f"[Auth: OAuth | Model: {model}]\n\n"
|
|
1042
|
+
return auth_header + result
|
|
572
1043
|
|
|
573
1044
|
|
|
574
1045
|
# ========================
|
|
@@ -579,9 +1050,57 @@ async def invoke_gemini(
|
|
|
579
1050
|
AGENT_TOOLS = [
|
|
580
1051
|
{
|
|
581
1052
|
"functionDeclarations": [
|
|
1053
|
+
{
|
|
1054
|
+
"name": "semantic_search",
|
|
1055
|
+
"description": "Search codebase with natural language query using semantic embeddings. ALWAYS use this FIRST before grep_search or read_file to find relevant files efficiently. Returns code snippets with file paths and relevance scores.",
|
|
1056
|
+
"parameters": {
|
|
1057
|
+
"type": "object",
|
|
1058
|
+
"properties": {
|
|
1059
|
+
"query": {
|
|
1060
|
+
"type": "string",
|
|
1061
|
+
"description": "Natural language search query (e.g., 'find authentication logic', 'PDF rendering code')",
|
|
1062
|
+
},
|
|
1063
|
+
"project_path": {
|
|
1064
|
+
"type": "string",
|
|
1065
|
+
"description": "Path to the project root (default: '.')",
|
|
1066
|
+
},
|
|
1067
|
+
"n_results": {
|
|
1068
|
+
"type": "integer",
|
|
1069
|
+
"description": "Maximum number of results to return (default: 10)",
|
|
1070
|
+
},
|
|
1071
|
+
},
|
|
1072
|
+
"required": ["query"],
|
|
1073
|
+
},
|
|
1074
|
+
},
|
|
1075
|
+
{
|
|
1076
|
+
"name": "hybrid_search",
|
|
1077
|
+
"description": "Hybrid search combining semantic similarity with structural AST pattern matching. Use when you need precise structural patterns (e.g., specific function signatures) combined with semantic relevance.",
|
|
1078
|
+
"parameters": {
|
|
1079
|
+
"type": "object",
|
|
1080
|
+
"properties": {
|
|
1081
|
+
"query": {
|
|
1082
|
+
"type": "string",
|
|
1083
|
+
"description": "Natural language search query (e.g., 'find authentication logic')",
|
|
1084
|
+
},
|
|
1085
|
+
"pattern": {
|
|
1086
|
+
"type": "string",
|
|
1087
|
+
"description": "Optional ast-grep pattern for structural matching (e.g., 'def $FUNC($$$):', 'async function $NAME($$$)')",
|
|
1088
|
+
},
|
|
1089
|
+
"project_path": {
|
|
1090
|
+
"type": "string",
|
|
1091
|
+
"description": "Path to the project root (default: '.')",
|
|
1092
|
+
},
|
|
1093
|
+
"n_results": {
|
|
1094
|
+
"type": "integer",
|
|
1095
|
+
"description": "Maximum number of results to return (default: 10)",
|
|
1096
|
+
},
|
|
1097
|
+
},
|
|
1098
|
+
"required": ["query"],
|
|
1099
|
+
},
|
|
1100
|
+
},
|
|
582
1101
|
{
|
|
583
1102
|
"name": "read_file",
|
|
584
|
-
"description": "Read the contents of a file. Returns the file contents as text.",
|
|
1103
|
+
"description": "Read the contents of a file. Returns the file contents as text. USE ONLY AFTER semantic_search identifies the target file.",
|
|
585
1104
|
"parameters": {
|
|
586
1105
|
"type": "object",
|
|
587
1106
|
"properties": {
|
|
@@ -606,7 +1125,7 @@ AGENT_TOOLS = [
|
|
|
606
1125
|
},
|
|
607
1126
|
{
|
|
608
1127
|
"name": "grep_search",
|
|
609
|
-
"description": "Search for a pattern in files using ripgrep. Returns matching lines with file paths and line numbers.",
|
|
1128
|
+
"description": "Search for a pattern in files using ripgrep. Returns matching lines with file paths and line numbers. USE ONLY for precise pattern matching AFTER semantic_search narrows down the search scope.",
|
|
610
1129
|
"parameters": {
|
|
611
1130
|
"type": "object",
|
|
612
1131
|
"properties": {
|
|
@@ -636,50 +1155,85 @@ AGENT_TOOLS = [
|
|
|
636
1155
|
]
|
|
637
1156
|
|
|
638
1157
|
|
|
639
|
-
def _execute_tool(name: str, args: dict) -> str:
|
|
1158
|
+
async def _execute_tool(name: str, args: dict) -> str:
|
|
640
1159
|
"""Execute a tool and return the result."""
|
|
641
|
-
import os
|
|
642
|
-
import subprocess
|
|
643
1160
|
from pathlib import Path
|
|
1161
|
+
from mcp_bridge.utils.process import async_execute
|
|
644
1162
|
|
|
645
1163
|
try:
|
|
646
|
-
if name == "
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
1164
|
+
if name == "semantic_search":
|
|
1165
|
+
# Import semantic_search function from tools
|
|
1166
|
+
from .semantic_search import semantic_search
|
|
1167
|
+
|
|
1168
|
+
# Extract args with defaults
|
|
1169
|
+
query = args.get("query")
|
|
1170
|
+
if not query:
|
|
1171
|
+
return "Error: 'query' parameter is required for semantic_search"
|
|
1172
|
+
|
|
1173
|
+
project_path = args.get("project_path", ".")
|
|
1174
|
+
n_results = args.get("n_results", 10)
|
|
1175
|
+
|
|
1176
|
+
result = await semantic_search(
|
|
1177
|
+
query=query,
|
|
1178
|
+
project_path=project_path,
|
|
1179
|
+
n_results=n_results,
|
|
1180
|
+
)
|
|
1181
|
+
return result
|
|
1182
|
+
|
|
1183
|
+
elif name == "hybrid_search":
|
|
1184
|
+
# Import hybrid_search function from tools
|
|
1185
|
+
from .semantic_search import hybrid_search
|
|
1186
|
+
|
|
1187
|
+
# Extract args with defaults
|
|
1188
|
+
query = args.get("query")
|
|
1189
|
+
if not query:
|
|
1190
|
+
return "Error: 'query' parameter is required for hybrid_search"
|
|
1191
|
+
|
|
1192
|
+
pattern = args.get("pattern")
|
|
1193
|
+
project_path = args.get("project_path", ".")
|
|
1194
|
+
n_results = args.get("n_results", 10)
|
|
1195
|
+
|
|
1196
|
+
result = await hybrid_search(
|
|
1197
|
+
query=query,
|
|
1198
|
+
pattern=pattern,
|
|
1199
|
+
project_path=project_path,
|
|
1200
|
+
n_results=n_results,
|
|
1201
|
+
)
|
|
1202
|
+
return result
|
|
1203
|
+
|
|
1204
|
+
elif name == "read_file":
|
|
1205
|
+
from .read_file import read_file
|
|
1206
|
+
|
|
1207
|
+
path = args["path"]
|
|
1208
|
+
return await read_file(path)
|
|
651
1209
|
|
|
652
1210
|
elif name == "list_directory":
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
for entry in path.iterdir():
|
|
658
|
-
entry_type = "DIR" if entry.is_dir() else "FILE"
|
|
659
|
-
entries.append(f"[{entry_type}] {entry.name}")
|
|
660
|
-
return "\n".join(entries) if entries else "(empty directory)"
|
|
1211
|
+
from .list_directory import list_directory
|
|
1212
|
+
|
|
1213
|
+
path = args["path"]
|
|
1214
|
+
return await list_directory(path)
|
|
661
1215
|
|
|
662
1216
|
elif name == "grep_search":
|
|
663
1217
|
pattern = args["pattern"]
|
|
664
1218
|
search_path = args["path"]
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
text=True,
|
|
669
|
-
timeout=30,
|
|
1219
|
+
|
|
1220
|
+
result_obj = await async_execute(
|
|
1221
|
+
["rg", "--json", "-m", "50", pattern, search_path], timeout=30
|
|
670
1222
|
)
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
1223
|
+
|
|
1224
|
+
if result_obj.returncode == 0:
|
|
1225
|
+
return result_obj.stdout[:10000] # Limit output size
|
|
1226
|
+
elif result_obj.returncode == 1:
|
|
674
1227
|
return "No matches found"
|
|
675
1228
|
else:
|
|
676
|
-
return f"Search error: {
|
|
1229
|
+
return f"Search error: {result_obj.stderr}"
|
|
677
1230
|
|
|
678
1231
|
elif name == "write_file":
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
path
|
|
682
|
-
|
|
1232
|
+
from .write_file import write_file
|
|
1233
|
+
|
|
1234
|
+
path = args["path"]
|
|
1235
|
+
content = args["content"]
|
|
1236
|
+
return await write_file(path, content)
|
|
683
1237
|
|
|
684
1238
|
else:
|
|
685
1239
|
return f"Unknown tool: {name}"
|
|
@@ -688,32 +1242,244 @@ def _execute_tool(name: str, args: dict) -> str:
|
|
|
688
1242
|
return f"Tool error: {str(e)}"
|
|
689
1243
|
|
|
690
1244
|
|
|
691
|
-
async def
|
|
692
|
-
|
|
1245
|
+
async def _invoke_gemini_agentic_with_api_key(
|
|
1246
|
+
api_key: str,
|
|
693
1247
|
prompt: str,
|
|
694
1248
|
model: str = "gemini-3-flash",
|
|
695
1249
|
max_turns: int = 10,
|
|
696
1250
|
timeout: int = 120,
|
|
697
1251
|
) -> str:
|
|
698
1252
|
"""
|
|
699
|
-
Invoke Gemini with function calling
|
|
1253
|
+
Invoke Gemini with function calling using API key authentication (google-genai library).
|
|
700
1254
|
|
|
701
|
-
This
|
|
1255
|
+
This implements a multi-turn agentic loop:
|
|
702
1256
|
1. Send prompt with tool definitions
|
|
703
1257
|
2. If model returns FunctionCall, execute the tool
|
|
704
1258
|
3. Send FunctionResponse back to model
|
|
705
1259
|
4. Repeat until model returns text or max_turns reached
|
|
706
1260
|
|
|
707
1261
|
Args:
|
|
708
|
-
|
|
1262
|
+
api_key: Gemini API key (from GEMINI_API_KEY or GOOGLE_API_KEY env var)
|
|
709
1263
|
prompt: The task prompt
|
|
710
1264
|
model: Gemini model to use
|
|
711
1265
|
max_turns: Maximum number of tool-use turns
|
|
712
|
-
timeout: Request timeout in seconds
|
|
1266
|
+
timeout: Request timeout in seconds (currently unused by google-genai)
|
|
713
1267
|
|
|
714
1268
|
Returns:
|
|
715
1269
|
Final text response from the model
|
|
1270
|
+
|
|
1271
|
+
Raises:
|
|
1272
|
+
ImportError: If google-genai library is not installed
|
|
1273
|
+
ValueError: If API request fails
|
|
716
1274
|
"""
|
|
1275
|
+
# USER-VISIBLE NOTIFICATION (stderr) - Shows agentic mode with API key
|
|
1276
|
+
import sys
|
|
1277
|
+
|
|
1278
|
+
print(f"🔮 GEMINI (API/Agentic): {model} | max_turns={max_turns}", file=sys.stderr)
|
|
1279
|
+
|
|
1280
|
+
try:
|
|
1281
|
+
from google import genai
|
|
1282
|
+
from google.genai import types
|
|
1283
|
+
except ImportError:
|
|
1284
|
+
raise ImportError(
|
|
1285
|
+
"google-genai library not installed. Install with: pip install google-genai"
|
|
1286
|
+
)
|
|
1287
|
+
|
|
1288
|
+
# Map stravinsky model names to google-genai model names
|
|
1289
|
+
# Pass through gemini-3-* models directly (Tier 3 benefits)
|
|
1290
|
+
model_map = {
|
|
1291
|
+
"gemini-3-flash": "gemini-3-flash-preview", # Tier 3 model (not -exp)
|
|
1292
|
+
"gemini-3-flash-preview": "gemini-3-flash-preview", # Pass through
|
|
1293
|
+
"gemini-3-pro-low": "gemini-3-flash-preview",
|
|
1294
|
+
"gemini-3-pro-high": "gemini-3-pro-preview", # Tier 3 pro model
|
|
1295
|
+
"gemini-3-pro-preview": "gemini-3-pro-preview", # Pass through
|
|
1296
|
+
"gemini-flash": "gemini-3-flash-preview",
|
|
1297
|
+
"gemini-pro": "gemini-3-pro-preview",
|
|
1298
|
+
"gemini-3-pro": "gemini-3-pro-preview",
|
|
1299
|
+
"gemini": "gemini-3-flash-preview",
|
|
1300
|
+
}
|
|
1301
|
+
genai_model = model_map.get(model, "gemini-3-flash-preview") # Default to tier 3 flash
|
|
1302
|
+
|
|
1303
|
+
# Initialize client with API key
|
|
1304
|
+
client = genai.Client(api_key=api_key)
|
|
1305
|
+
|
|
1306
|
+
# Convert AGENT_TOOLS to google-genai format
|
|
1307
|
+
# google-genai expects tools as a list of Tool objects containing function_declarations
|
|
1308
|
+
function_declarations = []
|
|
1309
|
+
for tool_group in AGENT_TOOLS:
|
|
1310
|
+
for func_decl in tool_group.get("functionDeclarations", []):
|
|
1311
|
+
function_declarations.append(
|
|
1312
|
+
types.FunctionDeclaration(
|
|
1313
|
+
name=func_decl["name"],
|
|
1314
|
+
description=func_decl["description"],
|
|
1315
|
+
parameters=func_decl["parameters"],
|
|
1316
|
+
)
|
|
1317
|
+
)
|
|
1318
|
+
|
|
1319
|
+
# Wrap function declarations in a Tool object
|
|
1320
|
+
tools = [types.Tool(function_declarations=function_declarations)]
|
|
1321
|
+
|
|
1322
|
+
# Initialize conversation with user message
|
|
1323
|
+
contents = [types.Content(role="user", parts=[types.Part(text=prompt)])]
|
|
1324
|
+
|
|
1325
|
+
for turn in range(max_turns):
|
|
1326
|
+
try:
|
|
1327
|
+
# Generate content with tools
|
|
1328
|
+
response = client.models.generate_content(
|
|
1329
|
+
model=genai_model,
|
|
1330
|
+
contents=contents,
|
|
1331
|
+
config=types.GenerateContentConfig(
|
|
1332
|
+
tools=tools,
|
|
1333
|
+
temperature=0.7,
|
|
1334
|
+
max_output_tokens=8192,
|
|
1335
|
+
),
|
|
1336
|
+
)
|
|
1337
|
+
|
|
1338
|
+
# Check if response has function calls
|
|
1339
|
+
if not response.candidates or not response.candidates[0].content.parts:
|
|
1340
|
+
return "No response generated"
|
|
1341
|
+
|
|
1342
|
+
parts = response.candidates[0].content.parts
|
|
1343
|
+
function_calls = []
|
|
1344
|
+
text_parts = []
|
|
1345
|
+
|
|
1346
|
+
for part in parts:
|
|
1347
|
+
if part.function_call:
|
|
1348
|
+
function_calls.append(part.function_call)
|
|
1349
|
+
elif part.text:
|
|
1350
|
+
text_parts.append(part.text)
|
|
1351
|
+
|
|
1352
|
+
# If no function calls, return text response
|
|
1353
|
+
if not function_calls:
|
|
1354
|
+
result = "".join(text_parts)
|
|
1355
|
+
return result if result.strip() else "Task completed"
|
|
1356
|
+
|
|
1357
|
+
# Execute function calls and prepare responses
|
|
1358
|
+
function_responses = []
|
|
1359
|
+
for func_call in function_calls:
|
|
1360
|
+
func_name = func_call.name
|
|
1361
|
+
func_args = dict(func_call.args) if func_call.args else {}
|
|
1362
|
+
|
|
1363
|
+
logger.info(f"[AgenticGemini] Turn {turn + 1}: Executing {func_name}")
|
|
1364
|
+
result = await _execute_tool(func_name, func_args)
|
|
1365
|
+
|
|
1366
|
+
function_responses.append(
|
|
1367
|
+
types.Part(
|
|
1368
|
+
function_response=types.FunctionResponse(
|
|
1369
|
+
name=func_name,
|
|
1370
|
+
response={"result": result},
|
|
1371
|
+
)
|
|
1372
|
+
)
|
|
1373
|
+
)
|
|
1374
|
+
|
|
1375
|
+
# Add model's response to conversation
|
|
1376
|
+
contents.append(response.candidates[0].content)
|
|
1377
|
+
|
|
1378
|
+
# Add function responses to conversation
|
|
1379
|
+
contents.append(
|
|
1380
|
+
types.Content(
|
|
1381
|
+
role="user",
|
|
1382
|
+
parts=function_responses,
|
|
1383
|
+
)
|
|
1384
|
+
)
|
|
1385
|
+
|
|
1386
|
+
except Exception as e:
|
|
1387
|
+
logger.error(f"[AgenticGemini] Error in turn {turn + 1}: {e}")
|
|
1388
|
+
raise ValueError(f"Gemini API key request failed: {e}")
|
|
1389
|
+
|
|
1390
|
+
return "Max turns reached without final response"
|
|
1391
|
+
|
|
1392
|
+
|
|
1393
|
+
async def invoke_gemini_agentic(
|
|
1394
|
+
token_store: TokenStore,
|
|
1395
|
+
prompt: str,
|
|
1396
|
+
model: str = "gemini-3-flash",
|
|
1397
|
+
max_turns: int = 10,
|
|
1398
|
+
timeout: int = 120,
|
|
1399
|
+
) -> str:
|
|
1400
|
+
"""
|
|
1401
|
+
Invoke Gemini with function calling for agentic tasks.
|
|
1402
|
+
"""
|
|
1403
|
+
from mcp_bridge.proxy.client import is_proxy_enabled, PROXY_URL
|
|
1404
|
+
|
|
1405
|
+
if is_proxy_enabled():
|
|
1406
|
+
import httpx
|
|
1407
|
+
|
|
1408
|
+
async with httpx.AsyncClient(timeout=float(timeout) + 10) as client:
|
|
1409
|
+
payload = {"prompt": prompt, "model": model, "max_turns": max_turns, "timeout": timeout}
|
|
1410
|
+
response = await client.post(f"{PROXY_URL}/v1/gemini/agentic", json=payload)
|
|
1411
|
+
response.raise_for_status()
|
|
1412
|
+
return response.json()["response"]
|
|
1413
|
+
|
|
1414
|
+
import sys
|
|
1415
|
+
|
|
1416
|
+
# Get API key from environment (loaded from ~/.stravinsky/.env)
|
|
1417
|
+
api_key = _get_gemini_api_key()
|
|
1418
|
+
|
|
1419
|
+
# ==============================================
|
|
1420
|
+
# AUTH PRIORITY: OAuth first, API fallback on 429
|
|
1421
|
+
# ==============================================
|
|
1422
|
+
# 1. If API-only mode (after 429), use API key directly
|
|
1423
|
+
# 2. Otherwise, try OAuth first
|
|
1424
|
+
# 3. On 429 from OAuth, switch to API-only mode and retry
|
|
1425
|
+
|
|
1426
|
+
# If we're in API-only mode (after a 429), use API key directly
|
|
1427
|
+
if _is_api_only_mode():
|
|
1428
|
+
if not api_key:
|
|
1429
|
+
raise ValueError(
|
|
1430
|
+
"OAuth rate-limited (429) and no API key available. "
|
|
1431
|
+
"Add GEMINI_API_KEY to ~/.stravinsky/.env"
|
|
1432
|
+
)
|
|
1433
|
+
|
|
1434
|
+
# Calculate remaining cooldown time
|
|
1435
|
+
if _GEMINI_OAUTH_429_TIMESTAMP is not None:
|
|
1436
|
+
remaining = _OAUTH_COOLDOWN_SECONDS - (time.time() - _GEMINI_OAUTH_429_TIMESTAMP)
|
|
1437
|
+
remaining_mins = int(remaining // 60)
|
|
1438
|
+
remaining_secs = int(remaining % 60)
|
|
1439
|
+
cooldown_msg = f" (OAuth retry in {remaining_mins}m {remaining_secs}s)"
|
|
1440
|
+
else:
|
|
1441
|
+
cooldown_msg = ""
|
|
1442
|
+
|
|
1443
|
+
# Check time-window rate limit (30 req/min)
|
|
1444
|
+
time_limiter = get_gemini_time_limiter()
|
|
1445
|
+
wait_time = time_limiter.acquire_visible("GEMINI", "API key")
|
|
1446
|
+
if wait_time > 0:
|
|
1447
|
+
await asyncio.sleep(wait_time)
|
|
1448
|
+
# Re-acquire after sleep
|
|
1449
|
+
wait_time = time_limiter.acquire_visible("GEMINI", "API key")
|
|
1450
|
+
|
|
1451
|
+
print(
|
|
1452
|
+
f"🔑 GEMINI (API-only cooldown{cooldown_msg}/Agentic): {model} | max_turns={max_turns}",
|
|
1453
|
+
file=sys.stderr,
|
|
1454
|
+
)
|
|
1455
|
+
logger.info("[AgenticGemini] Using API key (5-min cooldown after OAuth 429)")
|
|
1456
|
+
result = await _invoke_gemini_agentic_with_api_key(
|
|
1457
|
+
api_key=api_key,
|
|
1458
|
+
prompt=prompt,
|
|
1459
|
+
model=model,
|
|
1460
|
+
max_turns=max_turns,
|
|
1461
|
+
timeout=timeout,
|
|
1462
|
+
)
|
|
1463
|
+
# Prepend auth header for visibility in logs
|
|
1464
|
+
auth_header = f"[Auth: API key (5-min cooldown, Agentic) | Model: {model}]\n\n"
|
|
1465
|
+
return auth_header + result
|
|
1466
|
+
|
|
1467
|
+
# DEFAULT: Try OAuth first (Antigravity)
|
|
1468
|
+
logger.info("[AgenticGemini] Using OAuth authentication (Antigravity)")
|
|
1469
|
+
|
|
1470
|
+
# Check time-window rate limit (30 req/min)
|
|
1471
|
+
time_limiter = get_gemini_time_limiter()
|
|
1472
|
+
wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
|
|
1473
|
+
if wait_time > 0:
|
|
1474
|
+
await asyncio.sleep(wait_time)
|
|
1475
|
+
# Re-acquire after sleep
|
|
1476
|
+
wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
|
|
1477
|
+
|
|
1478
|
+
# USER-VISIBLE NOTIFICATION (stderr) - Shows agentic mode with OAuth
|
|
1479
|
+
import sys
|
|
1480
|
+
|
|
1481
|
+
print(f"🔮 GEMINI (OAuth/Agentic): {model} | max_turns={max_turns}", file=sys.stderr)
|
|
1482
|
+
|
|
717
1483
|
access_token = await _ensure_valid_token(token_store, "gemini")
|
|
718
1484
|
api_model = resolve_gemini_model(model)
|
|
719
1485
|
|
|
@@ -800,6 +1566,33 @@ async def invoke_gemini_agentic(
                 logger.warning(f"[AgenticGemini] Endpoint {endpoint} failed: {e}, trying next")
                 continue
 
+    # ==============================================
+    # 429 RATE LIMIT DETECTION: Fallback to API key
+    # ==============================================
+    # If OAuth got rate-limited (429), switch to API-only mode and retry
+    if response is not None and response.status_code == 429:
+        api_key = _get_gemini_api_key()
+        if api_key:
+            _set_api_only_mode("OAuth rate-limited (429) in agentic mode")
+            logger.info("[AgenticGemini] Retrying with API key after OAuth 429")
+            # Retry entire agentic call with API key
+            result = await _invoke_gemini_agentic_with_api_key(
+                api_key=api_key,
+                prompt=prompt,
+                model=model,
+                max_turns=max_turns,
+                timeout=timeout,
+            )
+            # Prepend auth header for visibility
+            auth_header = f"[Auth: API key (OAuth 429 fallback, Agentic) | Model: {model}]\n\n"
+            return auth_header + result
+        else:
+            # No API key available - raise clear error
+            raise ValueError(
+                "OAuth rate-limited (429) and no API key available. "
+                "Add GEMINI_API_KEY to ~/.stravinsky/.env"
+            )
+
     if response is None:
         raise ValueError(f"All Antigravity endpoints failed: {last_error}")
 
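The cooldown switch relies on module-level state (`_GEMINI_OAUTH_429_TIMESTAMP`, `_OAUTH_COOLDOWN_SECONDS`) plus the `_set_api_only_mode` / `_is_api_only_mode` pair used in both hunks above. Their bodies are not part of this diff; a minimal sketch consistent with the call sites and the 5-minute cooldown named in the log messages:

```python
import logging
import time

logger = logging.getLogger(__name__)

# Module-level cooldown state; names come from the call sites, bodies are assumed
_OAUTH_COOLDOWN_SECONDS = 300  # 5 minutes, per the log messages above
_GEMINI_OAUTH_429_TIMESTAMP: float | None = None


def _set_api_only_mode(reason: str) -> None:
    """Open the API-only cooldown window after an OAuth 429."""
    global _GEMINI_OAUTH_429_TIMESTAMP
    _GEMINI_OAUTH_429_TIMESTAMP = time.time()
    logger.warning(f"Switching to API-only mode: {reason}")


def _is_api_only_mode() -> bool:
    """True while the cooldown window is still open."""
    if _GEMINI_OAUTH_429_TIMESTAMP is None:
        return False
    return time.time() - _GEMINI_OAUTH_429_TIMESTAMP < _OAUTH_COOLDOWN_SECONDS
```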
@@ -810,13 +1603,15 @@ async def invoke_gemini_agentic(
         inner_response = data.get("response", data)
         candidates = inner_response.get("candidates", [])
         if not candidates:
-            return "No response generated"
+            auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+            return auth_header + "No response generated"
 
         content = candidates[0].get("content", {})
         parts = content.get("parts", [])
 
         if not parts:
-            return "No response parts"
+            auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+            return auth_header + "No response parts"
 
         # Check for function call
         function_call = None
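Every exit path now carries an `[Auth: ... | Model: ...]` prefix so downstream logs show which credential served the request. A hypothetical helper (not part of the package) for callers that want the bare text back, assuming the exact header format produced above:

```python
import re

# Matches the header produced above, e.g. "[Auth: OAuth (Agentic) | Model: x]\n\n"
_AUTH_HEADER_RE = re.compile(r"^\[Auth: (?P<auth>[^|]+) \| Model: (?P<model>[^\]]+)\]\n\n")


def split_auth_header(result: str) -> tuple[dict[str, str] | None, str]:
    """Return (header_fields, body); header_fields is None if no header is present."""
    m = _AUTH_HEADER_RE.match(result)
    if m is None:
        return None, result
    fields = {"auth": m.group("auth").strip(), "model": m.group("model").strip()}
    return fields, result[m.end():]
```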
@@ -835,7 +1630,7 @@ async def invoke_gemini_agentic(
             func_args = function_call.get("args", {})
 
             logger.info(f"[AgenticGemini] Turn {turn + 1}: Executing {func_name}")
-            result = _execute_tool(func_name, func_args)
+            result = await _execute_tool(func_name, func_args)
 
             # Add model's response and function result to conversation
             contents.append({"role": "model", "parts": [{"functionCall": function_call}]})
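The change to `await _execute_tool(...)` means the tool dispatcher is now a coroutine. Its body is not shown in this diff; a minimal sketch of an async dispatcher with the same call shape, with a purely illustrative registry:

```python
import inspect
from typing import Any, Callable

# Illustrative registry only; the real tool set is defined elsewhere in the package
_TOOLS: dict[str, Callable[..., Any]] = {}


async def _execute_tool(func_name: str, func_args: dict[str, Any]) -> str:
    tool = _TOOLS.get(func_name)
    if tool is None:
        return f"Unknown tool: {func_name}"
    result = tool(**func_args)
    # Support both sync and async tools behind one awaitable interface
    if inspect.isawaitable(result):
        result = await result
    return str(result)
```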
@@ -849,9 +1644,12 @@ async def invoke_gemini_agentic(
             )
         else:
             # No function call, return text response
-            return text_response or "Task completed"
+            result = text_response or "Task completed"
+            auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+            return auth_header + result
 
-    return "Max turns reached without final response"
+    auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+    return auth_header + "Max turns reached without final response"
 
 
 @retry(
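Taken together, these hunks modify the exit points of a standard function-calling loop. A condensed, illustrative sketch of that loop's skeleton, assuming the `functionCall`/`functionResponse` part structure visible above and abstracting the network round-trip behind a `send_request` callable (an assumption, not the package's API):

```python
async def agentic_loop(send_request, prompt: str, max_turns: int) -> str:
    contents = [{"role": "user", "parts": [{"text": prompt}]}]
    for turn in range(max_turns):
        candidate = await send_request(contents)  # one model round-trip
        parts = candidate.get("content", {}).get("parts", [])
        function_call = next((p["functionCall"] for p in parts if "functionCall" in p), None)
        if function_call is None:
            # No tool requested: the text answer ends the loop
            text = "".join(p.get("text", "") for p in parts)
            return text or "Task completed"
        result = await _execute_tool(function_call["name"], function_call.get("args", {}))
        # Feed the tool result back so the model can continue the task
        contents.append({"role": "model", "parts": [{"functionCall": function_call}]})
        contents.append({
            "role": "user",
            "parts": [{"functionResponse": {"name": function_call["name"],
                                            "response": {"result": result}}}],
        })
    return "Max turns reached without final response"
```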
@@ -869,24 +1667,23 @@ async def invoke_openai(
     temperature: float = 0.7,
     max_tokens: int = 4096,
     thinking_budget: int = 0,
+    reasoning_effort: str = "medium",
 ) -> str:
     """
     Invoke an OpenAI model with the given prompt.
-
-    Args:
-        token_store: Token store for API key
-        prompt: The prompt to send to OpenAI
-        model: OpenAI model to use
-        temperature: Sampling temperature (0.0-2.0)
-        max_tokens: Maximum tokens in response
-
-    Returns:
-        The model's response text.
-
-    Raises:
-        ValueError: If not authenticated with OpenAI
-        httpx.HTTPStatusError: If API request fails
     """
+    from mcp_bridge.proxy.client import is_proxy_enabled, proxy_invoke_openai
+
+    if is_proxy_enabled():
+        return await proxy_invoke_openai(
+            prompt=prompt,
+            model=model,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            thinking_budget=thinking_budget,
+            reasoning_effort=reasoning_effort,
+        )
+
     # Execute pre-model invoke hooks
     params = {
         "prompt": prompt,
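`is_proxy_enabled` and `proxy_invoke_openai` come from the new `mcp_bridge.proxy.client` module, whose internals are not shown here. A hedged sketch of how such a short-circuit could look; the environment variable name, port, endpoint path, and response shape are all assumptions for illustration:

```python
import os

import httpx


def is_proxy_enabled() -> bool:
    # Assumed convention: an env var toggles routing through a local model server
    return os.environ.get("STRAVINSKY_PROXY", "").lower() in ("1", "true", "yes")


async def proxy_invoke_openai(prompt: str, model: str, **kwargs) -> str:
    # Forward the request to a hypothetical local model server and return its text
    async with httpx.AsyncClient() as client:
        resp = await client.post(
            "http://127.0.0.1:8765/invoke/openai",
            json={"prompt": prompt, "model": model, **kwargs},
            timeout=120.0,
        )
        resp.raise_for_status()
        return resp.json()["text"]
```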
@@ -894,6 +1691,7 @@ async def invoke_openai(
         "temperature": temperature,
         "max_tokens": max_tokens,
         "thinking_budget": thinking_budget,
+        "reasoning_effort": reasoning_effort,
         "token_store": token_store,  # Pass for hooks that need model access
         "provider": "openai",  # Identify which provider is being called
     }
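Because the params dict round-trips through the pre-invoke hooks before being read back, a hook can rewrite the new `reasoning_effort` field like any other. A hypothetical hook body illustrating that contract (the hook's registration mechanism is not shown in this hunk):

```python
async def budget_effort_hook(params: dict) -> dict:
    # Example policy: very long prompts get cheaper reasoning to cap cost
    if len(params.get("prompt", "")) > 20_000:
        params["reasoning_effort"] = "low"
    return params
```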
@@ -906,6 +1704,7 @@ async def invoke_openai(
     temperature = params["temperature"]
     max_tokens = params["max_tokens"]
     thinking_budget = params["thinking_budget"]
+    reasoning_effort = params.get("reasoning_effort", "medium")
 
     # Extract agent context for logging (may be passed via params or original call)
     agent_context = params.get("agent_context", {})
@@ -919,19 +1718,55 @@ async def invoke_openai(
 
     # USER-VISIBLE NOTIFICATION (stderr) - Shows when OpenAI is invoked
     import sys
+
     task_info = f" task={task_id}" if task_id else ""
     desc_info = f" | {description}" if description else ""
+
+    # ==============================================
+    # AUTH PRIORITY: OAuth first, Gemini fallback on 429
+    # ==============================================
+    # 1. If fallback mode (after 429), use Gemini directly
+    # 2. Otherwise, try OpenAI OAuth first
+    # 3. On 429 from OAuth, switch to fallback mode and retry with Gemini
+
+    provider_tracker = get_provider_tracker()
+
+    # If OpenAI is in cooldown, follow tier-aware fallback chain.
+    if not provider_tracker.is_available("openai"):
+        for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain("openai", model):
+            if candidate_provider == "openai":
+                continue
+            if use_oauth and not provider_tracker.is_available(candidate_provider):
+                continue
+
+            if candidate_provider == "gemini":
+                if not use_oauth:
+                    # Force Gemini API-key mode for the cooldown window.
+                    if _get_gemini_api_key() is None:
+                        continue
+                    _set_api_only_mode("OpenAI in cooldown; using Gemini API key")
+
+                return await invoke_gemini(
+                    token_store=token_store,
+                    prompt=prompt,
+                    model=candidate_model,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    thinking_budget=0,
+                    image_path=None,
+                )
+    # DEFAULT: Try OpenAI OAuth first
     print(f"🧠 OPENAI: {model} | agent={agent_type}{task_info}{desc_info}", file=sys.stderr)
 
     access_token = await _ensure_valid_token(token_store, "openai")
-    logger.info(
+    logger.info("[invoke_openai] Got access token")
 
     # ChatGPT Backend API - Uses Codex Responses endpoint
     # Replicates opencode-openai-codex-auth plugin behavior
     api_url = "https://chatgpt.com/backend-api/codex/responses"
 
     # Extract account ID from JWT token
-    logger.info(
+    logger.info("[invoke_openai] Extracting account ID from JWT")
     try:
         parts = access_token.split(".")
         payload_b64 = parts[1]
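The loop unpacks `(candidate_provider, candidate_model, use_oauth)` tuples from `get_oauth_fallback_chain`, so the chain encodes both the target model tier and whether the candidate requires OAuth availability. The real table lives in the routing package and is not part of this hunk; a sketch of the expected shape only, with an entirely illustrative tier mapping:

```python
from typing import Iterator

# Illustrative tier mapping only; the real table is defined in mcp_bridge/routing/
_FALLBACKS = {
    "openai": [
        ("openai", None, True),                 # same provider first (callers in cooldown skip it)
        ("gemini", "gemini-2.5-pro", True),     # Gemini OAuth at a comparable tier
        ("gemini", "gemini-2.5-flash", False),  # Gemini API key as last resort
    ],
}


def get_oauth_fallback_chain(provider: str, model: str) -> Iterator[tuple[str, str, bool]]:
    for cand_provider, cand_model, use_oauth in _FALLBACKS.get(provider, []):
        # None means "keep the originally requested model"
        yield cand_provider, cand_model or model, use_oauth
```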
@@ -959,6 +1794,10 @@ async def invoke_openai(
     if account_id:
         headers["x-openai-account-id"] = account_id
 
+    # Determine final effort
+    # Legacy: thinking_budget > 0 implies high effort
+    effort = "high" if thinking_budget > 0 else reasoning_effort
+
     # Request body matching opencode transformation
     payload = {
         "model": model,
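The precedence rule is small but worth pinning down: a positive legacy `thinking_budget` always wins, otherwise the new `reasoning_effort` value passes through. Restated as a standalone function with two spot checks, purely for illustration:

```python
def resolve_effort(thinking_budget: int, reasoning_effort: str) -> str:
    # Mirrors the expression in the hunk above
    return "high" if thinking_budget > 0 else reasoning_effort


assert resolve_effort(1024, "medium") == "high"  # legacy callers keep old behavior
assert resolve_effort(0, "low") == "low"         # new parameter wins when budget is 0
```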
@@ -966,7 +1805,7 @@ async def invoke_openai(
         "stream": True,  # Always stream (handler converts to non-stream if needed)
         "instructions": instructions,
         "input": [{"role": "user", "content": prompt}],
-        "reasoning": {"effort":
+        "reasoning": {"effort": effort, "summary": "auto"},
         "text": {"verbosity": "medium"},
         "include": ["reasoning.encrypted_content"],
     }
@@ -979,44 +1818,100 @@ async def invoke_openai(
     logger.info(f"[invoke_openai] Instructions length: {len(instructions)}")
 
     try:
-        async with
-
+        async with (
+            httpx.AsyncClient() as client,
+            client.stream(
                 "POST", api_url, headers=headers, json=payload, timeout=120.0
-        ) as response
-
-
-
-
-
+            ) as response,
+        ):
+            logger.info(f"[invoke_openai] Response status: {response.status_code}")
+            if response.status_code == 401:
+                raise ValueError("OpenAI authentication failed. Run: stravinsky-auth login openai")
+
+            # ==============================================
+            # 429 RATE LIMIT DETECTION: Tier-aware fallback chain
+            # ==============================================
+            if response.status_code == 429:
+                provider_tracker = get_provider_tracker()
+                provider_tracker.mark_rate_limited(
+                    "openai",
+                    duration=_OAUTH_COOLDOWN_SECONDS,
+                    reason="OpenAI OAuth rate-limited (429)",
+                )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain(
+                    "openai", model
+                ):
+                    if candidate_provider == "openai":
+                        continue
+                    if use_oauth and not provider_tracker.is_available(candidate_provider):
+                        continue
+
+                    if candidate_provider == "gemini":
+                        if not use_oauth:
+                            if _get_gemini_api_key() is None:
+                                continue
+                            _set_api_only_mode("OpenAI OAuth rate-limited (429)")
+
+                        return await invoke_gemini(
+                            token_store=token_store,
+                            prompt=prompt,
+                            model=candidate_model,
+                            temperature=temperature,
+                            max_tokens=max_tokens,
+                            thinking_budget=0,
+                            image_path=None,
+                        )
+
+                raise ValueError("OpenAI OAuth rate-limited (429) and no fallback succeeded")
+            if response.status_code >= 400:
+                error_body = await response.aread()
+                error_text = error_body.decode("utf-8")
+                logger.error(f"OpenAI API error {response.status_code}: {error_text}")
+                logger.error(f"Request payload was: {payload}")
+                logger.error(f"Request headers were: {headers}")
+                raise ValueError(f"OpenAI API error {response.status_code}: {error_text}")
+
+            # Parse SSE stream for text deltas
+            async for line in response.aiter_lines():
+                if line.startswith("data: "):
+                    data_json = line[6:]  # Remove "data: " prefix
+                    try:
+                        data = json_module.loads(data_json)
+                        event_type = data.get("type")
+
+                        # Extract text deltas from SSE stream
+                        if event_type == "response.output_text.delta":
+                            delta = data.get("delta", "")
+                            text_chunks.append(delta)
+
+                    except json_module.JSONDecodeError:
+                        pass  # Skip malformed JSON
+                    except Exception as e:
+                        logger.warning(f"Error processing SSE event: {e}")
 
     # Return collected text
     result = "".join(text_chunks)
+
+    # Track estimated usage
+    try:
+        from mcp_bridge.metrics.cost_tracker import get_cost_tracker
+
+        tracker = get_cost_tracker()
+        # Estimate: 4 chars per token
+        input_tokens = len(prompt) // 4
+        output_tokens = len(result) // 4
+
+        tracker.track_usage(
+            model=model,
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            agent_type=agent_type,
+            task_id=task_id,
+        )
+    except Exception as e:
+        logger.warning(f"Failed to track cost: {e}")
+
     if not result:
         return "No response generated"
     return result
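The streaming consumer keys on `response.output_text.delta` events, and the cost tracker then estimates tokens at roughly four characters each. A self-contained sketch of that parse-and-estimate path over a captured transcript; the sample events are fabricated for illustration:

```python
import json

sample_stream = [
    'data: {"type": "response.created"}',
    'data: {"type": "response.output_text.delta", "delta": "Hello, "}',
    'data: {"type": "response.output_text.delta", "delta": "world."}',
    'data: [DONE]',
]

text_chunks: list[str] = []
for line in sample_stream:
    if not line.startswith("data: "):
        continue
    try:
        event = json.loads(line[6:])
    except json.JSONDecodeError:
        continue  # e.g. the [DONE] sentinel
    if event.get("type") == "response.output_text.delta":
        text_chunks.append(event.get("delta", ""))

result = "".join(text_chunks)
print(result)            # "Hello, world."
print(len(result) // 4)  # crude ~4-chars-per-token output estimate
```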