stravinsky 0.2.67__py3-none-any.whl → 0.4.66__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of stravinsky might be problematic.
- mcp_bridge/__init__.py +1 -1
- mcp_bridge/auth/__init__.py +16 -6
- mcp_bridge/auth/cli.py +202 -11
- mcp_bridge/auth/oauth.py +1 -2
- mcp_bridge/auth/openai_oauth.py +4 -7
- mcp_bridge/auth/token_store.py +112 -11
- mcp_bridge/cli/__init__.py +1 -1
- mcp_bridge/cli/install_hooks.py +503 -107
- mcp_bridge/cli/session_report.py +0 -3
- mcp_bridge/config/MANIFEST_SCHEMA.md +305 -0
- mcp_bridge/config/README.md +276 -0
- mcp_bridge/config/__init__.py +2 -2
- mcp_bridge/config/hook_config.py +247 -0
- mcp_bridge/config/hooks_manifest.json +138 -0
- mcp_bridge/config/rate_limits.py +317 -0
- mcp_bridge/config/skills_manifest.json +128 -0
- mcp_bridge/hooks/HOOKS_SETTINGS.json +17 -4
- mcp_bridge/hooks/__init__.py +19 -4
- mcp_bridge/hooks/agent_reminder.py +4 -4
- mcp_bridge/hooks/auto_slash_command.py +5 -5
- mcp_bridge/hooks/budget_optimizer.py +2 -2
- mcp_bridge/hooks/claude_limits_hook.py +114 -0
- mcp_bridge/hooks/comment_checker.py +3 -4
- mcp_bridge/hooks/compaction.py +2 -2
- mcp_bridge/hooks/context.py +2 -1
- mcp_bridge/hooks/context_monitor.py +2 -2
- mcp_bridge/hooks/delegation_policy.py +85 -0
- mcp_bridge/hooks/directory_context.py +3 -3
- mcp_bridge/hooks/edit_recovery.py +3 -2
- mcp_bridge/hooks/edit_recovery_policy.py +49 -0
- mcp_bridge/hooks/empty_message_sanitizer.py +2 -2
- mcp_bridge/hooks/events.py +160 -0
- mcp_bridge/hooks/git_noninteractive.py +4 -4
- mcp_bridge/hooks/keyword_detector.py +8 -10
- mcp_bridge/hooks/manager.py +43 -22
- mcp_bridge/hooks/notification_hook.py +13 -6
- mcp_bridge/hooks/parallel_enforcement_policy.py +67 -0
- mcp_bridge/hooks/parallel_enforcer.py +5 -5
- mcp_bridge/hooks/parallel_execution.py +22 -10
- mcp_bridge/hooks/post_tool/parallel_validation.py +103 -0
- mcp_bridge/hooks/pre_compact.py +8 -9
- mcp_bridge/hooks/pre_tool/agent_spawn_validator.py +115 -0
- mcp_bridge/hooks/preemptive_compaction.py +2 -3
- mcp_bridge/hooks/routing_notifications.py +80 -0
- mcp_bridge/hooks/rules_injector.py +11 -19
- mcp_bridge/hooks/session_idle.py +4 -4
- mcp_bridge/hooks/session_notifier.py +4 -4
- mcp_bridge/hooks/session_recovery.py +4 -5
- mcp_bridge/hooks/stravinsky_mode.py +1 -1
- mcp_bridge/hooks/subagent_stop.py +1 -3
- mcp_bridge/hooks/task_validator.py +2 -2
- mcp_bridge/hooks/tmux_manager.py +7 -8
- mcp_bridge/hooks/todo_delegation.py +4 -1
- mcp_bridge/hooks/todo_enforcer.py +180 -10
- mcp_bridge/hooks/tool_messaging.py +113 -10
- mcp_bridge/hooks/truncation_policy.py +37 -0
- mcp_bridge/hooks/truncator.py +1 -2
- mcp_bridge/metrics/cost_tracker.py +115 -0
- mcp_bridge/native_search.py +93 -0
- mcp_bridge/native_watcher.py +118 -0
- mcp_bridge/notifications.py +150 -0
- mcp_bridge/orchestrator/enums.py +11 -0
- mcp_bridge/orchestrator/router.py +165 -0
- mcp_bridge/orchestrator/state.py +32 -0
- mcp_bridge/orchestrator/visualization.py +14 -0
- mcp_bridge/orchestrator/wisdom.py +34 -0
- mcp_bridge/prompts/__init__.py +1 -8
- mcp_bridge/prompts/dewey.py +1 -1
- mcp_bridge/prompts/planner.py +2 -4
- mcp_bridge/prompts/stravinsky.py +53 -31
- mcp_bridge/proxy/__init__.py +0 -0
- mcp_bridge/proxy/client.py +70 -0
- mcp_bridge/proxy/model_server.py +157 -0
- mcp_bridge/routing/__init__.py +43 -0
- mcp_bridge/routing/config.py +250 -0
- mcp_bridge/routing/model_tiers.py +135 -0
- mcp_bridge/routing/provider_state.py +261 -0
- mcp_bridge/routing/task_classifier.py +190 -0
- mcp_bridge/server.py +542 -59
- mcp_bridge/server_tools.py +738 -6
- mcp_bridge/tools/__init__.py +40 -25
- mcp_bridge/tools/agent_manager.py +616 -697
- mcp_bridge/tools/background_tasks.py +13 -17
- mcp_bridge/tools/code_search.py +70 -53
- mcp_bridge/tools/continuous_loop.py +0 -1
- mcp_bridge/tools/dashboard.py +19 -0
- mcp_bridge/tools/find_code.py +296 -0
- mcp_bridge/tools/init.py +1 -0
- mcp_bridge/tools/list_directory.py +42 -0
- mcp_bridge/tools/lsp/__init__.py +12 -5
- mcp_bridge/tools/lsp/manager.py +471 -0
- mcp_bridge/tools/lsp/tools.py +723 -207
- mcp_bridge/tools/model_invoke.py +1195 -273
- mcp_bridge/tools/mux_client.py +75 -0
- mcp_bridge/tools/project_context.py +1 -2
- mcp_bridge/tools/query_classifier.py +406 -0
- mcp_bridge/tools/read_file.py +84 -0
- mcp_bridge/tools/replace.py +45 -0
- mcp_bridge/tools/run_shell_command.py +38 -0
- mcp_bridge/tools/search_enhancements.py +347 -0
- mcp_bridge/tools/semantic_search.py +3627 -0
- mcp_bridge/tools/session_manager.py +0 -2
- mcp_bridge/tools/skill_loader.py +0 -1
- mcp_bridge/tools/task_runner.py +5 -7
- mcp_bridge/tools/templates.py +3 -3
- mcp_bridge/tools/tool_search.py +331 -0
- mcp_bridge/tools/write_file.py +29 -0
- mcp_bridge/update_manager.py +585 -0
- mcp_bridge/update_manager_pypi.py +297 -0
- mcp_bridge/utils/cache.py +82 -0
- mcp_bridge/utils/process.py +71 -0
- mcp_bridge/utils/session_state.py +51 -0
- mcp_bridge/utils/truncation.py +76 -0
- stravinsky-0.4.66.dist-info/METADATA +517 -0
- stravinsky-0.4.66.dist-info/RECORD +198 -0
- {stravinsky-0.2.67.dist-info → stravinsky-0.4.66.dist-info}/entry_points.txt +1 -0
- stravinsky_claude_assets/HOOKS_INTEGRATION.md +316 -0
- stravinsky_claude_assets/agents/HOOKS.md +437 -0
- stravinsky_claude_assets/agents/code-reviewer.md +210 -0
- stravinsky_claude_assets/agents/comment_checker.md +580 -0
- stravinsky_claude_assets/agents/debugger.md +254 -0
- stravinsky_claude_assets/agents/delphi.md +495 -0
- stravinsky_claude_assets/agents/dewey.md +248 -0
- stravinsky_claude_assets/agents/explore.md +1198 -0
- stravinsky_claude_assets/agents/frontend.md +472 -0
- stravinsky_claude_assets/agents/implementation-lead.md +164 -0
- stravinsky_claude_assets/agents/momus.md +464 -0
- stravinsky_claude_assets/agents/research-lead.md +141 -0
- stravinsky_claude_assets/agents/stravinsky.md +730 -0
- stravinsky_claude_assets/commands/delphi.md +9 -0
- stravinsky_claude_assets/commands/dewey.md +54 -0
- stravinsky_claude_assets/commands/git-master.md +112 -0
- stravinsky_claude_assets/commands/index.md +49 -0
- stravinsky_claude_assets/commands/publish.md +86 -0
- stravinsky_claude_assets/commands/review.md +73 -0
- stravinsky_claude_assets/commands/str/agent_cancel.md +70 -0
- stravinsky_claude_assets/commands/str/agent_list.md +56 -0
- stravinsky_claude_assets/commands/str/agent_output.md +92 -0
- stravinsky_claude_assets/commands/str/agent_progress.md +74 -0
- stravinsky_claude_assets/commands/str/agent_retry.md +94 -0
- stravinsky_claude_assets/commands/str/cancel.md +51 -0
- stravinsky_claude_assets/commands/str/clean.md +97 -0
- stravinsky_claude_assets/commands/str/continue.md +38 -0
- stravinsky_claude_assets/commands/str/index.md +199 -0
- stravinsky_claude_assets/commands/str/list_watchers.md +96 -0
- stravinsky_claude_assets/commands/str/search.md +205 -0
- stravinsky_claude_assets/commands/str/start_filewatch.md +136 -0
- stravinsky_claude_assets/commands/str/stats.md +71 -0
- stravinsky_claude_assets/commands/str/stop_filewatch.md +89 -0
- stravinsky_claude_assets/commands/str/unwatch.md +42 -0
- stravinsky_claude_assets/commands/str/watch.md +45 -0
- stravinsky_claude_assets/commands/strav.md +53 -0
- stravinsky_claude_assets/commands/stravinsky.md +292 -0
- stravinsky_claude_assets/commands/verify.md +60 -0
- stravinsky_claude_assets/commands/version.md +5 -0
- stravinsky_claude_assets/hooks/README.md +248 -0
- stravinsky_claude_assets/hooks/comment_checker.py +193 -0
- stravinsky_claude_assets/hooks/context.py +38 -0
- stravinsky_claude_assets/hooks/context_monitor.py +153 -0
- stravinsky_claude_assets/hooks/dependency_tracker.py +73 -0
- stravinsky_claude_assets/hooks/edit_recovery.py +46 -0
- stravinsky_claude_assets/hooks/execution_state_tracker.py +68 -0
- stravinsky_claude_assets/hooks/notification_hook.py +103 -0
- stravinsky_claude_assets/hooks/notification_hook_v2.py +96 -0
- stravinsky_claude_assets/hooks/parallel_execution.py +241 -0
- stravinsky_claude_assets/hooks/parallel_reinforcement.py +106 -0
- stravinsky_claude_assets/hooks/parallel_reinforcement_v2.py +112 -0
- stravinsky_claude_assets/hooks/pre_compact.py +123 -0
- stravinsky_claude_assets/hooks/ralph_loop.py +173 -0
- stravinsky_claude_assets/hooks/session_recovery.py +263 -0
- stravinsky_claude_assets/hooks/stop_hook.py +89 -0
- stravinsky_claude_assets/hooks/stravinsky_metrics.py +164 -0
- stravinsky_claude_assets/hooks/stravinsky_mode.py +146 -0
- stravinsky_claude_assets/hooks/subagent_stop.py +98 -0
- stravinsky_claude_assets/hooks/todo_continuation.py +111 -0
- stravinsky_claude_assets/hooks/todo_delegation.py +96 -0
- stravinsky_claude_assets/hooks/tool_messaging.py +281 -0
- stravinsky_claude_assets/hooks/truncator.py +23 -0
- stravinsky_claude_assets/rules/deployment_safety.md +51 -0
- stravinsky_claude_assets/rules/integration_wiring.md +89 -0
- stravinsky_claude_assets/rules/pypi_deployment.md +220 -0
- stravinsky_claude_assets/rules/stravinsky_orchestrator.md +32 -0
- stravinsky_claude_assets/settings.json +152 -0
- stravinsky_claude_assets/skills/chrome-devtools/SKILL.md +81 -0
- stravinsky_claude_assets/skills/sqlite/SKILL.md +77 -0
- stravinsky_claude_assets/skills/supabase/SKILL.md +74 -0
- stravinsky_claude_assets/task_dependencies.json +34 -0
- stravinsky-0.2.67.dist-info/METADATA +0 -284
- stravinsky-0.2.67.dist-info/RECORD +0 -76
- {stravinsky-0.2.67.dist-info → stravinsky-0.4.66.dist-info}/WHEEL +0 -0
mcp_bridge/tools/model_invoke.py CHANGED

@@ -5,12 +5,17 @@ These tools use OAuth tokens from the token store to authenticate
 API requests to external model providers.
 """

+import asyncio
+import base64
+import json as json_module
 import logging
 import os
 import time
 import uuid
-
-import
+
+from mcp_bridge.config.rate_limits import get_rate_limiter, get_gemini_time_limiter
+from mcp_bridge.routing.model_tiers import get_oauth_fallback_chain
+from mcp_bridge.routing.provider_state import get_provider_tracker

 logger = logging.getLogger(__name__)

@@ -42,6 +47,130 @@ def _summarize_prompt(prompt: str, max_length: int = 120) -> str:
 _CODEX_INSTRUCTIONS_CACHE = {}
 _CODEX_INSTRUCTIONS_RELEASE_TAG = "rust-v0.77.0"  # Update as needed

+# ==============================================
+# GEMINI AUTH MODE STATE (OAuth-first with 429 fallback)
+# ==============================================
+# When OAuth gets a 429 rate limit, we switch to API-only mode for 5 minutes.
+# After 5 minutes, we automatically retry OAuth.
+_GEMINI_OAUTH_429_TIMESTAMP: float | None = None  # Timestamp of last 429
+_OAUTH_COOLDOWN_SECONDS = 300  # 5 minutes
+
+# ==============================================
+# OPENAI AUTH MODE STATE (OAuth-first with 429 fallback)
+# ==============================================
+# When OpenAI OAuth gets a 429 rate limit, we fallback to Gemini for 5 minutes.
+# After 5 minutes, we automatically retry OpenAI OAuth.
+_OPENAI_OAUTH_429_TIMESTAMP: float | None = None  # Timestamp of last OpenAI 429
+
+
+def _get_gemini_api_key() -> str | None:
+    """Get Gemini API key from environment (loaded from ~/.stravinsky/.env)."""
+    return os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
+
+
+def _set_api_only_mode(reason: str = "429 rate limit"):
+    """Switch to API-only mode after OAuth rate limit (5-minute cooldown)."""
+    global _GEMINI_OAUTH_429_TIMESTAMP
+    _GEMINI_OAUTH_429_TIMESTAMP = time.time()
+    logger.warning(f"[Gemini] Switching to API-only mode: {reason}")
+    import sys
+
+    print(
+        f"⚠️ GEMINI: OAuth rate-limited (429). "
+        f"Using API key for 5 minutes (will retry OAuth at {time.strftime('%H:%M:%S', time.localtime(_GEMINI_OAUTH_429_TIMESTAMP + _OAUTH_COOLDOWN_SECONDS))}).",
+        file=sys.stderr,
+    )
+
+
+def _is_api_only_mode() -> bool:
+    """
+    Check if we're in API-only mode (5-minute cooldown after 429).
+
+    Returns True if:
+    - 429 occurred AND
+    - Less than 5 minutes have elapsed
+
+    Automatically resets to OAuth mode after 5 minutes.
+    """
+    global _GEMINI_OAUTH_429_TIMESTAMP
+
+    if _GEMINI_OAUTH_429_TIMESTAMP is None:
+        return False
+
+    elapsed = time.time() - _GEMINI_OAUTH_429_TIMESTAMP
+
+    if elapsed >= _OAUTH_COOLDOWN_SECONDS:
+        # Cooldown expired - reset to OAuth mode
+        logger.info(
+            f"[Gemini] 5-minute cooldown expired (elapsed: {elapsed:.0f}s). Retrying OAuth."
+        )
+        _GEMINI_OAUTH_429_TIMESTAMP = None
+        return False
+
+    # Still in cooldown
+    remaining = _OAUTH_COOLDOWN_SECONDS - elapsed
+    logger.debug(f"[Gemini] API-only mode active ({remaining:.0f}s remaining)")
+    return True
+
+
+def reset_gemini_auth_mode():
+    """Reset to OAuth-first mode. Call this to manually reset cooldown."""
+    global _GEMINI_OAUTH_429_TIMESTAMP
+    _GEMINI_OAUTH_429_TIMESTAMP = None
+    logger.info("[Gemini] Reset to OAuth-first mode")
+
+
+def _set_openai_fallback_mode(reason: str = "429 rate limit"):
+    """Switch to Gemini fallback after OpenAI rate limit (5-minute cooldown)."""
+    global _OPENAI_OAUTH_429_TIMESTAMP
+    _OPENAI_OAUTH_429_TIMESTAMP = time.time()
+    logger.warning(f"[OpenAI] Switching to Gemini fallback: {reason}")
+    import sys
+
+    print(
+        f"⚠️ OPENAI: OAuth rate-limited (429). "
+        f"Using Gemini for 5 minutes (will retry OpenAI at {time.strftime('%H:%M:%S', time.localtime(_OPENAI_OAUTH_429_TIMESTAMP + _OAUTH_COOLDOWN_SECONDS))}).",
+        file=sys.stderr,
+    )
+
+
+def _is_openai_fallback_mode() -> bool:
+    """
+    Check if we're in Gemini fallback mode (5-minute cooldown after OpenAI 429).
+
+    Returns True if:
+    - OpenAI 429 occurred AND
+    - Less than 5 minutes have elapsed
+
+    Automatically resets to OpenAI mode after 5 minutes.
+    """
+    global _OPENAI_OAUTH_429_TIMESTAMP
+
+    if _OPENAI_OAUTH_429_TIMESTAMP is None:
+        return False
+
+    elapsed = time.time() - _OPENAI_OAUTH_429_TIMESTAMP
+
+    if elapsed >= _OAUTH_COOLDOWN_SECONDS:
+        # Cooldown expired - reset to OpenAI mode
+        logger.info(
+            f"[OpenAI] 5-minute cooldown expired (elapsed: {elapsed:.0f}s). Retrying OpenAI OAuth."
+        )
+        _OPENAI_OAUTH_429_TIMESTAMP = None
+        return False
+
+    # Still in cooldown
+    remaining = _OAUTH_COOLDOWN_SECONDS - elapsed
+    logger.debug(f"[OpenAI] Gemini fallback mode active ({remaining:.0f}s remaining)")
+    return True
+
+
+def reset_openai_auth_mode():
+    """Reset to OpenAI-first mode. Call this to manually reset cooldown."""
+    global _OPENAI_OAUTH_429_TIMESTAMP
+    _OPENAI_OAUTH_429_TIMESTAMP = None
+    logger.info("[OpenAI] Reset to OAuth-first mode")
+

 async def _fetch_codex_instructions(model: str = "gpt-5.2-codex") -> str:
     """
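The hunk above is a module-level "429 cooldown" state machine: record the timestamp of the last rate-limit hit, treat the fallback mode as active until a fixed window elapses, then self-reset. A minimal sketch of that pattern, standard library only; the names here are illustrative, not the package's API:

import time

_COOLDOWN_SECONDS = 300  # 5 minutes, matching _OAUTH_COOLDOWN_SECONDS above
_last_429: float | None = None

def record_429() -> None:
    # Remember when the provider returned HTTP 429.
    global _last_429
    _last_429 = time.time()

def in_cooldown() -> bool:
    # True while the window is open; automatically resets afterwards.
    global _last_429
    if _last_429 is None:
        return False
    if time.time() - _last_429 >= _COOLDOWN_SECONDS:
        _last_429 = None  # cooldown expired: go back to OAuth-first
        return False
    return True

Because the state is a plain module global, it is per-process and resets on restart; the reset_* helpers above exist so callers can end the cooldown early.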
@@ -107,20 +236,21 @@ def resolve_gemini_model(model: str) -> str:
 import httpx
 from tenacity import (
     retry,
+    retry_if_exception,
     stop_after_attempt,
     wait_exponential,
-    retry_if_exception,
 )

-from ..auth.token_store import TokenStore
 from ..auth.oauth import (
-    refresh_access_token as gemini_refresh,
-    ANTIGRAVITY_HEADERS,
-    ANTIGRAVITY_ENDPOINTS,
     ANTIGRAVITY_DEFAULT_PROJECT_ID,
-
+    ANTIGRAVITY_ENDPOINTS,
+    ANTIGRAVITY_HEADERS,
+)
+from ..auth.oauth import (
+    refresh_access_token as gemini_refresh,
 )
 from ..auth.openai_oauth import refresh_access_token as openai_refresh
+from ..auth.token_store import TokenStore
 from ..hooks.manager import get_hook_manager

 # ========================
@@ -134,6 +264,53 @@ _SESSION_CACHE: dict[str, str] = {}
 # Pooled HTTP client for connection reuse
 _HTTP_CLIENT: httpx.AsyncClient | None = None

+# Per-model semaphores for async rate limiting (uses config from ~/.stravinsky/config.json)
+_GEMINI_SEMAPHORES: dict[str, asyncio.Semaphore] = {}
+
+
+def _get_gemini_rate_limit(model: str) -> int:
+    """
+    Get configured rate limit for a Gemini model.
+
+    Reads from ~/.stravinsky/config.json if available, otherwise uses defaults.
+
+    Args:
+        model: Gemini model name (e.g., "gemini-3-flash", "gemini-3-pro-high")
+
+    Returns:
+        Configured concurrency limit for this model
+    """
+    rate_limiter = get_rate_limiter()
+    # Normalize model name to match config keys
+    normalized = rate_limiter._normalize_model(model)
+    return rate_limiter._limits.get(normalized, rate_limiter._limits.get("_default", 5))
+
+
+def _get_gemini_semaphore(model: str) -> asyncio.Semaphore:
+    """
+    Get or create async semaphore for Gemini model rate limiting.
+
+    Creates one semaphore per model type with limits from config.
+    Limits can be customized in ~/.stravinsky/config.json:
+        {
+            "rate_limits": {
+                "gemini-3-flash": 15,
+                "gemini-3-pro-high": 8
+            }
+        }
+
+    Args:
+        model: Gemini model name
+
+    Returns:
+        asyncio.Semaphore with configured limit for this model
+    """
+    if model not in _GEMINI_SEMAPHORES:
+        limit = _get_gemini_rate_limit(model)
+        _GEMINI_SEMAPHORES[model] = asyncio.Semaphore(limit)
+        logger.info(f"[RateLimit] Created semaphore for {model} with limit {limit}")
+    return _GEMINI_SEMAPHORES[model]
+

 def _get_session_id(conversation_key: str | None = None) -> str:
     """
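The hunk above lazily creates one asyncio.Semaphore per model name, sized from config, so at most N requests per model are in flight at once. A minimal sketch of the pattern, with a hypothetical limits dict standing in for the package's rate-limiter config:

import asyncio

_LIMITS = {"gemini-3-flash": 15, "_default": 5}  # illustrative values only
_SEMAPHORES: dict[str, asyncio.Semaphore] = {}

def get_semaphore(model: str) -> asyncio.Semaphore:
    # One semaphore per model, created on first use.
    if model not in _SEMAPHORES:
        _SEMAPHORES[model] = asyncio.Semaphore(_LIMITS.get(model, _LIMITS["_default"]))
    return _SEMAPHORES[model]

async def limited_call(model: str, make_request):
    # Blocks while the per-model limit of in-flight requests is reached.
    async with get_semaphore(model):
        return await make_request()

This caps concurrency, not request rate; the diff pairs it with a separate time-window limiter (get_gemini_time_limiter) for requests-per-minute.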
@@ -284,51 +461,180 @@ async def _ensure_valid_token(token_store: TokenStore, provider: str) -> str:


 def is_retryable_exception(e: Exception) -> bool:
-    """
+    """
+    Check if an exception is retryable (5xx only, NOT 429).
+
+    429 (Rate Limit) errors should fail fast - retrying makes the problem worse
+    by adding more requests to an already exhausted quota. The semaphore prevents
+    these in the first place, but if one slips through, we shouldn't retry.
+    """
     if isinstance(e, httpx.HTTPStatusError):
-
+        # Only retry server errors (5xx), not rate limits (429)
+        return 500 <= e.response.status_code < 600
     return False


-
-
-    wait=wait_exponential(multiplier=1, min=4, max=60),
-    retry=retry_if_exception(is_retryable_exception),
-    before_sleep=lambda retry_state: logger.info(
-        f"Rate limited or server error, retrying in {retry_state.next_action.sleep} seconds..."
-    ),
-)
-async def invoke_gemini(
-    token_store: TokenStore,
+async def _invoke_gemini_with_api_key(
+    api_key: str,
     prompt: str,
     model: str = "gemini-3-flash",
     temperature: float = 0.7,
     max_tokens: int = 4096,
     thinking_budget: int = 0,
     image_path: str | None = None,
+    agent_context: dict | None = None,
 ) -> str:
     """
-    Invoke
+    Invoke Gemini using API key authentication (google-genai library).

-
-
+    This is an alternative to OAuth authentication that uses the official
+    google-genai Python library with a simple API key.

     Args:
-
+        api_key: Gemini API key (from GEMINI_API_KEY or GOOGLE_API_KEY env var)
         prompt: The prompt to send to Gemini
-        model: Gemini model to use
+        model: Gemini model to use (e.g., "gemini-3-flash-preview")
         temperature: Sampling temperature (0.0-2.0)
         max_tokens: Maximum tokens in response
-        thinking_budget: Tokens reserved for internal reasoning
-        image_path: Optional path to image/PDF for vision analysis
+        thinking_budget: Tokens reserved for internal reasoning (if supported)
+        image_path: Optional path to image/PDF for vision analysis

     Returns:
         The model's response text.

     Raises:
-
-
+        ImportError: If google-genai library is not installed
+        ValueError: If API request fails
     """
+    try:
+        from google import genai
+    except ImportError:
+        raise ImportError(
+            "google-genai library not installed. Install with: pip install google-genai"
+        )
+
+    # Map stravinsky model names to google-genai model names
+    # Pass through gemini-3-* models directly (Tier 3 benefits)
+    model_map = {
+        "gemini-3-flash": "gemini-3-flash-preview",  # Tier 3 model (not -exp)
+        "gemini-3-flash-preview": "gemini-3-flash-preview",  # Pass through
+        "gemini-3-pro-low": "gemini-3-flash-preview",
+        "gemini-3-pro-high": "gemini-3-pro-preview",  # Tier 3 pro model
+        "gemini-3-pro-preview": "gemini-3-pro-preview",  # Pass through
+        "gemini-flash": "gemini-3-flash-preview",
+        "gemini-pro": "gemini-3-pro-preview",
+        "gemini-3-pro": "gemini-3-pro-preview",
+        "gemini": "gemini-3-flash-preview",
+    }
+    genai_model = model_map.get(model, "gemini-3-flash-preview")  # Default to tier 3 flash
+
+    try:
+        # Initialize client with API key
+        client = genai.Client(api_key=api_key)
+
+        # Build generation config
+        config = {
+            "temperature": temperature,
+            "max_output_tokens": max_tokens,
+        }
+
+        # Add thinking budget if supported (experimental feature)
+        if thinking_budget > 0:
+            config["thinking_config"] = {
+                "thinking_budget": thinking_budget,
+            }
+
+        # Build contents - text prompt plus optional image
+        contents = [prompt]
+
+        # Add image data for vision analysis
+        if image_path:
+            from pathlib import Path
+
+            image_file = Path(image_path)
+            if image_file.exists():
+                # google-genai supports direct file path or base64
+                # For simplicity, use the file path directly
+                contents.append(image_file)
+                logger.info(f"[API_KEY] Added vision data: {image_path}")
+
+        # Generate content
+        response = client.models.generate_content(
+            model=genai_model,
+            contents=contents,
+            config=config,
+        )
+
+        # Track usage
+        try:
+            from mcp_bridge.metrics.cost_tracker import get_cost_tracker
+
+            tracker = get_cost_tracker()
+            if hasattr(response, "usage_metadata"):
+                usage = response.usage_metadata
+                agent_type = (agent_context or {}).get("agent_type", "unknown")
+                task_id = (agent_context or {}).get("task_id", "")
+
+                tracker.track_usage(
+                    model=model,
+                    input_tokens=usage.prompt_token_count,
+                    output_tokens=usage.candidates_token_count,
+                    agent_type=agent_type,
+                    task_id=task_id,
+                )
+        except Exception:
+            pass
+
+        # Extract text from response
+        if hasattr(response, "text"):
+            return response.text
+        elif hasattr(response, "candidates") and response.candidates:
+            # Fallback: extract from candidates
+            candidate = response.candidates[0]
+            if hasattr(candidate, "content"):
+                parts = candidate.content.parts
+                text_parts = [part.text for part in parts if hasattr(part, "text")]
+                return "".join(text_parts) if text_parts else "No response generated"
+
+        return "No response generated"
+
+    except Exception as e:
+        logger.error(f"API key authentication failed: {e}")
+        raise ValueError(f"Gemini API key request failed: {e}")
+
+
+@retry(
+    stop=stop_after_attempt(2),  # Reduced from 5 to 2 attempts
+    wait=wait_exponential(multiplier=2, min=10, max=120),  # Longer waits: 10s → 20s → 40s
+    retry=retry_if_exception(is_retryable_exception),
+    before_sleep=lambda retry_state: logger.info(
+        f"Server error, retrying in {retry_state.next_action.sleep} seconds..."
+    ),
+)
+async def invoke_gemini(
+    token_store: TokenStore,
+    prompt: str,
+    model: str = "gemini-3-flash",
+    temperature: float = 0.7,
+    max_tokens: int = 4096,
+    thinking_budget: int = 0,
+    image_path: str | None = None,
+) -> str:
+    """
+    Invoke a Gemini model with the given prompt.
+    """
+    from mcp_bridge.proxy.client import is_proxy_enabled, proxy_invoke_gemini
+
+    if is_proxy_enabled():
+        return await proxy_invoke_gemini(
+            prompt=prompt,
+            model=model,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            thinking_budget=thinking_budget,
+            image_path=image_path,
+        )
+
     logger.info(f"[DEBUG] invoke_gemini called, uuid module check: {uuid}")
     # Execute pre-model invoke hooks
     params = {
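The replacement retry policy above retries only 5xx server errors (two attempts, longer exponential waits) and lets 429s propagate immediately. A self-contained sketch of that policy using tenacity and httpx, as the diff does; the fetch function itself is an illustrative stand-in:

import httpx
from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential

def _retryable(e: BaseException) -> bool:
    # Retry server errors only; a 429 should fail fast, not add more load.
    return isinstance(e, httpx.HTTPStatusError) and 500 <= e.response.status_code < 600

@retry(
    stop=stop_after_attempt(2),
    wait=wait_exponential(multiplier=2, min=10, max=120),
    retry=retry_if_exception(_retryable),
)
async def fetch(url: str) -> str:
    async with httpx.AsyncClient() as client:
        resp = await client.get(url)
        resp.raise_for_status()  # raises HTTPStatusError; only 5xx triggers a retry
        return resp.text

The design choice is that 429 handling lives in the fallback-chain logic below rather than in the retry decorator, so a rate-limited provider is swapped out instead of hammered.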
@@ -360,188 +666,380 @@ async def invoke_gemini(
     # Log with agent context and prompt summary
     logger.info(f"[{agent_type}] → {model}: {prompt_summary}")

-    #
+    # Get API key from environment (loaded from ~/.stravinsky/.env)
+    api_key = _get_gemini_api_key()
     import sys
+
     task_info = f" task={task_id}" if task_id else ""
     desc_info = f" | {description}" if description else ""
-    print(f"🔮 GEMINI: {model} | agent={agent_type}{task_info}{desc_info}", file=sys.stderr)

-
+    # ==============================================
+    # AUTH PRIORITY: OAuth first, API fallback on 429
+    # ==============================================
+    # 1. If API-only mode (after 429), use API key directly
+    # 2. Otherwise, try OAuth first
+    # 3. On 429 from OAuth, switch to API-only mode and retry

-    #
-
+    # If we're in API-only mode (after a 429), use API key directly
+    if _is_api_only_mode():
+        if not api_key:
+            raise ValueError(
+                "OAuth rate-limited (429) and no API key available. "
+                "Add GEMINI_API_KEY to ~/.stravinsky/.env"
+            )

-
-
-
+        # Calculate remaining cooldown time
+        if _GEMINI_OAUTH_429_TIMESTAMP is not None:
+            remaining = _OAUTH_COOLDOWN_SECONDS - (time.time() - _GEMINI_OAUTH_429_TIMESTAMP)
+            remaining_mins = int(remaining // 60)
+            remaining_secs = int(remaining % 60)
+            cooldown_msg = f" (OAuth retry in {remaining_mins}m {remaining_secs}s)"
+        else:
+            cooldown_msg = ""
+
+        # Check time-window rate limit (30 req/min)
+        time_limiter = get_gemini_time_limiter()
+        wait_time = time_limiter.acquire_visible("GEMINI", "API key")
+        if wait_time > 0:
+            await asyncio.sleep(wait_time)
+            # Re-acquire after sleep
+            wait_time = time_limiter.acquire_visible("GEMINI", "API key")
+
+        print(
+            f"🔑 GEMINI (API-only cooldown{cooldown_msg}): {model} | agent={agent_type}{task_info}{desc_info}",
+            file=sys.stderr,
+        )
+        logger.info(f"[{agent_type}] Using API key (5-min cooldown after OAuth 429)")
+        semaphore = _get_gemini_semaphore(model)
+        async with semaphore:
+            result = await _invoke_gemini_with_api_key(
+                api_key=api_key,
+                prompt=prompt,
+                model=model,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                thinking_budget=thinking_budget,
+                image_path=image_path,
+                agent_context=agent_context,
+            )
+        # Prepend auth header for visibility in logs
+        auth_header = f"[Auth: API key (5-min cooldown) | Model: {model}]\n\n"
+        return auth_header + result

-
-        "Authorization": f"Bearer {access_token}",
-        "Content-Type": "application/json",
-        **ANTIGRAVITY_HEADERS,  # Include Antigravity headers
-    }
+    provider_tracker = get_provider_tracker()

-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # If Gemini is in cooldown, follow tier-aware fallback chain.
+    if not provider_tracker.is_available("gemini"):
+        for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain("gemini", model):
+            if candidate_provider == "gemini" and use_oauth:
+                continue
+            if use_oauth and not provider_tracker.is_available(candidate_provider):
+                continue
+
+            if candidate_provider == "gemini" and not use_oauth:
+                api_key = _get_gemini_api_key()
+                if not api_key:
+                    continue
+                _set_api_only_mode("Gemini in cooldown; using API key")
+                result = await _invoke_gemini_with_api_key(
+                    api_key=api_key,
+                    prompt=prompt,
+                    model=candidate_model,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    thinking_budget=thinking_budget,
+                    image_path=image_path,
+                    agent_context=agent_context,
+                )
+                auth_header = f"[Auth: API key (cooldown) | Model: {candidate_model}]\n\n"
+                return auth_header + result
+
+            if candidate_provider == "openai" and use_oauth:
+                return await invoke_openai(
+                    token_store=token_store,
+                    prompt=prompt,
+                    model=candidate_model,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    thinking_budget=0,
+                    reasoning_effort="medium",
+                )

-
-
+    # DEFAULT: Try OAuth first (Antigravity)
+
+    # Check time-window rate limit (30 req/min)
+    time_limiter = get_gemini_time_limiter()
+    wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
+    if wait_time > 0:
+        await asyncio.sleep(wait_time)
+        # Re-acquire after sleep
+        wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
+
+    print(
+        f"🔮 GEMINI (OAuth): {model} | agent={agent_type}{task_info}{desc_info}",
+        file=sys.stderr,
+    )
+    logger.info(f"[{agent_type}] Using OAuth authentication (Antigravity)")
+    # Rate limit concurrent Gemini requests (configurable via ~/.stravinsky/config.json)
+    semaphore = _get_gemini_semaphore(model)
+    async with semaphore:
+        access_token = await _ensure_valid_token(token_store, "gemini")
+
+        # Resolve user-friendly model name to actual API model ID
+        api_model = resolve_gemini_model(model)
+
+        # Use persistent session ID for thinking signature caching
+        session_id = _get_session_id()
+        project_id = os.getenv("STRAVINSKY_ANTIGRAVITY_PROJECT_ID", ANTIGRAVITY_DEFAULT_PROJECT_ID)
+
+        headers = {
+            "Authorization": f"Bearer {access_token}",
+            "Content-Type": "application/json",
+            **ANTIGRAVITY_HEADERS,  # Include Antigravity headers
+        }

-
-
-
-
-
+        # Build inner request payload
+        # Per API spec: contents must include role ("user" or "model")
+
+        # Build parts list - text prompt plus optional image
+        parts = [{"text": prompt}]
+
+        # Add image data for vision analysis (token optimization for multimodal)
+        if image_path:
+            import base64
+            from pathlib import Path
+
+            image_file = Path(image_path)
+            if image_file.exists():
+                # Determine MIME type
+                suffix = image_file.suffix.lower()
+                mime_types = {
+                    ".png": "image/png",
+                    ".jpg": "image/jpeg",
+                    ".jpeg": "image/jpeg",
+                    ".gif": "image/gif",
+                    ".webp": "image/webp",
+                    ".pdf": "application/pdf",
                 }
-
-        logger.info(f"[multimodal] Added vision data: {image_path} ({mime_type})")
+                mime_type = mime_types.get(suffix, "image/png")

-
-
-    "generationConfig": {
-        "temperature": temperature,
-        "maxOutputTokens": max_tokens,
-    },
-    "sessionId": session_id,
-}
+                # Read and base64 encode
+                image_data = base64.b64encode(image_file.read_bytes()).decode("utf-8")

-
-
-
-
-
-
-
-
+                # Add inline image data for Gemini Vision API
+                parts.append(
+                    {
+                        "inlineData": {
+                            "mimeType": mime_type,
+                            "data": image_data,
+                        }
+                    }
+                )
+                logger.info(f"[multimodal] Added vision data: {image_path} ({mime_type})")

-
-
+        inner_payload = {
+            "contents": [{"role": "user", "parts": parts}],
+            "generationConfig": {
+                "temperature": temperature,
+                "maxOutputTokens": max_tokens,
+            },
+            "sessionId": session_id,
+        }

-
-
-
-
-
-
-
-    "userAgent": "antigravity",
-    "requestId": request_id,
-    "request": inner_payload,
-}
+        # Add thinking budget if supported by model/API
+        if thinking_budget > 0:
+            # For Gemini 2.0+ Thinking models
+            # Per Antigravity API: use "thinkingBudget", NOT "tokenLimit"
+            inner_payload["generationConfig"]["thinkingConfig"] = {
+                "includeThoughts": True,
+                "thinkingBudget": thinking_budget,
+            }

-
-
+        # Wrap request body per reference implementation
+        try:
+            import uuid as uuid_module  # Local import workaround for MCP context issue

-
-
-
-
+            request_id = f"invoke-{uuid_module.uuid4()}"
+        except Exception as e:
+            logger.error(f"UUID IMPORT FAILED: {e}")
+            raise RuntimeError(f"CUSTOM ERROR: UUID import failed: {e}")

-
-
-
-
+        wrapped_payload = {
+            "project": project_id,
+            "model": api_model,
+            "userAgent": "antigravity",
+            "requestId": request_id,
+            "request": inner_payload,
+        }

-
-
-    api_url,
-    headers=headers,
-    json=wrapped_payload,
-    timeout=120.0,
-)
+        # Get pooled HTTP client for connection reuse
+        client = await _get_http_client()

-
-
-
-
+        # Try endpoints in fallback order with thinking recovery
+        response = None
+        last_error = None
+        max_retries = 2  # For thinking recovery
+
+        for retry_attempt in range(max_retries):
+            for endpoint in ANTIGRAVITY_ENDPOINTS:
+                # Reference uses: {endpoint}/v1internal:generateContent (NOT /models/{model})
+                api_url = f"{endpoint}/v1internal:generateContent"
+
+                try:
+                    response = await client.post(
+                        api_url,
+                        headers=headers,
+                        json=wrapped_payload,
+                        timeout=120.0,
                     )
-                    last_error = Exception(f"{response.status_code} from {endpoint}")
-                    continue

-
-
-                error_text = response.text.lower()
-                if "thinking" in error_text or "signature" in error_text:
+                    # 401/403 might be endpoint-specific, try next endpoint
+                    if response.status_code in (401, 403):
                         logger.warning(
-                            f"[Gemini]
+                            f"[Gemini] Endpoint {endpoint} returned {response.status_code}, trying next"
                         )
-
-
-
-
-
-
-
-
+                        last_error = Exception(f"{response.status_code} from {endpoint}")
+                        continue
+
+                    # Check for thinking-related errors that need recovery
+                    if response.status_code in (400, 500):
+                        error_text = response.text.lower()
+                        if "thinking" in error_text or "signature" in error_text:
+                            logger.warning(
+                                "[Gemini] Thinking error detected, clearing session cache and retrying"
+                            )
+                            clear_session_cache()
+                            # Update session ID for retry
+                            wrapped_payload["request"]["sessionId"] = _get_session_id()
+                            last_error = Exception(f"Thinking error: {response.text[:200]}")
+                            break  # Break inner loop to retry with new session
+
+                    # If we got a non-retryable response (success or 4xx client error), use it
+                    if response.status_code < 500 and response.status_code != 429:
+                        break
+
+                except httpx.TimeoutException as e:
+                    last_error = e
+                    continue

-
-
+            # If we broke out of inner loop for thinking recovery, continue outer retry loop
+            if response and response.status_code in (400, 500):
                 continue
-            except Exception as e:
-                last_error = e
-                continue
-        else:
-            # Inner loop completed without break - no thinking recovery needed
             break

-    #
-
-
-
+        # ==============================================
+        # 429 RATE LIMIT DETECTION: Tier-aware fallback chain
+        # ==============================================
+        if response is not None and response.status_code == 429:
+            provider_tracker = get_provider_tracker()
+            provider_tracker.mark_rate_limited(
+                "gemini",
+                duration=_OAUTH_COOLDOWN_SECONDS,
+                reason="Gemini OAuth rate-limited (429)",
+            )

-
-
-
-
+            for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain(
+                "gemini", model
+            ):
+                if candidate_provider == "gemini" and use_oauth:
+                    continue
+                if use_oauth and not provider_tracker.is_available(candidate_provider):
+                    continue

-
-
-
-
-
-    [
-
-
-
-
-
-
-
-
-
+                if candidate_provider == "gemini" and not use_oauth:
+                    api_key = _get_gemini_api_key()
+                    if not api_key:
+                        continue
+                    _set_api_only_mode("OAuth rate-limited (429)")
+                    logger.info("[Gemini] Retrying with API key after OAuth 429")
+                    result = await _invoke_gemini_with_api_key(
+                        api_key=api_key,
+                        prompt=prompt,
+                        model=candidate_model,
+                        temperature=temperature,
+                        max_tokens=max_tokens,
+                        thinking_budget=thinking_budget,
+                        image_path=image_path,
+                        agent_context=agent_context,
+                    )
+                    auth_header = (
+                        f"[Auth: API key (OAuth 429 fallback) | Model: {candidate_model}]\n\n"
+                    )
+                    return auth_header + result
+
+                if candidate_provider == "openai" and use_oauth:
+                    return await invoke_openai(
+                        token_store=token_store,
+                        prompt=prompt,
+                        model=candidate_model,
+                        temperature=temperature,
+                        max_tokens=max_tokens,
+                        thinking_budget=0,
+                        reasoning_effort="medium",
+                    )
+
+            raise ValueError(
+                "OAuth rate-limited (429) and no fallback succeeded. "
+                "Add GEMINI_API_KEY to ~/.stravinsky/.env"
+            )
+        if response is None:
+            # FALLBACK: Try Claude sonnet-4.5 for agents that support it
+            agent_context = params.get("agent_context", {})
+            agent_type = agent_context.get("agent_type", "unknown")
+
+            if agent_type in ("dewey", "explore", "document_writer", "multimodal"):
+                logger.warning(f"[{agent_type}] Gemini failed, falling back to Claude sonnet-4.5")
+                try:
+                    from mcp_bridge.utils.process import async_execute
+
+                    result_obj = await async_execute(
+                        ["claude", "-p", prompt, "--model", "sonnet", "--output-format", "text"],
+                        timeout=120,
+                    )
+                    if result_obj.returncode == 0 and result_obj.stdout.strip():
+                        result = result_obj.stdout.strip()
+                        # Prepend auth header for visibility
+                        auth_header = f"[Auth: Claude fallback | Model: sonnet-4.5]\n\n"
+                        return auth_header + result
+                except Exception as fallback_error:
+                    logger.error(f"Fallback to Claude also failed: {fallback_error}")
+
+            raise ValueError(f"All Antigravity endpoints failed: {last_error}")

-
+        response.raise_for_status()
+        data = response.json()
+
+        # Track usage
+        try:
+            from mcp_bridge.metrics.cost_tracker import get_cost_tracker
+
+            tracker = get_cost_tracker()
+            usage = data.get("usageMetadata", {})
+            input_tokens = usage.get("promptTokenCount", 0)
+            output_tokens = usage.get("candidatesTokenCount", 0)
+
+            tracker.track_usage(
+                model=model,
+                input_tokens=input_tokens,
+                output_tokens=output_tokens,
+                agent_type=agent_type,
+                task_id=task_id,
+            )
+        except Exception as e:
+            logger.warning(f"Failed to track cost: {e}")

-
-
+        # Extract text from response using thinking-aware parser
+        result = _extract_gemini_response(data)

-
-
+        # Prepend auth header for visibility in logs
+        auth_header = f"[Auth: OAuth | Model: {model}]\n\n"
+        return auth_header + result


 # ========================
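Both cooldown paths above walk a tier-aware chain of (provider, model, use_oauth) candidates and return the first invocation that succeeds or is applicable. A provider-agnostic sketch of that walk; the chain and the invoker mapping are illustrative stand-ins for get_oauth_fallback_chain and the package's invoke_* functions:

from typing import Awaitable, Callable

async def invoke_with_fallback(
    chain: list[tuple[str, str, bool]],
    invokers: dict[str, Callable[[str, str], Awaitable[str]]],
    prompt: str,
) -> str:
    last_error: Exception | None = None
    for provider, model, _use_oauth in chain:
        # The real code also skips OAuth candidates whose provider is itself
        # in cooldown, via provider_tracker.is_available().
        invoke = invokers.get(provider)
        if invoke is None:
            continue  # no client for this provider; try the next tier
        try:
            return await invoke(prompt, model)
        except Exception as e:
            last_error = e  # remember the failure and fall through
    raise ValueError(f"No fallback succeeded: {last_error}")

The chain is ordered by tier, so a 429 on Gemini OAuth degrades to the Gemini API key first and only then to another provider such as OpenAI OAuth.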
@@ -552,9 +1050,57 @@ async def invoke_gemini(
 AGENT_TOOLS = [
     {
         "functionDeclarations": [
+            {
+                "name": "semantic_search",
+                "description": "Search codebase with natural language query using semantic embeddings. ALWAYS use this FIRST before grep_search or read_file to find relevant files efficiently. Returns code snippets with file paths and relevance scores.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "Natural language search query (e.g., 'find authentication logic', 'PDF rendering code')",
+                        },
+                        "project_path": {
+                            "type": "string",
+                            "description": "Path to the project root (default: '.')",
+                        },
+                        "n_results": {
+                            "type": "integer",
+                            "description": "Maximum number of results to return (default: 10)",
+                        },
+                    },
+                    "required": ["query"],
+                },
+            },
+            {
+                "name": "hybrid_search",
+                "description": "Hybrid search combining semantic similarity with structural AST pattern matching. Use when you need precise structural patterns (e.g., specific function signatures) combined with semantic relevance.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "Natural language search query (e.g., 'find authentication logic')",
+                        },
+                        "pattern": {
+                            "type": "string",
+                            "description": "Optional ast-grep pattern for structural matching (e.g., 'def $FUNC($$$):', 'async function $NAME($$$)')",
+                        },
+                        "project_path": {
+                            "type": "string",
+                            "description": "Path to the project root (default: '.')",
+                        },
+                        "n_results": {
+                            "type": "integer",
+                            "description": "Maximum number of results to return (default: 10)",
+                        },
+                    },
+                    "required": ["query"],
+                },
+            },
             {
                 "name": "read_file",
-                "description": "Read the contents of a file. Returns the file contents as text.",
+                "description": "Read the contents of a file. Returns the file contents as text. USE ONLY AFTER semantic_search identifies the target file.",
                 "parameters": {
                     "type": "object",
                     "properties": {
@@ -579,7 +1125,7 @@ AGENT_TOOLS = [
             },
             {
                 "name": "grep_search",
-                "description": "Search for a pattern in files using ripgrep. Returns matching lines with file paths and line numbers.",
+                "description": "Search for a pattern in files using ripgrep. Returns matching lines with file paths and line numbers. USE ONLY for precise pattern matching AFTER semantic_search narrows down the search scope.",
                 "parameters": {
                     "type": "object",
                     "properties": {
@@ -609,50 +1155,85 @@
 ]


-def _execute_tool(name: str, args: dict) -> str:
+async def _execute_tool(name: str, args: dict) -> str:
     """Execute a tool and return the result."""
-    import os
-    import subprocess
     from pathlib import Path
+    from mcp_bridge.utils.process import async_execute

     try:
-        if name == "
-
-
-
-
+        if name == "semantic_search":
+            # Import semantic_search function from tools
+            from .semantic_search import semantic_search
+
+            # Extract args with defaults
+            query = args.get("query")
+            if not query:
+                return "Error: 'query' parameter is required for semantic_search"
+
+            project_path = args.get("project_path", ".")
+            n_results = args.get("n_results", 10)
+
+            result = await semantic_search(
+                query=query,
+                project_path=project_path,
+                n_results=n_results,
+            )
+            return result
+
+        elif name == "hybrid_search":
+            # Import hybrid_search function from tools
+            from .semantic_search import hybrid_search
+
+            # Extract args with defaults
+            query = args.get("query")
+            if not query:
+                return "Error: 'query' parameter is required for hybrid_search"
+
+            pattern = args.get("pattern")
+            project_path = args.get("project_path", ".")
+            n_results = args.get("n_results", 10)
+
+            result = await hybrid_search(
+                query=query,
+                pattern=pattern,
+                project_path=project_path,
+                n_results=n_results,
+            )
+            return result
+
+        elif name == "read_file":
+            from .read_file import read_file
+
+            path = args["path"]
+            return await read_file(path)

         elif name == "list_directory":
-
-
-
-
-            for entry in path.iterdir():
-                entry_type = "DIR" if entry.is_dir() else "FILE"
-                entries.append(f"[{entry_type}] {entry.name}")
-            return "\n".join(entries) if entries else "(empty directory)"
+            from .list_directory import list_directory
+
+            path = args["path"]
+            return await list_directory(path)

         elif name == "grep_search":
             pattern = args["pattern"]
             search_path = args["path"]
-
-
-
-                text=True,
-                timeout=30,
+
+            result_obj = await async_execute(
+                ["rg", "--json", "-m", "50", pattern, search_path], timeout=30
             )
-
-
-
+
+            if result_obj.returncode == 0:
+                return result_obj.stdout[:10000]  # Limit output size
+            elif result_obj.returncode == 1:
                 return "No matches found"
             else:
-                return f"Search error: {
+                return f"Search error: {result_obj.stderr}"

         elif name == "write_file":
-
-
-            path
-
+            from .write_file import write_file
+
+            path = args["path"]
+            content = args["content"]
+            return await write_file(path, content)

         else:
             return f"Unknown tool: {name}"
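The rewritten _execute_tool above is a name-keyed async dispatcher whose failures come back as plain strings the model can read. Its shape reduces to a small registry pattern; the handlers here are illustrative, not the package's tools:

import asyncio

async def echo(args: dict) -> str:
    return str(args.get("text", ""))

TOOL_HANDLERS = {"echo": echo}  # illustrative registry

async def execute_tool(name: str, args: dict) -> str:
    handler = TOOL_HANDLERS.get(name)
    if handler is None:
        return f"Unknown tool: {name}"
    try:
        return await handler(args)
    except Exception as e:
        return f"Tool error: {e}"  # errors become text the model can act on

# Example: asyncio.run(execute_tool("echo", {"text": "hi"})) returns "hi"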
@@ -661,32 +1242,244 @@ def _execute_tool(name: str, args: dict) -> str:
|
|
|
661
1242
|
return f"Tool error: {str(e)}"
|
|
662
1243
|
|
|
663
1244
|
|
|
664
|
-
async def
|
|
665
|
-
|
|
1245
|
+
async def _invoke_gemini_agentic_with_api_key(
|
|
1246
|
+
api_key: str,
|
|
666
1247
|
prompt: str,
|
|
667
1248
|
model: str = "gemini-3-flash",
|
|
668
1249
|
max_turns: int = 10,
|
|
669
1250
|
timeout: int = 120,
|
|
670
1251
|
) -> str:
|
|
671
1252
|
"""
|
|
672
|
-
Invoke Gemini with function calling
|
|
1253
|
+
Invoke Gemini with function calling using API key authentication (google-genai library).
|
|
673
1254
|
|
|
674
|
-
This
|
|
1255
|
+
This implements a multi-turn agentic loop:
|
|
675
1256
|
1. Send prompt with tool definitions
|
|
676
1257
|
2. If model returns FunctionCall, execute the tool
|
|
677
1258
|
3. Send FunctionResponse back to model
|
|
678
1259
|
4. Repeat until model returns text or max_turns reached
|
|
679
1260
|
|
|
680
1261
|
Args:
|
|
681
|
-
|
|
1262
|
+
api_key: Gemini API key (from GEMINI_API_KEY or GOOGLE_API_KEY env var)
|
|
682
1263
|
prompt: The task prompt
|
|
683
1264
|
model: Gemini model to use
|
|
684
1265
|
max_turns: Maximum number of tool-use turns
|
|
685
|
-
timeout: Request timeout in seconds
|
|
1266
|
+
timeout: Request timeout in seconds (currently unused by google-genai)
|
|
686
1267
|
|
|
687
1268
|
Returns:
|
|
688
1269
|
Final text response from the model
|
|
1270
|
+
|
|
1271
|
+
Raises:
|
|
1272
|
+
ImportError: If google-genai library is not installed
|
|
1273
|
+
ValueError: If API request fails
|
|
689
1274
|
"""
|
|
1275
|
+
# USER-VISIBLE NOTIFICATION (stderr) - Shows agentic mode with API key
|
|
1276
|
+
import sys
|
|
1277
|
+
|
|
1278
|
+
print(f"🔮 GEMINI (API/Agentic): {model} | max_turns={max_turns}", file=sys.stderr)
|
|
1279
|
+
|
|
1280
|
+
try:
|
|
1281
|
+
from google import genai
|
|
1282
|
+
from google.genai import types
|
|
1283
|
+
except ImportError:
|
|
1284
|
+
raise ImportError(
|
|
1285
|
+
"google-genai library not installed. Install with: pip install google-genai"
|
|
1286
|
+
)
|
|
1287
|
+
|
|
1288
|
+
# Map stravinsky model names to google-genai model names
|
|
1289
|
+
# Pass through gemini-3-* models directly (Tier 3 benefits)
|
|
1290
|
+
model_map = {
|
|
1291
|
+
"gemini-3-flash": "gemini-3-flash-preview", # Tier 3 model (not -exp)
|
|
1292
|
+
"gemini-3-flash-preview": "gemini-3-flash-preview", # Pass through
|
|
1293
|
+
"gemini-3-pro-low": "gemini-3-flash-preview",
|
|
1294
|
+
"gemini-3-pro-high": "gemini-3-pro-preview", # Tier 3 pro model
|
|
1295
|
+
"gemini-3-pro-preview": "gemini-3-pro-preview", # Pass through
|
|
1296
|
+
"gemini-flash": "gemini-3-flash-preview",
|
|
1297
|
+
"gemini-pro": "gemini-3-pro-preview",
|
|
1298
|
+
"gemini-3-pro": "gemini-3-pro-preview",
|
|
1299
|
+
"gemini": "gemini-3-flash-preview",
|
|
1300
|
+
}
|
|
1301
|
+
genai_model = model_map.get(model, "gemini-3-flash-preview") # Default to tier 3 flash
|
|
+
+    # Initialize client with API key
+    client = genai.Client(api_key=api_key)
+
+    # Convert AGENT_TOOLS to google-genai format
+    # google-genai expects tools as a list of Tool objects containing function_declarations
+    function_declarations = []
+    for tool_group in AGENT_TOOLS:
+        for func_decl in tool_group.get("functionDeclarations", []):
+            function_declarations.append(
+                types.FunctionDeclaration(
+                    name=func_decl["name"],
+                    description=func_decl["description"],
+                    parameters=func_decl["parameters"],
+                )
+            )
+
+    # Wrap function declarations in a Tool object
+    tools = [types.Tool(function_declarations=function_declarations)]
+
+    # Initialize conversation with user message
+    contents = [types.Content(role="user", parts=[types.Part(text=prompt)])]
+
+    for turn in range(max_turns):
+        try:
+            # Generate content with tools
+            response = client.models.generate_content(
+                model=genai_model,
+                contents=contents,
+                config=types.GenerateContentConfig(
+                    tools=tools,
+                    temperature=0.7,
+                    max_output_tokens=8192,
+                ),
+            )
+
+            # Check if response has function calls
+            if not response.candidates or not response.candidates[0].content.parts:
+                return "No response generated"
+
+            parts = response.candidates[0].content.parts
+            function_calls = []
+            text_parts = []
+
+            for part in parts:
+                if part.function_call:
+                    function_calls.append(part.function_call)
+                elif part.text:
+                    text_parts.append(part.text)
+
+            # If no function calls, return text response
+            if not function_calls:
+                result = "".join(text_parts)
+                return result if result.strip() else "Task completed"
+
+            # Execute function calls and prepare responses
+            function_responses = []
+            for func_call in function_calls:
+                func_name = func_call.name
+                func_args = dict(func_call.args) if func_call.args else {}
+
+                logger.info(f"[AgenticGemini] Turn {turn + 1}: Executing {func_name}")
+                result = await _execute_tool(func_name, func_args)
+
+                function_responses.append(
+                    types.Part(
+                        function_response=types.FunctionResponse(
+                            name=func_name,
+                            response={"result": result},
+                        )
+                    )
+                )
+
+            # Add model's response to conversation
+            contents.append(response.candidates[0].content)
+
+            # Add function responses to conversation
+            contents.append(
+                types.Content(
+                    role="user",
+                    parts=function_responses,
+                )
+            )
+
+        except Exception as e:
+            logger.error(f"[AgenticGemini] Error in turn {turn + 1}: {e}")
+            raise ValueError(f"Gemini API key request failed: {e}")
+
+    return "Max turns reached without final response"
+
+
+async def invoke_gemini_agentic(
+    token_store: TokenStore,
+    prompt: str,
+    model: str = "gemini-3-flash",
+    max_turns: int = 10,
+    timeout: int = 120,
+) -> str:
+    """
+    Invoke Gemini with function calling for agentic tasks.
+    """
+    from mcp_bridge.proxy.client import is_proxy_enabled, PROXY_URL
+
+    if is_proxy_enabled():
+        import httpx
+
+        async with httpx.AsyncClient(timeout=float(timeout) + 10) as client:
+            payload = {"prompt": prompt, "model": model, "max_turns": max_turns, "timeout": timeout}
+            response = await client.post(f"{PROXY_URL}/v1/gemini/agentic", json=payload)
+            response.raise_for_status()
+            return response.json()["response"]
+
+    import sys
+
+    # Get API key from environment (loaded from ~/.stravinsky/.env)
+    api_key = _get_gemini_api_key()
+
+    # ==============================================
+    # AUTH PRIORITY: OAuth first, API fallback on 429
+    # ==============================================
+    # 1. If API-only mode (after 429), use API key directly
+    # 2. Otherwise, try OAuth first
+    # 3. On 429 from OAuth, switch to API-only mode and retry
+
+    # If we're in API-only mode (after a 429), use API key directly
+    if _is_api_only_mode():
+        if not api_key:
+            raise ValueError(
+                "OAuth rate-limited (429) and no API key available. "
+                "Add GEMINI_API_KEY to ~/.stravinsky/.env"
+            )
+
+        # Calculate remaining cooldown time
+        if _GEMINI_OAUTH_429_TIMESTAMP is not None:
+            remaining = _OAUTH_COOLDOWN_SECONDS - (time.time() - _GEMINI_OAUTH_429_TIMESTAMP)
+            remaining_mins = int(remaining // 60)
+            remaining_secs = int(remaining % 60)
+            cooldown_msg = f" (OAuth retry in {remaining_mins}m {remaining_secs}s)"
+        else:
+            cooldown_msg = ""
+
+        # Check time-window rate limit (30 req/min)
+        time_limiter = get_gemini_time_limiter()
+        wait_time = time_limiter.acquire_visible("GEMINI", "API key")
+        if wait_time > 0:
+            await asyncio.sleep(wait_time)
+            # Re-acquire after sleep
+            wait_time = time_limiter.acquire_visible("GEMINI", "API key")
+
+        print(
+            f"🔑 GEMINI (API-only cooldown{cooldown_msg}/Agentic): {model} | max_turns={max_turns}",
+            file=sys.stderr,
+        )
+        logger.info("[AgenticGemini] Using API key (5-min cooldown after OAuth 429)")
+        result = await _invoke_gemini_agentic_with_api_key(
+            api_key=api_key,
+            prompt=prompt,
+            model=model,
+            max_turns=max_turns,
+            timeout=timeout,
+        )
+        # Prepend auth header for visibility in logs
+        auth_header = f"[Auth: API key (5-min cooldown, Agentic) | Model: {model}]\n\n"
+        return auth_header + result
+
+    # DEFAULT: Try OAuth first (Antigravity)
+    logger.info("[AgenticGemini] Using OAuth authentication (Antigravity)")
+
+    # Check time-window rate limit (30 req/min)
+    time_limiter = get_gemini_time_limiter()
+    wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
+    if wait_time > 0:
+        await asyncio.sleep(wait_time)
+        # Re-acquire after sleep
+        wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
+
+    # USER-VISIBLE NOTIFICATION (stderr) - Shows agentic mode with OAuth
+    import sys
+
+    print(f"🔮 GEMINI (OAuth/Agentic): {model} | max_turns={max_turns}", file=sys.stderr)
+
     access_token = await _ensure_valid_token(token_store, "gemini")
     api_model = resolve_gemini_model(model)
 
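Note: the helper above boils down to one round trip per turn: send the prompt with tool declarations, execute any FunctionCall the model returns, and feed a FunctionResponse back. A minimal single-turn sketch of that round trip using the same google-genai calls as the diff (the get_time tool and prompt are illustrative; requires GEMINI_API_KEY and network access):

    import os
    from google import genai
    from google.genai import types

    client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
    tool = types.Tool(function_declarations=[
        types.FunctionDeclaration(
            name="get_time",
            description="Return the current UTC time",
        )
    ])
    contents = [types.Content(role="user", parts=[types.Part(text="What time is it?")])]
    response = client.models.generate_content(
        model="gemini-3-flash-preview",
        contents=contents,
        config=types.GenerateContentConfig(tools=[tool]),
    )
    part = response.candidates[0].content.parts[0]
    if part.function_call:
        # Execute the tool, then send the result back as a FunctionResponse
        contents.append(response.candidates[0].content)
        contents.append(types.Content(role="user", parts=[
            types.Part(function_response=types.FunctionResponse(
                name=part.function_call.name,
                response={"result": "2025-01-01T00:00:00Z"},
            ))
        ]))

The production loop repeats this exchange up to max_turns times before giving up.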
@@ -773,6 +1566,33 @@ async def invoke_gemini_agentic(
             logger.warning(f"[AgenticGemini] Endpoint {endpoint} failed: {e}, trying next")
             continue
 
+    # ==============================================
+    # 429 RATE LIMIT DETECTION: Fallback to API key
+    # ==============================================
+    # If OAuth got rate-limited (429), switch to API-only mode and retry
+    if response is not None and response.status_code == 429:
+        api_key = _get_gemini_api_key()
+        if api_key:
+            _set_api_only_mode("OAuth rate-limited (429) in agentic mode")
+            logger.info("[AgenticGemini] Retrying with API key after OAuth 429")
+            # Retry entire agentic call with API key
+            result = await _invoke_gemini_agentic_with_api_key(
+                api_key=api_key,
+                prompt=prompt,
+                model=model,
+                max_turns=max_turns,
+                timeout=timeout,
+            )
+            # Prepend auth header for visibility
+            auth_header = f"[Auth: API key (OAuth 429 fallback, Agentic) | Model: {model}]\n\n"
+            return auth_header + result
+        else:
+            # No API key available - raise clear error
+            raise ValueError(
+                "OAuth rate-limited (429) and no API key available. "
+                "Add GEMINI_API_KEY to ~/.stravinsky/.env"
+            )
+
     if response is None:
         raise ValueError(f"All Antigravity endpoints failed: {last_error}")
 
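The internals of _set_api_only_mode and _is_api_only_mode are not shown in this diff; a plausible minimal sketch, assuming they gate on the module-level timestamp and the 5-minute cooldown that the surrounding code references:

    import time

    _GEMINI_OAUTH_429_TIMESTAMP: float | None = None
    _OAUTH_COOLDOWN_SECONDS = 300  # 5-minute OAuth cooldown

    def _set_api_only_mode(reason: str) -> None:
        # Open the cooldown window; callers log the reason separately
        global _GEMINI_OAUTH_429_TIMESTAMP
        _GEMINI_OAUTH_429_TIMESTAMP = time.time()

    def _is_api_only_mode() -> bool:
        # API-only mode lasts until the cooldown window closes
        if _GEMINI_OAUTH_429_TIMESTAMP is None:
            return False
        return time.time() - _GEMINI_OAUTH_429_TIMESTAMP < _OAUTH_COOLDOWN_SECONDS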
@@ -783,13 +1603,15 @@ async def invoke_gemini_agentic(
         inner_response = data.get("response", data)
         candidates = inner_response.get("candidates", [])
         if not candidates:
-
+            auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+            return auth_header + "No response generated"
 
         content = candidates[0].get("content", {})
         parts = content.get("parts", [])
 
         if not parts:
-
+            auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+            return auth_header + "No response parts"
 
         # Check for function call
         function_call = None
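Unlike the API-key path, the OAuth path works on raw JSON, and the Antigravity endpoint may wrap the usual Gemini body under a "response" key. A standalone sketch of the same drill-down (the helper name is illustrative):

    def first_parts(data: dict) -> list:
        # Antigravity responses may wrap the Gemini body under "response"
        inner = data.get("response", data)
        candidates = inner.get("candidates", [])
        if not candidates:
            return []
        return candidates[0].get("content", {}).get("parts", [])

    body = {"response": {"candidates": [{"content": {"parts": [{"text": "hi"}]}}]}}
    print(first_parts(body))  # [{'text': 'hi'}]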
@@ -808,7 +1630,7 @@ async def invoke_gemini_agentic(
             func_args = function_call.get("args", {})
 
             logger.info(f"[AgenticGemini] Turn {turn + 1}: Executing {func_name}")
-            result = _execute_tool(func_name, func_args)
+            result = await _execute_tool(func_name, func_args)
 
             # Add model's response and function result to conversation
             contents.append({"role": "model", "parts": [{"functionCall": function_call}]})
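This one-line change fixes a classic asyncio bug: calling a coroutine function without await returns a coroutine object and the tool never actually runs. A self-contained demonstration (the stub _execute_tool is illustrative, not the real implementation):

    import asyncio

    async def _execute_tool(name: str, args: dict) -> str:
        await asyncio.sleep(0)  # stand-in for real tool work
        return f"{name} -> ok"

    async def main():
        broken = _execute_tool("read_file", {})       # coroutine object; never runs
        fixed = await _execute_tool("read_file", {})  # actually executes
        print(type(broken).__name__, "|", fixed)      # coroutine | read_file -> ok
        broken.close()  # suppress the "never awaited" warning

    asyncio.run(main())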
@@ -822,17 +1644,20 @@ async def invoke_gemini_agentic(
             )
         else:
             # No function call, return text response
-
+            result = text_response or "Task completed"
+            auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+            return auth_header + result
 
-
+    auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+    return auth_header + "Max turns reached without final response"
 
 
 @retry(
-    stop=stop_after_attempt(
-    wait=wait_exponential(multiplier=
+    stop=stop_after_attempt(2),  # Reduced from 5 to 2 attempts
+    wait=wait_exponential(multiplier=2, min=10, max=120),  # Longer waits: 10s → 20s → 40s
     retry=retry_if_exception(is_retryable_exception),
     before_sleep=lambda retry_state: logger.info(
-        f"
+        f"Server error, retrying in {retry_state.next_action.sleep} seconds..."
     ),
 )
 async def invoke_openai(
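The tenacity decorator above retries only errors accepted by the project-specific is_retryable_exception predicate. A minimal runnable sketch of the same pattern, swapping in tenacity's retry_if_exception_type and shortening the waits so the demo finishes quickly:

    from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

    attempts = {"n": 0}

    @retry(
        stop=stop_after_attempt(2),                               # two attempts total: one retry
        wait=wait_exponential(multiplier=0.1, min=0.1, max=0.5),  # shortened for the demo
        retry=retry_if_exception_type(ConnectionError),
    )
    def flaky() -> str:
        attempts["n"] += 1
        if attempts["n"] < 2:
            raise ConnectionError("transient server error")
        return "ok"

    print(flaky(), "after", attempts["n"], "attempts")  # ok after 2 attempts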
@@ -842,24 +1667,23 @@ async def invoke_openai(
     temperature: float = 0.7,
     max_tokens: int = 4096,
     thinking_budget: int = 0,
+    reasoning_effort: str = "medium",
 ) -> str:
     """
     Invoke an OpenAI model with the given prompt.
-
-    Args:
-        token_store: Token store for API key
-        prompt: The prompt to send to OpenAI
-        model: OpenAI model to use
-        temperature: Sampling temperature (0.0-2.0)
-        max_tokens: Maximum tokens in response
-
-    Returns:
-        The model's response text.
-
-    Raises:
-        ValueError: If not authenticated with OpenAI
-        httpx.HTTPStatusError: If API request fails
     """
+    from mcp_bridge.proxy.client import is_proxy_enabled, proxy_invoke_openai
+
+    if is_proxy_enabled():
+        return await proxy_invoke_openai(
+            prompt=prompt,
+            model=model,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            thinking_budget=thinking_budget,
+            reasoning_effort=reasoning_effort,
+        )
+
     # Execute pre-model invoke hooks
     params = {
         "prompt": prompt,
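Both providers now short-circuit to the proxy when it is enabled, delegating the whole call and returning the proxy's response unchanged. A hypothetical sketch of that delegation pattern, mirroring the timeout padding used in the Gemini agentic path (the URL and route below are illustrative, not the actual mcp_bridge.proxy.client implementation):

    import asyncio
    import httpx

    async def call_proxy(prompt: str, timeout: int = 120) -> str:
        # Pad the transport timeout so it outlives the model-side timeout
        async with httpx.AsyncClient(timeout=float(timeout) + 10) as client:
            resp = await client.post(
                "http://127.0.0.1:8787/v1/openai/invoke",
                json={"prompt": prompt, "timeout": timeout},
            )
            resp.raise_for_status()
            return resp.json()["response"]

    # asyncio.run(call_proxy("hello"))  # requires a running proxy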
@@ -867,6 +1691,7 @@ async def invoke_openai(
         "temperature": temperature,
         "max_tokens": max_tokens,
         "thinking_budget": thinking_budget,
+        "reasoning_effort": reasoning_effort,
         "token_store": token_store,  # Pass for hooks that need model access
         "provider": "openai",  # Identify which provider is being called
     }
@@ -879,6 +1704,7 @@ async def invoke_openai(
     temperature = params["temperature"]
     max_tokens = params["max_tokens"]
     thinking_budget = params["thinking_budget"]
+    reasoning_effort = params.get("reasoning_effort", "medium")
 
     # Extract agent context for logging (may be passed via params or original call)
     agent_context = params.get("agent_context", {})
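These two hunks show the params dict being round-tripped through the pre-invoke hooks: hooks may rewrite any field, and the caller reads the values back afterward, with a .get default for the new key so hooks that drop it stay compatible. A minimal sketch of that pattern (the hook itself is illustrative):

    import asyncio

    async def redact_hook(params: dict) -> dict:
        # Hooks receive the params dict and may rewrite any field
        params["prompt"] = params["prompt"].replace("secret", "[redacted]")
        return params

    async def main():
        params = {"prompt": "the secret plan", "reasoning_effort": "medium"}
        for hook in (redact_hook,):
            params = await hook(params)
        # Caller reads values back, defaulting if a hook dropped the key
        effort = params.get("reasoning_effort", "medium")
        print(params["prompt"], "|", effort)  # the [redacted] plan | medium

    asyncio.run(main())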
@@ -892,19 +1718,55 @@ async def invoke_openai(
 
     # USER-VISIBLE NOTIFICATION (stderr) - Shows when OpenAI is invoked
     import sys
+
     task_info = f" task={task_id}" if task_id else ""
     desc_info = f" | {description}" if description else ""
+
+    # ==============================================
+    # AUTH PRIORITY: OAuth first, Gemini fallback on 429
+    # ==============================================
+    # 1. If fallback mode (after 429), use Gemini directly
+    # 2. Otherwise, try OpenAI OAuth first
+    # 3. On 429 from OAuth, switch to fallback mode and retry with Gemini
+
+    provider_tracker = get_provider_tracker()
+
+    # If OpenAI is in cooldown, follow tier-aware fallback chain.
+    if not provider_tracker.is_available("openai"):
+        for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain("openai", model):
+            if candidate_provider == "openai":
+                continue
+            if use_oauth and not provider_tracker.is_available(candidate_provider):
+                continue
+
+            if candidate_provider == "gemini":
+                if not use_oauth:
+                    # Force Gemini API-key mode for the cooldown window.
+                    if _get_gemini_api_key() is None:
+                        continue
+                    _set_api_only_mode("OpenAI in cooldown; using Gemini API key")
+
+                return await invoke_gemini(
+                    token_store=token_store,
+                    prompt=prompt,
+                    model=candidate_model,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    thinking_budget=0,
+                    image_path=None,
+                )
+    # DEFAULT: Try OpenAI OAuth first
     print(f"🧠 OPENAI: {model} | agent={agent_type}{task_info}{desc_info}", file=sys.stderr)
 
     access_token = await _ensure_valid_token(token_store, "openai")
-    logger.info(
+    logger.info("[invoke_openai] Got access token")
 
     # ChatGPT Backend API - Uses Codex Responses endpoint
     # Replicates opencode-openai-codex-auth plugin behavior
     api_url = "https://chatgpt.com/backend-api/codex/responses"
 
     # Extract account ID from JWT token
-    logger.info(
+    logger.info("[invoke_openai] Extracting account ID from JWT")
     try:
         parts = access_token.split(".")
         payload_b64 = parts[1]
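The account-ID extraction relies on the standard JWT layout: header.payload.signature, with the payload being base64url-encoded JSON whose padding is stripped. A self-contained sketch of the decode step (the claim name below is illustrative; the actual claim used for the account ID is not shown in this hunk):

    import base64
    import json

    def jwt_payload(token: str) -> dict:
        # A JWT is header.payload.signature; the payload is base64url JSON
        payload_b64 = token.split(".")[1]
        payload_b64 += "=" * (-len(payload_b64) % 4)  # restore stripped padding
        return json.loads(base64.urlsafe_b64decode(payload_b64))

    # Throwaway demo token (not a real credential)
    claims = base64.urlsafe_b64encode(
        json.dumps({"account_id": "acct_demo"}).encode()
    ).decode().rstrip("=")
    print(jwt_payload(f"h.{claims}.s")["account_id"])  # acct_demo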
@@ -932,6 +1794,10 @@ async def invoke_openai(
     if account_id:
         headers["x-openai-account-id"] = account_id
 
+    # Determine final effort
+    # Legacy: thinking_budget > 0 implies high effort
+    effort = "high" if thinking_budget > 0 else reasoning_effort
+
     # Request body matching opencode transformation
     payload = {
         "model": model,
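The effort resolution keeps the legacy thinking_budget parameter working alongside the new reasoning_effort one. In isolation (function name illustrative):

    def resolve_effort(thinking_budget: int, reasoning_effort: str) -> str:
        # Legacy callers signal high effort via a positive thinking_budget;
        # otherwise the explicit reasoning_effort wins
        return "high" if thinking_budget > 0 else reasoning_effort

    assert resolve_effort(1024, "medium") == "high"  # legacy path
    assert resolve_effort(0, "low") == "low"         # new parameter wins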
@@ -939,7 +1805,7 @@ async def invoke_openai(
         "stream": True,  # Always stream (handler converts to non-stream if needed)
         "instructions": instructions,
         "input": [{"role": "user", "content": prompt}],
-        "reasoning": {"effort":
+        "reasoning": {"effort": effort, "summary": "auto"},
         "text": {"verbosity": "medium"},
         "include": ["reasoning.encrypted_content"],
     }
@@ -952,44 +1818,100 @@ async def invoke_openai(
     logger.info(f"[invoke_openai] Instructions length: {len(instructions)}")
 
     try:
-        async with
-
+        async with (
+            httpx.AsyncClient() as client,
+            client.stream(
                 "POST", api_url, headers=headers, json=payload, timeout=120.0
-            ) as response
-
-
-
-
-
+            ) as response,
+        ):
+            logger.info(f"[invoke_openai] Response status: {response.status_code}")
+            if response.status_code == 401:
+                raise ValueError("OpenAI authentication failed. Run: stravinsky-auth login openai")
+
+            # ==============================================
+            # 429 RATE LIMIT DETECTION: Tier-aware fallback chain
+            # ==============================================
+            if response.status_code == 429:
+                provider_tracker = get_provider_tracker()
+                provider_tracker.mark_rate_limited(
+                    "openai",
+                    duration=_OAUTH_COOLDOWN_SECONDS,
+                    reason="OpenAI OAuth rate-limited (429)",
+                )
+
+                for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain(
+                    "openai", model
+                ):
+                    if candidate_provider == "openai":
+                        continue
+                    if use_oauth and not provider_tracker.is_available(candidate_provider):
+                        continue
+
+                    if candidate_provider == "gemini":
+                        if not use_oauth:
+                            if _get_gemini_api_key() is None:
+                                continue
+                            _set_api_only_mode("OpenAI OAuth rate-limited (429)")
+
+                        return await invoke_gemini(
+                            token_store=token_store,
+                            prompt=prompt,
+                            model=candidate_model,
+                            temperature=temperature,
+                            max_tokens=max_tokens,
+                            thinking_budget=0,
+                            image_path=None,
+                        )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                raise ValueError("OpenAI OAuth rate-limited (429) and no fallback succeeded")
+            if response.status_code >= 400:
+                error_body = await response.aread()
+                error_text = error_body.decode("utf-8")
+                logger.error(f"OpenAI API error {response.status_code}: {error_text}")
+                logger.error(f"Request payload was: {payload}")
+                logger.error(f"Request headers were: {headers}")
+                raise ValueError(f"OpenAI API error {response.status_code}: {error_text}")
+
+            # Parse SSE stream for text deltas
+            async for line in response.aiter_lines():
+                if line.startswith("data: "):
+                    data_json = line[6:]  # Remove "data: " prefix
+                    try:
+                        data = json_module.loads(data_json)
+                        event_type = data.get("type")
+
+                        # Extract text deltas from SSE stream
+                        if event_type == "response.output_text.delta":
+                            delta = data.get("delta", "")
+                            text_chunks.append(delta)
+
+                    except json_module.JSONDecodeError:
+                        pass  # Skip malformed JSON
+                    except Exception as e:
+                        logger.warning(f"Error processing SSE event: {e}")
 
         # Return collected text
         result = "".join(text_chunks)
+
+        # Track estimated usage
+        try:
+            from mcp_bridge.metrics.cost_tracker import get_cost_tracker
+
+            tracker = get_cost_tracker()
+            # Estimate: 4 chars per token
+            input_tokens = len(prompt) // 4
+            output_tokens = len(result) // 4
+
+            tracker.track_usage(
+                model=model,
+                input_tokens=input_tokens,
+                output_tokens=output_tokens,
+                agent_type=agent_type,
+                task_id=task_id,
+            )
+        except Exception as e:
+            logger.warning(f"Failed to track cost: {e}")
+
         if not result:
             return "No response generated"
         return result
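The SSE handling above keys on "response.output_text.delta" events and silently skips everything else, including non-JSON keepalive lines. A standalone sketch of the same parsing as a pure function (the sample lines are illustrative):

    import json

    def extract_deltas(sse_lines) -> str:
        # Collect text deltas from a Responses-style SSE stream
        chunks = []
        for line in sse_lines:
            if not line.startswith("data: "):
                continue
            try:
                event = json.loads(line[6:])
            except json.JSONDecodeError:
                continue  # e.g. the "[DONE]" sentinel
            if event.get("type") == "response.output_text.delta":
                chunks.append(event.get("delta", ""))
        return "".join(chunks)

    stream = [
        'data: {"type": "response.output_text.delta", "delta": "Hel"}',
        'data: {"type": "response.output_text.delta", "delta": "lo"}',
        "data: [DONE]",
    ]
    print(extract_deltas(stream))  # Hello

The 4-characters-per-token estimate feeding the cost tracker is a rough heuristic, not a tokenizer-accurate count, which is why the tracking is wrapped in a best-effort try/except.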