stravinsky 0.2.67__py3-none-any.whl → 0.4.66__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of stravinsky might be problematic.

Files changed (190)
  1. mcp_bridge/__init__.py +1 -1
  2. mcp_bridge/auth/__init__.py +16 -6
  3. mcp_bridge/auth/cli.py +202 -11
  4. mcp_bridge/auth/oauth.py +1 -2
  5. mcp_bridge/auth/openai_oauth.py +4 -7
  6. mcp_bridge/auth/token_store.py +112 -11
  7. mcp_bridge/cli/__init__.py +1 -1
  8. mcp_bridge/cli/install_hooks.py +503 -107
  9. mcp_bridge/cli/session_report.py +0 -3
  10. mcp_bridge/config/MANIFEST_SCHEMA.md +305 -0
  11. mcp_bridge/config/README.md +276 -0
  12. mcp_bridge/config/__init__.py +2 -2
  13. mcp_bridge/config/hook_config.py +247 -0
  14. mcp_bridge/config/hooks_manifest.json +138 -0
  15. mcp_bridge/config/rate_limits.py +317 -0
  16. mcp_bridge/config/skills_manifest.json +128 -0
  17. mcp_bridge/hooks/HOOKS_SETTINGS.json +17 -4
  18. mcp_bridge/hooks/__init__.py +19 -4
  19. mcp_bridge/hooks/agent_reminder.py +4 -4
  20. mcp_bridge/hooks/auto_slash_command.py +5 -5
  21. mcp_bridge/hooks/budget_optimizer.py +2 -2
  22. mcp_bridge/hooks/claude_limits_hook.py +114 -0
  23. mcp_bridge/hooks/comment_checker.py +3 -4
  24. mcp_bridge/hooks/compaction.py +2 -2
  25. mcp_bridge/hooks/context.py +2 -1
  26. mcp_bridge/hooks/context_monitor.py +2 -2
  27. mcp_bridge/hooks/delegation_policy.py +85 -0
  28. mcp_bridge/hooks/directory_context.py +3 -3
  29. mcp_bridge/hooks/edit_recovery.py +3 -2
  30. mcp_bridge/hooks/edit_recovery_policy.py +49 -0
  31. mcp_bridge/hooks/empty_message_sanitizer.py +2 -2
  32. mcp_bridge/hooks/events.py +160 -0
  33. mcp_bridge/hooks/git_noninteractive.py +4 -4
  34. mcp_bridge/hooks/keyword_detector.py +8 -10
  35. mcp_bridge/hooks/manager.py +43 -22
  36. mcp_bridge/hooks/notification_hook.py +13 -6
  37. mcp_bridge/hooks/parallel_enforcement_policy.py +67 -0
  38. mcp_bridge/hooks/parallel_enforcer.py +5 -5
  39. mcp_bridge/hooks/parallel_execution.py +22 -10
  40. mcp_bridge/hooks/post_tool/parallel_validation.py +103 -0
  41. mcp_bridge/hooks/pre_compact.py +8 -9
  42. mcp_bridge/hooks/pre_tool/agent_spawn_validator.py +115 -0
  43. mcp_bridge/hooks/preemptive_compaction.py +2 -3
  44. mcp_bridge/hooks/routing_notifications.py +80 -0
  45. mcp_bridge/hooks/rules_injector.py +11 -19
  46. mcp_bridge/hooks/session_idle.py +4 -4
  47. mcp_bridge/hooks/session_notifier.py +4 -4
  48. mcp_bridge/hooks/session_recovery.py +4 -5
  49. mcp_bridge/hooks/stravinsky_mode.py +1 -1
  50. mcp_bridge/hooks/subagent_stop.py +1 -3
  51. mcp_bridge/hooks/task_validator.py +2 -2
  52. mcp_bridge/hooks/tmux_manager.py +7 -8
  53. mcp_bridge/hooks/todo_delegation.py +4 -1
  54. mcp_bridge/hooks/todo_enforcer.py +180 -10
  55. mcp_bridge/hooks/tool_messaging.py +113 -10
  56. mcp_bridge/hooks/truncation_policy.py +37 -0
  57. mcp_bridge/hooks/truncator.py +1 -2
  58. mcp_bridge/metrics/cost_tracker.py +115 -0
  59. mcp_bridge/native_search.py +93 -0
  60. mcp_bridge/native_watcher.py +118 -0
  61. mcp_bridge/notifications.py +150 -0
  62. mcp_bridge/orchestrator/enums.py +11 -0
  63. mcp_bridge/orchestrator/router.py +165 -0
  64. mcp_bridge/orchestrator/state.py +32 -0
  65. mcp_bridge/orchestrator/visualization.py +14 -0
  66. mcp_bridge/orchestrator/wisdom.py +34 -0
  67. mcp_bridge/prompts/__init__.py +1 -8
  68. mcp_bridge/prompts/dewey.py +1 -1
  69. mcp_bridge/prompts/planner.py +2 -4
  70. mcp_bridge/prompts/stravinsky.py +53 -31
  71. mcp_bridge/proxy/__init__.py +0 -0
  72. mcp_bridge/proxy/client.py +70 -0
  73. mcp_bridge/proxy/model_server.py +157 -0
  74. mcp_bridge/routing/__init__.py +43 -0
  75. mcp_bridge/routing/config.py +250 -0
  76. mcp_bridge/routing/model_tiers.py +135 -0
  77. mcp_bridge/routing/provider_state.py +261 -0
  78. mcp_bridge/routing/task_classifier.py +190 -0
  79. mcp_bridge/server.py +542 -59
  80. mcp_bridge/server_tools.py +738 -6
  81. mcp_bridge/tools/__init__.py +40 -25
  82. mcp_bridge/tools/agent_manager.py +616 -697
  83. mcp_bridge/tools/background_tasks.py +13 -17
  84. mcp_bridge/tools/code_search.py +70 -53
  85. mcp_bridge/tools/continuous_loop.py +0 -1
  86. mcp_bridge/tools/dashboard.py +19 -0
  87. mcp_bridge/tools/find_code.py +296 -0
  88. mcp_bridge/tools/init.py +1 -0
  89. mcp_bridge/tools/list_directory.py +42 -0
  90. mcp_bridge/tools/lsp/__init__.py +12 -5
  91. mcp_bridge/tools/lsp/manager.py +471 -0
  92. mcp_bridge/tools/lsp/tools.py +723 -207
  93. mcp_bridge/tools/model_invoke.py +1195 -273
  94. mcp_bridge/tools/mux_client.py +75 -0
  95. mcp_bridge/tools/project_context.py +1 -2
  96. mcp_bridge/tools/query_classifier.py +406 -0
  97. mcp_bridge/tools/read_file.py +84 -0
  98. mcp_bridge/tools/replace.py +45 -0
  99. mcp_bridge/tools/run_shell_command.py +38 -0
  100. mcp_bridge/tools/search_enhancements.py +347 -0
  101. mcp_bridge/tools/semantic_search.py +3627 -0
  102. mcp_bridge/tools/session_manager.py +0 -2
  103. mcp_bridge/tools/skill_loader.py +0 -1
  104. mcp_bridge/tools/task_runner.py +5 -7
  105. mcp_bridge/tools/templates.py +3 -3
  106. mcp_bridge/tools/tool_search.py +331 -0
  107. mcp_bridge/tools/write_file.py +29 -0
  108. mcp_bridge/update_manager.py +585 -0
  109. mcp_bridge/update_manager_pypi.py +297 -0
  110. mcp_bridge/utils/cache.py +82 -0
  111. mcp_bridge/utils/process.py +71 -0
  112. mcp_bridge/utils/session_state.py +51 -0
  113. mcp_bridge/utils/truncation.py +76 -0
  114. stravinsky-0.4.66.dist-info/METADATA +517 -0
  115. stravinsky-0.4.66.dist-info/RECORD +198 -0
  116. {stravinsky-0.2.67.dist-info → stravinsky-0.4.66.dist-info}/entry_points.txt +1 -0
  117. stravinsky_claude_assets/HOOKS_INTEGRATION.md +316 -0
  118. stravinsky_claude_assets/agents/HOOKS.md +437 -0
  119. stravinsky_claude_assets/agents/code-reviewer.md +210 -0
  120. stravinsky_claude_assets/agents/comment_checker.md +580 -0
  121. stravinsky_claude_assets/agents/debugger.md +254 -0
  122. stravinsky_claude_assets/agents/delphi.md +495 -0
  123. stravinsky_claude_assets/agents/dewey.md +248 -0
  124. stravinsky_claude_assets/agents/explore.md +1198 -0
  125. stravinsky_claude_assets/agents/frontend.md +472 -0
  126. stravinsky_claude_assets/agents/implementation-lead.md +164 -0
  127. stravinsky_claude_assets/agents/momus.md +464 -0
  128. stravinsky_claude_assets/agents/research-lead.md +141 -0
  129. stravinsky_claude_assets/agents/stravinsky.md +730 -0
  130. stravinsky_claude_assets/commands/delphi.md +9 -0
  131. stravinsky_claude_assets/commands/dewey.md +54 -0
  132. stravinsky_claude_assets/commands/git-master.md +112 -0
  133. stravinsky_claude_assets/commands/index.md +49 -0
  134. stravinsky_claude_assets/commands/publish.md +86 -0
  135. stravinsky_claude_assets/commands/review.md +73 -0
  136. stravinsky_claude_assets/commands/str/agent_cancel.md +70 -0
  137. stravinsky_claude_assets/commands/str/agent_list.md +56 -0
  138. stravinsky_claude_assets/commands/str/agent_output.md +92 -0
  139. stravinsky_claude_assets/commands/str/agent_progress.md +74 -0
  140. stravinsky_claude_assets/commands/str/agent_retry.md +94 -0
  141. stravinsky_claude_assets/commands/str/cancel.md +51 -0
  142. stravinsky_claude_assets/commands/str/clean.md +97 -0
  143. stravinsky_claude_assets/commands/str/continue.md +38 -0
  144. stravinsky_claude_assets/commands/str/index.md +199 -0
  145. stravinsky_claude_assets/commands/str/list_watchers.md +96 -0
  146. stravinsky_claude_assets/commands/str/search.md +205 -0
  147. stravinsky_claude_assets/commands/str/start_filewatch.md +136 -0
  148. stravinsky_claude_assets/commands/str/stats.md +71 -0
  149. stravinsky_claude_assets/commands/str/stop_filewatch.md +89 -0
  150. stravinsky_claude_assets/commands/str/unwatch.md +42 -0
  151. stravinsky_claude_assets/commands/str/watch.md +45 -0
  152. stravinsky_claude_assets/commands/strav.md +53 -0
  153. stravinsky_claude_assets/commands/stravinsky.md +292 -0
  154. stravinsky_claude_assets/commands/verify.md +60 -0
  155. stravinsky_claude_assets/commands/version.md +5 -0
  156. stravinsky_claude_assets/hooks/README.md +248 -0
  157. stravinsky_claude_assets/hooks/comment_checker.py +193 -0
  158. stravinsky_claude_assets/hooks/context.py +38 -0
  159. stravinsky_claude_assets/hooks/context_monitor.py +153 -0
  160. stravinsky_claude_assets/hooks/dependency_tracker.py +73 -0
  161. stravinsky_claude_assets/hooks/edit_recovery.py +46 -0
  162. stravinsky_claude_assets/hooks/execution_state_tracker.py +68 -0
  163. stravinsky_claude_assets/hooks/notification_hook.py +103 -0
  164. stravinsky_claude_assets/hooks/notification_hook_v2.py +96 -0
  165. stravinsky_claude_assets/hooks/parallel_execution.py +241 -0
  166. stravinsky_claude_assets/hooks/parallel_reinforcement.py +106 -0
  167. stravinsky_claude_assets/hooks/parallel_reinforcement_v2.py +112 -0
  168. stravinsky_claude_assets/hooks/pre_compact.py +123 -0
  169. stravinsky_claude_assets/hooks/ralph_loop.py +173 -0
  170. stravinsky_claude_assets/hooks/session_recovery.py +263 -0
  171. stravinsky_claude_assets/hooks/stop_hook.py +89 -0
  172. stravinsky_claude_assets/hooks/stravinsky_metrics.py +164 -0
  173. stravinsky_claude_assets/hooks/stravinsky_mode.py +146 -0
  174. stravinsky_claude_assets/hooks/subagent_stop.py +98 -0
  175. stravinsky_claude_assets/hooks/todo_continuation.py +111 -0
  176. stravinsky_claude_assets/hooks/todo_delegation.py +96 -0
  177. stravinsky_claude_assets/hooks/tool_messaging.py +281 -0
  178. stravinsky_claude_assets/hooks/truncator.py +23 -0
  179. stravinsky_claude_assets/rules/deployment_safety.md +51 -0
  180. stravinsky_claude_assets/rules/integration_wiring.md +89 -0
  181. stravinsky_claude_assets/rules/pypi_deployment.md +220 -0
  182. stravinsky_claude_assets/rules/stravinsky_orchestrator.md +32 -0
  183. stravinsky_claude_assets/settings.json +152 -0
  184. stravinsky_claude_assets/skills/chrome-devtools/SKILL.md +81 -0
  185. stravinsky_claude_assets/skills/sqlite/SKILL.md +77 -0
  186. stravinsky_claude_assets/skills/supabase/SKILL.md +74 -0
  187. stravinsky_claude_assets/task_dependencies.json +34 -0
  188. stravinsky-0.2.67.dist-info/METADATA +0 -284
  189. stravinsky-0.2.67.dist-info/RECORD +0 -76
  190. {stravinsky-0.2.67.dist-info → stravinsky-0.4.66.dist-info}/WHEEL +0 -0
@@ -5,12 +5,17 @@ These tools use OAuth tokens from the token store to authenticate
 API requests to external model providers.
 """
 
+import asyncio
+import base64
+import json as json_module
 import logging
 import os
 import time
 import uuid
-import base64
-import json as json_module
+
+from mcp_bridge.config.rate_limits import get_rate_limiter, get_gemini_time_limiter
+from mcp_bridge.routing.model_tiers import get_oauth_fallback_chain
+from mcp_bridge.routing.provider_state import get_provider_tracker
 
 logger = logging.getLogger(__name__)
 
@@ -42,6 +47,130 @@ def _summarize_prompt(prompt: str, max_length: int = 120) -> str:
 _CODEX_INSTRUCTIONS_CACHE = {}
 _CODEX_INSTRUCTIONS_RELEASE_TAG = "rust-v0.77.0"  # Update as needed
 
+# ==============================================
+# GEMINI AUTH MODE STATE (OAuth-first with 429 fallback)
+# ==============================================
+# When OAuth gets a 429 rate limit, we switch to API-only mode for 5 minutes.
+# After 5 minutes, we automatically retry OAuth.
+_GEMINI_OAUTH_429_TIMESTAMP: float | None = None  # Timestamp of last 429
+_OAUTH_COOLDOWN_SECONDS = 300  # 5 minutes
+
+# ==============================================
+# OPENAI AUTH MODE STATE (OAuth-first with 429 fallback)
+# ==============================================
+# When OpenAI OAuth gets a 429 rate limit, we fallback to Gemini for 5 minutes.
+# After 5 minutes, we automatically retry OpenAI OAuth.
+_OPENAI_OAUTH_429_TIMESTAMP: float | None = None  # Timestamp of last OpenAI 429
+
+
+def _get_gemini_api_key() -> str | None:
+    """Get Gemini API key from environment (loaded from ~/.stravinsky/.env)."""
+    return os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
+
+
+def _set_api_only_mode(reason: str = "429 rate limit"):
+    """Switch to API-only mode after OAuth rate limit (5-minute cooldown)."""
+    global _GEMINI_OAUTH_429_TIMESTAMP
+    _GEMINI_OAUTH_429_TIMESTAMP = time.time()
+    logger.warning(f"[Gemini] Switching to API-only mode: {reason}")
+    import sys
+
+    print(
+        f"⚠️ GEMINI: OAuth rate-limited (429). "
+        f"Using API key for 5 minutes (will retry OAuth at {time.strftime('%H:%M:%S', time.localtime(_GEMINI_OAUTH_429_TIMESTAMP + _OAUTH_COOLDOWN_SECONDS))}).",
+        file=sys.stderr,
+    )
+
+
+def _is_api_only_mode() -> bool:
+    """
+    Check if we're in API-only mode (5-minute cooldown after 429).
+
+    Returns True if:
+    - 429 occurred AND
+    - Less than 5 minutes have elapsed
+
+    Automatically resets to OAuth mode after 5 minutes.
+    """
+    global _GEMINI_OAUTH_429_TIMESTAMP
+
+    if _GEMINI_OAUTH_429_TIMESTAMP is None:
+        return False
+
+    elapsed = time.time() - _GEMINI_OAUTH_429_TIMESTAMP
+
+    if elapsed >= _OAUTH_COOLDOWN_SECONDS:
+        # Cooldown expired - reset to OAuth mode
+        logger.info(
+            f"[Gemini] 5-minute cooldown expired (elapsed: {elapsed:.0f}s). Retrying OAuth."
+        )
+        _GEMINI_OAUTH_429_TIMESTAMP = None
+        return False
+
+    # Still in cooldown
+    remaining = _OAUTH_COOLDOWN_SECONDS - elapsed
+    logger.debug(f"[Gemini] API-only mode active ({remaining:.0f}s remaining)")
+    return True
+
+
+def reset_gemini_auth_mode():
+    """Reset to OAuth-first mode. Call this to manually reset cooldown."""
+    global _GEMINI_OAUTH_429_TIMESTAMP
+    _GEMINI_OAUTH_429_TIMESTAMP = None
+    logger.info("[Gemini] Reset to OAuth-first mode")
+
+
+def _set_openai_fallback_mode(reason: str = "429 rate limit"):
+    """Switch to Gemini fallback after OpenAI rate limit (5-minute cooldown)."""
+    global _OPENAI_OAUTH_429_TIMESTAMP
+    _OPENAI_OAUTH_429_TIMESTAMP = time.time()
+    logger.warning(f"[OpenAI] Switching to Gemini fallback: {reason}")
+    import sys
+
+    print(
+        f"⚠️ OPENAI: OAuth rate-limited (429). "
+        f"Using Gemini for 5 minutes (will retry OpenAI at {time.strftime('%H:%M:%S', time.localtime(_OPENAI_OAUTH_429_TIMESTAMP + _OAUTH_COOLDOWN_SECONDS))}).",
+        file=sys.stderr,
+    )
+
+
+def _is_openai_fallback_mode() -> bool:
+    """
+    Check if we're in Gemini fallback mode (5-minute cooldown after OpenAI 429).
+
+    Returns True if:
+    - OpenAI 429 occurred AND
+    - Less than 5 minutes have elapsed
+
+    Automatically resets to OpenAI mode after 5 minutes.
+    """
+    global _OPENAI_OAUTH_429_TIMESTAMP
+
+    if _OPENAI_OAUTH_429_TIMESTAMP is None:
+        return False
+
+    elapsed = time.time() - _OPENAI_OAUTH_429_TIMESTAMP
+
+    if elapsed >= _OAUTH_COOLDOWN_SECONDS:
+        # Cooldown expired - reset to OpenAI mode
+        logger.info(
+            f"[OpenAI] 5-minute cooldown expired (elapsed: {elapsed:.0f}s). Retrying OpenAI OAuth."
+        )
+        _OPENAI_OAUTH_429_TIMESTAMP = None
+        return False
+
+    # Still in cooldown
+    remaining = _OAUTH_COOLDOWN_SECONDS - elapsed
+    logger.debug(f"[OpenAI] Gemini fallback mode active ({remaining:.0f}s remaining)")
+    return True
+
+
+def reset_openai_auth_mode():
+    """Reset to OpenAI-first mode. Call this to manually reset cooldown."""
+    global _OPENAI_OAUTH_429_TIMESTAMP
+    _OPENAI_OAUTH_429_TIMESTAMP = None
+    logger.info("[OpenAI] Reset to OAuth-first mode")
+
 
 async def _fetch_codex_instructions(model: str = "gpt-5.2-codex") -> str:
     """
@@ -107,20 +236,21 @@ def resolve_gemini_model(model: str) -> str:
 import httpx
 from tenacity import (
     retry,
+    retry_if_exception,
     stop_after_attempt,
     wait_exponential,
-    retry_if_exception,
 )
 
-from ..auth.token_store import TokenStore
 from ..auth.oauth import (
-    refresh_access_token as gemini_refresh,
-    ANTIGRAVITY_HEADERS,
-    ANTIGRAVITY_ENDPOINTS,
     ANTIGRAVITY_DEFAULT_PROJECT_ID,
-    ANTIGRAVITY_API_VERSION,
+    ANTIGRAVITY_ENDPOINTS,
+    ANTIGRAVITY_HEADERS,
+)
+from ..auth.oauth import (
+    refresh_access_token as gemini_refresh,
 )
 from ..auth.openai_oauth import refresh_access_token as openai_refresh
+from ..auth.token_store import TokenStore
 from ..hooks.manager import get_hook_manager
 
 # ========================
@@ -134,6 +264,53 @@ _SESSION_CACHE: dict[str, str] = {}
 # Pooled HTTP client for connection reuse
 _HTTP_CLIENT: httpx.AsyncClient | None = None
 
+# Per-model semaphores for async rate limiting (uses config from ~/.stravinsky/config.json)
+_GEMINI_SEMAPHORES: dict[str, asyncio.Semaphore] = {}
+
+
+def _get_gemini_rate_limit(model: str) -> int:
+    """
+    Get configured rate limit for a Gemini model.
+
+    Reads from ~/.stravinsky/config.json if available, otherwise uses defaults.
+
+    Args:
+        model: Gemini model name (e.g., "gemini-3-flash", "gemini-3-pro-high")
+
+    Returns:
+        Configured concurrency limit for this model
+    """
+    rate_limiter = get_rate_limiter()
+    # Normalize model name to match config keys
+    normalized = rate_limiter._normalize_model(model)
+    return rate_limiter._limits.get(normalized, rate_limiter._limits.get("_default", 5))
+
+
+def _get_gemini_semaphore(model: str) -> asyncio.Semaphore:
+    """
+    Get or create async semaphore for Gemini model rate limiting.
+
+    Creates one semaphore per model type with limits from config.
+    Limits can be customized in ~/.stravinsky/config.json:
+    {
+        "rate_limits": {
+            "gemini-3-flash": 15,
+            "gemini-3-pro-high": 8
+        }
+    }
+
+    Args:
+        model: Gemini model name
+
+    Returns:
+        asyncio.Semaphore with configured limit for this model
+    """
+    if model not in _GEMINI_SEMAPHORES:
+        limit = _get_gemini_rate_limit(model)
+        _GEMINI_SEMAPHORES[model] = asyncio.Semaphore(limit)
+        logger.info(f"[RateLimit] Created semaphore for {model} with limit {limit}")
+    return _GEMINI_SEMAPHORES[model]
+
 
 def _get_session_id(conversation_key: str | None = None) -> str:
     """
@@ -284,51 +461,180 @@ async def _ensure_valid_token(token_store: TokenStore, provider: str) -> str:
 
 
 def is_retryable_exception(e: Exception) -> bool:
-    """Check if an exception is retryable (429 or 5xx)."""
+    """
+    Check if an exception is retryable (5xx only, NOT 429).
+
+    429 (Rate Limit) errors should fail fast - retrying makes the problem worse
+    by adding more requests to an already exhausted quota. The semaphore prevents
+    these in the first place, but if one slips through, we shouldn't retry.
+    """
     if isinstance(e, httpx.HTTPStatusError):
-        return e.response.status_code == 429 or 500 <= e.response.status_code < 600
+        # Only retry server errors (5xx), not rate limits (429)
+        return 500 <= e.response.status_code < 600
     return False
 
 
-@retry(
-    stop=stop_after_attempt(5),
-    wait=wait_exponential(multiplier=1, min=4, max=60),
-    retry=retry_if_exception(is_retryable_exception),
-    before_sleep=lambda retry_state: logger.info(
-        f"Rate limited or server error, retrying in {retry_state.next_action.sleep} seconds..."
-    ),
-)
-async def invoke_gemini(
-    token_store: TokenStore,
+async def _invoke_gemini_with_api_key(
+    api_key: str,
     prompt: str,
     model: str = "gemini-3-flash",
     temperature: float = 0.7,
     max_tokens: int = 4096,
     thinking_budget: int = 0,
     image_path: str | None = None,
+    agent_context: dict | None = None,
 ) -> str:
     """
-    Invoke a Gemini model with the given prompt.
+    Invoke Gemini using API key authentication (google-genai library).
 
-    Uses OAuth authentication with Antigravity credentials.
-    Supports vision API for image/PDF analysis when image_path is provided.
+    This is an alternative to OAuth authentication that uses the official
+    google-genai Python library with a simple API key.
 
     Args:
-        token_store: Token store for OAuth credentials
+        api_key: Gemini API key (from GEMINI_API_KEY or GOOGLE_API_KEY env var)
         prompt: The prompt to send to Gemini
-        model: Gemini model to use
+        model: Gemini model to use (e.g., "gemini-3-flash-preview")
         temperature: Sampling temperature (0.0-2.0)
         max_tokens: Maximum tokens in response
-        thinking_budget: Tokens reserved for internal reasoning
-        image_path: Optional path to image/PDF for vision analysis (token optimization)
+        thinking_budget: Tokens reserved for internal reasoning (if supported)
+        image_path: Optional path to image/PDF for vision analysis
 
     Returns:
         The model's response text.
 
     Raises:
-        ValueError: If not authenticated with Gemini
-        httpx.HTTPStatusError: If API request fails
+        ImportError: If google-genai library is not installed
+        ValueError: If API request fails
     """
+    try:
+        from google import genai
+    except ImportError:
+        raise ImportError(
+            "google-genai library not installed. Install with: pip install google-genai"
+        )
+
+    # Map stravinsky model names to google-genai model names
+    # Pass through gemini-3-* models directly (Tier 3 benefits)
+    model_map = {
+        "gemini-3-flash": "gemini-3-flash-preview",  # Tier 3 model (not -exp)
+        "gemini-3-flash-preview": "gemini-3-flash-preview",  # Pass through
+        "gemini-3-pro-low": "gemini-3-flash-preview",
+        "gemini-3-pro-high": "gemini-3-pro-preview",  # Tier 3 pro model
+        "gemini-3-pro-preview": "gemini-3-pro-preview",  # Pass through
+        "gemini-flash": "gemini-3-flash-preview",
+        "gemini-pro": "gemini-3-pro-preview",
+        "gemini-3-pro": "gemini-3-pro-preview",
+        "gemini": "gemini-3-flash-preview",
+    }
+    genai_model = model_map.get(model, "gemini-3-flash-preview")  # Default to tier 3 flash
+
+    try:
+        # Initialize client with API key
+        client = genai.Client(api_key=api_key)
+
+        # Build generation config
+        config = {
+            "temperature": temperature,
+            "max_output_tokens": max_tokens,
+        }
+
+        # Add thinking budget if supported (experimental feature)
+        if thinking_budget > 0:
+            config["thinking_config"] = {
+                "thinking_budget": thinking_budget,
+            }
+
+        # Build contents - text prompt plus optional image
+        contents = [prompt]
+
+        # Add image data for vision analysis
+        if image_path:
+            from pathlib import Path
+
+            image_file = Path(image_path)
+            if image_file.exists():
+                # google-genai supports direct file path or base64
+                # For simplicity, use the file path directly
+                contents.append(image_file)
+                logger.info(f"[API_KEY] Added vision data: {image_path}")
+
+        # Generate content
+        response = client.models.generate_content(
+            model=genai_model,
+            contents=contents,
+            config=config,
+        )
+
+        # Track usage
+        try:
+            from mcp_bridge.metrics.cost_tracker import get_cost_tracker
+
+            tracker = get_cost_tracker()
+            if hasattr(response, "usage_metadata"):
+                usage = response.usage_metadata
+                agent_type = (agent_context or {}).get("agent_type", "unknown")
+                task_id = (agent_context or {}).get("task_id", "")
+
+                tracker.track_usage(
+                    model=model,
+                    input_tokens=usage.prompt_token_count,
+                    output_tokens=usage.candidates_token_count,
+                    agent_type=agent_type,
+                    task_id=task_id,
+                )
+        except Exception:
+            pass
+
+        # Extract text from response
+        if hasattr(response, "text"):
+            return response.text
+        elif hasattr(response, "candidates") and response.candidates:
+            # Fallback: extract from candidates
+            candidate = response.candidates[0]
+            if hasattr(candidate, "content"):
+                parts = candidate.content.parts
+                text_parts = [part.text for part in parts if hasattr(part, "text")]
+                return "".join(text_parts) if text_parts else "No response generated"
+
+        return "No response generated"
+
+    except Exception as e:
+        logger.error(f"API key authentication failed: {e}")
+        raise ValueError(f"Gemini API key request failed: {e}")
+
+
+@retry(
+    stop=stop_after_attempt(2),  # Reduced from 5 to 2 attempts
+    wait=wait_exponential(multiplier=2, min=10, max=120),  # Longer waits: 10s → 20s → 40s
+    retry=retry_if_exception(is_retryable_exception),
+    before_sleep=lambda retry_state: logger.info(
+        f"Server error, retrying in {retry_state.next_action.sleep} seconds..."
+    ),
+)
+async def invoke_gemini(
+    token_store: TokenStore,
+    prompt: str,
+    model: str = "gemini-3-flash",
+    temperature: float = 0.7,
+    max_tokens: int = 4096,
+    thinking_budget: int = 0,
+    image_path: str | None = None,
+) -> str:
+    """
+    Invoke a Gemini model with the given prompt.
+    """
+    from mcp_bridge.proxy.client import is_proxy_enabled, proxy_invoke_gemini
+
+    if is_proxy_enabled():
+        return await proxy_invoke_gemini(
+            prompt=prompt,
+            model=model,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            thinking_budget=thinking_budget,
+            image_path=image_path,
+        )
+
     logger.info(f"[DEBUG] invoke_gemini called, uuid module check: {uuid}")
     # Execute pre-model invoke hooks
     params = {
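
Note the retry-policy change above: tenacity now makes at most 2 attempts, waits longer between them, and retries only 5xx responses, so a 429 falls through to the cooldown/fallback path instead of being retried. A self-contained sketch of that predicate-driven retry (tenacity and httpx, as in the diff; the endpoint URL is a placeholder):

    import httpx
    from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential

    def is_retryable(e: Exception) -> bool:
        # Retry server errors (5xx) only; let 429 propagate immediately
        if isinstance(e, httpx.HTTPStatusError):
            return 500 <= e.response.status_code < 600
        return False

    @retry(
        stop=stop_after_attempt(2),
        wait=wait_exponential(multiplier=2, min=10, max=120),
        retry=retry_if_exception(is_retryable),
    )
    def flaky_call() -> str:
        # Hypothetical stand-in for the Antigravity POST
        response = httpx.get("https://httpbin.org/status/503")
        response.raise_for_status()
        return response.text
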
@@ -360,188 +666,380 @@ async def invoke_gemini(
     # Log with agent context and prompt summary
     logger.info(f"[{agent_type}] → {model}: {prompt_summary}")
 
-    # USER-VISIBLE NOTIFICATION (stderr) - Shows when Gemini is invoked
+    # Get API key from environment (loaded from ~/.stravinsky/.env)
+    api_key = _get_gemini_api_key()
     import sys
+
     task_info = f" task={task_id}" if task_id else ""
     desc_info = f" | {description}" if description else ""
-    print(f"🔮 GEMINI: {model} | agent={agent_type}{task_info}{desc_info}", file=sys.stderr)
 
-    access_token = await _ensure_valid_token(token_store, "gemini")
+    # ==============================================
+    # AUTH PRIORITY: OAuth first, API fallback on 429
+    # ==============================================
+    # 1. If API-only mode (after 429), use API key directly
+    # 2. Otherwise, try OAuth first
+    # 3. On 429 from OAuth, switch to API-only mode and retry
 
-    # Resolve user-friendly model name to actual API model ID
-    api_model = resolve_gemini_model(model)
+    # If we're in API-only mode (after a 429), use API key directly
+    if _is_api_only_mode():
+        if not api_key:
+            raise ValueError(
+                "OAuth rate-limited (429) and no API key available. "
+                "Add GEMINI_API_KEY to ~/.stravinsky/.env"
+            )
 
-    # Use persistent session ID for thinking signature caching
-    session_id = _get_session_id()
-    project_id = os.getenv("STRAVINSKY_ANTIGRAVITY_PROJECT_ID", ANTIGRAVITY_DEFAULT_PROJECT_ID)
+        # Calculate remaining cooldown time
+        if _GEMINI_OAUTH_429_TIMESTAMP is not None:
+            remaining = _OAUTH_COOLDOWN_SECONDS - (time.time() - _GEMINI_OAUTH_429_TIMESTAMP)
+            remaining_mins = int(remaining // 60)
+            remaining_secs = int(remaining % 60)
+            cooldown_msg = f" (OAuth retry in {remaining_mins}m {remaining_secs}s)"
+        else:
+            cooldown_msg = ""
+
+        # Check time-window rate limit (30 req/min)
+        time_limiter = get_gemini_time_limiter()
+        wait_time = time_limiter.acquire_visible("GEMINI", "API key")
+        if wait_time > 0:
+            await asyncio.sleep(wait_time)
+            # Re-acquire after sleep
+            wait_time = time_limiter.acquire_visible("GEMINI", "API key")
+
+        print(
+            f"🔑 GEMINI (API-only cooldown{cooldown_msg}): {model} | agent={agent_type}{task_info}{desc_info}",
+            file=sys.stderr,
+        )
+        logger.info(f"[{agent_type}] Using API key (5-min cooldown after OAuth 429)")
+        semaphore = _get_gemini_semaphore(model)
+        async with semaphore:
+            result = await _invoke_gemini_with_api_key(
+                api_key=api_key,
+                prompt=prompt,
+                model=model,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                thinking_budget=thinking_budget,
+                image_path=image_path,
+                agent_context=agent_context,
+            )
+        # Prepend auth header for visibility in logs
+        auth_header = f"[Auth: API key (5-min cooldown) | Model: {model}]\n\n"
+        return auth_header + result
 
-    headers = {
-        "Authorization": f"Bearer {access_token}",
-        "Content-Type": "application/json",
-        **ANTIGRAVITY_HEADERS,  # Include Antigravity headers
-    }
+    provider_tracker = get_provider_tracker()
 
-    # Build inner request payload
-    # Per API spec: contents must include role ("user" or "model")
-
-    # Build parts list - text prompt plus optional image
-    parts = [{"text": prompt}]
-
-    # Add image data for vision analysis (token optimization for multimodal)
-    if image_path:
-        import base64
-        from pathlib import Path
-
-        image_file = Path(image_path)
-        if image_file.exists():
-            # Determine MIME type
-            suffix = image_file.suffix.lower()
-            mime_types = {
-                ".png": "image/png",
-                ".jpg": "image/jpeg",
-                ".jpeg": "image/jpeg",
-                ".gif": "image/gif",
-                ".webp": "image/webp",
-                ".pdf": "application/pdf",
-            }
-            mime_type = mime_types.get(suffix, "image/png")
+    # If Gemini is in cooldown, follow tier-aware fallback chain.
+    if not provider_tracker.is_available("gemini"):
+        for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain("gemini", model):
+            if candidate_provider == "gemini" and use_oauth:
+                continue
+            if use_oauth and not provider_tracker.is_available(candidate_provider):
+                continue
+
+            if candidate_provider == "gemini" and not use_oauth:
+                api_key = _get_gemini_api_key()
+                if not api_key:
+                    continue
+                _set_api_only_mode("Gemini in cooldown; using API key")
+                result = await _invoke_gemini_with_api_key(
+                    api_key=api_key,
+                    prompt=prompt,
+                    model=candidate_model,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    thinking_budget=thinking_budget,
+                    image_path=image_path,
+                    agent_context=agent_context,
+                )
+                auth_header = f"[Auth: API key (cooldown) | Model: {candidate_model}]\n\n"
+                return auth_header + result
+
+            if candidate_provider == "openai" and use_oauth:
+                return await invoke_openai(
+                    token_store=token_store,
+                    prompt=prompt,
+                    model=candidate_model,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    thinking_budget=0,
+                    reasoning_effort="medium",
+                )
 
-            # Read and base64 encode
-            image_data = base64.b64encode(image_file.read_bytes()).decode("utf-8")
+    # DEFAULT: Try OAuth first (Antigravity)
+
+    # Check time-window rate limit (30 req/min)
+    time_limiter = get_gemini_time_limiter()
+    wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
+    if wait_time > 0:
+        await asyncio.sleep(wait_time)
+        # Re-acquire after sleep
+        wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
+
+    print(
+        f"🔮 GEMINI (OAuth): {model} | agent={agent_type}{task_info}{desc_info}",
+        file=sys.stderr,
+    )
+    logger.info(f"[{agent_type}] Using OAuth authentication (Antigravity)")
+    # Rate limit concurrent Gemini requests (configurable via ~/.stravinsky/config.json)
+    semaphore = _get_gemini_semaphore(model)
+    async with semaphore:
+        access_token = await _ensure_valid_token(token_store, "gemini")
+
+        # Resolve user-friendly model name to actual API model ID
+        api_model = resolve_gemini_model(model)
+
+        # Use persistent session ID for thinking signature caching
+        session_id = _get_session_id()
+        project_id = os.getenv("STRAVINSKY_ANTIGRAVITY_PROJECT_ID", ANTIGRAVITY_DEFAULT_PROJECT_ID)
+
+        headers = {
+            "Authorization": f"Bearer {access_token}",
+            "Content-Type": "application/json",
+            **ANTIGRAVITY_HEADERS,  # Include Antigravity headers
+        }
 
-            # Add inline image data for Gemini Vision API
-            parts.append({
-                "inlineData": {
-                    "mimeType": mime_type,
-                    "data": image_data,
-                }
-            })
-            logger.info(f"[multimodal] Added vision data: {image_path} ({mime_type})")
+        # Build inner request payload
+        # Per API spec: contents must include role ("user" or "model")
+
+        # Build parts list - text prompt plus optional image
+        parts = [{"text": prompt}]
+
+        # Add image data for vision analysis (token optimization for multimodal)
+        if image_path:
+            import base64
+            from pathlib import Path
+
+            image_file = Path(image_path)
+            if image_file.exists():
+                # Determine MIME type
+                suffix = image_file.suffix.lower()
+                mime_types = {
+                    ".png": "image/png",
+                    ".jpg": "image/jpeg",
+                    ".jpeg": "image/jpeg",
+                    ".gif": "image/gif",
+                    ".webp": "image/webp",
+                    ".pdf": "application/pdf",
+                }
+                mime_type = mime_types.get(suffix, "image/png")
 
-    inner_payload = {
-        "contents": [{"role": "user", "parts": parts}],
-        "generationConfig": {
-            "temperature": temperature,
-            "maxOutputTokens": max_tokens,
-        },
-        "sessionId": session_id,
-    }
+                # Read and base64 encode
+                image_data = base64.b64encode(image_file.read_bytes()).decode("utf-8")
 
-    # Add thinking budget if supported by model/API
-    if thinking_budget > 0:
-        # For Gemini 2.0+ Thinking models
-        # Per Antigravity API: use "thinkingBudget", NOT "tokenLimit"
-        inner_payload["generationConfig"]["thinkingConfig"] = {
-            "includeThoughts": True,
-            "thinkingBudget": thinking_budget,
-        }
+                # Add inline image data for Gemini Vision API
+                parts.append(
+                    {
+                        "inlineData": {
+                            "mimeType": mime_type,
+                            "data": image_data,
+                        }
+                    }
+                )
+                logger.info(f"[multimodal] Added vision data: {image_path} ({mime_type})")
 
-    # Wrap request body per reference implementation
-    try:
-        import uuid as uuid_module  # Local import workaround for MCP context issue
+        inner_payload = {
+            "contents": [{"role": "user", "parts": parts}],
+            "generationConfig": {
+                "temperature": temperature,
+                "maxOutputTokens": max_tokens,
+            },
+            "sessionId": session_id,
+        }
 
-        request_id = f"invoke-{uuid_module.uuid4()}"
-    except Exception as e:
-        logger.error(f"UUID IMPORT FAILED: {e}")
-        raise RuntimeError(f"CUSTOM ERROR: UUID import failed: {e}")
-
-    wrapped_payload = {
-        "project": project_id,
-        "model": api_model,
-        "userAgent": "antigravity",
-        "requestId": request_id,
-        "request": inner_payload,
-    }
+        # Add thinking budget if supported by model/API
+        if thinking_budget > 0:
+            # For Gemini 2.0+ Thinking models
+            # Per Antigravity API: use "thinkingBudget", NOT "tokenLimit"
+            inner_payload["generationConfig"]["thinkingConfig"] = {
+                "includeThoughts": True,
+                "thinkingBudget": thinking_budget,
+            }
 
-    # Get pooled HTTP client for connection reuse
-    client = await _get_http_client()
+        # Wrap request body per reference implementation
+        try:
+            import uuid as uuid_module  # Local import workaround for MCP context issue
 
-    # Try endpoints in fallback order with thinking recovery
-    response = None
-    last_error = None
-    max_retries = 2  # For thinking recovery
+            request_id = f"invoke-{uuid_module.uuid4()}"
+        except Exception as e:
+            logger.error(f"UUID IMPORT FAILED: {e}")
+            raise RuntimeError(f"CUSTOM ERROR: UUID import failed: {e}")
 
-    for retry_attempt in range(max_retries):
-        for endpoint in ANTIGRAVITY_ENDPOINTS:
-            # Reference uses: {endpoint}/v1internal:generateContent (NOT /models/{model})
-            api_url = f"{endpoint}/v1internal:generateContent"
+        wrapped_payload = {
+            "project": project_id,
+            "model": api_model,
+            "userAgent": "antigravity",
+            "requestId": request_id,
+            "request": inner_payload,
+        }
 
-            try:
-                response = await client.post(
-                    api_url,
-                    headers=headers,
-                    json=wrapped_payload,
-                    timeout=120.0,
-                )
+        # Get pooled HTTP client for connection reuse
+        client = await _get_http_client()
 
-                # 401/403 might be endpoint-specific, try next endpoint
-                if response.status_code in (401, 403):
-                    logger.warning(
-                        f"[Gemini] Endpoint {endpoint} returned {response.status_code}, trying next"
-                    )
-                    last_error = Exception(f"{response.status_code} from {endpoint}")
-                    continue
+        # Try endpoints in fallback order with thinking recovery
+        response = None
+        last_error = None
+        max_retries = 2  # For thinking recovery
+
+        for retry_attempt in range(max_retries):
+            for endpoint in ANTIGRAVITY_ENDPOINTS:
+                # Reference uses: {endpoint}/v1internal:generateContent (NOT /models/{model})
+                api_url = f"{endpoint}/v1internal:generateContent"
+
+                try:
+                    response = await client.post(
+                        api_url,
+                        headers=headers,
+                        json=wrapped_payload,
+                        timeout=120.0,
+                    )
 
-                # Check for thinking-related errors that need recovery
-                if response.status_code in (400, 500):
-                    error_text = response.text.lower()
-                    if "thinking" in error_text or "signature" in error_text:
-                        logger.warning(
-                            f"[Gemini] Thinking error detected, clearing session cache and retrying"
+                    # 401/403 might be endpoint-specific, try next endpoint
+                    if response.status_code in (401, 403):
+                        logger.warning(
+                            f"[Gemini] Endpoint {endpoint} returned {response.status_code}, trying next"
                         )
-                        clear_session_cache()
-                        # Update session ID for retry
-                        wrapped_payload["request"]["sessionId"] = _get_session_id()
-                        last_error = Exception(f"Thinking error: {response.text[:200]}")
-                        break  # Break inner loop to retry with new session
-
-                # If we got a non-retryable response (success or 4xx client error), use it
-                if response.status_code < 500 and response.status_code != 429:
-                    break
+                        last_error = Exception(f"{response.status_code} from {endpoint}")
+                        continue
+
+                    # Check for thinking-related errors that need recovery
+                    if response.status_code in (400, 500):
+                        error_text = response.text.lower()
+                        if "thinking" in error_text or "signature" in error_text:
+                            logger.warning(
+                                "[Gemini] Thinking error detected, clearing session cache and retrying"
+                            )
+                            clear_session_cache()
+                            # Update session ID for retry
+                            wrapped_payload["request"]["sessionId"] = _get_session_id()
+                            last_error = Exception(f"Thinking error: {response.text[:200]}")
+                            break  # Break inner loop to retry with new session
+
+                    # If we got a non-retryable response (success or 4xx client error), use it
+                    if response.status_code < 500 and response.status_code != 429:
+                        break
+
+                except httpx.TimeoutException as e:
+                    last_error = e
+                    continue
+                except Exception as e:
+                    last_error = e
+                    continue
+            else:
+                # Inner loop completed without break - no thinking recovery needed
+                break
 
-            except httpx.TimeoutException as e:
-                last_error = e
+            # If we broke out of inner loop for thinking recovery, continue outer retry loop
+            if response and response.status_code in (400, 500):
                 continue
-            except Exception as e:
-                last_error = e
-                continue
-        else:
-            # Inner loop completed without break - no thinking recovery needed
             break
 
-        # If we broke out of inner loop for thinking recovery, continue outer retry loop
-        if response and response.status_code in (400, 500):
-            continue
-        break
+        # ==============================================
+        # 429 RATE LIMIT DETECTION: Tier-aware fallback chain
+        # ==============================================
+        if response is not None and response.status_code == 429:
+            provider_tracker = get_provider_tracker()
+            provider_tracker.mark_rate_limited(
+                "gemini",
+                duration=_OAUTH_COOLDOWN_SECONDS,
+                reason="Gemini OAuth rate-limited (429)",
+            )
 
-    if response is None:
-        # FALLBACK: Try Claude sonnet-4.5 for agents that support it
-        agent_context = params.get("agent_context", {})
-        agent_type = agent_context.get("agent_type", "unknown")
+            for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain(
+                "gemini", model
+            ):
+                if candidate_provider == "gemini" and use_oauth:
+                    continue
+                if use_oauth and not provider_tracker.is_available(candidate_provider):
+                    continue
 
-        if agent_type in ("dewey", "explore", "document_writer", "multimodal"):
-            logger.warning(f"[{agent_type}] Gemini failed, falling back to Claude sonnet-4.5")
-            try:
-                import subprocess
-                fallback_result = subprocess.run(
-                    ["claude", "-p", prompt, "--model", "sonnet", "--output-format", "text"],
-                    capture_output=True,
-                    text=True,
-                    timeout=120,
-                    cwd=os.getcwd(),
-                )
-                if fallback_result.returncode == 0 and fallback_result.stdout.strip():
-                    return fallback_result.stdout.strip()
-            except Exception as fallback_error:
-                logger.error(f"Fallback to Claude also failed: {fallback_error}")
+                if candidate_provider == "gemini" and not use_oauth:
+                    api_key = _get_gemini_api_key()
+                    if not api_key:
+                        continue
+                    _set_api_only_mode("OAuth rate-limited (429)")
+                    logger.info("[Gemini] Retrying with API key after OAuth 429")
+                    result = await _invoke_gemini_with_api_key(
+                        api_key=api_key,
+                        prompt=prompt,
+                        model=candidate_model,
+                        temperature=temperature,
+                        max_tokens=max_tokens,
+                        thinking_budget=thinking_budget,
+                        image_path=image_path,
+                        agent_context=agent_context,
+                    )
+                    auth_header = (
+                        f"[Auth: API key (OAuth 429 fallback) | Model: {candidate_model}]\n\n"
+                    )
+                    return auth_header + result
+
+                if candidate_provider == "openai" and use_oauth:
+                    return await invoke_openai(
+                        token_store=token_store,
+                        prompt=prompt,
+                        model=candidate_model,
+                        temperature=temperature,
+                        max_tokens=max_tokens,
+                        thinking_budget=0,
+                        reasoning_effort="medium",
+                    )
+
+            raise ValueError(
+                "OAuth rate-limited (429) and no fallback succeeded. "
+                "Add GEMINI_API_KEY to ~/.stravinsky/.env"
+            )
+        if response is None:
+            # FALLBACK: Try Claude sonnet-4.5 for agents that support it
+            agent_context = params.get("agent_context", {})
+            agent_type = agent_context.get("agent_type", "unknown")
+
+            if agent_type in ("dewey", "explore", "document_writer", "multimodal"):
+                logger.warning(f"[{agent_type}] Gemini failed, falling back to Claude sonnet-4.5")
+                try:
+                    from mcp_bridge.utils.process import async_execute
+
+                    result_obj = await async_execute(
+                        ["claude", "-p", prompt, "--model", "sonnet", "--output-format", "text"],
+                        timeout=120,
+                    )
+                    if result_obj.returncode == 0 and result_obj.stdout.strip():
+                        result = result_obj.stdout.strip()
+                        # Prepend auth header for visibility
+                        auth_header = f"[Auth: Claude fallback | Model: sonnet-4.5]\n\n"
+                        return auth_header + result
+                except Exception as fallback_error:
+                    logger.error(f"Fallback to Claude also failed: {fallback_error}")
+
+            raise ValueError(f"All Antigravity endpoints failed: {last_error}")
 
-        raise ValueError(f"All Antigravity endpoints failed: {last_error}")
+        response.raise_for_status()
+        data = response.json()
+
+        # Track usage
+        try:
+            from mcp_bridge.metrics.cost_tracker import get_cost_tracker
+
+            tracker = get_cost_tracker()
+            usage = data.get("usageMetadata", {})
+            input_tokens = usage.get("promptTokenCount", 0)
+            output_tokens = usage.get("candidatesTokenCount", 0)
+
+            tracker.track_usage(
+                model=model,
+                input_tokens=input_tokens,
+                output_tokens=output_tokens,
+                agent_type=agent_type,
+                task_id=task_id,
+            )
+        except Exception as e:
+            logger.warning(f"Failed to track cost: {e}")
 
-    response.raise_for_status()
-    data = response.json()
+        # Extract text from response using thinking-aware parser
+        result = _extract_gemini_response(data)
 
-    # Extract text from response using thinking-aware parser
-    return _extract_gemini_response(data)
+        # Prepend auth header for visibility in logs
+        auth_header = f"[Auth: OAuth | Model: {model}]\n\n"
+        return auth_header + result
 
 
 # ========================
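
The 429 handler above walks candidates from get_oauth_fallback_chain, each a (provider, model, use_oauth) tuple, skipping the provider that just rate-limited and anything still in its own cooldown. A schematic sketch of that walk (the chain contents here are illustrative, not the real tiers from mcp_bridge.routing.model_tiers):

    from typing import Iterable

    # Illustrative chain: API-key Gemini first, then OpenAI OAuth
    FALLBACK_CHAIN: list[tuple[str, str, bool]] = [
        ("gemini", "gemini-3-flash-preview", False),  # use_oauth=False -> API key
        ("openai", "gpt-5.2-codex", True),            # use_oauth=True  -> OAuth
    ]

    def pick_fallback(chain: Iterable[tuple[str, str, bool]],
                      available: set[str], have_api_key: bool) -> tuple[str, str, bool] | None:
        for provider, model, use_oauth in chain:
            if provider == "gemini" and use_oauth:
                continue  # Gemini OAuth is the path that just returned 429
            if use_oauth and provider not in available:
                continue  # that provider is in its own cooldown
            if provider == "gemini" and not use_oauth and not have_api_key:
                continue  # no GEMINI_API_KEY configured
            return provider, model, use_oauth
        return None  # mirrors the final "no fallback succeeded" ValueError
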
@@ -552,9 +1050,57 @@ async def invoke_gemini
 AGENT_TOOLS = [
     {
         "functionDeclarations": [
+            {
+                "name": "semantic_search",
+                "description": "Search codebase with natural language query using semantic embeddings. ALWAYS use this FIRST before grep_search or read_file to find relevant files efficiently. Returns code snippets with file paths and relevance scores.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "Natural language search query (e.g., 'find authentication logic', 'PDF rendering code')",
+                        },
+                        "project_path": {
+                            "type": "string",
+                            "description": "Path to the project root (default: '.')",
+                        },
+                        "n_results": {
+                            "type": "integer",
+                            "description": "Maximum number of results to return (default: 10)",
+                        },
+                    },
+                    "required": ["query"],
+                },
+            },
+            {
+                "name": "hybrid_search",
+                "description": "Hybrid search combining semantic similarity with structural AST pattern matching. Use when you need precise structural patterns (e.g., specific function signatures) combined with semantic relevance.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "Natural language search query (e.g., 'find authentication logic')",
+                        },
+                        "pattern": {
+                            "type": "string",
+                            "description": "Optional ast-grep pattern for structural matching (e.g., 'def $FUNC($$$):', 'async function $NAME($$$)')",
+                        },
+                        "project_path": {
+                            "type": "string",
+                            "description": "Path to the project root (default: '.')",
+                        },
+                        "n_results": {
+                            "type": "integer",
+                            "description": "Maximum number of results to return (default: 10)",
+                        },
+                    },
+                    "required": ["query"],
+                },
+            },
             {
                 "name": "read_file",
-                "description": "Read the contents of a file. Returns the file contents as text.",
+                "description": "Read the contents of a file. Returns the file contents as text. USE ONLY AFTER semantic_search identifies the target file.",
                 "parameters": {
                     "type": "object",
                     "properties": {
@@ -579,7 +1125,7 @@ AGENT_TOOLS = [
             },
             {
                 "name": "grep_search",
-                "description": "Search for a pattern in files using ripgrep. Returns matching lines with file paths and line numbers.",
+                "description": "Search for a pattern in files using ripgrep. Returns matching lines with file paths and line numbers. USE ONLY for precise pattern matching AFTER semantic_search narrows down the search scope.",
                 "parameters": {
                     "type": "object",
                     "properties": {
@@ -609,50 +1155,85 @@ AGENT_TOOLS = [
 ]
 
 
-def _execute_tool(name: str, args: dict) -> str:
+async def _execute_tool(name: str, args: dict) -> str:
     """Execute a tool and return the result."""
-    import os
-    import subprocess
     from pathlib import Path
+    from mcp_bridge.utils.process import async_execute
 
     try:
-        if name == "read_file":
-            path = Path(args["path"])
-            if not path.exists():
-                return f"Error: File not found: {path}"
-            return path.read_text()
+        if name == "semantic_search":
+            # Import semantic_search function from tools
+            from .semantic_search import semantic_search
+
+            # Extract args with defaults
+            query = args.get("query")
+            if not query:
+                return "Error: 'query' parameter is required for semantic_search"
+
+            project_path = args.get("project_path", ".")
+            n_results = args.get("n_results", 10)
+
+            result = await semantic_search(
+                query=query,
+                project_path=project_path,
+                n_results=n_results,
+            )
+            return result
+
+        elif name == "hybrid_search":
+            # Import hybrid_search function from tools
+            from .semantic_search import hybrid_search
+
+            # Extract args with defaults
+            query = args.get("query")
+            if not query:
+                return "Error: 'query' parameter is required for hybrid_search"
+
+            pattern = args.get("pattern")
+            project_path = args.get("project_path", ".")
+            n_results = args.get("n_results", 10)
+
+            result = await hybrid_search(
+                query=query,
+                pattern=pattern,
+                project_path=project_path,
+                n_results=n_results,
+            )
+            return result
+
+        elif name == "read_file":
+            from .read_file import read_file
+
+            path = args["path"]
+            return await read_file(path)
 
         elif name == "list_directory":
-            path = Path(args["path"])
-            if not path.exists():
-                return f"Error: Directory not found: {path}"
-            entries = []
-            for entry in path.iterdir():
-                entry_type = "DIR" if entry.is_dir() else "FILE"
-                entries.append(f"[{entry_type}] {entry.name}")
-            return "\n".join(entries) if entries else "(empty directory)"
+            from .list_directory import list_directory
+
+            path = args["path"]
+            return await list_directory(path)
 
         elif name == "grep_search":
             pattern = args["pattern"]
             search_path = args["path"]
-            result = subprocess.run(
-                ["rg", "--json", "-m", "50", pattern, search_path],
-                capture_output=True,
-                text=True,
-                timeout=30,
+
+            result_obj = await async_execute(
+                ["rg", "--json", "-m", "50", pattern, search_path], timeout=30
             )
-            if result.returncode == 0:
-                return result.stdout[:10000]  # Limit output size
-            elif result.returncode == 1:
+
+            if result_obj.returncode == 0:
+                return result_obj.stdout[:10000]  # Limit output size
+            elif result_obj.returncode == 1:
                 return "No matches found"
             else:
-                return f"Search error: {result.stderr}"
+                return f"Search error: {result_obj.stderr}"
 
         elif name == "write_file":
-            path = Path(args["path"])
-            path.parent.mkdir(parents=True, exist_ok=True)
-            path.write_text(args["content"])
-            return f"Successfully wrote {len(args['content'])} bytes to {path}"
+            from .write_file import write_file
+
+            path = args["path"]
+            content = args["content"]
+            return await write_file(path, content)
 
         else:
             return f"Unknown tool: {name}"
@@ -661,32 +1242,244 @@ def _execute_tool(name: str, args: dict) -> str:
             return f"Tool error: {str(e)}"
 
 
-async def invoke_gemini_agentic(
-    token_store: TokenStore,
+async def _invoke_gemini_agentic_with_api_key(
+    api_key: str,
     prompt: str,
     model: str = "gemini-3-flash",
     max_turns: int = 10,
     timeout: int = 120,
 ) -> str:
     """
-    Invoke Gemini with function calling for agentic tasks.
+    Invoke Gemini with function calling using API key authentication (google-genai library).
 
-    This function implements a multi-turn agentic loop:
+    This implements a multi-turn agentic loop:
     1. Send prompt with tool definitions
     2. If model returns FunctionCall, execute the tool
     3. Send FunctionResponse back to model
     4. Repeat until model returns text or max_turns reached
 
     Args:
-        token_store: Token store for OAuth credentials
+        api_key: Gemini API key (from GEMINI_API_KEY or GOOGLE_API_KEY env var)
         prompt: The task prompt
         model: Gemini model to use
         max_turns: Maximum number of tool-use turns
-        timeout: Request timeout in seconds
+        timeout: Request timeout in seconds (currently unused by google-genai)
 
     Returns:
         Final text response from the model
+
+    Raises:
+        ImportError: If google-genai library is not installed
+        ValueError: If API request fails
     """
+    # USER-VISIBLE NOTIFICATION (stderr) - Shows agentic mode with API key
+    import sys
+
+    print(f"🔮 GEMINI (API/Agentic): {model} | max_turns={max_turns}", file=sys.stderr)
+
+    try:
+        from google import genai
+        from google.genai import types
+    except ImportError:
+        raise ImportError(
+            "google-genai library not installed. Install with: pip install google-genai"
+        )
+
+    # Map stravinsky model names to google-genai model names
+    # Pass through gemini-3-* models directly (Tier 3 benefits)
+    model_map = {
+        "gemini-3-flash": "gemini-3-flash-preview",  # Tier 3 model (not -exp)
+        "gemini-3-flash-preview": "gemini-3-flash-preview",  # Pass through
+        "gemini-3-pro-low": "gemini-3-flash-preview",
+        "gemini-3-pro-high": "gemini-3-pro-preview",  # Tier 3 pro model
+        "gemini-3-pro-preview": "gemini-3-pro-preview",  # Pass through
+        "gemini-flash": "gemini-3-flash-preview",
+        "gemini-pro": "gemini-3-pro-preview",
+        "gemini-3-pro": "gemini-3-pro-preview",
+        "gemini": "gemini-3-flash-preview",
+    }
+    genai_model = model_map.get(model, "gemini-3-flash-preview")  # Default to tier 3 flash
+
+    # Initialize client with API key
+    client = genai.Client(api_key=api_key)
+
+    # Convert AGENT_TOOLS to google-genai format
+    # google-genai expects tools as a list of Tool objects containing function_declarations
+    function_declarations = []
+    for tool_group in AGENT_TOOLS:
+        for func_decl in tool_group.get("functionDeclarations", []):
+            function_declarations.append(
+                types.FunctionDeclaration(
+                    name=func_decl["name"],
+                    description=func_decl["description"],
+                    parameters=func_decl["parameters"],
+                )
+            )
+
+    # Wrap function declarations in a Tool object
+    tools = [types.Tool(function_declarations=function_declarations)]
+
+    # Initialize conversation with user message
+    contents = [types.Content(role="user", parts=[types.Part(text=prompt)])]
+
+    for turn in range(max_turns):
+        try:
+            # Generate content with tools
+            response = client.models.generate_content(
+                model=genai_model,
+                contents=contents,
+                config=types.GenerateContentConfig(
+                    tools=tools,
+                    temperature=0.7,
+                    max_output_tokens=8192,
+                ),
+            )
+
+            # Check if response has function calls
+            if not response.candidates or not response.candidates[0].content.parts:
+                return "No response generated"
+
+            parts = response.candidates[0].content.parts
+            function_calls = []
+            text_parts = []
+
+            for part in parts:
+                if part.function_call:
+                    function_calls.append(part.function_call)
+                elif part.text:
+                    text_parts.append(part.text)
+
+            # If no function calls, return text response
+            if not function_calls:
+                result = "".join(text_parts)
+                return result if result.strip() else "Task completed"
+
+            # Execute function calls and prepare responses
+            function_responses = []
+            for func_call in function_calls:
+                func_name = func_call.name
+                func_args = dict(func_call.args) if func_call.args else {}
+
+                logger.info(f"[AgenticGemini] Turn {turn + 1}: Executing {func_name}")
+                result = await _execute_tool(func_name, func_args)
+
+                function_responses.append(
+                    types.Part(
+                        function_response=types.FunctionResponse(
+                            name=func_name,
+                            response={"result": result},
+                        )
+                    )
+                )
+
+            # Add model's response to conversation
+            contents.append(response.candidates[0].content)
+
+            # Add function responses to conversation
+            contents.append(
+                types.Content(
+                    role="user",
+                    parts=function_responses,
+                )
+            )
+
+        except Exception as e:
+            logger.error(f"[AgenticGemini] Error in turn {turn + 1}: {e}")
+            raise ValueError(f"Gemini API key request failed: {e}")
+
+    return "Max turns reached without final response"
+
+
+async def invoke_gemini_agentic(
+    token_store: TokenStore,
+    prompt: str,
+    model: str = "gemini-3-flash",
+    max_turns: int = 10,
+    timeout: int = 120,
+) -> str:
+    """
+    Invoke Gemini with function calling for agentic tasks.
+    """
+    from mcp_bridge.proxy.client import is_proxy_enabled, PROXY_URL
+
+    if is_proxy_enabled():
+        import httpx
+
+        async with httpx.AsyncClient(timeout=float(timeout) + 10) as client:
+            payload = {"prompt": prompt, "model": model, "max_turns": max_turns, "timeout": timeout}
+            response = await client.post(f"{PROXY_URL}/v1/gemini/agentic", json=payload)
+            response.raise_for_status()
+            return response.json()["response"]
+
+    import sys
+
+    # Get API key from environment (loaded from ~/.stravinsky/.env)
+    api_key = _get_gemini_api_key()
+
+    # ==============================================
+    # AUTH PRIORITY: OAuth first, API fallback on 429
+    # ==============================================
+    # 1. If API-only mode (after 429), use API key directly
+    # 2. Otherwise, try OAuth first
+    # 3. On 429 from OAuth, switch to API-only mode and retry
+
+    # If we're in API-only mode (after a 429), use API key directly
+    if _is_api_only_mode():
+        if not api_key:
+            raise ValueError(
+                "OAuth rate-limited (429) and no API key available. "
+                "Add GEMINI_API_KEY to ~/.stravinsky/.env"
+            )
+
+        # Calculate remaining cooldown time
+        if _GEMINI_OAUTH_429_TIMESTAMP is not None:
+            remaining = _OAUTH_COOLDOWN_SECONDS - (time.time() - _GEMINI_OAUTH_429_TIMESTAMP)
+            remaining_mins = int(remaining // 60)
+            remaining_secs = int(remaining % 60)
+            cooldown_msg = f" (OAuth retry in {remaining_mins}m {remaining_secs}s)"
1440
+ else:
1441
+ cooldown_msg = ""
1442
+
1443
+ # Check time-window rate limit (30 req/min)
1444
+ time_limiter = get_gemini_time_limiter()
1445
+ wait_time = time_limiter.acquire_visible("GEMINI", "API key")
1446
+ if wait_time > 0:
1447
+ await asyncio.sleep(wait_time)
1448
+ # Re-acquire after sleep
1449
+ wait_time = time_limiter.acquire_visible("GEMINI", "API key")
1450
+
1451
+ print(
1452
+ f"🔑 GEMINI (API-only cooldown{cooldown_msg}/Agentic): {model} | max_turns={max_turns}",
1453
+ file=sys.stderr,
1454
+ )
1455
+ logger.info("[AgenticGemini] Using API key (5-min cooldown after OAuth 429)")
1456
+ result = await _invoke_gemini_agentic_with_api_key(
1457
+ api_key=api_key,
1458
+ prompt=prompt,
1459
+ model=model,
1460
+ max_turns=max_turns,
1461
+ timeout=timeout,
1462
+ )
1463
+ # Prepend auth header for visibility in logs
1464
+ auth_header = f"[Auth: API key (5-min cooldown, Agentic) | Model: {model}]\n\n"
1465
+ return auth_header + result
1466
+
1467
+ # DEFAULT: Try OAuth first (Antigravity)
1468
+ logger.info("[AgenticGemini] Using OAuth authentication (Antigravity)")
1469
+
1470
+ # Check time-window rate limit (30 req/min)
1471
+ time_limiter = get_gemini_time_limiter()
1472
+ wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
1473
+ if wait_time > 0:
1474
+ await asyncio.sleep(wait_time)
1475
+ # Re-acquire after sleep
1476
+ wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
1477
+
1478
+ # USER-VISIBLE NOTIFICATION (stderr) - Shows agentic mode with OAuth
1479
+ import sys
1480
+
1481
+ print(f"🔮 GEMINI (OAuth/Agentic): {model} | max_turns={max_turns}", file=sys.stderr)
1482
+
690
1483
  access_token = await _ensure_valid_token(token_store, "gemini")
691
1484
  api_model = resolve_gemini_model(model)
692
1485
 
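The API-only branch above relies on module-level cooldown state (`_is_api_only_mode`, `_set_api_only_mode`, `_GEMINI_OAUTH_429_TIMESTAMP`, `_OAUTH_COOLDOWN_SECONDS`) that is defined elsewhere in this file and does not appear in the hunk. A minimal sketch of how that state plausibly fits together, assuming the five-minute window implied by the "5-min cooldown" log messages; the names mirror the call sites, the bodies are reconstructed:

```python
import time

_OAUTH_COOLDOWN_SECONDS = 300  # assumption: "5-min cooldown" per the log messages above
_GEMINI_OAUTH_429_TIMESTAMP: float | None = None  # set when OAuth returns 429


def _set_api_only_mode(reason: str) -> None:
    """Record an OAuth 429 so subsequent calls skip straight to the API key."""
    global _GEMINI_OAUTH_429_TIMESTAMP
    _GEMINI_OAUTH_429_TIMESTAMP = time.time()


def _is_api_only_mode() -> bool:
    """True while the OAuth cooldown window is still running."""
    if _GEMINI_OAUTH_429_TIMESTAMP is None:
        return False
    return (time.time() - _GEMINI_OAUTH_429_TIMESTAMP) < _OAUTH_COOLDOWN_SECONDS
```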
@@ -773,6 +1566,33 @@ async def invoke_gemini_agentic(
                 logger.warning(f"[AgenticGemini] Endpoint {endpoint} failed: {e}, trying next")
                 continue
 
+    # ==============================================
+    # 429 RATE LIMIT DETECTION: Fallback to API key
+    # ==============================================
+    # If OAuth got rate-limited (429), switch to API-only mode and retry
+    if response is not None and response.status_code == 429:
+        api_key = _get_gemini_api_key()
+        if api_key:
+            _set_api_only_mode("OAuth rate-limited (429) in agentic mode")
+            logger.info("[AgenticGemini] Retrying with API key after OAuth 429")
+            # Retry entire agentic call with API key
+            result = await _invoke_gemini_agentic_with_api_key(
+                api_key=api_key,
+                prompt=prompt,
+                model=model,
+                max_turns=max_turns,
+                timeout=timeout,
+            )
+            # Prepend auth header for visibility
+            auth_header = f"[Auth: API key (OAuth 429 fallback, Agentic) | Model: {model}]\n\n"
+            return auth_header + result
+        else:
+            # No API key available - raise clear error
+            raise ValueError(
+                "OAuth rate-limited (429) and no API key available. "
+                "Add GEMINI_API_KEY to ~/.stravinsky/.env"
+            )
+
     if response is None:
         raise ValueError(f"All Antigravity endpoints failed: {last_error}")
 
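Both auth branches gate requests through `get_gemini_time_limiter().acquire_visible(...)`, whose implementation is not part of this diff. Judging by the acquire, sleep, re-acquire pattern and the "30 req/min" comments, it behaves like a sliding-window limiter that returns the seconds to wait (0 when a slot is free); a sketch under those assumptions:

```python
import time
from collections import deque


class TimeWindowLimiter:
    """Sketch of the assumed contract; the real limiter presumably also
    prints a stderr notice (hence 'visible')."""

    def __init__(self, max_requests: int = 30, window_seconds: float = 60.0):
        self.max_requests = max_requests
        self.window_seconds = window_seconds
        self._timestamps: deque = deque()

    def acquire_visible(self, provider: str, auth_mode: str) -> float:
        """Claim a request slot; return seconds to wait, 0.0 if under the limit."""
        now = time.time()
        # Drop requests that have aged out of the window.
        while self._timestamps and now - self._timestamps[0] > self.window_seconds:
            self._timestamps.popleft()
        if len(self._timestamps) < self.max_requests:
            self._timestamps.append(now)
            return 0.0
        # Caller must wait until the oldest request leaves the window.
        return self.window_seconds - (now - self._timestamps[0])
```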
@@ -783,13 +1603,15 @@ async def invoke_gemini_agentic(
         inner_response = data.get("response", data)
         candidates = inner_response.get("candidates", [])
         if not candidates:
-            return "No response generated"
+            auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+            return auth_header + "No response generated"
 
         content = candidates[0].get("content", {})
         parts = content.get("parts", [])
 
         if not parts:
-            return "No response parts"
+            auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+            return auth_header + "No response parts"
 
         # Check for function call
         function_call = None
@@ -808,7 +1630,7 @@ async def invoke_gemini_agentic(
             func_args = function_call.get("args", {})
 
             logger.info(f"[AgenticGemini] Turn {turn + 1}: Executing {func_name}")
-            result = _execute_tool(func_name, func_args)
+            result = await _execute_tool(func_name, func_args)
 
             # Add model's response and function result to conversation
             contents.append({"role": "model", "parts": [{"functionCall": function_call}]})
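Unlike the API-key path, this OAuth path builds the conversation as raw generateContent JSON rather than google-genai types. As an illustration only (the prompt and tool name are invented), here is how `contents` grows across a single tool-use turn:

```python
# One full turn in the raw REST wire format used above:
# user text -> model functionCall -> user functionResponse.
contents = [
    {"role": "user", "parts": [{"text": "List the Python files in src/"}]},
    # Turn 1: the model asks for a tool call ...
    {"role": "model", "parts": [{"functionCall": {"name": "glob", "args": {"pattern": "src/**/*.py"}}}]},
    # ... and the tool result goes back as a user-role functionResponse part.
    {
        "role": "user",
        "parts": [
            {
                "functionResponse": {
                    "name": "glob",
                    "response": {"result": "src/app.py\nsrc/cli.py"},
                }
            }
        ],
    },
]
```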
@@ -822,17 +1644,20 @@ async def invoke_gemini_agentic(
             )
         else:
             # No function call, return text response
-            return text_response or "Task completed"
+            result = text_response or "Task completed"
+            auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+            return auth_header + result
 
-    return "Max turns reached without final response"
+    auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+    return auth_header + "Max turns reached without final response"
 
 
 @retry(
-    stop=stop_after_attempt(5),
-    wait=wait_exponential(multiplier=1, min=4, max=60),
+    stop=stop_after_attempt(2),  # Reduced from 5 to 2 attempts
+    wait=wait_exponential(multiplier=2, min=10, max=120),  # Single retry waits at least 10s, capped at 120s
     retry=retry_if_exception(is_retryable_exception),
     before_sleep=lambda retry_state: logger.info(
-        f"Rate limited or server error, retrying in {retry_state.next_action.sleep} seconds..."
+        f"Server error, retrying in {retry_state.next_action.sleep} seconds..."
     ),
 )
 async def invoke_openai(
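For reference, this is what the tightened policy does in practice: `stop_after_attempt(2)` allows exactly one retry, and tenacity clamps `wait_exponential(multiplier=2, min=10, max=120)` into the 10-120 second band, so the single wait is about 10s. A self-contained sketch:

```python
from tenacity import retry, stop_after_attempt, wait_exponential

attempt_log = []


@retry(stop=stop_after_attempt(2), wait=wait_exponential(multiplier=2, min=10, max=120))
def flaky() -> str:
    attempt_log.append(len(attempt_log) + 1)
    if len(attempt_log) < 2:
        raise RuntimeError("transient server error")
    return "ok"


# flaky(): the first attempt fails, tenacity sleeps ~10s (the clamp floor,
# since 2 * 2**1 = 4 < 10), the second attempt succeeds; a second failure
# would propagate instead of retrying further.
```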
@@ -842,24 +1667,23 @@ async def invoke_openai(
     temperature: float = 0.7,
     max_tokens: int = 4096,
     thinking_budget: int = 0,
+    reasoning_effort: str = "medium",
 ) -> str:
     """
     Invoke an OpenAI model with the given prompt.
-
-    Args:
-        token_store: Token store for API key
-        prompt: The prompt to send to OpenAI
-        model: OpenAI model to use
-        temperature: Sampling temperature (0.0-2.0)
-        max_tokens: Maximum tokens in response
-
-    Returns:
-        The model's response text.
-
-    Raises:
-        ValueError: If not authenticated with OpenAI
-        httpx.HTTPStatusError: If API request fails
     """
+    from mcp_bridge.proxy.client import is_proxy_enabled, proxy_invoke_openai
+
+    if is_proxy_enabled():
+        return await proxy_invoke_openai(
+            prompt=prompt,
+            model=model,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            thinking_budget=thinking_budget,
+            reasoning_effort=reasoning_effort,
+        )
+
     # Execute pre-model invoke hooks
     params = {
         "prompt": prompt,
@@ -867,6 +1691,7 @@ async def invoke_openai(
         "temperature": temperature,
         "max_tokens": max_tokens,
         "thinking_budget": thinking_budget,
+        "reasoning_effort": reasoning_effort,
         "token_store": token_store,  # Pass for hooks that need model access
         "provider": "openai",  # Identify which provider is being called
     }
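`mcp_bridge.proxy.client` itself is not included in this part of the diff. Based solely on how it is called here and in the Gemini agentic path above, it plausibly looks like the following sketch; the environment variable name and the `/v1/openai/invoke` route are assumptions, not confirmed API:

```python
# Hypothetical reconstruction of mcp_bridge/proxy/client.py (not shown in the diff).
import os

import httpx

PROXY_URL = os.environ.get("STRAVINSKY_PROXY_URL", "http://127.0.0.1:8765")  # assumed env var


def is_proxy_enabled() -> bool:
    # Assumed: proxy mode is on whenever the env var is set.
    return bool(os.environ.get("STRAVINSKY_PROXY_URL"))


async def proxy_invoke_openai(**payload) -> str:
    # Assumed route, by analogy with the /v1/gemini/agentic call above.
    async with httpx.AsyncClient(timeout=130.0) as client:
        response = await client.post(f"{PROXY_URL}/v1/openai/invoke", json=payload)
        response.raise_for_status()
        return response.json()["response"]
```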
@@ -879,6 +1704,7 @@ async def invoke_openai(
     temperature = params["temperature"]
     max_tokens = params["max_tokens"]
     thinking_budget = params["thinking_budget"]
+    reasoning_effort = params.get("reasoning_effort", "medium")
 
     # Extract agent context for logging (may be passed via params or original call)
     agent_context = params.get("agent_context", {})
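The hook pass treats `params` as mutable: pre-model-invoke hooks may rewrite any entry, which is why every value is re-read from the dict afterwards. A hypothetical hook illustrating that contract (the hook and its selection logic are invented for illustration, not taken from the package):

```python
# Sketch of a pre-model-invoke hook under the implied contract:
# receive the params dict, optionally rewrite entries, return it.
async def budget_cap_hook(params: dict) -> dict:
    agent_type = params.get("agent_context", {}).get("agent_type")
    if params.get("provider") == "openai" and agent_type == "explore":
        # Clamp cheap exploratory agents to a smaller budget.
        params["max_tokens"] = min(params["max_tokens"], 2048)
        params["reasoning_effort"] = "low"
    return params
```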
@@ -892,19 +1718,55 @@ async def invoke_openai(
 
     # USER-VISIBLE NOTIFICATION (stderr) - Shows when OpenAI is invoked
     import sys
+
     task_info = f" task={task_id}" if task_id else ""
     desc_info = f" | {description}" if description else ""
+
+    # ==============================================
+    # AUTH PRIORITY: OAuth first, Gemini fallback on 429
+    # ==============================================
+    # 1. If fallback mode (after 429), use Gemini directly
+    # 2. Otherwise, try OpenAI OAuth first
+    # 3. On 429 from OAuth, switch to fallback mode and retry with Gemini
+
+    provider_tracker = get_provider_tracker()
+
+    # If OpenAI is in cooldown, follow tier-aware fallback chain.
+    if not provider_tracker.is_available("openai"):
+        for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain("openai", model):
+            if candidate_provider == "openai":
+                continue
+            if use_oauth and not provider_tracker.is_available(candidate_provider):
+                continue
+
+            if candidate_provider == "gemini":
+                if not use_oauth:
+                    # Force Gemini API-key mode for the cooldown window.
+                    if _get_gemini_api_key() is None:
+                        continue
+                    _set_api_only_mode("OpenAI in cooldown; using Gemini API key")
+
+                return await invoke_gemini(
+                    token_store=token_store,
+                    prompt=prompt,
+                    model=candidate_model,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    thinking_budget=0,
+                    image_path=None,
+                )
+    # DEFAULT: Try OpenAI OAuth first
     print(f"🧠 OPENAI: {model} | agent={agent_type}{task_info}{desc_info}", file=sys.stderr)
 
     access_token = await _ensure_valid_token(token_store, "openai")
-    logger.info(f"[invoke_openai] Got access token")
+    logger.info("[invoke_openai] Got access token")
 
     # ChatGPT Backend API - Uses Codex Responses endpoint
     # Replicates opencode-openai-codex-auth plugin behavior
     api_url = "https://chatgpt.com/backend-api/codex/responses"
 
     # Extract account ID from JWT token
-    logger.info(f"[invoke_openai] Extracting account ID from JWT")
+    logger.info("[invoke_openai] Extracting account ID from JWT")
     try:
         parts = access_token.split(".")
         payload_b64 = parts[1]
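The rest of the JWT handling falls outside this hunk. For orientation, a minimal sketch of the usual base64url decode that would follow; the exact claim path for the ChatGPT account id is an assumption here, not confirmed by the diff:

```python
import base64
import json


def _account_id_from_jwt(access_token: str) -> str | None:
    """Sketch: pull an account id out of a JWT's payload segment."""
    payload_b64 = access_token.split(".")[1]
    payload_b64 += "=" * (-len(payload_b64) % 4)  # restore base64url padding
    claims = json.loads(base64.urlsafe_b64decode(payload_b64))
    # Assumed claim path; the real code may read a different field.
    return claims.get("https://api.openai.com/auth", {}).get("chatgpt_account_id")
```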
@@ -932,6 +1794,10 @@ async def invoke_openai(
     if account_id:
         headers["x-openai-account-id"] = account_id
 
+    # Determine final effort
+    # Legacy: thinking_budget > 0 implies high effort
+    effort = "high" if thinking_budget > 0 else reasoning_effort
+
     # Request body matching opencode transformation
     payload = {
         "model": model,
@@ -939,7 +1805,7 @@ async def invoke_openai(
         "stream": True,  # Always stream (handler converts to non-stream if needed)
         "instructions": instructions,
         "input": [{"role": "user", "content": prompt}],
-        "reasoning": {"effort": "high" if thinking_budget > 0 else "medium", "summary": "auto"},
+        "reasoning": {"effort": effort, "summary": "auto"},
         "text": {"verbosity": "medium"},
         "include": ["reasoning.encrypted_content"],
     }
@@ -952,44 +1818,100 @@ async def invoke_openai(
     logger.info(f"[invoke_openai] Instructions length: {len(instructions)}")
 
     try:
-        async with httpx.AsyncClient() as client:
-            async with client.stream(
+        async with (
+            httpx.AsyncClient() as client,
+            client.stream(
                 "POST", api_url, headers=headers, json=payload, timeout=120.0
-            ) as response:
-                logger.info(f"[invoke_openai] Response status: {response.status_code}")
-                if response.status_code == 401:
-                    raise ValueError(
-                        "OpenAI authentication failed. Run: stravinsky-auth login openai"
-                    )
+            ) as response,
+        ):
+            logger.info(f"[invoke_openai] Response status: {response.status_code}")
+            if response.status_code == 401:
+                raise ValueError("OpenAI authentication failed. Run: stravinsky-auth login openai")
+
+            # ==============================================
+            # 429 RATE LIMIT DETECTION: Tier-aware fallback chain
+            # ==============================================
+            if response.status_code == 429:
+                provider_tracker = get_provider_tracker()
+                provider_tracker.mark_rate_limited(
+                    "openai",
+                    duration=_OAUTH_COOLDOWN_SECONDS,
+                    reason="OpenAI OAuth rate-limited (429)",
+                )
+
+                for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain(
+                    "openai", model
+                ):
+                    if candidate_provider == "openai":
+                        continue
+                    if use_oauth and not provider_tracker.is_available(candidate_provider):
+                        continue
+
+                    if candidate_provider == "gemini":
+                        if not use_oauth:
+                            if _get_gemini_api_key() is None:
+                                continue
+                            _set_api_only_mode("OpenAI OAuth rate-limited (429)")
+
+                        return await invoke_gemini(
+                            token_store=token_store,
+                            prompt=prompt,
+                            model=candidate_model,
+                            temperature=temperature,
+                            max_tokens=max_tokens,
+                            thinking_budget=0,
+                            image_path=None,
+                        )
 
-                if response.status_code >= 400:
-                    error_body = await response.aread()
-                    error_text = error_body.decode("utf-8")
-                    logger.error(f"OpenAI API error {response.status_code}: {error_text}")
-                    logger.error(f"Request payload was: {payload}")
-                    logger.error(f"Request headers were: {headers}")
-                    raise ValueError(f"OpenAI API error {response.status_code}: {error_text}")
-
-                # Parse SSE stream for text deltas
-                async for line in response.aiter_lines():
-                    if line.startswith("data: "):
-                        data_json = line[6:]  # Remove "data: " prefix
-                        try:
-                            data = json_module.loads(data_json)
-                            event_type = data.get("type")
-
-                            # Extract text deltas from SSE stream
-                            if event_type == "response.output_text.delta":
-                                delta = data.get("delta", "")
-                                text_chunks.append(delta)
-
-                        except json_module.JSONDecodeError:
-                            pass  # Skip malformed JSON
-                        except Exception as e:
-                            logger.warning(f"Error processing SSE event: {e}")
+                raise ValueError("OpenAI OAuth rate-limited (429) and no fallback succeeded")
+            if response.status_code >= 400:
+                error_body = await response.aread()
+                error_text = error_body.decode("utf-8")
+                logger.error(f"OpenAI API error {response.status_code}: {error_text}")
+                logger.error(f"Request payload was: {payload}")
+                logger.error(f"Request headers were: {headers}")
+                raise ValueError(f"OpenAI API error {response.status_code}: {error_text}")
+
+            # Parse SSE stream for text deltas
+            async for line in response.aiter_lines():
+                if line.startswith("data: "):
+                    data_json = line[6:]  # Remove "data: " prefix
+                    try:
+                        data = json_module.loads(data_json)
+                        event_type = data.get("type")
+
+                        # Extract text deltas from SSE stream
+                        if event_type == "response.output_text.delta":
+                            delta = data.get("delta", "")
+                            text_chunks.append(delta)
+
+                    except json_module.JSONDecodeError:
+                        pass  # Skip malformed JSON
+                    except Exception as e:
+                        logger.warning(f"Error processing SSE event: {e}")
 
         # Return collected text
         result = "".join(text_chunks)
+
+        # Track estimated usage
+        try:
+            from mcp_bridge.metrics.cost_tracker import get_cost_tracker
+
+            tracker = get_cost_tracker()
+            # Estimate: 4 chars per token
+            input_tokens = len(prompt) // 4
+            output_tokens = len(result) // 4
+
+            tracker.track_usage(
+                model=model,
+                input_tokens=input_tokens,
+                output_tokens=output_tokens,
+                agent_type=agent_type,
+                task_id=task_id,
+            )
+        except Exception as e:
+            logger.warning(f"Failed to track cost: {e}")
+
         if not result:
             return "No response generated"
         return result
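To make the stream handling above concrete, a self-contained example of the SSE lines the parser consumes, and the rough 4-characters-per-token estimate the cost tracker then applies to the result:

```python
import json

# Illustrative SSE lines in the shape the parser above expects; only
# response.output_text.delta events contribute to the returned text.
sample_stream = [
    'data: {"type": "response.created"}',
    'data: {"type": "response.output_text.delta", "delta": "Hel"}',
    'data: {"type": "response.output_text.delta", "delta": "lo"}',
    "data: [DONE]",  # not JSON; skipped by the JSONDecodeError guard
]

text_chunks = []
for line in sample_stream:
    if line.startswith("data: "):
        try:
            event = json.loads(line[6:])
        except json.JSONDecodeError:
            continue
        if event.get("type") == "response.output_text.delta":
            text_chunks.append(event.get("delta", ""))

result = "".join(text_chunks)
assert result == "Hello"
# Cost tracking then estimates tokens as len(text) // 4: here 5 // 4 = 1 token.
```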