stravinsky 0.4.18__py3-none-any.whl → 0.4.66__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of stravinsky might be problematic.

Files changed (184)
  1. mcp_bridge/__init__.py +1 -1
  2. mcp_bridge/auth/__init__.py +16 -6
  3. mcp_bridge/auth/cli.py +202 -11
  4. mcp_bridge/auth/oauth.py +1 -2
  5. mcp_bridge/auth/openai_oauth.py +4 -7
  6. mcp_bridge/auth/token_store.py +0 -1
  7. mcp_bridge/cli/__init__.py +1 -1
  8. mcp_bridge/cli/install_hooks.py +503 -107
  9. mcp_bridge/cli/session_report.py +0 -3
  10. mcp_bridge/config/__init__.py +2 -2
  11. mcp_bridge/config/hook_config.py +3 -5
  12. mcp_bridge/config/rate_limits.py +108 -13
  13. mcp_bridge/hooks/HOOKS_SETTINGS.json +17 -4
  14. mcp_bridge/hooks/__init__.py +14 -4
  15. mcp_bridge/hooks/agent_reminder.py +4 -4
  16. mcp_bridge/hooks/auto_slash_command.py +5 -5
  17. mcp_bridge/hooks/budget_optimizer.py +2 -2
  18. mcp_bridge/hooks/claude_limits_hook.py +114 -0
  19. mcp_bridge/hooks/comment_checker.py +3 -4
  20. mcp_bridge/hooks/compaction.py +2 -2
  21. mcp_bridge/hooks/context.py +2 -1
  22. mcp_bridge/hooks/context_monitor.py +2 -2
  23. mcp_bridge/hooks/delegation_policy.py +85 -0
  24. mcp_bridge/hooks/directory_context.py +3 -3
  25. mcp_bridge/hooks/edit_recovery.py +3 -2
  26. mcp_bridge/hooks/edit_recovery_policy.py +49 -0
  27. mcp_bridge/hooks/empty_message_sanitizer.py +2 -2
  28. mcp_bridge/hooks/events.py +160 -0
  29. mcp_bridge/hooks/git_noninteractive.py +4 -4
  30. mcp_bridge/hooks/keyword_detector.py +8 -10
  31. mcp_bridge/hooks/manager.py +35 -22
  32. mcp_bridge/hooks/notification_hook.py +13 -6
  33. mcp_bridge/hooks/parallel_enforcement_policy.py +67 -0
  34. mcp_bridge/hooks/parallel_enforcer.py +5 -5
  35. mcp_bridge/hooks/parallel_execution.py +22 -10
  36. mcp_bridge/hooks/post_tool/parallel_validation.py +103 -0
  37. mcp_bridge/hooks/pre_compact.py +8 -9
  38. mcp_bridge/hooks/pre_tool/agent_spawn_validator.py +115 -0
  39. mcp_bridge/hooks/preemptive_compaction.py +2 -3
  40. mcp_bridge/hooks/routing_notifications.py +80 -0
  41. mcp_bridge/hooks/rules_injector.py +11 -19
  42. mcp_bridge/hooks/session_idle.py +4 -4
  43. mcp_bridge/hooks/session_notifier.py +4 -4
  44. mcp_bridge/hooks/session_recovery.py +4 -5
  45. mcp_bridge/hooks/stravinsky_mode.py +1 -1
  46. mcp_bridge/hooks/subagent_stop.py +1 -3
  47. mcp_bridge/hooks/task_validator.py +2 -2
  48. mcp_bridge/hooks/tmux_manager.py +7 -8
  49. mcp_bridge/hooks/todo_delegation.py +4 -1
  50. mcp_bridge/hooks/todo_enforcer.py +180 -10
  51. mcp_bridge/hooks/truncation_policy.py +37 -0
  52. mcp_bridge/hooks/truncator.py +1 -2
  53. mcp_bridge/metrics/cost_tracker.py +115 -0
  54. mcp_bridge/native_search.py +93 -0
  55. mcp_bridge/native_watcher.py +118 -0
  56. mcp_bridge/notifications.py +3 -4
  57. mcp_bridge/orchestrator/enums.py +11 -0
  58. mcp_bridge/orchestrator/router.py +165 -0
  59. mcp_bridge/orchestrator/state.py +32 -0
  60. mcp_bridge/orchestrator/visualization.py +14 -0
  61. mcp_bridge/orchestrator/wisdom.py +34 -0
  62. mcp_bridge/prompts/__init__.py +1 -8
  63. mcp_bridge/prompts/dewey.py +1 -1
  64. mcp_bridge/prompts/planner.py +2 -4
  65. mcp_bridge/prompts/stravinsky.py +53 -31
  66. mcp_bridge/proxy/__init__.py +0 -0
  67. mcp_bridge/proxy/client.py +70 -0
  68. mcp_bridge/proxy/model_server.py +157 -0
  69. mcp_bridge/routing/__init__.py +43 -0
  70. mcp_bridge/routing/config.py +250 -0
  71. mcp_bridge/routing/model_tiers.py +135 -0
  72. mcp_bridge/routing/provider_state.py +261 -0
  73. mcp_bridge/routing/task_classifier.py +190 -0
  74. mcp_bridge/server.py +363 -34
  75. mcp_bridge/server_tools.py +298 -6
  76. mcp_bridge/tools/__init__.py +19 -8
  77. mcp_bridge/tools/agent_manager.py +549 -799
  78. mcp_bridge/tools/background_tasks.py +13 -17
  79. mcp_bridge/tools/code_search.py +54 -51
  80. mcp_bridge/tools/continuous_loop.py +0 -1
  81. mcp_bridge/tools/dashboard.py +19 -0
  82. mcp_bridge/tools/find_code.py +296 -0
  83. mcp_bridge/tools/init.py +1 -0
  84. mcp_bridge/tools/list_directory.py +42 -0
  85. mcp_bridge/tools/lsp/__init__.py +8 -8
  86. mcp_bridge/tools/lsp/manager.py +51 -28
  87. mcp_bridge/tools/lsp/tools.py +98 -65
  88. mcp_bridge/tools/model_invoke.py +1047 -152
  89. mcp_bridge/tools/mux_client.py +75 -0
  90. mcp_bridge/tools/project_context.py +1 -2
  91. mcp_bridge/tools/query_classifier.py +132 -49
  92. mcp_bridge/tools/read_file.py +84 -0
  93. mcp_bridge/tools/replace.py +45 -0
  94. mcp_bridge/tools/run_shell_command.py +38 -0
  95. mcp_bridge/tools/search_enhancements.py +347 -0
  96. mcp_bridge/tools/semantic_search.py +677 -92
  97. mcp_bridge/tools/session_manager.py +0 -2
  98. mcp_bridge/tools/skill_loader.py +0 -1
  99. mcp_bridge/tools/task_runner.py +5 -7
  100. mcp_bridge/tools/templates.py +3 -3
  101. mcp_bridge/tools/tool_search.py +331 -0
  102. mcp_bridge/tools/write_file.py +29 -0
  103. mcp_bridge/update_manager.py +33 -37
  104. mcp_bridge/update_manager_pypi.py +6 -8
  105. mcp_bridge/utils/cache.py +82 -0
  106. mcp_bridge/utils/process.py +71 -0
  107. mcp_bridge/utils/session_state.py +51 -0
  108. mcp_bridge/utils/truncation.py +76 -0
  109. {stravinsky-0.4.18.dist-info → stravinsky-0.4.66.dist-info}/METADATA +84 -35
  110. stravinsky-0.4.66.dist-info/RECORD +198 -0
  111. {stravinsky-0.4.18.dist-info → stravinsky-0.4.66.dist-info}/entry_points.txt +1 -0
  112. stravinsky_claude_assets/HOOKS_INTEGRATION.md +316 -0
  113. stravinsky_claude_assets/agents/HOOKS.md +437 -0
  114. stravinsky_claude_assets/agents/code-reviewer.md +210 -0
  115. stravinsky_claude_assets/agents/comment_checker.md +580 -0
  116. stravinsky_claude_assets/agents/debugger.md +254 -0
  117. stravinsky_claude_assets/agents/delphi.md +495 -0
  118. stravinsky_claude_assets/agents/dewey.md +248 -0
  119. stravinsky_claude_assets/agents/explore.md +1198 -0
  120. stravinsky_claude_assets/agents/frontend.md +472 -0
  121. stravinsky_claude_assets/agents/implementation-lead.md +164 -0
  122. stravinsky_claude_assets/agents/momus.md +464 -0
  123. stravinsky_claude_assets/agents/research-lead.md +141 -0
  124. stravinsky_claude_assets/agents/stravinsky.md +730 -0
  125. stravinsky_claude_assets/commands/delphi.md +9 -0
  126. stravinsky_claude_assets/commands/dewey.md +54 -0
  127. stravinsky_claude_assets/commands/git-master.md +112 -0
  128. stravinsky_claude_assets/commands/index.md +49 -0
  129. stravinsky_claude_assets/commands/publish.md +86 -0
  130. stravinsky_claude_assets/commands/review.md +73 -0
  131. stravinsky_claude_assets/commands/str/agent_cancel.md +70 -0
  132. stravinsky_claude_assets/commands/str/agent_list.md +56 -0
  133. stravinsky_claude_assets/commands/str/agent_output.md +92 -0
  134. stravinsky_claude_assets/commands/str/agent_progress.md +74 -0
  135. stravinsky_claude_assets/commands/str/agent_retry.md +94 -0
  136. stravinsky_claude_assets/commands/str/cancel.md +51 -0
  137. stravinsky_claude_assets/commands/str/clean.md +97 -0
  138. stravinsky_claude_assets/commands/str/continue.md +38 -0
  139. stravinsky_claude_assets/commands/str/index.md +199 -0
  140. stravinsky_claude_assets/commands/str/list_watchers.md +96 -0
  141. stravinsky_claude_assets/commands/str/search.md +205 -0
  142. stravinsky_claude_assets/commands/str/start_filewatch.md +136 -0
  143. stravinsky_claude_assets/commands/str/stats.md +71 -0
  144. stravinsky_claude_assets/commands/str/stop_filewatch.md +89 -0
  145. stravinsky_claude_assets/commands/str/unwatch.md +42 -0
  146. stravinsky_claude_assets/commands/str/watch.md +45 -0
  147. stravinsky_claude_assets/commands/strav.md +53 -0
  148. stravinsky_claude_assets/commands/stravinsky.md +292 -0
  149. stravinsky_claude_assets/commands/verify.md +60 -0
  150. stravinsky_claude_assets/commands/version.md +5 -0
  151. stravinsky_claude_assets/hooks/README.md +248 -0
  152. stravinsky_claude_assets/hooks/comment_checker.py +193 -0
  153. stravinsky_claude_assets/hooks/context.py +38 -0
  154. stravinsky_claude_assets/hooks/context_monitor.py +153 -0
  155. stravinsky_claude_assets/hooks/dependency_tracker.py +73 -0
  156. stravinsky_claude_assets/hooks/edit_recovery.py +46 -0
  157. stravinsky_claude_assets/hooks/execution_state_tracker.py +68 -0
  158. stravinsky_claude_assets/hooks/notification_hook.py +103 -0
  159. stravinsky_claude_assets/hooks/notification_hook_v2.py +96 -0
  160. stravinsky_claude_assets/hooks/parallel_execution.py +241 -0
  161. stravinsky_claude_assets/hooks/parallel_reinforcement.py +106 -0
  162. stravinsky_claude_assets/hooks/parallel_reinforcement_v2.py +112 -0
  163. stravinsky_claude_assets/hooks/pre_compact.py +123 -0
  164. stravinsky_claude_assets/hooks/ralph_loop.py +173 -0
  165. stravinsky_claude_assets/hooks/session_recovery.py +263 -0
  166. stravinsky_claude_assets/hooks/stop_hook.py +89 -0
  167. stravinsky_claude_assets/hooks/stravinsky_metrics.py +164 -0
  168. stravinsky_claude_assets/hooks/stravinsky_mode.py +146 -0
  169. stravinsky_claude_assets/hooks/subagent_stop.py +98 -0
  170. stravinsky_claude_assets/hooks/todo_continuation.py +111 -0
  171. stravinsky_claude_assets/hooks/todo_delegation.py +96 -0
  172. stravinsky_claude_assets/hooks/tool_messaging.py +281 -0
  173. stravinsky_claude_assets/hooks/truncator.py +23 -0
  174. stravinsky_claude_assets/rules/deployment_safety.md +51 -0
  175. stravinsky_claude_assets/rules/integration_wiring.md +89 -0
  176. stravinsky_claude_assets/rules/pypi_deployment.md +220 -0
  177. stravinsky_claude_assets/rules/stravinsky_orchestrator.md +32 -0
  178. stravinsky_claude_assets/settings.json +152 -0
  179. stravinsky_claude_assets/skills/chrome-devtools/SKILL.md +81 -0
  180. stravinsky_claude_assets/skills/sqlite/SKILL.md +77 -0
  181. stravinsky_claude_assets/skills/supabase/SKILL.md +74 -0
  182. stravinsky_claude_assets/task_dependencies.json +34 -0
  183. stravinsky-0.4.18.dist-info/RECORD +0 -88
  184. {stravinsky-0.4.18.dist-info → stravinsky-0.4.66.dist-info}/WHEEL +0 -0
mcp_bridge/tools/model_invoke.py

@@ -6,12 +6,16 @@ API requests to external model providers.
 """
 
 import asyncio
+import base64
+import json as json_module
 import logging
 import os
 import time
 import uuid
-import base64
-import json as json_module
+
+from mcp_bridge.config.rate_limits import get_rate_limiter, get_gemini_time_limiter
+from mcp_bridge.routing.model_tiers import get_oauth_fallback_chain
+from mcp_bridge.routing.provider_state import get_provider_tracker
 
 logger = logging.getLogger(__name__)
 
@@ -43,6 +47,130 @@ def _summarize_prompt(prompt: str, max_length: int = 120) -> str:
 _CODEX_INSTRUCTIONS_CACHE = {}
 _CODEX_INSTRUCTIONS_RELEASE_TAG = "rust-v0.77.0"  # Update as needed
 
+# ==============================================
+# GEMINI AUTH MODE STATE (OAuth-first with 429 fallback)
+# ==============================================
+# When OAuth gets a 429 rate limit, we switch to API-only mode for 5 minutes.
+# After 5 minutes, we automatically retry OAuth.
+_GEMINI_OAUTH_429_TIMESTAMP: float | None = None  # Timestamp of last 429
+_OAUTH_COOLDOWN_SECONDS = 300  # 5 minutes
+
+# ==============================================
+# OPENAI AUTH MODE STATE (OAuth-first with 429 fallback)
+# ==============================================
+# When OpenAI OAuth gets a 429 rate limit, we fallback to Gemini for 5 minutes.
+# After 5 minutes, we automatically retry OpenAI OAuth.
+_OPENAI_OAUTH_429_TIMESTAMP: float | None = None  # Timestamp of last OpenAI 429
+
+
+def _get_gemini_api_key() -> str | None:
+    """Get Gemini API key from environment (loaded from ~/.stravinsky/.env)."""
+    return os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
+
+
+def _set_api_only_mode(reason: str = "429 rate limit"):
+    """Switch to API-only mode after OAuth rate limit (5-minute cooldown)."""
+    global _GEMINI_OAUTH_429_TIMESTAMP
+    _GEMINI_OAUTH_429_TIMESTAMP = time.time()
+    logger.warning(f"[Gemini] Switching to API-only mode: {reason}")
+    import sys
+
+    print(
+        f"⚠️ GEMINI: OAuth rate-limited (429). "
+        f"Using API key for 5 minutes (will retry OAuth at {time.strftime('%H:%M:%S', time.localtime(_GEMINI_OAUTH_429_TIMESTAMP + _OAUTH_COOLDOWN_SECONDS))}).",
+        file=sys.stderr,
+    )
+
+
+def _is_api_only_mode() -> bool:
+    """
+    Check if we're in API-only mode (5-minute cooldown after 429).
+
+    Returns True if:
+    - 429 occurred AND
+    - Less than 5 minutes have elapsed
+
+    Automatically resets to OAuth mode after 5 minutes.
+    """
+    global _GEMINI_OAUTH_429_TIMESTAMP
+
+    if _GEMINI_OAUTH_429_TIMESTAMP is None:
+        return False
+
+    elapsed = time.time() - _GEMINI_OAUTH_429_TIMESTAMP
+
+    if elapsed >= _OAUTH_COOLDOWN_SECONDS:
+        # Cooldown expired - reset to OAuth mode
+        logger.info(
+            f"[Gemini] 5-minute cooldown expired (elapsed: {elapsed:.0f}s). Retrying OAuth."
+        )
+        _GEMINI_OAUTH_429_TIMESTAMP = None
+        return False
+
+    # Still in cooldown
+    remaining = _OAUTH_COOLDOWN_SECONDS - elapsed
+    logger.debug(f"[Gemini] API-only mode active ({remaining:.0f}s remaining)")
+    return True
+
+
+def reset_gemini_auth_mode():
+    """Reset to OAuth-first mode. Call this to manually reset cooldown."""
+    global _GEMINI_OAUTH_429_TIMESTAMP
+    _GEMINI_OAUTH_429_TIMESTAMP = None
+    logger.info("[Gemini] Reset to OAuth-first mode")
+
+
+def _set_openai_fallback_mode(reason: str = "429 rate limit"):
+    """Switch to Gemini fallback after OpenAI rate limit (5-minute cooldown)."""
+    global _OPENAI_OAUTH_429_TIMESTAMP
+    _OPENAI_OAUTH_429_TIMESTAMP = time.time()
+    logger.warning(f"[OpenAI] Switching to Gemini fallback: {reason}")
+    import sys
+
+    print(
+        f"⚠️ OPENAI: OAuth rate-limited (429). "
+        f"Using Gemini for 5 minutes (will retry OpenAI at {time.strftime('%H:%M:%S', time.localtime(_OPENAI_OAUTH_429_TIMESTAMP + _OAUTH_COOLDOWN_SECONDS))}).",
+        file=sys.stderr,
+    )
+
+
+def _is_openai_fallback_mode() -> bool:
+    """
+    Check if we're in Gemini fallback mode (5-minute cooldown after OpenAI 429).
+
+    Returns True if:
+    - OpenAI 429 occurred AND
+    - Less than 5 minutes have elapsed
+
+    Automatically resets to OpenAI mode after 5 minutes.
+    """
+    global _OPENAI_OAUTH_429_TIMESTAMP
+
+    if _OPENAI_OAUTH_429_TIMESTAMP is None:
+        return False
+
+    elapsed = time.time() - _OPENAI_OAUTH_429_TIMESTAMP
+
+    if elapsed >= _OAUTH_COOLDOWN_SECONDS:
+        # Cooldown expired - reset to OpenAI mode
+        logger.info(
+            f"[OpenAI] 5-minute cooldown expired (elapsed: {elapsed:.0f}s). Retrying OpenAI OAuth."
+        )
+        _OPENAI_OAUTH_429_TIMESTAMP = None
+        return False
+
+    # Still in cooldown
+    remaining = _OAUTH_COOLDOWN_SECONDS - elapsed
+    logger.debug(f"[OpenAI] Gemini fallback mode active ({remaining:.0f}s remaining)")
+    return True
+
+
+def reset_openai_auth_mode():
+    """Reset to OpenAI-first mode. Call this to manually reset cooldown."""
+    global _OPENAI_OAUTH_429_TIMESTAMP
+    _OPENAI_OAUTH_429_TIMESTAMP = None
+    logger.info("[OpenAI] Reset to OAuth-first mode")
+
 
 async def _fetch_codex_instructions(model: str = "gpt-5.2-codex") -> str:
     """
@@ -108,20 +236,21 @@ def resolve_gemini_model(model: str) -> str:
 import httpx
 from tenacity import (
     retry,
+    retry_if_exception,
     stop_after_attempt,
     wait_exponential,
-    retry_if_exception,
 )
 
-from ..auth.token_store import TokenStore
 from ..auth.oauth import (
-    refresh_access_token as gemini_refresh,
-    ANTIGRAVITY_HEADERS,
-    ANTIGRAVITY_ENDPOINTS,
     ANTIGRAVITY_DEFAULT_PROJECT_ID,
-    ANTIGRAVITY_API_VERSION,
+    ANTIGRAVITY_ENDPOINTS,
+    ANTIGRAVITY_HEADERS,
+)
+from ..auth.oauth import (
+    refresh_access_token as gemini_refresh,
 )
 from ..auth.openai_oauth import refresh_access_token as openai_refresh
+from ..auth.token_store import TokenStore
 from ..hooks.manager import get_hook_manager
 
 # ========================
@@ -135,8 +264,52 @@ _SESSION_CACHE: dict[str, str] = {}
 # Pooled HTTP client for connection reuse
 _HTTP_CLIENT: httpx.AsyncClient | None = None
 
-# Rate limiting: Max 5 concurrent Gemini requests to prevent burst rate limits
-_GEMINI_SEMAPHORE: asyncio.Semaphore | None = None
+# Per-model semaphores for async rate limiting (uses config from ~/.stravinsky/config.json)
+_GEMINI_SEMAPHORES: dict[str, asyncio.Semaphore] = {}
+
+
+def _get_gemini_rate_limit(model: str) -> int:
+    """
+    Get configured rate limit for a Gemini model.
+
+    Reads from ~/.stravinsky/config.json if available, otherwise uses defaults.
+
+    Args:
+        model: Gemini model name (e.g., "gemini-3-flash", "gemini-3-pro-high")
+
+    Returns:
+        Configured concurrency limit for this model
+    """
+    rate_limiter = get_rate_limiter()
+    # Normalize model name to match config keys
+    normalized = rate_limiter._normalize_model(model)
+    return rate_limiter._limits.get(normalized, rate_limiter._limits.get("_default", 5))
+
+
+def _get_gemini_semaphore(model: str) -> asyncio.Semaphore:
+    """
+    Get or create async semaphore for Gemini model rate limiting.
+
+    Creates one semaphore per model type with limits from config.
+    Limits can be customized in ~/.stravinsky/config.json:
+        {
+            "rate_limits": {
+                "gemini-3-flash": 15,
+                "gemini-3-pro-high": 8
+            }
+        }
+
+    Args:
+        model: Gemini model name
+
+    Returns:
+        asyncio.Semaphore with configured limit for this model
+    """
+    if model not in _GEMINI_SEMAPHORES:
+        limit = _get_gemini_rate_limit(model)
+        _GEMINI_SEMAPHORES[model] = asyncio.Semaphore(limit)
+        logger.info(f"[RateLimit] Created semaphore for {model} with limit {limit}")
+    return _GEMINI_SEMAPHORES[model]
 
 
 def _get_session_id(conversation_key: str | None = None) -> str:
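Note on the block above: the per-model semaphore registry replaces the old single Semaphore(5); each model gets its own concurrency cap, lazily created from config. A runnable sketch of the pattern, with a hard-coded limits table standing in for the rate-limiter object (the keys mirror the ~/.stravinsky/config.json example above):

import asyncio

LIMITS = {"gemini-3-flash": 15, "gemini-3-pro-high": 8, "_default": 5}
_SEMAPHORES: dict[str, asyncio.Semaphore] = {}

def get_semaphore(model: str) -> asyncio.Semaphore:
    # One semaphore per model, sized from config with a default fallback
    if model not in _SEMAPHORES:
        _SEMAPHORES[model] = asyncio.Semaphore(LIMITS.get(model, LIMITS["_default"]))
    return _SEMAPHORES[model]

async def call_model(model: str) -> None:
    async with get_semaphore(model):  # at most LIMITS[model] calls run at once
        await asyncio.sleep(0.1)  # stand-in for the HTTP request

async def main() -> None:
    await asyncio.gather(*(call_model("gemini-3-flash") for _ in range(30)))

asyncio.run(main())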
@@ -178,19 +351,6 @@ async def _get_http_client() -> httpx.AsyncClient:
     return _HTTP_CLIENT
 
 
-def _get_gemini_semaphore() -> asyncio.Semaphore:
-    """
-    Get or create semaphore for Gemini API rate limiting.
-
-    Limits concurrent Gemini requests to prevent burst rate limits (429 errors).
-    Max 5 concurrent requests balances throughput with API quota constraints.
-    """
-    global _GEMINI_SEMAPHORE
-    if _GEMINI_SEMAPHORE is None:
-        _GEMINI_SEMAPHORE = asyncio.Semaphore(5)
-    return _GEMINI_SEMAPHORE
-
-
 def _extract_gemini_response(data: dict) -> str:
     """
     Extract text from Gemini response, handling thinking blocks.
@@ -314,45 +474,167 @@ def is_retryable_exception(e: Exception) -> bool:
     return False
 
 
-@retry(
-    stop=stop_after_attempt(2),  # Reduced from 5 to 2 attempts
-    wait=wait_exponential(multiplier=2, min=10, max=120),  # Longer waits: 10s → 20s → 40s
-    retry=retry_if_exception(is_retryable_exception),
-    before_sleep=lambda retry_state: logger.info(
-        f"Server error, retrying in {retry_state.next_action.sleep} seconds..."
-    ),
-)
-async def invoke_gemini(
-    token_store: TokenStore,
+async def _invoke_gemini_with_api_key(
+    api_key: str,
     prompt: str,
     model: str = "gemini-3-flash",
     temperature: float = 0.7,
     max_tokens: int = 4096,
     thinking_budget: int = 0,
     image_path: str | None = None,
+    agent_context: dict | None = None,
 ) -> str:
     """
-    Invoke a Gemini model with the given prompt.
+    Invoke Gemini using API key authentication (google-genai library).
 
-    Uses OAuth authentication with Antigravity credentials.
-    Supports vision API for image/PDF analysis when image_path is provided.
+    This is an alternative to OAuth authentication that uses the official
+    google-genai Python library with a simple API key.
 
     Args:
-        token_store: Token store for OAuth credentials
+        api_key: Gemini API key (from GEMINI_API_KEY or GOOGLE_API_KEY env var)
         prompt: The prompt to send to Gemini
-        model: Gemini model to use
+        model: Gemini model to use (e.g., "gemini-3-flash-preview")
         temperature: Sampling temperature (0.0-2.0)
         max_tokens: Maximum tokens in response
-        thinking_budget: Tokens reserved for internal reasoning
-        image_path: Optional path to image/PDF for vision analysis (token optimization)
+        thinking_budget: Tokens reserved for internal reasoning (if supported)
+        image_path: Optional path to image/PDF for vision analysis
 
     Returns:
         The model's response text.
 
     Raises:
-        ValueError: If not authenticated with Gemini
-        httpx.HTTPStatusError: If API request fails
+        ImportError: If google-genai library is not installed
+        ValueError: If API request fails
     """
+    try:
+        from google import genai
+    except ImportError:
+        raise ImportError(
+            "google-genai library not installed. Install with: pip install google-genai"
+        )
+
+    # Map stravinsky model names to google-genai model names
+    # Pass through gemini-3-* models directly (Tier 3 benefits)
+    model_map = {
+        "gemini-3-flash": "gemini-3-flash-preview",  # Tier 3 model (not -exp)
+        "gemini-3-flash-preview": "gemini-3-flash-preview",  # Pass through
+        "gemini-3-pro-low": "gemini-3-flash-preview",
+        "gemini-3-pro-high": "gemini-3-pro-preview",  # Tier 3 pro model
+        "gemini-3-pro-preview": "gemini-3-pro-preview",  # Pass through
+        "gemini-flash": "gemini-3-flash-preview",
+        "gemini-pro": "gemini-3-pro-preview",
+        "gemini-3-pro": "gemini-3-pro-preview",
+        "gemini": "gemini-3-flash-preview",
+    }
+    genai_model = model_map.get(model, "gemini-3-flash-preview")  # Default to tier 3 flash
+
+    try:
+        # Initialize client with API key
+        client = genai.Client(api_key=api_key)
+
+        # Build generation config
+        config = {
+            "temperature": temperature,
+            "max_output_tokens": max_tokens,
+        }
+
+        # Add thinking budget if supported (experimental feature)
+        if thinking_budget > 0:
+            config["thinking_config"] = {
+                "thinking_budget": thinking_budget,
+            }
+
+        # Build contents - text prompt plus optional image
+        contents = [prompt]
+
+        # Add image data for vision analysis
+        if image_path:
+            from pathlib import Path
+
+            image_file = Path(image_path)
+            if image_file.exists():
+                # google-genai supports direct file path or base64
+                # For simplicity, use the file path directly
+                contents.append(image_file)
+                logger.info(f"[API_KEY] Added vision data: {image_path}")
+
+        # Generate content
+        response = client.models.generate_content(
+            model=genai_model,
+            contents=contents,
+            config=config,
+        )
+
+        # Track usage
+        try:
+            from mcp_bridge.metrics.cost_tracker import get_cost_tracker
+
+            tracker = get_cost_tracker()
+            if hasattr(response, "usage_metadata"):
+                usage = response.usage_metadata
+                agent_type = (agent_context or {}).get("agent_type", "unknown")
+                task_id = (agent_context or {}).get("task_id", "")
+
+                tracker.track_usage(
+                    model=model,
+                    input_tokens=usage.prompt_token_count,
+                    output_tokens=usage.candidates_token_count,
+                    agent_type=agent_type,
+                    task_id=task_id,
+                )
+        except Exception:
+            pass
+
+        # Extract text from response
+        if hasattr(response, "text"):
+            return response.text
+        elif hasattr(response, "candidates") and response.candidates:
+            # Fallback: extract from candidates
+            candidate = response.candidates[0]
+            if hasattr(candidate, "content"):
+                parts = candidate.content.parts
+                text_parts = [part.text for part in parts if hasattr(part, "text")]
+                return "".join(text_parts) if text_parts else "No response generated"
+
+        return "No response generated"
+
+    except Exception as e:
+        logger.error(f"API key authentication failed: {e}")
+        raise ValueError(f"Gemini API key request failed: {e}")
+
+
+@retry(
+    stop=stop_after_attempt(2),  # Reduced from 5 to 2 attempts
+    wait=wait_exponential(multiplier=2, min=10, max=120),  # Longer waits: 10s → 20s → 40s
+    retry=retry_if_exception(is_retryable_exception),
+    before_sleep=lambda retry_state: logger.info(
+        f"Server error, retrying in {retry_state.next_action.sleep} seconds..."
+    ),
+)
+async def invoke_gemini(
+    token_store: TokenStore,
+    prompt: str,
+    model: str = "gemini-3-flash",
+    temperature: float = 0.7,
+    max_tokens: int = 4096,
+    thinking_budget: int = 0,
+    image_path: str | None = None,
+) -> str:
+    """
+    Invoke a Gemini model with the given prompt.
+    """
+    from mcp_bridge.proxy.client import is_proxy_enabled, proxy_invoke_gemini
+
+    if is_proxy_enabled():
+        return await proxy_invoke_gemini(
+            prompt=prompt,
+            model=model,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            thinking_budget=thinking_budget,
+            image_path=image_path,
+        )
+
     logger.info(f"[DEBUG] invoke_gemini called, uuid module check: {uuid}")
     # Execute pre-model invoke hooks
     params = {
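Note on the block above: the API-key path boils down to a short google-genai call. A minimal standalone version of it, assuming `pip install google-genai` and a GEMINI_API_KEY in the environment (the model name is one of the mapped values from the hunk; the prompt is illustrative):

import os
from google import genai

client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
response = client.models.generate_content(
    model="gemini-3-flash-preview",
    contents="Summarize PEP 8 in one sentence.",
    config={"temperature": 0.7, "max_output_tokens": 256},  # plain dict config, as in the hunk
)
print(response.text)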
@@ -384,14 +666,122 @@ async def invoke_gemini(
     # Log with agent context and prompt summary
     logger.info(f"[{agent_type}] → {model}: {prompt_summary}")
 
-    # USER-VISIBLE NOTIFICATION (stderr) - Shows when Gemini is invoked
+    # Get API key from environment (loaded from ~/.stravinsky/.env)
+    api_key = _get_gemini_api_key()
     import sys
+
     task_info = f" task={task_id}" if task_id else ""
     desc_info = f" | {description}" if description else ""
-    print(f"🔮 GEMINI: {model} | agent={agent_type}{task_info}{desc_info}", file=sys.stderr)
 
-    # Acquire semaphore to limit concurrent Gemini requests (prevents 429 rate limits)
-    semaphore = _get_gemini_semaphore()
+    # ==============================================
+    # AUTH PRIORITY: OAuth first, API fallback on 429
+    # ==============================================
+    # 1. If API-only mode (after 429), use API key directly
+    # 2. Otherwise, try OAuth first
+    # 3. On 429 from OAuth, switch to API-only mode and retry
+
+    # If we're in API-only mode (after a 429), use API key directly
+    if _is_api_only_mode():
+        if not api_key:
+            raise ValueError(
+                "OAuth rate-limited (429) and no API key available. "
+                "Add GEMINI_API_KEY to ~/.stravinsky/.env"
+            )
+
+        # Calculate remaining cooldown time
+        if _GEMINI_OAUTH_429_TIMESTAMP is not None:
+            remaining = _OAUTH_COOLDOWN_SECONDS - (time.time() - _GEMINI_OAUTH_429_TIMESTAMP)
+            remaining_mins = int(remaining // 60)
+            remaining_secs = int(remaining % 60)
+            cooldown_msg = f" (OAuth retry in {remaining_mins}m {remaining_secs}s)"
+        else:
+            cooldown_msg = ""
+
+        # Check time-window rate limit (30 req/min)
+        time_limiter = get_gemini_time_limiter()
+        wait_time = time_limiter.acquire_visible("GEMINI", "API key")
+        if wait_time > 0:
+            await asyncio.sleep(wait_time)
+            # Re-acquire after sleep
+            wait_time = time_limiter.acquire_visible("GEMINI", "API key")
+
+        print(
+            f"🔑 GEMINI (API-only cooldown{cooldown_msg}): {model} | agent={agent_type}{task_info}{desc_info}",
+            file=sys.stderr,
+        )
+        logger.info(f"[{agent_type}] Using API key (5-min cooldown after OAuth 429)")
+        semaphore = _get_gemini_semaphore(model)
+        async with semaphore:
+            result = await _invoke_gemini_with_api_key(
+                api_key=api_key,
+                prompt=prompt,
+                model=model,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                thinking_budget=thinking_budget,
+                image_path=image_path,
+                agent_context=agent_context,
+            )
+            # Prepend auth header for visibility in logs
+            auth_header = f"[Auth: API key (5-min cooldown) | Model: {model}]\n\n"
+            return auth_header + result
+
+    provider_tracker = get_provider_tracker()
+
+    # If Gemini is in cooldown, follow tier-aware fallback chain.
+    if not provider_tracker.is_available("gemini"):
+        for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain("gemini", model):
+            if candidate_provider == "gemini" and use_oauth:
+                continue
+            if use_oauth and not provider_tracker.is_available(candidate_provider):
+                continue
+
+            if candidate_provider == "gemini" and not use_oauth:
+                api_key = _get_gemini_api_key()
+                if not api_key:
+                    continue
+                _set_api_only_mode("Gemini in cooldown; using API key")
+                result = await _invoke_gemini_with_api_key(
+                    api_key=api_key,
+                    prompt=prompt,
+                    model=candidate_model,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    thinking_budget=thinking_budget,
+                    image_path=image_path,
+                    agent_context=agent_context,
+                )
+                auth_header = f"[Auth: API key (cooldown) | Model: {candidate_model}]\n\n"
+                return auth_header + result
+
+            if candidate_provider == "openai" and use_oauth:
+                return await invoke_openai(
+                    token_store=token_store,
+                    prompt=prompt,
+                    model=candidate_model,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    thinking_budget=0,
+                    reasoning_effort="medium",
+                )
+
+    # DEFAULT: Try OAuth first (Antigravity)
+
+    # Check time-window rate limit (30 req/min)
+    time_limiter = get_gemini_time_limiter()
+    wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
+    if wait_time > 0:
+        await asyncio.sleep(wait_time)
+        # Re-acquire after sleep
+        wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
+
+    print(
+        f"🔮 GEMINI (OAuth): {model} | agent={agent_type}{task_info}{desc_info}",
+        file=sys.stderr,
+    )
+    logger.info(f"[{agent_type}] Using OAuth authentication (Antigravity)")
+    # Rate limit concurrent Gemini requests (configurable via ~/.stravinsky/config.json)
+    semaphore = _get_gemini_semaphore(model)
     async with semaphore:
         access_token = await _ensure_valid_token(token_store, "gemini")
 
@@ -437,12 +827,14 @@ async def invoke_gemini(
             image_data = base64.b64encode(image_file.read_bytes()).decode("utf-8")
 
             # Add inline image data for Gemini Vision API
-            parts.append({
-                "inlineData": {
-                    "mimeType": mime_type,
-                    "data": image_data,
+            parts.append(
+                {
+                    "inlineData": {
+                        "mimeType": mime_type,
+                        "data": image_data,
+                    }
                 }
-            })
+            )
             logger.info(f"[multimodal] Added vision data: {image_path} ({mime_type})")
 
         inner_payload = {
@@ -514,7 +906,7 @@ async def invoke_gemini(
                 error_text = response.text.lower()
                 if "thinking" in error_text or "signature" in error_text:
                     logger.warning(
-                        f"[Gemini] Thinking error detected, clearing session cache and retrying"
+                        "[Gemini] Thinking error detected, clearing session cache and retrying"
                     )
                     clear_session_cache()
                     # Update session ID for retry
@@ -541,6 +933,61 @@ async def invoke_gemini(
                 continue
             break
 
+        # ==============================================
+        # 429 RATE LIMIT DETECTION: Tier-aware fallback chain
+        # ==============================================
+        if response is not None and response.status_code == 429:
+            provider_tracker = get_provider_tracker()
+            provider_tracker.mark_rate_limited(
+                "gemini",
+                duration=_OAUTH_COOLDOWN_SECONDS,
+                reason="Gemini OAuth rate-limited (429)",
+            )
+
+            for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain(
+                "gemini", model
+            ):
+                if candidate_provider == "gemini" and use_oauth:
+                    continue
+                if use_oauth and not provider_tracker.is_available(candidate_provider):
+                    continue
+
+                if candidate_provider == "gemini" and not use_oauth:
+                    api_key = _get_gemini_api_key()
+                    if not api_key:
+                        continue
+                    _set_api_only_mode("OAuth rate-limited (429)")
+                    logger.info("[Gemini] Retrying with API key after OAuth 429")
+                    result = await _invoke_gemini_with_api_key(
+                        api_key=api_key,
+                        prompt=prompt,
+                        model=candidate_model,
+                        temperature=temperature,
+                        max_tokens=max_tokens,
+                        thinking_budget=thinking_budget,
+                        image_path=image_path,
+                        agent_context=agent_context,
+                    )
+                    auth_header = (
+                        f"[Auth: API key (OAuth 429 fallback) | Model: {candidate_model}]\n\n"
+                    )
+                    return auth_header + result
+
+                if candidate_provider == "openai" and use_oauth:
+                    return await invoke_openai(
+                        token_store=token_store,
+                        prompt=prompt,
+                        model=candidate_model,
+                        temperature=temperature,
+                        max_tokens=max_tokens,
+                        thinking_budget=0,
+                        reasoning_effort="medium",
+                    )
+
+            raise ValueError(
+                "OAuth rate-limited (429) and no fallback succeeded. "
+                "Add GEMINI_API_KEY to ~/.stravinsky/.env"
+            )
+
         if response is None:
             # FALLBACK: Try Claude sonnet-4.5 for agents that support it
             agent_context = params.get("agent_context", {})
@@ -549,16 +996,17 @@ async def invoke_gemini(
             if agent_type in ("dewey", "explore", "document_writer", "multimodal"):
                 logger.warning(f"[{agent_type}] Gemini failed, falling back to Claude sonnet-4.5")
                 try:
-                    import subprocess
-                    fallback_result = subprocess.run(
+                    from mcp_bridge.utils.process import async_execute
+
+                    result_obj = await async_execute(
                         ["claude", "-p", prompt, "--model", "sonnet", "--output-format", "text"],
-                        capture_output=True,
-                        text=True,
                         timeout=120,
-                        cwd=os.getcwd(),
                     )
-                    if fallback_result.returncode == 0 and fallback_result.stdout.strip():
-                        return fallback_result.stdout.strip()
+                    if result_obj.returncode == 0 and result_obj.stdout.strip():
+                        result = result_obj.stdout.strip()
+                        # Prepend auth header for visibility
+                        auth_header = f"[Auth: Claude fallback | Model: sonnet-4.5]\n\n"
+                        return auth_header + result
                 except Exception as fallback_error:
                     logger.error(f"Fallback to Claude also failed: {fallback_error}")
 
@@ -567,8 +1015,31 @@ async def invoke_gemini(
         response.raise_for_status()
         data = response.json()
 
+        # Track usage
+        try:
+            from mcp_bridge.metrics.cost_tracker import get_cost_tracker
+
+            tracker = get_cost_tracker()
+            usage = data.get("usageMetadata", {})
+            input_tokens = usage.get("promptTokenCount", 0)
+            output_tokens = usage.get("candidatesTokenCount", 0)
+
+            tracker.track_usage(
+                model=model,
+                input_tokens=input_tokens,
+                output_tokens=output_tokens,
+                agent_type=agent_type,
+                task_id=task_id,
+            )
+        except Exception as e:
+            logger.warning(f"Failed to track cost: {e}")
+
         # Extract text from response using thinking-aware parser
-        return _extract_gemini_response(data)
+        result = _extract_gemini_response(data)
+
+        # Prepend auth header for visibility in logs
+        auth_header = f"[Auth: OAuth | Model: {model}]\n\n"
+        return auth_header + result
 
 
 # ========================
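Note on the block above: the cost-tracking hook reads token counts straight off the REST payload's usageMetadata block, defaulting to 0 when fields are missing. A tiny sketch of that extraction using the same key names (the tracker itself is omitted here):

def tokens_from_gemini_response(data: dict) -> tuple[int, int]:
    # usageMetadata is optional; default to 0 like the hunk above
    usage = data.get("usageMetadata", {})
    return usage.get("promptTokenCount", 0), usage.get("candidatesTokenCount", 0)

sample = {"usageMetadata": {"promptTokenCount": 1200, "candidatesTokenCount": 350}}
print(tokens_from_gemini_response(sample))  # (1200, 350)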
@@ -579,9 +1050,57 @@ async def invoke_gemini(
 AGENT_TOOLS = [
     {
         "functionDeclarations": [
+            {
+                "name": "semantic_search",
+                "description": "Search codebase with natural language query using semantic embeddings. ALWAYS use this FIRST before grep_search or read_file to find relevant files efficiently. Returns code snippets with file paths and relevance scores.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "Natural language search query (e.g., 'find authentication logic', 'PDF rendering code')",
+                        },
+                        "project_path": {
+                            "type": "string",
+                            "description": "Path to the project root (default: '.')",
+                        },
+                        "n_results": {
+                            "type": "integer",
+                            "description": "Maximum number of results to return (default: 10)",
+                        },
+                    },
+                    "required": ["query"],
+                },
+            },
+            {
+                "name": "hybrid_search",
+                "description": "Hybrid search combining semantic similarity with structural AST pattern matching. Use when you need precise structural patterns (e.g., specific function signatures) combined with semantic relevance.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "Natural language search query (e.g., 'find authentication logic')",
+                        },
+                        "pattern": {
+                            "type": "string",
+                            "description": "Optional ast-grep pattern for structural matching (e.g., 'def $FUNC($$$):', 'async function $NAME($$$)')",
+                        },
+                        "project_path": {
+                            "type": "string",
+                            "description": "Path to the project root (default: '.')",
+                        },
+                        "n_results": {
+                            "type": "integer",
+                            "description": "Maximum number of results to return (default: 10)",
+                        },
+                    },
+                    "required": ["query"],
+                },
+            },
             {
                 "name": "read_file",
-                "description": "Read the contents of a file. Returns the file contents as text.",
+                "description": "Read the contents of a file. Returns the file contents as text. USE ONLY AFTER semantic_search identifies the target file.",
                 "parameters": {
                     "type": "object",
                     "properties": {
@@ -606,7 +1125,7 @@ AGENT_TOOLS = [
             },
             {
                 "name": "grep_search",
-                "description": "Search for a pattern in files using ripgrep. Returns matching lines with file paths and line numbers.",
+                "description": "Search for a pattern in files using ripgrep. Returns matching lines with file paths and line numbers. USE ONLY for precise pattern matching AFTER semantic_search narrows down the search scope.",
                 "parameters": {
                     "type": "object",
                     "properties": {
@@ -636,50 +1155,85 @@
 ]
 
 
-def _execute_tool(name: str, args: dict) -> str:
+async def _execute_tool(name: str, args: dict) -> str:
     """Execute a tool and return the result."""
-    import os
-    import subprocess
     from pathlib import Path
+    from mcp_bridge.utils.process import async_execute
 
     try:
-        if name == "read_file":
-            path = Path(args["path"])
-            if not path.exists():
-                return f"Error: File not found: {path}"
-            return path.read_text()
+        if name == "semantic_search":
+            # Import semantic_search function from tools
+            from .semantic_search import semantic_search
+
+            # Extract args with defaults
+            query = args.get("query")
+            if not query:
+                return "Error: 'query' parameter is required for semantic_search"
+
+            project_path = args.get("project_path", ".")
+            n_results = args.get("n_results", 10)
+
+            result = await semantic_search(
+                query=query,
+                project_path=project_path,
+                n_results=n_results,
+            )
+            return result
+
+        elif name == "hybrid_search":
+            # Import hybrid_search function from tools
+            from .semantic_search import hybrid_search
+
+            # Extract args with defaults
+            query = args.get("query")
+            if not query:
+                return "Error: 'query' parameter is required for hybrid_search"
+
+            pattern = args.get("pattern")
+            project_path = args.get("project_path", ".")
+            n_results = args.get("n_results", 10)
+
+            result = await hybrid_search(
+                query=query,
+                pattern=pattern,
+                project_path=project_path,
+                n_results=n_results,
+            )
+            return result
+
+        elif name == "read_file":
+            from .read_file import read_file
+
+            path = args["path"]
+            return await read_file(path)
 
         elif name == "list_directory":
-            path = Path(args["path"])
-            if not path.exists():
-                return f"Error: Directory not found: {path}"
-            entries = []
-            for entry in path.iterdir():
-                entry_type = "DIR" if entry.is_dir() else "FILE"
-                entries.append(f"[{entry_type}] {entry.name}")
-            return "\n".join(entries) if entries else "(empty directory)"
+            from .list_directory import list_directory
+
+            path = args["path"]
+            return await list_directory(path)
 
         elif name == "grep_search":
             pattern = args["pattern"]
             search_path = args["path"]
-            result = subprocess.run(
-                ["rg", "--json", "-m", "50", pattern, search_path],
-                capture_output=True,
-                text=True,
-                timeout=30,
+
+            result_obj = await async_execute(
+                ["rg", "--json", "-m", "50", pattern, search_path], timeout=30
             )
-            if result.returncode == 0:
-                return result.stdout[:10000]  # Limit output size
-            elif result.returncode == 1:
+
+            if result_obj.returncode == 0:
+                return result_obj.stdout[:10000]  # Limit output size
+            elif result_obj.returncode == 1:
                 return "No matches found"
             else:
-                return f"Search error: {result.stderr}"
+                return f"Search error: {result_obj.stderr}"
 
         elif name == "write_file":
-            path = Path(args["path"])
-            path.parent.mkdir(parents=True, exist_ok=True)
-            path.write_text(args["content"])
-            return f"Successfully wrote {len(args['content'])} bytes to {path}"
+            from .write_file import write_file
+
+            path = args["path"]
+            content = args["content"]
+            return await write_file(path, content)
 
         else:
             return f"Unknown tool: {name}"
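Note on the block above: `_execute_tool` is a single async dispatch point, so agentic callers can run any declared tool from its name plus a JSON-style args dict, with tool failures returned as strings rather than raised. A hedged standalone sketch of the same dispatch shape (stub tools, not the package's imports):

import asyncio

async def read_file(path: str) -> str:
    return open(path, encoding="utf-8").read()

async def grep_search(pattern: str, path: str) -> str:
    return f"(would run: rg --json -m 50 {pattern!r} {path})"

TOOLS = {"read_file": read_file, "grep_search": grep_search}

async def execute_tool(name: str, args: dict) -> str:
    # Mirror of the dispatch contract: tool errors become strings, not exceptions
    try:
        if name not in TOOLS:
            return f"Unknown tool: {name}"
        return await TOOLS[name](**args)
    except Exception as e:
        return f"Tool error: {e}"

print(asyncio.run(execute_tool("grep_search", {"pattern": "def main", "path": "."})))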
@@ -688,32 +1242,244 @@ def _execute_tool(name: str, args: dict) -> str:
         return f"Tool error: {str(e)}"
 
 
-async def invoke_gemini_agentic(
-    token_store: TokenStore,
+async def _invoke_gemini_agentic_with_api_key(
+    api_key: str,
     prompt: str,
     model: str = "gemini-3-flash",
     max_turns: int = 10,
     timeout: int = 120,
 ) -> str:
     """
-    Invoke Gemini with function calling for agentic tasks.
+    Invoke Gemini with function calling using API key authentication (google-genai library).
 
-    This function implements a multi-turn agentic loop:
+    This implements a multi-turn agentic loop:
     1. Send prompt with tool definitions
     2. If model returns FunctionCall, execute the tool
     3. Send FunctionResponse back to model
     4. Repeat until model returns text or max_turns reached
 
     Args:
-        token_store: Token store for OAuth credentials
+        api_key: Gemini API key (from GEMINI_API_KEY or GOOGLE_API_KEY env var)
        prompt: The task prompt
         model: Gemini model to use
         max_turns: Maximum number of tool-use turns
-        timeout: Request timeout in seconds
+        timeout: Request timeout in seconds (currently unused by google-genai)
 
     Returns:
         Final text response from the model
+
+    Raises:
+        ImportError: If google-genai library is not installed
+        ValueError: If API request fails
     """
+    # USER-VISIBLE NOTIFICATION (stderr) - Shows agentic mode with API key
+    import sys
+
+    print(f"🔮 GEMINI (API/Agentic): {model} | max_turns={max_turns}", file=sys.stderr)
+
+    try:
+        from google import genai
+        from google.genai import types
+    except ImportError:
+        raise ImportError(
+            "google-genai library not installed. Install with: pip install google-genai"
+        )
+
+    # Map stravinsky model names to google-genai model names
+    # Pass through gemini-3-* models directly (Tier 3 benefits)
+    model_map = {
+        "gemini-3-flash": "gemini-3-flash-preview",  # Tier 3 model (not -exp)
+        "gemini-3-flash-preview": "gemini-3-flash-preview",  # Pass through
+        "gemini-3-pro-low": "gemini-3-flash-preview",
+        "gemini-3-pro-high": "gemini-3-pro-preview",  # Tier 3 pro model
+        "gemini-3-pro-preview": "gemini-3-pro-preview",  # Pass through
+        "gemini-flash": "gemini-3-flash-preview",
+        "gemini-pro": "gemini-3-pro-preview",
+        "gemini-3-pro": "gemini-3-pro-preview",
+        "gemini": "gemini-3-flash-preview",
+    }
+    genai_model = model_map.get(model, "gemini-3-flash-preview")  # Default to tier 3 flash
+
+    # Initialize client with API key
+    client = genai.Client(api_key=api_key)
+
+    # Convert AGENT_TOOLS to google-genai format
+    # google-genai expects tools as a list of Tool objects containing function_declarations
+    function_declarations = []
+    for tool_group in AGENT_TOOLS:
+        for func_decl in tool_group.get("functionDeclarations", []):
+            function_declarations.append(
+                types.FunctionDeclaration(
+                    name=func_decl["name"],
+                    description=func_decl["description"],
+                    parameters=func_decl["parameters"],
+                )
+            )
+
+    # Wrap function declarations in a Tool object
+    tools = [types.Tool(function_declarations=function_declarations)]
+
+    # Initialize conversation with user message
+    contents = [types.Content(role="user", parts=[types.Part(text=prompt)])]
+
+    for turn in range(max_turns):
+        try:
+            # Generate content with tools
+            response = client.models.generate_content(
+                model=genai_model,
+                contents=contents,
+                config=types.GenerateContentConfig(
+                    tools=tools,
+                    temperature=0.7,
+                    max_output_tokens=8192,
+                ),
+            )
+
+            # Check if response has function calls
+            if not response.candidates or not response.candidates[0].content.parts:
+                return "No response generated"
+
+            parts = response.candidates[0].content.parts
+            function_calls = []
+            text_parts = []
+
+            for part in parts:
+                if part.function_call:
+                    function_calls.append(part.function_call)
+                elif part.text:
+                    text_parts.append(part.text)
+
+            # If no function calls, return text response
+            if not function_calls:
+                result = "".join(text_parts)
+                return result if result.strip() else "Task completed"
+
+            # Execute function calls and prepare responses
+            function_responses = []
+            for func_call in function_calls:
+                func_name = func_call.name
+                func_args = dict(func_call.args) if func_call.args else {}
+
+                logger.info(f"[AgenticGemini] Turn {turn + 1}: Executing {func_name}")
+                result = await _execute_tool(func_name, func_args)
+
+                function_responses.append(
+                    types.Part(
+                        function_response=types.FunctionResponse(
+                            name=func_name,
+                            response={"result": result},
+                        )
+                    )
+                )
+
+            # Add model's response to conversation
+            contents.append(response.candidates[0].content)
+
+            # Add function responses to conversation
+            contents.append(
+                types.Content(
+                    role="user",
+                    parts=function_responses,
+                )
+            )
+
+        except Exception as e:
+            logger.error(f"[AgenticGemini] Error in turn {turn + 1}: {e}")
+            raise ValueError(f"Gemini API key request failed: {e}")
+
+    return "Max turns reached without final response"
+
+
+async def invoke_gemini_agentic(
+    token_store: TokenStore,
+    prompt: str,
+    model: str = "gemini-3-flash",
+    max_turns: int = 10,
+    timeout: int = 120,
+) -> str:
+    """
+    Invoke Gemini with function calling for agentic tasks.
+    """
+    from mcp_bridge.proxy.client import is_proxy_enabled, PROXY_URL
+
+    if is_proxy_enabled():
+        import httpx
+
+        async with httpx.AsyncClient(timeout=float(timeout) + 10) as client:
+            payload = {"prompt": prompt, "model": model, "max_turns": max_turns, "timeout": timeout}
+            response = await client.post(f"{PROXY_URL}/v1/gemini/agentic", json=payload)
+            response.raise_for_status()
+            return response.json()["response"]
+
+    import sys
+
+    # Get API key from environment (loaded from ~/.stravinsky/.env)
+    api_key = _get_gemini_api_key()
+
+    # ==============================================
+    # AUTH PRIORITY: OAuth first, API fallback on 429
+    # ==============================================
+    # 1. If API-only mode (after 429), use API key directly
+    # 2. Otherwise, try OAuth first
+    # 3. On 429 from OAuth, switch to API-only mode and retry
+
+    # If we're in API-only mode (after a 429), use API key directly
+    if _is_api_only_mode():
+        if not api_key:
+            raise ValueError(
+                "OAuth rate-limited (429) and no API key available. "
+                "Add GEMINI_API_KEY to ~/.stravinsky/.env"
+            )
+
+        # Calculate remaining cooldown time
+        if _GEMINI_OAUTH_429_TIMESTAMP is not None:
+            remaining = _OAUTH_COOLDOWN_SECONDS - (time.time() - _GEMINI_OAUTH_429_TIMESTAMP)
+            remaining_mins = int(remaining // 60)
+            remaining_secs = int(remaining % 60)
+            cooldown_msg = f" (OAuth retry in {remaining_mins}m {remaining_secs}s)"
+        else:
+            cooldown_msg = ""
+
+        # Check time-window rate limit (30 req/min)
+        time_limiter = get_gemini_time_limiter()
+        wait_time = time_limiter.acquire_visible("GEMINI", "API key")
+        if wait_time > 0:
+            await asyncio.sleep(wait_time)
+            # Re-acquire after sleep
+            wait_time = time_limiter.acquire_visible("GEMINI", "API key")
+
+        print(
+            f"🔑 GEMINI (API-only cooldown{cooldown_msg}/Agentic): {model} | max_turns={max_turns}",
+            file=sys.stderr,
+        )
+        logger.info("[AgenticGemini] Using API key (5-min cooldown after OAuth 429)")
+        result = await _invoke_gemini_agentic_with_api_key(
+            api_key=api_key,
+            prompt=prompt,
+            model=model,
+            max_turns=max_turns,
+            timeout=timeout,
+        )
+        # Prepend auth header for visibility in logs
+        auth_header = f"[Auth: API key (5-min cooldown, Agentic) | Model: {model}]\n\n"
+        return auth_header + result
+
+    # DEFAULT: Try OAuth first (Antigravity)
+    logger.info("[AgenticGemini] Using OAuth authentication (Antigravity)")
+
+    # Check time-window rate limit (30 req/min)
+    time_limiter = get_gemini_time_limiter()
+    wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
+    if wait_time > 0:
+        await asyncio.sleep(wait_time)
+        # Re-acquire after sleep
+        wait_time = time_limiter.acquire_visible("GEMINI", "OAuth")
+
+    # USER-VISIBLE NOTIFICATION (stderr) - Shows agentic mode with OAuth
+    import sys
+
+    print(f"🔮 GEMINI (OAuth/Agentic): {model} | max_turns={max_turns}", file=sys.stderr)
+
     access_token = await _ensure_valid_token(token_store, "gemini")
     api_model = resolve_gemini_model(model)
 
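Note on the block above: the agentic loop alternates model turns and tool turns by appending to `contents`. The message plumbing for one round trip looks roughly like this sketch, built with google-genai's types module and a stubbed tool result (no network call is made; exact type constructors are an assumption based on the hunk above):

from google.genai import types

contents = [types.Content(role="user", parts=[types.Part(text="List the repo files")])]

# Suppose the model replied with a function call; echo that turn into history...
model_turn = types.Content(
    role="model",
    parts=[types.Part(function_call=types.FunctionCall(name="list_directory", args={"path": "."}))],
)
contents.append(model_turn)

# ...then answer it with a FunctionResponse part in a user-role turn
contents.append(
    types.Content(
        role="user",
        parts=[types.Part(function_response=types.FunctionResponse(
            name="list_directory",
            response={"result": "[DIR] src\n[FILE] README.md"},  # stubbed tool output
        ))],
    )
)
# The next generate_content(contents=contents, ...) call would see the tool result.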
@@ -800,6 +1566,33 @@ async def invoke_gemini_agentic(
                 logger.warning(f"[AgenticGemini] Endpoint {endpoint} failed: {e}, trying next")
                 continue
 
+        # ==============================================
+        # 429 RATE LIMIT DETECTION: Fallback to API key
+        # ==============================================
+        # If OAuth got rate-limited (429), switch to API-only mode and retry
+        if response is not None and response.status_code == 429:
+            api_key = _get_gemini_api_key()
+            if api_key:
+                _set_api_only_mode("OAuth rate-limited (429) in agentic mode")
+                logger.info("[AgenticGemini] Retrying with API key after OAuth 429")
+                # Retry entire agentic call with API key
+                result = await _invoke_gemini_agentic_with_api_key(
+                    api_key=api_key,
+                    prompt=prompt,
+                    model=model,
+                    max_turns=max_turns,
+                    timeout=timeout,
+                )
+                # Prepend auth header for visibility
+                auth_header = f"[Auth: API key (OAuth 429 fallback, Agentic) | Model: {model}]\n\n"
+                return auth_header + result
+            else:
+                # No API key available - raise clear error
+                raise ValueError(
+                    "OAuth rate-limited (429) and no API key available. "
+                    "Add GEMINI_API_KEY to ~/.stravinsky/.env"
+                )
+
         if response is None:
             raise ValueError(f"All Antigravity endpoints failed: {last_error}")
 
@@ -810,13 +1603,15 @@
         inner_response = data.get("response", data)
         candidates = inner_response.get("candidates", [])
         if not candidates:
-            return "No response generated"
+            auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+            return auth_header + "No response generated"
 
         content = candidates[0].get("content", {})
         parts = content.get("parts", [])
 
         if not parts:
-            return "No response parts"
+            auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+            return auth_header + "No response parts"
 
         # Check for function call
         function_call = None
@@ -835,7 +1630,7 @@
             func_args = function_call.get("args", {})
 
             logger.info(f"[AgenticGemini] Turn {turn + 1}: Executing {func_name}")
-            result = _execute_tool(func_name, func_args)
+            result = await _execute_tool(func_name, func_args)
 
             # Add model's response and function result to conversation
             contents.append({"role": "model", "parts": [{"functionCall": function_call}]})
@@ -849,9 +1644,12 @@
             )
         else:
             # No function call, return text response
-            return text_response or "Task completed"
+            result = text_response or "Task completed"
+            auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+            return auth_header + result
 
-    return "Max turns reached without final response"
+    auth_header = f"[Auth: OAuth (Agentic) | Model: {model}]\n\n"
+    return auth_header + "Max turns reached without final response"
 
 
 @retry(
@@ -869,24 +1667,23 @@ async def invoke_openai(
     temperature: float = 0.7,
     max_tokens: int = 4096,
     thinking_budget: int = 0,
+    reasoning_effort: str = "medium",
 ) -> str:
     """
     Invoke an OpenAI model with the given prompt.
-
-    Args:
-        token_store: Token store for API key
-        prompt: The prompt to send to OpenAI
-        model: OpenAI model to use
-        temperature: Sampling temperature (0.0-2.0)
-        max_tokens: Maximum tokens in response
-
-    Returns:
-        The model's response text.
-
-    Raises:
-        ValueError: If not authenticated with OpenAI
-        httpx.HTTPStatusError: If API request fails
     """
+    from mcp_bridge.proxy.client import is_proxy_enabled, proxy_invoke_openai
+
+    if is_proxy_enabled():
+        return await proxy_invoke_openai(
+            prompt=prompt,
+            model=model,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            thinking_budget=thinking_budget,
+            reasoning_effort=reasoning_effort,
+        )
+
     # Execute pre-model invoke hooks
     params = {
         "prompt": prompt,
@@ -894,6 +1691,7 @@ async def invoke_openai(
         "temperature": temperature,
         "max_tokens": max_tokens,
         "thinking_budget": thinking_budget,
+        "reasoning_effort": reasoning_effort,
         "token_store": token_store,  # Pass for hooks that need model access
         "provider": "openai",  # Identify which provider is being called
     }
@@ -906,6 +1704,7 @@ async def invoke_openai(
     temperature = params["temperature"]
     max_tokens = params["max_tokens"]
     thinking_budget = params["thinking_budget"]
+    reasoning_effort = params.get("reasoning_effort", "medium")
 
     # Extract agent context for logging (may be passed via params or original call)
     agent_context = params.get("agent_context", {})
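reasoning_effort joins the params dict that pre-invoke hooks may rewrite, and is read back with .get(..., "medium") so a hook that drops unknown keys cannot break the call. A sketch of that contract with one hypothetical hook:

from typing import Any, Callable

Hook = Callable[[dict[str, Any]], dict[str, Any]]


def cap_tokens_hook(params: dict[str, Any]) -> dict[str, Any]:
    # Hypothetical hook: clamp max_tokens and raise effort for long prompts.
    params["max_tokens"] = min(params["max_tokens"], 2048)
    if len(params["prompt"]) > 8000:
        params["reasoning_effort"] = "high"
    return params


def run_pre_invoke(params: dict[str, Any], hooks: list[Hook]) -> dict[str, Any]:
    # Each hook sees the params produced by the previous one.
    for hook in hooks:
        params = hook(params)
    return params


params = {"prompt": "x" * 10_000, "max_tokens": 4096, "reasoning_effort": "medium"}
params = run_pre_invoke(params, [cap_tokens_hook])
# Read back defensively, as the diff does, in case a hook dropped the key.
effort = params.get("reasoning_effort", "medium")
print(params["max_tokens"], effort)  # -> 2048 high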
@@ -919,19 +1718,55 @@ async def invoke_openai(
 
     # USER-VISIBLE NOTIFICATION (stderr) - Shows when OpenAI is invoked
     import sys
+
     task_info = f" task={task_id}" if task_id else ""
     desc_info = f" | {description}" if description else ""
+
+    # ==============================================
+    # AUTH PRIORITY: OAuth first, Gemini fallback on 429
+    # ==============================================
+    # 1. If fallback mode (after 429), use Gemini directly
+    # 2. Otherwise, try OpenAI OAuth first
+    # 3. On 429 from OAuth, switch to fallback mode and retry with Gemini
+
+    provider_tracker = get_provider_tracker()
+
+    # If OpenAI is in cooldown, follow tier-aware fallback chain.
+    if not provider_tracker.is_available("openai"):
+        for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain("openai", model):
+            if candidate_provider == "openai":
+                continue
+            if use_oauth and not provider_tracker.is_available(candidate_provider):
+                continue
+
+            if candidate_provider == "gemini":
+                if not use_oauth:
+                    # Force Gemini API-key mode for the cooldown window.
+                    if _get_gemini_api_key() is None:
+                        continue
+                    _set_api_only_mode("OpenAI in cooldown; using Gemini API key")
+
+                return await invoke_gemini(
+                    token_store=token_store,
+                    prompt=prompt,
+                    model=candidate_model,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    thinking_budget=0,
+                    image_path=None,
+                )
+    # DEFAULT: Try OpenAI OAuth first
     print(f"🧠 OPENAI: {model} | agent={agent_type}{task_info}{desc_info}", file=sys.stderr)
 
     access_token = await _ensure_valid_token(token_store, "openai")
-    logger.info(f"[invoke_openai] Got access token")
+    logger.info("[invoke_openai] Got access token")
 
     # ChatGPT Backend API - Uses Codex Responses endpoint
     # Replicates opencode-openai-codex-auth plugin behavior
     api_url = "https://chatgpt.com/backend-api/codex/responses"
 
     # Extract account ID from JWT token
-    logger.info(f"[invoke_openai] Extracting account ID from JWT")
+    logger.info("[invoke_openai] Extracting account ID from JWT")
     try:
         parts = access_token.split(".")
         payload_b64 = parts[1]
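The account-ID extraction that begins here treats the access token as a JWT: three base64url segments, with the JSON claims in the middle one, which needs its '=' padding restored before decoding. A self-contained sketch of that decoder (the chatgpt_account_id claim name is illustrative; only the x-openai-account-id header appears in this diff):

import base64
import json


def jwt_claims(token: str) -> dict:
    """Decode the claims segment of a JWT without verifying its signature."""
    payload_b64 = token.split(".")[1]
    # base64url strips '=' padding; restore it to a multiple of 4.
    payload_b64 += "=" * (-len(payload_b64) % 4)
    return json.loads(base64.urlsafe_b64decode(payload_b64))


# Build a throwaway token to exercise the decoder.
claims = {"chatgpt_account_id": "acct_123"}
body = base64.urlsafe_b64encode(json.dumps(claims).encode()).decode().rstrip("=")
token = f"header.{body}.signature"
print(jwt_claims(token)["chatgpt_account_id"])  # -> acct_123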
@@ -959,6 +1794,10 @@ async def invoke_openai(
     if account_id:
         headers["x-openai-account-id"] = account_id
 
+    # Determine final effort
+    # Legacy: thinking_budget > 0 implies high effort
+    effort = "high" if thinking_budget > 0 else reasoning_effort
+
     # Request body matching opencode transformation
     payload = {
         "model": model,
@@ -966,7 +1805,7 @@ async def invoke_openai(
         "stream": True,  # Always stream (handler converts to non-stream if needed)
         "instructions": instructions,
         "input": [{"role": "user", "content": prompt}],
-        "reasoning": {"effort": "high" if thinking_budget > 0 else "medium", "summary": "auto"},
+        "reasoning": {"effort": effort, "summary": "auto"},
         "text": {"verbosity": "medium"},
         "include": ["reasoning.encrypted_content"],
     }
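With the new parameter in place, the request's reasoning.effort is decided in one spot: a positive thinking_budget still forces "high" for backward compatibility, and otherwise the caller's reasoning_effort passes through. The same decision as a pure function, with its edge cases spelled out:

def resolve_effort(thinking_budget: int, reasoning_effort: str = "medium") -> str:
    # Legacy contract preserved by the diff: any positive budget wins.
    return "high" if thinking_budget > 0 else reasoning_effort


assert resolve_effort(0) == "medium"
assert resolve_effort(0, "low") == "low"
assert resolve_effort(1024, "low") == "high"  # budget overrides explicit effort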
@@ -979,44 +1818,100 @@ async def invoke_openai(
     logger.info(f"[invoke_openai] Instructions length: {len(instructions)}")
 
     try:
-        async with httpx.AsyncClient() as client:
-            async with client.stream(
+        async with (
+            httpx.AsyncClient() as client,
+            client.stream(
                 "POST", api_url, headers=headers, json=payload, timeout=120.0
-            ) as response:
-                logger.info(f"[invoke_openai] Response status: {response.status_code}")
-                if response.status_code == 401:
-                    raise ValueError(
-                        "OpenAI authentication failed. Run: stravinsky-auth login openai"
-                    )
+            ) as response,
+        ):
+            logger.info(f"[invoke_openai] Response status: {response.status_code}")
+            if response.status_code == 401:
+                raise ValueError("OpenAI authentication failed. Run: stravinsky-auth login openai")
+
+            # ==============================================
+            # 429 RATE LIMIT DETECTION: Tier-aware fallback chain
+            # ==============================================
+            if response.status_code == 429:
+                provider_tracker = get_provider_tracker()
+                provider_tracker.mark_rate_limited(
+                    "openai",
+                    duration=_OAUTH_COOLDOWN_SECONDS,
+                    reason="OpenAI OAuth rate-limited (429)",
+                )
 
-            if response.status_code >= 400:
-                error_body = await response.aread()
-                error_text = error_body.decode("utf-8")
-                logger.error(f"OpenAI API error {response.status_code}: {error_text}")
-                logger.error(f"Request payload was: {payload}")
-                logger.error(f"Request headers were: {headers}")
-                raise ValueError(f"OpenAI API error {response.status_code}: {error_text}")
-
-            # Parse SSE stream for text deltas
-            async for line in response.aiter_lines():
-                if line.startswith("data: "):
-                    data_json = line[6:]  # Remove "data: " prefix
-                    try:
-                        data = json_module.loads(data_json)
-                        event_type = data.get("type")
-
-                        # Extract text deltas from SSE stream
-                        if event_type == "response.output_text.delta":
-                            delta = data.get("delta", "")
-                            text_chunks.append(delta)
-
-                    except json_module.JSONDecodeError:
-                        pass  # Skip malformed JSON
-                    except Exception as e:
-                        logger.warning(f"Error processing SSE event: {e}")
+                for candidate_provider, candidate_model, use_oauth in get_oauth_fallback_chain(
+                    "openai", model
+                ):
+                    if candidate_provider == "openai":
+                        continue
+                    if use_oauth and not provider_tracker.is_available(candidate_provider):
+                        continue
+
+                    if candidate_provider == "gemini":
+                        if not use_oauth:
+                            if _get_gemini_api_key() is None:
+                                continue
+                            _set_api_only_mode("OpenAI OAuth rate-limited (429)")
+
+                        return await invoke_gemini(
+                            token_store=token_store,
+                            prompt=prompt,
+                            model=candidate_model,
+                            temperature=temperature,
+                            max_tokens=max_tokens,
+                            thinking_budget=0,
+                            image_path=None,
+                        )
+
+                raise ValueError("OpenAI OAuth rate-limited (429) and no fallback succeeded")
+            if response.status_code >= 400:
+                error_body = await response.aread()
+                error_text = error_body.decode("utf-8")
+                logger.error(f"OpenAI API error {response.status_code}: {error_text}")
+                logger.error(f"Request payload was: {payload}")
+                logger.error(f"Request headers were: {headers}")
+                raise ValueError(f"OpenAI API error {response.status_code}: {error_text}")
+
+            # Parse SSE stream for text deltas
+            async for line in response.aiter_lines():
+                if line.startswith("data: "):
+                    data_json = line[6:]  # Remove "data: " prefix
+                    try:
+                        data = json_module.loads(data_json)
+                        event_type = data.get("type")
+
+                        # Extract text deltas from SSE stream
+                        if event_type == "response.output_text.delta":
+                            delta = data.get("delta", "")
+                            text_chunks.append(delta)
+
+                    except json_module.JSONDecodeError:
+                        pass  # Skip malformed JSON
+                    except Exception as e:
+                        logger.warning(f"Error processing SSE event: {e}")
 
         # Return collected text
         result = "".join(text_chunks)
+
+        # Track estimated usage
+        try:
+            from mcp_bridge.metrics.cost_tracker import get_cost_tracker
+
+            tracker = get_cost_tracker()
+            # Estimate: 4 chars per token
+            input_tokens = len(prompt) // 4
+            output_tokens = len(result) // 4
+
+            tracker.track_usage(
+                model=model,
+                input_tokens=input_tokens,
+                output_tokens=output_tokens,
+                agent_type=agent_type,
+                task_id=task_id,
+            )
+        except Exception as e:
+            logger.warning(f"Failed to track cost: {e}")
+
         if not result:
             return "No response generated"
         return result
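The cost tracker added at the end has no exact token counts on this streaming path, so it estimates both sides at roughly four characters per token, a common rule of thumb for English text; the numbers are order-of-magnitude only. The estimate in isolation:

def estimate_tokens(text: str) -> int:
    # Heuristic from the diff: ~4 characters per token for English prose.
    return len(text) // 4


prompt = "Summarize the release notes for stravinsky 0.4.66."
result = "The release adds a tier-aware OAuth fallback chain and cost tracking."
print(estimate_tokens(prompt), estimate_tokens(result))  # rough input/output counts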