stravinsky 0.1.2__py3-none-any.whl → 0.2.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of stravinsky might be problematic. See the registry's advisory page for more details.

Files changed (42)
  1. mcp_bridge/__init__.py +1 -5
  2. mcp_bridge/auth/cli.py +89 -44
  3. mcp_bridge/auth/oauth.py +88 -63
  4. mcp_bridge/hooks/__init__.py +49 -0
  5. mcp_bridge/hooks/agent_reminder.py +61 -0
  6. mcp_bridge/hooks/auto_slash_command.py +186 -0
  7. mcp_bridge/hooks/budget_optimizer.py +38 -0
  8. mcp_bridge/hooks/comment_checker.py +136 -0
  9. mcp_bridge/hooks/compaction.py +32 -0
  10. mcp_bridge/hooks/context_monitor.py +58 -0
  11. mcp_bridge/hooks/directory_context.py +40 -0
  12. mcp_bridge/hooks/edit_recovery.py +41 -0
  13. mcp_bridge/hooks/empty_message_sanitizer.py +240 -0
  14. mcp_bridge/hooks/keyword_detector.py +122 -0
  15. mcp_bridge/hooks/manager.py +96 -0
  16. mcp_bridge/hooks/preemptive_compaction.py +157 -0
  17. mcp_bridge/hooks/session_recovery.py +186 -0
  18. mcp_bridge/hooks/todo_enforcer.py +75 -0
  19. mcp_bridge/hooks/truncator.py +19 -0
  20. mcp_bridge/native_hooks/context.py +38 -0
  21. mcp_bridge/native_hooks/edit_recovery.py +46 -0
  22. mcp_bridge/native_hooks/stravinsky_mode.py +109 -0
  23. mcp_bridge/native_hooks/truncator.py +23 -0
  24. mcp_bridge/prompts/delphi.py +3 -2
  25. mcp_bridge/prompts/dewey.py +105 -21
  26. mcp_bridge/prompts/stravinsky.py +452 -118
  27. mcp_bridge/server.py +491 -668
  28. mcp_bridge/server_tools.py +547 -0
  29. mcp_bridge/tools/__init__.py +13 -3
  30. mcp_bridge/tools/agent_manager.py +359 -190
  31. mcp_bridge/tools/continuous_loop.py +67 -0
  32. mcp_bridge/tools/init.py +50 -0
  33. mcp_bridge/tools/lsp/tools.py +15 -15
  34. mcp_bridge/tools/model_invoke.py +594 -48
  35. mcp_bridge/tools/skill_loader.py +51 -47
  36. mcp_bridge/tools/task_runner.py +141 -0
  37. mcp_bridge/tools/templates.py +175 -0
  38. {stravinsky-0.1.2.dist-info → stravinsky-0.2.38.dist-info}/METADATA +55 -10
  39. stravinsky-0.2.38.dist-info/RECORD +57 -0
  40. stravinsky-0.1.2.dist-info/RECORD +0 -32
  41. {stravinsky-0.1.2.dist-info → stravinsky-0.2.38.dist-info}/WHEEL +0 -0
  42. {stravinsky-0.1.2.dist-info → stravinsky-0.2.38.dist-info}/entry_points.txt +0 -0
@@ -5,39 +5,188 @@ These tools use OAuth tokens from the token store to authenticate
5
5
  API requests to external model providers.
6
6
  """
7
7
 
8
+ import logging
9
+ import os
8
10
  import time
9
11
 
12
+ logger = logging.getLogger(__name__)
13
+
14
# Translate user-facing Gemini model names to Antigravity API model IDs.
# API spec: https://github.com/NoeFabris/opencode-antigravity-auth/blob/main/docs/ANTIGRAVITY_API_SPEC.md
# Verified Gemini models as of 2025-12: gemini-3-pro-high, gemini-3-pro-low.
# The API exposes no gemini-3-flash; every flash-style alias resolves to gemini-3-pro-low.
# Claude models are served by the Anthropic API directly, not Antigravity.
GEMINI_MODEL_MAP = {
    # Verified Antigravity models pass through unchanged.
    "gemini-3-pro-low": "gemini-3-pro-low",
    "gemini-3-pro-high": "gemini-3-pro-high",
    # Convenience aliases resolve to the closest verified model.
    "gemini-flash": "gemini-3-pro-low",
    "gemini-3-flash": "gemini-3-pro-low",  # not a real API model; redirected to pro-low
    "gemini-pro": "gemini-3-pro-low",
    "gemini-3-pro": "gemini-3-pro-low",
    "gemini": "gemini-3-pro-low",  # bare "gemini" default
    # Legacy 2.0-era names redirect to current Antigravity models.
    "gemini-2.0-flash": "gemini-3-pro-low",
    "gemini-2.0-flash-001": "gemini-3-pro-low",
    "gemini-2.0-pro": "gemini-3-pro-low",
    "gemini-2.0-pro-exp": "gemini-3-pro-high",
}


def resolve_gemini_model(model: str) -> str:
    """Map *model* to its Antigravity API model ID; unknown names pass through unchanged."""
    try:
        return GEMINI_MODEL_MAP[model]
    except KeyError:
        return model
41
+
42
+
10
43
  import httpx
44
+ from tenacity import (
45
+ retry,
46
+ stop_after_attempt,
47
+ wait_exponential,
48
+ retry_if_exception,
49
+ )
11
50
 
12
51
  from ..auth.token_store import TokenStore
13
- from ..auth.oauth import refresh_access_token as gemini_refresh, ANTIGRAVITY_HEADERS
52
+ from ..auth.oauth import (
53
+ refresh_access_token as gemini_refresh,
54
+ ANTIGRAVITY_HEADERS,
55
+ ANTIGRAVITY_ENDPOINTS,
56
+ ANTIGRAVITY_DEFAULT_PROJECT_ID,
57
+ ANTIGRAVITY_API_VERSION,
58
+ )
14
59
  from ..auth.openai_oauth import refresh_access_token as openai_refresh
60
+ from ..hooks.manager import get_hook_manager
61
+
62
# ========================
# SESSION & HTTP MANAGEMENT
# ========================

# Maps a conversation key (or "default") to a stable session UUID so that
# thinking signatures persist across multi-turn conversations.
_SESSION_CACHE: dict[str, str] = {}

# Single shared AsyncClient, created lazily, so requests reuse pooled connections.
_HTTP_CLIENT: httpx.AsyncClient | None = None
72
+
73
+
74
def _get_session_id(conversation_key: str | None = None) -> str:
    """
    Return a stable session UUID for this conversation, creating one on first use.

    Per the Antigravity API, session IDs must persist across multi-turn calls so
    the thinking-signature cache stays valid; minting a fresh UUID per request
    would break that.

    Args:
        conversation_key: Optional key scoping the session (e.g. per-agent)

    Returns:
        The cached (or newly created) session UUID for this conversation
    """
    import uuid

    cache_key = conversation_key or "default"
    session = _SESSION_CACHE.get(cache_key)
    if session is None:
        session = str(uuid.uuid4())
        _SESSION_CACHE[cache_key] = session
    return session
93
+
94
+
95
def clear_session_cache() -> None:
    """Drop every cached session ID (used for thinking recovery after errors)."""
    _SESSION_CACHE.clear()
98
+
99
+
100
async def _get_http_client() -> httpx.AsyncClient:
    """
    Return the shared pooled HTTP client, creating it when absent or closed.

    A single long-lived client keeps connection pools warm, which is faster
    than opening a fresh client for every request.
    """
    global _HTTP_CLIENT
    client = _HTTP_CLIENT
    if client is None or client.is_closed:
        client = httpx.AsyncClient(timeout=120.0)
        _HTTP_CLIENT = client
    return client
111
+
112
+
113
+ def _extract_gemini_response(data: dict) -> str:
114
+ """
115
+ Extract text from Gemini response, handling thinking blocks.
116
+
117
+ Per Antigravity API, responses may contain:
118
+ - text: Regular response text
119
+ - thought: Thinking block content (when thinkingConfig enabled)
120
+ - thoughtSignature: Signature for caching (ignored)
121
+
122
+ Args:
123
+ data: Raw API response JSON
124
+
125
+ Returns:
126
+ Extracted text, with thinking blocks formatted as <thinking>...</thinking>
127
+ """
128
+ try:
129
+ # Unwrap the outer "response" envelope if present
130
+ inner_response = data.get("response", data)
131
+ candidates = inner_response.get("candidates", [])
132
+
133
+ if not candidates:
134
+ return "No response generated"
135
+
136
+ content = candidates[0].get("content", {})
137
+ parts = content.get("parts", [])
138
+
139
+ if not parts:
140
+ return "No response parts"
141
+
142
+ text_parts = []
143
+ thinking_parts = []
144
+
145
+ for part in parts:
146
+ if "thought" in part:
147
+ thinking_parts.append(part["thought"])
148
+ elif "text" in part:
149
+ text_parts.append(part["text"])
150
+ # Skip thoughtSignature parts
151
+
152
+ # Combine results
153
+ result = "".join(text_parts)
154
+
155
+ # Prepend thinking blocks if present
156
+ if thinking_parts:
157
+ thinking_content = "".join(thinking_parts)
158
+ result = f"<thinking>\n{thinking_content}\n</thinking>\n\n{result}"
159
+
160
+ return result if result.strip() else "No response generated"
161
+
162
+ except (KeyError, IndexError, TypeError) as e:
163
+ return f"Error parsing response: {e}"
15
164
 
16
165
 
17
166
  async def _ensure_valid_token(token_store: TokenStore, provider: str) -> str:
18
167
  """
19
168
  Get a valid access token, refreshing if needed.
20
-
169
+
21
170
  Args:
22
171
  token_store: Token store
23
172
  provider: Provider name
24
-
173
+
25
174
  Returns:
26
175
  Valid access token
27
-
176
+
28
177
  Raises:
29
178
  ValueError: If not authenticated
30
179
  """
31
180
  # Check if token needs refresh (with 5 minute buffer)
32
181
  if token_store.needs_refresh(provider, buffer_seconds=300):
33
182
  token = token_store.get_token(provider)
34
-
183
+
35
184
  if not token or not token.get("refresh_token"):
36
185
  raise ValueError(
37
186
  f"Not authenticated with {provider}. "
38
187
  f"Run: python -m mcp_bridge.auth.cli login {provider}"
39
188
  )
40
-
189
+
41
190
  try:
42
191
  if provider == "gemini":
43
192
  result = gemini_refresh(token["refresh_token"])
@@ -45,7 +194,7 @@ async def _ensure_valid_token(token_store: TokenStore, provider: str) -> str:
45
194
  result = openai_refresh(token["refresh_token"])
46
195
  else:
47
196
  raise ValueError(f"Unknown provider: {provider}")
48
-
197
+
49
198
  # Update stored token
50
199
  token_store.set_token(
51
200
  provider=provider,
@@ -53,24 +202,38 @@ async def _ensure_valid_token(token_store: TokenStore, provider: str) -> str:
53
202
  refresh_token=result.refresh_token or token["refresh_token"],
54
203
  expires_at=time.time() + result.expires_in,
55
204
  )
56
-
205
+
57
206
  return result.access_token
58
207
  except Exception as e:
59
208
  raise ValueError(
60
- f"Token refresh failed: {e}. "
61
- f"Run: python -m mcp_bridge.auth.cli login {provider}"
209
+ f"Token refresh failed: {e}. Run: python -m mcp_bridge.auth.cli login {provider}"
62
210
  )
63
-
211
+
64
212
  access_token = token_store.get_access_token(provider)
65
213
  if not access_token:
66
214
  raise ValueError(
67
215
  f"Not authenticated with {provider}. "
68
216
  f"Run: python -m mcp_bridge.auth.cli login {provider}"
69
217
  )
70
-
218
+
71
219
  return access_token
72
220
 
73
221
 
222
def is_retryable_exception(e: Exception) -> bool:
    """Return True for transient HTTP failures: 429 rate limits and 5xx server errors."""
    if not isinstance(e, httpx.HTTPStatusError):
        return False
    status = e.response.status_code
    return status == 429 or 500 <= status < 600
227
+
228
+
229
+ @retry(
230
+ stop=stop_after_attempt(5),
231
+ wait=wait_exponential(multiplier=1, min=4, max=60),
232
+ retry=retry_if_exception(is_retryable_exception),
233
+ before_sleep=lambda retry_state: logger.info(
234
+ f"Rate limited or server error, retrying in {retry_state.next_action.sleep} seconds..."
235
+ ),
236
+ )
74
237
  async def invoke_gemini(
75
238
  token_store: TokenStore,
76
239
  prompt: str,
@@ -98,10 +261,32 @@ async def invoke_gemini(
98
261
  ValueError: If not authenticated with Gemini
99
262
  httpx.HTTPStatusError: If API request fails
100
263
  """
264
+ # Execute pre-model invoke hooks
265
+ params = {
266
+ "prompt": prompt,
267
+ "model": model,
268
+ "temperature": temperature,
269
+ "max_tokens": max_tokens,
270
+ "thinking_budget": thinking_budget,
271
+ }
272
+ hook_manager = get_hook_manager()
273
+ params = await hook_manager.execute_pre_model_invoke(params)
274
+
275
+ # Update local variables from possibly modified params
276
+ prompt = params["prompt"]
277
+ model = params["model"]
278
+ temperature = params["temperature"]
279
+ max_tokens = params["max_tokens"]
280
+ thinking_budget = params["thinking_budget"]
281
+
101
282
  access_token = await _ensure_valid_token(token_store, "gemini")
102
283
 
103
- # Gemini API endpoint with OAuth
104
- api_url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
284
+ # Resolve user-friendly model name to actual API model ID
285
+ api_model = resolve_gemini_model(model)
286
+
287
+ # Use persistent session ID for thinking signature caching
288
+ session_id = _get_session_id()
289
+ project_id = os.getenv("STRAVINSKY_ANTIGRAVITY_PROJECT_ID", ANTIGRAVITY_DEFAULT_PROJECT_ID)
105
290
 
106
291
  headers = {
107
292
  "Authorization": f"Bearer {access_token}",
@@ -109,53 +294,397 @@ async def invoke_gemini(
109
294
  **ANTIGRAVITY_HEADERS, # Include Antigravity headers
110
295
  }
111
296
 
112
- payload = {
113
- "contents": [{"parts": [{"text": prompt}]}],
297
+ # Build inner request payload
298
+ # Per API spec: contents must include role ("user" or "model")
299
+ inner_payload = {
300
+ "contents": [{"role": "user", "parts": [{"text": prompt}]}],
114
301
  "generationConfig": {
115
302
  "temperature": temperature,
116
303
  "maxOutputTokens": max_tokens,
117
304
  },
305
+ "sessionId": session_id,
118
306
  }
119
-
307
+
120
308
  # Add thinking budget if supported by model/API
121
309
  if thinking_budget > 0:
122
310
  # For Gemini 2.0+ Thinking models
123
- payload["generationConfig"]["thinkingConfig"] = {
311
+ # Per Antigravity API: use "thinkingBudget", NOT "tokenLimit"
312
+ inner_payload["generationConfig"]["thinkingConfig"] = {
124
313
  "includeThoughts": True,
125
- "tokenLimit": thinking_budget
314
+ "thinkingBudget": thinking_budget,
126
315
  }
127
316
 
128
- async with httpx.AsyncClient() as client:
129
- response = await client.post(
130
- api_url,
131
- headers=headers,
132
- json=payload,
133
- timeout=120.0,
134
- )
135
-
136
- if response.status_code == 401:
137
- raise ValueError(
138
- "Gemini authentication expired. "
139
- "Run: python -m mcp_bridge.auth.cli login gemini"
317
+ # Wrap request body per reference implementation
318
+ wrapped_payload = {
319
+ "project": project_id,
320
+ "model": api_model,
321
+ "userAgent": "antigravity",
322
+ "requestId": f"invoke-{uuid.uuid4()}",
323
+ "request": inner_payload,
324
+ }
325
+
326
+ # Get pooled HTTP client for connection reuse
327
+ client = await _get_http_client()
328
+
329
+ # Try endpoints in fallback order with thinking recovery
330
+ response = None
331
+ last_error = None
332
+ max_retries = 2 # For thinking recovery
333
+
334
+ for retry_attempt in range(max_retries):
335
+ for endpoint in ANTIGRAVITY_ENDPOINTS:
336
+ # Reference uses: {endpoint}/v1internal:generateContent (NOT /models/{model})
337
+ api_url = f"{endpoint}/v1internal:generateContent"
338
+
339
+ try:
340
+ response = await client.post(
341
+ api_url,
342
+ headers=headers,
343
+ json=wrapped_payload,
344
+ timeout=120.0,
345
+ )
346
+
347
+ # 401/403 might be endpoint-specific, try next endpoint
348
+ if response.status_code in (401, 403):
349
+ logger.warning(
350
+ f"[Gemini] Endpoint {endpoint} returned {response.status_code}, trying next"
351
+ )
352
+ last_error = Exception(f"{response.status_code} from {endpoint}")
353
+ continue
354
+
355
+ # Check for thinking-related errors that need recovery
356
+ if response.status_code in (400, 500):
357
+ error_text = response.text.lower()
358
+ if "thinking" in error_text or "signature" in error_text:
359
+ logger.warning(
360
+ f"[Gemini] Thinking error detected, clearing session cache and retrying"
361
+ )
362
+ clear_session_cache()
363
+ # Update session ID for retry
364
+ wrapped_payload["request"]["sessionId"] = _get_session_id()
365
+ last_error = Exception(f"Thinking error: {response.text[:200]}")
366
+ break # Break inner loop to retry with new session
367
+
368
+ # If we got a non-retryable response (success or 4xx client error), use it
369
+ if response.status_code < 500 and response.status_code != 429:
370
+ break
371
+
372
+ except httpx.TimeoutException as e:
373
+ last_error = e
374
+ continue
375
+ except Exception as e:
376
+ last_error = e
377
+ continue
378
+ else:
379
+ # Inner loop completed without break - no thinking recovery needed
380
+ break
381
+
382
+ # If we broke out of inner loop for thinking recovery, continue outer retry loop
383
+ if response and response.status_code in (400, 500):
384
+ continue
385
+ break
386
+
387
+ if response is None:
388
+ raise ValueError(f"All Antigravity endpoints failed: {last_error}")
389
+
390
+ response.raise_for_status()
391
+ data = response.json()
392
+
393
+ # Extract text from response using thinking-aware parser
394
+ return _extract_gemini_response(data)
395
+
396
+
397
# ========================
# AGENTIC FUNCTION CALLING
# ========================

# Declarations for the tools background agents may call, in the Gemini
# functionDeclarations schema. _execute_tool() dispatches on these names.
_READ_FILE_TOOL = {
    "name": "read_file",
    "description": "Read the contents of a file. Returns the file contents as text.",
    "parameters": {
        "type": "object",
        "properties": {
            "path": {
                "type": "string",
                "description": "Absolute or relative path to the file",
            }
        },
        "required": ["path"],
    },
}

_LIST_DIRECTORY_TOOL = {
    "name": "list_directory",
    "description": "List files and directories in a path",
    "parameters": {
        "type": "object",
        "properties": {
            "path": {"type": "string", "description": "Directory path to list"}
        },
        "required": ["path"],
    },
}

_GREP_SEARCH_TOOL = {
    "name": "grep_search",
    "description": "Search for a pattern in files using ripgrep. Returns matching lines with file paths and line numbers.",
    "parameters": {
        "type": "object",
        "properties": {
            "pattern": {"type": "string", "description": "The search pattern (regex)"},
            "path": {"type": "string", "description": "Directory or file to search in"},
        },
        "required": ["pattern", "path"],
    },
}

_WRITE_FILE_TOOL = {
    "name": "write_file",
    "description": "Write content to a file",
    "parameters": {
        "type": "object",
        "properties": {
            "path": {"type": "string", "description": "Path to the file to write"},
            "content": {
                "type": "string",
                "description": "Content to write to the file",
            },
        },
        "required": ["path", "content"],
    },
}

# Tool definitions for background agents
AGENT_TOOLS = [
    {
        "functionDeclarations": [
            _READ_FILE_TOOL,
            _LIST_DIRECTORY_TOOL,
            _GREP_SEARCH_TOOL,
            _WRITE_FILE_TOOL,
        ]
    }
]
460
+
461
+
462
+ def _execute_tool(name: str, args: dict) -> str:
463
+ """Execute a tool and return the result."""
464
+ import os
465
+ import subprocess
466
+ from pathlib import Path
467
+
468
+ try:
469
+ if name == "read_file":
470
+ path = Path(args["path"])
471
+ if not path.exists():
472
+ return f"Error: File not found: {path}"
473
+ return path.read_text()
474
+
475
+ elif name == "list_directory":
476
+ path = Path(args["path"])
477
+ if not path.exists():
478
+ return f"Error: Directory not found: {path}"
479
+ entries = []
480
+ for entry in path.iterdir():
481
+ entry_type = "DIR" if entry.is_dir() else "FILE"
482
+ entries.append(f"[{entry_type}] {entry.name}")
483
+ return "\n".join(entries) if entries else "(empty directory)"
484
+
485
+ elif name == "grep_search":
486
+ pattern = args["pattern"]
487
+ search_path = args["path"]
488
+ result = subprocess.run(
489
+ ["rg", "--json", "-m", "50", pattern, search_path],
490
+ capture_output=True,
491
+ text=True,
492
+ timeout=30,
140
493
  )
141
-
142
- response.raise_for_status()
494
+ if result.returncode == 0:
495
+ return result.stdout[:10000] # Limit output size
496
+ elif result.returncode == 1:
497
+ return "No matches found"
498
+ else:
499
+ return f"Search error: {result.stderr}"
500
+
501
+ elif name == "write_file":
502
+ path = Path(args["path"])
503
+ path.parent.mkdir(parents=True, exist_ok=True)
504
+ path.write_text(args["content"])
505
+ return f"Successfully wrote {len(args['content'])} bytes to {path}"
506
+
507
+ else:
508
+ return f"Unknown tool: {name}"
509
+
510
+ except Exception as e:
511
+ return f"Tool error: {str(e)}"
512
+
513
+
514
async def invoke_gemini_agentic(
    token_store: TokenStore,
    prompt: str,
    model: str = "gemini-3-flash",
    max_turns: int = 10,
    timeout: int = 120,
    temperature: float = 0.7,
    max_output_tokens: int = 8192,
) -> str:
    """
    Invoke Gemini with function calling for agentic tasks.

    This function implements a multi-turn agentic loop:
    1. Send prompt with tool definitions (AGENT_TOOLS)
    2. If model returns a functionCall, execute the tool locally
    3. Send the functionResponse back to the model
    4. Repeat until the model returns text or max_turns is reached

    Args:
        token_store: Token store for OAuth credentials
        prompt: The task prompt
        model: Gemini model to use (resolved via resolve_gemini_model)
        max_turns: Maximum number of tool-use turns
        timeout: Per-request timeout in seconds
        temperature: Sampling temperature per turn (was hard-coded to 0.7)
        max_output_tokens: Per-turn output token cap (was hard-coded to 8192)

    Returns:
        Final text response from the model, or a placeholder string when the
        loop ends without one.

    Raises:
        ValueError: If not authenticated, or if every Antigravity endpoint fails
        httpx.HTTPStatusError: If the selected endpoint returns an error status
    """
    import uuid

    access_token = await _ensure_valid_token(token_store, "gemini")
    api_model = resolve_gemini_model(model)

    # Use persistent session ID for this conversation (thinking-signature cache)
    session_id = _get_session_id(conversation_key="agentic")

    # Project ID from environment or default
    project_id = os.getenv("STRAVINSKY_ANTIGRAVITY_PROJECT_ID", ANTIGRAVITY_DEFAULT_PROJECT_ID)

    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json",
        **ANTIGRAVITY_HEADERS,
    }

    # Initialize conversation
    contents = [{"role": "user", "parts": [{"text": prompt}]}]

    # Get pooled HTTP client for connection reuse
    client = await _get_http_client()

    for turn in range(max_turns):
        # Build inner request payload (what goes inside the "request" wrapper)
        inner_payload = {
            "contents": contents,
            "tools": AGENT_TOOLS,
            "generationConfig": {
                "temperature": temperature,
                "maxOutputTokens": max_output_tokens,
            },
            "sessionId": session_id,
        }

        # Wrap request body per reference implementation (request.ts wrapRequestBody())
        wrapped_payload = {
            "project": project_id,
            "model": api_model,
            "userAgent": "antigravity",
            "requestId": f"agent-{uuid.uuid4()}",
            "request": inner_payload,
        }

        # Try endpoints in fallback order
        response = None
        last_error = None

        for endpoint in ANTIGRAVITY_ENDPOINTS:
            # Reference uses: {endpoint}/v1internal:generateContent (NOT /models/{model})
            api_url = f"{endpoint}/v1internal:generateContent"

            try:
                response = await client.post(
                    api_url,
                    headers=headers,
                    json=wrapped_payload,
                    timeout=float(timeout),
                )

                # 401/403 might be endpoint-specific, try next endpoint
                if response.status_code in (401, 403):
                    logger.warning(
                        f"[AgenticGemini] Endpoint {endpoint} returned {response.status_code}, trying next"
                    )
                    last_error = Exception(f"{response.status_code} from {endpoint}")
                    continue

                # If we got a non-retryable response (success or 4xx client error), use it
                if response.status_code < 500 and response.status_code != 429:
                    break

                # 5xx or 429: fall through to the next endpoint
                logger.warning(
                    f"[AgenticGemini] Endpoint {endpoint} returned {response.status_code}, trying next"
                )

            except httpx.TimeoutException as e:
                last_error = e
                logger.warning(f"[AgenticGemini] Endpoint {endpoint} timed out, trying next")
                continue
            except Exception as e:
                last_error = e
                logger.warning(f"[AgenticGemini] Endpoint {endpoint} failed: {e}, trying next")
                continue

        if response is None:
            raise ValueError(f"All Antigravity endpoints failed: {last_error}")

        response.raise_for_status()
        data = response.json()

        # Extract response - unwrap outer "response" envelope if present
        inner_response = data.get("response", data)
        candidates = inner_response.get("candidates", [])
        if not candidates:
            return "No response generated"

        content = candidates[0].get("content", {})
        parts = content.get("parts", [])

        if not parts:
            return "No response parts"

        # Check for a function call; remember the last text part as fallback
        function_call = None
        text_response = None

        for part in parts:
            if "functionCall" in part:
                function_call = part["functionCall"]
                break
            elif "text" in part:
                text_response = part["text"]

        if function_call:
            # Execute the requested tool locally
            func_name = function_call.get("name")
            func_args = function_call.get("args", {})

            logger.info(f"[AgenticGemini] Turn {turn + 1}: Executing {func_name}")
            result = _execute_tool(func_name, func_args)

            # Add the model's call and the tool result to the conversation
            contents.append({"role": "model", "parts": [{"functionCall": function_call}]})
            contents.append(
                {
                    "role": "user",
                    "parts": [
                        {"functionResponse": {"name": func_name, "response": {"result": result}}}
                    ],
                }
            )
        else:
            # No function call, return text response
            return text_response or "Task completed"

    return "Max turns reached without final response"
157
678
 
158
679
 
680
+ @retry(
681
+ stop=stop_after_attempt(5),
682
+ wait=wait_exponential(multiplier=1, min=4, max=60),
683
+ retry=retry_if_exception(is_retryable_exception),
684
+ before_sleep=lambda retry_state: logger.info(
685
+ f"Rate limited or server error, retrying in {retry_state.next_action.sleep} seconds..."
686
+ ),
687
+ )
159
688
  async def invoke_openai(
160
689
  token_store: TokenStore,
161
690
  prompt: str,
@@ -181,6 +710,24 @@ async def invoke_openai(
181
710
  ValueError: If not authenticated with OpenAI
182
711
  httpx.HTTPStatusError: If API request fails
183
712
  """
713
+ # Execute pre-model invoke hooks
714
+ params = {
715
+ "prompt": prompt,
716
+ "model": model,
717
+ "temperature": temperature,
718
+ "max_tokens": max_tokens,
719
+ "thinking_budget": thinking_budget,
720
+ }
721
+ hook_manager = get_hook_manager()
722
+ params = await hook_manager.execute_pre_model_invoke(params)
723
+
724
+ # Update local variables from possibly modified params
725
+ prompt = params["prompt"]
726
+ model = params["model"]
727
+ temperature = params["temperature"]
728
+ max_tokens = params["max_tokens"]
729
+ thinking_budget = params["thinking_budget"]
730
+
184
731
  access_token = await _ensure_valid_token(token_store, "openai")
185
732
 
186
733
  # OpenAI Chat Completions API
@@ -196,7 +743,7 @@ async def invoke_openai(
196
743
  "messages": [{"role": "user", "content": prompt}],
197
744
  "temperature": temperature,
198
745
  }
199
-
746
+
200
747
  # Handle thinking budget for O1/O3 style models (GPT-5.2)
201
748
  if thinking_budget > 0:
202
749
  payload["max_completion_tokens"] = max_tokens + thinking_budget
@@ -211,13 +758,12 @@ async def invoke_openai(
211
758
  json=payload,
212
759
  timeout=120.0,
213
760
  )
214
-
761
+
215
762
  if response.status_code == 401:
216
763
  raise ValueError(
217
- "OpenAI authentication failed. "
218
- "Run: python -m mcp_bridge.auth.cli login openai"
764
+ "OpenAI authentication failed. Run: python -m mcp_bridge.auth.cli login openai"
219
765
  )
220
-
766
+
221
767
  response.raise_for_status()
222
768
 
223
769
  data = response.json()