stravinsky 0.2.67__py3-none-any.whl → 0.4.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of stravinsky might be problematic. See the registry's advisory page for more details.

@@ -5,6 +5,7 @@ These tools use OAuth tokens from the token store to authenticate
5
5
  API requests to external model providers.
6
6
  """
7
7
 
8
+ import asyncio
8
9
  import logging
9
10
  import os
10
11
  import time
@@ -134,6 +135,9 @@ _SESSION_CACHE: dict[str, str] = {}
134
135
  # Pooled HTTP client for connection reuse
135
136
  _HTTP_CLIENT: httpx.AsyncClient | None = None
136
137
 
138
+ # Rate limiting: Max 5 concurrent Gemini requests to prevent burst rate limits
139
+ _GEMINI_SEMAPHORE: asyncio.Semaphore | None = None
140
+
137
141
 
138
142
  def _get_session_id(conversation_key: str | None = None) -> str:
139
143
  """
@@ -174,6 +178,19 @@ async def _get_http_client() -> httpx.AsyncClient:
174
178
  return _HTTP_CLIENT
175
179
 
176
180
 
181
+ def _get_gemini_semaphore() -> asyncio.Semaphore:
182
+ """
183
+ Get or create semaphore for Gemini API rate limiting.
184
+
185
+ Limits concurrent Gemini requests to prevent burst rate limits (429 errors).
186
+ Max 5 concurrent requests balances throughput with API quota constraints.
187
+ """
188
+ global _GEMINI_SEMAPHORE
189
+ if _GEMINI_SEMAPHORE is None:
190
+ _GEMINI_SEMAPHORE = asyncio.Semaphore(5)
191
+ return _GEMINI_SEMAPHORE
192
+
193
+
177
194
  def _extract_gemini_response(data: dict) -> str:
178
195
  """
179
196
  Extract text from Gemini response, handling thinking blocks.
@@ -284,18 +301,25 @@ async def _ensure_valid_token(token_store: TokenStore, provider: str) -> str:
284
301
 
285
302
 
286
303
  def is_retryable_exception(e: Exception) -> bool:
287
- """Check if an exception is retryable (429 or 5xx)."""
304
+ """
305
+ Check if an exception is retryable (5xx only, NOT 429).
306
+
307
+ 429 (Rate Limit) errors should fail fast - retrying makes the problem worse
308
+ by adding more requests to an already exhausted quota. The semaphore prevents
309
+ these in the first place, but if one slips through, we shouldn't retry.
310
+ """
288
311
  if isinstance(e, httpx.HTTPStatusError):
289
- return e.response.status_code == 429 or 500 <= e.response.status_code < 600
312
+ # Only retry server errors (5xx), not rate limits (429)
313
+ return 500 <= e.response.status_code < 600
290
314
  return False
291
315
 
292
316
 
293
317
  @retry(
294
- stop=stop_after_attempt(5),
295
- wait=wait_exponential(multiplier=1, min=4, max=60),
318
+ stop=stop_after_attempt(2), # Reduced from 5 to 2 attempts
319
+ wait=wait_exponential(multiplier=2, min=10, max=120), # Longer waits: 10s → 20s → 40s
296
320
  retry=retry_if_exception(is_retryable_exception),
297
321
  before_sleep=lambda retry_state: logger.info(
298
- f"Rate limited or server error, retrying in {retry_state.next_action.sleep} seconds..."
322
+ f"Server error, retrying in {retry_state.next_action.sleep} seconds..."
299
323
  ),
300
324
  )
301
325
  async def invoke_gemini(
@@ -366,182 +390,185 @@ async def invoke_gemini(
366
390
  desc_info = f" | {description}" if description else ""
367
391
  print(f"🔮 GEMINI: {model} | agent={agent_type}{task_info}{desc_info}", file=sys.stderr)
368
392
 
369
- access_token = await _ensure_valid_token(token_store, "gemini")
393
+ # Acquire semaphore to limit concurrent Gemini requests (prevents 429 rate limits)
394
+ semaphore = _get_gemini_semaphore()
395
+ async with semaphore:
396
+ access_token = await _ensure_valid_token(token_store, "gemini")
370
397
 
371
- # Resolve user-friendly model name to actual API model ID
372
- api_model = resolve_gemini_model(model)
398
+ # Resolve user-friendly model name to actual API model ID
399
+ api_model = resolve_gemini_model(model)
373
400
 
374
- # Use persistent session ID for thinking signature caching
375
- session_id = _get_session_id()
376
- project_id = os.getenv("STRAVINSKY_ANTIGRAVITY_PROJECT_ID", ANTIGRAVITY_DEFAULT_PROJECT_ID)
401
+ # Use persistent session ID for thinking signature caching
402
+ session_id = _get_session_id()
403
+ project_id = os.getenv("STRAVINSKY_ANTIGRAVITY_PROJECT_ID", ANTIGRAVITY_DEFAULT_PROJECT_ID)
377
404
 
378
- headers = {
379
- "Authorization": f"Bearer {access_token}",
380
- "Content-Type": "application/json",
381
- **ANTIGRAVITY_HEADERS, # Include Antigravity headers
382
- }
405
+ headers = {
406
+ "Authorization": f"Bearer {access_token}",
407
+ "Content-Type": "application/json",
408
+ **ANTIGRAVITY_HEADERS, # Include Antigravity headers
409
+ }
383
410
 
384
- # Build inner request payload
385
- # Per API spec: contents must include role ("user" or "model")
386
-
387
- # Build parts list - text prompt plus optional image
388
- parts = [{"text": prompt}]
389
-
390
- # Add image data for vision analysis (token optimization for multimodal)
391
- if image_path:
392
- import base64
393
- from pathlib import Path
394
-
395
- image_file = Path(image_path)
396
- if image_file.exists():
397
- # Determine MIME type
398
- suffix = image_file.suffix.lower()
399
- mime_types = {
400
- ".png": "image/png",
401
- ".jpg": "image/jpeg",
402
- ".jpeg": "image/jpeg",
403
- ".gif": "image/gif",
404
- ".webp": "image/webp",
405
- ".pdf": "application/pdf",
406
- }
407
- mime_type = mime_types.get(suffix, "image/png")
411
+ # Build inner request payload
412
+ # Per API spec: contents must include role ("user" or "model")
413
+
414
+ # Build parts list - text prompt plus optional image
415
+ parts = [{"text": prompt}]
416
+
417
+ # Add image data for vision analysis (token optimization for multimodal)
418
+ if image_path:
419
+ import base64
420
+ from pathlib import Path
421
+
422
+ image_file = Path(image_path)
423
+ if image_file.exists():
424
+ # Determine MIME type
425
+ suffix = image_file.suffix.lower()
426
+ mime_types = {
427
+ ".png": "image/png",
428
+ ".jpg": "image/jpeg",
429
+ ".jpeg": "image/jpeg",
430
+ ".gif": "image/gif",
431
+ ".webp": "image/webp",
432
+ ".pdf": "application/pdf",
433
+ }
434
+ mime_type = mime_types.get(suffix, "image/png")
408
435
 
409
- # Read and base64 encode
410
- image_data = base64.b64encode(image_file.read_bytes()).decode("utf-8")
436
+ # Read and base64 encode
437
+ image_data = base64.b64encode(image_file.read_bytes()).decode("utf-8")
411
438
 
412
- # Add inline image data for Gemini Vision API
413
- parts.append({
414
- "inlineData": {
415
- "mimeType": mime_type,
416
- "data": image_data,
417
- }
418
- })
419
- logger.info(f"[multimodal] Added vision data: {image_path} ({mime_type})")
420
-
421
- inner_payload = {
422
- "contents": [{"role": "user", "parts": parts}],
423
- "generationConfig": {
424
- "temperature": temperature,
425
- "maxOutputTokens": max_tokens,
426
- },
427
- "sessionId": session_id,
428
- }
439
+ # Add inline image data for Gemini Vision API
440
+ parts.append({
441
+ "inlineData": {
442
+ "mimeType": mime_type,
443
+ "data": image_data,
444
+ }
445
+ })
446
+ logger.info(f"[multimodal] Added vision data: {image_path} ({mime_type})")
429
447
 
430
- # Add thinking budget if supported by model/API
431
- if thinking_budget > 0:
432
- # For Gemini 2.0+ Thinking models
433
- # Per Antigravity API: use "thinkingBudget", NOT "tokenLimit"
434
- inner_payload["generationConfig"]["thinkingConfig"] = {
435
- "includeThoughts": True,
436
- "thinkingBudget": thinking_budget,
448
+ inner_payload = {
449
+ "contents": [{"role": "user", "parts": parts}],
450
+ "generationConfig": {
451
+ "temperature": temperature,
452
+ "maxOutputTokens": max_tokens,
453
+ },
454
+ "sessionId": session_id,
437
455
  }
438
456
 
439
- # Wrap request body per reference implementation
440
- try:
441
- import uuid as uuid_module # Local import workaround for MCP context issue
442
-
443
- request_id = f"invoke-{uuid_module.uuid4()}"
444
- except Exception as e:
445
- logger.error(f"UUID IMPORT FAILED: {e}")
446
- raise RuntimeError(f"CUSTOM ERROR: UUID import failed: {e}")
447
-
448
- wrapped_payload = {
449
- "project": project_id,
450
- "model": api_model,
451
- "userAgent": "antigravity",
452
- "requestId": request_id,
453
- "request": inner_payload,
454
- }
457
+ # Add thinking budget if supported by model/API
458
+ if thinking_budget > 0:
459
+ # For Gemini 2.0+ Thinking models
460
+ # Per Antigravity API: use "thinkingBudget", NOT "tokenLimit"
461
+ inner_payload["generationConfig"]["thinkingConfig"] = {
462
+ "includeThoughts": True,
463
+ "thinkingBudget": thinking_budget,
464
+ }
455
465
 
456
- # Get pooled HTTP client for connection reuse
457
- client = await _get_http_client()
466
+ # Wrap request body per reference implementation
467
+ try:
468
+ import uuid as uuid_module # Local import workaround for MCP context issue
458
469
 
459
- # Try endpoints in fallback order with thinking recovery
460
- response = None
461
- last_error = None
462
- max_retries = 2 # For thinking recovery
470
+ request_id = f"invoke-{uuid_module.uuid4()}"
471
+ except Exception as e:
472
+ logger.error(f"UUID IMPORT FAILED: {e}")
473
+ raise RuntimeError(f"CUSTOM ERROR: UUID import failed: {e}")
463
474
 
464
- for retry_attempt in range(max_retries):
465
- for endpoint in ANTIGRAVITY_ENDPOINTS:
466
- # Reference uses: {endpoint}/v1internal:generateContent (NOT /models/{model})
467
- api_url = f"{endpoint}/v1internal:generateContent"
475
+ wrapped_payload = {
476
+ "project": project_id,
477
+ "model": api_model,
478
+ "userAgent": "antigravity",
479
+ "requestId": request_id,
480
+ "request": inner_payload,
481
+ }
468
482
 
469
- try:
470
- response = await client.post(
471
- api_url,
472
- headers=headers,
473
- json=wrapped_payload,
474
- timeout=120.0,
475
- )
483
+ # Get pooled HTTP client for connection reuse
484
+ client = await _get_http_client()
476
485
 
477
- # 401/403 might be endpoint-specific, try next endpoint
478
- if response.status_code in (401, 403):
479
- logger.warning(
480
- f"[Gemini] Endpoint {endpoint} returned {response.status_code}, trying next"
486
+ # Try endpoints in fallback order with thinking recovery
487
+ response = None
488
+ last_error = None
489
+ max_retries = 2 # For thinking recovery
490
+
491
+ for retry_attempt in range(max_retries):
492
+ for endpoint in ANTIGRAVITY_ENDPOINTS:
493
+ # Reference uses: {endpoint}/v1internal:generateContent (NOT /models/{model})
494
+ api_url = f"{endpoint}/v1internal:generateContent"
495
+
496
+ try:
497
+ response = await client.post(
498
+ api_url,
499
+ headers=headers,
500
+ json=wrapped_payload,
501
+ timeout=120.0,
481
502
  )
482
- last_error = Exception(f"{response.status_code} from {endpoint}")
483
- continue
484
503
 
485
- # Check for thinking-related errors that need recovery
486
- if response.status_code in (400, 500):
487
- error_text = response.text.lower()
488
- if "thinking" in error_text or "signature" in error_text:
504
+ # 401/403 might be endpoint-specific, try next endpoint
505
+ if response.status_code in (401, 403):
489
506
  logger.warning(
490
- f"[Gemini] Thinking error detected, clearing session cache and retrying"
507
+ f"[Gemini] Endpoint {endpoint} returned {response.status_code}, trying next"
491
508
  )
492
- clear_session_cache()
493
- # Update session ID for retry
494
- wrapped_payload["request"]["sessionId"] = _get_session_id()
495
- last_error = Exception(f"Thinking error: {response.text[:200]}")
496
- break # Break inner loop to retry with new session
497
-
498
- # If we got a non-retryable response (success or 4xx client error), use it
499
- if response.status_code < 500 and response.status_code != 429:
500
- break
509
+ last_error = Exception(f"{response.status_code} from {endpoint}")
510
+ continue
511
+
512
+ # Check for thinking-related errors that need recovery
513
+ if response.status_code in (400, 500):
514
+ error_text = response.text.lower()
515
+ if "thinking" in error_text or "signature" in error_text:
516
+ logger.warning(
517
+ f"[Gemini] Thinking error detected, clearing session cache and retrying"
518
+ )
519
+ clear_session_cache()
520
+ # Update session ID for retry
521
+ wrapped_payload["request"]["sessionId"] = _get_session_id()
522
+ last_error = Exception(f"Thinking error: {response.text[:200]}")
523
+ break # Break inner loop to retry with new session
524
+
525
+ # If we got a non-retryable response (success or 4xx client error), use it
526
+ if response.status_code < 500 and response.status_code != 429:
527
+ break
528
+
529
+ except httpx.TimeoutException as e:
530
+ last_error = e
531
+ continue
532
+ except Exception as e:
533
+ last_error = e
534
+ continue
535
+ else:
536
+ # Inner loop completed without break - no thinking recovery needed
537
+ break
501
538
 
502
- except httpx.TimeoutException as e:
503
- last_error = e
504
- continue
505
- except Exception as e:
506
- last_error = e
539
+ # If we broke out of inner loop for thinking recovery, continue outer retry loop
540
+ if response and response.status_code in (400, 500):
507
541
  continue
508
- else:
509
- # Inner loop completed without break - no thinking recovery needed
510
542
  break
511
543
 
512
- # If we broke out of inner loop for thinking recovery, continue outer retry loop
513
- if response and response.status_code in (400, 500):
514
- continue
515
- break
516
-
517
- if response is None:
518
- # FALLBACK: Try Claude sonnet-4.5 for agents that support it
519
- agent_context = params.get("agent_context", {})
520
- agent_type = agent_context.get("agent_type", "unknown")
521
-
522
- if agent_type in ("dewey", "explore", "document_writer", "multimodal"):
523
- logger.warning(f"[{agent_type}] Gemini failed, falling back to Claude sonnet-4.5")
524
- try:
525
- import subprocess
526
- fallback_result = subprocess.run(
527
- ["claude", "-p", prompt, "--model", "sonnet", "--output-format", "text"],
528
- capture_output=True,
529
- text=True,
530
- timeout=120,
531
- cwd=os.getcwd(),
532
- )
533
- if fallback_result.returncode == 0 and fallback_result.stdout.strip():
534
- return fallback_result.stdout.strip()
535
- except Exception as fallback_error:
536
- logger.error(f"Fallback to Claude also failed: {fallback_error}")
544
+ if response is None:
545
+ # FALLBACK: Try Claude sonnet-4.5 for agents that support it
546
+ agent_context = params.get("agent_context", {})
547
+ agent_type = agent_context.get("agent_type", "unknown")
548
+
549
+ if agent_type in ("dewey", "explore", "document_writer", "multimodal"):
550
+ logger.warning(f"[{agent_type}] Gemini failed, falling back to Claude sonnet-4.5")
551
+ try:
552
+ import subprocess
553
+ fallback_result = subprocess.run(
554
+ ["claude", "-p", prompt, "--model", "sonnet", "--output-format", "text"],
555
+ capture_output=True,
556
+ text=True,
557
+ timeout=120,
558
+ cwd=os.getcwd(),
559
+ )
560
+ if fallback_result.returncode == 0 and fallback_result.stdout.strip():
561
+ return fallback_result.stdout.strip()
562
+ except Exception as fallback_error:
563
+ logger.error(f"Fallback to Claude also failed: {fallback_error}")
537
564
 
538
- raise ValueError(f"All Antigravity endpoints failed: {last_error}")
565
+ raise ValueError(f"All Antigravity endpoints failed: {last_error}")
539
566
 
540
- response.raise_for_status()
541
- data = response.json()
567
+ response.raise_for_status()
568
+ data = response.json()
542
569
 
543
- # Extract text from response using thinking-aware parser
544
- return _extract_gemini_response(data)
570
+ # Extract text from response using thinking-aware parser
571
+ return _extract_gemini_response(data)
545
572
 
546
573
 
547
574
  # ========================
@@ -828,11 +855,11 @@ async def invoke_gemini_agentic(
828
855
 
829
856
 
830
857
  @retry(
831
- stop=stop_after_attempt(5),
832
- wait=wait_exponential(multiplier=1, min=4, max=60),
858
+ stop=stop_after_attempt(2), # Reduced from 5 to 2 attempts
859
+ wait=wait_exponential(multiplier=2, min=10, max=120), # Longer waits: 10s → 20s → 40s
833
860
  retry=retry_if_exception(is_retryable_exception),
834
861
  before_sleep=lambda retry_state: logger.info(
835
- f"Rate limited or server error, retrying in {retry_state.next_action.sleep} seconds..."
862
+ f"Server error, retrying in {retry_state.next_action.sleep} seconds..."
836
863
  ),
837
864
  )
838
865
  async def invoke_openai(