stravinsky 0.2.38__py3-none-any.whl → 0.2.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of stravinsky might be problematic.

@@ -8,8 +8,10 @@ This replaces the simple model-only invocation with true agentic execution.
 import asyncio
 import json
 import os
+import shutil
 import subprocess
 import signal
+import time
 import uuid
 from dataclasses import asdict, dataclass, field
 from datetime import datetime
@@ -20,6 +22,38 @@ import logging
 
 logger = logging.getLogger(__name__)
 
+# Model routing configuration
+# Specialized agents call external models via MCP tools:
+# explore/dewey/document_writer/multimodal → invoke_gemini(gemini-3-flash)
+# frontend → invoke_gemini(gemini-3-pro-high)
+# delphi → invoke_openai(gpt-5.2)
+# Non-specialized coding tasks use Claude CLI with --model sonnet
+AGENT_MODEL_ROUTING = {
+    # Specialized agents - no CLI model flag, they call invoke_* tools
+    "explore": None,
+    "dewey": None,
+    "document_writer": None,
+    "multimodal": None,
+    "frontend": None,
+    "delphi": None,
+    # Planner uses Opus for superior reasoning about dependencies and parallelization
+    "planner": "opus",
+    # Default for unknown agent types (coding tasks) - use Sonnet 4.5
+    "_default": "sonnet",
+}
+
+# Cost tier classification (from oh-my-opencode pattern)
+AGENT_COST_TIERS = {
+    "explore": "CHEAP",  # Uses gemini-3-flash
+    "dewey": "CHEAP",  # Uses gemini-3-flash
+    "document_writer": "CHEAP",  # Uses gemini-3-flash
+    "multimodal": "CHEAP",  # Uses gemini-3-flash
+    "frontend": "MEDIUM",  # Uses gemini-3-pro-high
+    "delphi": "EXPENSIVE",  # Uses gpt-5.2 (OpenAI GPT)
+    "planner": "EXPENSIVE",  # Uses Claude Opus 4.5
+    "_default": "EXPENSIVE",  # Claude Sonnet 4.5 via CLI
+}
+
 
 @dataclass
 class AgentTask:
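
Note on the routing tables added above: AGENT_MODEL_ROUTING decides whether the spawner passes a --model flag to the Claude CLI, while AGENT_COST_TIERS only classifies cost. A minimal sketch of that lookup, assuming the dict values shown in the hunk (the resolve_cli_model helper name is illustrative, not part of the package):

    from typing import Optional, Tuple

    # Abridged copies of the dicts added in this hunk (assumed unchanged).
    AGENT_MODEL_ROUTING = {"explore": None, "planner": "opus", "_default": "sonnet"}
    AGENT_COST_TIERS = {"explore": "CHEAP", "planner": "EXPENSIVE", "_default": "EXPENSIVE"}

    def resolve_cli_model(agent_type: str) -> Tuple[Optional[str], str]:
        """Map an agent type to (CLI --model value, cost tier).

        None means "no --model flag": the specialized agent is expected to call
        invoke_gemini / invoke_openai itself via MCP tools.
        """
        if agent_type in AGENT_MODEL_ROUTING:
            cli_model = AGENT_MODEL_ROUTING[agent_type]
        else:
            cli_model = AGENT_MODEL_ROUTING.get("_default", "sonnet")
        tier = AGENT_COST_TIERS.get(agent_type, AGENT_COST_TIERS["_default"])
        return cli_model, tier

    print(resolve_cli_model("explore"))   # (None, 'CHEAP')
    print(resolve_cli_model("planner"))   # ('opus', 'EXPENSIVE')
    print(resolve_cli_model("refactor"))  # ('sonnet', 'EXPENSIVE') - unknown types fall back
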
@@ -62,9 +96,13 @@ class AgentManager:
     - Provides notification mechanism for task completion
     """
 
-    CLAUDE_CLI = "/opt/homebrew/bin/claude"
+    # Dynamic CLI path - find claude in PATH, fallback to common locations
+    CLAUDE_CLI = shutil.which("claude") or "/opt/homebrew/bin/claude"
 
     def __init__(self, base_dir: Optional[str] = None):
+        # Initialize lock FIRST - used by _save_tasks and _load_tasks
+        self._lock = threading.RLock()
+
         if base_dir:
             self.base_dir = Path(base_dir)
         else:
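
Two independent fixes appear in this hunk: CLAUDE_CLI is now resolved through shutil.which with the old Homebrew path as a fallback, and the re-entrant lock is created before anything that may call _load_tasks or _save_tasks. A minimal sketch of the same initialization order, with a simplified stand-in class (not the package's full implementation):

    import shutil
    import threading

    class ManagerSketch:
        # Prefer whatever "claude" is on PATH; fall back to the Homebrew install path.
        CLAUDE_CLI = shutil.which("claude") or "/opt/homebrew/bin/claude"

        def __init__(self) -> None:
            # Create the lock first: if it were assigned after _load_tasks(),
            # the first locked call would raise AttributeError.
            self._lock = threading.RLock()
            self._tasks = self._load_tasks()

        def _load_tasks(self) -> dict:
            with self._lock:  # safe only because the lock already exists
                return {}

    print(ManagerSketch().CLAUDE_CLI)
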
@@ -81,8 +119,7 @@ class AgentManager:
 
         # In-memory tracking for running processes
         self._processes: Dict[str, subprocess.Popen] = {}
-        self._notification_queue: Dict[str, List[AgentTask]] = {}
-        self._lock = threading.RLock()
+        self._notification_queue: Dict[str, List[Dict[str, Any]]] = {}
 
     def _load_tasks(self) -> Dict[str, Any]:
         """Load tasks from persistent storage."""
@@ -151,7 +188,9 @@ class AgentManager:
         Returns:
             Task ID for tracking
         """
-        task_id = f"agent_{uuid.uuid4().hex[:8]}"
+        import uuid as uuid_module  # Local import for MCP context
+
+        task_id = f"agent_{uuid_module.uuid4().hex[:8]}"
 
         task = AgentTask(
             id=task_id,
@@ -216,11 +255,20 @@ class AgentManager:
             full_prompt,
             "--output-format",
             "text",
+            "--dangerously-skip-permissions",  # Critical: bypass permission prompts
         ]
 
-        # Add model selection if specified
-        if model:
-            cmd.extend(["--model", model])
+        # Model routing:
+        # - Specialized agents (explore/dewey/etc): None = use CLI default, they call invoke_*
+        # - Unknown agent types (coding tasks): Use Sonnet 4.5
+        if agent_type in AGENT_MODEL_ROUTING:
+            cli_model = AGENT_MODEL_ROUTING[agent_type]  # None for specialized
+        else:
+            cli_model = AGENT_MODEL_ROUTING.get("_default", "sonnet")
+
+        if cli_model:
+            cmd.extend(["--model", cli_model])
+            logger.info(f"[AgentManager] Using --model {cli_model} for {agent_type} agent")
 
         # Add system prompt file if we have one
         if system_prompt:
@@ -231,16 +279,18 @@ class AgentManager:
         # Execute Claude CLI as subprocess with full tool access
         logger.info(f"[AgentManager] Running: {' '.join(cmd[:3])}...")
 
-        with open(log_file, "w") as log_f:
-            process = subprocess.Popen(
-                cmd,
-                stdout=subprocess.PIPE,
-                stderr=log_f,
-                text=True,
-                cwd=str(Path.cwd()),
-                env={**os.environ, "CLAUDE_CODE_ENTRYPOINT": "stravinsky-agent"},
-                start_new_session=True,  # Allow process group management
-            )
+        # Use PIPE for stderr to capture it properly
+        # (Previously used file handle which was closed before process finished)
+        process = subprocess.Popen(
+            cmd,
+            stdin=subprocess.DEVNULL,  # Critical: prevent stdin blocking
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            cwd=str(Path.cwd()),
+            env={**os.environ, "CLAUDE_CODE_ENTRYPOINT": "stravinsky-agent"},
+            start_new_session=True,  # Allow process group management
+        )
 
         # Track the process
         self._processes[task_id] = process
@@ -248,9 +298,13 @@ class AgentManager:
 
         # Wait for completion with timeout
         try:
-            stdout, _ = process.communicate(timeout=timeout)
+            stdout, stderr = process.communicate(timeout=timeout)
             result = stdout.strip() if stdout else ""
 
+            # Write stderr to log file
+            if stderr:
+                log_file.write_text(stderr)
+
             if process.returncode == 0:
                 output_file.write_text(result)
                 self._update_task(
@@ -262,8 +316,8 @@ class AgentManager:
                 logger.info(f"[AgentManager] Agent {task_id} completed successfully")
             else:
                 error_msg = f"Claude CLI exited with code {process.returncode}"
-                if log_file.exists():
-                    error_msg += f"\n{log_file.read_text()}"
+                if stderr:
+                    error_msg += f"\n{stderr}"
                 self._update_task(
                     task_id,
                     status="failed",
@@ -406,9 +460,13 @@ class AgentManager:
         start = datetime.now()
         while (datetime.now() - start).total_seconds() < timeout:
             task = self.get_task(task_id)
-            if task["status"] != "running":
+            if not task or task["status"] != "running":
                 break
-            asyncio.sleep(0.5)
+            time.sleep(0.5)
+
+        # Refresh task state after potential blocking wait
+        if not task:
+            return f"Task {task_id} not found."
 
         status = task["status"]
         description = task.get("description", "")
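
The polling change above matters because this wait loop runs in a synchronous method: asyncio.sleep(0.5) called without await only creates a coroutine object and returns immediately, so the old loop spun at full speed instead of pausing. A small sketch of the corrected pattern, with a stubbed get_task standing in for the real manager:

    import time
    from datetime import datetime
    from typing import Optional

    def get_task(task_id: str) -> Optional[dict]:
        """Stub for AgentManager.get_task: pretend the task finished already."""
        return {"status": "completed", "description": "demo"}

    def wait_for(task_id: str, timeout: float = 5.0) -> str:
        start = datetime.now()
        task = None
        while (datetime.now() - start).total_seconds() < timeout:
            task = get_task(task_id)
            if not task or task["status"] != "running":
                break
            time.sleep(0.5)  # blocking sleep; an un-awaited asyncio.sleep() here would be a no-op

        if not task:
            return f"Task {task_id} not found."
        return task["status"]

    print(wait_for("agent_deadbeef"))  # illustrative task ID -> "completed"
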
@@ -591,96 +649,128 @@ async def agent_spawn(
     manager = get_manager()
 
     # Map agent types to system prompts
+    # ALL agents use invoke_gemini or invoke_openai - NOT Claude directly
+    # explore/dewey/document_writer/multimodal/frontend → gemini-3-flash
+    # delphi → openai gpt-5.2
     system_prompts = {
-        "explore": "You are a codebase exploration specialist. Find files, patterns, and answer 'where is X?' questions efficiently.",
-        "dewey": "You are a documentation and research specialist. Find implementation examples, official docs, and provide evidence-based answers.",
-        "frontend": """You are a Senior Frontend Architect & Avant-Garde UI Designer with 15+ years experience.
-
-OPERATIONAL DIRECTIVES:
-- Follow instructions. Execute immediately. No fluff.
-- Output First: Prioritize code and visual solutions.
-
-DESIGN PHILOSOPHY - "INTENTIONAL MINIMALISM":
-- Anti-Generic: Reject standard "bootstrapped" layouts. If it looks like a template, it's wrong.
-- Bespoke layouts, asymmetry, distinctive typography.
-- Before placing any element, calculate its purpose. No purpose = delete it.
-
-FRONTEND CODING STANDARDS:
-- Library Discipline: If a UI library (Shadcn, Radix, MUI) is detected, YOU MUST USE IT.
-- Do NOT build custom components if the library provides them.
-- Stack: Modern (React/Vue/Svelte), Tailwind/Custom CSS, semantic HTML5.
-- Focus on micro-interactions, perfect spacing, "invisible" UX.
-
-RESPONSE FORMAT:
-1. Rationale: (1 sentence on why elements were placed there)
-2. The Code.
-
-ULTRATHINK MODE (when user says "ULTRATHINK" or "think harder"):
-1. Deep Reasoning Chain: Detailed breakdown of architectural and design decisions
-2. Edge Case Analysis: What could go wrong and how we prevented it
-3. The Code: Optimized, bespoke, production-ready, utilizing existing libraries""",
-        "delphi": "You are a strategic advisor. Provide architecture guidance, debugging assistance, and code review.",
-        "document_writer": """You are a Technical Documentation Specialist. Your expertise is creating clear, comprehensive documentation.
-
-DOCUMENT TYPES YOU EXCEL AT:
-- README files with proper structure
-- API documentation with examples
-- Architecture decision records (ADRs)
-- User guides and tutorials
-- Inline code documentation
-
-DOCUMENTATION PRINCIPLES:
-- Audience-first: Know who's reading and what they need
-- Progressive disclosure: Overview → Details → Edge cases
-- Examples over explanations: Show, don't just tell
-- Keep it DRY: Reference rather than repeat
-- Version awareness: Note when behavior differs across versions
-
-RESPONSE FORMAT:
-1. Document type and target audience identified
-2. The documentation, properly formatted in markdown""",
-        "multimodal": """You interpret media files that cannot be read as plain text.
-
-Your job: examine the attached file and extract ONLY what was requested.
-
-CAPABILITIES:
-- PDFs: extract text, structure, tables, data from specific sections
-- Images: describe layouts, UI elements, text, diagrams, charts
-- Diagrams: explain relationships, flows, architecture depicted
-- Screenshots: analyze UI/UX, identify components, extract text
-
-HOW YOU WORK:
-1. Receive a file path and a goal describing what to extract
-2. Read and analyze the file deeply using Gemini's vision capabilities
-3. Return ONLY the relevant extracted information
-4. The main agent never processes the raw file - you save context tokens
-
-RESPONSE RULES:
-- Return extracted information directly, no preamble
-- If info not found, state clearly what's missing
-- Be thorough on the goal, concise on everything else""",
+        "explore": """You are a codebase exploration specialist. Find files, patterns, and answer 'where is X?' questions.
+
+MODEL ROUTING (MANDATORY):
+You MUST use invoke_gemini with model="gemini-3-flash" for ALL analysis and reasoning.
+Use Claude's native tools (Read, Grep, Glob) ONLY for file access, then pass content to invoke_gemini.
+
+WORKFLOW:
+1. Use Read/Grep/Glob to get file contents
+2. Call invoke_gemini(prompt="Analyze this: <content>", model="gemini-3-flash", agent_context={"agent_type": "explore"}) for analysis
+3. Return the Gemini response""",
+        "dewey": """You are a documentation and research specialist. Find implementation examples and official docs.
+
+MODEL ROUTING (MANDATORY):
+You MUST use invoke_gemini with model="gemini-3-flash" for ALL analysis, summarization, and reasoning.
+
+WORKFLOW:
+1. Gather information using available tools
+2. Call invoke_gemini(prompt="<task>", model="gemini-3-flash", agent_context={"agent_type": "dewey"}) for processing
+3. Return the Gemini response""",
+        "frontend": """You are a Senior Frontend Architect & UI Designer.
+
+MODEL ROUTING (MANDATORY):
+You MUST use invoke_gemini with model="gemini-3-pro-high" for ALL code generation and design work.
+
+DESIGN PHILOSOPHY:
+- Anti-Generic: Reject standard layouts. Bespoke, asymmetric, distinctive.
+- Library Discipline: Use existing UI libraries (Shadcn, Radix, MUI) if detected.
+- Stack: React/Vue/Svelte, Tailwind/Custom CSS, semantic HTML5.
+
+WORKFLOW:
+1. Analyze requirements
+2. Call invoke_gemini(prompt="Generate frontend code for: <task>", model="gemini-3-pro-high", agent_context={"agent_type": "frontend"})
+3. Return the code""",
+        "delphi": """You are a strategic technical advisor for architecture and hard debugging.
+
+MODEL ROUTING (MANDATORY):
+You MUST use invoke_openai with model="gpt-5.2" for ALL strategic advice and analysis.
+
+WORKFLOW:
+1. Gather context about the problem
+2. Call invoke_openai(prompt="<problem description>", model="gpt-5.2", agent_context={"agent_type": "delphi"})
+3. Return the GPT response""",
+        "document_writer": """You are a Technical Documentation Specialist.
+
+MODEL ROUTING (MANDATORY):
+You MUST use invoke_gemini with model="gemini-3-flash" for ALL documentation generation.
+
+DOCUMENT TYPES: README, API docs, ADRs, user guides, inline docs.
+
+WORKFLOW:
+1. Gather context about what to document
+2. Call invoke_gemini(prompt="Write documentation for: <topic>", model="gemini-3-flash", agent_context={"agent_type": "document_writer"})
+3. Return the documentation""",
+        "multimodal": """You interpret media files (PDFs, images, diagrams, screenshots).
+
+MODEL ROUTING (MANDATORY):
+You MUST use invoke_gemini with model="gemini-3-flash" for ALL visual analysis.
+
+WORKFLOW:
+1. Receive file path and extraction goal
+2. Call invoke_gemini(prompt="Analyze this file: <path>. Extract: <goal>", model="gemini-3-flash", agent_context={"agent_type": "multimodal"})
+3. Return extracted information only""",
+        "planner": """You are a pre-implementation planning specialist. You analyze requests and produce structured implementation plans BEFORE any code changes begin.
+
+PURPOSE:
+- Analyze requests and produce actionable implementation plans
+- Identify dependencies and parallelization opportunities
+- Enable efficient parallel execution by the orchestrator
+- Prevent wasted effort through upfront planning
+
+METHODOLOGY:
+1. EXPLORE FIRST: Spawn explore agents IN PARALLEL to understand the codebase
+2. DECOMPOSE: Break request into atomic, single-purpose tasks
+3. ANALYZE DEPENDENCIES: What blocks what? What can run in parallel?
+4. ASSIGN AGENTS: Map each task to the right specialist (explore/dewey/frontend/delphi)
+5. OUTPUT STRUCTURED PLAN: Use the required format below
+
+REQUIRED OUTPUT FORMAT:
+```
+## PLAN: [Brief title]
+
+### ANALYSIS
+- **Request**: [One sentence summary]
+- **Scope**: [What's in/out of scope]
+- **Risk Level**: [Low/Medium/High]
+
+### EXECUTION PHASES
+
+#### Phase 1: [Name] (PARALLEL)
+| Task | Agent | Files | Est |
+|------|-------|-------|-----|
+| [description] | explore | file.py | S/M/L |
+
+#### Phase 2: [Name] (SEQUENTIAL after Phase 1)
+| Task | Agent | Files | Est |
+|------|-------|-------|-----|
+
+### AGENT SPAWN COMMANDS
+```python
+# Phase 1 - Fire all in parallel
+agent_spawn(prompt="...", agent_type="explore", description="...")
+```
+```
+
+CONSTRAINTS:
+- You ONLY plan. You NEVER execute code changes.
+- Every task must have a clear agent assignment
+- Parallel phases must be truly independent
+- Include ready-to-use agent_spawn commands""",
     }
 
     system_prompt = system_prompts.get(agent_type, None)
 
-    # Override model and thinking_budget based on agent type for optimal performance
-    # Per project requirements:
-    # - Gemini tiers are controlled by thinking_budget, NOT model name
-    # - high: 32000, medium: 16000, low: 8000 thinking tokens
-    # - "gemini-3-pro-high" = gemini-3-pro-low + thinking_budget=32000
-    # - "gemini-3-flash" equivalent = gemini-3-pro-low + thinking_budget=0
-    agent_configs = {
-        "stravinsky": {"model": "claude-opus-4-5", "thinking_budget": 0},
-        "frontend": {"model": "gemini-3-pro-low", "thinking_budget": 32000},  # HIGH tier
-        "document_writer": {"model": "gemini-3-pro-low", "thinking_budget": 0},  # Flash equiv
-        "multimodal": {"model": "gemini-3-pro-low", "thinking_budget": 0},  # Flash equiv
-        "explore": {"model": "gemini-3-pro-low", "thinking_budget": 0},  # Flash equiv
-        "delphi": {"model": "gpt-5.2", "thinking_budget": 0},  # GPT-5.2 strategic
-        "dewey": {"model": "gemini-3-pro-low", "thinking_budget": 0},  # Flash equiv
-    }
-    config = agent_configs.get(agent_type, {"model": model, "thinking_budget": thinking_budget})
-    actual_model = config["model"]
-    actual_thinking_budget = config["thinking_budget"]
+    # Model routing (MANDATORY - enforced in system prompts):
+    # - explore, dewey, document_writer, multimodal → invoke_gemini(gemini-3-flash)
+    # - frontend → invoke_gemini(gemini-3-pro-high)
+    # - delphi → invoke_openai(gpt-5.2)
+    # - Unknown agent types (coding tasks) → Claude CLI --model sonnet
 
     # Get token store for authentication
    from ..auth.token_store import TokenStore
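
All of the new prompts funnel model access through two MCP tools, invoke_gemini and invoke_openai, with the call shape spelled out in the prompt text itself. A runnable sketch of that shape, with a local stub standing in for the real MCP tool (the stub's behavior is invented; only the argument names come from the prompts above):

    from typing import Any, Dict

    def invoke_gemini(prompt: str, model: str, agent_context: Dict[str, Any]) -> str:
        """Stub for the MCP tool of the same name; argument names are taken from
        the prompt text above, the return value is invented for illustration."""
        return f"[{model}] response for {agent_context['agent_type']}"

    # The call an "explore" agent is instructed to make:
    print(invoke_gemini(
        prompt="Analyze this: <file contents>",
        model="gemini-3-flash",
        agent_context={"agent_type": "explore"},
    ))
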
@@ -693,8 +783,8 @@ RESPONSE RULES:
         agent_type=agent_type,
         description=description or prompt[:50],
         system_prompt=system_prompt,
-        model=actual_model,
-        thinking_budget=actual_thinking_budget,
+        model=model,  # Not used for Claude CLI, kept for API compatibility
+        thinking_budget=thinking_budget,  # Not used for Claude CLI, kept for API compatibility
         timeout=timeout,
     )
 
@@ -702,8 +792,6 @@ RESPONSE RULES:
 
 **Task ID**: {task_id}
 **Agent Type**: {agent_type}
-**Model**: {actual_model}
-**Thinking Budget**: {actual_thinking_budget if actual_thinking_budget > 0 else "N/A"}
 **Description**: {description or prompt[:50]}
 
 The agent is now running. Use:
@@ -61,7 +61,8 @@ class BackgroundManager:
             json.dump(tasks, f, indent=2)
 
     def create_task(self, prompt: str, model: str) -> str:
-        task_id = str(uuid.uuid4())[:8]
+        import uuid as uuid_module  # Local import for MCP context
+        task_id = str(uuid_module.uuid4())[:8]
         task = BackgroundTask(
             id=task_id,
             prompt=prompt,