zwarm-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zwarm/orchestrator.py ADDED
@@ -0,0 +1,405 @@
+ """
+ Orchestrator: The agent that coordinates multiple executor agents.
+
+ The orchestrator:
+ - Plans and breaks down complex tasks
+ - Delegates work to executor agents (codex, claude-code, etc.)
+ - Supervises progress and provides clarification
+ - Verifies work before marking complete
+
+ It does NOT write code directly - that's the executor's job.
+ """
+
+ from __future__ import annotations
+
+ import json
+ from pathlib import Path
+ from typing import Any, Callable
+
+ import weave
+ from pydantic import Field, PrivateAttr
+ from wbal.agents.yaml_agent import YamlAgent
+ from wbal.helper import TOOL_CALL_TYPE, format_openai_tool_response
+
+ from zwarm.adapters.base import ExecutorAdapter
+ from zwarm.adapters.claude_code import ClaudeCodeAdapter
+ from zwarm.adapters.codex_mcp import CodexMCPAdapter
+ from zwarm.core.config import ZwarmConfig, load_config
+ from zwarm.core.environment import OrchestratorEnv
+ from zwarm.core.models import ConversationSession
+ from zwarm.core.state import StateManager
+ from zwarm.prompts import get_orchestrator_prompt
+ from zwarm.watchers import WatcherManager, WatcherContext, WatcherAction, build_watcher_manager
+
+
+ class Orchestrator(YamlAgent):
+     """
+     Multi-agent orchestrator built on WBAL's YamlAgent.
+
+     Extends YamlAgent with:
+     - Delegation tools (delegate, converse, check_session, end_session)
+     - Session tracking
+     - State persistence
+     - Watcher integration
+     - Weave integration
+     """
+
+     # Configuration
+     config: ZwarmConfig = Field(default_factory=ZwarmConfig)
+     working_dir: Path = Field(default_factory=Path.cwd)
+
+     # Load tools from modules (delegation + bash for verification)
+     agent_tool_modules: list[str] = Field(default=[
+         "zwarm.tools.delegation",
+         "wbal.tools.bash",
+     ])
+
+     # State management
+     _state: StateManager = PrivateAttr()
+     _sessions: dict[str, ConversationSession] = PrivateAttr(default_factory=dict)
+     _adapters: dict[str, ExecutorAdapter] = PrivateAttr(default_factory=dict)
+     _watcher_manager: WatcherManager | None = PrivateAttr(default=None)
+     _resumed: bool = PrivateAttr(default=False)
+
+     def model_post_init(self, __context: Any) -> None:
+         """Initialize state and adapters after model creation."""
+         super().model_post_init(__context)
+
+         # Initialize state manager
+         self._state = StateManager(self.working_dir / self.config.state_dir)
+         self._state.init()
+         self._state.load()
+
+         # Load existing sessions
+         for session in self._state.list_sessions():
+             self._sessions[session.id] = session
+
+         # Initialize Weave if configured
+         if self.config.weave.enabled and self.config.weave.project:
+             weave.init(self.config.weave.project)
+
+         # Initialize watchers if configured
+         if self.config.watchers.enabled:
+             self._watcher_manager = build_watcher_manager({
+                 "watchers": [
+                     {"name": w.name, "enabled": w.enabled, "config": w.config}
+                     for w in self.config.watchers.watchers
+                 ]
+             })
+
+         # Link sessions to environment for observe()
+         if hasattr(self.env, 'set_sessions'):
+             self.env.set_sessions(self._sessions)
+
+     @property
+     def state(self) -> StateManager:
+         """Access state manager."""
+         return self._state
+
+     def _get_adapter(self, name: str) -> ExecutorAdapter:
+         """Get or create an adapter by name."""
+         if name not in self._adapters:
+             if name == "codex_mcp":
+                 self._adapters[name] = CodexMCPAdapter()
+             elif name == "claude_code":
+                 self._adapters[name] = ClaudeCodeAdapter()
+             else:
+                 raise ValueError(f"Unknown adapter: {name}")
+         return self._adapters[name]
+
+     def save_state(self) -> None:
+         """Save orchestrator state for resume."""
+         self._state.save_orchestrator_messages(self.messages)
+
+     def load_state(self) -> None:
+         """Load orchestrator state for resume."""
+         self.messages = self._state.load_orchestrator_messages()
+         self._resumed = True
+
+     def _inject_resume_message(self) -> None:
+         """Inject a system message about resumed state."""
+         if not self._resumed:
+             return
+
+         # Build list of old sessions
+         old_sessions = []
+         for sid, session in self._sessions.items():
+             old_sessions.append(f" - {sid[:8]}... ({session.adapter}, {session.status.value})")
+
+         session_info = "\n".join(old_sessions) if old_sessions else " (none)"
+
+         resume_msg = {
+             "role": "user",
+             "content": f"""[SYSTEM NOTICE] You have been resumed from a previous session.
+
+ IMPORTANT: Your previous executor sessions are NO LONGER ACTIVE. The MCP connections and subprocess handles were lost when the previous session ended.
+
+ Previous sessions (now stale):
+ {session_info}
+
+ You must start NEW sessions with delegate() if you need to continue work. Do NOT try to use converse() or check_session() with the old session IDs - they will fail.
+
+ Continue with your task from where you left off."""
+         }
+
+         self.messages.append(resume_msg)
+         self._resumed = False  # Only inject once
+
+     def _run_watchers(self) -> WatcherAction:
+         """Run watchers and return the action to take."""
+         if not self._watcher_manager:
+             return WatcherAction.CONTINUE
+
+         # Build watcher context
+         ctx = WatcherContext(
+             step=self._step_count,
+             messages=self.messages,
+             sessions={sid: s.to_dict() for sid, s in self._sessions.items()},
+             task=self.env.task if hasattr(self.env, 'task') else "",
+             metadata={
+                 "max_steps": self.maxSteps,
+                 "config": self.config.to_dict() if hasattr(self.config, 'to_dict') else {},
+             },
+         )
+
+         # Run watchers synchronously (they're async internally)
+         import asyncio
+         try:
+             loop = asyncio.get_running_loop()
+         except RuntimeError:
+             loop = None
+
+         if loop and loop.is_running():
+             # Already inside a running event loop - run the watcher coroutine in a separate thread
+             import concurrent.futures
+             with concurrent.futures.ThreadPoolExecutor() as pool:
+                 result = pool.submit(asyncio.run, self._watcher_manager.observe(ctx)).result()
+         else:
+             result = asyncio.run(self._watcher_manager.observe(ctx))
+
+         # Handle watcher result
+         if result.action == WatcherAction.NUDGE and result.guidance:
+             # Inject guidance as a system message
+             self.messages.append({
+                 "role": "user",
+                 "content": f"[WATCHER: {result.metadata.get('triggered_by', 'unknown')}] {result.guidance}"
+             })
+
+         return result.action
+
+     def do(self) -> list[tuple[dict[str, Any], Any]]:
+         """
+         Execute tool calls from the LLM response.
+
+         Overrides base do() to capture and return tool calls with results
+         for Weave tracing visibility.
+
+         Returns:
+             List of (tool_call_info, result) tuples
+         """
+         if self._last_response is None:
+             return []
+
+         output = getattr(self._last_response, 'output', None)
+         if output is None:
+             return []
+
+         # Extract tool calls
+         tool_calls = [
+             item for item in output
+             if getattr(item, 'type', None) == TOOL_CALL_TYPE
+         ]
+
+         # If no tool calls, handle text output
+         if not tool_calls:
+             output_text = getattr(self._last_response, 'output_text', '')
+             if output_text and hasattr(self.env, 'output_handler'):
+                 self.env.output_handler(output_text)
+             return []
+
+         # Execute each tool call and collect results
+         tool_results: list[tuple[dict[str, Any], Any]] = []
+
+         for tc in tool_calls:
+             tc_name = getattr(tc, 'name', '')
+             tc_args_raw = getattr(tc, 'arguments', '{}')
+             tc_id = getattr(tc, 'call_id', '')
+
+             # Parse arguments
+             if isinstance(tc_args_raw, str):
+                 try:
+                     tc_args = json.loads(tc_args_raw)
+                 except json.JSONDecodeError:
+                     tc_args = {}
+             else:
+                 tc_args = tc_args_raw or {}
+
+             # Execute tool
+             if tc_name in self._tool_callables:
+                 try:
+                     tc_output = self._tool_callables[tc_name](**tc_args)
+                 except Exception as e:
+                     tc_output = f"Error executing {tc_name}: {e}"
+             else:
+                 tc_output = f"Unknown tool: {tc_name}"
+
+             # Collect tool call info and result
+             tool_call_info = {
+                 "name": tc_name,
+                 "args": tc_args,
+                 "call_id": tc_id,
+             }
+             tool_results.append((tool_call_info, tc_output))
+
+             # Format and append result to messages
+             result = format_openai_tool_response(tc_output, tc_id)
+             self.messages.append(result)
+
+         return tool_results
+
+     @weave.op()
+     def step(self) -> list[tuple[dict[str, Any], Any]]:
+         """
+         Execute one perceive-invoke-do cycle.
+
+         Overrides base step() to return tool calls with results
+         for Weave tracing visibility.
+
+         Returns:
+             List of (tool_call_info, result) tuples from this step.
+             Each tuple contains:
+             - tool_call_info: {"name": str, "args": dict, "call_id": str}
+             - result: The tool output (any type)
+         """
+         self.perceive()
+         self.invoke()
+         tool_results = self.do()
+         self._step_count += 1
+         return tool_results
+
+     @weave.op()
+     def run(self, task: str | None = None, max_steps: int | None = None) -> dict[str, Any]:
+         """
+         Run the orchestrator until the stop condition is met.
+
+         Overrides base run() to integrate watchers.
+
+         Args:
+             task: The task string. If not provided, uses env.task.
+             max_steps: Override maxSteps for this run.
+
+         Returns:
+             Dict with run results
+         """
+         # Set task from argument or environment
+         if task is not None:
+             self.env.task = task
+
+         # Override max_steps if provided
+         if max_steps is not None:
+             self.maxSteps = max_steps
+
+         # Reset step counter
+         self._step_count = 0
+
+         # Inject resume message if we were resumed
+         self._inject_resume_message()
+
+         for _ in range(self.maxSteps):
+             # Run watchers before each step
+             watcher_action = self._run_watchers()
+
+             if watcher_action == WatcherAction.ABORT:
+                 return {
+                     "steps": self._step_count,
+                     "task": self.env.task,
+                     "stopped_by": "watcher_abort",
+                 }
+             elif watcher_action == WatcherAction.PAUSE:
+                 # For now, treat pause as stop (could add human-in-loop later)
+                 return {
+                     "steps": self._step_count,
+                     "task": self.env.task,
+                     "stopped_by": "watcher_pause",
+                 }
+             # NUDGE and CONTINUE fall through to the next step
+
+             self.step()
+
+             if self.stopCondition:
+                 break
+
+         return {
+             "steps": self._step_count,
+             "task": self.env.task,
+         }
+
+     async def cleanup(self) -> None:
+         """Clean up resources."""
+         for adapter in self._adapters.values():
+             await adapter.cleanup()
+
+
+ def build_orchestrator(
+     config_path: Path | None = None,
+     task: str | None = None,
+     working_dir: Path | None = None,
+     overrides: list[str] | None = None,
+     resume: bool = False,
+     output_handler: Callable[[str], None] | None = None,
+ ) -> Orchestrator:
+     """
+     Build an orchestrator from configuration.
+
+     Args:
+         config_path: Path to YAML config file
+         task: The task to accomplish
+         working_dir: Working directory (default: cwd)
+         overrides: CLI overrides (--set key=value)
+         resume: Whether to resume from previous state
+         output_handler: Function to handle orchestrator output
+
+     Returns:
+         Configured Orchestrator instance
+     """
+     # Load configuration
+     config = load_config(
+         config_path=config_path,
+         overrides=overrides,
+     )
+
+     # Resolve working directory
+     working_dir = working_dir or Path.cwd()
+
+     # Build system prompt
+     system_prompt = _build_system_prompt(config, working_dir)
+
+     # Create lean orchestrator environment
+     env = OrchestratorEnv(
+         task=task or "",
+         working_dir=working_dir,
+     )
+
+     # Set up output handler
+     if output_handler:
+         env.output_handler = output_handler
+
+     # Create orchestrator
+     orchestrator = Orchestrator(
+         config=config,
+         working_dir=working_dir,
+         system_prompt=system_prompt,
+         maxSteps=config.orchestrator.max_steps,
+         env=env,
+     )
+
+     # Resume if requested
+     if resume:
+         orchestrator.load_state()
+
+     return orchestrator
+
+
+ def _build_system_prompt(config: ZwarmConfig, working_dir: Path | None = None) -> str:
+     """Build the orchestrator system prompt."""
+     return get_orchestrator_prompt(working_dir=str(working_dir) if working_dir else None)
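For orientation, here is a minimal usage sketch of the module above. It only exercises the surface shown in this diff (`build_orchestrator`, `Orchestrator.run`, `save_state`, and the async `cleanup`); the config file name, task text, override key, and the `asyncio.run` wrapper around `cleanup()` are illustrative assumptions, not something the wheel documents.

```python
# Hypothetical driver sketch - file names and values are assumptions.
import asyncio
from pathlib import Path

from zwarm.orchestrator import build_orchestrator

orchestrator = build_orchestrator(
    config_path=Path("zwarm.yaml"),           # assumed config file name
    task="Add user authentication to the API",
    working_dir=Path.cwd(),
    overrides=["orchestrator.max_steps=50"],  # --set key=value style, per the docstring
    resume=False,                             # True would call load_state() on the instance
    output_handler=print,
)

result = orchestrator.run()          # {"steps": ..., "task": ..., optionally "stopped_by": ...}
orchestrator.save_state()            # persist messages so a later resumed run can reload them
asyncio.run(orchestrator.cleanup())  # close executor adapters

print(result)
```

A CLI entry point would presumably own this sequence; the sketch just makes the call order implied by `run()`, `save_state()`, and `cleanup()` explicit.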
zwarm/prompts/__init__.py ADDED
@@ -0,0 +1,10 @@
+ """
+ System prompts for zwarm agents.
+ """
+
+ from zwarm.prompts.orchestrator import ORCHESTRATOR_SYSTEM_PROMPT, get_orchestrator_prompt
+
+ __all__ = [
+     "ORCHESTRATOR_SYSTEM_PROMPT",
+     "get_orchestrator_prompt",
+ ]
zwarm/prompts/orchestrator.py ADDED
@@ -0,0 +1,214 @@
+ """
+ Orchestrator system prompt.
+
+ This prompt defines the behavior of the zwarm orchestrator - a staff/principal IC
+ level agent that coordinates multiple coding agents to complete complex tasks
+ with minimal user intervention.
+ """
+
+ ORCHESTRATOR_SYSTEM_PROMPT = """
+ You are an orchestrator agent - a staff/principal IC level coordinator that manages multiple CLI coding agents (executors) to complete complex software engineering tasks autonomously.
+
+ You do NOT write code directly. You delegate to executors who write code. Your job is to plan, delegate, supervise, and verify.
+
+ # Core Philosophy
+
+ You are designed to one-shot full-scale applications with minimal user intervention. Only ask the user when:
+ - Requirements are fundamentally ambiguous and cannot be reasonably inferred
+ - A critical decision would be irreversible and has multiple valid approaches
+ - You need access credentials or external resources
+
+ Default to making reasonable decisions yourself. You are a principal engineer - act like one.
+
+ # Your Tools
+
+ ## Delegation Tools
+ - `delegate(task, mode, adapter)` - Start a new executor session
+ - `converse(session_id, message)` - Continue a sync conversation
+ - `check_session(session_id)` - Check async session status
+ - `end_session(session_id, verdict)` - Mark session complete/failed
+ - `list_sessions()` - List all sessions
+
+ ## Verification Tools
+ - `bash(command)` - Run shell commands to verify work (tests, builds, checks)
+
+ ## Communication
+ - `chat(message, wait_for_user_input)` - Communicate with user (use sparingly)
+
+ # Delegation Modes
+
+ ## Sync Mode (conversational)
+ Use when:
+ - Task requires iterative refinement based on output
+ - You need to guide the executor step-by-step
+ - Requirements may need clarification during execution
+ - The task involves exploration or research
+
+ Pattern:
+ ```
+ 1. delegate(task, mode="sync") → get initial response
+ 2. Review response, identify gaps
+ 3. converse(session_id, clarification) → refine
+ 4. Repeat until satisfied
+ 5. end_session(session_id, verdict="completed")
+ ```
+
+ ## Async Mode (fire-and-forget)
+ Use when:
+ - Task is well-defined and self-contained
+ - You want to parallelize independent work
+ - The executor can complete without guidance
+ - You trust the executor to handle edge cases
+
+ Pattern:
+ ```
+ 1. delegate(task1, mode="async")
+ 2. delegate(task2, mode="async") # parallel
+ 3. Continue other work...
+ 4. check_session(id) periodically
+ 5. end_session when complete
+ ```
+
+ # Task Decomposition
+
+ Break complex tasks into delegatable chunks. Each chunk should:
+ - Have a clear, measurable outcome
+ - Be completable by a single executor session
+ - Include acceptance criteria
+ - Specify file paths when relevant
+
+ Bad: "Build the authentication system"
+ Good: "Implement JWT token generation in src/auth/jwt.py with the following requirements:
+ - Function `generate_token(user_id, expiry_hours=24) -> str`
+ - Use HS256 algorithm with secret from AUTH_SECRET env var
+ - Include user_id and exp claims
+ - Add unit tests in tests/test_jwt.py"
+
+ # Verification Standards
+
+ ALWAYS verify work before marking complete:
+
+ 1. **Run tests**: `bash("pytest path/to/tests -v")`
+ 2. **Run linters**: `bash("ruff check path/to/code")`
+ 3. **Run type checks**: `bash("mypy path/to/code")` if applicable
+ 4. **Build check**: `bash("npm run build")` or equivalent
+ 5. **Manual inspection**: Read the generated code if tests pass but you want to verify quality
+
+ If verification fails:
+ - For sync sessions: converse with the executor to fix
+ - For async sessions: start a new session to fix issues
+ - Do NOT end_session with verdict="completed" until verification passes
+
+ # Error Handling
+
+ When an executor fails or produces incorrect output:
+
+ 1. **Diagnose**: Understand what went wrong
+ 2. **Decide**: Can it be fixed in the current session, or start fresh?
+ 3. **Act**: Either converse to fix, or end_session(verdict="failed") and re-delegate
+
+ Do NOT:
+ - Abandon tasks silently
+ - Mark failed work as completed
+ - Ask the user to fix executor mistakes
+
+ # Quality Standards
+
+ You are responsible for the quality of the final output. Ensure:
+
+ - **Correctness**: Code does what was asked
+ - **Completeness**: All requirements addressed
+ - **Testing**: Appropriate test coverage
+ - **No regressions**: Existing functionality preserved
+ - **Clean integration**: New code fits with existing patterns
+
+ # Communication Style
+
+ When you do communicate with the user:
+ - Be concise and specific
+ - State what you've done, what's next
+ - Only ask questions when truly blocked
+ - Never ask for permission to proceed with reasonable actions
+
+ # Session Management
+
+ - Complete sessions promptly - don't leave them hanging
+ - Clean up failed sessions with clear verdicts
+ - Track multiple parallel sessions carefully
+ - Prioritize completing in-progress work before starting new work
+
+ # Planning Complex Tasks
+
+ For large tasks, create a mental plan:
+
+ 1. **Understand**: What is the end state? What exists now?
+ 2. **Decompose**: Break into ordered, dependent chunks
+ 3. **Sequence**: What can be parallelized? What must be sequential?
+ 4. **Execute**: Delegate systematically
+ 5. **Integrate**: Verify everything works together
+ 6. **Polish**: Handle edge cases, add tests, clean up
+
+ # Anti-Patterns to Avoid
+
+ - Starting many sessions without completing any
+ - Over-delegating simple tasks that could be verified directly
+ - Under-specifying requirements leading to back-and-forth
+ - Asking the user questions you could answer yourself
+ - Marking work complete without verification
+ - Abandoning sessions without proper cleanup
+
+ # Example Task Flow
+
+ Task: "Add user authentication to the API"
+
+ 1. **Plan**: JWT auth, login endpoint, protected routes, tests
+ 2. **Delegate (sync)**: "Implement JWT utilities in src/auth/jwt.py..."
+ 3. **Verify**: Run tests, check types
+ 4. **Delegate (sync)**: "Add login endpoint in src/api/auth.py..."
+ 5. **Verify**: Run tests, manual curl test
+ 6. **Delegate (sync)**: "Add auth middleware in src/middleware/auth.py..."
+ 7. **Verify**: Run full test suite
+ 8. **Integration test**: Test the complete flow
+ 9. **Done**: Report completion to user
+
+ # Final Notes
+
+ You have autonomy. Use it wisely. Make decisions. Move fast. Verify thoroughly. The user trusts you to deliver working software without hand-holding.
+
+ Call `exit()` when the overall task is complete and verified.
+ """
+
+
+ def get_orchestrator_prompt(
+     task: str | None = None,
+     working_dir: str | None = None,
+     additional_context: str | None = None,
+ ) -> str:
+     """
+     Build the full orchestrator system prompt with optional context.
+
+     Args:
+         task: The current task (added to context)
+         working_dir: Working directory path
+         additional_context: Any additional context to append
+
+     Returns:
+         Complete system prompt
+     """
+     prompt = ORCHESTRATOR_SYSTEM_PROMPT
+
+     context_parts = []
+
+     if working_dir:
+         context_parts.append(f"Working Directory: {working_dir}")
+
+     if task:
+         context_parts.append(f"Current Task: {task}")
+
+     if additional_context:
+         context_parts.append(additional_context)
+
+     if context_parts:
+         prompt += "\n\n# Current Context\n\n" + "\n".join(context_parts)
+
+     return prompt
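To make the behavior of `get_orchestrator_prompt` concrete, here is a small worked example using only what is shown above (the argument values are made up):

```python
from zwarm.prompts import get_orchestrator_prompt

prompt = get_orchestrator_prompt(
    task="Add user authentication to the API",
    working_dir="/home/dev/myproject",
    additional_context="The repo uses FastAPI and pytest.",
)

# `prompt` is ORCHESTRATOR_SYSTEM_PROMPT followed by:
#
# # Current Context
#
# Working Directory: /home/dev/myproject
# Current Task: Add user authentication to the API
# The repo uses FastAPI and pytest.
```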
zwarm/tools/__init__.py ADDED
@@ -0,0 +1,17 @@
+ """Orchestrator tools for delegating work to executors."""
+
+ from zwarm.tools.delegation import (
+     check_session,
+     converse,
+     delegate,
+     end_session,
+     list_sessions,
+ )
+
+ __all__ = [
+     "delegate",
+     "converse",
+     "check_session",
+     "end_session",
+     "list_sessions",
+ ]
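The `zwarm.tools.delegation` module itself is not part of this diff, so the sketch below leans on the tool signatures listed in the orchestrator system prompt (`delegate(task, mode, adapter)`, `converse(session_id, message)`, `check_session(session_id)`, `end_session(session_id, verdict)`); the return values and anything beyond those signatures are assumptions, not the package's documented API.

```python
# Hypothetical sync-delegation flow, under the assumptions stated above.
from zwarm.tools.delegation import check_session, converse, delegate, end_session

session_id = delegate(
    task="Implement JWT token generation in src/auth/jwt.py",
    mode="sync",          # "sync" or "async", per the system prompt
    adapter="codex_mcp",  # adapter names as registered in Orchestrator._get_adapter
)  # assumed to return (or contain) a session identifier

converse(session_id, "Also add unit tests in tests/test_jwt.py")
print(check_session(session_id))
end_session(session_id, verdict="completed")
```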