zwarm-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zwarm/orchestrator.py ADDED
@@ -0,0 +1,405 @@
+ """
+ Orchestrator: The agent that coordinates multiple executor agents.
+
+ The orchestrator:
+ - Plans and breaks down complex tasks
+ - Delegates work to executor agents (codex, claude-code, etc.)
+ - Supervises progress and provides clarification
+ - Verifies work before marking complete
+
+ It does NOT write code directly - that's the executor's job.
+ """
+
+ from __future__ import annotations
+
+ import json
+ from pathlib import Path
+ from typing import Any, Callable
+
+ import weave
+ from pydantic import Field, PrivateAttr
+ from wbal.agents.yaml_agent import YamlAgent
+ from wbal.helper import TOOL_CALL_TYPE, format_openai_tool_response
+
+ from zwarm.adapters.base import ExecutorAdapter
+ from zwarm.adapters.claude_code import ClaudeCodeAdapter
+ from zwarm.adapters.codex_mcp import CodexMCPAdapter
+ from zwarm.core.config import ZwarmConfig, load_config
+ from zwarm.core.environment import OrchestratorEnv
+ from zwarm.core.models import ConversationSession
+ from zwarm.core.state import StateManager
+ from zwarm.prompts import get_orchestrator_prompt
+ from zwarm.watchers import WatcherManager, WatcherContext, WatcherAction, build_watcher_manager
+
+
+ class Orchestrator(YamlAgent):
+     """
+     Multi-agent orchestrator built on WBAL's YamlAgent.
+
+     Extends YamlAgent with:
+     - Delegation tools (delegate, converse, check_session, end_session)
+     - Session tracking
+     - State persistence
+     - Watcher integration
+     - Weave integration
+     """
+
+     # Configuration
+     config: ZwarmConfig = Field(default_factory=ZwarmConfig)
+     working_dir: Path = Field(default_factory=Path.cwd)
+
+     # Load tools from modules (delegation + bash for verification)
+     agent_tool_modules: list[str] = Field(default=[
+         "zwarm.tools.delegation",
+         "wbal.tools.bash",
+     ])
+
+     # State management
+     _state: StateManager = PrivateAttr()
+     _sessions: dict[str, ConversationSession] = PrivateAttr(default_factory=dict)
+     _adapters: dict[str, ExecutorAdapter] = PrivateAttr(default_factory=dict)
+     _watcher_manager: WatcherManager | None = PrivateAttr(default=None)
+     _resumed: bool = PrivateAttr(default=False)
+
+     def model_post_init(self, __context: Any) -> None:
+         """Initialize state and adapters after model creation."""
+         super().model_post_init(__context)
+
+         # Initialize state manager
+         self._state = StateManager(self.working_dir / self.config.state_dir)
+         self._state.init()
+         self._state.load()
+
+         # Load existing sessions
+         for session in self._state.list_sessions():
+             self._sessions[session.id] = session
+
+         # Initialize Weave if configured
+         if self.config.weave.enabled and self.config.weave.project:
+             weave.init(self.config.weave.project)
+
+         # Initialize watchers if configured
+         if self.config.watchers.enabled:
+             self._watcher_manager = build_watcher_manager({
+                 "watchers": [
+                     {"name": w.name, "enabled": w.enabled, "config": w.config}
+                     for w in self.config.watchers.watchers
+                 ]
+             })
+
+         # Link sessions to environment for observe()
+         if hasattr(self.env, 'set_sessions'):
+             self.env.set_sessions(self._sessions)
+
+     @property
+     def state(self) -> StateManager:
+         """Access state manager."""
+         return self._state
+
+     def _get_adapter(self, name: str) -> ExecutorAdapter:
+         """Get or create an adapter by name."""
+         if name not in self._adapters:
+             if name == "codex_mcp":
+                 self._adapters[name] = CodexMCPAdapter()
+             elif name == "claude_code":
+                 self._adapters[name] = ClaudeCodeAdapter()
+             else:
+                 raise ValueError(f"Unknown adapter: {name}")
+         return self._adapters[name]
+
+     def save_state(self) -> None:
+         """Save orchestrator state for resume."""
+         self._state.save_orchestrator_messages(self.messages)
+
+     def load_state(self) -> None:
+         """Load orchestrator state for resume."""
+         self.messages = self._state.load_orchestrator_messages()
+         self._resumed = True
+
+     def _inject_resume_message(self) -> None:
+         """Inject a system message about resumed state."""
+         if not self._resumed:
+             return
+
+         # Build list of old sessions
+         old_sessions = []
+         for sid, session in self._sessions.items():
+             old_sessions.append(f" - {sid[:8]}... ({session.adapter}, {session.status.value})")
+
+         session_info = "\n".join(old_sessions) if old_sessions else " (none)"
+
+         resume_msg = {
+             "role": "user",
+             "content": f"""[SYSTEM NOTICE] You have been resumed from a previous session.
+
+ IMPORTANT: Your previous executor sessions are NO LONGER ACTIVE. The MCP connections and subprocess handles were lost when the previous session ended.
+
+ Previous sessions (now stale):
+ {session_info}
+
+ You must start NEW sessions with delegate() if you need to continue work. Do NOT try to use converse() or check_session() with the old session IDs - they will fail.
+
+ Continue with your task from where you left off."""
+         }
+
+         self.messages.append(resume_msg)
+         self._resumed = False  # Only inject once
+
+     def _run_watchers(self) -> WatcherAction:
+         """Run watchers and return the action to take."""
+         if not self._watcher_manager:
+             return WatcherAction.CONTINUE
+
+         # Build watcher context
+         ctx = WatcherContext(
+             step=self._step_count,
+             messages=self.messages,
+             sessions={sid: s.to_dict() for sid, s in self._sessions.items()},
+             task=self.env.task if hasattr(self.env, 'task') else "",
+             metadata={
+                 "max_steps": self.maxSteps,
+                 "config": self.config.to_dict() if hasattr(self.config, 'to_dict') else {},
+             },
+         )
+
+         # Run watchers synchronously (they're async internally)
+         import asyncio
+         try:
+             loop = asyncio.get_running_loop()
+         except RuntimeError:
+             loop = None
+
+         if loop and loop.is_running():
+             # Already inside a running event loop - run the watcher coroutine in a separate thread
+             import concurrent.futures
+             with concurrent.futures.ThreadPoolExecutor() as pool:
+                 result = pool.submit(asyncio.run, self._watcher_manager.observe(ctx)).result()
+         else:
+             result = asyncio.run(self._watcher_manager.observe(ctx))
+
+         # Handle watcher result
+         if result.action == WatcherAction.NUDGE and result.guidance:
+             # Inject guidance as a system message
+             self.messages.append({
+                 "role": "user",
+                 "content": f"[WATCHER: {result.metadata.get('triggered_by', 'unknown')}] {result.guidance}"
+             })
+
+         return result.action
+
+     def do(self) -> list[tuple[dict[str, Any], Any]]:
+         """
+         Execute tool calls from the LLM response.
+
+         Overrides base do() to capture and return tool calls with results
+         for Weave tracing visibility.
+
+         Returns:
+             List of (tool_call_info, result) tuples
+         """
+         if self._last_response is None:
+             return []
+
+         output = getattr(self._last_response, 'output', None)
+         if output is None:
+             return []
+
+         # Extract tool calls
+         tool_calls = [
+             item for item in output
+             if getattr(item, 'type', None) == TOOL_CALL_TYPE
+         ]
+
+         # If no tool calls, handle text output
+         if not tool_calls:
+             output_text = getattr(self._last_response, 'output_text', '')
+             if output_text and hasattr(self.env, 'output_handler'):
+                 self.env.output_handler(output_text)
+             return []
+
+         # Execute each tool call and collect results
+         tool_results: list[tuple[dict[str, Any], Any]] = []
+
+         for tc in tool_calls:
+             tc_name = getattr(tc, 'name', '')
+             tc_args_raw = getattr(tc, 'arguments', '{}')
+             tc_id = getattr(tc, 'call_id', '')
+
+             # Parse arguments
+             if isinstance(tc_args_raw, str):
+                 try:
+                     tc_args = json.loads(tc_args_raw)
+                 except json.JSONDecodeError:
+                     tc_args = {}
+             else:
+                 tc_args = tc_args_raw or {}
+
+             # Execute tool
+             if tc_name in self._tool_callables:
+                 try:
+                     tc_output = self._tool_callables[tc_name](**tc_args)
+                 except Exception as e:
+                     tc_output = f"Error executing {tc_name}: {e}"
+             else:
+                 tc_output = f"Unknown tool: {tc_name}"
+
+             # Collect tool call info and result
+             tool_call_info = {
+                 "name": tc_name,
+                 "args": tc_args,
+                 "call_id": tc_id,
+             }
+             tool_results.append((tool_call_info, tc_output))
+
+             # Format and append result to messages
+             result = format_openai_tool_response(tc_output, tc_id)
+             self.messages.append(result)
+
+         return tool_results
+
+     @weave.op()
+     def step(self) -> list[tuple[dict[str, Any], Any]]:
+         """
+         Execute one perceive-invoke-do cycle.
+
+         Overrides base step() to return tool calls with results
+         for Weave tracing visibility.
+
+         Returns:
+             List of (tool_call_info, result) tuples from this step.
+             Each tuple contains:
+             - tool_call_info: {"name": str, "args": dict, "call_id": str}
+             - result: The tool output (any type)
+         """
+         self.perceive()
+         self.invoke()
+         tool_results = self.do()
+         self._step_count += 1
+         return tool_results
+
+     @weave.op()
+     def run(self, task: str | None = None, max_steps: int | None = None) -> dict[str, Any]:
+         """
+         Run the orchestrator until the stop condition is met.
+
+         Overrides base run() to integrate watchers.
+
+         Args:
+             task: The task string. If not provided, uses env.task.
+             max_steps: Override maxSteps for this run.
+
+         Returns:
+             Dict with run results
+         """
+         # Set task from argument or environment
+         if task is not None:
+             self.env.task = task
+
+         # Override max_steps if provided
+         if max_steps is not None:
+             self.maxSteps = max_steps
+
+         # Reset step counter
+         self._step_count = 0
+
+         # Inject resume message if we were resumed
+         self._inject_resume_message()
+
+         for _ in range(self.maxSteps):
+             # Run watchers before each step
+             watcher_action = self._run_watchers()
+
+             if watcher_action == WatcherAction.ABORT:
+                 return {
+                     "steps": self._step_count,
+                     "task": self.env.task,
+                     "stopped_by": "watcher_abort",
+                 }
+             elif watcher_action == WatcherAction.PAUSE:
+                 # For now, treat pause as stop (could add human-in-loop later)
+                 return {
+                     "steps": self._step_count,
+                     "task": self.env.task,
+                     "stopped_by": "watcher_pause",
+                 }
+             # NUDGE and CONTINUE fall through to the next step
+
+             self.step()
+
+             if self.stopCondition:
+                 break
+
+         return {
+             "steps": self._step_count,
+             "task": self.env.task,
+         }
+
+     async def cleanup(self) -> None:
+         """Clean up resources."""
+         for adapter in self._adapters.values():
+             await adapter.cleanup()
+
+
+ def build_orchestrator(
+     config_path: Path | None = None,
+     task: str | None = None,
+     working_dir: Path | None = None,
+     overrides: list[str] | None = None,
+     resume: bool = False,
+     output_handler: Callable[[str], None] | None = None,
+ ) -> Orchestrator:
+     """
+     Build an orchestrator from configuration.
+
+     Args:
+         config_path: Path to YAML config file
+         task: The task to accomplish
+         working_dir: Working directory (default: cwd)
+         overrides: CLI overrides (--set key=value)
+         resume: Whether to resume from previous state
+         output_handler: Function to handle orchestrator output
+
+     Returns:
+         Configured Orchestrator instance
+     """
+     # Load configuration
+     config = load_config(
+         config_path=config_path,
+         overrides=overrides,
+     )
+
+     # Resolve working directory
+     working_dir = working_dir or Path.cwd()
+
+     # Build system prompt
+     system_prompt = _build_system_prompt(config, working_dir)
+
+     # Create lean orchestrator environment
+     env = OrchestratorEnv(
+         task=task or "",
+         working_dir=working_dir,
+     )
+
+     # Set up output handler
+     if output_handler:
+         env.output_handler = output_handler
+
+     # Create orchestrator
+     orchestrator = Orchestrator(
+         config=config,
+         working_dir=working_dir,
+         system_prompt=system_prompt,
+         maxSteps=config.orchestrator.max_steps,
+         env=env,
+     )
+
+     # Resume if requested
+     if resume:
+         orchestrator.load_state()
+
+     return orchestrator
+
+
+ def _build_system_prompt(config: ZwarmConfig, working_dir: Path | None = None) -> str:
+     """Build the orchestrator system prompt."""
+     return get_orchestrator_prompt(working_dir=str(working_dir) if working_dir else None)
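For orientation, here is a minimal usage sketch of the module above. It only exercises the surface shown in this diff (`build_orchestrator`, `Orchestrator.run`, `save_state`, and the async `cleanup`); the config file name, task text, override key, and the `asyncio.run` wrapper around `cleanup()` are illustrative assumptions, not something the wheel documents.

```python
# Hypothetical driver sketch - file names and values are assumptions.
import asyncio
from pathlib import Path

from zwarm.orchestrator import build_orchestrator

orchestrator = build_orchestrator(
    config_path=Path("zwarm.yaml"),           # assumed config file name
    task="Add user authentication to the API",
    working_dir=Path.cwd(),
    overrides=["orchestrator.max_steps=50"],  # --set key=value style, per the docstring
    resume=False,                             # True would call load_state() on the instance
    output_handler=print,
)

result = orchestrator.run()          # {"steps": ..., "task": ..., optionally "stopped_by": ...}
orchestrator.save_state()            # persist messages so a later resumed run can reload them
asyncio.run(orchestrator.cleanup())  # close executor adapters

print(result)
```

A CLI entry point would presumably own this sequence; the sketch just makes the call order implied by `run()`, `save_state()`, and `cleanup()` explicit.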
zwarm/prompts/__init__.py ADDED
@@ -0,0 +1,10 @@
+ """
+ System prompts for zwarm agents.
+ """
+
+ from zwarm.prompts.orchestrator import ORCHESTRATOR_SYSTEM_PROMPT, get_orchestrator_prompt
+
+ __all__ = [
+     "ORCHESTRATOR_SYSTEM_PROMPT",
+     "get_orchestrator_prompt",
+ ]
zwarm/prompts/orchestrator.py ADDED
@@ -0,0 +1,214 @@
+ """
+ Orchestrator system prompt.
+
+ This prompt defines the behavior of the zwarm orchestrator - a staff/principal IC
+ level agent that coordinates multiple coding agents to complete complex tasks
+ with minimal user intervention.
+ """
+
+ ORCHESTRATOR_SYSTEM_PROMPT = """
+ You are an orchestrator agent - a staff/principal IC level coordinator that manages multiple CLI coding agents (executors) to complete complex software engineering tasks autonomously.
+
+ You do NOT write code directly. You delegate to executors who write code. Your job is to plan, delegate, supervise, and verify.
+
+ # Core Philosophy
+
+ You are designed to one-shot full-scale applications with minimal user intervention. Only ask the user when:
+ - Requirements are fundamentally ambiguous and cannot be reasonably inferred
+ - A critical decision would be irreversible and has multiple valid approaches
+ - You need access credentials or external resources
+
+ Default to making reasonable decisions yourself. You are a principal engineer - act like one.
+
+ # Your Tools
+
+ ## Delegation Tools
+ - `delegate(task, mode, adapter)` - Start a new executor session
+ - `converse(session_id, message)` - Continue a sync conversation
+ - `check_session(session_id)` - Check async session status
+ - `end_session(session_id, verdict)` - Mark session complete/failed
+ - `list_sessions()` - List all sessions
+
+ ## Verification Tools
+ - `bash(command)` - Run shell commands to verify work (tests, builds, checks)
+
+ ## Communication
+ - `chat(message, wait_for_user_input)` - Communicate with user (use sparingly)
+
+ # Delegation Modes
+
+ ## Sync Mode (conversational)
+ Use when:
+ - Task requires iterative refinement based on output
+ - You need to guide the executor step-by-step
+ - Requirements may need clarification during execution
+ - The task involves exploration or research
+
+ Pattern:
+ ```
+ 1. delegate(task, mode="sync") → get initial response
+ 2. Review response, identify gaps
+ 3. converse(session_id, clarification) → refine
+ 4. Repeat until satisfied
+ 5. end_session(session_id, verdict="completed")
+ ```
+
+ ## Async Mode (fire-and-forget)
+ Use when:
+ - Task is well-defined and self-contained
+ - You want to parallelize independent work
+ - The executor can complete without guidance
+ - You trust the executor to handle edge cases
+
+ Pattern:
+ ```
+ 1. delegate(task1, mode="async")
+ 2. delegate(task2, mode="async") # parallel
+ 3. Continue other work...
+ 4. check_session(id) periodically
+ 5. end_session when complete
+ ```
+
+ # Task Decomposition
+
+ Break complex tasks into delegatable chunks. Each chunk should:
+ - Have a clear, measurable outcome
+ - Be completable by a single executor session
+ - Include acceptance criteria
+ - Specify file paths when relevant
+
+ Bad: "Build the authentication system"
+ Good: "Implement JWT token generation in src/auth/jwt.py with the following requirements:
+ - Function `generate_token(user_id, expiry_hours=24) -> str`
+ - Use HS256 algorithm with secret from AUTH_SECRET env var
+ - Include user_id and exp claims
+ - Add unit tests in tests/test_jwt.py"
+
+ # Verification Standards
+
+ ALWAYS verify work before marking complete:
+
+ 1. **Run tests**: `bash("pytest path/to/tests -v")`
+ 2. **Run linters**: `bash("ruff check path/to/code")`
+ 3. **Run type checks**: `bash("mypy path/to/code")` if applicable
+ 4. **Build check**: `bash("npm run build")` or equivalent
+ 5. **Manual inspection**: Read the generated code if tests pass but you want to verify quality
+
+ If verification fails:
+ - For sync sessions: converse with the executor to fix
+ - For async sessions: start a new session to fix issues
+ - Do NOT end_session with verdict="completed" until verification passes
+
+ # Error Handling
+
+ When an executor fails or produces incorrect output:
+
+ 1. **Diagnose**: Understand what went wrong
+ 2. **Decide**: Can it be fixed in the current session, or start fresh?
+ 3. **Act**: Either converse to fix, or end_session(verdict="failed") and re-delegate
+
+ Do NOT:
+ - Abandon tasks silently
+ - Mark failed work as completed
+ - Ask the user to fix executor mistakes
+
+ # Quality Standards
+
+ You are responsible for the quality of the final output. Ensure:
+
+ - **Correctness**: Code does what was asked
+ - **Completeness**: All requirements addressed
+ - **Testing**: Appropriate test coverage
+ - **No regressions**: Existing functionality preserved
+ - **Clean integration**: New code fits with existing patterns
+
+ # Communication Style
+
+ When you do communicate with the user:
+ - Be concise and specific
+ - State what you've done, what's next
+ - Only ask questions when truly blocked
+ - Never ask for permission to proceed with reasonable actions
+
+ # Session Management
+
+ - Complete sessions promptly - don't leave them hanging
+ - Clean up failed sessions with clear verdicts
+ - Track multiple parallel sessions carefully
+ - Prioritize completing in-progress work before starting new work
+
+ # Planning Complex Tasks
+
+ For large tasks, create a mental plan:
+
+ 1. **Understand**: What is the end state? What exists now?
+ 2. **Decompose**: Break into ordered, dependent chunks
+ 3. **Sequence**: What can be parallelized? What must be sequential?
+ 4. **Execute**: Delegate systematically
+ 5. **Integrate**: Verify everything works together
+ 6. **Polish**: Handle edge cases, add tests, clean up
+
+ # Anti-Patterns to Avoid
+
+ - Starting many sessions without completing any
+ - Over-delegating simple tasks that could be verified directly
+ - Under-specifying requirements leading to back-and-forth
+ - Asking the user questions you could answer yourself
+ - Marking work complete without verification
+ - Abandoning sessions without proper cleanup
+
+ # Example Task Flow
+
+ Task: "Add user authentication to the API"
+
+ 1. **Plan**: JWT auth, login endpoint, protected routes, tests
+ 2. **Delegate (sync)**: "Implement JWT utilities in src/auth/jwt.py..."
+ 3. **Verify**: Run tests, check types
+ 4. **Delegate (sync)**: "Add login endpoint in src/api/auth.py..."
+ 5. **Verify**: Run tests, manual curl test
+ 6. **Delegate (sync)**: "Add auth middleware in src/middleware/auth.py..."
+ 7. **Verify**: Run full test suite
+ 8. **Integration test**: Test the complete flow
+ 9. **Done**: Report completion to user
+
+ # Final Notes
+
+ You have autonomy. Use it wisely. Make decisions. Move fast. Verify thoroughly. The user trusts you to deliver working software without hand-holding.
+
+ Call `exit()` when the overall task is complete and verified.
+ """
+
+
+ def get_orchestrator_prompt(
+     task: str | None = None,
+     working_dir: str | None = None,
+     additional_context: str | None = None,
+ ) -> str:
+     """
+     Build the full orchestrator system prompt with optional context.
+
+     Args:
+         task: The current task (added to context)
+         working_dir: Working directory path
+         additional_context: Any additional context to append
+
+     Returns:
+         Complete system prompt
+     """
+     prompt = ORCHESTRATOR_SYSTEM_PROMPT
+
+     context_parts = []
+
+     if working_dir:
+         context_parts.append(f"Working Directory: {working_dir}")
+
+     if task:
+         context_parts.append(f"Current Task: {task}")
+
+     if additional_context:
+         context_parts.append(additional_context)
+
+     if context_parts:
+         prompt += "\n\n# Current Context\n\n" + "\n".join(context_parts)
+
+     return prompt
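To make the behavior of `get_orchestrator_prompt` concrete, here is a small worked example using only what is shown above (the argument values are made up):

```python
from zwarm.prompts import get_orchestrator_prompt

prompt = get_orchestrator_prompt(
    task="Add user authentication to the API",
    working_dir="/home/dev/myproject",
    additional_context="The repo uses FastAPI and pytest.",
)

# `prompt` is ORCHESTRATOR_SYSTEM_PROMPT followed by:
#
# # Current Context
#
# Working Directory: /home/dev/myproject
# Current Task: Add user authentication to the API
# The repo uses FastAPI and pytest.
```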
zwarm/tools/__init__.py ADDED
@@ -0,0 +1,17 @@
+ """Orchestrator tools for delegating work to executors."""
+
+ from zwarm.tools.delegation import (
+     check_session,
+     converse,
+     delegate,
+     end_session,
+     list_sessions,
+ )
+
+ __all__ = [
+     "delegate",
+     "converse",
+     "check_session",
+     "end_session",
+     "list_sessions",
+ ]
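The `zwarm.tools.delegation` module itself is not part of this diff, so the sketch below leans on the tool signatures listed in the orchestrator system prompt (`delegate(task, mode, adapter)`, `converse(session_id, message)`, `check_session(session_id)`, `end_session(session_id, verdict)`); the return values and anything beyond those signatures are assumptions, not the package's documented API.

```python
# Hypothetical sync-delegation flow, under the assumptions stated above.
from zwarm.tools.delegation import check_session, converse, delegate, end_session

session_id = delegate(
    task="Implement JWT token generation in src/auth/jwt.py",
    mode="sync",          # "sync" or "async", per the system prompt
    adapter="codex_mcp",  # adapter names as registered in Orchestrator._get_adapter
)  # assumed to return (or contain) a session identifier

converse(session_id, "Also add unit tests in tests/test_jwt.py")
print(check_session(session_id))
end_session(session_id, verdict="completed")
```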