PyPI - zwarm - Versions diffs - 2.3.5__py3-none-any.whl - Mend

zwarm 2.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

zwarm/__init__.py +38 -0
zwarm/adapters/__init__.py +21 -0
zwarm/adapters/base.py +109 -0
zwarm/adapters/claude_code.py +357 -0
zwarm/adapters/codex_mcp.py +1262 -0
zwarm/adapters/registry.py +69 -0
zwarm/adapters/test_codex_mcp.py +274 -0
zwarm/adapters/test_registry.py +68 -0
zwarm/cli/__init__.py +0 -0
zwarm/cli/main.py +2503 -0
zwarm/core/__init__.py +0 -0
zwarm/core/compact.py +329 -0
zwarm/core/config.py +344 -0
zwarm/core/environment.py +173 -0
zwarm/core/models.py +315 -0
zwarm/core/state.py +355 -0
zwarm/core/test_compact.py +312 -0
zwarm/core/test_config.py +160 -0
zwarm/core/test_models.py +265 -0
zwarm/orchestrator.py +683 -0
zwarm/prompts/__init__.py +10 -0
zwarm/prompts/orchestrator.py +230 -0
zwarm/sessions/__init__.py +26 -0
zwarm/sessions/manager.py +792 -0
zwarm/test_orchestrator_watchers.py +23 -0
zwarm/tools/__init__.py +17 -0
zwarm/tools/delegation.py +784 -0
zwarm/watchers/__init__.py +31 -0
zwarm/watchers/base.py +131 -0
zwarm/watchers/builtin.py +518 -0
zwarm/watchers/llm_watcher.py +319 -0
zwarm/watchers/manager.py +181 -0
zwarm/watchers/registry.py +57 -0
zwarm/watchers/test_watchers.py +237 -0
zwarm-2.3.5.dist-info/METADATA +309 -0
zwarm-2.3.5.dist-info/RECORD +38 -0
zwarm-2.3.5.dist-info/WHEEL +4 -0
zwarm-2.3.5.dist-info/entry_points.txt +2 -0

zwarm/tools/delegation.py ADDED Viewed

@@ -0,0 +1,784 @@
+"""
+Delegation tools for the orchestrator.
+These are the core tools that orchestrators use to delegate work to executors.
+They use the SAME CodexSessionManager that `zwarm interactive` uses - no special
+MCP integration, no separate code path.
+The orchestrator LLM has access to the exact same tools a human would use.
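+Tools:
+- delegate: Start a new codex session
+- converse: Continue a conversation (inject follow-up message)
+- check_session: Check status of a session
+- peek_session: Quick status + latest message for fast polling
+- get_trajectory: Show the steps a session took
+- end_session: End/kill a session
+- list_sessions: List all sessions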
+"""
+from __future__ import annotations
+import time
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Literal
+from wbal.helper import weaveTool
+if TYPE_CHECKING:
+    from zwarm.orchestrator import Orchestrator
+def _get_session_manager(orchestrator: "Orchestrator"):
+    """
+    Get the CodexSessionManager - the SINGLE source of truth for sessions.
+    Both `zwarm interactive` and `zwarm orchestrate` use the same session manager.
+    The orchestrator is just another user that happens to be an LLM.
+    The session manager is created eagerly in Orchestrator.model_post_init()
+    and shared with the environment for observe() visibility.
+    """
+    # Should already exist from model_post_init, but create if not
+    if not hasattr(orchestrator, "_session_manager") or orchestrator._session_manager is None:
+        from zwarm.sessions import CodexSessionManager
+        orchestrator._session_manager = CodexSessionManager(orchestrator.working_dir / ".zwarm")
+    return orchestrator._session_manager
def _wait_for_completion(manager, session_id: str, timeout: float = 300.0, poll_interval: float = 1.0) -> bool:
    """
    Poll a session until it reaches a terminal status or the timeout elapses.

    Args:
        manager: CodexSessionManager used to look the session up.
        session_id: Session to wait for.
        timeout: Max seconds to wait.
        poll_interval: Seconds between polls.

    Returns:
        True if the session status became COMPLETED, FAILED or KILLED.
        False if the timeout elapsed first, or if the session could not be
        found (callers cannot tell these two cases apart from the return
        value alone).
    """
    from zwarm.sessions import SessionStatus

    terminal_states = (
        SessionStatus.COMPLETED,
        SessionStatus.FAILED,
        SessionStatus.KILLED,
    )

    # A monotonic clock is immune to wall-clock adjustments (NTP, DST, manual
    # changes) that would otherwise stretch or shorten the wait.
    deadline = time.monotonic() + timeout
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False

        session = manager.get_session(session_id)
        if not session:
            return False

        # Check status (not is_running - PID check is unreliable due to reuse)
        if session.status in terminal_states:
            return True

        # Never sleep past the deadline.
        time.sleep(min(poll_interval, remaining))
+def _truncate(text: str, max_len: int = 200) -> str:
+    """Truncate text with ellipsis."""
+    if len(text) <= max_len:
+        return text
+    return text[:max_len - 3] + "..."
+def _format_session_header(session) -> str:
+    """Format a nice session header."""
+    return f"[{session.short_id}] codex ({session.status.value})"
+def _get_total_tokens(session) -> int:
+    """Get total tokens, computing from input+output if not present."""
+    usage = session.token_usage
+    if "total_tokens" in usage:
+        return usage["total_tokens"]
+    return usage.get("input_tokens", 0) + usage.get("output_tokens", 0)
+def _validate_working_dir(
+    requested_dir: Path | str | None,
+    default_dir: Path,
+    allowed_dirs: list[str] | None,
+) -> tuple[Path, str | None]:
+    """
+    Validate requested working directory against allowed_dirs config.
+    Args:
+        requested_dir: Directory requested by the agent (or None for default)
+        default_dir: The orchestrator's working directory
+        allowed_dirs: Config setting - None means only default allowed,
+                     ["*"] means any, or list of allowed paths
+    Returns:
+        (validated_path, error_message) - error is None if valid
+    """
+    if requested_dir is None:
+        return default_dir, None
+    requested = Path(requested_dir).resolve()
+    # Check if directory exists
+    if not requested.exists():
+        return default_dir, f"Directory does not exist: {requested}"
+    if not requested.is_dir():
+        return default_dir, f"Not a directory: {requested}"
+    # If allowed_dirs is None, only default is allowed
+    if allowed_dirs is None:
+        if requested == default_dir.resolve():
+            return requested, None
+        return default_dir, (
+            f"Directory not allowed: {requested}. "
+            f"Agent can only delegate to working directory ({default_dir}). "
+            "Set orchestrator.allowed_dirs in config to allow other directories."
+        )
+    # If ["*"], any directory is allowed
+    if allowed_dirs == ["*"]:
+        return requested, None
+    # Check against allowed list
+    for allowed in allowed_dirs:
+        allowed_path = Path(allowed).resolve()
+        # Allow if requested is the allowed path or a subdirectory of it
+        try:
+            requested.relative_to(allowed_path)
+            return requested, None
+        except ValueError:
+            continue
+    return default_dir, (
+        f"Directory not allowed: {requested}. "
+        f"Allowed directories: {allowed_dirs}"
+    )
@weaveTool
def delegate(
    self: "Orchestrator",
    task: str,
    mode: Literal["sync", "async"] = "sync",
    model: str | None = None,
    working_dir: str | None = None,
) -> dict[str, Any]:
    """
    Delegate work to a Codex agent.

    This spawns a codex session - the exact same way `zwarm interactive` does.

    Two modes available:

    **sync** (default): Wait for codex to complete, then return the response.
    Best for: most tasks - you get the full response immediately.

    **async**: Fire-and-forget execution.
    Check progress later with check_session().
    Best for: long-running tasks, parallel work.

    Args:
        task: Clear description of what to do. Be specific about requirements.
        mode: "sync" to wait for completion, "async" for fire-and-forget.
        model: Model override (default: gpt-5.1-codex-mini).
        working_dir: Directory for codex to work in (default: orchestrator's dir).

    Returns:
        {session_id, status, response (if sync)}

    Example:
        delegate(task="Add a logout button to the navbar", mode="sync")
        # Then use converse() to refine: "Also add a confirmation dialog"
    """
    # Refuse directories outside the configured allow-list before spawning anything.
    effective_dir, dir_error = _validate_working_dir(
        working_dir,
        self.working_dir,
        self.config.orchestrator.allowed_dirs,
    )
    if dir_error:
        return {
            "success": False,
            "error": dir_error,
            "hint": "Use the default working directory or ask user to update allowed_dirs config",
        }

    # Get the session manager (same one zwarm interactive uses)
    manager = _get_session_manager(self)

    # Explicit argument wins, then config, then the built-in defaults.
    effective_model = model or self.config.executor.model or "gpt-5.1-codex-mini"
    sandbox = self.config.executor.sandbox or "workspace-write"

    session = manager.start_session(
        task=task,
        working_dir=effective_dir,
        model=effective_model,
        sandbox=sandbox,
        source=f"orchestrator:{self.instance_id or 'default'}",
        adapter="codex",
    )
    # Remember the id now: the session object is re-fetched after waiting and
    # that lookup can come back empty.
    session_id = session.id

    if mode != "sync":
        # Async mode - return immediately
        return {
            "success": True,
            "session": _format_session_header(session),
            "session_id": session_id,
            "status": "running",
            "task": _truncate(task, 100),
            "hint": "Use check_session(session_id) to monitor progress",
        }

    completed = _wait_for_completion(
        manager,
        session_id,
        timeout=self.config.executor.timeout or 300.0,
    )

    # Refresh session to get updated status and messages. The wait helper also
    # returns False when the session cannot be found, so guard that case
    # instead of dereferencing None below.
    refreshed = manager.get_session(session_id)
    if not refreshed:
        return {
            "success": False,
            "session_id": session_id,
            "status": "unknown",
            "error": f"Session {session_id} could not be found after it was started",
            "hint": "Use list_sessions() to see available sessions",
        }
    session = refreshed

    if not completed:
        return {
            "success": False,
            "session_id": session_id,
            "status": "timeout",
            "error": "Session timed out waiting for codex to complete",
            "hint": "Use check_session() to monitor progress, or use async mode for long tasks",
        }

    # The response is the first assistant message of the session.
    messages = manager.get_messages(session_id)
    response_text = next(
        (msg.content for msg in messages if msg.role == "assistant"),
        "",
    )

    # Build log path for debugging
    log_path = str(manager._output_path(session.id, session.turn))

    from zwarm.sessions import SessionStatus

    if session.status == SessionStatus.FAILED:
        return {
            "success": False,
            "session": _format_session_header(session),
            "session_id": session.id,
            "status": "failed",
            "task": _truncate(task, 100),
            "error": session.error or "Unknown error",
            "response": response_text or "(no response captured)",
            "tokens": _get_total_tokens(session),
            "log_file": log_path,
            "hint": "Check log_file for raw codex output. Use bash('cat <log_file>') to inspect.",
        }

    return {
        "success": True,
        "session": _format_session_header(session),
        "session_id": session.id,
        "status": session.status.value,
        "task": _truncate(task, 100),
        "response": response_text or "(no response captured)",
        "tokens": _get_total_tokens(session),
        "log_file": log_path,
        "hint": "Use converse(session_id, message) to send follow-up messages",
    }
@weaveTool
def converse(
    self: "Orchestrator",
    session_id: str,
    message: str,
    wait: bool = True,
) -> dict[str, Any]:
    """
    Continue a conversation with a codex session.

    This injects a follow-up message into the session, providing the
    conversation history as context. Like chatting with a developer.

    Two modes:
    - **wait=True** (default): Wait for codex to respond before returning.
    - **wait=False**: Fire-and-forget. Message sent, codex runs in background.
      Use check_session() later to see the response.

    Args:
        session_id: The session to continue (from delegate() result).
        message: Your next message to codex.
        wait: If True, wait for response. If False, return immediately.

    Returns:
        {session_id, response (if wait=True), turn}. When the turn ends in a
        failed state the result has success=False plus an "error" entry.

    Example (sync):
        result = delegate(task="Add user authentication")
        converse(session_id=result["session_id"], message="Use JWT")
        # Returns with response

    Example (async - managing multiple sessions):
        converse(session_id="abc123", message="Add tests", wait=False)
        converse(session_id="def456", message="Fix bug", wait=False)
        # Both running in parallel, check later with check_session()
    """
    manager = _get_session_manager(self)

    session = manager.get_session(session_id)
    if not session:
        return {
            "success": False,
            "error": f"Unknown session: {session_id}",
            "hint": "Use list_sessions() to see available sessions",
        }

    # Only sessions that are neither running nor killed can take a follow-up.
    from zwarm.sessions import SessionStatus

    if session.status == SessionStatus.RUNNING:
        return {
            "success": False,
            "error": "Session is still running",
            "hint": "Wait for the current task to complete, or use check_session() to monitor",
        }
    if session.status == SessionStatus.KILLED:
        return {
            "success": False,
            "error": "Session was killed",
            "hint": "Start a new session with delegate()",
        }

    # Inject the follow-up message
    # This uses CodexSessionManager.inject_message() which:
    # 1. Builds context from previous messages
    # 2. Starts a new turn with the context + new message (background process)
    updated_session = manager.inject_message(session_id, message)
    if not updated_session:
        return {
            "success": False,
            "error": "Failed to inject message",
            "session_id": session_id,
        }

    if not wait:
        # Async mode - return immediately
        return {
            "success": True,
            "session": _format_session_header(updated_session),
            "session_id": session_id,
            "turn": updated_session.turn,
            "status": "running",
            "you_said": _truncate(message, 100),
            "hint": "Use check_session(session_id) to see the response when ready",
        }

    # Sync mode - wait for completion
    completed = _wait_for_completion(
        manager,
        session_id,
        timeout=self.config.executor.timeout or 300.0,
    )
    if not completed:
        return {
            "success": False,
            "session_id": session_id,
            "status": "timeout",
            "error": "Session timed out waiting for response",
            "hint": "Use check_session() to monitor progress",
        }

    # Refresh to pick up the post-turn status; the lookup may come back empty
    # if the session was removed in the meantime.
    session = manager.get_session(session_id)
    if not session:
        return {
            "success": False,
            "session_id": session_id,
            "status": "unknown",
            "error": f"Session {session_id} could not be found after the message was sent",
            "hint": "Use list_sessions() to see available sessions",
        }

    # Get the response (last assistant message)
    messages = manager.get_messages(session_id)
    response_text = next(
        (msg.content for msg in reversed(messages) if msg.role == "assistant"),
        "",
    )

    # Report a failed turn as a failure, the same way delegate() and
    # check_session() do, instead of returning success=True.
    if session.status == SessionStatus.FAILED:
        return {
            "success": False,
            "session": _format_session_header(session),
            "session_id": session_id,
            "status": "failed",
            "turn": session.turn,
            "you_said": _truncate(message, 100),
            "error": session.error or "Unknown error",
            "response": response_text or "(no response captured)",
            "tokens": _get_total_tokens(session),
            "hint": "Use check_session(session_id) to see the log_file with raw codex output",
        }

    return {
        "success": True,
        "session": _format_session_header(session),
        "session_id": session_id,
        "turn": session.turn,
        "you_said": _truncate(message, 100),
        "response": response_text or "(no response captured)",
        "tokens": _get_total_tokens(session),
    }
@weaveTool
def check_session(
    self: "Orchestrator",
    session_id: str,
) -> dict[str, Any]:
    """
    Check the status of a session.

    Use this to:
    - Check if an async session has finished
    - Get current status and message count
    - View the latest response

    Args:
        session_id: The session to check.

    Returns:
        {session_id, status, messages, response}
    """
    manager = _get_session_manager(self)
    session = manager.get_session(session_id)
    if not session:
        return {
            "success": False,
            "error": f"Unknown session: {session_id}",
            "hint": "Use list_sessions() to see available sessions",
        }

    # Most recent assistant message, or "" when there is none yet.
    messages = manager.get_messages(session_id)
    latest_reply = next(
        (msg.content for msg in reversed(messages) if msg.role == "assistant"),
        "",
    )
    if latest_reply:
        response_preview = _truncate(latest_reply, 500)
    else:
        response_preview = "(no response yet)"

    # Path of the output log for the session's current turn.
    log_path = str(manager._output_path(session.id, session.turn))

    report = {
        "success": True,
        "session": _format_session_header(session),
        "session_id": session_id,
        "status": session.status.value,
        "is_running": session.is_running,
        "turn": session.turn,
        "message_count": len(messages),
        "task": _truncate(session.task, 80),
        "response": response_preview,
        "tokens": _get_total_tokens(session),
        "runtime": session.runtime,
        "log_file": log_path,
    }

    # A failed session flips the success flag and carries its error text.
    from zwarm.sessions import SessionStatus

    if session.status == SessionStatus.FAILED:
        report.update(success=False, error=session.error or "Unknown error")
    return report
@weaveTool
def peek_session(
    self: "Orchestrator",
    session_id: str,
) -> dict[str, Any]:
    """
    Quick peek at a session - minimal info for fast polling.

    Returns just status and latest message. Use check_session() for full details.

    Args:
        session_id: The session to peek at.

    Returns:
        {session_id, status, latest_message}
    """
    manager = _get_session_manager(self)
    session = manager.get_session(session_id)
    if not session:
        return {"success": False, "error": f"Unknown session: {session_id}"}

    # Newest assistant message flattened onto one line; "" if none exists.
    history = manager.get_messages(session_id)
    flattened = next(
        (entry.content.replace("\n", " ") for entry in reversed(history) if entry.role == "assistant"),
        "",
    )

    status_value = session.status.value
    preview = _truncate(flattened, 150) if flattened else None
    return {
        "success": True,
        "session_id": session.short_id,
        "status": status_value,
        "is_running": status_value == "running",
        "latest_message": preview,
    }
def _format_trajectory_step(step: dict[str, Any], full: bool) -> str | None:
    """
    Render one trajectory step as a display line.

    Returns None for step types that have no rendering, so the caller can
    skip them. Missing or None text/output fields render as empty rather
    than as the string "None" or a slicing error.
    """
    step_type = step.get("type", "unknown")

    if step_type in ("reasoning", "message"):
        label = "thinking" if step_type == "reasoning" else "response"
        summary = step.get("summary") or ""
        # In full mode prefer the untruncated text, but fall back to the
        # summary when the step carries no full_text.
        text = (step.get("full_text") or summary) if full else summary
        return f"[{label}] {text}"

    if step_type == "command":
        line = f"[command] $ {step.get('command', '')}"
        output = step.get("output") or ""
        if output:
            if full:
                line += f"\n  → {output}"
            else:
                line += f"\n  → {output[:100]}{'...' if len(output) > 100 else ''}"
        exit_code = step.get("exit_code")
        if exit_code:
            # Only non-zero exit codes are worth surfacing.
            line += f" (exit: {exit_code})"
        return line

    if step_type == "tool_call":
        tool_name = step.get("tool", "unknown")
        if full and step.get("full_args"):
            import json

            args_str = json.dumps(step["full_args"], indent=2)
            return f"[tool] {tool_name}\n  {args_str}"
        return f"[tool] {tool_name}({step.get('args_preview', '')})"

    if step_type == "tool_output":
        output = step.get("output") or ""
        if not full:
            output = output[:100]
        return f"[result] {output}"

    return None


@weaveTool
def get_trajectory(
    self: "Orchestrator",
    session_id: str,
    full: bool = False,
) -> dict[str, Any]:
    """
    Get the full trajectory of a session - all steps the agent took.

    Shows reasoning, commands, tool calls, and responses in order.
    Useful for understanding HOW the agent completed a task, not just
    the final result.

    Args:
        session_id: The session to get trajectory for.
        full: If True, include full untruncated content (default: False for summary view).

    Returns:
        {steps: [...], step_count}
    """
    manager = _get_session_manager(self)
    session = manager.get_session(session_id)
    if not session:
        return {"success": False, "error": f"Unknown session: {session_id}"}

    trajectory = manager.get_trajectory(session_id, full=full)

    # Format steps for easy reading; step types without a rendering are skipped.
    formatted_steps = []
    for step in trajectory:
        rendered = _format_trajectory_step(step, full)
        if rendered is not None:
            formatted_steps.append(rendered)

    return {
        "success": True,
        "session_id": session.short_id,
        "task": _truncate(session.task, 80),
        # Counts every raw step, including any that were not rendered.
        "step_count": len(trajectory),
        "steps": formatted_steps,
        "mode": "full" if full else "summary",
    }
@weaveTool
def end_session(
    self: "Orchestrator",
    session_id: str,
    reason: str | None = None,
    delete: bool = False,
) -> dict[str, Any]:
    """
    End/kill a session.

    Call this when:
    - You want to stop a running session
    - Clean up a completed session
    - Cancel a task

    Args:
        session_id: The session to end.
        reason: Optional reason for ending.
        delete: If True, delete session entirely (removes from list_sessions).

    Returns:
        {session_id, status}. On a failed kill or delete the result has
        success=False and an "error" entry.
    """
    manager = _get_session_manager(self)
    session = manager.get_session(session_id)
    if not session:
        return {
            "success": False,
            "error": f"Unknown session: {session_id}",
        }

    # If delete requested, remove entirely
    if delete:
        deleted = manager.delete_session(session_id)
        if not deleted:
            # Do not claim the session is deleted when the manager said no;
            # report it the same way a failed kill is reported.
            return {
                "success": False,
                "error": "Failed to delete session",
                "session_id": session_id,
                "status": session.status.value,
            }
        return {
            "success": True,
            "session_id": session_id,
            "status": "deleted",
            "reason": reason or "deleted by orchestrator",
        }

    # Kill if still running
    if session.is_running:
        killed = manager.kill_session(session_id)
        if not killed:
            return {
                "success": False,
                "error": "Failed to kill session",
                "session_id": session_id,
            }
        # Refresh to pick up the post-kill status; keep the previous object
        # if the lookup comes back empty.
        session = manager.get_session(session_id) or session

    return {
        "success": True,
        "session": _format_session_header(session),
        "session_id": session_id,
        "status": session.status.value,
        "reason": reason or "ended by orchestrator",
        "turn": session.turn,
        "tokens": _get_total_tokens(session),
    }
def _time_ago(iso_str: str) -> tuple[str, float]:
    """
    Convert an ISO timestamp to ('Xm ago', seconds).

    Unparseable input yields ("?", 999999). Timezone-aware timestamps are
    compared against "now" in their own timezone, naive ones against naive
    local time. A timestamp in the future is clamped to zero seconds.
    """
    from datetime import datetime

    try:
        stamp = datetime.fromisoformat(iso_str)
    except (TypeError, ValueError):
        return "?", 999999

    # now(None) is naive local time; now(tz) is aware. Matching the
    # timestamp's awareness avoids a naive-vs-aware subtraction error.
    secs = max((datetime.now(stamp.tzinfo) - stamp).total_seconds(), 0.0)
    if secs < 60:
        return f"{int(secs)}s ago", secs
    if secs < 3600:
        return f"{int(secs/60)}m ago", secs
    if secs < 86400:
        return f"{secs/3600:.1f}h ago", secs
    return f"{secs/86400:.1f}d ago", secs


@weaveTool
def list_sessions(
    self: "Orchestrator",
    status: str | None = None,
) -> dict[str, Any]:
    """
    List all sessions, optionally filtered by status.

    Returns rich information about each session including:
    - Status (running/completed/failed)
    - Last update time (for detecting changes)
    - Last message preview (quick peek at response)
    - Whether it's recently updated (needs_attention flag)

    Use this to monitor multiple parallel sessions and see which
    ones have new responses.

    Args:
        status: Filter by status ("running", "completed", "failed", "killed", "pending").

    Returns:
        {sessions: [...], count, running, completed, needs_attention}
    """
    manager = _get_session_manager(self)

    # Map string status to enum
    from zwarm.sessions import SessionStatus

    status_filter = None
    if status:
        status_map = {
            "running": SessionStatus.RUNNING,
            "completed": SessionStatus.COMPLETED,
            "failed": SessionStatus.FAILED,
            "killed": SessionStatus.KILLED,
            "pending": SessionStatus.PENDING,
        }
        status_filter = status_map.get(status.lower())
        if status_filter is None:
            # An unrecognised filter used to fall through and list every
            # session while still echoing the bogus filter back.
            return {
                "success": False,
                "error": f"Unknown status filter: {status}",
                "hint": "Valid statuses: " + ", ".join(status_map),
            }

    sessions = manager.list_sessions(status=status_filter)

    status_icons = {
        "running": "●",
        "completed": "✓",
        "failed": "✗",
        "killed": "○",
        "pending": "◌",
    }

    session_list = []
    needs_attention_count = 0
    for s in sessions:
        status_icon = status_icons.get(s.status.value, "?")
        updated_str, updated_secs = _time_ago(s.updated_at)

        # Get last assistant message, flattened onto one line
        last_message = next(
            (
                msg.content.replace("\n", " ")
                for msg in reversed(manager.get_messages(s.id))
                if msg.role == "assistant"
            ),
            "",
        )

        # Flag sessions that need attention:
        # - Recently completed (< 60s)
        # - Failed
        is_recent = updated_secs < 60
        needs_attention = (
            (s.status == SessionStatus.COMPLETED and is_recent)
            or s.status == SessionStatus.FAILED
        )
        if needs_attention:
            needs_attention_count += 1

        session_list.append({
            "id": s.short_id,
            "full_id": s.id,
            "status": f"{status_icon} {s.status.value}",
            "is_running": s.status == SessionStatus.RUNNING,
            "task": _truncate(s.task, 50),
            "turn": s.turn,
            "updated": updated_str,
            "updated_secs": int(updated_secs),
            "last_message": _truncate(last_message, 100) if last_message else "(no response yet)",
            "needs_attention": needs_attention,
            "tokens": _get_total_tokens(s),
        })

    # Summary counts
    running_count = sum(1 for s in sessions if s.status == SessionStatus.RUNNING)
    completed_count = sum(1 for s in sessions if s.status == SessionStatus.COMPLETED)

    return {
        "success": True,
        "sessions": session_list,
        "count": len(sessions),
        "running": running_count,
        "completed": completed_count,
        "needs_attention": needs_attention_count,
        "filter": status or "all",
        "hint": "Sessions with needs_attention=True have new responses to review" if needs_attention_count else None,
    }