zwarm 3.0.1__py3-none-any.whl → 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,216 @@
+ """
+ Checkpoint primitives for state management.
+
+ Provides time-travel capability by recording snapshots of state at key points.
+ Used by pilot for turn-by-turn checkpointing, and potentially by other
+ interfaces that need state restoration.
+
+ Topology reminder:
+     orchestrator → pilot → interactive → CodexSessionManager
+
+ These primitives sit at the core layer, usable by any interface above.
+ """
+
+ from __future__ import annotations
+
+ import copy
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from typing import Any
+
+
+ @dataclass
+ class Checkpoint:
+     """
+     A snapshot of state at a specific point in time.
+
+     Attributes:
+         checkpoint_id: Unique identifier (e.g., turn number)
+         label: Human-readable label (e.g., "T1", "T2")
+         description: What action led to this state
+         state: The actual state snapshot (deep-copied)
+         timestamp: When the checkpoint was created
+         metadata: Optional extra data
+     """
+     checkpoint_id: int
+     label: str
+     description: str
+     state: dict[str, Any]
+     timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
+     metadata: dict[str, Any] = field(default_factory=dict)
+
+
+ @dataclass
+ class CheckpointManager:
+     """
+     Manages checkpoints and time travel.
+
+     Maintains a list of checkpoints and a current position. Supports:
+     - Recording new checkpoints
+     - Jumping to any previous checkpoint
+     - Branching (going back and continuing creates a new timeline)
+     - History inspection
+
+     Usage:
+         mgr = CheckpointManager()
+
+         # Record state after each action
+         mgr.record(description="Added auth", state={"messages": [...], ...})
+         mgr.record(description="Fixed bug", state={"messages": [...], ...})
+
+         # Jump back
+         cp = mgr.goto(1)  # Go to first checkpoint
+         restored_state = cp.state
+
+         # Continue from there (branches off)
+         mgr.record(description="Different path", state={...})
+     """
+
+     checkpoints: list[Checkpoint] = field(default_factory=list)
+     current_index: int = -1  # -1 = root (before any checkpoints)
+     next_id: int = 1
+     label_prefix: str = "T"  # Labels will be T1, T2, etc.
+
+     def record(
+         self,
+         description: str,
+         state: dict[str, Any],
+         metadata: dict[str, Any] | None = None,
+     ) -> Checkpoint:
+         """
+         Record a new checkpoint.
+
+         If not at the end of history (i.e., we've gone back), this creates
+         a branch - future checkpoints are discarded.
+
+         Args:
+             description: What action led to this state
+             state: State to snapshot (will be deep-copied)
+             metadata: Optional extra data
+
+         Returns:
+             The created checkpoint
+         """
+         checkpoint = Checkpoint(
+             checkpoint_id=self.next_id,
+             label=f"{self.label_prefix}{self.next_id}",
+             description=description,
+             state=copy.deepcopy(state),
+             metadata=metadata or {},
+         )
+
+         # If we're not at the end, we're branching - truncate the future
+         if self.current_index < len(self.checkpoints) - 1:
+             self.checkpoints = self.checkpoints[:self.current_index + 1]
+
+         self.checkpoints.append(checkpoint)
+         self.current_index = len(self.checkpoints) - 1
+         self.next_id += 1
+
+         return checkpoint
+
+     def goto(self, checkpoint_id: int) -> Checkpoint | None:
+         """
+         Jump to a specific checkpoint.
+
+         Args:
+             checkpoint_id: The checkpoint ID to jump to (0 = root)
+
+         Returns:
+             The checkpoint, or None if not found (or root)
+         """
+         if checkpoint_id == 0:
+             # Root state - before any checkpoints
+             self.current_index = -1
+             return None
+
+         for i, cp in enumerate(self.checkpoints):
+             if cp.checkpoint_id == checkpoint_id:
+                 self.current_index = i
+                 return cp
+
+         return None  # Not found
+
+     def goto_label(self, label: str) -> Checkpoint | None:
+         """
+         Jump to a checkpoint by label (e.g., "T1", "root").
+
+         Args:
+             label: The label to find
+
+         Returns:
+             The checkpoint, or None if not found
+         """
+         if label.lower() == "root":
+             self.current_index = -1
+             return None
+
+         for i, cp in enumerate(self.checkpoints):
+             if cp.label == label:
+                 self.current_index = i
+                 return cp
+
+         return None
+
+     def current(self) -> Checkpoint | None:
+         """Get the current checkpoint, or None if at root."""
+         if self.current_index < 0 or self.current_index >= len(self.checkpoints):
+             return None
+         return self.checkpoints[self.current_index]
+
+     def current_state(self) -> dict[str, Any] | None:
+         """Get the current state, or None if at root."""
+         cp = self.current()
+         return copy.deepcopy(cp.state) if cp else None
+
+     def history(
+         self,
+         limit: int | None = None,
+         include_state: bool = False,
+     ) -> list[dict[str, Any]]:
+         """
+         Get history entries for display.
+
+         Args:
+             limit: Max entries to return (most recent)
+             include_state: Whether to include full state in entries
+
+         Returns:
+             List of history entries with checkpoint info
+         """
+         entries = []
+         for i, cp in enumerate(self.checkpoints):
+             entry = {
+                 "checkpoint_id": cp.checkpoint_id,
+                 "label": cp.label,
+                 "description": cp.description,
+                 "timestamp": cp.timestamp,
+                 "is_current": i == self.current_index,
+                 "metadata": cp.metadata,
+             }
+             if include_state:
+                 entry["state"] = cp.state
+             entries.append(entry)
+
+         if limit:
+             entries = entries[-limit:]
+
+         return entries
+
+     def label_for(self, checkpoint_id: int) -> str:
+         """Get label for a checkpoint ID."""
+         if checkpoint_id == 0:
+             return "root"
+         return f"{self.label_prefix}{checkpoint_id}"
+
+     def __len__(self) -> int:
+         """Number of checkpoints."""
+         return len(self.checkpoints)
+
+     def is_at_root(self) -> bool:
+         """Whether we're at root (before any checkpoints)."""
+         return self.current_index < 0
+
+     def is_at_end(self) -> bool:
+         """Whether we're at the most recent checkpoint."""
+         return self.current_index == len(self.checkpoints) - 1
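The branching semantics of `record()` plus `goto()`/`goto_label()` are easiest to see end to end. A minimal sketch (the import path is a guess, since this file's header is not shown in the diff):

```python
from zwarm.core.checkpoint import CheckpointManager  # hypothetical module path

mgr = CheckpointManager()
mgr.record(description="Added auth", state={"step": 1})  # becomes T1
mgr.record(description="Fixed bug", state={"step": 2})   # becomes T2

mgr.goto_label("T1")                                     # move back to T1
mgr.record(description="Alt path", state={"step": 3})    # becomes T3, truncates T2

labels = [entry["label"] for entry in mgr.history()]
assert labels == ["T1", "T3"]  # T2 was discarded when we branched
assert mgr.is_at_end()         # recording always moves to the new tip
```

Note that `next_id` keeps incrementing across branches, so discarded IDs (here, 2) are never reused; `goto(2)` after the branch simply returns `None`.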
zwarm/core/costs.py ADDED
@@ -0,0 +1,199 @@
+ """
+ Token cost estimation for LLM models.
+
+ Pricing data is hardcoded and may become stale. Last updated: 2026-01.
+
+ Sources:
+ - https://www.helicone.ai/llm-cost/provider/openai/model/gpt-5.1-codex
+ - https://pricepertoken.com/pricing-page/model/openai-codex-mini
+ """
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from typing import Any
+
+
+ @dataclass
+ class ModelPricing:
+     """Pricing for a model in $ per million tokens."""
+     input_per_million: float
+     output_per_million: float
+     cached_input_per_million: float | None = None  # Some models have a cached-input discount
+
+     def estimate_cost(
+         self,
+         input_tokens: int,
+         output_tokens: int,
+         cached_tokens: int = 0,
+     ) -> float:
+         """
+         Estimate cost in dollars.
+
+         Args:
+             input_tokens: Number of input tokens
+             output_tokens: Number of output tokens
+             cached_tokens: Number of cached input tokens (if applicable)
+
+         Returns:
+             Estimated cost in USD
+         """
+         input_cost = (input_tokens / 1_000_000) * self.input_per_million
+         output_cost = (output_tokens / 1_000_000) * self.output_per_million
+
+         cached_cost = 0.0
+         if cached_tokens and self.cached_input_per_million:
+             cached_cost = (cached_tokens / 1_000_000) * self.cached_input_per_million
+
+         return input_cost + output_cost + cached_cost
+
+
+ # Model pricing table ($ per million tokens)
+ # Last updated: 2026-01
+ MODEL_PRICING: dict[str, ModelPricing] = {
+     # OpenAI Codex models
+     "gpt-5.1-codex": ModelPricing(
+         input_per_million=1.25,
+         output_per_million=10.00,
+         cached_input_per_million=0.125,  # 90% discount for cached
+     ),
+     "gpt-5.1-codex-mini": ModelPricing(
+         input_per_million=0.25,
+         output_per_million=2.00,
+         cached_input_per_million=0.025,
+     ),
+     "gpt-5.1-codex-max": ModelPricing(
+         input_per_million=1.25,
+         output_per_million=10.00,
+         cached_input_per_million=0.125,
+     ),
+     # GPT-5 base models (for reference)
+     "gpt-5": ModelPricing(
+         input_per_million=1.25,
+         output_per_million=10.00,
+     ),
+     "gpt-5-mini": ModelPricing(
+         input_per_million=0.25,
+         output_per_million=2.00,
+     ),
+     # Claude models (Anthropic)
+     "claude-sonnet-4-20250514": ModelPricing(
+         input_per_million=3.00,
+         output_per_million=15.00,
+     ),
+     "claude-opus-4-20250514": ModelPricing(
+         input_per_million=15.00,
+         output_per_million=75.00,
+     ),
+     "claude-3-5-sonnet-20241022": ModelPricing(
+         input_per_million=3.00,
+         output_per_million=15.00,
+     ),
+ }
+
+ # Aliases for common model names
+ MODEL_ALIASES: dict[str, str] = {
+     "codex": "gpt-5.1-codex",
+     "codex-mini": "gpt-5.1-codex-mini",
+     "codex-max": "gpt-5.1-codex-max",
+     "gpt5": "gpt-5",
+     "gpt5-mini": "gpt-5-mini",
+     "sonnet": "claude-sonnet-4-20250514",
+     "opus": "claude-opus-4-20250514",
+ }
+
+
+ def get_pricing(model: str) -> ModelPricing | None:
+     """
+     Get pricing for a model.
+
+     Args:
+         model: Model name or alias
+
+     Returns:
+         ModelPricing or None if unknown
+     """
+     # Check aliases first
+     resolved = MODEL_ALIASES.get(model.lower(), model)
+
+     # Exact match
+     if resolved in MODEL_PRICING:
+         return MODEL_PRICING[resolved]
+
+     # Try lowercase
+     if resolved.lower() in MODEL_PRICING:
+         return MODEL_PRICING[resolved.lower()]
+
+     # Try prefix matching (e.g., "gpt-5.1-codex-mini-2026-01" -> "gpt-5.1-codex-mini")
+     for known_model in MODEL_PRICING:
+         if resolved.lower().startswith(known_model.lower()):
+             return MODEL_PRICING[known_model]
+
+     return None
+
+
+ def estimate_cost(
+     model: str,
+     input_tokens: int,
+     output_tokens: int,
+     cached_tokens: int = 0,
+ ) -> float | None:
+     """
+     Estimate cost for a model run.
+
+     Args:
+         model: Model name
+         input_tokens: Number of input tokens
+         output_tokens: Number of output tokens
+         cached_tokens: Number of cached input tokens
+
+     Returns:
+         Cost in USD, or None if model pricing unknown
+     """
+     pricing = get_pricing(model)
+     if pricing is None:
+         return None
+
+     return pricing.estimate_cost(input_tokens, output_tokens, cached_tokens)
+
+
+ def format_cost(cost: float | None) -> str:
+     """Format cost as a human-readable string."""
+     if cost is None:
+         return "?"
+     if cost < 0.01:
+         return f"${cost:.4f}"
+     elif cost < 1.00:
+         return f"${cost:.3f}"
+     else:
+         return f"${cost:.2f}"
+
+
+ def estimate_session_cost(
+     model: str,
+     token_usage: dict[str, Any],
+ ) -> dict[str, Any]:
+     """
+     Estimate cost for a session given its token usage.
+
+     Args:
+         model: Model used
+         token_usage: Dict with input_tokens, output_tokens, etc.
+
+     Returns:
+         Dict with cost info: {cost, cost_formatted, pricing_known}
+     """
+     input_tokens = token_usage.get("input_tokens", 0)
+     output_tokens = token_usage.get("output_tokens", 0)
+     cached_tokens = token_usage.get("cached_tokens", 0)
+
+     cost = estimate_cost(model, input_tokens, output_tokens, cached_tokens)
+
+     return {
+         "cost": cost,
+         "cost_formatted": format_cost(cost),
+         "pricing_known": cost is not None,
+         "model": model,
+         "input_tokens": input_tokens,
+         "output_tokens": output_tokens,
+     }
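For reference, here is how the helpers in `zwarm/core/costs.py` compose, with the arithmetic following the pricing table above (a sketch assuming the import path matches the file path):

```python
from zwarm.core.costs import estimate_cost, format_cost, get_pricing

# The alias "codex" resolves to the "gpt-5.1-codex" table entry.
assert get_pricing("codex") is get_pricing("gpt-5.1-codex")

# 100k input + 20k output tokens on gpt-5.1-codex:
# (0.1 * $1.25) + (0.02 * $10.00) = $0.325
cost = estimate_cost("gpt-5.1-codex", input_tokens=100_000, output_tokens=20_000)
print(format_cost(cost))  # "$0.325" (three decimals, since cost < $1.00)

# Unknown models yield None, which format_cost renders as "?".
print(format_cost(estimate_cost("some-unknown-model", 1, 1)))  # "?"
```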
zwarm/tools/delegation.py CHANGED
@@ -19,7 +19,7 @@ from __future__ import annotations
 
  import time
  from pathlib import Path
- from typing import TYPE_CHECKING, Any, Literal
+ from typing import TYPE_CHECKING, Any
 
  from wbal.helper import weaveTool
 
@@ -44,37 +44,6 @@ def _get_session_manager(orchestrator: "Orchestrator"):
      return orchestrator._session_manager
 
 
- def _wait_for_completion(manager, session_id: str, timeout: float = 300.0, poll_interval: float = 1.0) -> bool:
-     """
-     Wait for a session to complete.
-
-     Args:
-         manager: CodexSessionManager
-         session_id: Session to wait for
-         timeout: Max seconds to wait
-         poll_interval: Seconds between polls
-
-     Returns:
-         True if completed, False if timed out
-     """
-     from zwarm.sessions import SessionStatus
-
-     start = time.time()
-     while time.time() - start < timeout:
-         # get_session() auto-updates status based on output completion markers
-         session = manager.get_session(session_id)
-         if not session:
-             return False
-
-         # Check status (not is_running - PID check is unreliable due to reuse)
-         if session.status in (SessionStatus.COMPLETED, SessionStatus.FAILED, SessionStatus.KILLED):
-             return True
-
-         time.sleep(poll_interval)
-
-     return False
-
-
  def _truncate(text: str, max_len: int = 200) -> str:
      """Truncate text with ellipsis."""
      if len(text) <= max_len:
@@ -158,7 +127,6 @@ def _validate_working_dir(
  def delegate(
      self: "Orchestrator",
      task: str,
-     mode: Literal["sync", "async"] = "async",
      model: str | None = None,
      working_dir: str | None = None,
  ) -> dict[str, Any]:
@@ -166,11 +134,9 @@ def delegate(
      Delegate work to a Codex agent.
 
      This spawns a codex session - the exact same way `zwarm interactive` does.
+     All sessions run async - you get a session_id immediately and poll for results.
 
-     **NOTE: All sessions run async.** The mode parameter is ignored - sessions
-     always return immediately. Use sleep() + peek_session() to poll for completion.
-
-     Async workflow pattern:
+     Workflow pattern:
      1. delegate(task="Add logout button") -> session_id
      2. sleep(30) -> give it time
      3. peek_session(session_id) -> check if done
@@ -179,7 +145,6 @@ def delegate(
 
      Args:
          task: Clear description of what to do. Be specific about requirements.
-         mode: IGNORED - always async. (Legacy parameter, will be removed.)
          model: Model override (default: gpt-5.1-codex-mini).
         working_dir: Directory for codex to work in (default: orchestrator's dir).
 
@@ -191,9 +156,6 @@ def delegate(
          sleep(30)
          peek_session(session_id)  # Check progress
      """
-     # Force async mode - sync is deprecated
-     # TODO: Remove sync codepath entirely (see STATE.md)
-     mode = "async"
      # Validate working directory
      effective_dir, dir_error = _validate_working_dir(
          working_dir,
@@ -228,74 +190,15 @@ def delegate(
          adapter="codex",
      )
 
-     # For sync mode, wait for completion
-     if mode == "sync":
-         completed = _wait_for_completion(
-             manager,
-             session.id,
-             timeout=self.config.executor.timeout or 300.0,
-         )
-
-         # Refresh session to get updated status and messages
-         session = manager.get_session(session.id)
-
-         if not completed:
-             return {
-                 "success": False,
-                 "session_id": session.id,
-                 "status": "timeout",
-                 "error": "Session timed out waiting for codex to complete",
-                 "hint": "Use check_session() to monitor progress, or use async mode for long tasks",
-             }
-
-         # Get the response from messages
-         response_text = ""
-         messages = manager.get_messages(session.id)
-         for msg in messages:
-             if msg.role == "assistant":
-                 response_text = msg.content
-                 break  # Take first assistant message
-
-         # Build log path for debugging
-         log_path = str(manager._output_path(session.id, session.turn))
-
-         # Check if session failed
-         from zwarm.sessions import SessionStatus
-         if session.status == SessionStatus.FAILED:
-             return {
-                 "success": False,
-                 "session": _format_session_header(session),
-                 "session_id": session.id,
-                 "status": "failed",
-                 "task": _truncate(task, 100),
-                 "error": session.error or "Unknown error",
-                 "response": response_text or "(no response captured)",
-                 "tokens": _get_total_tokens(session),
-                 "log_file": log_path,
-                 "hint": "Check log_file for raw codex output. Use bash('cat <log_file>') to inspect.",
-             }
-
-         return {
-             "success": True,
-             "session": _format_session_header(session),
-             "session_id": session.id,
-             "status": session.status.value,
-             "task": _truncate(task, 100),
-             "response": response_text or "(no response captured)",
-             "tokens": _get_total_tokens(session),
-             "log_file": log_path,
-             "hint": "Use converse(session_id, message) to send follow-up messages",
-         }
-     else:
-         # Async mode - return immediately
-         return {
-             "success": True,
-             "session": _format_session_header(session),
-             "session_id": session.id,
-             "status": "running",
-             "task": _truncate(task, 100),
-             "hint": "Use check_session(session_id) to monitor progress",
-         }
+     # Return immediately - session runs in background
+     return {
+         "success": True,
+         "session": _format_session_header(session),
+         "session_id": session.id,
+         "status": "running",
+         "task": _truncate(task, 100),
+         "hint": "Use sleep() then check_session(session_id) to monitor progress",
+     }
 
 
  @weaveTool
@@ -303,21 +206,17 @@ def converse(
      self: "Orchestrator",
      session_id: str,
      message: str,
-     wait: bool = False,
  ) -> dict[str, Any]:
      """
      Continue a conversation with a codex session.
 
      This injects a follow-up message into the session, providing the
      conversation history as context. Like chatting with a developer.
-
-     **NOTE: Always runs async.** The wait parameter is ignored - messages
-     are sent and return immediately. Use sleep() + check_session() to poll.
+     Returns immediately - use sleep() + check_session() to poll for the response.
 
      Args:
          session_id: The session to continue (from delegate() result).
          message: Your next message to codex.
-         wait: IGNORED - always async. (Legacy parameter, will be removed.)
 
      Returns:
          {session_id, turn, status: "running"}
@@ -327,10 +226,6 @@ def converse(
          sleep(30)
          check_session(session_id)  # Get response
      """
-     # Force async mode - sync is deprecated
-     # TODO: Remove sync codepath entirely (see STATE.md)
-     wait = False
-
      manager = _get_session_manager(self)
 
      # Get current session
@@ -371,53 +266,15 @@ def converse(
              "session_id": session_id,
          }
 
-     if not wait:
-         # Async mode - return immediately
-         return {
-             "success": True,
-             "session": _format_session_header(updated_session),
-             "session_id": session_id,
-             "turn": updated_session.turn,
-             "status": "running",
-             "you_said": _truncate(message, 100),
-             "hint": "Use check_session(session_id) to see the response when ready",
-         }
-
-     # Sync mode - wait for completion
-     completed = _wait_for_completion(
-         manager,
-         session_id,
-         timeout=self.config.executor.timeout or 300.0,
-     )
-
-     # Refresh session
-     session = manager.get_session(session_id)
-
-     if not completed:
-         return {
-             "success": False,
-             "session_id": session_id,
-             "status": "timeout",
-             "error": "Session timed out waiting for response",
-             "hint": "Use check_session() to monitor progress",
-         }
-
-     # Get the response (last assistant message)
-     response_text = ""
-     messages = manager.get_messages(session_id)
-     for msg in reversed(messages):
-         if msg.role == "assistant":
-             response_text = msg.content
-             break
-
+     # Return immediately - session runs in background
      return {
          "success": True,
-         "session": _format_session_header(session),
+         "session": _format_session_header(updated_session),
          "session_id": session_id,
-         "turn": session.turn,
+         "turn": updated_session.turn,
+         "status": "running",
          "you_said": _truncate(message, 100),
-         "response": response_text or "(no response captured)",
-         "tokens": _get_total_tokens(session),
+         "hint": "Use sleep() then check_session(session_id) to see the response",
      }
 
 
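The net effect of these changes: `delegate()` and `converse()` are now fire-and-forget, with the sync codepath and `_wait_for_completion()` removed entirely. A sketch of the polling loop the new docstrings prescribe, written as the bare tool calls the docstrings themselves use (the signatures of the companion tools `sleep()` and `check_session()` are not shown in this diff):

```python
# Spawn a codex session; the result always comes back with status "running".
result = delegate(task="Add a logout button to the navbar")
session_id = result["session_id"]

sleep(30)                    # give the codex session time to work
check_session(session_id)    # poll: the response appears here once done

# Follow-ups behave the same way - converse() no longer accepts wait=.
converse(session_id, "Also add a confirmation dialog to the logout flow")
sleep(30)
check_session(session_id)
```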
@@ -1,8 +1,9 @@
  Metadata-Version: 2.4
  Name: zwarm
- Version: 3.0.1
+ Version: 3.2.0
  Summary: Multi-Agent CLI Orchestration Research Platform
  Requires-Python: <3.14,>=3.13
+ Requires-Dist: prompt-toolkit>=3.0.52
  Requires-Dist: python-dotenv>=1.0.0
  Requires-Dist: pyyaml>=6.0
  Requires-Dist: rich>=13.0.0