zwarm 3.2.1__py3-none-any.whl → 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zwarm/cli/pilot.py CHANGED
@@ -81,14 +81,23 @@ class ChoogingSpinner:
 
 
 # Context window sizes for different models (in tokens)
+# These are for the ORCHESTRATOR LLM, not the executors
 MODEL_CONTEXT_WINDOWS = {
+    # OpenAI models
     "gpt-5.1-codex": 200_000,
     "gpt-5.1-codex-mini": 200_000,
     "gpt-5.1-codex-max": 400_000,
     "gpt-5": 200_000,
     "gpt-5-mini": 200_000,
-    "claude-sonnet-4": 200_000,
-    "claude-opus-4": 200_000,
+    "o3": 200_000,
+    "o3-mini": 200_000,
+    # Claude models (if used as orchestrator)
+    "claude-sonnet": 200_000,
+    "claude-opus": 200_000,
+    "claude-haiku": 200_000,
+    "sonnet": 200_000,
+    "opus": 200_000,
+    "haiku": 200_000,
     # Fallback
     "default": 128_000,
 }
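The expanded table keys Claude entries by bare family names ("sonnet", "opus", "haiku"), which only pays off if lookups fall back sensibly for dated variants. A minimal sketch of such a lookup, assuming nothing about zwarm's internals (the `get_context_window` helper and the longest-prefix policy are illustrative, not part of the package):

```python
# Illustrative lookup against the table above; `get_context_window` is a
# hypothetical helper, not a zwarm API.
MODEL_CONTEXT_WINDOWS = {
    "gpt-5.1-codex": 200_000,
    "gpt-5.1-codex-max": 400_000,
    "claude-sonnet": 200_000,
    "default": 128_000,
}

def get_context_window(model: str) -> int:
    """Resolve a model name to a context window, falling back to 'default'."""
    name = model.lower()
    if name in MODEL_CONTEXT_WINDOWS:
        return MODEL_CONTEXT_WINDOWS[name]
    # Longest-prefix match so "gpt-5.1-codex-max-..." beats "gpt-5.1-codex"
    matches = [k for k in MODEL_CONTEXT_WINDOWS if name.startswith(k)]
    if matches:
        return MODEL_CONTEXT_WINDOWS[max(matches, key=len)]
    return MODEL_CONTEXT_WINDOWS["default"]

assert get_context_window("claude-sonnet-4-20250514") == 200_000
assert get_context_window("unknown-model") == 128_000
```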
@@ -186,10 +195,12 @@ def build_pilot_orchestrator(
     lm_class = lm_map.get(lm_choice, GPT5LargeVerbose)
     lm = lm_class()
 
-    # Load configuration
+    # Load configuration from working_dir (not cwd!)
+    # This ensures config.toml and .env are loaded from the project being worked on
     config = load_config(
         config_path=config_path,
         overrides=overrides,
+        working_dir=working_dir,
     )
 
     # Resolve working directory
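The new `working_dir` pass-through matters when the pilot is launched from outside the project checkout. A usage sketch built only from the signature shown in this diff (the project path and override value are illustrative):

```python
from pathlib import Path

from zwarm.core.config import load_config

# Illustrative: run the pilot against a project that is not the shell's cwd;
# config.toml and .env are then discovered under that project.
project = Path.home() / "projects" / "webapp"  # hypothetical path

config = load_config(
    overrides=["orchestrator.max_steps_per_turn=80"],  # --set style override
    working_dir=project,
)
```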
@@ -592,12 +603,33 @@ def execute_step_with_events(
     """
     had_message = False
 
+    # Update environment with current progress before perceive
+    # This ensures the observation has fresh step/token counts
+    if hasattr(orchestrator, "env") and hasattr(orchestrator.env, "update_progress"):
+        total_tokens = getattr(orchestrator, "_total_tokens", 0)
+        executor_usage = orchestrator.get_executor_usage() if hasattr(orchestrator, "get_executor_usage") else {}
+        orchestrator.env.update_progress(
+            step_count=getattr(orchestrator, "_step_count", 0),
+            max_steps=getattr(orchestrator, "maxSteps", 50),
+            total_tokens=total_tokens,
+            executor_tokens=executor_usage.get("total_tokens", 0),
+        )
+
     # Execute perceive (updates environment observation)
     orchestrator.perceive()
 
     # Execute invoke (calls LLM)
     response = orchestrator.invoke()
 
+    # Track cumulative token usage from the API response
+    # (This mirrors what step() does in orchestrator.py)
+    if hasattr(orchestrator, "_last_response") and orchestrator._last_response:
+        last_response = orchestrator._last_response
+        if hasattr(last_response, "usage") and last_response.usage:
+            usage = last_response.usage
+            tokens_this_call = getattr(usage, "total_tokens", 0)
+            orchestrator._total_tokens = getattr(orchestrator, "_total_tokens", 0) + tokens_this_call
+
     # Extract and render events from response
     if response:
         events = extract_events_from_response(response)
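Both new blocks are deliberately defensive: every attribute access is guarded with hasattr/getattr so older orchestrators without progress tracking keep working. The environment class itself is not shown in this diff; the stand-in below sketches the contract `update_progress` is assumed to satisfy (keyword names mirror the call site, but the class is not zwarm code):

```python
from dataclasses import dataclass, field
from typing import Any

@dataclass
class ProgressEnv:
    """Stand-in environment satisfying the hasattr(..., "update_progress") guard."""
    progress: dict[str, Any] = field(default_factory=dict)

    def update_progress(
        self,
        *,
        step_count: int,
        max_steps: int,
        total_tokens: int,
        executor_tokens: int,
    ) -> None:
        # Stash the counters; a real environment would fold these into the
        # observation that perceive() hands to the LLM on the next step.
        self.progress = {
            "step_count": step_count,
            "max_steps": max_steps,
            "total_tokens": total_tokens,
            "executor_tokens": executor_tokens,
        }

env = ProgressEnv()
env.update_progress(step_count=3, max_steps=50, total_tokens=12_400, executor_tokens=9_100)
assert env.progress["total_tokens"] == 12_400
```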
@@ -647,7 +679,7 @@ def execute_step_with_events(
 def run_until_response(
     orchestrator: Any,
     renderer: EventRenderer,
-    max_steps: int = 20,
+    max_steps: int = 60,
 ) -> List[tuple]:
     """
     Run the orchestrator until it produces a message response.
@@ -655,7 +687,7 @@ def run_until_response(
     Keeps stepping while the agent only produces tool calls.
     Stops when:
     - Agent produces a text message (returns to user)
-    - Max steps reached
+    - Max steps reached (configurable via orchestrator.max_steps_per_turn)
     - Stop condition triggered
 
     This is wrapped as a weave.op to group all child calls per turn.
@@ -663,7 +695,7 @@ def run_until_response(
     Args:
         orchestrator: The orchestrator instance
         renderer: Event renderer for output
-        max_steps: Safety limit on steps per turn
+        max_steps: Safety limit on steps per turn (default: 60)
 
     Returns:
         All tool results from the turn
@@ -701,6 +733,9 @@ def run_until_response(
             if not results:
                 break
 
+        # Show session status at end of turn (if there are any sessions)
+        render_session_status(orchestrator, renderer)
+
         return all_results
 
     return _run_turn()
@@ -722,7 +757,12 @@ def print_help(renderer: EventRenderer) -> None:
         "  :goto <turn|root>    Jump to a prior turn (e.g., :goto T1)",
         "  :sessions            Show executor sessions",
         "  :reasoning [on|off]  Toggle reasoning display",
-        "  :quit / :exit        Exit the pilot",
+        "  :save                Save state (for later resume)",
+        "  :quit / :exit        Exit the pilot (auto-saves)",
+        "",
+        "Resume:",
+        "  State is auto-saved after each turn. To resume a session:",
+        "    $ zwarm pilot --resume --instance <instance_id>",
         "",
         "Multiline input:",
         '  Start with """ and end with """ to enter multiple lines.',
@@ -756,6 +796,38 @@ def get_sessions_snapshot(orchestrator: Any) -> Dict[str, Any]:
     return {"sessions": []}
 
 
+def render_session_status(orchestrator: Any, renderer: EventRenderer) -> None:
+    """
+    Render a compact session status line if there are active sessions.
+
+    Shows: "Sessions: 2 running, 1 done, 0 failed"
+    Only displays if there are any sessions.
+    """
+    if not hasattr(orchestrator, "_session_manager"):
+        return
+
+    sessions = orchestrator._session_manager.list_sessions()
+    if not sessions:
+        return
+
+    running = sum(1 for s in sessions if s.status.value == "running")
+    completed = sum(1 for s in sessions if s.status.value == "completed")
+    failed = sum(1 for s in sessions if s.status.value == "failed")
+
+    # Build status line with colors
+    parts = []
+    if running > 0:
+        parts.append(f"[cyan]{running} running[/]")
+    if completed > 0:
+        parts.append(f"[green]{completed} done[/]")
+    if failed > 0:
+        parts.append(f"[red]{failed} failed[/]")
+
+    if parts:
+        status_line = ", ".join(parts)
+        console.print(f"[dim]Sessions:[/] {status_line}")
+
+
 def run_pilot(
     orchestrator: Any,
     *,
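The new `render_session_status` relies on only two things: a `_session_manager` exposing `list_sessions()`, and sessions whose `status.value` is a string. A self-contained check with test doubles (the enum and manager below are fakes for illustration, not zwarm classes):

```python
from enum import Enum
from types import SimpleNamespace

class FakeStatus(Enum):  # mirrors the status values the counters look for
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"

class FakeSessionManager:
    """Test double exposing only list_sessions(), as used above."""
    def __init__(self, statuses):
        self._sessions = [SimpleNamespace(status=s) for s in statuses]

    def list_sessions(self):
        return self._sessions

orch = SimpleNamespace(
    _session_manager=FakeSessionManager(
        [FakeStatus.RUNNING, FakeStatus.RUNNING, FakeStatus.COMPLETED]
    )
)
# render_session_status(orch, renderer) would print, via rich markup:
#   Sessions: 2 running, 1 done
```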
@@ -812,7 +884,8 @@ def _run_pilot_repl(
         })
 
         renderer.reset_turn()
-        results = run_until_response(orchestrator, renderer)
+        max_steps = getattr(orchestrator.config.orchestrator, "max_steps_per_turn", 60)
+        results = run_until_response(orchestrator, renderer, max_steps=max_steps)
 
         # Record checkpoint
         state.record(
@@ -893,6 +966,10 @@ def _run_pilot_repl(
 
         # :quit / :exit
         if cmd in ("quit", "exit", "q"):
+            # Save state before exiting
+            if hasattr(orchestrator, "save_state"):
+                orchestrator.save_state()
+                renderer.status("[dim]State saved.[/]")
            renderer.status("Goodbye!")
            break
 
@@ -1085,6 +1162,20 @@ def _run_pilot_repl(
             renderer.status(f"Reasoning display: {current}")
             continue
 
+        # :save
+        if cmd == "save":
+            if hasattr(orchestrator, "save_state"):
+                orchestrator.save_state()
+                instance_id = getattr(orchestrator, "instance_id", None)
+                if instance_id:
+                    renderer.status(f"[green]✓[/] State saved (instance: {instance_id[:8]})")
+                    renderer.status(f"  [dim]Resume with: zwarm pilot --resume --instance {instance_id[:8]}[/]")
+                else:
+                    renderer.status("[green]✓[/] State saved")
+            else:
+                renderer.error("State saving not available")
+            continue
+
         # Unknown command
         renderer.error(f"Unknown command: {cmd}")
         renderer.status("Type :help for available commands.")
@@ -1101,8 +1192,9 @@ def _run_pilot_repl(
 
         # Execute steps until agent responds with a message
         renderer.reset_turn()
+        max_steps = getattr(orchestrator.config.orchestrator, "max_steps_per_turn", 60)
         try:
-            results = run_until_response(orchestrator, renderer)
+            results = run_until_response(orchestrator, renderer, max_steps=max_steps)
         except Exception as e:
             renderer.error(f"Step failed: {e}")
             # Remove the user message on failure
@@ -1124,6 +1216,10 @@ def _run_pilot_repl(
             },
         )
 
+        # Save state for resume capability
+        if hasattr(orchestrator, "save_state"):
+            orchestrator.save_state()
+
         # Show turn info
         cp = state.current()
         if cp:
@@ -1139,4 +1235,6 @@ def _run_pilot_repl(
         if hasattr(orchestrator, "stopCondition") and orchestrator.stopCondition:
             renderer.status("")
             renderer.status("Orchestrator signaled completion.")
+            if hasattr(orchestrator, "save_state"):
+                orchestrator.save_state()
             break
zwarm/core/config.py CHANGED
@@ -37,6 +37,7 @@ class ExecutorConfig:
     sandbox: str = "workspace-write"  # read-only | workspace-write | danger-full-access
     timeout: int = 3600
     reasoning_effort: str | None = "high"  # low | medium | high (default to high for compatibility)
+    # Note: web_search is always enabled via .codex/config.toml (set up by `zwarm init`)
 
 
 @dataclass
@@ -59,8 +60,8 @@ class OrchestratorConfig:
     prompt: str | None = None  # path to prompt yaml
     tools: list[str] = field(default_factory=lambda: ["delegate", "converse", "check_session", "end_session", "bash"])
     max_steps: int = 50
+    max_steps_per_turn: int = 60  # Max tool-call steps before returning to user (pilot mode)
     parallel_delegations: int = 4
-    sync_first: bool = True  # prefer sync mode by default
     compaction: CompactionConfig = field(default_factory=CompactionConfig)
 
     # Directory restrictions for agent delegations
@@ -172,8 +173,8 @@ class ZwarmConfig:
             "prompt": self.orchestrator.prompt,
             "tools": self.orchestrator.tools,
             "max_steps": self.orchestrator.max_steps,
+            "max_steps_per_turn": self.orchestrator.max_steps_per_turn,
             "parallel_delegations": self.orchestrator.parallel_delegations,
-            "sync_first": self.orchestrator.sync_first,
             "compaction": {
                 "enabled": self.orchestrator.compaction.enabled,
                 "max_tokens": self.orchestrator.compaction.max_tokens,
@@ -195,15 +196,16 @@ class ZwarmConfig:
     }
 
 
-def load_env(path: Path | None = None) -> None:
+def load_env(path: Path | None = None, base_dir: Path | None = None) -> None:
     """Load .env file if it exists."""
     if path is None:
-        path = Path.cwd() / ".env"
+        base = base_dir or Path.cwd()
+        path = base / ".env"
     if path.exists():
         load_dotenv(path)
 
 
-def load_toml_config(path: Path | None = None) -> dict[str, Any]:
+def load_toml_config(path: Path | None = None, base_dir: Path | None = None) -> dict[str, Any]:
     """
     Load config.toml file.
 
@@ -211,11 +213,16 @@ def load_toml_config(path: Path | None = None) -> dict[str, Any]:
     1. Explicit path (if provided)
     2. .zwarm/config.toml (new standard location)
     3. config.toml (legacy location for backwards compat)
+
+    Args:
+        path: Explicit path to config.toml
+        base_dir: Base directory to search in (defaults to cwd)
     """
     if path is None:
+        base = base_dir or Path.cwd()
         # Try new location first
-        new_path = Path.cwd() / ".zwarm" / "config.toml"
-        legacy_path = Path.cwd() / "config.toml"
+        new_path = base / ".zwarm" / "config.toml"
+        legacy_path = base / "config.toml"
         if new_path.exists():
             path = new_path
         elif legacy_path.exists():
@@ -306,6 +313,7 @@ def load_config(
     toml_path: Path | None = None,
     env_path: Path | None = None,
     overrides: list[str] | None = None,
+    working_dir: Path | None = None,
 ) -> ZwarmConfig:
     """
     Load configuration with full precedence chain:
@@ -314,15 +322,24 @@ def load_config(
     3. YAML config file (if provided)
     4. CLI overrides (--set key=value)
     5. Environment variables (for secrets)
+
+    Args:
+        config_path: Path to YAML config file
+        toml_path: Explicit path to config.toml
+        env_path: Explicit path to .env file
+        overrides: CLI overrides (--set key=value)
+        working_dir: Working directory to search for config files (defaults to cwd).
+            This is important when using --working-dir flag to ensure
+            config is loaded from the project directory, not invoke directory.
     """
     # Load .env first (for secrets)
-    load_env(env_path)
+    load_env(env_path, base_dir=working_dir)
 
     # Start with defaults
     config_dict: dict[str, Any] = {}
 
     # Layer in config.toml
-    toml_config = load_toml_config(toml_path)
+    toml_config = load_toml_config(toml_path, base_dir=working_dir)
     if toml_config:
         config_dict = deep_merge(config_dict, toml_config)
 
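Threading `base_dir` through `load_env` and `load_toml_config` keeps the documented search order, just rooted at `working_dir`. A sketch of the discovery precedence, assuming the TOML sections mirror ZwarmConfig's layout (the `[orchestrator]` table name is inferred from `to_dict` above):

```python
import tempfile
from pathlib import Path

from zwarm.core.config import load_toml_config

# .zwarm/config.toml should win over the legacy top-level config.toml
# when both exist under the same base directory.
with tempfile.TemporaryDirectory() as tmp:
    base = Path(tmp)
    (base / ".zwarm").mkdir()
    (base / ".zwarm" / "config.toml").write_text("[orchestrator]\nmax_steps_per_turn = 80\n")
    (base / "config.toml").write_text("[orchestrator]\nmax_steps_per_turn = 10\n")

    cfg = load_toml_config(base_dir=base)
    assert cfg["orchestrator"]["max_steps_per_turn"] == 80  # new location wins
```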
zwarm/core/costs.py CHANGED
@@ -1,109 +1,42 @@
 """
 Token cost estimation for LLM models.
 
-Pricing data is hardcoded and may become stale. Last updated: 2026-01.
+This module re-exports from the centralized model registry.
+For adding new models, edit: zwarm/core/registry.py
 
-Sources:
-- https://www.helicone.ai/llm-cost/provider/openai/model/gpt-5.1-codex
-- https://pricepertoken.com/pricing-page/model/openai-codex-mini
+Backwards-compatible API preserved for existing code.
 """
 
 from __future__ import annotations
 
-from dataclasses import dataclass
-from typing import Any
-
-
-@dataclass
-class ModelPricing:
-    """Pricing for a model in $ per million tokens."""
-    input_per_million: float
-    output_per_million: float
-    cached_input_per_million: float | None = None  # Some models have cached input discount
-
-    def estimate_cost(
-        self,
-        input_tokens: int,
-        output_tokens: int,
-        cached_tokens: int = 0,
-    ) -> float:
-        """
-        Estimate cost in dollars.
-
-        Args:
-            input_tokens: Number of input tokens
-            output_tokens: Number of output tokens
-            cached_tokens: Number of cached input tokens (if applicable)
-
-        Returns:
-            Estimated cost in USD
-        """
-        input_cost = (input_tokens / 1_000_000) * self.input_per_million
-        output_cost = (output_tokens / 1_000_000) * self.output_per_million
-
-        cached_cost = 0.0
-        if cached_tokens and self.cached_input_per_million:
-            cached_cost = (cached_tokens / 1_000_000) * self.cached_input_per_million
-
-        return input_cost + output_cost + cached_cost
-
-
-# Model pricing table ($ per million tokens)
-# Last updated: 2026-01
-MODEL_PRICING: dict[str, ModelPricing] = {
-    # OpenAI Codex models
-    "gpt-5.1-codex": ModelPricing(
-        input_per_million=1.25,
-        output_per_million=10.00,
-        cached_input_per_million=0.125,  # 90% discount for cached
-    ),
-    "gpt-5.1-codex-mini": ModelPricing(
-        input_per_million=0.25,
-        output_per_million=2.00,
-        cached_input_per_million=0.025,
-    ),
-    "gpt-5.1-codex-max": ModelPricing(
-        input_per_million=1.25,
-        output_per_million=10.00,
-        cached_input_per_million=0.125,
-    ),
-    # GPT-5 base models (for reference)
-    "gpt-5": ModelPricing(
-        input_per_million=1.25,
-        output_per_million=10.00,
-    ),
-    "gpt-5-mini": ModelPricing(
-        input_per_million=0.25,
-        output_per_million=2.00,
-    ),
-    # Claude models (Anthropic)
-    "claude-sonnet-4-20250514": ModelPricing(
-        input_per_million=3.00,
-        output_per_million=15.00,
-    ),
-    "claude-opus-4-20250514": ModelPricing(
-        input_per_million=15.00,
-        output_per_million=75.00,
-    ),
-    "claude-3-5-sonnet-20241022": ModelPricing(
-        input_per_million=3.00,
-        output_per_million=15.00,
-    ),
-}
-
-# Aliases for common model names
-MODEL_ALIASES: dict[str, str] = {
-    "codex": "gpt-5.1-codex",
-    "codex-mini": "gpt-5.1-codex-mini",
-    "codex-max": "gpt-5.1-codex-max",
-    "gpt5": "gpt-5",
-    "gpt5-mini": "gpt-5-mini",
-    "sonnet": "claude-sonnet-4-20250514",
-    "opus": "claude-opus-4-20250514",
-}
-
-
-def get_pricing(model: str) -> ModelPricing | None:
+# Re-export everything from registry for backwards compatibility
+from zwarm.core.registry import (
+    ModelInfo,
+    MODELS,
+    resolve_model,
+    get_adapter_for_model,
+    get_default_model,
+    list_models,
+    list_adapters,
+    get_models_help_text,
+    get_models_table_data,
+    estimate_cost,
+    format_cost,
+    estimate_session_cost,
+)
+
+# Backwards compatibility alias
+ModelPricing = ModelInfo
+
+# Legacy aliases for backwards compatibility
+MODEL_PRICING = {m.canonical: m for m in MODELS}
+MODEL_ALIASES = {}
+for m in MODELS:
+    for alias in m.aliases:
+        MODEL_ALIASES[alias] = m.canonical
+
+
+def get_pricing(model: str) -> ModelInfo | None:
     """
     Get pricing for a model.
 
@@ -111,89 +44,28 @@ def get_pricing(model: str) -> ModelPricing | None:
         model: Model name or alias
 
     Returns:
-        ModelPricing or None if unknown
-    """
-    # Check aliases first
-    resolved = MODEL_ALIASES.get(model.lower(), model)
-
-    # Exact match
-    if resolved in MODEL_PRICING:
-        return MODEL_PRICING[resolved]
-
-    # Try lowercase
-    if resolved.lower() in MODEL_PRICING:
-        return MODEL_PRICING[resolved.lower()]
-
-    # Try prefix matching (e.g., "gpt-5.1-codex-mini-2026-01" -> "gpt-5.1-codex-mini")
-    for known_model in MODEL_PRICING:
-        if resolved.lower().startswith(known_model.lower()):
-            return MODEL_PRICING[known_model]
-
-    return None
-
-
-def estimate_cost(
-    model: str,
-    input_tokens: int,
-    output_tokens: int,
-    cached_tokens: int = 0,
-) -> float | None:
+        ModelInfo or None if unknown
     """
-    Estimate cost for a model run.
-
-    Args:
-        model: Model name
-        input_tokens: Number of input tokens
-        output_tokens: Number of output tokens
-        cached_tokens: Number of cached input tokens
-
-    Returns:
-        Cost in USD, or None if model pricing unknown
-    """
-    pricing = get_pricing(model)
-    if pricing is None:
-        return None
-
-    return pricing.estimate_cost(input_tokens, output_tokens, cached_tokens)
-
-
-def format_cost(cost: float | None) -> str:
-    """Format cost as a human-readable string."""
-    if cost is None:
-        return "?"
-    if cost < 0.01:
-        return f"${cost:.4f}"
-    elif cost < 1.00:
-        return f"${cost:.3f}"
-    else:
-        return f"${cost:.2f}"
-
-
-def estimate_session_cost(
-    model: str,
-    token_usage: dict[str, Any],
-) -> dict[str, Any]:
-    """
-    Estimate cost for a session given its token usage.
-
-    Args:
-        model: Model used
-        token_usage: Dict with input_tokens, output_tokens, etc.
-
-    Returns:
-        Dict with cost info: {cost, cost_formatted, pricing_known}
-    """
-    input_tokens = token_usage.get("input_tokens", 0)
-    output_tokens = token_usage.get("output_tokens", 0)
-    cached_tokens = token_usage.get("cached_tokens", 0)
-
-    cost = estimate_cost(model, input_tokens, output_tokens, cached_tokens)
-
-    return {
-        "cost": cost,
-        "cost_formatted": format_cost(cost),
-        "pricing_known": cost is not None,
-        "model": model,
-        "input_tokens": input_tokens,
-        "output_tokens": output_tokens,
-    }
+    return resolve_model(model)
+
+
+__all__ = [
+    # New API
+    "ModelInfo",
+    "MODELS",
+    "resolve_model",
+    "get_adapter_for_model",
+    "get_default_model",
+    "list_models",
+    "list_adapters",
+    "get_models_help_text",
+    "get_models_table_data",
+    "estimate_cost",
+    "format_cost",
+    "estimate_session_cost",
+    # Legacy API
+    "MODEL_PRICING",
+    "MODEL_ALIASES",
+    "ModelPricing",
+    "get_pricing",
+]
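Since the shim rebuilds `MODEL_PRICING` and `MODEL_ALIASES` from the registry and keeps `get_pricing` as a thin wrapper over `resolve_model`, existing imports should behave as before. A sketch of the preserved call sites (exact values depend on what zwarm/core/registry.py ships; the keyword names assume the legacy `estimate_cost` signature survived the move):

```python
from zwarm.core.costs import MODEL_ALIASES, estimate_cost, format_cost, get_pricing

info = get_pricing("sonnet")       # alias resolution now delegates to resolve_model()
print(MODEL_ALIASES.get("codex"))  # canonical name per the registry, if that alias exists

cost = estimate_cost(
    "gpt-5.1-codex",
    input_tokens=50_000,
    output_tokens=4_000,
)
print(format_cost(cost))  # "$0.xxx"-style string for sub-dollar estimates, "?" if unknown
```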