zwarm 3.4.0__py3-none-any.whl → 3.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zwarm/core/registry.py ADDED
@@ -0,0 +1,329 @@
1
+ """
2
+ Model Registry - Centralized LLM model definitions for zwarm.
3
+
4
+ This registry defines all supported models with:
5
+ - Canonical names and aliases
6
+ - Adapter mapping (which CLI handles the model)
7
+ - Pricing information
8
+
9
+ Add new models here and they'll automatically appear in:
10
+ - `zwarm interactive` help and `models` command
11
+ - Cost estimation
12
+ - Adapter auto-detection from model name
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from dataclasses import dataclass, field
18
+ from typing import Any
19
+
20
+
21
@dataclass
class ModelInfo:
    """Complete information about an LLM model."""

    # --- Identity ---
    canonical: str  # Full model name (e.g., "gpt-5.1-codex-mini")
    adapter: str  # Which CLI serves this model: "codex" or "claude"
    aliases: list[str] = field(default_factory=list)  # Short synonyms accepted in lookups

    # --- Pricing (US dollars per million tokens) ---
    input_per_million: float = 0.0
    output_per_million: float = 0.0
    cached_input_per_million: float | None = None  # None => no separate cached rate

    # --- Metadata ---
    description: str = ""
    is_default: bool = False  # True if this is the adapter's default model

    def estimate_cost(
        self,
        input_tokens: int,
        output_tokens: int,
        cached_tokens: int = 0,
    ) -> float:
        """Estimate cost in dollars.

        Args:
            input_tokens: Input (prompt) tokens.
            output_tokens: Generated (completion) tokens.
            cached_tokens: Cached input tokens; billed only when a cached
                rate is configured on this model.

        Returns:
            Estimated cost in USD.

        NOTE(review): the cached-token cost is added ON TOP of the full
        input cost. This assumes callers pass cached_tokens that are NOT
        already counted inside input_tokens — confirm against the token
        usage producers.
        """
        cost = (input_tokens / 1_000_000) * self.input_per_million
        cost += (output_tokens / 1_000_000) * self.output_per_million
        # Skip cached billing when no cached tokens or no cached rate.
        if cached_tokens and self.cached_input_per_million:
            cost += (cached_tokens / 1_000_000) * self.cached_input_per_million
        return cost
54
+
55
+
56
# =============================================================================
# Model Registry - ADD NEW MODELS HERE
# =============================================================================

MODELS: list[ModelInfo] = [
    # --- OpenAI Codex models (served through the `codex` CLI) ---
    ModelInfo(
        canonical="gpt-5.1-codex-mini",
        adapter="codex",
        aliases=["codex-mini", "mini"],
        input_per_million=0.25,
        output_per_million=2.00,
        cached_input_per_million=0.025,
        description="Fast, cost-effective coding model",
        is_default=True,
    ),
    ModelInfo(
        canonical="gpt-5.1-codex",
        adapter="codex",
        aliases=["codex", "codex-full"],
        input_per_million=1.25,
        output_per_million=10.00,
        cached_input_per_million=0.125,
        description="Full Codex model with extended reasoning",
    ),
    ModelInfo(
        canonical="gpt-5.1-codex-max",
        adapter="codex",
        aliases=["codex-max", "max"],
        input_per_million=1.25,
        output_per_million=10.00,
        cached_input_per_million=0.125,
        description="Maximum context Codex model",
    ),
    # --- Anthropic Claude models (served through the `claude` CLI) ---
    ModelInfo(
        canonical="sonnet",
        adapter="claude",
        aliases=["claude-sonnet", "claude-4-sonnet"],
        input_per_million=3.00,
        output_per_million=15.00,
        description="Balanced Claude model for most tasks",
        is_default=True,
    ),
    ModelInfo(
        canonical="opus",
        adapter="claude",
        aliases=["claude-opus", "claude-4-opus"],
        input_per_million=15.00,
        output_per_million=75.00,
        description="Most capable Claude model",
    ),
    ModelInfo(
        canonical="haiku",
        adapter="claude",
        aliases=["claude-haiku", "claude-4-haiku"],
        input_per_million=0.25,
        output_per_million=1.25,
        description="Fast, lightweight Claude model",
    ),
]
121
+
122
+
123
+ # =============================================================================
124
+ # Registry Lookups
125
+ # =============================================================================
126
+
127
+
128
def _build_lookup_tables() -> tuple[dict[str, ModelInfo], dict[str, ModelInfo]]:
    """Build lookup tables for fast model resolution.

    Returns:
        ``(by_canonical, by_alias)`` — both map lowercased names to
        ModelInfo. ``by_alias`` also contains every canonical name, so it
        alone resolves any exact name or alias.
    """
    by_canonical: dict[str, ModelInfo] = {}
    by_alias: dict[str, ModelInfo] = {}

    for model in MODELS:
        key = model.canonical.lower()
        by_canonical[key] = model
        # Canonical names double as their own alias for exact lookups.
        by_alias[key] = model
        for alias in model.aliases:
            by_alias[alias.lower()] = model

    return by_canonical, by_alias


# Built once at import time; MODELS is static for the process lifetime.
_BY_CANONICAL, _BY_ALIAS = _build_lookup_tables()
143
+
144
+
145
def resolve_model(name: str) -> ModelInfo | None:
    """
    Resolve a model name or alias to its ModelInfo.

    Resolution order:
    1. Exact (case-insensitive) match against aliases and canonical names.
    2. Longest canonical-name prefix match, so dated variants such as
       "gpt-5.1-codex-mini-2026-01" resolve to "gpt-5.1-codex-mini".

    Args:
        name: Model name, alias, or prefixed/dated variant

    Returns:
        ModelInfo or None if not found
    """
    name_lower = name.lower()

    # Exact match on alias or canonical
    if name_lower in _BY_ALIAS:
        return _BY_ALIAS[name_lower]

    # Prefix match — try LONGER canonical names first. "gpt-5.1-codex" is
    # itself a prefix of "gpt-5.1-codex-max", so naive insertion-order
    # iteration would resolve "gpt-5.1-codex-max-2026-01" to the shorter
    # (wrong) model.
    for canonical in sorted(_BY_CANONICAL, key=len, reverse=True):
        if name_lower.startswith(canonical):
            return _BY_CANONICAL[canonical]

    return None
167
+
168
+
169
def get_adapter_for_model(name: str) -> str | None:
    """
    Get the adapter name for a model.

    Args:
        name: Model name or alias

    Returns:
        Adapter name ("codex" or "claude") or None if unknown
    """
    info = resolve_model(name)
    if info is None:
        return None
    return info.adapter
181
+
182
+
183
def get_default_model(adapter: str) -> str | None:
    """
    Get the default model for an adapter.

    Args:
        adapter: Adapter name ("codex" or "claude")

    Returns:
        Default model canonical name or None
    """
    # First registered default for this adapter wins, matching scan order.
    return next(
        (m.canonical for m in MODELS if m.adapter == adapter and m.is_default),
        None,
    )
197
+
198
+
199
def list_models(adapter: str | None = None) -> list[ModelInfo]:
    """
    List available models.

    Args:
        adapter: Filter by adapter, or None for all

    Returns:
        List of ModelInfo objects (a fresh list; safe for callers to mutate)
    """
    if not adapter:
        return MODELS.copy()
    return [model for model in MODELS if model.adapter == adapter]
212
+
213
+
214
def list_adapters() -> list[str]:
    """Get the sorted list of unique adapter names."""
    return sorted({model.adapter for model in MODELS})
217
+
218
+
219
def get_models_help_text() -> str:
    """
    Generate help text showing all available models.

    Returns formatted string for display in help messages; defaults are
    marked with a trailing "*".
    """
    out = ["", "Available models:"]

    for adapter_name in list_adapters():
        out.append(f"\n  {adapter_name.upper()}:")
        for info in list_models(adapter_name):
            marker = " *" if info.is_default else ""
            alias_part = f" ({', '.join(info.aliases)})" if info.aliases else ""
            out.append(f"    {info.canonical}{alias_part}{marker}")

    out.append("\n  * = default for adapter")
    return "\n".join(out)
238
+
239
+
240
def get_models_table_data() -> list[dict[str, Any]]:
    """
    Get model data formatted for table display.

    Returns a list of dicts with keys: adapter, model, aliases, default,
    input_price, output_price, description.
    """
    return [
        {
            "adapter": m.adapter,
            "model": m.canonical,
            "aliases": ", ".join(m.aliases),
            "default": m.is_default,
            "input_price": m.input_per_million,
            "output_price": m.output_per_million,
            "description": m.description,
        }
        for m in MODELS
    ]
258
+
259
+
260
+ # =============================================================================
261
+ # Cost Estimation
262
+ # =============================================================================
263
+
264
+
265
def estimate_cost(
    model: str,
    input_tokens: int,
    output_tokens: int,
    cached_tokens: int = 0,
) -> float | None:
    """
    Estimate cost for a model run.

    Args:
        model: Model name or alias
        input_tokens: Number of input tokens
        output_tokens: Number of output tokens
        cached_tokens: Number of cached input tokens

    Returns:
        Cost in USD, or None if model unknown
    """
    info = resolve_model(model)
    if info is None:
        # Unknown model => no pricing data; callers treat None as "unknown".
        return None
    return info.estimate_cost(input_tokens, output_tokens, cached_tokens)
288
+
289
+
290
def format_cost(cost: float | None) -> str:
    """Render a dollar cost with precision scaled to its magnitude.

    Tiny amounts keep extra decimals so they don't round to $0.00;
    unknown costs (None) render as "?".
    """
    if cost is None:
        return "?"
    if cost < 0.01:
        precision = 4
    elif cost < 1.00:
        precision = 3
    else:
        precision = 2
    return f"${cost:.{precision}f}"
300
+
301
+
302
def estimate_session_cost(
    model: str,
    token_usage: dict[str, Any],
) -> dict[str, Any]:
    """
    Estimate cost for a session given its token usage.

    Args:
        model: Model used
        token_usage: Dict with input_tokens, output_tokens, cached_tokens
            (missing counts default to 0)

    Returns:
        Dict with cost info: {cost, cost_formatted, pricing_known, model,
        input_tokens, output_tokens}
    """
    # Missing usage fields count as zero tokens.
    in_toks = token_usage.get("input_tokens", 0)
    out_toks = token_usage.get("output_tokens", 0)
    cached = token_usage.get("cached_tokens", 0)

    cost = estimate_cost(model, in_toks, out_toks, cached)

    return {
        "cost": cost,
        "cost_formatted": format_cost(cost),
        "pricing_known": cost is not None,  # False when model pricing unknown
        "model": model,
        "input_tokens": in_toks,
        "output_tokens": out_toks,
    }
zwarm/orchestrator.py CHANGED
@@ -293,13 +293,60 @@ Review what was accomplished in the previous session and delegate new tasks as n
293
293
 
294
294
  def perceive(self) -> None:
295
295
  """
296
- Override perceive to refresh environment observation each step.
296
+ Override perceive to properly inject system prompt and environment observation.
297
297
 
298
- The base YamlAgent only adds env.observe() on step 0. We need to
299
- update it each step to show current progress, sessions, etc.
298
+ Fixes over base YamlAgent:
299
+ 1. Always injects system prompt on step 0, even if messages isn't empty
300
+ (pilot mode adds user messages before perceive runs)
301
+ 2. Only adds "Task: " message if there's actually a task (skips for pilot mode)
302
+ 3. Refreshes environment observation each step
303
+
304
+ Note: self.messages can contain both dict messages AND OpenAI response objects
305
+ (ResponseReasoningItem, ResponseMessageItem, etc.), so we must check isinstance().
300
306
  """
301
- # Let base class do initial setup
302
- super().perceive()
307
+ from datetime import datetime
308
+
309
+ def _is_dict_msg(msg, role: str | None = None, content_check: str | None = None) -> bool:
310
+ """Check if msg is a dict with optional role/content matching."""
311
+ if not isinstance(msg, dict):
312
+ return False
313
+ if role and msg.get("role") != role:
314
+ return False
315
+ if content_check and content_check not in msg.get("content", ""):
316
+ return False
317
+ return True
318
+
319
+ # On step 0, ensure system prompt is present
320
+ if self._step_count == 0:
321
+ # Check if system prompt already exists (avoid duplicates on resume)
322
+ has_system_prompt = False
323
+ if self.system_prompt:
324
+ prompt_snippet = self.system_prompt[:100]
325
+ has_system_prompt = any(
326
+ _is_dict_msg(msg, role="system", content_check=prompt_snippet)
327
+ for msg in self.messages
328
+ )
329
+
330
+ if not has_system_prompt and self.system_prompt:
331
+ today = datetime.now().strftime("%Y-%m-%d")
332
+ # Insert at beginning to ensure it's first
333
+ self.messages.insert(0, {
334
+ "role": "system",
335
+ "content": f"{self.system_prompt}\n\nToday's date: {today}",
336
+ })
337
+
338
+ # Add task message ONLY if we have a task (skip for pilot mode where task is empty)
339
+ task = getattr(self.env, "task", "")
340
+ if task:
341
+ # Check if Task message already exists (avoid duplicates)
342
+ has_task_msg = any(
343
+ isinstance(msg, dict)
344
+ and msg.get("role") == "user"
345
+ and msg.get("content", "").startswith("Task: ")
346
+ for msg in self.messages
347
+ )
348
+ if not has_task_msg:
349
+ self.messages.append({"role": "user", "content": f"Task: {task}"})
303
350
 
304
351
  # Update environment observation
305
352
  env_obs = (self.env.observe() or "").strip()
@@ -308,15 +355,20 @@ Review what was accomplished in the previous session and delegate new tasks as n
308
355
 
309
356
  # Find and update existing env observation, or append new one
310
357
  # Look for a system message containing our markers
311
- env_marker = "## Progress" # Our env observation has this
358
+ # Note: pilot mode uses "## Active Sessions", full mode uses "## Progress"
359
+ env_markers = ["## Progress", "## Active Sessions", "Working dir:"]
312
360
 
313
361
  for i, msg in enumerate(self.messages):
314
- if msg.get("role") == "system" and env_marker in msg.get("content", ""):
315
- # Update in place
316
- self.messages[i]["content"] = env_obs
317
- return
318
-
319
- # Not found - append as new system message (shouldn't happen after step 0)
362
+ if not isinstance(msg, dict):
363
+ continue
364
+ if msg.get("role") == "system":
365
+ content = msg.get("content", "")
366
+ if any(marker in content for marker in env_markers):
367
+ # Update in place
368
+ self.messages[i]["content"] = env_obs
369
+ return
370
+
371
+ # Not found - append as new system message
320
372
  self.messages.append({"role": "system", "content": env_obs})
321
373
 
322
374
  @weave.op()
@@ -1,26 +1,65 @@
1
1
  """
2
- Codex Session Manager.
2
+ Session Manager - Background process management for executor agents.
3
3
 
4
- A standalone session manager for running Codex agents in the background.
5
- Similar to Sculptor/Claude parallel tools but for Codex.
4
+ Supports multiple executor adapters:
5
+ - Codex (CodexSessionManager) - OpenAI's Codex CLI
6
+ - Claude (ClaudeSessionManager) - Anthropic's Claude Code CLI
6
7
 
7
8
  Features:
8
- - Start codex exec tasks in background processes
9
+ - Start executor tasks in background processes
9
10
  - Monitor status and view message history
10
11
  - Inject follow-up messages (continue conversations)
11
12
  - Kill running sessions
13
+ - Unified interface via BaseSessionManager
12
14
  """
13
15
 
14
- from zwarm.sessions.manager import (
15
- CodexSession,
16
- CodexSessionManager,
16
+ from zwarm.sessions.base import (
17
+ BaseSessionManager,
18
+ CodexSession, # Alias for Session (backwards compat)
19
+ Session,
17
20
  SessionMessage,
18
21
  SessionStatus,
19
22
  )
23
+ from zwarm.sessions.manager import CodexSessionManager
24
+
25
# Adapters that get_session_manager() knows how to construct.
AVAILABLE_ADAPTERS = ["codex", "claude"]

__all__ = [
    # Base classes
    "BaseSessionManager",
    "Session",
    "SessionMessage",
    "SessionStatus",
    # Backwards compatibility
    "CodexSession",
    # Adapters
    "CodexSessionManager",
    # Registry
    "AVAILABLE_ADAPTERS",
    # Factory
    "get_session_manager",
]
43
+
44
+
45
def get_session_manager(adapter: str, state_dir: str = ".zwarm") -> BaseSessionManager:
    """
    Factory function to get a session manager for the given adapter.

    Args:
        adapter: Adapter name ("codex" or "claude")
        state_dir: State directory path

    Returns:
        Session manager instance

    Raises:
        ValueError: If adapter is not recognized
    """
    if adapter == "codex":
        return CodexSessionManager(state_dir)
    if adapter == "claude":
        # Imported lazily so the claude adapter stays optional.
        from zwarm.sessions.claude import ClaudeSessionManager

        return ClaudeSessionManager(state_dir)
    raise ValueError(f"Unknown adapter: {adapter}. Available: {AVAILABLE_ADAPTERS}")