zwarm 0.1.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
- zwarm/adapters/claude_code.py +55 -3
- zwarm/adapters/codex_mcp.py +433 -122
- zwarm/adapters/test_codex_mcp.py +26 -26
- zwarm/cli/main.py +464 -3
- zwarm/core/compact.py +312 -0
- zwarm/core/config.py +51 -9
- zwarm/core/environment.py +104 -33
- zwarm/core/models.py +16 -0
- zwarm/core/test_compact.py +266 -0
- zwarm/orchestrator.py +222 -39
- zwarm/prompts/orchestrator.py +128 -146
- zwarm/test_orchestrator_watchers.py +23 -0
- zwarm/tools/delegation.py +23 -4
- zwarm/watchers/builtin.py +90 -4
- zwarm/watchers/manager.py +46 -8
- zwarm/watchers/test_watchers.py +42 -0
- {zwarm-0.1.0.dist-info → zwarm-1.0.0.dist-info}/METADATA +162 -36
- zwarm-1.0.0.dist-info/RECORD +33 -0
- zwarm-0.1.0.dist-info/RECORD +0 -30
- {zwarm-0.1.0.dist-info → zwarm-1.0.0.dist-info}/WHEEL +0 -0
- {zwarm-0.1.0.dist-info → zwarm-1.0.0.dist-info}/entry_points.txt +0 -0
zwarm/core/compact.py
ADDED
@@ -0,0 +1,312 @@
+"""
+Message compaction for context window management.
+
+Safely prunes old messages while preserving:
+- System prompt and initial user task
+- Tool call/response pairs (never orphaned)
+- Recent conversation context
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class CompactionResult:
+    """Result of a compaction operation."""
+
+    messages: list[dict[str, Any]]
+    removed_count: int
+    original_count: int
+    preserved_reason: str | None = None
+
+    @property
+    def was_compacted(self) -> bool:
+        return self.removed_count > 0
+
+
+def estimate_tokens(messages: list[dict[str, Any]]) -> int:
+    """
+    Rough token estimate for messages.
+
+    Uses ~4 chars per token as a simple heuristic.
+    This is intentionally conservative.
+    """
+    total_chars = 0
+    for msg in messages:
+        content = msg.get("content", "")
+        if isinstance(content, str):
+            total_chars += len(content)
+        elif isinstance(content, list):
+            # Anthropic-style content blocks
+            for block in content:
+                if isinstance(block, dict):
+                    total_chars += len(str(block.get("text", "")))
+                    total_chars += len(str(block.get("input", "")))
+                elif isinstance(block, str):
+                    total_chars += len(block)
+
+        # Tool calls add tokens too
+        tool_calls = msg.get("tool_calls", [])
+        for tc in tool_calls:
+            total_chars += len(str(tc.get("function", {}).get("arguments", "")))
+
+    return total_chars // 4
+
+
+def find_tool_groups(messages: list[dict[str, Any]]) -> list[tuple[int, int]]:
+    """
+    Find message index ranges that form tool call groups.
+
+    A tool call group is:
+    - An assistant message with tool_calls
+    - All following tool/user response messages until the next assistant message
+
+    This handles both OpenAI format (role="tool") and Anthropic format
+    (role="user" with tool_result content).
+
+    Returns list of (start_idx, end_idx) tuples (inclusive).
+    """
+    groups = []
+    i = 0
+
+    while i < len(messages):
+        msg = messages[i]
+
+        # Check for tool calls in assistant message
+        has_tool_calls = False
+
+        # OpenAI format: tool_calls field
+        if msg.get("role") == "assistant" and msg.get("tool_calls"):
+            has_tool_calls = True
+
+        # Anthropic format: content blocks with type="tool_use"
+        if msg.get("role") == "assistant":
+            content = msg.get("content", [])
+            if isinstance(content, list):
+                for block in content:
+                    if isinstance(block, dict) and block.get("type") == "tool_use":
+                        has_tool_calls = True
+                        break
+
+        if has_tool_calls:
+            start = i
+            j = i + 1
+
+            # Find all following tool responses
+            while j < len(messages):
+                next_msg = messages[j]
+                role = next_msg.get("role", "")
+
+                # OpenAI format: tool role
+                if role == "tool":
+                    j += 1
+                    continue
+
+                # Anthropic format: user message with tool_result
+                if role == "user":
+                    content = next_msg.get("content", [])
+                    if isinstance(content, list):
+                        has_tool_result = any(
+                            isinstance(b, dict) and b.get("type") == "tool_result"
+                            for b in content
+                        )
+                        if has_tool_result:
+                            j += 1
+                            continue
+
+                # Not a tool response, stop here
+                break
+
+            groups.append((start, j - 1))
+            i = j
+        else:
+            i += 1
+
+    return groups
+
+
+def compact_messages(
+    messages: list[dict[str, Any]],
+    keep_first_n: int = 2,
+    keep_last_n: int = 10,
+    max_tokens: int | None = None,
+    target_token_pct: float = 0.7,
+) -> CompactionResult:
+    """
+    Compact message history by removing old messages (LRU-style).
+
+    Preserves:
+    - First N messages (system prompt, user task)
+    - Last N messages (recent context)
+    - Tool call/response pairs are NEVER split
+
+    Args:
+        messages: The message list to compact
+        keep_first_n: Number of messages to always keep at the start
+        keep_last_n: Number of messages to always keep at the end
+        max_tokens: If set, compact when estimated tokens exceed this
+        target_token_pct: Target percentage of max_tokens after compaction
+
+    Returns:
+        CompactionResult with the compacted messages and stats
+    """
+    original_count = len(messages)
+
+    # Nothing to compact if we have few messages
+    if len(messages) <= keep_first_n + keep_last_n:
+        return CompactionResult(
+            messages=messages,
+            removed_count=0,
+            original_count=original_count,
+            preserved_reason="Too few messages to compact",
+        )
+
+    # Check if compaction is needed based on tokens
+    if max_tokens:
+        current_tokens = estimate_tokens(messages)
+        if current_tokens < max_tokens:
+            return CompactionResult(
+                messages=messages,
+                removed_count=0,
+                original_count=original_count,
+                preserved_reason=f"Under token limit ({current_tokens}/{max_tokens})",
+            )
+
+    # Find tool call groups (these must stay together)
+    tool_groups = find_tool_groups(messages)
+
+    # Build a set of "protected" indices (in tool groups)
+    protected_indices: set[int] = set()
+    for start, end in tool_groups:
+        for idx in range(start, end + 1):
+            protected_indices.add(idx)
+
+    # Determine which messages are in the "middle" (candidates for removal)
+    # Middle = not in first N, not in last N
+    middle_start = keep_first_n
+    middle_end = len(messages) - keep_last_n
+
+    if middle_start >= middle_end:
+        return CompactionResult(
+            messages=messages,
+            removed_count=0,
+            original_count=original_count,
+            preserved_reason="No middle messages to remove",
+        )
+
+    # Find removable message ranges in the middle
+    # We remove from the oldest (lowest index) first
+    removable_ranges: list[tuple[int, int]] = []
+    i = middle_start
+
+    while i < middle_end:
+        # Check if this index is in a tool group
+        in_group = False
+        for start, end in tool_groups:
+            if start <= i <= end:
+                # This message is part of a tool group
+                # Check if the ENTIRE group is in the middle
+                if start >= middle_start and end < middle_end:
+                    # Entire group is removable as a unit
+                    removable_ranges.append((start, end))
+                    i = end + 1
+                    in_group = True
+                    break
+                else:
+                    # Group spans protected region, skip it entirely
+                    i = end + 1
+                    in_group = True
+                    break
+
+        if not in_group:
+            # Single message, can be removed individually
+            removable_ranges.append((i, i))
+            i += 1
+
+    # Deduplicate and sort ranges
+    removable_ranges = sorted(set(removable_ranges), key=lambda x: x[0])
+
+    if not removable_ranges:
+        return CompactionResult(
+            messages=messages,
+            removed_count=0,
+            original_count=original_count,
+            preserved_reason="All middle messages are in protected tool groups",
+        )
+
+    # Determine how many to remove
+    # Start by removing the oldest half of removable ranges
+    if max_tokens:
+        # Token-based: remove until under target
+        target_tokens = int(max_tokens * target_token_pct)
+        indices_to_remove: set[int] = set()
+
+        for start, end in removable_ranges:
+            for idx in range(start, end + 1):
+                indices_to_remove.add(idx)
+
+            # Check if we've removed enough
+            remaining = [m for i, m in enumerate(messages) if i not in indices_to_remove]
+            if estimate_tokens(remaining) <= target_tokens:
+                break
+    else:
+        # Count-based: remove oldest half of middle
+        total_removable = sum(end - start + 1 for start, end in removable_ranges)
+        target_remove = total_removable // 2
+
+        indices_to_remove = set()
+        removed = 0
+
+        for start, end in removable_ranges:
+            if removed >= target_remove:
+                break
+            for idx in range(start, end + 1):
+                indices_to_remove.add(idx)
+                removed += 1
+
+    # Build new message list
+    new_messages = [m for i, m in enumerate(messages) if i not in indices_to_remove]
+
+    # Add a compaction marker so the model knows history was truncated
+    if indices_to_remove and len(new_messages) > keep_first_n:
+        # Insert marker after the preserved first messages
+        marker = {
+            "role": "system",
+            "content": (
+                f"[Context compacted: {len(indices_to_remove)} older messages removed "
+                f"to manage context window. Conversation continues below.]"
+            ),
+        }
+        new_messages.insert(keep_first_n, marker)
+
+    logger.info(
+        f"Compacted messages: {original_count} -> {len(new_messages)} "
+        f"(removed {len(indices_to_remove)})"
+    )
+
+    return CompactionResult(
+        messages=new_messages,
+        removed_count=len(indices_to_remove),
+        original_count=original_count,
+    )
+
+
+def should_compact(
+    messages: list[dict[str, Any]],
+    max_tokens: int,
+    threshold_pct: float = 0.85,
+) -> bool:
+    """
+    Check if messages should be compacted.
+
+    Returns True if estimated tokens exceed threshold percentage of max.
+    """
+    current = estimate_tokens(messages)
+    threshold = int(max_tokens * threshold_pct)
+    return current >= threshold
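For context, here is how the new API composes. This is a standalone sketch, not part of the published diff; the message dicts follow the OpenAI-style shape the module handles, and the numbers are made up for the demo:

from zwarm.core.compact import compact_messages, find_tool_groups, should_compact

# Build a conversation: system + task, then many tool-call round trips.
messages = [
    {"role": "system", "content": "You are the orchestrator."},
    {"role": "user", "content": "Refactor the config loader."},
]
for i in range(30):
    messages.append({
        "role": "assistant",
        "tool_calls": [{"function": {"arguments": f'{{"step": {i}}}'}}],
    })
    messages.append({"role": "tool", "content": "output " * 100})

print(find_tool_groups(messages)[:2])  # [(2, 3), (4, 5)] -- call/response pairs stay intact
if should_compact(messages, max_tokens=2000, threshold_pct=0.85):
    result = compact_messages(messages, keep_first_n=2, keep_last_n=10, max_tokens=2000)
    print(result.was_compacted, result.removed_count)  # True, plus however many were pruned

Note that removal always proceeds whole-group-at-a-time, so a compacted history never contains a tool response without its originating assistant call.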
zwarm/core/config.py
CHANGED
@@ -38,6 +38,18 @@ class ExecutorConfig:
     timeout: int = 3600
 
 
+@dataclass
+class CompactionConfig:
+    """Configuration for context window compaction."""
+
+    enabled: bool = True
+    max_tokens: int = 100000  # Trigger compaction when estimated tokens exceed this
+    threshold_pct: float = 0.85  # Compact when at this % of max_tokens
+    target_pct: float = 0.7  # Target this % after compaction
+    keep_first_n: int = 2  # Always keep first N messages (system + task)
+    keep_last_n: int = 10  # Always keep last N messages (recent context)
+
+
 @dataclass
 class OrchestratorConfig:
     """Configuration for the orchestrator."""

@@ -48,6 +60,7 @@ class OrchestratorConfig:
     max_steps: int = 50
     parallel_delegations: int = 4
     sync_first: bool = True  # prefer sync mode by default
+    compaction: CompactionConfig = field(default_factory=CompactionConfig)
 
 
 @dataclass

@@ -88,19 +101,40 @@ class ZwarmConfig:
         orchestrator_data = data.get("orchestrator", {})
         watchers_data = data.get("watchers", {})
 
-        # Parse
-
-
-
-
-
-
-
+        # Parse compaction config from orchestrator
+        compaction_data = orchestrator_data.pop("compaction", {}) if orchestrator_data else {}
+        compaction_config = CompactionConfig(**compaction_data) if compaction_data else CompactionConfig()
+
+        # Parse watchers config - handle both list shorthand and dict format
+        if isinstance(watchers_data, list):
+            # Shorthand: watchers: [progress, budget, scope]
+            watchers_config = WatchersConfig(
+                enabled=True,
+                watchers=[
+                    WatcherConfigItem(name=w) if isinstance(w, str) else WatcherConfigItem(**w)
+                    for w in watchers_data
+                ],
+            )
+        else:
+            # Full format: watchers: {enabled: true, watchers: [...]}
+            watchers_config = WatchersConfig(
+                enabled=watchers_data.get("enabled", True),
+                watchers=[
+                    WatcherConfigItem(name=w) if isinstance(w, str) else WatcherConfigItem(**w)
+                    for w in watchers_data.get("watchers", [])
+                ] or WatchersConfig().watchers,
+            )
+
+        # Build orchestrator config with nested compaction
+        if orchestrator_data:
+            orchestrator_config = OrchestratorConfig(**orchestrator_data, compaction=compaction_config)
+        else:
+            orchestrator_config = OrchestratorConfig(compaction=compaction_config)
 
         return cls(
             weave=WeaveConfig(**weave_data) if weave_data else WeaveConfig(),
             executor=ExecutorConfig(**executor_data) if executor_data else ExecutorConfig(),
-            orchestrator=
+            orchestrator=orchestrator_config,
             watchers=watchers_config,
             state_dir=data.get("state_dir", ".zwarm"),
         )

@@ -125,6 +159,14 @@ class ZwarmConfig:
                 "max_steps": self.orchestrator.max_steps,
                 "parallel_delegations": self.orchestrator.parallel_delegations,
                 "sync_first": self.orchestrator.sync_first,
+                "compaction": {
+                    "enabled": self.orchestrator.compaction.enabled,
+                    "max_tokens": self.orchestrator.compaction.max_tokens,
+                    "threshold_pct": self.orchestrator.compaction.threshold_pct,
+                    "target_pct": self.orchestrator.compaction.target_pct,
+                    "keep_first_n": self.orchestrator.compaction.keep_first_n,
+                    "keep_last_n": self.orchestrator.compaction.keep_last_n,
+                },
             },
             "watchers": {
                 "enabled": self.watchers.enabled,
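To make the new shape concrete, a minimal sketch of a config payload exercising the nested compaction block and the watchers list shorthand. The loader classmethod name is an assumption here (the hunk only shows `return cls(...)`); everything else matches the dataclasses above:

from zwarm.core.config import ZwarmConfig

data = {
    "orchestrator": {
        "max_steps": 80,
        # Partial override: unspecified fields keep their CompactionConfig defaults.
        "compaction": {"max_tokens": 120000, "keep_last_n": 12},
    },
    "watchers": ["progress", "budget"],  # list shorthand -> enabled=True with named watchers
}
config = ZwarmConfig.from_dict(data)  # assumed entry point, per `return cls(...)` above
print(config.orchestrator.compaction.max_tokens)  # 120000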
zwarm/core/environment.py
CHANGED
@@ -4,14 +4,15 @@ OrchestratorEnv: A lean environment for the zwarm orchestrator.
 Unlike ChatEnv, this environment:
 - Has no notes/observations (we use StateManager instead)
 - Has no chat() tool (orchestrator communicates via output_handler)
-- Shows active sessions in observe()
+- Shows active sessions, step progress, and budget in observe()
 """
 
 from __future__ import annotations
 
 from pathlib import Path
-from typing import Any, Callable
+from typing import TYPE_CHECKING, Any, Callable
 
+from pydantic import PrivateAttr
 from wbal.environment import Environment
 
 if TYPE_CHECKING:

@@ -26,6 +27,8 @@ class OrchestratorEnv(Environment):
     - Task context
     - Working directory info
     - Active session visibility
+    - Step progress tracking
+    - Budget/resource monitoring
     - Output handler for messages
     """
 

@@ -34,50 +37,118 @@ class OrchestratorEnv(Environment):
     output_handler: Callable[[str], None] = lambda x: print(x)
 
     # Session tracking (set by orchestrator)
-    _sessions: dict[str, "ConversationSession"] | None = None
+    _sessions: dict[str, "ConversationSession"] | None = PrivateAttr(default=None)
+
+    # Progress tracking (updated by orchestrator each step)
+    _step_count: int = PrivateAttr(default=0)
+    _max_steps: int = PrivateAttr(default=50)
+    _total_tokens: int = PrivateAttr(default=0)
+    _executor_tokens: int = PrivateAttr(default=0)  # Executor token usage
+
+    # Budget config (set from config)
+    _budget_max_sessions: int | None = PrivateAttr(default=None)
 
     def set_sessions(self, sessions: dict[str, "ConversationSession"]) -> None:
         """Set the sessions dict for observe() visibility."""
         self._sessions = sessions
 
+    def update_progress(
+        self,
+        step_count: int,
+        max_steps: int,
+        total_tokens: int = 0,
+        executor_tokens: int = 0,
+    ) -> None:
+        """Update progress tracking (called by orchestrator each step)."""
+        self._step_count = step_count
+        self._max_steps = max_steps
+        self._total_tokens = total_tokens
+        self._executor_tokens = executor_tokens
+
+    def set_budget(self, max_sessions: int | None = None) -> None:
+        """Set budget limits from config."""
+        self._budget_max_sessions = max_sessions
+
     def observe(self) -> str:
         """
         Return observable state for the orchestrator.
 
         Shows:
-        -
-        -
+        - Progress (steps, tokens)
+        - Session summary
         - Active sessions with their status
+        - Working directory
+
+        Note: Task is NOT included here as it's already in the user message.
         """
         parts = []
 
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Progress bar and stats
+        progress_pct = (
+            (self._step_count / self._max_steps * 100) if self._max_steps > 0 else 0
+        )
+        bar_len = 20
+        filled = (
+            int(bar_len * self._step_count / self._max_steps)
+            if self._max_steps > 0
+            else 0
+        )
+        bar = "█" * filled + "░" * (bar_len - filled)
+
+        progress_lines = [
+            f"Steps: [{bar}] {self._step_count}/{self._max_steps} ({progress_pct:.0f}%)",
+        ]
+        if self._total_tokens > 0 or self._executor_tokens > 0:
+            token_parts = []
+            if self._total_tokens > 0:
+                token_parts.append(f"orchestrator: ~{self._total_tokens:,}")
+            if self._executor_tokens > 0:
+                token_parts.append(f"executors: ~{self._executor_tokens:,}")
+            progress_lines.append(f"Tokens: {', '.join(token_parts)}")
+
+        parts.append("## Progress\n" + "\n".join(progress_lines))
+
+        # Session summary
+        if self._sessions is not None:
+            active = sum(
+                1 for s in self._sessions.values() if s.status.value == "active"
+            )
+            completed = sum(
+                1 for s in self._sessions.values() if s.status.value == "completed"
+            )
+            failed = sum(
+                1 for s in self._sessions.values() if s.status.value == "failed"
+            )
+            total = len(self._sessions)
+
+            summary = f"Sessions: {active} active, {completed} done, {failed} failed ({total} total)"
+            if self._budget_max_sessions:
+                summary += f" [limit: {self._budget_max_sessions}]"
+
+            parts.append(f"## Resources\n{summary}")
+
+            # Active sessions detail
+            active_sessions = [
+                (sid, s)
+                for sid, s in self._sessions.items()
+                if s.status.value == "active"
+            ]
+            if active_sessions:
+                session_lines = []
+                for sid, session in active_sessions:
+                    mode_tag = "sync" if session.mode.value == "sync" else "async"
+                    turns = len([m for m in session.messages if m.role == "user"])
+                    task_preview = (
+                        session.task_description[:50] + "..."
+                        if len(session.task_description) > 50
+                        else session.task_description
+                    )
+                    session_lines.append(
+                        f"\n  • {sid[:8]} ({session.adapter}, {mode_tag}, {turns} turns): {task_preview}"
+                    )
+                parts.append("## Active Sessions\n" + "\n".join(session_lines))
+
+        # Working directory (less prominent)
+        parts.append(f"## Context\nWorking dir: {self.working_dir.absolute()}")
 
         return "\n\n".join(parts)
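For a feel of what the richer observe() emits, here is the progress-bar math lifted into a standalone sketch (same formatting logic as the hunk above, no package import needed):

# Mirrors the Steps line that observe() renders.
step_count, max_steps = 12, 50
bar_len = 20
filled = int(bar_len * step_count / max_steps) if max_steps > 0 else 0
bar = "█" * filled + "░" * (bar_len - filled)
print(f"Steps: [{bar}] {step_count}/{max_steps} ({step_count / max_steps * 100:.0f}%)")
# -> Steps: [████░░░░░░░░░░░░░░░░] 12/50 (24%)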
zwarm/core/models.py
CHANGED
@@ -92,6 +92,20 @@ class ConversationSession:
     model: str | None = None
     exit_message: str | None = None
 
+    # Token usage tracking for cost calculation
+    token_usage: dict[str, int] = field(default_factory=lambda: {
+        "input_tokens": 0,
+        "output_tokens": 0,
+        "total_tokens": 0,
+    })
+
+    def add_usage(self, usage: dict[str, int]) -> None:
+        """Add token usage from an interaction."""
+        if not usage:
+            return
+        for key in self.token_usage:
+            self.token_usage[key] += usage.get(key, 0)
+
     def add_message(self, role: Literal["user", "assistant", "system"], content: str) -> Message:
         """Add a message to the conversation."""
         msg = Message(role=role, content=content)

@@ -125,6 +139,7 @@ class ConversationSession:
             "task_description": self.task_description,
             "model": self.model,
             "exit_message": self.exit_message,
+            "token_usage": self.token_usage,
         }
 
     @classmethod

@@ -143,6 +158,7 @@ class ConversationSession:
             task_description=data.get("task_description", ""),
             model=data.get("model"),
             exit_message=data.get("exit_message"),
+            token_usage=data.get("token_usage", {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}),
        )
 
 
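Finally, a standalone sketch of the accumulation rule add_usage applies, using plain dicts rather than a full ConversationSession (constructing one needs fields outside this diff). The cache_tokens key is hypothetical, included only to show that unknown keys are ignored:

# Mirrors ConversationSession.add_usage: only the known counters accumulate.
token_usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
for usage in (
    {"input_tokens": 1200, "output_tokens": 300, "total_tokens": 1500},
    {"input_tokens": 800, "output_tokens": 150, "total_tokens": 950, "cache_tokens": 42},
):
    for key in token_usage:
        token_usage[key] += usage.get(key, 0)
print(token_usage)  # {'input_tokens': 2000, 'output_tokens': 450, 'total_tokens': 2450}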