stirrup 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stirrup/__init__.py +2 -0
- stirrup/clients/__init__.py +5 -0
- stirrup/clients/chat_completions_client.py +0 -3
- stirrup/clients/litellm_client.py +20 -11
- stirrup/clients/open_responses_client.py +434 -0
- stirrup/clients/utils.py +6 -1
- stirrup/constants.py +6 -2
- stirrup/core/agent.py +196 -57
- stirrup/core/cache.py +479 -0
- stirrup/core/models.py +53 -9
- stirrup/prompts/base_system_prompt.txt +1 -1
- stirrup/tools/__init__.py +3 -0
- stirrup/tools/browser_use.py +591 -0
- stirrup/tools/calculator.py +1 -1
- stirrup/tools/code_backends/base.py +24 -0
- stirrup/tools/code_backends/docker.py +19 -0
- stirrup/tools/code_backends/e2b.py +43 -11
- stirrup/tools/code_backends/local.py +19 -2
- stirrup/tools/finish.py +27 -1
- stirrup/tools/user_input.py +130 -0
- stirrup/tools/web.py +1 -0
- stirrup/utils/logging.py +32 -7
- {stirrup-0.1.2.dist-info → stirrup-0.1.4.dist-info}/METADATA +16 -13
- stirrup-0.1.4.dist-info/RECORD +38 -0
- {stirrup-0.1.2.dist-info → stirrup-0.1.4.dist-info}/WHEEL +2 -2
- stirrup-0.1.2.dist-info/RECORD +0 -34
stirrup/core/cache.py
ADDED
@@ -0,0 +1,479 @@
+"""Cache module for persisting and resuming agent state.
+
+Provides functionality to cache agent state (messages, run metadata, execution environment files)
+on non-success exits and restore that state for resumption in new runs.
+"""
+
+import base64
+import hashlib
+import json
+import logging
+import os
+import shutil
+from dataclasses import dataclass, field
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any
+
+from pydantic import TypeAdapter
+
+from stirrup.core.models import (
+    AudioContentBlock,
+    ChatMessage,
+    ImageContentBlock,
+    VideoContentBlock,
+)
+
+logger = logging.getLogger(__name__)
+
+# Default cache directory relative to the project root
+DEFAULT_CACHE_DIR = Path("~/.cache/stirrup/").expanduser()
+
+# TypeAdapter for deserializing ChatMessage discriminated union
+ChatMessageAdapter: TypeAdapter[ChatMessage] = TypeAdapter(ChatMessage)
+
+
+def compute_task_hash(init_msgs: str | list[ChatMessage]) -> str:
+    """Compute deterministic hash from initial messages for cache identification.
+
+    Args:
+        init_msgs: Either a string prompt or list of ChatMessage objects.
+
+    Returns:
+        First 12 characters of SHA256 hash (hex) for readability.
+    """
+    if isinstance(init_msgs, str):
+        content = init_msgs
+    else:
+        # Serialize messages to JSON for hashing
+        content = json.dumps(
+            [serialize_message(msg) for msg in init_msgs],
+            sort_keys=True,
+            ensure_ascii=True,
+        )
+
+    hash_bytes = hashlib.sha256(content.encode("utf-8")).hexdigest()
+    return hash_bytes[:12]
+
+
+def _serialize_content_block(block: Any) -> dict | str:  # noqa: ANN401
+    """Serialize a content block, encoding binary data as base64.
+
+    Args:
+        block: A content block (string, ImageContentBlock, VideoContentBlock, AudioContentBlock).
+
+    Returns:
+        JSON-serializable representation with base64-encoded binary data.
+    """
+    if isinstance(block, str):
+        return block
+    elif isinstance(block, ImageContentBlock):
+        return {
+            "kind": "image_content_block",
+            "data": base64.b64encode(block.data).decode("ascii"),
+        }
+    elif isinstance(block, VideoContentBlock):
+        return {
+            "kind": "video_content_block",
+            "data": base64.b64encode(block.data).decode("ascii"),
+        }
+    elif isinstance(block, AudioContentBlock):
+        return {
+            "kind": "audio_content_block",
+            "data": base64.b64encode(block.data).decode("ascii"),
+        }
+    elif isinstance(block, dict):
+        # Handle dict from model_dump that might contain unencoded bytes
+        # This can happen when Pydantic fails to base64-encode bytes in mode="json"
+        if "data" in block and isinstance(block["data"], bytes):
+            return {
+                **block,
+                "data": base64.b64encode(block["data"]).decode("ascii"),
+            }
+        return block
+    else:
+        raise ValueError(f"Unknown content block type: {type(block)}")
+
+
+def _deserialize_content_block(data: dict | str) -> Any:  # noqa: ANN401
+    """Deserialize a content block, decoding base64 binary data.
+
+    Args:
+        data: JSON-serialized content block.
+
+    Returns:
+        Restored content block with decoded binary data.
+    """
+    if isinstance(data, str):
+        return data
+    if not isinstance(data, dict):
+        return data
+
+    kind = data.get("kind")
+    if kind == "image_content_block":
+        return ImageContentBlock(data=base64.b64decode(data["data"]))
+    elif kind == "video_content_block":
+        return VideoContentBlock(data=base64.b64decode(data["data"]))
+    elif kind == "audio_content_block":
+        return AudioContentBlock(data=base64.b64decode(data["data"]))
+    else:
+        # Unknown or already-processed block
+        return data
+
+
+def serialize_message(msg: ChatMessage) -> dict:
+    """Serialize a ChatMessage to JSON-compatible format.
+
+    Handles binary content blocks (images, video, audio) by base64 encoding.
+
+    Args:
+        msg: A ChatMessage (SystemMessage, UserMessage, AssistantMessage, ToolMessage).
+
+    Returns:
+        JSON-serializable dictionary.
+    """
+    # Use Pydantic's model_dump for base serialization
+    data = msg.model_dump(mode="json")
+
+    # Handle content field which may contain binary blocks
+    content = data.get("content")
+    if isinstance(content, list):
+        data["content"] = [_serialize_content_block(block) for block in content]
+    elif content is not None and not isinstance(content, str):
+        data["content"] = _serialize_content_block(content)
+
+    return data
+
+
+def deserialize_message(data: dict) -> ChatMessage:
+    """Deserialize a ChatMessage from JSON format.
+
+    Handles base64-encoded binary content blocks.
+
+    Args:
+        data: JSON dictionary representing a ChatMessage.
+
+    Returns:
+        Restored ChatMessage object.
+    """
+    # Handle content field which may contain base64-encoded binary blocks
+    content = data.get("content")
+    if isinstance(content, list):
+        data["content"] = [_deserialize_content_block(block) for block in content]
+    elif content is not None and not isinstance(content, str):
+        data["content"] = _deserialize_content_block(content)
+
+    # Use TypeAdapter for discriminated union deserialization
+    return ChatMessageAdapter.validate_python(data)
+
+
+def serialize_messages(msgs: list[ChatMessage]) -> list[dict]:
+    """Serialize a list of ChatMessages to JSON-compatible format.
+
+    Args:
+        msgs: List of ChatMessage objects.
+
+    Returns:
+        List of JSON-serializable dictionaries.
+    """
+    return [serialize_message(msg) for msg in msgs]
+
+
+def _serialize_metadata_item(item: Any) -> Any:  # noqa: ANN401
+    """Serialize a single metadata item to JSON-compatible format.
+
+    Handles Pydantic models by calling model_dump(mode='json').
+    Handles bytes by base64 encoding them.
+    """
+    from pydantic import BaseModel
+
+    if isinstance(item, BaseModel):
+        return item.model_dump(mode="json")
+    elif isinstance(item, bytes):
+        # Base64 encode raw bytes to make them JSON-serializable
+        return base64.b64encode(item).decode("ascii")
+    elif isinstance(item, dict):
+        return {k: _serialize_metadata_item(v) for k, v in item.items()}
+    elif isinstance(item, list):
+        return [_serialize_metadata_item(i) for i in item]
+    else:
+        return item
+
+
+def _serialize_run_metadata(run_metadata: dict[str, list[Any]]) -> dict[str, list[Any]]:
+    """Serialize run_metadata dict containing Pydantic models to JSON-compatible format.
+
+    Args:
+        run_metadata: Dict mapping tool names to lists of metadata (may contain Pydantic models).
+
+    Returns:
+        JSON-serializable dictionary.
+    """
+    return {
+        tool_name: [_serialize_metadata_item(item) for item in metadata_list]
+        for tool_name, metadata_list in run_metadata.items()
+    }
+
+
+def deserialize_messages(data: list[dict]) -> list[ChatMessage]:
+    """Deserialize a list of ChatMessages from JSON format.
+
+    Args:
+        data: List of JSON dictionaries representing ChatMessages.
+
+    Returns:
+        List of restored ChatMessage objects.
+    """
+    return [deserialize_message(msg_data) for msg_data in data]
+
+
+@dataclass
+class CacheState:
+    """Serializable state for resuming an agent run.
+
+    Captures all necessary state to resume execution from a specific turn.
+    """
+
+    msgs: list[ChatMessage]
+    """Current conversation messages in the active run loop."""
+
+    full_msg_history: list[list[ChatMessage]]
+    """Groups of messages (separated when context summarization occurs)."""
+
+    turn: int
+    """Current turn number (0-indexed) - resume will start from this turn."""
+
+    run_metadata: dict[str, list[Any]]
+    """Accumulated tool metadata from the run."""
+
+    task_hash: str
+    """Hash of the original init_msgs for verification on resume."""
+
+    timestamp: str = field(default_factory=lambda: datetime.now(UTC).isoformat())
+    """ISO timestamp when cache was created."""
+
+    agent_name: str = ""
+    """Name of the agent that created this cache."""
+
+    def to_dict(self) -> dict:
+        """Convert to JSON-serializable dictionary."""
+        return {
+            "msgs": serialize_messages(self.msgs),
+            "full_msg_history": [serialize_messages(group) for group in self.full_msg_history],
+            "turn": self.turn,
+            "run_metadata": _serialize_run_metadata(self.run_metadata),
+            "task_hash": self.task_hash,
+            "timestamp": self.timestamp,
+            "agent_name": self.agent_name,
+        }
+
+    @classmethod
+    def from_dict(cls, data: dict) -> "CacheState":
+        """Create CacheState from JSON dictionary."""
+        return cls(
+            msgs=deserialize_messages(data["msgs"]),
+            full_msg_history=[deserialize_messages(group) for group in data["full_msg_history"]],
+            turn=data["turn"],
+            run_metadata=data["run_metadata"],
+            task_hash=data["task_hash"],
+            timestamp=data.get("timestamp", ""),
+            agent_name=data.get("agent_name", ""),
+        )
+
+
+class CacheManager:
+    """Manages cache operations for agent sessions.
+
+    Handles saving/loading cache state and execution environment files.
+    """
+
+    def __init__(
+        self,
+        cache_base_dir: Path | None = None,
+        clear_on_success: bool = True,
+    ) -> None:
+        """Initialize CacheManager.
+
+        Args:
+            cache_base_dir: Base directory for cache storage.
+                Defaults to ~/.cache/stirrup/
+            clear_on_success: If True (default), automatically clear the cache when
+                the agent completes successfully. Set to False to preserve
+                caches for inspection or manual management.
+        """
+        self._cache_base_dir = cache_base_dir or DEFAULT_CACHE_DIR
+        self.clear_on_success = clear_on_success
+
+    def _get_cache_dir(self, task_hash: str) -> Path:
+        """Get cache directory path for a task hash."""
+        return self._cache_base_dir / task_hash
+
+    def _get_state_file(self, task_hash: str) -> Path:
+        """Get state.json file path for a task hash."""
+        return self._get_cache_dir(task_hash) / "state.json"
+
+    def _get_files_dir(self, task_hash: str) -> Path:
+        """Get files directory path for a task hash."""
+        return self._get_cache_dir(task_hash) / "files"
+
+    def save_state(
+        self,
+        task_hash: str,
+        state: CacheState,
+        exec_env_dir: Path | None = None,
+    ) -> None:
+        """Save cache state and optionally archive execution environment files.
+
+        Uses atomic writes to prevent corrupted cache files if interrupted mid-write.
+
+        Args:
+            task_hash: Unique identifier for this task/cache.
+            state: CacheState to persist.
+            exec_env_dir: Optional path to execution environment temp directory.
+                If provided, all files will be copied to cache.
+        """
+        cache_dir = self._get_cache_dir(task_hash)
+        cache_dir.mkdir(parents=True, exist_ok=True)
+
+        # Save state JSON using atomic write (write to temp file, then rename)
+        state_file = self._get_state_file(task_hash)
+        temp_file = state_file.with_suffix(".json.tmp")
+
+        try:
+            state_data = state.to_dict()
+            logger.debug("Serialized cache state: turn=%d, msgs=%d", state.turn, len(state.msgs))
+
+            with open(temp_file, "w", encoding="utf-8") as f:
+                json.dump(state_data, f, indent=2, ensure_ascii=False)
+                f.flush()
+                os.fsync(f.fileno())  # Ensure data is written to disk
+
+            logger.debug("Wrote temp file: %s", temp_file)
+
+            # Atomic rename (on POSIX systems)
+            temp_file.replace(state_file)
+            logger.info("Saved cache state to %s (turn %d)", state_file, state.turn)
+        except Exception as e:
+            logger.exception("Failed to save cache state: %s", e)
+            # Try direct write as fallback
+            try:
+                logger.warning("Attempting direct write as fallback")
+                with open(state_file, "w", encoding="utf-8") as f:
+                    json.dump(state_data, f, indent=2, ensure_ascii=False)
+                    f.flush()
+                    os.fsync(f.fileno())
+                logger.info("Fallback write succeeded to %s", state_file)
+            except Exception as e2:
+                logger.exception("Fallback write also failed: %s", e2)
+                # Clean up temp file if it exists
+                if temp_file.exists():
+                    temp_file.unlink()
+                raise
+
+        # Copy execution environment files if provided
+        if exec_env_dir and exec_env_dir.exists():
+            files_dir = self._get_files_dir(task_hash)
+            if files_dir.exists():
+                shutil.rmtree(files_dir)  # Clear existing files
+            shutil.copytree(exec_env_dir, files_dir, dirs_exist_ok=True)
+            logger.info("Saved execution environment files to %s", files_dir)
+
+    def load_state(self, task_hash: str) -> CacheState | None:
+        """Load cached state for a task hash.
+
+        Args:
+            task_hash: Unique identifier for the task/cache.
+
+        Returns:
+            CacheState if cache exists, None otherwise.
+        """
+        state_file = self._get_state_file(task_hash)
+        if not state_file.exists():
+            logger.debug("No cache found for task %s", task_hash)
+            return None
+
+        try:
+            with open(state_file, encoding="utf-8") as f:
+                data = json.load(f)
+            state = CacheState.from_dict(data)
+            logger.info("Loaded cache state from %s (turn %d)", state_file, state.turn)
+            return state
+        except (json.JSONDecodeError, KeyError, ValueError) as e:
+            logger.warning("Failed to load cache for task %s: %s", task_hash, e)
+            return None
+
+    def restore_files(self, task_hash: str, dest_dir: Path) -> bool:
+        """Restore cached files to the destination directory.
+
+        Args:
+            task_hash: Unique identifier for the task/cache.
+            dest_dir: Destination directory (typically the new exec env temp dir).
+
+        Returns:
+            True if files were restored, False if no files cache exists.
+        """
+        files_dir = self._get_files_dir(task_hash)
+        if not files_dir.exists():
+            logger.debug("No cached files for task %s", task_hash)
+            return False
+
+        # Copy all files from cache to destination
+        for item in files_dir.iterdir():
+            dest_item = dest_dir / item.name
+            if item.is_file():
+                shutil.copy2(item, dest_item)
+            else:
+                shutil.copytree(item, dest_item, dirs_exist_ok=True)
+
+        logger.info("Restored cached files from %s to %s", files_dir, dest_dir)
+        return True
+
+    def clear_cache(self, task_hash: str) -> None:
+        """Remove cache for a specific task.
+
+        Called after successful completion to clean up.
+
+        Args:
+            task_hash: Unique identifier for the task/cache.
+        """
+        cache_dir = self._get_cache_dir(task_hash)
+        if cache_dir.exists():
+            shutil.rmtree(cache_dir)
+            logger.info("Cleared cache for task %s", task_hash)
+
+    def list_caches(self) -> list[str]:
+        """List all available cache hashes.
+
+        Returns:
+            List of task hashes with existing caches.
+        """
+        if not self._cache_base_dir.exists():
+            return []
+
+        return [d.name for d in self._cache_base_dir.iterdir() if d.is_dir() and (d / "state.json").exists()]
+
+    def get_cache_info(self, task_hash: str) -> dict | None:
+        """Get metadata about a cache without fully loading it.
+
+        Args:
+            task_hash: Unique identifier for the task/cache.
+
+        Returns:
+            Dictionary with cache info (turn, timestamp, agent_name) or None.
+        """
+        state_file = self._get_state_file(task_hash)
+        if not state_file.exists():
+            return None
+
+        try:
+            with open(state_file, encoding="utf-8") as f:
+                data = json.load(f)
+            return {
+                "task_hash": task_hash,
+                "turn": data.get("turn", 0),
+                "timestamp": data.get("timestamp", ""),
+                "agent_name": data.get("agent_name", ""),
+                "has_files": self._get_files_dir(task_hash).exists(),
+            }
+        except (json.JSONDecodeError, KeyError):
+            return None
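For orientation, a minimal sketch of how the new cache module could be driven. Only the `stirrup.core.cache` names come from the diff above; the task prompt, directories, and placeholder message lists are illustrative assumptions, not stirrup's documented workflow.

```python
from pathlib import Path

from stirrup.core.cache import CacheManager, CacheState, compute_task_hash

task_prompt = "Summarize the attached report"   # hypothetical task
task_hash = compute_task_hash(task_prompt)       # 12-char SHA256 prefix

manager = CacheManager(clear_on_success=True)    # defaults to ~/.cache/stirrup/

# On a non-success exit, persist the run so it can be resumed later.
state = CacheState(
    msgs=[],                # current conversation (ChatMessage objects in practice)
    full_msg_history=[[]],  # message groups, split at context-summarization points
    turn=3,
    run_metadata={},
    task_hash=task_hash,
    agent_name="example-agent",
)
manager.save_state(task_hash, state, exec_env_dir=Path("/tmp/exec-env"))  # hypothetical dir

# On the next run, check for a resumable cache.
if (cached := manager.load_state(task_hash)) is not None:
    manager.restore_files(task_hash, dest_dir=Path("/tmp/new-exec-env"))  # hypothetical dir
    print(f"Resuming from turn {cached.turn} ({cached.timestamp})")
```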
stirrup/core/models.py
CHANGED
@@ -1,3 +1,4 @@
+import base64
 import mimetypes
 import warnings
 from abc import ABC, abstractmethod
@@ -15,7 +16,7 @@ import filetype
 from moviepy import AudioFileClip, VideoFileClip
 from moviepy.video.fx import Resize
 from PIL import Image
-from pydantic import BaseModel, Field, model_validator
+from pydantic import BaseModel, Field, PlainSerializer, PlainValidator, model_validator
 
 from stirrup.constants import RESOLUTION_1MP, RESOLUTION_480P
 
@@ -27,6 +28,7 @@ __all__ = [
     "ChatMessage",
     "Content",
     "ContentBlock",
+    "EmptyParams",
     "ImageContentBlock",
     "LLMClient",
     "SubAgentMetadata",
@@ -44,6 +46,25 @@ __all__ = [
 ]
 
 
+def _bytes_to_b64(v: bytes) -> str:
+    return base64.b64encode(v).decode("ascii")
+
+
+def _b64_to_bytes(v: bytes | str) -> bytes:
+    if isinstance(v, bytes):
+        return v
+    if isinstance(v, str):
+        return base64.b64decode(v.encode("ascii"))
+    raise TypeError("Invalid bytes value")
+
+
+Base64Bytes = Annotated[
+    bytes,
+    PlainValidator(_b64_to_bytes),
+    PlainSerializer(_bytes_to_b64, when_used="json"),
+]
+
+
 def downscale_image(w: int, h: int, max_pixels: int | None = 1_000_000) -> tuple[int, int]:
     """Downscale image dimensions to fit within max pixel count while maintaining aspect ratio.
 
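A self-contained sketch of what the new `Base64Bytes` annotation does, shown with a stand-in `Blob` model rather than stirrup's own `BinaryContentBlock`; the model name and sample bytes are invented for illustration.

```python
import base64
from typing import Annotated

from pydantic import BaseModel, PlainSerializer, PlainValidator


def _b64_to_bytes(v: bytes | str) -> bytes:
    # Accept raw bytes as-is, or decode a base64 string back into bytes.
    if isinstance(v, bytes):
        return v
    if isinstance(v, str):
        return base64.b64decode(v.encode("ascii"))
    raise TypeError("Invalid bytes value")


Base64Bytes = Annotated[
    bytes,
    PlainValidator(_b64_to_bytes),
    PlainSerializer(lambda v: base64.b64encode(v).decode("ascii"), when_used="json"),
]


class Blob(BaseModel):  # stand-in for BinaryContentBlock
    data: Base64Bytes


blob = Blob(data=b"\x89PNG...")
as_json = blob.model_dump_json()            # bytes come out base64-encoded in JSON
restored = Blob.model_validate_json(as_json)
assert restored.data == blob.data           # round-trips back to raw bytes
```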
@@ -58,7 +79,7 @@ def downscale_image(w: int, h: int, max_pixels: int | None = 1_000_000) -> tuple
 class BinaryContentBlock(BaseModel, ABC):
     """Base class for binary content (images, video, audio) with MIME type validation."""
 
-    data:
+    data: Base64Bytes
     allowed_mime_types: ClassVar[set[str]]
 
     @property
@@ -400,12 +421,14 @@ class ToolUseCountMetadata(BaseModel):
 
     Implements Addable protocol for aggregation. Use this for tools that only need
     to track how many times they were called.
+
+    Subclasses can override __add__ with their own type thanks to Self typing.
     """
 
     num_uses: int = 1
 
-    def __add__(self, other:
-        return
+    def __add__(self, other: Self) -> Self:
+        return self.__class__(num_uses=self.num_uses + other.num_uses)
 
 
 class ToolResult[M](BaseModel):
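To illustrate the `Self`-typed `__add__` noted in the hunk above: a subclass now aggregates into its own type without redefining the operator. The `WebSearchMetadata` subclass below is invented for illustration and assumes `ToolUseCountMetadata` is importable from `stirrup.core.models`.

```python
from stirrup.core.models import ToolUseCountMetadata


class WebSearchMetadata(ToolUseCountMetadata):
    """Hypothetical per-tool metadata that only counts uses."""


total = WebSearchMetadata() + WebSearchMetadata() + WebSearchMetadata()
assert isinstance(total, WebSearchMetadata)  # __add__ returns self.__class__(...)
assert total.num_uses == 3                   # each instance defaults to num_uses=1
```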
@@ -413,17 +436,27 @@ class ToolResult[M](BaseModel):
 
     Generic over metadata type M. M should implement Addable protocol for aggregation support,
     but this is not enforced at the class level due to Pydantic schema generation limitations.
+
+    Attributes:
+        content: The result content (string, list of content blocks, or images)
+        success: Whether the tool call was successful. For finish tools, controls if agent terminates.
+        metadata: Optional metadata (e.g., usage stats) that implements Addable for aggregation
     """
 
     content: Content
+    success: bool = True
     metadata: M | None = None
 
 
+class EmptyParams(BaseModel):
+    """Empty parameter model for tools that don't require parameters."""
+
+
 class Tool[P: BaseModel, M](BaseModel):
     """Tool definition with name, description, parameter schema, and executor function.
 
     Generic over:
-        P: Parameter model type (
+        P: Parameter model type (Pydantic BaseModel subclass, or EmptyParams for parameterless tools)
         M: Metadata type (should implement Addable for aggregation; use None for tools without metadata)
 
     Tools are simple, stateless callables. For tools requiring lifecycle management
@@ -442,9 +475,9 @@ class Tool[P: BaseModel, M](BaseModel):
     )
     ```
 
-    Example without parameters:
+    Example without parameters (uses EmptyParams by default):
     ```python
-    time_tool = Tool[
+    time_tool = Tool[EmptyParams, None](
         name="time",
         description="Get current time",
         executor=lambda _: ToolResult(content=datetime.now().isoformat()),
@@ -454,7 +487,7 @@ class Tool[P: BaseModel, M](BaseModel):
 
     name: str
     description: str
-    parameters: type[P]
+    parameters: type[P] = EmptyParams  # type: ignore[assignment]
     executor: Callable[[P], ToolResult[M] | Awaitable[ToolResult[M]]]
 
 
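A hedged sketch of the new `EmptyParams` default and the `ToolResult.success` flag in use; the dice-rolling tool itself is made up, and only `Tool`, `ToolResult`, and `EmptyParams` come from the diff.

```python
import random

from stirrup.core.models import EmptyParams, Tool, ToolResult

# `parameters` can now be omitted because it defaults to EmptyParams.
roll_die_tool = Tool[EmptyParams, None](
    name="roll_die",
    description="Roll a six-sided die",
    executor=lambda _: ToolResult(content=str(random.randint(1, 6)), success=True),
)
```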
@@ -527,6 +560,7 @@ class ToolCall(BaseModel):
         tool_call_id: Unique identifier for tracking this tool call and its result
     """
 
+    signature: str | None = None
     name: str
     arguments: str
     tool_call_id: str | None = None
@@ -564,13 +598,23 @@ class AssistantMessage(BaseModel):
 
 
 class ToolMessage(BaseModel):
-    """Tool execution result returned to the LLM.
+    """Tool execution result returned to the LLM.
+
+    Attributes:
+        role: Always "tool"
+        content: The tool result content
+        tool_call_id: ID linking this result to the corresponding tool call
+        name: Name of the tool that was called
+        args_was_valid: Whether the tool arguments were valid
+        success: Whether the tool executed successfully (used by finish tool to control termination)
+    """
 
     role: Literal["tool"] = "tool"
     content: Content
     tool_call_id: str | None = None
     name: str | None = None
     args_was_valid: bool = True
+    success: bool = False
 
 
 type ChatMessage = Annotated[SystemMessage | UserMessage | AssistantMessage | ToolMessage, Field(discriminator="role")]
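For reference, an illustrative construction of the extended `ToolMessage`; the field values are made up, and whether `success=True` is set by the finish tool itself or by the agent loop is not shown in this diff.

```python
from stirrup.core.models import ToolMessage

# Hypothetical result for a finish-tool call; success controls whether the agent terminates.
finish_result = ToolMessage(
    content="Task complete, report written to report.md",
    tool_call_id="call_0",   # made-up id
    name="finish",
    success=True,
)
```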
stirrup/prompts/base_system_prompt.txt
CHANGED
@@ -1 +1 @@
-You are an AI agent that will be given a specific task. You are to complete that task using the tools provided in {max_turns} steps. You will need to call the finish tool as your last step, where you will pass your finish reason and paths to any files that you wish to return to the user.
+You are an AI agent that will be given a specific task. You are to complete that task using the tools provided in {max_turns} steps. You will need to call the finish tool as your last step, where you will pass your finish reason and paths to any files that you wish to return to the user.
stirrup/tools/__init__.py
CHANGED
@@ -47,6 +47,7 @@ Optional tool providers require explicit imports from their submodules:
 - DockerCodeExecToolProvider: `from stirrup.tools.code_backends.docker import DockerCodeExecToolProvider`
 - E2BCodeExecToolProvider: `from stirrup.tools.code_backends.e2b import E2BCodeExecToolProvider`
 - MCPToolProvider: `from stirrup.tools.mcp import MCPToolProvider`
+- BrowserUseToolProvider: `from stirrup.tools.browser_use import BrowserUseToolProvider`
 """
 
 from typing import Any
@@ -55,6 +56,7 @@ from stirrup.core.models import Tool, ToolProvider
 from stirrup.tools.calculator import CALCULATOR_TOOL
 from stirrup.tools.code_backends import CodeExecToolProvider, LocalCodeExecToolProvider
 from stirrup.tools.finish import SIMPLE_FINISH_TOOL, FinishParams
+from stirrup.tools.user_input import USER_INPUT_TOOL
 from stirrup.tools.view_image import ViewImageToolProvider
 from stirrup.tools.web import WebToolProvider
 
@@ -69,6 +71,7 @@ __all__ = [
     "CALCULATOR_TOOL",
     "DEFAULT_TOOLS",
     "SIMPLE_FINISH_TOOL",
+    "USER_INPUT_TOOL",
     "CodeExecToolProvider",
     "FinishParams",
     "LocalCodeExecToolProvider",
|