stirrup-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
stirrup/__init__.py ADDED
@@ -0,0 +1,76 @@
1
+ """Artificial Analysis' reference agent harness - originally built for running evaluations, simple to use and extend.
2
+
3
+ Example usage:
4
+ from stirrup import Agent, DEFAULT_TOOLS
5
+ from stirrup.clients.chat_completions_client import ChatCompletionsClient
6
+ from stirrup.tools.mcp import MCPToolProvider
7
+
8
+ # Create a client for your LLM provider
9
+ client = ChatCompletionsClient(model="gpt-5")
10
+
11
+ # Simple usage with default tools
12
+ agent = Agent(
13
+ client=client,
14
+ name="assistant",
15
+ system_prompt="You are a helpful assistant.",
16
+ )
17
+
18
+ async with agent.session(output_dir="./output") as session:
19
+ finish_params, history, metadata = await session.run("Your task here")
20
+ print(finish_params.reason)
21
+
22
+ # Extend default tools with MCP
23
+ agent = Agent(
24
+ client=client,
25
+ name="assistant",
26
+ tools=[*DEFAULT_TOOLS, MCPToolProvider.from_config("mcp.json")],
27
+ )
28
+ """
29
+
30
+ from stirrup import tools
31
+ from stirrup.core.agent import Agent
32
+ from stirrup.core.exceptions import ContextOverflowError
33
+ from stirrup.core.models import (
34
+ Addable,
35
+ AssistantMessage,
36
+ AudioContentBlock,
37
+ ChatMessage,
38
+ ImageContentBlock,
39
+ LLMClient,
40
+ SubAgentMetadata,
41
+ SystemMessage,
42
+ TokenUsage,
43
+ Tool,
44
+ ToolCall,
45
+ ToolMessage,
46
+ ToolProvider,
47
+ ToolResult,
48
+ ToolUseCountMetadata,
49
+ UserMessage,
50
+ VideoContentBlock,
51
+ aggregate_metadata,
52
+ )
53
+
54
+ __all__ = [
55
+ "Addable",
56
+ "Agent",
57
+ "AssistantMessage",
58
+ "AudioContentBlock",
59
+ "ChatMessage",
60
+ "ContextOverflowError",
61
+ "ImageContentBlock",
62
+ "LLMClient",
63
+ "SubAgentMetadata",
64
+ "SystemMessage",
65
+ "TokenUsage",
66
+ "Tool",
67
+ "ToolCall",
68
+ "ToolMessage",
69
+ "ToolProvider",
70
+ "ToolResult",
71
+ "ToolUseCountMetadata",
72
+ "UserMessage",
73
+ "VideoContentBlock",
74
+ "aggregate_metadata",
75
+ "tools",
76
+ ]
@@ -0,0 +1,14 @@
1
+ """LLM client implementations.
2
+
3
+ The default client is ChatCompletionsClient, which uses the OpenAI SDK directly
4
+ and supports any OpenAI-compatible API via the `base_url` parameter.
5
+
6
+ For multi-provider support via LiteLLM, install the litellm extra:
7
+ pip install stirrup[litellm]
8
+ """
9
+
10
+ from stirrup.clients.chat_completions_client import ChatCompletionsClient
11
+
12
+ __all__ = [
13
+ "ChatCompletionsClient",
14
+ ]
@@ -0,0 +1,219 @@
1
+ """OpenAI SDK-based LLM client for chat completions.
2
+
3
+ This client uses the official OpenAI Python SDK directly, supporting both OpenAI's
4
+ API and any OpenAI-compatible endpoint via the `base_url` parameter (e.g., vLLM,
5
+ Ollama, Azure OpenAI, local models).
6
+
7
+ This is the default client for Stirrup.
8
+ """
9
+
10
+ import logging
11
+ import os
12
+ from typing import Any
13
+
14
+ from openai import (
15
+ APIConnectionError,
16
+ APITimeoutError,
17
+ AsyncOpenAI,
18
+ InternalServerError,
19
+ RateLimitError,
20
+ )
21
+ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
22
+
23
+ from stirrup.clients.utils import to_openai_messages, to_openai_tools
24
+ from stirrup.core.exceptions import ContextOverflowError
25
+ from stirrup.core.models import (
26
+ AssistantMessage,
27
+ ChatMessage,
28
+ LLMClient,
29
+ Reasoning,
30
+ TokenUsage,
31
+ Tool,
32
+ ToolCall,
33
+ )
34
+
35
+ __all__ = [
36
+ "ChatCompletionsClient",
37
+ ]
38
+
39
+ LOGGER = logging.getLogger(__name__)
40
+
41
+
42
+ class ChatCompletionsClient(LLMClient):
43
+ """OpenAI SDK-based client supporting OpenAI and OpenAI-compatible APIs.
44
+
45
+ Uses the official OpenAI Python SDK directly for chat completions.
46
+ Supports custom base_url for OpenAI-compatible providers (vLLM, Ollama,
47
+ Azure OpenAI, local models, etc.).
48
+
49
+ Includes automatic retries for transient failures and token usage tracking.
50
+
51
+ Example:
52
+ >>> # Standard OpenAI usage
53
+ >>> client = ChatCompletionsClient(model="gpt-4o", max_tokens=128_000)
54
+ >>>
55
+ >>> # Custom OpenAI-compatible endpoint
56
+ >>> client = ChatCompletionsClient(
57
+ ... model="llama-3.1-70b",
58
+ ... base_url="http://localhost:8000/v1",
59
+ ... api_key="your-api-key",
60
+ ... )
61
+ """
62
+
63
+ def __init__(
64
+ self,
65
+ model: str,
66
+ max_tokens: int = 64_000,
67
+ *,
68
+ base_url: str | None = None,
69
+ api_key: str | None = None,
70
+ supports_audio_input: bool = False,
71
+ reasoning_effort: str | None = None,
72
+ timeout: float | None = None,
73
+ max_retries: int = 2,
74
+ kwargs: dict[str, Any] | None = None,
75
+ ) -> None:
76
+ """Initialize OpenAI SDK client with model configuration.
77
+
78
+ Args:
79
+ model: Model identifier (e.g., 'gpt-5', 'gpt-4o', 'o1-preview').
80
+ max_tokens: Maximum context window size in tokens. Defaults to 64,000.
81
+ base_url: API base URL. If None, uses OpenAI's standard URL.
82
+ Use for OpenAI-compatible providers (e.g., 'http://localhost:8000/v1').
83
+ api_key: API key for authentication. If None, reads from OPENROUTER_API_KEY
84
+ environment variable.
85
+ supports_audio_input: Whether the model supports audio inputs. Defaults to False.
86
+ reasoning_effort: Reasoning effort level for extended thinking models
87
+ (e.g., 'low', 'medium', 'high'). Only used with o1/o3 style models.
88
+ timeout: Request timeout in seconds. If None, uses OpenAI SDK default.
89
+ max_retries: Number of retries for transient errors. Defaults to 2.
90
+ The OpenAI SDK handles retries internally with exponential backoff.
91
+ kwargs: Additional arguments passed to chat.completions.create().
92
+ """
93
+ self._model = model
94
+ self._max_tokens = max_tokens
95
+ self._supports_audio_input = supports_audio_input
96
+ self._reasoning_effort = reasoning_effort
97
+ self._kwargs = kwargs or {}
98
+
99
+ # Initialize AsyncOpenAI client
100
+ # Read from OPENROUTER_API_KEY if no api_key provided
101
+ resolved_api_key = api_key or os.environ.get("OPENROUTER_API_KEY")
102
+ self._client = AsyncOpenAI(
103
+ api_key=resolved_api_key,
104
+ base_url=base_url,
105
+ timeout=timeout,
106
+ max_retries=max_retries,
107
+ )
108
+
109
+ @property
110
+ def max_tokens(self) -> int:
111
+ """Maximum context window size in tokens."""
112
+ return self._max_tokens
113
+
114
+ @property
115
+ def model_slug(self) -> str:
116
+ """Model identifier."""
117
+ return self._model
118
+
119
+ @retry(
120
+ retry=retry_if_exception_type(
121
+ (
122
+ APIConnectionError,
123
+ APITimeoutError,
124
+ RateLimitError,
125
+ InternalServerError,
126
+ )
127
+ ),
128
+ stop=stop_after_attempt(3),
129
+ wait=wait_exponential(multiplier=1, min=1, max=10),
130
+ )
131
+ async def generate(
132
+ self,
133
+ messages: list[ChatMessage],
134
+ tools: dict[str, Tool],
135
+ ) -> AssistantMessage:
136
+ """Generate assistant response with optional tool calls.
137
+
138
+ Retries up to 3 times on transient errors (connection, timeout, rate limit,
139
+ internal server errors) with exponential backoff.
140
+
141
+ Args:
142
+ messages: List of conversation messages.
143
+ tools: Dictionary mapping tool names to Tool objects.
144
+
145
+ Returns:
146
+ AssistantMessage containing the model's response, any tool calls,
147
+ and token usage statistics.
148
+
149
+ Raises:
150
+ ContextOverflowError: If the context window is exceeded.
151
+ """
152
+ # Build request kwargs
153
+ request_kwargs: dict[str, Any] = {
154
+ "model": self._model,
155
+ "messages": to_openai_messages(messages),
156
+ "max_completion_tokens": self._max_tokens,
157
+ **self._kwargs,
158
+ }
159
+
160
+ # Add tools if provided
161
+ if tools:
162
+ request_kwargs["tools"] = to_openai_tools(tools)
163
+ request_kwargs["tool_choice"] = "auto"
164
+
165
+ # Add reasoning effort if configured (for o1/o3 models)
166
+ if self._reasoning_effort:
167
+ request_kwargs["reasoning_effort"] = self._reasoning_effort
168
+
169
+ # Make API call
170
+ response = await self._client.chat.completions.create(**request_kwargs)
171
+
172
+ choice = response.choices[0]
173
+
174
+ # Check for context overflow
175
+ if choice.finish_reason in ("max_tokens", "length"):
176
+ raise ContextOverflowError(
177
+ f"Maximal context window tokens reached for model {self.model_slug}, "
178
+ f"resulting in finish reason: {choice.finish_reason}. "
179
+ "Reduce agent.max_tokens and try again."
180
+ )
181
+
182
+ msg = choice.message
183
+
184
+ # Parse reasoning content (for o1/o3 models with extended thinking)
185
+ reasoning: Reasoning | None = None
186
+ if hasattr(msg, "reasoning_content") and msg.reasoning_content:
187
+ reasoning = Reasoning(content=msg.reasoning_content)
188
+
189
+ # Parse tool calls
190
+ tool_calls = [
191
+ ToolCall(
192
+ tool_call_id=tc.id,
193
+ name=tc.function.name,
194
+ arguments=tc.function.arguments or "",
195
+ )
196
+ for tc in (msg.tool_calls or [])
197
+ ]
198
+
199
+ # Parse token usage
200
+ usage = response.usage
201
+ input_tokens = usage.prompt_tokens if usage else 0
202
+ output_tokens = usage.completion_tokens if usage else 0
203
+
204
+ # Handle reasoning tokens if available (for o1/o3 models)
205
+ reasoning_tokens = 0
206
+ if usage and hasattr(usage, "completion_tokens_details") and usage.completion_tokens_details:
207
+ reasoning_tokens = getattr(usage.completion_tokens_details, "reasoning_tokens", 0) or 0
208
+ output_tokens = output_tokens - reasoning_tokens
209
+
210
+ return AssistantMessage(
211
+ reasoning=reasoning,
212
+ content=msg.content or "",
213
+ tool_calls=tool_calls,
214
+ token_usage=TokenUsage(
215
+ input=input_tokens,
216
+ output=output_tokens,
217
+ reasoning=reasoning_tokens,
218
+ ),
219
+ )
@@ -0,0 +1,141 @@
1
+ """LiteLLM-based LLM client for multi-provider support.
2
+
3
+ This client uses LiteLLM to provide a unified interface to multiple LLM providers
4
+ (OpenAI, Anthropic, Google, etc.) with automatic retries for transient failures.
5
+
6
+ Requires the litellm extra: `pip install stirrup[litellm]`
7
+ """
8
+
9
+ import logging
10
+ from typing import Any
11
+
12
+ try:
13
+ from litellm import acompletion
14
+ from litellm.exceptions import APIConnectionError, RateLimitError, Timeout
15
+ except ImportError as e:
16
+ raise ImportError(
17
+ "Requires installation of the litellm extra. "
18
+ "Install with: `uv pip install stirrup[litellm]` or `uv add stirrup[litellm]`"
19
+ ) from e
20
+
21
+ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
22
+
23
+ from stirrup.clients.utils import to_openai_messages, to_openai_tools
24
+ from stirrup.core.exceptions import ContextOverflowError
25
+ from stirrup.core.models import (
26
+ AssistantMessage,
27
+ ChatMessage,
28
+ LLMClient,
29
+ Reasoning,
30
+ TokenUsage,
31
+ Tool,
32
+ ToolCall,
33
+ )
34
+
35
+ __all__ = [
36
+ "LiteLLMClient",
37
+ ]
38
+
39
+ LOGGER = logging.getLogger(__name__)
40
+
41
+
42
class LiteLLMClient(LLMClient):
    """LiteLLM-based client supporting multiple LLM providers with unified interface.

    Includes automatic retries for transient failures and token usage tracking.
    """

    def __init__(
        self,
        model_slug: str,
        max_tokens: int,
        supports_audio_input: bool = False,
        reasoning_effort: str | None = None,
        kwargs: dict[str, Any] | None = None,
    ) -> None:
        """Initialize LiteLLM client with model configuration and capabilities.

        Args:
            model_slug: Model identifier for LiteLLM (e.g., 'anthropic/claude-3-5-sonnet-20241022')
            max_tokens: Maximum context window size in tokens
            supports_audio_input: Whether the model supports audio inputs
            reasoning_effort: Reasoning effort level for extended thinking models (e.g., 'medium', 'high')
            kwargs: Additional arguments to pass to LiteLLM completion calls
        """
        self._model_slug = model_slug
        self._supports_video_input = False
        self._supports_audio_input = supports_audio_input
        self._max_tokens = max_tokens
        self._reasoning_effort = reasoning_effort
        self._kwargs = kwargs or {}

    @property
    def max_tokens(self) -> int:
        """Maximum context window size in tokens."""
        return self._max_tokens

    @property
    def model_slug(self) -> str:
        """Model identifier used by LiteLLM."""
        return self._model_slug

    @retry(
        retry=retry_if_exception_type((Timeout, APIConnectionError, RateLimitError)),
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
    )
    async def generate(self, messages: list[ChatMessage], tools: dict[str, Tool]) -> AssistantMessage:
        """Generate assistant response with optional tool calls.

        Retries up to 3 times on timeout/connection/rate-limit errors with
        exponential backoff.

        Raises:
            ContextOverflowError: If the provider reports a length-style finish
                reason, i.e. the context window was exhausted.
        """
        extra = dict(self._kwargs)
        # BUG FIX: reasoning_effort was accepted in __init__ but never forwarded
        # to LiteLLM, silently disabling extended thinking. Forward it unless the
        # caller already supplied one via kwargs.
        if self._reasoning_effort and "reasoning_effort" not in extra:
            extra["reasoning_effort"] = self._reasoning_effort

        r = await acompletion(
            model=self.model_slug,
            messages=to_openai_messages(messages),
            tools=to_openai_tools(tools) if tools else None,
            tool_choice="auto" if tools else None,
            max_tokens=self._max_tokens,
            **extra,
        )

        choice = r["choices"][0]

        if choice.finish_reason in ["max_tokens", "length"]:
            raise ContextOverflowError(
                f"Maximal context window tokens reached for model {self.model_slug}, resulting in finish reason: {choice.finish_reason}. Reduce agent.max_tokens and try again."
            )

        msg = choice["message"]

        # Prefer structured thinking blocks over plain reasoning_content.
        reasoning: Reasoning | None = None
        if getattr(msg, "reasoning_content", None) is not None:
            reasoning = Reasoning(content=msg.reasoning_content)
        thinking_blocks = getattr(msg, "thinking_blocks", None)
        if thinking_blocks:
            first = thinking_blocks[0]
            # BUG FIX: thinking blocks carry their text under the "thinking" key
            # (the same shape this package writes in to_openai_messages); the old
            # code read a "content" key that raised KeyError on real blocks.
            # Fall back to "content" defensively for providers that use it.
            reasoning = Reasoning(
                signature=first["signature"],
                content=first.get("thinking", first.get("content")),
            )

        usage = r["usage"]

        calls = [
            ToolCall(
                tool_call_id=tc.get("id"),
                name=tc["function"]["name"],
                arguments=tc["function"].get("arguments", "") or "",
            )
            for tc in (msg.get("tool_calls") or [])
        ]

        input_tokens = usage.prompt_tokens
        reasoning_tokens = 0
        # Guarded lookup for providers whose usage object lacks token details,
        # consistent with ChatCompletionsClient's handling.
        details = getattr(usage, "completion_tokens_details", None)
        if details:
            reasoning_tokens = details.reasoning_tokens or 0
        # Reasoning tokens are included in completion_tokens; report them separately.
        output_tokens = usage.completion_tokens - reasoning_tokens

        return AssistantMessage(
            reasoning=reasoning,
            content=msg.get("content") or "",
            tool_calls=calls,
            token_usage=TokenUsage(
                input=input_tokens,
                output=output_tokens,
                reasoning=reasoning_tokens,
            ),
        )
@@ -0,0 +1,161 @@
1
+ """Shared utilities for OpenAI-compatible message and tool conversion.
2
+
3
+ These helper functions convert Stirrup's internal message and tool formats
4
+ to the OpenAI API format. Since LiteLLM and the OpenAI SDK use identical
5
+ formats, these utilities are shared between both client implementations.
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from stirrup.core.models import (
11
+ AssistantMessage,
12
+ AudioContentBlock,
13
+ ChatMessage,
14
+ Content,
15
+ ImageContentBlock,
16
+ SystemMessage,
17
+ Tool,
18
+ ToolMessage,
19
+ UserMessage,
20
+ VideoContentBlock,
21
+ )
22
+
23
+ __all__ = [
24
+ "content_to_openai",
25
+ "to_openai_messages",
26
+ "to_openai_tools",
27
+ ]
28
+
29
+
30
def to_openai_tools(tools: dict[str, Tool]) -> list[dict[str, Any]]:
    """Translate Tool objects into OpenAI function-calling tool definitions.

    Args:
        tools: Mapping of tool name to Tool object.

    Returns:
        Tool definitions shaped as OpenAI expects:
        ``[{"type": "function", "function": {"name": ..., "description": ..., "parameters": ...}}]``.
        The ``parameters`` JSON schema is included only when the tool declares
        parameters.

    Example:
        >>> tools = {"calculator": calculator_tool}
        >>> openai_tools = to_openai_tools(tools)
        >>> # Returns: [{"type": "function", "function": {"name": "calculator", ...}}]
    """
    converted: list[dict[str, Any]] = []
    for tool in tools.values():
        spec: dict[str, Any] = {
            "name": tool.name,
            "description": tool.description,
        }
        if tool.parameters is not None:
            # Parameters are a pydantic model; export its JSON schema.
            spec["parameters"] = tool.parameters.model_json_schema()
        converted.append({"type": "function", "function": spec})
    return converted
58
+
59
+
60
def content_to_openai(content: Content) -> list[dict[str, Any]] | str:
    """Translate Stirrup content into OpenAI chat-message content parts.

    Supports plain strings plus image, audio, and video content blocks.

    Args:
        content: A bare string, or a list mixing strings and content blocks.

    Returns:
        A list of OpenAI-format content-part dictionaries; a bare string is
        returned as a single text part.

    Raises:
        NotImplementedError: For content block types this converter does not handle.
    """
    # A bare string becomes a single text part.
    if isinstance(content, str):
        return [{"type": "text", "text": content}]

    parts: list[dict[str, Any]] = []
    for item in content:
        if isinstance(item, str):
            converted: dict[str, Any] = {"type": "text", "text": item}
        elif isinstance(item, ImageContentBlock):
            converted = {"type": "image_url", "image_url": {"url": item.to_base64_url()}}
        elif isinstance(item, AudioContentBlock):
            # input_audio wants raw base64 data, so strip the "data:...;base64," prefix.
            converted = {
                "type": "input_audio",
                "input_audio": {
                    "data": item.to_base64_url().split(",")[1],
                    "format": item.extension,
                },
            }
        elif isinstance(item, VideoContentBlock):
            converted = {"type": "file", "file": {"file_data": item.to_base64_url()}}
        else:
            raise NotImplementedError(f"Unsupported content block: {type(item)}")
        parts.append(converted)
    return parts
100
+
101
+
102
def to_openai_messages(msgs: list[ChatMessage]) -> list[dict[str, Any]]:
    """Convert ChatMessage list to OpenAI-compatible message dictionaries.

    Handles all message types: SystemMessage, UserMessage, AssistantMessage,
    and ToolMessage. Preserves reasoning content and tool calls for assistant
    messages.

    Args:
        msgs: List of ChatMessage objects (System, User, Assistant, or Tool messages).

    Returns:
        List of message dictionaries ready for the OpenAI API.

    Raises:
        NotImplementedError: If an unsupported message type is encountered.
    """
    out: list[dict[str, Any]] = []
    for m in msgs:
        if isinstance(m, SystemMessage):
            out.append({"role": "system", "content": content_to_openai(m.content)})
        elif isinstance(m, UserMessage):
            out.append({"role": "user", "content": content_to_openai(m.content)})
        elif isinstance(m, AssistantMessage):
            msg: dict[str, Any] = {"role": "assistant", "content": content_to_openai(m.content)}

            if m.reasoning:
                if m.reasoning.content:
                    msg["reasoning_content"] = m.reasoning.content

                # Signed thinking (e.g. Anthropic via LiteLLM) must be echoed
                # back as a thinking block so the signature round-trips.
                if m.reasoning.signature:
                    msg["thinking_blocks"] = [
                        {"type": "thinking", "signature": m.reasoning.signature, "thinking": m.reasoning.content}
                    ]

            if m.tool_calls:
                # BUG FIX: previously each entry was tool.model_dump() with the
                # OpenAI fields merged on top, which leaked internal keys
                # ("tool_call_id", "name", "arguments") at the top level of the
                # payload; strict OpenAI-compatible endpoints reject unknown
                # fields. Emit only the documented schema.
                msg["tool_calls"] = [
                    {
                        "id": tc.tool_call_id,
                        "type": "function",
                        "function": {
                            "name": tc.name,
                            "arguments": tc.arguments,
                        },
                    }
                    for tc in m.tool_calls
                ]

            out.append(msg)
        elif isinstance(m, ToolMessage):
            out.append(
                {
                    "role": "tool",
                    "content": content_to_openai(m.content),
                    "tool_call_id": m.tool_call_id,
                    "name": m.name,
                }
            )
        else:
            raise NotImplementedError(f"Unsupported message type: {type(m)}")

    return out
stirrup/constants.py ADDED
@@ -0,0 +1,14 @@
1
# Tool naming
FINISH_TOOL_NAME = "finish"  # name of the built-in tool an agent calls to end its run

# Agent execution limits
AGENT_MAX_TURNS = 30  # Maximum agent turns before forced termination
CONTEXT_SUMMARIZATION_CUTOFF = 0.7  # Context window usage threshold (0.0-1.0) that triggers message summarization

# Media resolution limits (expressed as total pixel counts, not dimensions)
RESOLUTION_1MP = 1_000_000  # 1 megapixel - default max resolution for images
RESOLUTION_480P = 640 * 480  # 480p video resolution

# Code execution
SUBMISSION_SANDBOX_TIMEOUT = 60 * 10  # 10 minutes, in seconds
E2B_SANDBOX_TEMPLATE_ALIAS = "e2b-sandbox"  # E2B sandbox template alias — presumably must match the deployed template name; verify
@@ -0,0 +1 @@
1
+ """Core agent framework components."""