stirrup-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
stirrup/__init__.py ADDED
@@ -0,0 +1,76 @@
1
+ """Artificial Analysis' reference agent harness - originally built for running evaluations, simple to use and extend.
2
+
3
+ Example usage:
4
+ from stirrup import Agent, DEFAULT_TOOLS
5
+ from stirrup.clients.chat_completions_client import ChatCompletionsClient
6
+ from stirrup.tools.mcp import MCPToolProvider
7
+
8
+ # Create a client for your LLM provider
9
+ client = ChatCompletionsClient(model="gpt-5")
10
+
11
+ # Simple usage with default tools
12
+ agent = Agent(
13
+ client=client,
14
+ name="assistant",
15
+ system_prompt="You are a helpful assistant.",
16
+ )
17
+
18
+ async with agent.session(output_dir="./output") as session:
19
+ finish_params, history, metadata = await session.run("Your task here")
20
+ print(finish_params.reason)
21
+
22
+ # Extend default tools with MCP
23
+ agent = Agent(
24
+ client=client,
25
+ name="assistant",
26
+ tools=[*DEFAULT_TOOLS, MCPToolProvider.from_config("mcp.json")],
27
+ )
28
+ """
29
+
30
+ from stirrup import tools
31
+ from stirrup.core.agent import Agent
32
+ from stirrup.core.exceptions import ContextOverflowError
33
+ from stirrup.core.models import (
34
+ Addable,
35
+ AssistantMessage,
36
+ AudioContentBlock,
37
+ ChatMessage,
38
+ ImageContentBlock,
39
+ LLMClient,
40
+ SubAgentMetadata,
41
+ SystemMessage,
42
+ TokenUsage,
43
+ Tool,
44
+ ToolCall,
45
+ ToolMessage,
46
+ ToolProvider,
47
+ ToolResult,
48
+ ToolUseCountMetadata,
49
+ UserMessage,
50
+ VideoContentBlock,
51
+ aggregate_metadata,
52
+ )
53
+
54
+ __all__ = [
55
+ "Addable",
56
+ "Agent",
57
+ "AssistantMessage",
58
+ "AudioContentBlock",
59
+ "ChatMessage",
60
+ "ContextOverflowError",
61
+ "ImageContentBlock",
62
+ "LLMClient",
63
+ "SubAgentMetadata",
64
+ "SystemMessage",
65
+ "TokenUsage",
66
+ "Tool",
67
+ "ToolCall",
68
+ "ToolMessage",
69
+ "ToolProvider",
70
+ "ToolResult",
71
+ "ToolUseCountMetadata",
72
+ "UserMessage",
73
+ "VideoContentBlock",
74
+ "aggregate_metadata",
75
+ "tools",
76
+ ]
@@ -0,0 +1,14 @@
1
+ """LLM client implementations.
2
+
3
+ The default client is ChatCompletionsClient, which uses the OpenAI SDK directly
4
+ and supports any OpenAI-compatible API via the `base_url` parameter.
5
+
6
+ For multi-provider support via LiteLLM, install the litellm extra:
7
+ pip install stirrup[litellm]
8
+ """
9
+
10
+ from stirrup.clients.chat_completions_client import ChatCompletionsClient
11
+
12
+ __all__ = [
13
+ "ChatCompletionsClient",
14
+ ]
@@ -0,0 +1,219 @@
1
+ """OpenAI SDK-based LLM client for chat completions.
2
+
3
+ This client uses the official OpenAI Python SDK directly, supporting both OpenAI's
4
+ API and any OpenAI-compatible endpoint via the `base_url` parameter (e.g., vLLM,
5
+ Ollama, Azure OpenAI, local models).
6
+
7
+ This is the default client for Stirrup.
8
+ """
9
+
10
+ import logging
11
+ import os
12
+ from typing import Any
13
+
14
+ from openai import (
15
+ APIConnectionError,
16
+ APITimeoutError,
17
+ AsyncOpenAI,
18
+ InternalServerError,
19
+ RateLimitError,
20
+ )
21
+ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
22
+
23
+ from stirrup.clients.utils import to_openai_messages, to_openai_tools
24
+ from stirrup.core.exceptions import ContextOverflowError
25
+ from stirrup.core.models import (
26
+ AssistantMessage,
27
+ ChatMessage,
28
+ LLMClient,
29
+ Reasoning,
30
+ TokenUsage,
31
+ Tool,
32
+ ToolCall,
33
+ )
34
+
35
+ __all__ = [
36
+ "ChatCompletionsClient",
37
+ ]
38
+
39
+ LOGGER = logging.getLogger(__name__)
40
+
41
+
42
+ class ChatCompletionsClient(LLMClient):
43
+ """OpenAI SDK-based client supporting OpenAI and OpenAI-compatible APIs.
44
+
45
+ Uses the official OpenAI Python SDK directly for chat completions.
46
+ Supports custom base_url for OpenAI-compatible providers (vLLM, Ollama,
47
+ Azure OpenAI, local models, etc.).
48
+
49
+ Includes automatic retries for transient failures and token usage tracking.
50
+
51
+ Example:
52
+ >>> # Standard OpenAI usage
53
+ >>> client = ChatCompletionsClient(model="gpt-4o", max_tokens=128_000)
54
+ >>>
55
+ >>> # Custom OpenAI-compatible endpoint
56
+ >>> client = ChatCompletionsClient(
57
+ ... model="llama-3.1-70b",
58
+ ... base_url="http://localhost:8000/v1",
59
+ ... api_key="your-api-key",
60
+ ... )
61
+ """
62
+
63
+ def __init__(
64
+ self,
65
+ model: str,
66
+ max_tokens: int = 64_000,
67
+ *,
68
+ base_url: str | None = None,
69
+ api_key: str | None = None,
70
+ supports_audio_input: bool = False,
71
+ reasoning_effort: str | None = None,
72
+ timeout: float | None = None,
73
+ max_retries: int = 2,
74
+ kwargs: dict[str, Any] | None = None,
75
+ ) -> None:
76
+ """Initialize OpenAI SDK client with model configuration.
77
+
78
+ Args:
79
+ model: Model identifier (e.g., 'gpt-5', 'gpt-4o', 'o1-preview').
80
+ max_tokens: Maximum context window size in tokens. Defaults to 64,000.
81
+ base_url: API base URL. If None, uses OpenAI's standard URL.
82
+ Use for OpenAI-compatible providers (e.g., 'http://localhost:8000/v1').
83
+ api_key: API key for authentication. If None, reads from OPENROUTER_API_KEY
84
+ environment variable.
85
+ supports_audio_input: Whether the model supports audio inputs. Defaults to False.
86
+ reasoning_effort: Reasoning effort level for extended thinking models
87
+ (e.g., 'low', 'medium', 'high'). Only used with o1/o3 style models.
88
+ timeout: Request timeout in seconds. If None, uses OpenAI SDK default.
89
+ max_retries: Number of retries for transient errors. Defaults to 2.
90
+ The OpenAI SDK handles retries internally with exponential backoff.
91
+ kwargs: Additional arguments passed to chat.completions.create().
92
+ """
93
+ self._model = model
94
+ self._max_tokens = max_tokens
95
+ self._supports_audio_input = supports_audio_input
96
+ self._reasoning_effort = reasoning_effort
97
+ self._kwargs = kwargs or {}
98
+
99
+ # Initialize AsyncOpenAI client
100
+ # Read from OPENROUTER_API_KEY if no api_key provided
101
+ resolved_api_key = api_key or os.environ.get("OPENROUTER_API_KEY")
102
+ self._client = AsyncOpenAI(
103
+ api_key=resolved_api_key,
104
+ base_url=base_url,
105
+ timeout=timeout,
106
+ max_retries=max_retries,
107
+ )
108
+
109
+ @property
110
+ def max_tokens(self) -> int:
111
+ """Maximum context window size in tokens."""
112
+ return self._max_tokens
113
+
114
+ @property
115
+ def model_slug(self) -> str:
116
+ """Model identifier."""
117
+ return self._model
118
+
119
+ @retry(
120
+ retry=retry_if_exception_type(
121
+ (
122
+ APIConnectionError,
123
+ APITimeoutError,
124
+ RateLimitError,
125
+ InternalServerError,
126
+ )
127
+ ),
128
+ stop=stop_after_attempt(3),
129
+ wait=wait_exponential(multiplier=1, min=1, max=10),
130
+ )
131
+ async def generate(
132
+ self,
133
+ messages: list[ChatMessage],
134
+ tools: dict[str, Tool],
135
+ ) -> AssistantMessage:
136
+ """Generate assistant response with optional tool calls.
137
+
138
+ Retries up to 3 times on transient errors (connection, timeout, rate limit,
139
+ internal server errors) with exponential backoff.
140
+
141
+ Args:
142
+ messages: List of conversation messages.
143
+ tools: Dictionary mapping tool names to Tool objects.
144
+
145
+ Returns:
146
+ AssistantMessage containing the model's response, any tool calls,
147
+ and token usage statistics.
148
+
149
+ Raises:
150
+ ContextOverflowError: If the context window is exceeded.
151
+ """
152
+ # Build request kwargs
153
+ request_kwargs: dict[str, Any] = {
154
+ "model": self._model,
155
+ "messages": to_openai_messages(messages),
156
+ "max_completion_tokens": self._max_tokens,
157
+ **self._kwargs,
158
+ }
159
+
160
+ # Add tools if provided
161
+ if tools:
162
+ request_kwargs["tools"] = to_openai_tools(tools)
163
+ request_kwargs["tool_choice"] = "auto"
164
+
165
+ # Add reasoning effort if configured (for o1/o3 models)
166
+ if self._reasoning_effort:
167
+ request_kwargs["reasoning_effort"] = self._reasoning_effort
168
+
169
+ # Make API call
170
+ response = await self._client.chat.completions.create(**request_kwargs)
171
+
172
+ choice = response.choices[0]
173
+
174
+ # Check for context overflow
175
+ if choice.finish_reason in ("max_tokens", "length"):
176
+ raise ContextOverflowError(
177
+ f"Maximal context window tokens reached for model {self.model_slug}, "
178
+ f"resulting in finish reason: {choice.finish_reason}. "
179
+ "Reduce agent.max_tokens and try again."
180
+ )
181
+
182
+ msg = choice.message
183
+
184
+ # Parse reasoning content (for o1/o3 models with extended thinking)
185
+ reasoning: Reasoning | None = None
186
+ if hasattr(msg, "reasoning_content") and msg.reasoning_content:
187
+ reasoning = Reasoning(content=msg.reasoning_content)
188
+
189
+ # Parse tool calls
190
+ tool_calls = [
191
+ ToolCall(
192
+ tool_call_id=tc.id,
193
+ name=tc.function.name,
194
+ arguments=tc.function.arguments or "",
195
+ )
196
+ for tc in (msg.tool_calls or [])
197
+ ]
198
+
199
+ # Parse token usage
200
+ usage = response.usage
201
+ input_tokens = usage.prompt_tokens if usage else 0
202
+ output_tokens = usage.completion_tokens if usage else 0
203
+
204
+ # Handle reasoning tokens if available (for o1/o3 models)
205
+ reasoning_tokens = 0
206
+ if usage and hasattr(usage, "completion_tokens_details") and usage.completion_tokens_details:
207
+ reasoning_tokens = getattr(usage.completion_tokens_details, "reasoning_tokens", 0) or 0
208
+ output_tokens = output_tokens - reasoning_tokens
209
+
210
+ return AssistantMessage(
211
+ reasoning=reasoning,
212
+ content=msg.content or "",
213
+ tool_calls=tool_calls,
214
+ token_usage=TokenUsage(
215
+ input=input_tokens,
216
+ output=output_tokens,
217
+ reasoning=reasoning_tokens,
218
+ ),
219
+ )
@@ -0,0 +1,141 @@
1
+ """LiteLLM-based LLM client for multi-provider support.
2
+
3
+ This client uses LiteLLM to provide a unified interface to multiple LLM providers
4
+ (OpenAI, Anthropic, Google, etc.) with automatic retries for transient failures.
5
+
6
+ Requires the litellm extra: `pip install stirrup[litellm]`
7
+ """
8
+
9
+ import logging
10
+ from typing import Any
11
+
12
+ try:
13
+ from litellm import acompletion
14
+ from litellm.exceptions import APIConnectionError, RateLimitError, Timeout
15
+ except ImportError as e:
16
+ raise ImportError(
17
+ "Requires installation of the litellm extra. "
18
+ "Install with: `uv pip install stirrup[litellm]` or `uv add stirrup[litellm]`"
19
+ ) from e
20
+
21
+ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
22
+
23
+ from stirrup.clients.utils import to_openai_messages, to_openai_tools
24
+ from stirrup.core.exceptions import ContextOverflowError
25
+ from stirrup.core.models import (
26
+ AssistantMessage,
27
+ ChatMessage,
28
+ LLMClient,
29
+ Reasoning,
30
+ TokenUsage,
31
+ Tool,
32
+ ToolCall,
33
+ )
34
+
35
+ __all__ = [
36
+ "LiteLLMClient",
37
+ ]
38
+
39
+ LOGGER = logging.getLogger(__name__)
40
+
41
+
42
class LiteLLMClient(LLMClient):
    """LiteLLM-based client supporting multiple LLM providers with unified interface.

    Includes automatic retries for transient failures and token usage tracking.
    """

    def __init__(
        self,
        model_slug: str,
        max_tokens: int,
        supports_audio_input: bool = False,
        reasoning_effort: str | None = None,
        kwargs: dict[str, Any] | None = None,
    ) -> None:
        """Initialize LiteLLM client with model configuration and capabilities.

        Args:
            model_slug: Model identifier for LiteLLM (e.g., 'anthropic/claude-3-5-sonnet-20241022')
            max_tokens: Maximum context window size in tokens
            supports_audio_input: Whether the model supports audio inputs
            reasoning_effort: Reasoning effort level for extended thinking models (e.g., 'medium', 'high')
            kwargs: Additional arguments to pass to LiteLLM completion calls
        """
        self._model_slug = model_slug
        self._supports_video_input = False
        self._supports_audio_input = supports_audio_input
        self._max_tokens = max_tokens
        self._reasoning_effort = reasoning_effort
        self._kwargs = kwargs or {}

    @property
    def max_tokens(self) -> int:
        """Maximum context window size in tokens."""
        return self._max_tokens

    @property
    def model_slug(self) -> str:
        """Model identifier used by LiteLLM."""
        return self._model_slug

    @retry(
        retry=retry_if_exception_type((Timeout, APIConnectionError, RateLimitError)),
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
    )
    async def generate(self, messages: list[ChatMessage], tools: dict[str, Tool]) -> AssistantMessage:
        """Generate assistant response with optional tool calls.

        Retries up to 3 times on timeout/connection/rate-limit errors with
        exponential backoff.

        Raises:
            ContextOverflowError: If the provider reports a length-style finish
                reason, i.e. the context window was exhausted.
        """
        extra = dict(self._kwargs)
        # BUG FIX: reasoning_effort was accepted in __init__ but never forwarded
        # to LiteLLM, silently disabling extended thinking. Forward it unless the
        # caller already supplied one via kwargs.
        if self._reasoning_effort and "reasoning_effort" not in extra:
            extra["reasoning_effort"] = self._reasoning_effort

        r = await acompletion(
            model=self.model_slug,
            messages=to_openai_messages(messages),
            tools=to_openai_tools(tools) if tools else None,
            tool_choice="auto" if tools else None,
            max_tokens=self._max_tokens,
            **extra,
        )

        choice = r["choices"][0]

        if choice.finish_reason in ["max_tokens", "length"]:
            raise ContextOverflowError(
                f"Maximal context window tokens reached for model {self.model_slug}, resulting in finish reason: {choice.finish_reason}. Reduce agent.max_tokens and try again."
            )

        msg = choice["message"]

        # Prefer structured thinking blocks over plain reasoning_content.
        reasoning: Reasoning | None = None
        if getattr(msg, "reasoning_content", None) is not None:
            reasoning = Reasoning(content=msg.reasoning_content)
        thinking_blocks = getattr(msg, "thinking_blocks", None)
        if thinking_blocks:
            first = thinking_blocks[0]
            # BUG FIX: thinking blocks carry their text under the "thinking" key
            # (the same shape this package writes in to_openai_messages); the old
            # code read a "content" key that raised KeyError on real blocks.
            # Fall back to "content" defensively for providers that use it.
            reasoning = Reasoning(
                signature=first["signature"],
                content=first.get("thinking", first.get("content")),
            )

        usage = r["usage"]

        calls = [
            ToolCall(
                tool_call_id=tc.get("id"),
                name=tc["function"]["name"],
                arguments=tc["function"].get("arguments", "") or "",
            )
            for tc in (msg.get("tool_calls") or [])
        ]

        input_tokens = usage.prompt_tokens
        reasoning_tokens = 0
        # Guarded lookup for providers whose usage object lacks token details,
        # consistent with ChatCompletionsClient's handling.
        details = getattr(usage, "completion_tokens_details", None)
        if details:
            reasoning_tokens = details.reasoning_tokens or 0
        # Reasoning tokens are included in completion_tokens; report them separately.
        output_tokens = usage.completion_tokens - reasoning_tokens

        return AssistantMessage(
            reasoning=reasoning,
            content=msg.get("content") or "",
            tool_calls=calls,
            token_usage=TokenUsage(
                input=input_tokens,
                output=output_tokens,
                reasoning=reasoning_tokens,
            ),
        )
@@ -0,0 +1,161 @@
1
+ """Shared utilities for OpenAI-compatible message and tool conversion.
2
+
3
+ These helper functions convert Stirrup's internal message and tool formats
4
+ to the OpenAI API format. Since LiteLLM and the OpenAI SDK use identical
5
+ formats, these utilities are shared between both client implementations.
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from stirrup.core.models import (
11
+ AssistantMessage,
12
+ AudioContentBlock,
13
+ ChatMessage,
14
+ Content,
15
+ ImageContentBlock,
16
+ SystemMessage,
17
+ Tool,
18
+ ToolMessage,
19
+ UserMessage,
20
+ VideoContentBlock,
21
+ )
22
+
23
+ __all__ = [
24
+ "content_to_openai",
25
+ "to_openai_messages",
26
+ "to_openai_tools",
27
+ ]
28
+
29
+
30
def to_openai_tools(tools: dict[str, Tool]) -> list[dict[str, Any]]:
    """Translate Tool objects into OpenAI function-calling tool definitions.

    Args:
        tools: Mapping of tool name to Tool object.

    Returns:
        Tool definitions shaped as OpenAI expects:
        ``[{"type": "function", "function": {"name": ..., "description": ..., "parameters": ...}}]``.
        The ``parameters`` JSON schema is included only when the tool declares
        parameters.

    Example:
        >>> tools = {"calculator": calculator_tool}
        >>> openai_tools = to_openai_tools(tools)
        >>> # Returns: [{"type": "function", "function": {"name": "calculator", ...}}]
    """
    converted: list[dict[str, Any]] = []
    for tool in tools.values():
        spec: dict[str, Any] = {
            "name": tool.name,
            "description": tool.description,
        }
        if tool.parameters is not None:
            # Parameters are a pydantic model; export its JSON schema.
            spec["parameters"] = tool.parameters.model_json_schema()
        converted.append({"type": "function", "function": spec})
    return converted
58
+
59
+
60
def content_to_openai(content: Content) -> list[dict[str, Any]] | str:
    """Translate Stirrup content into OpenAI chat-message content parts.

    Supports plain strings plus image, audio, and video content blocks.

    Args:
        content: A bare string, or a list mixing strings and content blocks.

    Returns:
        A list of OpenAI-format content-part dictionaries; a bare string is
        returned as a single text part.

    Raises:
        NotImplementedError: For content block types this converter does not handle.
    """
    # A bare string becomes a single text part.
    if isinstance(content, str):
        return [{"type": "text", "text": content}]

    parts: list[dict[str, Any]] = []
    for item in content:
        if isinstance(item, str):
            converted: dict[str, Any] = {"type": "text", "text": item}
        elif isinstance(item, ImageContentBlock):
            converted = {"type": "image_url", "image_url": {"url": item.to_base64_url()}}
        elif isinstance(item, AudioContentBlock):
            # input_audio wants raw base64 data, so strip the "data:...;base64," prefix.
            converted = {
                "type": "input_audio",
                "input_audio": {
                    "data": item.to_base64_url().split(",")[1],
                    "format": item.extension,
                },
            }
        elif isinstance(item, VideoContentBlock):
            converted = {"type": "file", "file": {"file_data": item.to_base64_url()}}
        else:
            raise NotImplementedError(f"Unsupported content block: {type(item)}")
        parts.append(converted)
    return parts
100
+
101
+
102
def to_openai_messages(msgs: list[ChatMessage]) -> list[dict[str, Any]]:
    """Convert ChatMessage list to OpenAI-compatible message dictionaries.

    Handles all message types: SystemMessage, UserMessage, AssistantMessage,
    and ToolMessage. Preserves reasoning content and tool calls for assistant
    messages.

    Args:
        msgs: List of ChatMessage objects (System, User, Assistant, or Tool messages).

    Returns:
        List of message dictionaries ready for the OpenAI API.

    Raises:
        NotImplementedError: If an unsupported message type is encountered.
    """
    out: list[dict[str, Any]] = []
    for m in msgs:
        if isinstance(m, SystemMessage):
            out.append({"role": "system", "content": content_to_openai(m.content)})
        elif isinstance(m, UserMessage):
            out.append({"role": "user", "content": content_to_openai(m.content)})
        elif isinstance(m, AssistantMessage):
            msg: dict[str, Any] = {"role": "assistant", "content": content_to_openai(m.content)}

            if m.reasoning:
                if m.reasoning.content:
                    msg["reasoning_content"] = m.reasoning.content

                # Signed thinking (e.g. Anthropic via LiteLLM) must be echoed
                # back as a thinking block so the signature round-trips.
                if m.reasoning.signature:
                    msg["thinking_blocks"] = [
                        {"type": "thinking", "signature": m.reasoning.signature, "thinking": m.reasoning.content}
                    ]

            if m.tool_calls:
                # BUG FIX: previously each entry was tool.model_dump() with the
                # OpenAI fields merged on top, which leaked internal keys
                # ("tool_call_id", "name", "arguments") at the top level of the
                # payload; strict OpenAI-compatible endpoints reject unknown
                # fields. Emit only the documented schema.
                msg["tool_calls"] = [
                    {
                        "id": tc.tool_call_id,
                        "type": "function",
                        "function": {
                            "name": tc.name,
                            "arguments": tc.arguments,
                        },
                    }
                    for tc in m.tool_calls
                ]

            out.append(msg)
        elif isinstance(m, ToolMessage):
            out.append(
                {
                    "role": "tool",
                    "content": content_to_openai(m.content),
                    "tool_call_id": m.tool_call_id,
                    "name": m.name,
                }
            )
        else:
            raise NotImplementedError(f"Unsupported message type: {type(m)}")

    return out
stirrup/constants.py ADDED
@@ -0,0 +1,14 @@
1
# Tool naming
FINISH_TOOL_NAME = "finish"  # name of the built-in tool an agent calls to end its run

# Agent execution limits
AGENT_MAX_TURNS = 30  # Maximum agent turns before forced termination
CONTEXT_SUMMARIZATION_CUTOFF = 0.7  # Context window usage threshold (0.0-1.0) that triggers message summarization

# Media resolution limits (expressed as total pixel counts, not dimensions)
RESOLUTION_1MP = 1_000_000  # 1 megapixel - default max resolution for images
RESOLUTION_480P = 640 * 480  # 480p video resolution

# Code execution
SUBMISSION_SANDBOX_TIMEOUT = 60 * 10  # 10 minutes, in seconds
E2B_SANDBOX_TEMPLATE_ALIAS = "e2b-sandbox"  # E2B sandbox template alias — presumably must match the deployed template name; verify
@@ -0,0 +1 @@
1
+ """Core agent framework components."""