stirrup 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
stirrup/tools/web.py ADDED
@@ -0,0 +1,336 @@
1
+ """Web tools for fetching pages and searching the web.
2
+
3
+ This module provides web_fetch and web_search tools with a WebToolProvider
4
+ class that manages the shared HTTP client lifecycle.
5
+
6
+ Example usage:
7
+ from stirrup.clients.chat_completions_client import ChatCompletionsClient
8
+
9
+ # As part of DEFAULT_TOOLS in Agent
10
+ client = ChatCompletionsClient(model="gpt-5")
11
+ agent = Agent(
12
+ client=client,
13
+ name="assistant",
14
+ tools=DEFAULT_TOOLS, # Includes WebToolProvider
15
+ )
16
+
17
+ # Standalone usage
18
+ async with WebToolProvider() as provider:
19
+ tools = provider.get_tools()
20
+ """
21
+
22
+ import os
23
+ from html import escape
24
+ from types import TracebackType
25
+ from typing import Annotated, Any
26
+
27
+ import httpx
28
+ import trafilatura
29
+ from pydantic import BaseModel, Field
30
+ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
31
+
32
+ from stirrup.core.models import Tool, ToolProvider, ToolResult
33
+ from stirrup.utils.text import truncate_msg
34
+
35
+ __all__ = ["WebToolProvider"]
36
+
37
# Constants

# Length limits handed to truncate_msg when building tool output.
MAX_LENGTH_WEB_FETCH_HTML = 40_000
MAX_LENGTH_WEB_SEARCH_RESULTS = 40_000

# Browser-like request headers sent with every page fetch.
DEFAULT_WEBFETCH_HEADERS = {
    "User-Agent": " ".join(
        (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/124.0.0.0 Safari/537.36",
        )
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "keep-alive",
}

# Timeouts (three minutes) used for the temporary clients created when no
# shared httpx.AsyncClient is supplied.
WEB_FETCH_TIMEOUT = 180
WEB_SEARCH_TIMEOUT = 180
53
+
54
+
55
+ # =============================================================================
56
+ # Web Fetch Tool
57
+ # =============================================================================
58
+
59
+
60
class FetchWebPageParams(BaseModel):
    """Parameters for web page fetch tool."""

    # Required field; the description text is part of the generated schema.
    url: str = Field(
        description="Full HTTP or HTTPS URL of the web page to fetch and extract",
    )
64
+
65
+
66
class WebFetchMetadata(BaseModel):
    """Metadata for web fetch tool tracking URLs fetched.

    Implements Addable protocol for aggregation across multiple fetches.
    """

    # Number of fetch invocations this record represents (one per call).
    num_uses: int = 1
    # URLs that were passed to the fetch tool.
    pages_fetched: list[str] = Field(default_factory=list)

    def __add__(self, other: "WebFetchMetadata") -> "WebFetchMetadata":
        # Combine two records: use counts are summed, URL lists concatenated
        # with this record's URLs first.
        combined_uses = self.num_uses + other.num_uses
        combined_pages = [*self.pages_fetched, *other.pages_fetched]
        return WebFetchMetadata(num_uses=combined_uses, pages_fetched=combined_pages)
80
+
81
+
82
def _get_fetch_web_page_tool(client: httpx.AsyncClient | None = None) -> Tool[FetchWebPageParams, WebFetchMetadata]:
    """Create a web page fetching tool that extracts main content as markdown.

    Args:
        client: Optional shared httpx.AsyncClient for connection pooling. When
            omitted, every call opens and closes its own temporary client.

    Returns:
        Tool configured to fetch web pages and extract clean markdown content
    """

    @retry(
        retry=retry_if_exception_type((httpx.TimeoutException, httpx.NetworkError)),
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
        reraise=True,
    )
    async def _fetch(url: str, http_client: httpx.AsyncClient) -> httpx.Response:
        """Execute HTTP GET request with automatic retries on network errors."""
        response = await http_client.get(url, headers=DEFAULT_WEBFETCH_HEADERS)
        # Non-2xx statuses raise httpx.HTTPStatusError, which is not retried.
        response.raise_for_status()
        return response

    async def fetch_web_page_executor(params: FetchWebPageParams) -> ToolResult[WebFetchMetadata]:
        """Fetch web page and extract main content as markdown using trafilatura.

        Failures while requesting the page are reported inside the returned
        XML (``<error>`` element) instead of being raised.
        """
        try:
            # Use provided client or create temporary one for backward compatibility
            if client is not None:
                response = await _fetch(params.url, client)
            else:
                async with httpx.AsyncClient(
                    headers=DEFAULT_WEBFETCH_HEADERS,
                    follow_redirects=True,
                    timeout=WEB_FETCH_TIMEOUT,
                ) as temp_client:
                    response = await _fetch(params.url, temp_client)

            # trafilatura returns None when nothing can be extracted.
            body_md = trafilatura.extract(response.text, output_format="markdown") or ""
            return ToolResult(
                content=f"<web_fetch><url>{params.url}</url><body>"
                f"{truncate_msg(body_md, MAX_LENGTH_WEB_FETCH_HTML)}</body></web_fetch>",
                metadata=WebFetchMetadata(pages_fetched=[params.url]),
            )
        except (httpx.HTTPError, httpx.InvalidURL) as exc:
            # httpx.InvalidURL is not part of the HTTPError hierarchy, so it has
            # to be listed explicitly; otherwise a malformed URL would escape as
            # an exception instead of the error XML the tool description promises.
            return ToolResult(
                content=f"<web_fetch><url>{params.url}</url><error>"
                f"{truncate_msg(str(exc), MAX_LENGTH_WEB_FETCH_HTML)}</error></web_fetch>",
                metadata=WebFetchMetadata(pages_fetched=[params.url]),
            )

    return Tool[FetchWebPageParams, WebFetchMetadata](
        name="fetch_web_page",
        description="Fetch and extract the main content from a web page as markdown. Returns body text or error as XML.",
        parameters=FetchWebPageParams,
        executor=fetch_web_page_executor,  # ty: ignore[invalid-argument-type]
    )
137
+
138
+
139
+ # =============================================================================
140
+ # Web Search Tool
141
+ # =============================================================================
142
+
143
+
144
class WebSearchParams(BaseModel):
    """Parameters for web search tool."""

    # Required field; the description text is part of the generated schema.
    query: str = Field(
        description="Natural language search query for Brave Search (similar to Google search syntax)",
    )
150
+
151
+
152
class WebSearchMetadata(BaseModel):
    """Metadata for web search tool tracking search results.

    Implements Addable protocol for aggregation across multiple searches.
    """

    # Number of search invocations this record represents (one per call).
    num_uses: int = 1
    # Count of results returned by the search(es).
    pages_returned: int = 0

    def __add__(self, other: "WebSearchMetadata") -> "WebSearchMetadata":
        # Combine two records by summing both counters field by field.
        combined_uses = self.num_uses + other.num_uses
        combined_pages = self.pages_returned + other.pages_returned
        return WebSearchMetadata(num_uses=combined_uses, pages_returned=combined_pages)
166
+
167
+
168
def _get_websearch_tool(
    brave_api_key: str | None, client: httpx.AsyncClient | None = None
) -> Tool[WebSearchParams, WebSearchMetadata]:
    """Create a web search tool using Brave Search API.

    Args:
        brave_api_key: Brave Search API key, or None to use BRAVE_API_KEY environment variable
        client: Optional shared httpx.AsyncClient for connection pooling. When
            omitted, every search opens and closes its own temporary client.

    Returns:
        Tool configured to search the web and return top 5 results as XML

    Raises:
        RuntimeError: If no API key is provided or found in environment
            (an empty string counts as missing)
    """
    # Treat an empty string like a missing key: an exported-but-blank
    # BRAVE_API_KEY would otherwise build a tool that can never authenticate.
    brave_api_key = brave_api_key or os.getenv("BRAVE_API_KEY")
    if not brave_api_key:
        raise RuntimeError("No Brave Search API key provided.")

    # NOTE(review): with max=3 below multiplier=4 the computed wait appears to
    # always clamp to 3 seconds, i.e. a fixed delay — confirm this is intended.
    @retry(
        retry=retry_if_exception_type((httpx.TimeoutException, httpx.NetworkError)),
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=4, min=1, max=3),
        reraise=True,
    )
    async def _search(query: str, http_client: httpx.AsyncClient) -> dict:
        """Execute Brave Search API request with automatic retries on network errors."""
        response = await http_client.get(
            "https://api.search.brave.com/res/v1/web/search",
            headers={
                "X-Subscription-Token": brave_api_key,
                "Accept": "application/json",
            },
            params={"q": query, "count": 5},
        )
        response.raise_for_status()
        return response.json()

    def _result_to_xml(result: dict) -> str:
        """Render one search hit as an escaped <result> element."""
        # `or ''` covers fields that are present but null in the payload.
        return (
            "<result>"
            f"\n<title>{escape(result.get('title', '') or '')}</title>"
            f"\n<url>{escape(result.get('url', '') or '')}</url>"
            f"\n<description>{escape(result.get('description', '') or '')}</description>"
            "\n</result>"
        )

    async def websearch_executor(params: WebSearchParams) -> ToolResult[WebSearchMetadata]:
        """Execute web search and format results as XML with title, URL, and description.

        Errors raised by the HTTP request (after retries) propagate to the caller.
        """
        # Use provided client or create temporary one for backward compatibility
        if client is not None:
            data = await _search(params.query, client)
        else:
            async with httpx.AsyncClient(timeout=WEB_SEARCH_TIMEOUT) as temp_client:
                data = await _search(params.query, temp_client)

        # Tolerate "web" or "results" being absent *or* null in the response,
        # matching the null-tolerance already applied to the per-result fields.
        web_section = data.get("web") or {}
        results = web_section.get("results") or []
        results_xml = "<results>\n" + "\n".join(_result_to_xml(result) for result in results) + "\n</results>"

        return ToolResult(
            content=truncate_msg(results_xml, MAX_LENGTH_WEB_SEARCH_RESULTS),
            metadata=WebSearchMetadata(pages_returned=len(results)),
        )

    return Tool[WebSearchParams, WebSearchMetadata](
        name="web_search",
        description="Search the web using Brave Search API. Returns top 5 results with title, URL, and description as XML.",
        parameters=WebSearchParams,
        executor=websearch_executor,  # ty: ignore[invalid-argument-type]
    )
244
+
245
+
246
+ # =============================================================================
247
+ # WebToolProvider
248
+ # =============================================================================
249
+
250
+
251
class WebToolProvider(ToolProvider):
    """Provides web tools (fetch_web_page, web_search) with managed HTTP client lifecycle.

    WebToolProvider implements the Tool lifecycle protocol (has_lifecycle=True),
    so it can be used directly in Agent's tools list. It creates an httpx.AsyncClient
    on __aenter__ and returns the web tools; the client is closed on __aexit__.

    Usage as Tool in Agent (preferred):
        from stirrup.clients.chat_completions_client import ChatCompletionsClient

        client = ChatCompletionsClient(model="gpt-5")
        agent = Agent(
            client=client,
            name="assistant",
            tools=[LocalCodeExecToolProvider(), WebToolProvider(), CALCULATOR_TOOL],
        )

        async with agent.session(output_dir="./output") as session:
            await session.run("Search the web and fetch a page")

    Standalone usage:
        async with WebToolProvider() as tools:
            ...  # `tools` is the list also returned by get_tools()
    """

    def __init__(
        self,
        *,
        timeout: float = 60 * 3,
        brave_api_key: str | None = None,
    ) -> None:
        """Initialize WebToolProvider.

        Args:
            timeout: HTTP timeout in seconds (default: 180)
            brave_api_key: Brave Search API key for web_search tool.
                If None, uses BRAVE_API_KEY environment variable.
                Web search is only available if API key is provided.
        """
        self._timeout = timeout
        self._brave_api_key = brave_api_key or os.getenv("BRAVE_API_KEY")
        # Created in __aenter__, released in __aexit__.
        self._client: httpx.AsyncClient | None = None

    async def __aenter__(self) -> list[Tool[Any, Any]]:
        """Enter async context: create HTTP client and return web tools.

        Returns:
            List of Tool objects (fetch_web_page, and web_search if API key available).
        """
        self._client = httpx.AsyncClient(
            timeout=self._timeout,
            follow_redirects=True,
        )
        await self._client.__aenter__()
        return self.get_tools()

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Exit async context: close HTTP client."""
        # Detach the client before closing it so that the provider never keeps
        # a reference to a client whose shutdown raised; get_tools() would
        # otherwise keep handing out tools bound to it.
        client, self._client = self._client, None
        if client is not None:
            await client.__aexit__(exc_type, exc_val, exc_tb)

    def get_tools(self) -> list[Tool[Any, Any]]:
        """Get web tools configured with the managed HTTP client.

        Returns:
            List containing fetch_web_page tool, and web_search tool if API key is available.

        Raises:
            RuntimeError: If called before entering context.
        """
        if self._client is None:
            raise RuntimeError("WebToolProvider not started. Use 'async with' first.")

        tools: list[Tool[Any, Any]] = [_get_fetch_web_page_tool(self._client)]

        # Only add web_search if API key is available
        if self._brave_api_key:
            tools.append(_get_websearch_tool(self._brave_api_key, self._client))

        return tools
@@ -0,0 +1,10 @@
1
+ """Utility functions for agent framework."""
2
+
3
+ from stirrup.utils.logging import AgentLogger, AgentLoggerBase
4
+ from stirrup.utils.text import truncate_msg
5
+
6
+ __all__ = [
7
+ "AgentLogger",
8
+ "AgentLoggerBase",
9
+ "truncate_msg",
10
+ ]