stirrup 0.1.1-py3-none-any.whl → 0.1.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
stirrup/core/agent.py CHANGED
@@ -2,9 +2,9 @@
 import contextvars
 import glob as glob_module
 import inspect
-import json
 import logging
 import re
+import signal
 from contextlib import AsyncExitStack
 from dataclasses import dataclass, field
 from itertools import chain, takewhile
@@ -19,7 +19,9 @@ from stirrup.constants import (
     AGENT_MAX_TURNS,
     CONTEXT_SUMMARIZATION_CUTOFF,
     FINISH_TOOL_NAME,
+    TURNS_REMAINING_WARNING_THRESHOLD,
 )
+from stirrup.core.cache import CacheManager, CacheState, compute_task_hash
 from stirrup.core.models import (
     AssistantMessage,
     ChatMessage,
@@ -36,6 +38,7 @@ from stirrup.core.models import (
     UserMessage,
 )
 from stirrup.prompts import MESSAGE_SUMMARIZER, MESSAGE_SUMMARIZER_BRIDGE_TEMPLATE
+from stirrup.skills import SkillMetadata, format_skills_section, load_skills_metadata
 from stirrup.tools import DEFAULT_TOOLS
 from stirrup.tools.code_backends.base import CodeExecToolProvider
 from stirrup.tools.code_backends.local import LocalCodeExecToolProvider
@@ -70,6 +73,8 @@ class SessionState:
     parent_exec_env: CodeExecToolProvider | None = None
     depth: int = 0
     uploaded_file_paths: list[str] = field(default_factory=list)  # Paths of files uploaded to exec_env
+    skills_metadata: list[SkillMetadata] = field(default_factory=list)  # Loaded skills metadata
+    logger: AgentLoggerBase | None = None  # Logger for pause/resume during user input


 _SESSION_STATE: contextvars.ContextVar[SessionState] = contextvars.ContextVar("session_state")
@@ -110,17 +115,19 @@ def _handle_text_only_tool_responses(tool_messages: list[ToolMessage]) -> tuple[
     return tool_messages, user_messages


-def _get_total_token_usage(messages: list[list[ChatMessage]]) -> TokenUsage:
-    """Aggregate token usage across all assistant messages in grouped conversation history.
+def _get_total_token_usage(messages: list[list[ChatMessage]]) -> list[TokenUsage]:
+    """
+    Returns a list of TokenUsage objects aggregated from all AssistantMessage
+    instances across the provided grouped message history.

     Args:
-        messages: List of message groups, where each group represents a segment of conversation.
+        messages: A list where each item is a list of ChatMessage objects representing a segment
+            or turn group of the conversation history.

+    Returns:
+        List of TokenUsage corresponding to each AssistantMessage in the flattened conversation history.
     """
-    return sum(
-        [msg.token_usage for msg in chain.from_iterable(messages) if isinstance(msg, AssistantMessage)],
-        start=TokenUsage(),
-    )
+    return [msg.token_usage for msg in chain.from_iterable(messages) if isinstance(msg, AssistantMessage)]


 class SubAgentParams(BaseModel):
@@ -174,6 +181,7 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
         finish_tool: Tool[FinishParams, FinishMeta] | None = None,
         # Agent options
         context_summarization_cutoff: float = CONTEXT_SUMMARIZATION_CUTOFF,
+        turns_remaining_warning_threshold: int = TURNS_REMAINING_WARNING_THRESHOLD,
         run_sync_in_thread: bool = True,
         text_only_tool_responses: bool = True,
         # Logging
@@ -213,6 +221,7 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
         self._tools = tools if tools is not None else DEFAULT_TOOLS
         self._finish_tool: Tool = finish_tool if finish_tool is not None else SIMPLE_FINISH_TOOL
         self._context_summarization_cutoff = context_summarization_cutoff
+        self._turns_remaining_warning_threshold = turns_remaining_warning_threshold
         self._run_sync_in_thread = run_sync_in_thread
         self._text_only_tool_responses = text_only_tool_responses

@@ -222,6 +231,9 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
         # Session configuration (set during session(), used in __aenter__)
         self._pending_output_dir: Path | None = None
         self._pending_input_files: str | Path | list[str | Path] | None = None
+        self._pending_skills_dir: Path | None = None
+        self._resume: bool = False
+        self._clear_cache_on_success: bool = True

         # Instance-scoped state (populated during __aenter__, isolated per agent instance)
         self._active_tools: dict[str, Tool] = {}
@@ -229,6 +241,10 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
         self._last_run_metadata: dict[str, list[Any]] = {}
         self._transferred_paths: list[str] = []  # Paths transferred to parent (for subagents)

+        # Cache state for resumption (set during run(), used in __aexit__ for caching on interrupt)
+        self._current_task_hash: str | None = None
+        self._current_run_state: CacheState | None = None
+
     @property
     def name(self) -> str:
         """The name of this agent."""
@@ -258,6 +274,9 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
         self,
         output_dir: Path | str | None = None,
         input_files: str | Path | list[str | Path] | None = None,
+        skills_dir: Path | str | None = None,
+        resume: bool = False,
+        clear_cache_on_success: bool = True,
     ) -> Self:
         """Configure a session and return self for use as async context manager.

@@ -270,6 +289,16 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
                 - Glob patterns (e.g., "data/*.csv", "**/*.py")
                 Raises ValueError if no CodeExecToolProvider is configured
                 or if a glob pattern matches no files.
+            skills_dir: Directory containing skill definitions to load and make available
+                to the agent. Skills are uploaded to the execution environment
+                and their metadata is included in the system prompt.
+            resume: If True, attempt to resume from cached state if available.
+                The cache is identified by hashing the init_msgs passed to run().
+                Cached state includes message history, current turn, and execution
+                environment files from a previous interrupted run.
+            clear_cache_on_success: If True (default), automatically clear the cache
+                when the agent completes successfully. Set to False
+                to preserve caches for inspection or debugging.

         Returns:
             Self, for use with `async with agent.session(...) as session:`
@@ -285,8 +314,19 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
         """
         self._pending_output_dir = Path(output_dir) if output_dir else None
         self._pending_input_files = input_files
+        self._pending_skills_dir = Path(skills_dir) if skills_dir else None
+        self._resume = resume
+        self._clear_cache_on_success = clear_cache_on_success
         return self

+    def _handle_interrupt(self, _signum: int, _frame: object) -> None:
+        """Handle SIGINT to ensure caching before exit.
+
+        Converts the signal to a KeyboardInterrupt exception so that __aexit__
+        is properly called and can cache the state before cleanup.
+        """
+        raise KeyboardInterrupt("Agent interrupted - state will be cached")
+
     def _resolve_input_files(self, input_files: str | Path | list[str | Path]) -> list[Path]:
         """Resolve input file paths, expanding globs and normalizing to Path objects.

@@ -402,6 +442,15 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
         # Base prompt with max_turns
         parts.append(BASE_SYSTEM_PROMPT_TEMPLATE.format(max_turns=self._max_turns))

+        # User interaction guidance based on whether user_input tool is available
+        if "user_input" in self._active_tools:
+            parts.append(
+                " You have access to the user_input tool which allows you to ask the user "
+                "questions when you need clarification or are uncertain about something."
+            )
+        else:
+            parts.append(" You are not able to interact with the user during the task.")
+
         # Input files section (if any were uploaded)
         state = _SESSION_STATE.get(None)
         if state and state.uploaded_file_paths:
@@ -410,6 +459,12 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
                 files_section += f"\n- {file_path}"
             parts.append(files_section)

+        # Skills section (if skills were loaded)
+        if state and state.skills_metadata:
+            skills_section = format_skills_section(state.skills_metadata)
+            if skills_section:
+                parts.append(f"\n\n{skills_section}")
+
         # User's custom system prompt (if provided)
         if self._system_prompt:
             parts.append(f"\n\nFollow these instructions from the User:\n{self._system_prompt}")
@@ -500,6 +555,7 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
             output_dir=str(self._pending_output_dir) if self._pending_output_dir else None,
             parent_exec_env=parent_state.exec_env if parent_state else None,
             depth=current_depth,
+            logger=self._logger,
         )
         _SESSION_STATE.set(state)

@@ -588,6 +644,18 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
                 raise RuntimeError(f"Failed to upload files: {result.failed}")
             self._pending_input_files = None  # Clear pending state

+            # Upload skills directory if it exists and load metadata
+            if self._pending_skills_dir:
+                skills_path = self._pending_skills_dir
+                if skills_path.exists() and skills_path.is_dir():
+                    if state.exec_env:
+                        logger.debug("[%s __aenter__] Uploading skills directory: %s", self._name, skills_path)
+                        await state.exec_env.upload_files(skills_path, dest_dir="skills")
+                    # Load skills metadata (even if no exec_env, for system prompt)
+                    state.skills_metadata = load_skills_metadata(skills_path)
+                    logger.debug("[%s __aenter__] Loaded %d skills", self._name, len(state.skills_metadata))
+                self._pending_skills_dir = None  # Clear pending state
+
             # Configure and enter logger context
             self._logger.name = self._name
             self._logger.model = self._client.model_slug
@@ -595,6 +663,11 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
             # depth is already set (0 for main agent, passed in for sub-agents)
             self._logger.__enter__()

+            # Set up signal handler for graceful caching on interrupt (root agent only)
+            if current_depth == 0:
+                self._original_sigint = signal.getsignal(signal.SIGINT)
+                signal.signal(signal.SIGINT, self._handle_interrupt)
+
             return self

         except Exception:
@@ -616,6 +689,47 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
         state = _SESSION_STATE.get()

         try:
+            # Cache state on non-success exit (only at root level)
+            should_cache = (
+                state.depth == 0
+                and (exc_type is not None or self._last_finish_params is None)
+                and self._current_task_hash is not None
+                and self._current_run_state is not None
+            )
+
+            logger.debug(
+                "[%s __aexit__] Cache decision: should_cache=%s, depth=%d, exc_type=%s, "
+                "finish_params=%s, task_hash=%s, run_state=%s",
+                self._name,
+                should_cache,
+                state.depth,
+                exc_type,
+                self._last_finish_params is not None,
+                self._current_task_hash,
+                self._current_run_state is not None,
+            )
+
+            if should_cache:
+                cache_manager = CacheManager(clear_on_success=self._clear_cache_on_success)
+
+                exec_env_dir = state.exec_env.temp_dir if state.exec_env else None
+
+                # Explicit checks to keep type checker happy - should_cache condition guarantees these
+                if self._current_task_hash is None or self._current_run_state is None:
+                    raise ValueError("Cache state is unexpectedly None after should_cache check")
+
+                # Temporarily block SIGINT during cache save to prevent interruption
+                original_handler = signal.getsignal(signal.SIGINT)
+                signal.signal(signal.SIGINT, signal.SIG_IGN)
+                try:
+                    cache_manager.save_state(
+                        self._current_task_hash,
+                        self._current_run_state,
+                        exec_env_dir,
+                    )
+                finally:
+                    signal.signal(signal.SIGINT, original_handler)
+                self._logger.info(f"Cached state for task {self._current_task_hash}")
             # Save files from finish_params.paths based on depth
             if state.output_dir and self._last_finish_params and state.exec_env:
                 paths = getattr(self._last_finish_params, "paths", None)
@@ -670,6 +784,11 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
                     state.depth,
                 )
         finally:
+            # Restore original signal handler (root agent only)
+            if hasattr(self, "_original_sigint"):
+                signal.signal(signal.SIGINT, self._original_sigint)
+                del self._original_sigint
+
             # Exit logger context
             self._logger.finish_params = self._last_finish_params
             self._logger.run_metadata = self._last_run_metadata
@@ -695,10 +814,7 @@ class Agent[FinishParams: BaseModel, FinishMeta]:

         if tool:
             try:
-                # Parse parameters if the tool has them, otherwise use None
-                params = (
-                    tool.parameters.model_validate_json(tool_call.arguments) if tool.parameters is not None else None
-                )
+                params = tool.parameters.model_validate_json(tool_call.arguments)

                 # Set parent depth for sub-agent tools to read
                 prev_depth = _PARENT_DEPTH.set(self._logger.depth)
@@ -723,17 +839,18 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
                    tool_call.name,
                    tool_call.arguments,
                )
-                result = ToolResult(content="Tool arguments are not valid")
+                result = ToolResult(content="Tool arguments are not valid", success=False)
                args_valid = False
        else:
            LOGGER.debug(f"LLMClient tried to use the tool {tool_call.name} which is not in the tools list")
-            result = ToolResult(content=f"{tool_call.name} is not a valid tool")
+            result = ToolResult(content=f"{tool_call.name} is not a valid tool", success=False)

        return ToolMessage(
            content=result.content,
            tool_call_id=tool_call.tool_call_id,
            name=tool_call.name,
            args_was_valid=args_valid,
+            success=result.success,
        )

    async def step(
@@ -742,7 +859,7 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
         run_metadata: dict[str, list[Any]],
         turn: int = 0,
         max_turns: int = 0,
-    ) -> tuple[AssistantMessage, list[ToolMessage], ToolCall | None]:
+    ) -> tuple[AssistantMessage, list[ToolMessage], FinishParams | None]:
         """Execute one agent step: generate assistant message and run any requested tool calls.

         Args:
@@ -760,24 +877,21 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
         if turn > 0:
             self._logger.assistant_message(turn, max_turns, assistant_message)

+        finish_params: FinishParams | None = None
         tool_messages: list[ToolMessage] = []
-        finish_call: ToolCall | None = None
-
         if assistant_message.tool_calls:
-            finish_call = next(
-                (tc for tc in assistant_message.tool_calls if tc.name == FINISH_TOOL_NAME),
-                None,
-            )
-
             tool_messages = []
             for tool_call in assistant_message.tool_calls:
                 tool_message = await self.run_tool(tool_call, run_metadata)
                 tool_messages.append(tool_message)

+                if tool_message.success and tool_message.name == FINISH_TOOL_NAME:
+                    finish_params = self._finish_tool.parameters.model_validate_json(tool_call.arguments)
+
                 # Log tool result immediately
                 self._logger.tool_result(tool_message)

-        return assistant_message, tool_messages, finish_call
+        return assistant_message, tool_messages, finish_params

     async def summarize_messages(self, messages: list[ChatMessage]) -> list[ChatMessage]:
         """Condense message history using LLM to stay within context window."""
@@ -803,7 +917,7 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
         init_msgs: str | list[ChatMessage],
         *,
         depth: int | None = None,
-    ) -> tuple[FinishParams | None, list[list[ChatMessage]], dict[str, list[Any]]]:
+    ) -> tuple[FinishParams | None, list[list[ChatMessage]], dict[str, Any]]:
         """Execute the agent loop until finish tool is called or max_turns reached.

         A base system prompt is automatically prepended to all runs, including:
@@ -833,23 +947,59 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
         ])

         """
-        msgs: list[ChatMessage] = []

-        # Build the complete system prompt (base + input files + user instructions)
-        full_system_prompt = self._build_system_prompt()
-        msgs.append(SystemMessage(content=full_system_prompt))
+        # Compute task hash for caching/resume
+        task_hash = compute_task_hash(init_msgs)
+        self._current_task_hash = task_hash
+
+        # Initialize cache manager
+        cache_manager = CacheManager(clear_on_success=self._clear_cache_on_success)
+        start_turn = 0
+        resumed = False
+
+        # Try to resume from cache if requested
+        if self._resume:
+            state = _SESSION_STATE.get()
+            cached = cache_manager.load_state(task_hash)
+            if cached:
+                # Restore files to exec env
+                if state.exec_env and state.exec_env.temp_dir:
+                    cache_manager.restore_files(task_hash, state.exec_env.temp_dir)
+
+                # Restore state
+                msgs = cached.msgs
+                full_msg_history = cached.full_msg_history
+                run_metadata = cached.run_metadata
+                start_turn = cached.turn
+                resumed = True
+                self._logger.info(f"Resuming from cached state at turn {start_turn}")
+            else:
+                self._logger.info(f"No cache found for task {task_hash}, starting fresh")

-        if isinstance(init_msgs, str):
-            msgs.append(UserMessage(content=init_msgs))
-        else:
-            msgs.extend(init_msgs)
+        if not resumed:
+            msgs: list[ChatMessage] = []
+
+            # Build the complete system prompt (base + input files + user instructions)
+            full_system_prompt = self._build_system_prompt()
+            msgs.append(SystemMessage(content=full_system_prompt))
+
+            if isinstance(init_msgs, str):
+                msgs.append(UserMessage(content=init_msgs))
+            else:
+                msgs.extend(init_msgs)
+
+            # Local metadata storage - isolated per run() invocation for thread safety
+            run_metadata: dict[str, list[Any]] = {}
+
+            full_msg_history: list[list[ChatMessage]] = []

         # Set logger depth if provided (for sub-agent runs)
         if depth is not None:
             self._logger.depth = depth

-        # Log the task at run start
-        self._logger.task_message(msgs[-1].content)
+        # Log the task at run start (only if not resuming)
+        if not resumed:
+            self._logger.task_message(msgs[-1].content)

         # Show warnings (top-level only, if logger supports it)
         if self._logger.depth == 0 and isinstance(self._logger, AgentLogger):
@@ -860,25 +1010,30 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
         # Use logger callback if available and not overridden
         step_callback = self._logger.on_step

-        # Local metadata storage - isolated per run() invocation for thread safety
-        run_metadata: dict[str, list[Any]] = {}
-
         full_msg_history: list[list[ChatMessage]] = []
-        finish_params: FinishParams | None = None

         # Cumulative stats for spinner
         total_tool_calls = 0
         total_input_tokens = 0
         total_output_tokens = 0

-        for i in range(self._max_turns):
-            if self._max_turns - i <= 30 and i != 0:
+        for i in range(start_turn, self._max_turns):
+            # Capture current state for potential caching (before any async work)
+            self._current_run_state = CacheState(
+                msgs=list(msgs),
+                full_msg_history=[list(group) for group in full_msg_history],
+                turn=i,
+                run_metadata=dict(run_metadata),
+                task_hash=task_hash,
+                agent_name=self._name,
+            )
+            if self._max_turns - i <= self._turns_remaining_warning_threshold and i != 0:
                 num_turns_remaining_msg = _num_turns_remaining_msg(self._max_turns - i)
                 msgs.append(num_turns_remaining_msg)
                 self._logger.user_message(num_turns_remaining_msg)

             # Pass turn info to step() for real-time logging
-            assistant_message, tool_messages, finish_call = await self.step(
+            assistant_message, tool_messages, finish_params = await self.step(
                 msgs,
                 run_metadata,
                 turn=i + 1,
@@ -904,18 +1059,8 @@ class Agent[FinishParams: BaseModel, FinishMeta]:

             msgs.extend([assistant_message, *tool_messages, *user_messages])

-            if finish_call:
-                try:
-                    finish_arguments = json.loads(finish_call.arguments)
-                    if self._finish_tool.parameters is not None:
-                        finish_params = self._finish_tool.parameters.model_validate(finish_arguments)
-                    break
-                except (json.JSONDecodeError, ValidationError, TypeError):
-                    LOGGER.debug(
-                        "Agent tried to use the finish tool but the tool call is not valid: %r",
-                        finish_call.arguments,
-                    )
-                    # continue until the finish tool call is valid
+            if finish_params:
+                break

             pct_context_used = assistant_message.token_usage.total / self._client.max_tokens
             if pct_context_used >= self._context_summarization_cutoff and i + 1 != self._max_turns:
@@ -930,15 +1075,18 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
         full_msg_history.append(msgs)

         # Add agent's own token usage to run_metadata under "token_usage" key
-        agent_token_usage = _get_total_token_usage(full_msg_history)
-        if "token_usage" not in run_metadata:
-            run_metadata["token_usage"] = []
-        run_metadata["token_usage"].append(agent_token_usage)
+        run_metadata["token_usage"] = _get_total_token_usage(full_msg_history)

         # Store for __aexit__ to access (on instance for this agent)
         self._last_finish_params = finish_params
         self._last_run_metadata = run_metadata

+        # Clear cache on successful completion (finish_params is set)
+        if finish_params is not None and cache_manager.clear_on_success:
+            cache_manager.clear_cache(task_hash)
+            self._current_task_hash = None
+            self._current_run_state = None
+
         return finish_params, full_msg_history, run_metadata

     def to_tool(
@@ -1066,6 +1214,7 @@ class Agent[FinishParams: BaseModel, FinishMeta]:
             )
             return ToolResult(
                 content=f"<sub_agent_result>\n<error>{e!s}</error>\n</sub_agent_result>",
+                success=False,
                 metadata=error_metadata,
             )
         finally:
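
Taken together, 0.1.3 adds resumable runs (state is cached in __aexit__ on interrupt or failure and restored via resume=True), a skills directory that is uploaded to the execution environment and surfaced in the system prompt, a success flag threaded from ToolResult through ToolMessage, and a configurable turns-remaining warning threshold. Below is a minimal usage sketch of the new session() options; the Agent constructor arguments are elided because they fall outside this diff, and the import path, directory names, and task string are illustrative assumptions:

import asyncio
from pathlib import Path

from stirrup.core.agent import Agent  # import path assumed from the file shown above


async def main() -> None:
    agent = Agent(...)  # construct with your client/tools as usual (arguments elided; not shown in this diff)

    async with agent.session(
        output_dir=Path("out"),
        skills_dir=Path("skills"),     # new in 0.1.3: uploaded to the exec env, metadata added to the system prompt
        resume=True,                   # new in 0.1.3: resume from a cache keyed by a hash of init_msgs, if present
        clear_cache_on_success=False,  # new in 0.1.3: keep the cache for inspection instead of clearing it
    ):
        # On Ctrl-C, the root agent converts SIGINT into KeyboardInterrupt so that
        # __aexit__ runs and caches msgs, the turn index, run_metadata, and exec-env files.
        finish_params, history, run_metadata = await agent.run("Analyze data/*.csv")
        # As of 0.1.3, run_metadata["token_usage"] is a list of per-assistant-message
        # TokenUsage objects rather than a single summed TokenUsage.


asyncio.run(main())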