PyPI - strix-agent - Versions diffs - 0.1.1__py3-none-any.whl - Mend

strix-agent 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (99) hide show

strix/__init__.py +0 -0
strix/agents/StrixAgent/__init__.py +4 -0
strix/agents/StrixAgent/strix_agent.py +60 -0
strix/agents/StrixAgent/system_prompt.jinja +504 -0
strix/agents/__init__.py +10 -0
strix/agents/base_agent.py +394 -0
strix/agents/state.py +139 -0
strix/cli/__init__.py +4 -0
strix/cli/app.py +1124 -0
strix/cli/assets/cli.tcss +680 -0
strix/cli/main.py +542 -0
strix/cli/tool_components/__init__.py +39 -0
strix/cli/tool_components/agents_graph_renderer.py +129 -0
strix/cli/tool_components/base_renderer.py +61 -0
strix/cli/tool_components/browser_renderer.py +107 -0
strix/cli/tool_components/file_edit_renderer.py +95 -0
strix/cli/tool_components/finish_renderer.py +32 -0
strix/cli/tool_components/notes_renderer.py +108 -0
strix/cli/tool_components/proxy_renderer.py +255 -0
strix/cli/tool_components/python_renderer.py +34 -0
strix/cli/tool_components/registry.py +72 -0
strix/cli/tool_components/reporting_renderer.py +53 -0
strix/cli/tool_components/scan_info_renderer.py +58 -0
strix/cli/tool_components/terminal_renderer.py +99 -0
strix/cli/tool_components/thinking_renderer.py +29 -0
strix/cli/tool_components/user_message_renderer.py +43 -0
strix/cli/tool_components/web_search_renderer.py +28 -0
strix/cli/tracer.py +308 -0
strix/llm/__init__.py +14 -0
strix/llm/config.py +19 -0
strix/llm/llm.py +310 -0
strix/llm/memory_compressor.py +206 -0
strix/llm/request_queue.py +63 -0
strix/llm/utils.py +84 -0
strix/prompts/__init__.py +113 -0
strix/prompts/coordination/root_agent.jinja +41 -0
strix/prompts/vulnerabilities/authentication_jwt.jinja +129 -0
strix/prompts/vulnerabilities/business_logic.jinja +143 -0
strix/prompts/vulnerabilities/csrf.jinja +168 -0
strix/prompts/vulnerabilities/idor.jinja +164 -0
strix/prompts/vulnerabilities/race_conditions.jinja +194 -0
strix/prompts/vulnerabilities/rce.jinja +222 -0
strix/prompts/vulnerabilities/sql_injection.jinja +216 -0
strix/prompts/vulnerabilities/ssrf.jinja +168 -0
strix/prompts/vulnerabilities/xss.jinja +221 -0
strix/prompts/vulnerabilities/xxe.jinja +276 -0
strix/runtime/__init__.py +19 -0
strix/runtime/docker_runtime.py +298 -0
strix/runtime/runtime.py +25 -0
strix/runtime/tool_server.py +97 -0
strix/tools/__init__.py +64 -0
strix/tools/agents_graph/__init__.py +16 -0
strix/tools/agents_graph/agents_graph_actions.py +610 -0
strix/tools/agents_graph/agents_graph_actions_schema.xml +223 -0
strix/tools/argument_parser.py +120 -0
strix/tools/browser/__init__.py +4 -0
strix/tools/browser/browser_actions.py +236 -0
strix/tools/browser/browser_actions_schema.xml +183 -0
strix/tools/browser/browser_instance.py +533 -0
strix/tools/browser/tab_manager.py +342 -0
strix/tools/executor.py +302 -0
strix/tools/file_edit/__init__.py +4 -0
strix/tools/file_edit/file_edit_actions.py +141 -0
strix/tools/file_edit/file_edit_actions_schema.xml +128 -0
strix/tools/finish/__init__.py +4 -0
strix/tools/finish/finish_actions.py +167 -0
strix/tools/finish/finish_actions_schema.xml +45 -0
strix/tools/notes/__init__.py +14 -0
strix/tools/notes/notes_actions.py +191 -0
strix/tools/notes/notes_actions_schema.xml +150 -0
strix/tools/proxy/__init__.py +20 -0
strix/tools/proxy/proxy_actions.py +101 -0
strix/tools/proxy/proxy_actions_schema.xml +267 -0
strix/tools/proxy/proxy_manager.py +785 -0
strix/tools/python/__init__.py +4 -0
strix/tools/python/python_actions.py +47 -0
strix/tools/python/python_actions_schema.xml +131 -0
strix/tools/python/python_instance.py +172 -0
strix/tools/python/python_manager.py +131 -0
strix/tools/registry.py +196 -0
strix/tools/reporting/__init__.py +6 -0
strix/tools/reporting/reporting_actions.py +63 -0
strix/tools/reporting/reporting_actions_schema.xml +30 -0
strix/tools/terminal/__init__.py +4 -0
strix/tools/terminal/terminal_actions.py +53 -0
strix/tools/terminal/terminal_actions_schema.xml +114 -0
strix/tools/terminal/terminal_instance.py +231 -0
strix/tools/terminal/terminal_manager.py +191 -0
strix/tools/thinking/__init__.py +4 -0
strix/tools/thinking/thinking_actions.py +18 -0
strix/tools/thinking/thinking_actions_schema.xml +52 -0
strix/tools/web_search/__init__.py +4 -0
strix/tools/web_search/web_search_actions.py +80 -0
strix/tools/web_search/web_search_actions_schema.xml +83 -0
strix_agent-0.1.1.dist-info/LICENSE +201 -0
strix_agent-0.1.1.dist-info/METADATA +200 -0
strix_agent-0.1.1.dist-info/RECORD +99 -0
strix_agent-0.1.1.dist-info/WHEEL +4 -0
strix_agent-0.1.1.dist-info/entry_points.txt +3 -0

strix/tools/agents_graph/agents_graph_actions_schema.xml ADDED Viewed

@@ -0,0 +1,223 @@
+<tools>
+  <tool name="agent_finish">
+    <description>Mark a subagent's task as completed and optionally report results to parent agent.
+IMPORTANT: This tool can ONLY be used by subagents (agents with a parent).
+Root/main agents must use finish_scan instead.
+This tool should be called when a subagent completes its assigned subtask to:
+- Mark the subagent's task as completed
+- Report findings back to the parent agent
+Use this tool when:
+- You are a subagent working on a specific subtask
+- You have completed your assigned task
+- You want to report your findings to the parent agent
+- You are ready to terminate this subagent's execution</description>
+    <details>This tool handles sub-agent completion only; root/main agents must finish
+  with finish_scan instead. When a sub-agent finishes, it can report its findings
+  back to the parent agent for coordination.</details>
+    <parameters>
+      <parameter name="result_summary" type="string" required="true">
+        <description>Summary of what the agent accomplished and discovered</description>
+      </parameter>
+      <parameter name="findings" type="string" required="false">
+        <description>List of specific findings, vulnerabilities, or discoveries</description>
+      </parameter>
+      <parameter name="success" type="boolean" required="false">
+        <description>Whether the agent's task completed successfully</description>
+      </parameter>
+      <parameter name="report_to_parent" type="boolean" required="false">
+        <description>Whether to send results back to the parent agent</description>
+      </parameter>
+      <parameter name="final_recommendations" type="string" required="false">
+        <description>Recommendations for next steps or follow-up actions</description>
+      </parameter>
+    </parameters>
+    <returns type="Dict[str, Any]">
+      <description>Response containing: - agent_completed: Whether the agent was marked as completed - parent_notified: Whether parent was notified (if applicable) - completion_summary: Summary of completion status</description>
+    </returns>
+    <examples>
+  # Sub-agent completing subdomain enumeration task
+  <function=agent_finish>
+  <parameter=result_summary>Completed comprehensive subdomain enumeration for target.com.
+              Discovered 47 subdomains including several interesting ones with admin/dev
+              in the name. Found 3 subdomains with exposed services on non-standard
+              ports.</parameter>
+  <parameter=findings>["admin.target.com - exposed phpMyAdmin",
+                "dev-api.target.com - unauth API endpoints",
+                "staging.target.com - directory listing enabled",
+                "mail.target.com - POP3/IMAP services"]</parameter>
+  <parameter=success>true</parameter>
+  <parameter=report_to_parent>true</parameter>
+  <parameter=final_recommendations>["Prioritize testing admin.target.com for default creds",
+                             "Enumerate dev-api.target.com API endpoints",
+                             "Check staging.target.com for sensitive files"]</parameter>
+  </function>
+    </examples>
+  </tool>
+  <tool name="create_agent">
+    <description>Create and spawn a new agent to handle a specific subtask.
+MANDATORY REQUIREMENT: You MUST call view_agent_graph FIRST before creating any new agent to check if there is already an agent working on the same or similar task. Only create a new agent if no existing agent is handling the specific task.</description>
+    <details>The new agent inherits the parent's conversation history and context up to the point
+  of creation, then continues with its assigned subtask. This enables decomposition
+  of complex penetration testing tasks into specialized sub-agents.
+  The agent runs asynchronously and independently, allowing the parent to continue
+  immediately while the new agent executes its task in the background.
+  CRITICAL: Before calling this tool, you MUST first use view_agent_graph to:
+  - Examine all existing agents and their current tasks
+  - Verify no agent is already working on the same or similar objective
+  - Avoid duplication of effort and resource waste
+  - Ensure efficient coordination across the multi-agent system
+  If you, as a parent agent, have absolutely nothing to do while your subagents are running, you can use the wait_for_message tool. The subagents will continue to run in the background and update you when they are done.
+  </details>
+    <parameters>
+      <parameter name="task" type="string" required="true">
+        <description>The specific task/objective for the new agent to accomplish</description>
+      </parameter>
+      <parameter name="name" type="string" required="true">
+        <description>Human-readable name for the agent (for tracking purposes)</description>
+      </parameter>
+      <parameter name="inherit_context" type="boolean" required="false">
+        <description>Whether the new agent should inherit parent's conversation history and context</description>
+      </parameter>
+      <parameter name="prompt_modules" type="string" required="false">
+        <description>Comma-separated list of prompt modules to use for the agent. Most agents should have at least one module in order to be useful. {{DYNAMIC_MODULES_DESCRIPTION}}</description>
+      </parameter>
+    </parameters>
+    <returns type="Dict[str, Any]">
+      <description>Response containing: - agent_id: Unique identifier for the created agent - success: Whether the agent was created successfully - message: Status message - agent_info: Details about the created agent</description>
+    </returns>
+    <examples>
+  # REQUIRED: First check agent graph before creating any new agent
+  <function=view_agent_graph>
+  </function>
+  # REQUIRED: Check agent graph again before creating another agent
+  <function=view_agent_graph>
+  </function>
+  # After confirming no SQL testing agent exists, create agent for vulnerability validation
+  <function=create_agent>
+  <parameter=task>Validate and exploit the suspected SQL injection vulnerability found in
+              the login form. Confirm exploitability and document proof of concept.</parameter>
+  <parameter=name>SQLi Validator</parameter>
+  <parameter=prompt_modules>sql_injection</parameter>
+  </function>
+  # Create specialized authentication testing agent with multiple modules (comma-separated)
+  <function=create_agent>
+  <parameter=task>Test authentication mechanisms, JWT implementation, and session management
+              for security vulnerabilities and bypass techniques.</parameter>
+  <parameter=name>Auth Specialist</parameter>
+  <parameter=prompt_modules>authentication_jwt, business_logic</parameter>
+  </function>
+    </examples>
+  </tool>
+  <tool name="send_message_to_agent">
+    <description>Send a message to another agent in the graph for coordination and communication.</description>
+    <details>This enables agents to communicate with each other during execution for:
+  - Sharing discovered information or findings
+  - Asking questions or requesting assistance
+  - Providing instructions or coordination
+  - Reporting status or results</details>
+    <parameters>
+      <parameter name="target_agent_id" type="string" required="true">
+        <description>ID of the agent to send the message to</description>
+      </parameter>
+      <parameter name="message" type="string" required="true">
+        <description>The message content to send</description>
+      </parameter>
+      <parameter name="message_type" type="string" required="false">
+        <description>Type of message being sent: - "query": Question requiring a response - "instruction": Command or directive for the target agent - "information": Informational message (findings, status, etc.)</description>
+      </parameter>
+      <parameter name="priority" type="string" required="false">
+        <description>Priority level of the message</description>
+      </parameter>
+    </parameters>
+    <returns type="Dict[str, Any]">
+      <description>Response containing: - success: Whether the message was sent successfully - message_id: Unique identifier for the message - delivery_status: Status of message delivery</description>
+    </returns>
+    <examples>
+  # Share discovered vulnerability information
+  <function=send_message_to_agent>
+  <parameter=target_agent_id>agent_abc123</parameter>
+  <parameter=message>Found SQL injection vulnerability in /login.php parameter 'username'.
+              Payload: admin' OR '1'='1' -- successfully bypassed authentication.
+              You should focus your testing on the authenticated areas of the
+              application.</parameter>
+  <parameter=message_type>information</parameter>
+  <parameter=priority>high</parameter>
+  </function>
+  # Request assistance from specialist agent
+  <function=send_message_to_agent>
+  <parameter=target_agent_id>agent_def456</parameter>
+  <parameter=message>I've identified what appears to be a custom encryption implementation
+              in the API responses. Can you analyze the cryptographic strength and look
+              for potential weaknesses?</parameter>
+  <parameter=message_type>query</parameter>
+  <parameter=priority>normal</parameter>
+  </function>
+    </examples>
+  </tool>
+  <tool name="view_agent_graph">
+    <description>View the current agent graph showing all agents, their relationships, and status.</description>
+    <details>This provides a comprehensive overview of the multi-agent system including:
+  - All agent nodes with their tasks, status, and metadata
+  - Parent-child relationships between agents
+  - Message communication patterns
+  - Current execution state</details>
+    <returns type="Dict[str, Any]">
+      <description>Response containing: - graph_structure: Human-readable representation of the agent graph - summary: High-level statistics about the graph</description>
+    </returns>
+  </tool>
+  <tool name="wait_for_message">
+    <description>Pause the agent loop indefinitely until receiving a message from another agent or user.
+This tool puts the agent into a waiting state where it remains idle until it receives any form of communication. The agent will automatically resume execution when a message arrives.
+IMPORTANT: This tool causes the agent to stop all activity until a message is received. Use it when you need to:
+- Wait for subagent completion reports
+- Coordinate with other agents before proceeding
+- Pause for user input or decisions
+- Synchronize multi-agent workflows
+NOTE: If you are waiting for an agent that is NOT your subagent, you must first tell it to message you with updates before waiting for it. Otherwise, you will wait forever!
+</description>
+    <details>When this tool is called, the agent enters a waiting state and will not continue execution until:
+  - Another agent sends it a message via send_message_to_agent
+  - A user sends it a direct message through the CLI
+  - Any other form of inter-agent or user communication occurs
+  The agent will automatically resume from where it left off once a message is received.
+  This is particularly useful for parent agents waiting for subagent results or for coordination points in multi-agent workflows.</details>
+    <parameters>
+      <parameter name="reason" type="string" required="false">
+        <description>Explanation for why the agent is waiting (for logging and monitoring purposes)</description>
+      </parameter>
+    </parameters>
+    <returns type="Dict[str, Any]">
+      <description>Response containing: - success: Whether the agent successfully entered waiting state - status: Current agent status ("waiting") - reason: The reason for waiting - agent_info: Details about the waiting agent - resume_conditions: List of conditions that will resume the agent</description>
+    </returns>
+    <examples>
+  # Wait for subagents to complete their tasks
+  <function=wait_for_message>
+  <parameter=reason>Waiting for subdomain enumeration and port scanning subagents to complete their tasks and report findings</parameter>
+  </function>
+  # Wait for user input on next steps
+  <function=wait_for_message>
+  <parameter=reason>Waiting for user decision on whether to proceed with exploitation of discovered SQL injection vulnerability</parameter>
+  </function>
+  # Coordinate with other agents
+  <function=wait_for_message>
+  <parameter=reason>Waiting for vulnerability assessment agent to share discovered attack vectors before proceeding with exploitation phase</parameter>
+  </function>
+    </examples>
+  </tool>
+</tools>

strix/tools/argument_parser.py ADDED Viewed

@@ -0,0 +1,120 @@
+import contextlib
+import inspect
+import json
+from collections.abc import Callable
+from typing import Any, Union, get_args, get_origin
+class ArgumentConversionError(Exception):
+    def __init__(self, message: str, param_name: str | None = None) -> None:
+        self.param_name = param_name
+        super().__init__(message)
+def convert_arguments(func: Callable[..., Any], kwargs: dict[str, Any]) -> dict[str, Any]:
+    try:
+        sig = inspect.signature(func)
+        converted = {}
+        for param_name, value in kwargs.items():
+            if param_name not in sig.parameters:
+                converted[param_name] = value
+                continue
+            param = sig.parameters[param_name]
+            param_type = param.annotation
+            if param_type == inspect.Parameter.empty or value is None:
+                converted[param_name] = value
+                continue
+            if not isinstance(value, str):
+                converted[param_name] = value
+                continue
+            try:
+                converted[param_name] = convert_string_to_type(value, param_type)
+            except (ValueError, TypeError, json.JSONDecodeError) as e:
+                raise ArgumentConversionError(
+                    f"Failed to convert argument '{param_name}' to type {param_type}: {e}",
+                    param_name=param_name,
+                ) from e
+    except (ValueError, TypeError, AttributeError) as e:
+        raise ArgumentConversionError(f"Failed to process function arguments: {e}") from e
+    return converted
+def convert_string_to_type(value: str, param_type: Any) -> Any:
+    origin = get_origin(param_type)
+    if origin is Union or origin is type(str | None):
+        args = get_args(param_type)
+        for arg_type in args:
+            if arg_type is not type(None):
+                with contextlib.suppress(ValueError, TypeError, json.JSONDecodeError):
+                    return convert_string_to_type(value, arg_type)
+        return value
+    if hasattr(param_type, "__args__"):
+        args = getattr(param_type, "__args__", ())
+        if len(args) == 2 and type(None) in args:
+            non_none_type = args[0] if args[1] is type(None) else args[1]
+            with contextlib.suppress(ValueError, TypeError, json.JSONDecodeError):
+                return convert_string_to_type(value, non_none_type)
+            return value
+    return _convert_basic_types(value, param_type, origin)
+def _convert_basic_types(value: str, param_type: Any, origin: Any = None) -> Any:
+    basic_type_converters: dict[Any, Callable[[str], Any]] = {
+        int: int,
+        float: float,
+        bool: _convert_to_bool,
+        str: str,
+    }
+    if param_type in basic_type_converters:
+        return basic_type_converters[param_type](value)
+    if list in (origin, param_type):
+        return _convert_to_list(value)
+    if dict in (origin, param_type):
+        return _convert_to_dict(value)
+    with contextlib.suppress(json.JSONDecodeError):
+        return json.loads(value)
+    return value
+def _convert_to_bool(value: str) -> bool:
+    if value.lower() in ("true", "1", "yes", "on"):
+        return True
+    if value.lower() in ("false", "0", "no", "off"):
+        return False
+    return bool(value)
+def _convert_to_list(value: str) -> list[Any]:
+    try:
+        parsed = json.loads(value)
+        if isinstance(parsed, list):
+            return parsed
+    except json.JSONDecodeError:
+        if "," in value:
+            return [item.strip() for item in value.split(",")]
+        return [value]
+    else:
+        return [parsed]
+def _convert_to_dict(value: str) -> dict[str, Any]:
+    try:
+        parsed = json.loads(value)
+        if isinstance(parsed, dict):
+            return parsed
+    except json.JSONDecodeError:
+        return {}
+    else:
+        return {}

strix/tools/browser/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .browser_actions import browser_action
+# Names exported by ``from strix.tools.browser import *``.
+__all__ = ["browser_action"]

strix/tools/browser/browser_actions.py ADDED Viewed

@@ -0,0 +1,236 @@
+from typing import Any, Literal, NoReturn
+from strix.tools.registry import register_tool
+from .tab_manager import BrowserTabManager, get_browser_tab_manager
+# Closed set of action names accepted by the ``action`` parameter of ``browser_action``.
+BrowserAction = Literal[
+    "launch",
+    "goto",
+    "click",
+    "type",
+    "scroll_down",
+    "scroll_up",
+    "back",
+    "forward",
+    "new_tab",
+    "switch_tab",
+    "close_tab",
+    "wait",
+    "execute_js",
+    "double_click",
+    "hover",
+    "press_key",
+    "save_pdf",
+    "get_console_logs",
+    "view_source",
+    "close",
+    "list_tabs",
+]
+def _validate_url(action_name: str, url: str | None) -> None:
+    if not url:
+        raise ValueError(f"url parameter is required for {action_name} action")
+def _validate_coordinate(action_name: str, coordinate: str | None) -> None:
+    if not coordinate:
+        raise ValueError(f"coordinate parameter is required for {action_name} action")
+def _validate_text(action_name: str, text: str | None) -> None:
+    if not text:
+        raise ValueError(f"text parameter is required for {action_name} action")
+def _validate_tab_id(action_name: str, tab_id: str | None) -> None:
+    if not tab_id:
+        raise ValueError(f"tab_id parameter is required for {action_name} action")
+def _validate_js_code(action_name: str, js_code: str | None) -> None:
+    if not js_code:
+        raise ValueError(f"js_code parameter is required for {action_name} action")
+def _validate_duration(action_name: str, duration: float | None) -> None:
+    if duration is None:
+        raise ValueError(f"duration parameter is required for {action_name} action")
+def _validate_key(action_name: str, key: str | None) -> None:
+    if not key:
+        raise ValueError(f"key parameter is required for {action_name} action")
+def _validate_file_path(action_name: str, file_path: str | None) -> None:
+    if not file_path:
+        raise ValueError(f"file_path parameter is required for {action_name} action")
+def _handle_navigation_actions(
+    manager: BrowserTabManager,
+    action: str,
+    url: str | None = None,
+    tab_id: str | None = None,
+) -> dict[str, Any]:
+    if action == "launch":
+        return manager.launch_browser(url)
+    if action == "goto":
+        _validate_url(action, url)
+        assert url is not None
+        return manager.goto_url(url, tab_id)
+    if action == "back":
+        return manager.back(tab_id)
+    if action == "forward":
+        return manager.forward(tab_id)
+    raise ValueError(f"Unknown navigation action: {action}")
+def _handle_interaction_actions(
+    manager: BrowserTabManager,
+    action: str,
+    coordinate: str | None = None,
+    text: str | None = None,
+    key: str | None = None,
+    tab_id: str | None = None,
+) -> dict[str, Any]:
+    if action in {"click", "double_click", "hover"}:
+        _validate_coordinate(action, coordinate)
+        assert coordinate is not None
+        action_map = {
+            "click": manager.click,
+            "double_click": manager.double_click,
+            "hover": manager.hover,
+        }
+        return action_map[action](coordinate, tab_id)
+    if action in {"scroll_down", "scroll_up"}:
+        direction = "down" if action == "scroll_down" else "up"
+        return manager.scroll(direction, tab_id)
+    if action == "type":
+        _validate_text(action, text)
+        assert text is not None
+        return manager.type_text(text, tab_id)
+    if action == "press_key":
+        _validate_key(action, key)
+        assert key is not None
+        return manager.press_key(key, tab_id)
+    raise ValueError(f"Unknown interaction action: {action}")
+def _raise_unknown_action(action: str) -> NoReturn:
+    raise ValueError(f"Unknown action: {action}")
+def _handle_tab_actions(
+    manager: BrowserTabManager,
+    action: str,
+    url: str | None = None,
+    tab_id: str | None = None,
+) -> dict[str, Any]:
+    if action == "new_tab":
+        return manager.new_tab(url)
+    if action == "switch_tab":
+        _validate_tab_id(action, tab_id)
+        assert tab_id is not None
+        return manager.switch_tab(tab_id)
+    if action == "close_tab":
+        _validate_tab_id(action, tab_id)
+        assert tab_id is not None
+        return manager.close_tab(tab_id)
+    if action == "list_tabs":
+        return manager.list_tabs()
+    raise ValueError(f"Unknown tab action: {action}")
+def _handle_utility_actions(
+    manager: BrowserTabManager,
+    action: str,
+    duration: float | None = None,
+    js_code: str | None = None,
+    file_path: str | None = None,
+    tab_id: str | None = None,
+    clear: bool = False,
+) -> dict[str, Any]:
+    if action == "wait":
+        _validate_duration(action, duration)
+        assert duration is not None
+        return manager.wait_browser(duration, tab_id)
+    if action == "execute_js":
+        _validate_js_code(action, js_code)
+        assert js_code is not None
+        return manager.execute_js(js_code, tab_id)
+    if action == "save_pdf":
+        _validate_file_path(action, file_path)
+        assert file_path is not None
+        return manager.save_pdf(file_path, tab_id)
+    if action == "get_console_logs":
+        return manager.get_console_logs(tab_id, clear)
+    if action == "view_source":
+        return manager.view_source(tab_id)
+    if action == "close":
+        return manager.close_browser()
+    raise ValueError(f"Unknown utility action: {action}")
+@register_tool
+def browser_action(
+    action: BrowserAction,
+    url: str | None = None,
+    coordinate: str | None = None,
+    text: str | None = None,
+    tab_id: str | None = None,
+    js_code: str | None = None,
+    duration: float | None = None,
+    key: str | None = None,
+    file_path: str | None = None,
+    clear: bool = False,
+) -> dict[str, Any]:
+    manager = get_browser_tab_manager()
+    try:
+        navigation_actions = {"launch", "goto", "back", "forward"}
+        interaction_actions = {
+            "click",
+            "type",
+            "double_click",
+            "hover",
+            "press_key",
+            "scroll_down",
+            "scroll_up",
+        }
+        tab_actions = {"new_tab", "switch_tab", "close_tab", "list_tabs"}
+        utility_actions = {
+            "wait",
+            "execute_js",
+            "save_pdf",
+            "get_console_logs",
+            "view_source",
+            "close",
+        }
+        if action in navigation_actions:
+            return _handle_navigation_actions(manager, action, url, tab_id)
+        if action in interaction_actions:
+            return _handle_interaction_actions(manager, action, coordinate, text, key, tab_id)
+        if action in tab_actions:
+            return _handle_tab_actions(manager, action, url, tab_id)
+        if action in utility_actions:
+            return _handle_utility_actions(
+                manager, action, duration, js_code, file_path, tab_id, clear
+            )
+        _raise_unknown_action(action)
+    except (ValueError, RuntimeError) as e:
+        return {
+            "error": str(e),
+            "tab_id": tab_id,
+            "screenshot": "",
+            "is_running": False,
+        }