PyPI - strix-agent - Versions diffs - 0.4.0__py3-none-any.whl - Mend

strix-agent 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (118) hide show

strix/__init__.py +0 -0
strix/agents/StrixAgent/__init__.py +4 -0
strix/agents/StrixAgent/strix_agent.py +89 -0
strix/agents/StrixAgent/system_prompt.jinja +404 -0
strix/agents/__init__.py +10 -0
strix/agents/base_agent.py +518 -0
strix/agents/state.py +163 -0
strix/interface/__init__.py +4 -0
strix/interface/assets/tui_styles.tcss +694 -0
strix/interface/cli.py +230 -0
strix/interface/main.py +500 -0
strix/interface/tool_components/__init__.py +39 -0
strix/interface/tool_components/agents_graph_renderer.py +123 -0
strix/interface/tool_components/base_renderer.py +62 -0
strix/interface/tool_components/browser_renderer.py +120 -0
strix/interface/tool_components/file_edit_renderer.py +99 -0
strix/interface/tool_components/finish_renderer.py +31 -0
strix/interface/tool_components/notes_renderer.py +108 -0
strix/interface/tool_components/proxy_renderer.py +255 -0
strix/interface/tool_components/python_renderer.py +34 -0
strix/interface/tool_components/registry.py +72 -0
strix/interface/tool_components/reporting_renderer.py +53 -0
strix/interface/tool_components/scan_info_renderer.py +64 -0
strix/interface/tool_components/terminal_renderer.py +131 -0
strix/interface/tool_components/thinking_renderer.py +29 -0
strix/interface/tool_components/user_message_renderer.py +43 -0
strix/interface/tool_components/web_search_renderer.py +28 -0
strix/interface/tui.py +1274 -0
strix/interface/utils.py +559 -0
strix/llm/__init__.py +15 -0
strix/llm/config.py +20 -0
strix/llm/llm.py +465 -0
strix/llm/memory_compressor.py +212 -0
strix/llm/request_queue.py +87 -0
strix/llm/utils.py +87 -0
strix/prompts/README.md +64 -0
strix/prompts/__init__.py +109 -0
strix/prompts/cloud/.gitkeep +0 -0
strix/prompts/coordination/root_agent.jinja +41 -0
strix/prompts/custom/.gitkeep +0 -0
strix/prompts/frameworks/fastapi.jinja +142 -0
strix/prompts/frameworks/nextjs.jinja +126 -0
strix/prompts/protocols/graphql.jinja +215 -0
strix/prompts/reconnaissance/.gitkeep +0 -0
strix/prompts/technologies/firebase_firestore.jinja +177 -0
strix/prompts/technologies/supabase.jinja +189 -0
strix/prompts/vulnerabilities/authentication_jwt.jinja +147 -0
strix/prompts/vulnerabilities/broken_function_level_authorization.jinja +146 -0
strix/prompts/vulnerabilities/business_logic.jinja +171 -0
strix/prompts/vulnerabilities/csrf.jinja +174 -0
strix/prompts/vulnerabilities/idor.jinja +195 -0
strix/prompts/vulnerabilities/information_disclosure.jinja +222 -0
strix/prompts/vulnerabilities/insecure_file_uploads.jinja +188 -0
strix/prompts/vulnerabilities/mass_assignment.jinja +141 -0
strix/prompts/vulnerabilities/open_redirect.jinja +177 -0
strix/prompts/vulnerabilities/path_traversal_lfi_rfi.jinja +142 -0
strix/prompts/vulnerabilities/race_conditions.jinja +164 -0
strix/prompts/vulnerabilities/rce.jinja +154 -0
strix/prompts/vulnerabilities/sql_injection.jinja +151 -0
strix/prompts/vulnerabilities/ssrf.jinja +135 -0
strix/prompts/vulnerabilities/subdomain_takeover.jinja +155 -0
strix/prompts/vulnerabilities/xss.jinja +169 -0
strix/prompts/vulnerabilities/xxe.jinja +184 -0
strix/runtime/__init__.py +19 -0
strix/runtime/docker_runtime.py +399 -0
strix/runtime/runtime.py +29 -0
strix/runtime/tool_server.py +205 -0
strix/telemetry/__init__.py +4 -0
strix/telemetry/tracer.py +337 -0
strix/tools/__init__.py +64 -0
strix/tools/agents_graph/__init__.py +16 -0
strix/tools/agents_graph/agents_graph_actions.py +621 -0
strix/tools/agents_graph/agents_graph_actions_schema.xml +226 -0
strix/tools/argument_parser.py +121 -0
strix/tools/browser/__init__.py +4 -0
strix/tools/browser/browser_actions.py +236 -0
strix/tools/browser/browser_actions_schema.xml +183 -0
strix/tools/browser/browser_instance.py +533 -0
strix/tools/browser/tab_manager.py +342 -0
strix/tools/executor.py +305 -0
strix/tools/file_edit/__init__.py +4 -0
strix/tools/file_edit/file_edit_actions.py +141 -0
strix/tools/file_edit/file_edit_actions_schema.xml +128 -0
strix/tools/finish/__init__.py +4 -0
strix/tools/finish/finish_actions.py +174 -0
strix/tools/finish/finish_actions_schema.xml +45 -0
strix/tools/notes/__init__.py +14 -0
strix/tools/notes/notes_actions.py +191 -0
strix/tools/notes/notes_actions_schema.xml +150 -0
strix/tools/proxy/__init__.py +20 -0
strix/tools/proxy/proxy_actions.py +101 -0
strix/tools/proxy/proxy_actions_schema.xml +267 -0
strix/tools/proxy/proxy_manager.py +785 -0
strix/tools/python/__init__.py +4 -0
strix/tools/python/python_actions.py +47 -0
strix/tools/python/python_actions_schema.xml +131 -0
strix/tools/python/python_instance.py +172 -0
strix/tools/python/python_manager.py +131 -0
strix/tools/registry.py +196 -0
strix/tools/reporting/__init__.py +6 -0
strix/tools/reporting/reporting_actions.py +63 -0
strix/tools/reporting/reporting_actions_schema.xml +30 -0
strix/tools/terminal/__init__.py +4 -0
strix/tools/terminal/terminal_actions.py +35 -0
strix/tools/terminal/terminal_actions_schema.xml +146 -0
strix/tools/terminal/terminal_manager.py +151 -0
strix/tools/terminal/terminal_session.py +447 -0
strix/tools/thinking/__init__.py +4 -0
strix/tools/thinking/thinking_actions.py +18 -0
strix/tools/thinking/thinking_actions_schema.xml +52 -0
strix/tools/web_search/__init__.py +4 -0
strix/tools/web_search/web_search_actions.py +80 -0
strix/tools/web_search/web_search_actions_schema.xml +83 -0
strix_agent-0.4.0.dist-info/LICENSE +201 -0
strix_agent-0.4.0.dist-info/METADATA +282 -0
strix_agent-0.4.0.dist-info/RECORD +118 -0
strix_agent-0.4.0.dist-info/WHEEL +4 -0
strix_agent-0.4.0.dist-info/entry_points.txt +3 -0

strix/tools/agents_graph/agents_graph_actions_schema.xml ADDED Viewed

@@ -0,0 +1,226 @@
+<tools>
+  <tool name="agent_finish">
+    <description>Mark a subagent's task as completed and optionally report results to parent agent.
+IMPORTANT: This tool can ONLY be used by subagents (agents with a parent).
+Root/main agents must use finish_scan instead.
+This tool should be called when a subagent completes its assigned subtask to:
+- Mark the subagent's task as completed
+- Report findings back to the parent agent
+Use this tool when:
+- You are a subagent working on a specific subtask
+- You have completed your assigned task
+- You want to report your findings to the parent agent
+- You are ready to terminate this subagent's execution</description>
+    <details>This tool handles sub-agent completion only; root/main agents complete
+  through finish_scan instead. When a sub-agent finishes, it can report its findings
+  back to the parent agent for coordination.</details>
+    <parameters>
+      <parameter name="result_summary" type="string" required="true">
+        <description>Summary of what the agent accomplished and discovered</description>
+      </parameter>
+      <parameter name="findings" type="string" required="false">
+        <description>List of specific findings, vulnerabilities, or discoveries</description>
+      </parameter>
+      <parameter name="success" type="boolean" required="false">
+        <description>Whether the agent's task completed successfully</description>
+      </parameter>
+      <parameter name="report_to_parent" type="boolean" required="false">
+        <description>Whether to send results back to the parent agent</description>
+      </parameter>
+      <parameter name="final_recommendations" type="string" required="false">
+        <description>Recommendations for next steps or follow-up actions</description>
+      </parameter>
+    </parameters>
+    <returns type="Dict[str, Any]">
+      <description>Response containing: - agent_completed: Whether the agent was marked as completed - parent_notified: Whether parent was notified (if applicable) - completion_summary: Summary of completion status</description>
+    </returns>
+    <examples>
+  # Sub-agent completing subdomain enumeration task
+  <function=agent_finish>
+  <parameter=result_summary>Completed comprehensive subdomain enumeration for target.com.
+              Discovered 47 subdomains including several interesting ones with admin/dev
+              in the name. Found 3 subdomains with exposed services on non-standard
+              ports.</parameter>
+  <parameter=findings>["admin.target.com - exposed phpMyAdmin",
+                "dev-api.target.com - unauth API endpoints",
+                "staging.target.com - directory listing enabled",
+                "mail.target.com - POP3/IMAP services"]</parameter>
+  <parameter=success>true</parameter>
+  <parameter=report_to_parent>true</parameter>
+  <parameter=final_recommendations>["Prioritize testing admin.target.com for default creds",
+                             "Enumerate dev-api.target.com API endpoints",
+                             "Check staging.target.com for sensitive files"]</parameter>
+  </function>
+    </examples>
+  </tool>
+  <tool name="create_agent">
+    <description>Create and spawn a new agent to handle a specific subtask.
+Only create a new agent if no existing agent is handling the specific task.</description>
+    <details>The new agent inherits the parent's conversation history and context up to the point
+  of creation, then continues with its assigned subtask. This enables decomposition
+  of complex penetration testing tasks into specialized sub-agents.
+  The agent runs asynchronously and independently, allowing the parent to continue
+  immediately while the new agent executes its task in the background.
+  If you, as a parent agent, have absolutely nothing to do while your subagents are running, you can use the wait_for_message tool. The subagent will continue to run in the background and update you when it's done.
+  </details>
+    <parameters>
+      <parameter name="task" type="string" required="true">
+        <description>The specific task/objective for the new agent to accomplish</description>
+      </parameter>
+      <parameter name="name" type="string" required="true">
+        <description>Human-readable name for the agent (for tracking purposes)</description>
+      </parameter>
+      <parameter name="inherit_context" type="boolean" required="false">
+        <description>Whether the new agent should inherit parent's conversation history and context</description>
+      </parameter>
+      <parameter name="prompt_modules" type="string" required="false">
+        <description>Comma-separated list of prompt modules to use for the agent (MAXIMUM 5 modules allowed). Most agents should have at least one module in order to be useful. Agents should be highly specialized - use 1-3 related modules; up to 5 for complex contexts. {{DYNAMIC_MODULES_DESCRIPTION}}</description>
+      </parameter>
+    </parameters>
+    <returns type="Dict[str, Any]">
+      <description>Response containing: - agent_id: Unique identifier for the created agent - success: Whether the agent was created successfully - message: Status message - agent_info: Details about the created agent</description>
+    </returns>
+    <examples>
+  # After confirming no SQL testing agent exists, create agent for vulnerability validation
+  <function=create_agent>
+  <parameter=task>Validate and exploit the suspected SQL injection vulnerability found in
+              the login form. Confirm exploitability and document proof of concept.</parameter>
+  <parameter=name>SQLi Validator</parameter>
+  <parameter=prompt_modules>sql_injection</parameter>
+  </function>
+  <function=create_agent>
+  <parameter=task>Test authentication mechanisms, JWT implementation, and session management
+              for security vulnerabilities and bypass techniques.</parameter>
+  <parameter=name>Auth Specialist</parameter>
+  <parameter=prompt_modules>authentication_jwt, business_logic</parameter>
+  </function>
+  # Example of single-module specialization (most focused)
+  <function=create_agent>
+  <parameter=task>Perform comprehensive XSS testing including reflected, stored, and DOM-based
+              variants across all identified input points.</parameter>
+  <parameter=name>XSS Specialist</parameter>
+  <parameter=prompt_modules>xss</parameter>
+  </function>
+  # Example of up to 5 related modules (borderline acceptable)
+  <function=create_agent>
+  <parameter=task>Test for server-side vulnerabilities including SSRF, XXE, and potential
+              RCE vectors in file upload and XML processing endpoints.</parameter>
+  <parameter=name>Server-Side Attack Specialist</parameter>
+  <parameter=prompt_modules>ssrf, xxe, rce</parameter>
+  </function>
+    </examples>
+  </tool>
+  <tool name="send_message_to_agent">
+    <description>Send a message to another agent in the graph for coordination and communication.</description>
+    <details>This enables agents to communicate with each other during execution, but should be used only when essential:
+  - Sharing discovered information or findings
+  - Asking questions or requesting assistance
+  - Providing instructions or coordination
+  - Reporting status or results
+Best practices:
+- Avoid routine status updates; batch non-urgent information
+- Prefer parent/child completion flows (agent_finish)
+- Do not message when the context is already known</details>
+    <parameters>
+      <parameter name="target_agent_id" type="string" required="true">
+        <description>ID of the agent to send the message to</description>
+      </parameter>
+      <parameter name="message" type="string" required="true">
+        <description>The message content to send</description>
+      </parameter>
+      <parameter name="message_type" type="string" required="false">
+        <description>Type of message being sent: - "query": Question requiring a response - "instruction": Command or directive for the target agent - "information": Informational message (findings, status, etc.)</description>
+      </parameter>
+      <parameter name="priority" type="string" required="false">
+        <description>Priority level of the message</description>
+      </parameter>
+    </parameters>
+    <returns type="Dict[str, Any]">
+      <description>Response containing: - success: Whether the message was sent successfully - message_id: Unique identifier for the message - delivery_status: Status of message delivery</description>
+    </returns>
+    <examples>
+  # Share discovered vulnerability information
+  <function=send_message_to_agent>
+  <parameter=target_agent_id>agent_abc123</parameter>
+  <parameter=message>Found SQL injection vulnerability in /login.php parameter 'username'.
+              Payload: admin' OR '1'='1' -- successfully bypassed authentication.
+              You should focus your testing on the authenticated areas of the
+              application.</parameter>
+  <parameter=message_type>information</parameter>
+  <parameter=priority>high</parameter>
+  </function>
+  # Request assistance from specialist agent
+  <function=send_message_to_agent>
+  <parameter=target_agent_id>agent_def456</parameter>
+  <parameter=message>I've identified what appears to be a custom encryption implementation
+              in the API responses. Can you analyze the cryptographic strength and look
+              for potential weaknesses?</parameter>
+  <parameter=message_type>query</parameter>
+  <parameter=priority>normal</parameter>
+  </function>
+    </examples>
+  </tool>
+  <tool name="view_agent_graph">
+    <description>View the current agent graph showing all agents, their relationships, and status.</description>
+    <details>This provides a comprehensive overview of the multi-agent system including:
+  - All agent nodes with their tasks, status, and metadata
+  - Parent-child relationships between agents
+  - Message communication patterns
+  - Current execution state</details>
+    <returns type="Dict[str, Any]">
+      <description>Response containing: - graph_structure: Human-readable representation of the agent graph - summary: High-level statistics about the graph</description>
+    </returns>
+  </tool>
+  <tool name="wait_for_message">
+    <description>Pause the agent loop indefinitely until receiving a message from another agent.
+This tool puts the agent into a waiting state where it remains idle until it receives any form of communication. The agent will automatically resume execution when a message arrives.
+IMPORTANT: This tool causes the agent to stop all activity until a message is received. Use it when you need to:
+- Wait for subagent completion reports
+- Coordinate with other agents before proceeding
+- Synchronize multi-agent workflows
+NOTE: If you are waiting for an agent that is NOT your subagent, you must first tell it to message you with updates before waiting for it. Otherwise, you will wait forever!
+</description>
+    <details>When this tool is called, the agent (you) enters a waiting state and will not continue execution until:
+  - Another agent sends a message via send_message_to_agent
+  - Any other form of inter-agent communication occurs
+  - Waiting timeout is reached
+  The agent will automatically resume from where it left off once a message is received.
+  This is particularly useful for parent agents waiting for subagent results or for coordination points in multi-agent workflows.
+  NOTE: If you have finished your task and you do NOT have any child agents running, you should NEVER use this tool; call the finish tool instead.
+  </details>
+    <parameters>
+      <parameter name="reason" type="string" required="false">
+        <description>Explanation for why the agent is waiting (for logging and monitoring purposes)</description>
+      </parameter>
+    </parameters>
+    <returns type="Dict[str, Any]">
+      <description>Response containing: - success: Whether the agent successfully entered waiting state - status: Current agent status ("waiting") - reason: The reason for waiting - agent_info: Details about the waiting agent - resume_conditions: List of conditions that will resume the agent</description>
+    </returns>
+    <examples>
+  # Wait for subagents to complete their tasks
+  <function=wait_for_message>
+  <parameter=reason>Waiting for subdomain enumeration and port scanning subagents to complete their tasks and report findings</parameter>
+  </function>
+  # Coordinate with other agents
+  <function=wait_for_message>
+  <parameter=reason>Waiting for vulnerability assessment agent to share discovered attack vectors before proceeding with exploitation phase</parameter>
+  </function>
+    </examples>
+  </tool>
+</tools>

strix/tools/argument_parser.py ADDED Viewed

@@ -0,0 +1,121 @@
+import contextlib
+import inspect
+import json
+import types
+from collections.abc import Callable
+from typing import Any, Union, get_args, get_origin
+class ArgumentConversionError(Exception):
+    """Raised when a tool argument cannot be converted to its annotated type.
+
+    Attributes:
+        param_name: Name of the parameter that failed to convert, or ``None``
+            when the failure is not tied to a single parameter.
+    """
+
+    def __init__(self, message: str, param_name: str | None = None) -> None:
+        super().__init__(message)
+        self.param_name = param_name
+def convert_arguments(func: Callable[..., Any], kwargs: dict[str, Any]) -> dict[str, Any]:
+    """Convert string keyword arguments to the types annotated on ``func``.
+
+    A value is passed through untouched when its key is not a parameter of
+    ``func``, when the parameter carries no annotation, or when the value is
+    not a ``str`` (which includes ``None``). Every other value is handed to
+    ``convert_string_to_type`` together with the parameter's annotation.
+
+    Args:
+        func: Callable whose signature supplies the target types.
+        kwargs: Raw keyword arguments, typically all strings.
+
+    Returns:
+        A new dict with the same keys and converted values.
+
+    Raises:
+        ArgumentConversionError: If a single value fails to convert (with
+            ``param_name`` set), or if inspecting the signature or walking
+            the arguments fails.
+    """
+    result: dict[str, Any] = {}
+    try:
+        parameters = inspect.signature(func).parameters
+        for name, raw in kwargs.items():
+            param = parameters.get(name)
+            if (
+                param is None
+                or param.annotation == inspect.Parameter.empty
+                or not isinstance(raw, str)
+            ):
+                # Nothing to convert: unknown key, untyped parameter, or non-string value.
+                result[name] = raw
+                continue
+            target_type = param.annotation
+            try:
+                result[name] = convert_string_to_type(raw, target_type)
+            except (ValueError, TypeError, json.JSONDecodeError) as e:
+                raise ArgumentConversionError(
+                    f"Failed to convert argument '{name}' to type {target_type}: {e}",
+                    param_name=name,
+                ) from e
+    except (ValueError, TypeError, AttributeError) as e:
+        raise ArgumentConversionError(f"Failed to process function arguments: {e}") from e
+    return result
+def convert_string_to_type(value: str, param_type: Any) -> Any:
+    """Convert ``value`` to ``param_type``, unwrapping union annotations.
+
+    For a union (``typing.Union[...]``, ``Optional[...]`` or ``X | Y``) each
+    non-``None`` member is tried in declaration order and the first conversion
+    that does not raise wins; if every member fails, ``value`` is returned
+    unchanged. Any other annotation is delegated to ``_convert_basic_types``.
+
+    Args:
+        value: Raw string to convert.
+        param_type: Target annotation.
+
+    Returns:
+        The converted value, or ``value`` itself when no union member accepts it.
+
+    Raises:
+        ValueError, TypeError: Propagated from the basic converters for
+            non-union annotations (e.g. ``int("abc")``).
+    """
+    origin = get_origin(param_type)
+    if origin is Union or isinstance(param_type, types.UnionType):
+        for arg_type in get_args(param_type):
+            if arg_type is type(None):
+                continue
+            with contextlib.suppress(ValueError, TypeError, json.JSONDecodeError):
+                return convert_string_to_type(value, arg_type)
+        return value
+    # Every Optional/union form is handled above. Do not treat arbitrary
+    # two-argument generics that mention None (e.g. dict[str, None]) as
+    # Optional[X]; they must reach the container converters below.
+    return _convert_basic_types(value, param_type, origin)
+def _convert_basic_types(value: str, param_type: Any, origin: Any = None) -> Any:
+    """Convert ``value`` to a scalar, list or dict according to ``param_type``.
+
+    ``int``, ``float``, ``bool`` and ``str`` use their dedicated converters.
+    ``list``/``dict`` (bare or as the generic ``origin``) use the list/dict
+    helpers. Any other annotation is parsed as JSON, and the original string
+    is returned when it is not valid JSON.
+    """
+    scalar_converters: dict[Any, Callable[[str], Any]] = {
+        int: int,
+        float: float,
+        bool: _convert_to_bool,
+        str: str,
+    }
+    converter = scalar_converters.get(param_type)
+    if converter is not None:
+        return converter(value)
+    if list in (origin, param_type):
+        return _convert_to_list(value)
+    if dict in (origin, param_type):
+        return _convert_to_dict(value)
+    # Unknown annotation: best-effort JSON decode, otherwise keep the raw string.
+    try:
+        return json.loads(value)
+    except json.JSONDecodeError:
+        return value
+def _convert_to_bool(value: str) -> bool:
+    """Interpret a string as a boolean.
+
+    Surrounding whitespace is ignored and matching is case-insensitive, so a
+    padded value such as ``" false\n"`` is recognised as ``False`` instead of
+    falling through to the truthiness fallback.
+
+    Args:
+        value: Raw string, e.g. ``"true"``, ``"0"``, ``"Yes"``.
+
+    Returns:
+        ``True`` for ``true/1/yes/on``, ``False`` for ``false/0/no/off``;
+        otherwise the truthiness of the stripped string (empty -> ``False``).
+    """
+    normalized = value.strip().lower()
+    if normalized in ("true", "1", "yes", "on"):
+        return True
+    if normalized in ("false", "0", "no", "off"):
+        return False
+    return bool(normalized)
+def _convert_to_list(value: str) -> list[Any]:
+    """Turn a string into a list.
+
+    A JSON array is returned as-is and any other JSON value is wrapped in a
+    one-element list. Non-JSON text is split on commas (items stripped) when
+    it contains a comma, otherwise it becomes a one-element list holding the
+    unmodified string.
+    """
+    try:
+        decoded = json.loads(value)
+    except json.JSONDecodeError:
+        if "," not in value:
+            return [value]
+        return [piece.strip() for piece in value.split(",")]
+    return decoded if isinstance(decoded, list) else [decoded]
+def _convert_to_dict(value: str) -> dict[str, Any]:
+    """Parse a JSON object from ``value``.
+
+    Returns the decoded dict, or an empty dict when ``value`` is not valid
+    JSON or decodes to something other than a dict.
+    """
+    try:
+        decoded = json.loads(value)
+    except json.JSONDecodeError:
+        return {}
+    return decoded if isinstance(decoded, dict) else {}

strix/tools/browser/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .browser_actions import browser_action
+__all__ = ["browser_action"]

strix/tools/browser/browser_actions.py ADDED Viewed

@@ -0,0 +1,236 @@
+from typing import Any, Literal, NoReturn
+from strix.tools.registry import register_tool
+from .tab_manager import BrowserTabManager, get_browser_tab_manager
+# Closed set of action names accepted by ``browser_action`` (the type of its
+# ``action`` parameter). Each name is routed to one of the ``_handle_*``
+# helper groups defined in this module.
+BrowserAction = Literal[
+    "launch",
+    "goto",
+    "click",
+    "type",
+    "scroll_down",
+    "scroll_up",
+    "back",
+    "forward",
+    "new_tab",
+    "switch_tab",
+    "close_tab",
+    "wait",
+    "execute_js",
+    "double_click",
+    "hover",
+    "press_key",
+    "save_pdf",
+    "get_console_logs",
+    "view_source",
+    "close",
+    "list_tabs",
+]
+def _validate_url(action_name: str, url: str | None) -> None:
+    """Raise ``ValueError`` when ``url`` is ``None`` or empty."""
+    if url:
+        return
+    raise ValueError(f"url parameter is required for {action_name} action")
+def _validate_coordinate(action_name: str, coordinate: str | None) -> None:
+    """Raise ``ValueError`` when ``coordinate`` is ``None`` or empty."""
+    if coordinate:
+        return
+    raise ValueError(f"coordinate parameter is required for {action_name} action")
+def _validate_text(action_name: str, text: str | None) -> None:
+    """Raise ``ValueError`` when ``text`` is ``None`` or empty."""
+    if text:
+        return
+    raise ValueError(f"text parameter is required for {action_name} action")
+def _validate_tab_id(action_name: str, tab_id: str | None) -> None:
+    """Raise ``ValueError`` when ``tab_id`` is ``None`` or empty."""
+    if tab_id:
+        return
+    raise ValueError(f"tab_id parameter is required for {action_name} action")
+def _validate_js_code(action_name: str, js_code: str | None) -> None:
+    """Raise ``ValueError`` when ``js_code`` is ``None`` or empty."""
+    if js_code:
+        return
+    raise ValueError(f"js_code parameter is required for {action_name} action")
+def _validate_duration(action_name: str, duration: float | None) -> None:
+    """Raise ``ValueError`` when ``duration`` is ``None``.
+
+    Only ``None`` is rejected, so a duration of ``0`` is accepted.
+    """
+    if duration is not None:
+        return
+    raise ValueError(f"duration parameter is required for {action_name} action")
+def _validate_key(action_name: str, key: str | None) -> None:
+    """Raise ``ValueError`` when ``key`` is ``None`` or empty."""
+    if key:
+        return
+    raise ValueError(f"key parameter is required for {action_name} action")
+def _validate_file_path(action_name: str, file_path: str | None) -> None:
+    """Raise ``ValueError`` when ``file_path`` is ``None`` or empty."""
+    if file_path:
+        return
+    raise ValueError(f"file_path parameter is required for {action_name} action")
+def _handle_navigation_actions(
+    manager: BrowserTabManager,
+    action: str,
+    url: str | None = None,
+    tab_id: str | None = None,
+) -> dict[str, Any]:
+    """Run a navigation action (``launch``, ``goto``, ``back``, ``forward``).
+
+    ``goto`` requires a non-empty ``url``; ``launch`` forwards ``url`` as given.
+
+    Returns:
+        Whatever the matching ``manager`` method returns.
+
+    Raises:
+        ValueError: If ``url`` is missing for ``goto`` or ``action`` is not a
+            navigation action.
+    """
+    match action:
+        case "launch":
+            return manager.launch_browser(url)
+        case "goto":
+            _validate_url(action, url)
+            assert url is not None  # narrows the type after validation
+            return manager.goto_url(url, tab_id)
+        case "back":
+            return manager.back(tab_id)
+        case "forward":
+            return manager.forward(tab_id)
+        case _:
+            raise ValueError(f"Unknown navigation action: {action}")
+def _handle_interaction_actions(
+    manager: BrowserTabManager,
+    action: str,
+    coordinate: str | None = None,
+    text: str | None = None,
+    key: str | None = None,
+    tab_id: str | None = None,
+) -> dict[str, Any]:
+    """Run a pointer, scroll or keyboard action on a tab.
+
+    ``click``/``double_click``/``hover`` require ``coordinate``, ``type``
+    requires ``text`` and ``press_key`` requires ``key``. The scroll actions
+    need no extra argument.
+
+    Returns:
+        Whatever the matching ``manager`` method returns.
+
+    Raises:
+        ValueError: If a required argument is missing or ``action`` is not an
+            interaction action.
+    """
+    pointer_actions = {"click", "double_click", "hover"}
+    if action in pointer_actions:
+        _validate_coordinate(action, coordinate)
+        assert coordinate is not None  # narrows the type after validation
+        if action == "click":
+            pointer_method = manager.click
+        elif action == "double_click":
+            pointer_method = manager.double_click
+        else:
+            pointer_method = manager.hover
+        return pointer_method(coordinate, tab_id)
+    if action in {"scroll_down", "scroll_up"}:
+        return manager.scroll("down" if action == "scroll_down" else "up", tab_id)
+    if action == "type":
+        _validate_text(action, text)
+        assert text is not None
+        return manager.type_text(text, tab_id)
+    if action == "press_key":
+        _validate_key(action, key)
+        assert key is not None
+        return manager.press_key(key, tab_id)
+    raise ValueError(f"Unknown interaction action: {action}")
+def _raise_unknown_action(action: str) -> NoReturn:
+    """Raise ``ValueError`` naming an action that no handler group recognises."""
+    message = f"Unknown action: {action}"
+    raise ValueError(message)
+def _handle_tab_actions(
+    manager: BrowserTabManager,
+    action: str,
+    url: str | None = None,
+    tab_id: str | None = None,
+) -> dict[str, Any]:
+    """Run a tab-management action (``new_tab``, ``switch_tab``, ``close_tab``, ``list_tabs``).
+
+    ``switch_tab`` and ``close_tab`` require a non-empty ``tab_id``;
+    ``new_tab`` forwards ``url`` as given.
+
+    Returns:
+        Whatever the matching ``manager`` method returns.
+
+    Raises:
+        ValueError: If ``tab_id`` is missing where required or ``action`` is
+            not a tab action.
+    """
+    match action:
+        case "new_tab":
+            return manager.new_tab(url)
+        case "switch_tab":
+            _validate_tab_id(action, tab_id)
+            assert tab_id is not None  # narrows the type after validation
+            return manager.switch_tab(tab_id)
+        case "close_tab":
+            _validate_tab_id(action, tab_id)
+            assert tab_id is not None
+            return manager.close_tab(tab_id)
+        case "list_tabs":
+            return manager.list_tabs()
+        case _:
+            raise ValueError(f"Unknown tab action: {action}")
+def _handle_utility_actions(
+    manager: BrowserTabManager,
+    action: str,
+    duration: float | None = None,
+    js_code: str | None = None,
+    file_path: str | None = None,
+    tab_id: str | None = None,
+    clear: bool = False,
+) -> dict[str, Any]:
+    """Run a utility action.
+
+    Handles ``wait`` (needs ``duration``), ``execute_js`` (needs ``js_code``),
+    ``save_pdf`` (needs ``file_path``), ``get_console_logs`` (forwards
+    ``clear``), ``view_source`` and ``close``.
+
+    Returns:
+        Whatever the matching ``manager`` method returns.
+
+    Raises:
+        ValueError: If a required argument is missing or ``action`` is not a
+            utility action.
+    """
+    match action:
+        case "wait":
+            _validate_duration(action, duration)
+            assert duration is not None  # narrows the type after validation
+            return manager.wait_browser(duration, tab_id)
+        case "execute_js":
+            _validate_js_code(action, js_code)
+            assert js_code is not None
+            return manager.execute_js(js_code, tab_id)
+        case "save_pdf":
+            _validate_file_path(action, file_path)
+            assert file_path is not None
+            return manager.save_pdf(file_path, tab_id)
+        case "get_console_logs":
+            return manager.get_console_logs(tab_id, clear)
+        case "view_source":
+            return manager.view_source(tab_id)
+        case "close":
+            return manager.close_browser()
+        case _:
+            raise ValueError(f"Unknown utility action: {action}")
+@register_tool
+def browser_action(
+    action: BrowserAction,
+    url: str | None = None,
+    coordinate: str | None = None,
+    text: str | None = None,
+    tab_id: str | None = None,
+    js_code: str | None = None,
+    duration: float | None = None,
+    key: str | None = None,
+    file_path: str | None = None,
+    clear: bool = False,
+) -> dict[str, Any]:
+    """Route a browser action to its handler group.
+
+    The action name selects one of the navigation, interaction, tab or
+    utility handlers, which receive only the arguments relevant to them.
+
+    Returns:
+        The handler's result dict. If a handler (or the unknown-action
+        fallback) raises ``ValueError`` or ``RuntimeError``, a dict with the
+        keys ``error``, ``tab_id``, ``screenshot`` (empty string) and
+        ``is_running`` (``False``) is returned instead of raising.
+    """
+    manager = get_browser_tab_manager()
+    try:
+        if action in {"launch", "goto", "back", "forward"}:
+            return _handle_navigation_actions(manager, action, url, tab_id)
+        if action in {
+            "click",
+            "type",
+            "double_click",
+            "hover",
+            "press_key",
+            "scroll_down",
+            "scroll_up",
+        }:
+            return _handle_interaction_actions(manager, action, coordinate, text, key, tab_id)
+        if action in {"new_tab", "switch_tab", "close_tab", "list_tabs"}:
+            return _handle_tab_actions(manager, action, url, tab_id)
+        if action in {
+            "wait",
+            "execute_js",
+            "save_pdf",
+            "get_console_logs",
+            "view_source",
+            "close",
+        }:
+            return _handle_utility_actions(
+                manager, action, duration, js_code, file_path, tab_id, clear
+            )
+        _raise_unknown_action(action)
+    except (ValueError, RuntimeError) as exc:
+        # Report the failure as data instead of raising.
+        failure: dict[str, Any] = {
+            "error": str(exc),
+            "tab_id": tab_id,
+            "screenshot": "",
+            "is_running": False,
+        }
+        return failure