strix-agent 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. strix/__init__.py +0 -0
  2. strix/agents/StrixAgent/__init__.py +4 -0
  3. strix/agents/StrixAgent/strix_agent.py +60 -0
  4. strix/agents/StrixAgent/system_prompt.jinja +504 -0
  5. strix/agents/__init__.py +10 -0
  6. strix/agents/base_agent.py +394 -0
  7. strix/agents/state.py +139 -0
  8. strix/cli/__init__.py +4 -0
  9. strix/cli/app.py +1124 -0
  10. strix/cli/assets/cli.tcss +680 -0
  11. strix/cli/main.py +542 -0
  12. strix/cli/tool_components/__init__.py +39 -0
  13. strix/cli/tool_components/agents_graph_renderer.py +129 -0
  14. strix/cli/tool_components/base_renderer.py +61 -0
  15. strix/cli/tool_components/browser_renderer.py +107 -0
  16. strix/cli/tool_components/file_edit_renderer.py +95 -0
  17. strix/cli/tool_components/finish_renderer.py +32 -0
  18. strix/cli/tool_components/notes_renderer.py +108 -0
  19. strix/cli/tool_components/proxy_renderer.py +255 -0
  20. strix/cli/tool_components/python_renderer.py +34 -0
  21. strix/cli/tool_components/registry.py +72 -0
  22. strix/cli/tool_components/reporting_renderer.py +53 -0
  23. strix/cli/tool_components/scan_info_renderer.py +58 -0
  24. strix/cli/tool_components/terminal_renderer.py +99 -0
  25. strix/cli/tool_components/thinking_renderer.py +29 -0
  26. strix/cli/tool_components/user_message_renderer.py +43 -0
  27. strix/cli/tool_components/web_search_renderer.py +28 -0
  28. strix/cli/tracer.py +308 -0
  29. strix/llm/__init__.py +14 -0
  30. strix/llm/config.py +19 -0
  31. strix/llm/llm.py +310 -0
  32. strix/llm/memory_compressor.py +206 -0
  33. strix/llm/request_queue.py +63 -0
  34. strix/llm/utils.py +84 -0
  35. strix/prompts/__init__.py +113 -0
  36. strix/prompts/coordination/root_agent.jinja +41 -0
  37. strix/prompts/vulnerabilities/authentication_jwt.jinja +129 -0
  38. strix/prompts/vulnerabilities/business_logic.jinja +143 -0
  39. strix/prompts/vulnerabilities/csrf.jinja +168 -0
  40. strix/prompts/vulnerabilities/idor.jinja +164 -0
  41. strix/prompts/vulnerabilities/race_conditions.jinja +194 -0
  42. strix/prompts/vulnerabilities/rce.jinja +222 -0
  43. strix/prompts/vulnerabilities/sql_injection.jinja +216 -0
  44. strix/prompts/vulnerabilities/ssrf.jinja +168 -0
  45. strix/prompts/vulnerabilities/xss.jinja +221 -0
  46. strix/prompts/vulnerabilities/xxe.jinja +276 -0
  47. strix/runtime/__init__.py +19 -0
  48. strix/runtime/docker_runtime.py +298 -0
  49. strix/runtime/runtime.py +25 -0
  50. strix/runtime/tool_server.py +97 -0
  51. strix/tools/__init__.py +64 -0
  52. strix/tools/agents_graph/__init__.py +16 -0
  53. strix/tools/agents_graph/agents_graph_actions.py +610 -0
  54. strix/tools/agents_graph/agents_graph_actions_schema.xml +223 -0
  55. strix/tools/argument_parser.py +120 -0
  56. strix/tools/browser/__init__.py +4 -0
  57. strix/tools/browser/browser_actions.py +236 -0
  58. strix/tools/browser/browser_actions_schema.xml +183 -0
  59. strix/tools/browser/browser_instance.py +533 -0
  60. strix/tools/browser/tab_manager.py +342 -0
  61. strix/tools/executor.py +302 -0
  62. strix/tools/file_edit/__init__.py +4 -0
  63. strix/tools/file_edit/file_edit_actions.py +141 -0
  64. strix/tools/file_edit/file_edit_actions_schema.xml +128 -0
  65. strix/tools/finish/__init__.py +4 -0
  66. strix/tools/finish/finish_actions.py +167 -0
  67. strix/tools/finish/finish_actions_schema.xml +45 -0
  68. strix/tools/notes/__init__.py +14 -0
  69. strix/tools/notes/notes_actions.py +191 -0
  70. strix/tools/notes/notes_actions_schema.xml +150 -0
  71. strix/tools/proxy/__init__.py +20 -0
  72. strix/tools/proxy/proxy_actions.py +101 -0
  73. strix/tools/proxy/proxy_actions_schema.xml +267 -0
  74. strix/tools/proxy/proxy_manager.py +785 -0
  75. strix/tools/python/__init__.py +4 -0
  76. strix/tools/python/python_actions.py +47 -0
  77. strix/tools/python/python_actions_schema.xml +131 -0
  78. strix/tools/python/python_instance.py +172 -0
  79. strix/tools/python/python_manager.py +131 -0
  80. strix/tools/registry.py +196 -0
  81. strix/tools/reporting/__init__.py +6 -0
  82. strix/tools/reporting/reporting_actions.py +63 -0
  83. strix/tools/reporting/reporting_actions_schema.xml +30 -0
  84. strix/tools/terminal/__init__.py +4 -0
  85. strix/tools/terminal/terminal_actions.py +53 -0
  86. strix/tools/terminal/terminal_actions_schema.xml +114 -0
  87. strix/tools/terminal/terminal_instance.py +231 -0
  88. strix/tools/terminal/terminal_manager.py +191 -0
  89. strix/tools/thinking/__init__.py +4 -0
  90. strix/tools/thinking/thinking_actions.py +18 -0
  91. strix/tools/thinking/thinking_actions_schema.xml +52 -0
  92. strix/tools/web_search/__init__.py +4 -0
  93. strix/tools/web_search/web_search_actions.py +80 -0
  94. strix/tools/web_search/web_search_actions_schema.xml +83 -0
  95. strix_agent-0.1.1.dist-info/LICENSE +201 -0
  96. strix_agent-0.1.1.dist-info/METADATA +200 -0
  97. strix_agent-0.1.1.dist-info/RECORD +99 -0
  98. strix_agent-0.1.1.dist-info/WHEEL +4 -0
  99. strix_agent-0.1.1.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,223 @@
1
+ <tools>
2
+ <tool name="agent_finish">
3
+ <description>Mark a subagent's task as completed and optionally report results to parent agent.
4
+
5
+ IMPORTANT: This tool can ONLY be used by subagents (agents with a parent).
6
+ Root/main agents must use finish_scan instead.
7
+
8
+ This tool should be called when a subagent completes its assigned subtask to:
9
+ - Mark the subagent's task as completed
10
+ - Report findings back to the parent agent
11
+
12
+ Use this tool when:
13
+ - You are a subagent working on a specific subtask
14
+ - You have completed your assigned task
15
+ - You want to report your findings to the parent agent
16
+ - You are ready to terminate this subagent's execution</description>
17
+ <details>This tool handles sub-agent completion only; root/main agents must finish
18
+ with finish_scan instead. When a sub-agent finishes, it can report its findings
19
+ back to the parent agent for coordination.</details>
20
+ <parameters>
21
+ <parameter name="result_summary" type="string" required="true">
22
+ <description>Summary of what the agent accomplished and discovered</description>
23
+ </parameter>
24
+ <parameter name="findings" type="string" required="false">
25
+ <description>List of specific findings, vulnerabilities, or discoveries</description>
26
+ </parameter>
27
+ <parameter name="success" type="boolean" required="false">
28
+ <description>Whether the agent's task completed successfully</description>
29
+ </parameter>
30
+ <parameter name="report_to_parent" type="boolean" required="false">
31
+ <description>Whether to send results back to the parent agent</description>
32
+ </parameter>
33
+ <parameter name="final_recommendations" type="string" required="false">
34
+ <description>Recommendations for next steps or follow-up actions</description>
35
+ </parameter>
36
+ </parameters>
37
+ <returns type="Dict[str, Any]">
38
+ <description>Response containing: - agent_completed: Whether the agent was marked as completed - parent_notified: Whether parent was notified (if applicable) - completion_summary: Summary of completion status</description>
39
+ </returns>
40
+ <examples>
41
+ # Sub-agent completing subdomain enumeration task
42
+ <function=agent_finish>
43
+ <parameter=result_summary>Completed comprehensive subdomain enumeration for target.com.
44
+ Discovered 47 subdomains including several interesting ones with admin/dev
45
+ in the name. Found 3 subdomains with exposed services on non-standard
46
+ ports.</parameter>
47
+ <parameter=findings>["admin.target.com - exposed phpMyAdmin",
48
+ "dev-api.target.com - unauth API endpoints",
49
+ "staging.target.com - directory listing enabled",
50
+ "mail.target.com - POP3/IMAP services"]</parameter>
51
+ <parameter=success>true</parameter>
52
+ <parameter=report_to_parent>true</parameter>
53
+ <parameter=final_recommendations>["Prioritize testing admin.target.com for default creds",
54
+ "Enumerate dev-api.target.com API endpoints",
55
+ "Check staging.target.com for sensitive files"]</parameter>
56
+ </function>
57
+ </examples>
58
+ </tool>
59
+ <tool name="create_agent">
60
+ <description>Create and spawn a new agent to handle a specific subtask.
61
+
62
+ MANDATORY REQUIREMENT: You MUST call view_agent_graph FIRST before creating any new agent to check if there is already an agent working on the same or similar task. Only create a new agent if no existing agent is handling the specific task.</description>
63
+ <details>The new agent inherits the parent's conversation history and context up to the point
64
+ of creation, then continues with its assigned subtask. This enables decomposition
65
+ of complex penetration testing tasks into specialized sub-agents.
66
+
67
+ The agent runs asynchronously and independently, allowing the parent to continue
68
+ immediately while the new agent executes its task in the background.
69
+
70
+ CRITICAL: Before calling this tool, you MUST first use view_agent_graph to:
71
+ - Examine all existing agents and their current tasks
72
+ - Verify no agent is already working on the same or similar objective
73
+ - Avoid duplication of effort and resource waste
74
+ - Ensure efficient coordination across the multi-agent system
75
+
76
+ If you, as a parent agent, have absolutely nothing to do while your subagents are running, you can use the wait_for_message tool. The subagent will continue to run in the background and update you when it's done.
77
+ </details>
78
+ <parameters>
79
+ <parameter name="task" type="string" required="true">
80
+ <description>The specific task/objective for the new agent to accomplish</description>
81
+ </parameter>
82
+ <parameter name="name" type="string" required="true">
83
+ <description>Human-readable name for the agent (for tracking purposes)</description>
84
+ </parameter>
85
+ <parameter name="inherit_context" type="boolean" required="false">
86
+ <description>Whether the new agent should inherit parent's conversation history and context</description>
87
+ </parameter>
88
+ <parameter name="prompt_modules" type="string" required="false">
89
+ <description>Comma-separated list of prompt modules to use for the agent. Most agents should have at least one module in order to be useful. {{DYNAMIC_MODULES_DESCRIPTION}}</description>
90
+ </parameter>
91
+ </parameters>
92
+ <returns type="Dict[str, Any]">
93
+ <description>Response containing: - agent_id: Unique identifier for the created agent - success: Whether the agent was created successfully - message: Status message - agent_info: Details about the created agent</description>
94
+ </returns>
95
+ <examples>
96
+ # REQUIRED: First check agent graph before creating any new agent
97
+ <function=view_agent_graph>
98
+ </function>
99
+ # REQUIRED: Check agent graph again before creating another agent
100
+ <function=view_agent_graph>
101
+ </function>
102
+
103
+ # After confirming no SQL testing agent exists, create agent for vulnerability validation
104
+ <function=create_agent>
105
+ <parameter=task>Validate and exploit the suspected SQL injection vulnerability found in
106
+ the login form. Confirm exploitability and document proof of concept.</parameter>
107
+ <parameter=name>SQLi Validator</parameter>
108
+ <parameter=prompt_modules>sql_injection</parameter>
109
+ </function>
110
+
111
+ # Create specialized authentication testing agent with multiple modules (comma-separated)
112
+ <function=create_agent>
113
+ <parameter=task>Test authentication mechanisms, JWT implementation, and session management
114
+ for security vulnerabilities and bypass techniques.</parameter>
115
+ <parameter=name>Auth Specialist</parameter>
116
+ <parameter=prompt_modules>authentication_jwt, business_logic</parameter>
117
+ </function>
118
+ </examples>
119
+ </tool>
120
+ <tool name="send_message_to_agent">
121
+ <description>Send a message to another agent in the graph for coordination and communication.</description>
122
+ <details>This enables agents to communicate with each other during execution for:
123
+ - Sharing discovered information or findings
124
+ - Asking questions or requesting assistance
125
+ - Providing instructions or coordination
126
+ - Reporting status or results</details>
127
+ <parameters>
128
+ <parameter name="target_agent_id" type="string" required="true">
129
+ <description>ID of the agent to send the message to</description>
130
+ </parameter>
131
+ <parameter name="message" type="string" required="true">
132
+ <description>The message content to send</description>
133
+ </parameter>
134
+ <parameter name="message_type" type="string" required="false">
135
+ <description>Type of message being sent: - "query": Question requiring a response - "instruction": Command or directive for the target agent - "information": Informational message (findings, status, etc.)</description>
136
+ </parameter>
137
+ <parameter name="priority" type="string" required="false">
138
+ <description>Priority level of the message</description>
139
+ </parameter>
140
+ </parameters>
141
+ <returns type="Dict[str, Any]">
142
+ <description>Response containing: - success: Whether the message was sent successfully - message_id: Unique identifier for the message - delivery_status: Status of message delivery</description>
143
+ </returns>
144
+ <examples>
145
+ # Share discovered vulnerability information
146
+ <function=send_message_to_agent>
147
+ <parameter=target_agent_id>agent_abc123</parameter>
148
+ <parameter=message>Found SQL injection vulnerability in /login.php parameter 'username'.
149
+ Payload: admin' OR '1'='1' -- successfully bypassed authentication.
150
+ You should focus your testing on the authenticated areas of the
151
+ application.</parameter>
152
+ <parameter=message_type>information</parameter>
153
+ <parameter=priority>high</parameter>
154
+ </function>
155
+
156
+ # Request assistance from specialist agent
157
+ <function=send_message_to_agent>
158
+ <parameter=target_agent_id>agent_def456</parameter>
159
+ <parameter=message>I've identified what appears to be a custom encryption implementation
160
+ in the API responses. Can you analyze the cryptographic strength and look
161
+ for potential weaknesses?</parameter>
162
+ <parameter=message_type>query</parameter>
163
+ <parameter=priority>normal</parameter>
164
+ </function>
165
+ </examples>
166
+ </tool>
167
+ <tool name="view_agent_graph">
168
+ <description>View the current agent graph showing all agents, their relationships, and status.</description>
169
+ <details>This provides a comprehensive overview of the multi-agent system including:
170
+ - All agent nodes with their tasks, status, and metadata
171
+ - Parent-child relationships between agents
172
+ - Message communication patterns
173
+ - Current execution state</details>
174
+ <returns type="Dict[str, Any]">
175
+ <description>Response containing: - graph_structure: Human-readable representation of the agent graph - summary: High-level statistics about the graph</description>
176
+ </returns>
177
+ </tool>
178
+ <tool name="wait_for_message">
179
+ <description>Pause the agent loop indefinitely until receiving a message from another agent or user.
180
+
181
+ This tool puts the agent into a waiting state where it remains idle until it receives any form of communication. The agent will automatically resume execution when a message arrives.
182
+
183
+ IMPORTANT: This tool causes the agent to stop all activity until a message is received. Use it when you need to:
184
+ - Wait for subagent completion reports
185
+ - Coordinate with other agents before proceeding
186
+ - Pause for user input or decisions
187
+ - Synchronize multi-agent workflows
188
+
189
+ NOTE: If you are waiting for an agent that is NOT your subagent, you must first tell it to message you with updates before waiting for it. Otherwise, you will wait forever!
190
+ </description>
191
+ <details>When this tool is called, the agent enters a waiting state and will not continue execution until:
192
+ - Another agent sends it a message via send_message_to_agent
193
+ - A user sends it a direct message through the CLI
194
+ - Any other form of inter-agent or user communication occurs
195
+
196
+ The agent will automatically resume from where it left off once a message is received.
197
+ This is particularly useful for parent agents waiting for subagent results or for coordination points in multi-agent workflows.</details>
198
+ <parameters>
199
+ <parameter name="reason" type="string" required="false">
200
+ <description>Explanation for why the agent is waiting (for logging and monitoring purposes)</description>
201
+ </parameter>
202
+ </parameters>
203
+ <returns type="Dict[str, Any]">
204
+ <description>Response containing: - success: Whether the agent successfully entered waiting state - status: Current agent status ("waiting") - reason: The reason for waiting - agent_info: Details about the waiting agent - resume_conditions: List of conditions that will resume the agent</description>
205
+ </returns>
206
+ <examples>
207
+ # Wait for subagents to complete their tasks
208
+ <function=wait_for_message>
209
+ <parameter=reason>Waiting for subdomain enumeration and port scanning subagents to complete their tasks and report findings</parameter>
210
+ </function>
211
+
212
+ # Wait for user input on next steps
213
+ <function=wait_for_message>
214
+ <parameter=reason>Waiting for user decision on whether to proceed with exploitation of discovered SQL injection vulnerability</parameter>
215
+ </function>
216
+
217
+ # Coordinate with other agents
218
+ <function=wait_for_message>
219
+ <parameter=reason>Waiting for vulnerability assessment agent to share discovered attack vectors before proceeding with exploitation phase</parameter>
220
+ </function>
221
+ </examples>
222
+ </tool>
223
+ </tools>
@@ -0,0 +1,120 @@
1
+ import contextlib
2
+ import inspect
3
+ import json
4
+ from collections.abc import Callable
5
+ from typing import Any, Union, get_args, get_origin
6
+
7
+
8
+ class ArgumentConversionError(Exception):
9
+ def __init__(self, message: str, param_name: str | None = None) -> None:
10
+ self.param_name = param_name
11
+ super().__init__(message)
12
+
13
+
14
def convert_arguments(func: Callable[..., Any], kwargs: dict[str, Any]) -> dict[str, Any]:
    """Coerce string-valued keyword arguments to the types ``func`` annotates.

    A value is passed through untouched when its key is not a parameter of
    ``func``, when that parameter has no annotation, or when the value is not
    a string (``None`` included). Every other value is handed to
    ``convert_string_to_type`` together with the parameter's annotation.

    Args:
        func: Callable whose signature supplies the target types.
        kwargs: Raw keyword arguments.

    Returns:
        A new dict with the same keys as ``kwargs`` and the converted values.

    Raises:
        ArgumentConversionError: If one value cannot be converted (with
            ``param_name`` set), or if the signature or arguments cannot be
            processed at all.
    """
    try:
        parameters = inspect.signature(func).parameters
        result: dict[str, Any] = {}

        for name, raw in kwargs.items():
            spec = parameters.get(name)
            # Unknown parameter, missing annotation, or non-string value:
            # nothing to convert.
            if (
                spec is None
                or spec.annotation == inspect.Parameter.empty
                or not isinstance(raw, str)
            ):
                result[name] = raw
                continue

            try:
                result[name] = convert_string_to_type(raw, spec.annotation)
            except (ValueError, TypeError, json.JSONDecodeError) as exc:
                raise ArgumentConversionError(
                    f"Failed to convert argument '{name}' to type {spec.annotation}: {exc}",
                    param_name=name,
                ) from exc

    except (ValueError, TypeError, AttributeError) as exc:
        raise ArgumentConversionError(f"Failed to process function arguments: {exc}") from exc

    return result
47
+
48
+
49
def convert_string_to_type(value: str, param_type: Any) -> Any:
    """Convert ``value`` to ``param_type``, unwrapping union annotations first.

    For ``Union[...]`` / ``X | Y`` annotations each non-``None`` member is
    tried in declaration order and the first conversion that does not raise
    is returned; if every member fails, the original string is returned
    unchanged. Any other annotation is delegated to ``_convert_basic_types``.

    Args:
        value: Raw string to convert.
        param_type: Target annotation (plain type, generic alias or union).

    Returns:
        The converted value, or ``value`` itself when no union member accepts it.
    """
    origin = get_origin(param_type)

    # ``Optional[X]`` and ``X | None`` both report a union origin, so they are
    # fully handled here. Probing ``__args__`` separately is unnecessary and
    # would misclassify non-union generics such as ``dict[str, None]``.
    if origin is Union or origin is type(str | None):
        for candidate in get_args(param_type):
            if candidate is type(None):
                continue
            with contextlib.suppress(ValueError, TypeError, json.JSONDecodeError):
                return convert_string_to_type(value, candidate)
        return value

    return _convert_basic_types(value, param_type, origin)
68
+
69
+
70
def _convert_basic_types(value: str, param_type: Any, origin: Any = None) -> Any:
    """Convert ``value`` for a non-union annotation.

    ``int``, ``float``, ``bool`` and ``str`` use their dedicated converters;
    ``list`` and ``dict`` (bare or as a generic origin) use the list/dict
    helpers. For anything else the string is parsed as JSON, and returned
    unchanged if it is not valid JSON.
    """
    scalar_converters: dict[Any, Callable[[str], Any]] = {
        int: int,
        float: float,
        bool: _convert_to_bool,
        str: str,
    }

    scalar = scalar_converters.get(param_type)
    if scalar is not None:
        return scalar(value)

    for container, container_converter in ((list, _convert_to_list), (dict, _convert_to_dict)):
        if container in (origin, param_type):
            return container_converter(value)

    # Unknown annotation: best-effort JSON decode, else keep the raw string.
    try:
        return json.loads(value)
    except json.JSONDecodeError:
        return value
89
+
90
+
91
def _convert_to_bool(value: str) -> bool:
    """Interpret a string as a boolean.

    Surrounding whitespace is ignored and matching is case-insensitive, so a
    padded value such as ``" false\\n"`` is recognised instead of falling
    through to the truthiness fallback (where any non-empty string is True).

    Args:
        value: Text to interpret.

    Returns:
        ``True`` for "true"/"1"/"yes"/"on", ``False`` for
        "false"/"0"/"no"/"off"; otherwise ``True`` if any non-whitespace text
        remains and ``False`` for blank input.
    """
    normalized = value.strip().lower()
    if normalized in ("true", "1", "yes", "on"):
        return True
    if normalized in ("false", "0", "no", "off"):
        return False
    # Unrecognised text: non-blank strings count as truthy.
    return bool(normalized)
97
+
98
+
99
def _convert_to_list(value: str) -> list[Any]:
    """Turn a string into a list.

    Valid JSON is decoded: a JSON array is returned as-is and any other JSON
    value is wrapped in a one-element list. Text that is not JSON is split on
    commas (items stripped) when it contains one, otherwise it becomes the
    sole element.
    """
    try:
        decoded = json.loads(value)
    except json.JSONDecodeError:
        # Not JSON: comma-separated text, or a single raw item.
        return [piece.strip() for piece in value.split(",")] if "," in value else [value]
    return decoded if isinstance(decoded, list) else [decoded]
110
+
111
+
112
def _convert_to_dict(value: str) -> dict[str, Any]:
    """Decode a JSON object from ``value``.

    Returns the decoded dict when ``value`` is a JSON object; returns an empty
    dict when it is invalid JSON or decodes to anything other than an object.
    """
    try:
        decoded = json.loads(value)
    except json.JSONDecodeError:
        return {}
    return decoded if isinstance(decoded, dict) else {}
@@ -0,0 +1,4 @@
1
+ from .browser_actions import browser_action
2
+
3
+
4
+ __all__ = ["browser_action"]
@@ -0,0 +1,236 @@
1
+ from typing import Any, Literal, NoReturn
2
+
3
+ from strix.tools.registry import register_tool
4
+
5
+ from .tab_manager import BrowserTabManager, get_browser_tab_manager
6
+
7
+
8
# Action names accepted by the ``action`` parameter of ``browser_action``.
BrowserAction = Literal[
    "launch", "goto", "click", "type", "scroll_down", "scroll_up", "back",
    "forward", "new_tab", "switch_tab", "close_tab", "wait", "execute_js",
    "double_click", "hover", "press_key", "save_pdf", "get_console_logs",
    "view_source", "close", "list_tabs",
]
31
+
32
+
33
+ def _validate_url(action_name: str, url: str | None) -> None:
34
+ if not url:
35
+ raise ValueError(f"url parameter is required for {action_name} action")
36
+
37
+
38
+ def _validate_coordinate(action_name: str, coordinate: str | None) -> None:
39
+ if not coordinate:
40
+ raise ValueError(f"coordinate parameter is required for {action_name} action")
41
+
42
+
43
+ def _validate_text(action_name: str, text: str | None) -> None:
44
+ if not text:
45
+ raise ValueError(f"text parameter is required for {action_name} action")
46
+
47
+
48
+ def _validate_tab_id(action_name: str, tab_id: str | None) -> None:
49
+ if not tab_id:
50
+ raise ValueError(f"tab_id parameter is required for {action_name} action")
51
+
52
+
53
+ def _validate_js_code(action_name: str, js_code: str | None) -> None:
54
+ if not js_code:
55
+ raise ValueError(f"js_code parameter is required for {action_name} action")
56
+
57
+
58
+ def _validate_duration(action_name: str, duration: float | None) -> None:
59
+ if duration is None:
60
+ raise ValueError(f"duration parameter is required for {action_name} action")
61
+
62
+
63
+ def _validate_key(action_name: str, key: str | None) -> None:
64
+ if not key:
65
+ raise ValueError(f"key parameter is required for {action_name} action")
66
+
67
+
68
+ def _validate_file_path(action_name: str, file_path: str | None) -> None:
69
+ if not file_path:
70
+ raise ValueError(f"file_path parameter is required for {action_name} action")
71
+
72
+
73
def _handle_navigation_actions(
    manager: BrowserTabManager,
    action: str,
    url: str | None = None,
    tab_id: str | None = None,
) -> dict[str, Any]:
    """Run a navigation action ("launch", "goto", "back", "forward") on ``manager``.

    Returns whatever the corresponding manager method returns.

    Raises:
        ValueError: If "goto" is requested without a url, or ``action`` is not
            one of the navigation actions.
    """
    if action == "launch":
        return manager.launch_browser(url)

    if action == "goto":
        _validate_url(action, url)
        assert url is not None  # narrowing for type checkers; validated above
        return manager.goto_url(url, tab_id)

    if action in ("back", "forward"):
        # The manager method is named after the action itself.
        history_step = getattr(manager, action)
        return history_step(tab_id)

    raise ValueError(f"Unknown navigation action: {action}")
90
+
91
+
92
def _handle_interaction_actions(
    manager: BrowserTabManager,
    action: str,
    coordinate: str | None = None,
    text: str | None = None,
    key: str | None = None,
    tab_id: str | None = None,
) -> dict[str, Any]:
    """Run a pointer, scroll or keyboard action on ``manager``.

    Handles "click", "double_click", "hover", "scroll_down", "scroll_up",
    "type" and "press_key", returning whatever the manager method returns.

    Raises:
        ValueError: If the parameter an action needs (coordinate, text or key)
            is missing, or ``action`` is not an interaction action.
    """
    if action in {"click", "double_click", "hover"}:
        _validate_coordinate(action, coordinate)
        assert coordinate is not None  # narrowing for type checkers
        # Pointer actions map one-to-one onto manager methods of the same name.
        pointer_method = getattr(manager, action)
        return pointer_method(coordinate, tab_id)

    if action in {"scroll_down", "scroll_up"}:
        # "scroll_down" -> "down", "scroll_up" -> "up"
        return manager.scroll(action.removeprefix("scroll_"), tab_id)

    if action == "press_key":
        _validate_key(action, key)
        assert key is not None  # narrowing for type checkers
        return manager.press_key(key, tab_id)

    if action == "type":
        _validate_text(action, text)
        assert text is not None  # narrowing for type checkers
        return manager.type_text(text, tab_id)

    raise ValueError(f"Unknown interaction action: {action}")
124
+
125
+
126
def _raise_unknown_action(action: str) -> NoReturn:
    """Always raise ``ValueError`` reporting ``action`` as unknown."""
    message = f"Unknown action: {action}"
    raise ValueError(message)
128
+
129
+
130
def _handle_tab_actions(
    manager: BrowserTabManager,
    action: str,
    url: str | None = None,
    tab_id: str | None = None,
) -> dict[str, Any]:
    """Run a tab action ("new_tab", "switch_tab", "close_tab", "list_tabs").

    Returns whatever the corresponding manager method returns.

    Raises:
        ValueError: If "switch_tab"/"close_tab" is requested without a tab_id,
            or ``action`` is not a tab action.
    """
    if action == "new_tab":
        return manager.new_tab(url)

    if action in ("switch_tab", "close_tab"):
        _validate_tab_id(action, tab_id)
        assert tab_id is not None  # narrowing for type checkers
        # Both actions call the manager method of the same name.
        tab_method = getattr(manager, action)
        return tab_method(tab_id)

    if action == "list_tabs":
        return manager.list_tabs()

    raise ValueError(f"Unknown tab action: {action}")
149
+
150
+
151
def _handle_utility_actions(
    manager: BrowserTabManager,
    action: str,
    duration: float | None = None,
    js_code: str | None = None,
    file_path: str | None = None,
    tab_id: str | None = None,
    clear: bool = False,
) -> dict[str, Any]:
    """Run a utility action on ``manager``.

    Handles "wait", "execute_js", "save_pdf", "get_console_logs",
    "view_source" and "close", returning whatever the manager method returns.

    Raises:
        ValueError: If the parameter an action needs (duration, js_code or
            file_path) is missing, or ``action`` is not a utility action.
    """
    # Actions that need no argument validation.
    if action == "close":
        return manager.close_browser()
    if action == "view_source":
        return manager.view_source(tab_id)
    if action == "get_console_logs":
        return manager.get_console_logs(tab_id, clear)

    # Actions with one required argument each.
    if action == "save_pdf":
        _validate_file_path(action, file_path)
        assert file_path is not None  # narrowing for type checkers
        return manager.save_pdf(file_path, tab_id)
    if action == "execute_js":
        _validate_js_code(action, js_code)
        assert js_code is not None  # narrowing for type checkers
        return manager.execute_js(js_code, tab_id)
    if action == "wait":
        _validate_duration(action, duration)
        assert duration is not None  # narrowing for type checkers
        return manager.wait_browser(duration, tab_id)

    raise ValueError(f"Unknown utility action: {action}")
179
+
180
+
181
@register_tool
def browser_action(
    action: BrowserAction,
    url: str | None = None,
    coordinate: str | None = None,
    text: str | None = None,
    tab_id: str | None = None,
    js_code: str | None = None,
    duration: float | None = None,
    key: str | None = None,
    file_path: str | None = None,
    clear: bool = False,
) -> dict[str, Any]:
    """Dispatch a browser action to the handler for its action group.

    The action is routed to the navigation, interaction, tab or utility
    handler and that handler's result is returned. A ``ValueError`` or
    ``RuntimeError`` raised along the way (including for an unknown action)
    is converted into an error dict with the keys ``error``, ``tab_id``,
    ``screenshot`` and ``is_running``.
    """
    manager = get_browser_tab_manager()

    # Each entry pairs a group of action names with a deferred handler call.
    dispatch = (
        (
            {"launch", "goto", "back", "forward"},
            lambda: _handle_navigation_actions(manager, action, url, tab_id),
        ),
        (
            {
                "click",
                "type",
                "double_click",
                "hover",
                "press_key",
                "scroll_down",
                "scroll_up",
            },
            lambda: _handle_interaction_actions(manager, action, coordinate, text, key, tab_id),
        ),
        (
            {"new_tab", "switch_tab", "close_tab", "list_tabs"},
            lambda: _handle_tab_actions(manager, action, url, tab_id),
        ),
        (
            {
                "wait",
                "execute_js",
                "save_pdf",
                "get_console_logs",
                "view_source",
                "close",
            },
            lambda: _handle_utility_actions(
                manager, action, duration, js_code, file_path, tab_id, clear
            ),
        ),
    )

    try:
        for action_names, run_handler in dispatch:
            if action in action_names:
                return run_handler()

        _raise_unknown_action(action)

    except (ValueError, RuntimeError) as exc:
        return {
            "error": str(exc),
            "tab_id": tab_id,
            "screenshot": "",
            "is_running": False,
        }