strix-agent 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. strix/__init__.py +0 -0
  2. strix/agents/StrixAgent/__init__.py +4 -0
  3. strix/agents/StrixAgent/strix_agent.py +89 -0
  4. strix/agents/StrixAgent/system_prompt.jinja +404 -0
  5. strix/agents/__init__.py +10 -0
  6. strix/agents/base_agent.py +518 -0
  7. strix/agents/state.py +163 -0
  8. strix/interface/__init__.py +4 -0
  9. strix/interface/assets/tui_styles.tcss +694 -0
  10. strix/interface/cli.py +230 -0
  11. strix/interface/main.py +500 -0
  12. strix/interface/tool_components/__init__.py +39 -0
  13. strix/interface/tool_components/agents_graph_renderer.py +123 -0
  14. strix/interface/tool_components/base_renderer.py +62 -0
  15. strix/interface/tool_components/browser_renderer.py +120 -0
  16. strix/interface/tool_components/file_edit_renderer.py +99 -0
  17. strix/interface/tool_components/finish_renderer.py +31 -0
  18. strix/interface/tool_components/notes_renderer.py +108 -0
  19. strix/interface/tool_components/proxy_renderer.py +255 -0
  20. strix/interface/tool_components/python_renderer.py +34 -0
  21. strix/interface/tool_components/registry.py +72 -0
  22. strix/interface/tool_components/reporting_renderer.py +53 -0
  23. strix/interface/tool_components/scan_info_renderer.py +64 -0
  24. strix/interface/tool_components/terminal_renderer.py +131 -0
  25. strix/interface/tool_components/thinking_renderer.py +29 -0
  26. strix/interface/tool_components/user_message_renderer.py +43 -0
  27. strix/interface/tool_components/web_search_renderer.py +28 -0
  28. strix/interface/tui.py +1274 -0
  29. strix/interface/utils.py +559 -0
  30. strix/llm/__init__.py +15 -0
  31. strix/llm/config.py +20 -0
  32. strix/llm/llm.py +465 -0
  33. strix/llm/memory_compressor.py +212 -0
  34. strix/llm/request_queue.py +87 -0
  35. strix/llm/utils.py +87 -0
  36. strix/prompts/README.md +64 -0
  37. strix/prompts/__init__.py +109 -0
  38. strix/prompts/cloud/.gitkeep +0 -0
  39. strix/prompts/coordination/root_agent.jinja +41 -0
  40. strix/prompts/custom/.gitkeep +0 -0
  41. strix/prompts/frameworks/fastapi.jinja +142 -0
  42. strix/prompts/frameworks/nextjs.jinja +126 -0
  43. strix/prompts/protocols/graphql.jinja +215 -0
  44. strix/prompts/reconnaissance/.gitkeep +0 -0
  45. strix/prompts/technologies/firebase_firestore.jinja +177 -0
  46. strix/prompts/technologies/supabase.jinja +189 -0
  47. strix/prompts/vulnerabilities/authentication_jwt.jinja +147 -0
  48. strix/prompts/vulnerabilities/broken_function_level_authorization.jinja +146 -0
  49. strix/prompts/vulnerabilities/business_logic.jinja +171 -0
  50. strix/prompts/vulnerabilities/csrf.jinja +174 -0
  51. strix/prompts/vulnerabilities/idor.jinja +195 -0
  52. strix/prompts/vulnerabilities/information_disclosure.jinja +222 -0
  53. strix/prompts/vulnerabilities/insecure_file_uploads.jinja +188 -0
  54. strix/prompts/vulnerabilities/mass_assignment.jinja +141 -0
  55. strix/prompts/vulnerabilities/open_redirect.jinja +177 -0
  56. strix/prompts/vulnerabilities/path_traversal_lfi_rfi.jinja +142 -0
  57. strix/prompts/vulnerabilities/race_conditions.jinja +164 -0
  58. strix/prompts/vulnerabilities/rce.jinja +154 -0
  59. strix/prompts/vulnerabilities/sql_injection.jinja +151 -0
  60. strix/prompts/vulnerabilities/ssrf.jinja +135 -0
  61. strix/prompts/vulnerabilities/subdomain_takeover.jinja +155 -0
  62. strix/prompts/vulnerabilities/xss.jinja +169 -0
  63. strix/prompts/vulnerabilities/xxe.jinja +184 -0
  64. strix/runtime/__init__.py +19 -0
  65. strix/runtime/docker_runtime.py +399 -0
  66. strix/runtime/runtime.py +29 -0
  67. strix/runtime/tool_server.py +205 -0
  68. strix/telemetry/__init__.py +4 -0
  69. strix/telemetry/tracer.py +337 -0
  70. strix/tools/__init__.py +64 -0
  71. strix/tools/agents_graph/__init__.py +16 -0
  72. strix/tools/agents_graph/agents_graph_actions.py +621 -0
  73. strix/tools/agents_graph/agents_graph_actions_schema.xml +226 -0
  74. strix/tools/argument_parser.py +121 -0
  75. strix/tools/browser/__init__.py +4 -0
  76. strix/tools/browser/browser_actions.py +236 -0
  77. strix/tools/browser/browser_actions_schema.xml +183 -0
  78. strix/tools/browser/browser_instance.py +533 -0
  79. strix/tools/browser/tab_manager.py +342 -0
  80. strix/tools/executor.py +305 -0
  81. strix/tools/file_edit/__init__.py +4 -0
  82. strix/tools/file_edit/file_edit_actions.py +141 -0
  83. strix/tools/file_edit/file_edit_actions_schema.xml +128 -0
  84. strix/tools/finish/__init__.py +4 -0
  85. strix/tools/finish/finish_actions.py +174 -0
  86. strix/tools/finish/finish_actions_schema.xml +45 -0
  87. strix/tools/notes/__init__.py +14 -0
  88. strix/tools/notes/notes_actions.py +191 -0
  89. strix/tools/notes/notes_actions_schema.xml +150 -0
  90. strix/tools/proxy/__init__.py +20 -0
  91. strix/tools/proxy/proxy_actions.py +101 -0
  92. strix/tools/proxy/proxy_actions_schema.xml +267 -0
  93. strix/tools/proxy/proxy_manager.py +785 -0
  94. strix/tools/python/__init__.py +4 -0
  95. strix/tools/python/python_actions.py +47 -0
  96. strix/tools/python/python_actions_schema.xml +131 -0
  97. strix/tools/python/python_instance.py +172 -0
  98. strix/tools/python/python_manager.py +131 -0
  99. strix/tools/registry.py +196 -0
  100. strix/tools/reporting/__init__.py +6 -0
  101. strix/tools/reporting/reporting_actions.py +63 -0
  102. strix/tools/reporting/reporting_actions_schema.xml +30 -0
  103. strix/tools/terminal/__init__.py +4 -0
  104. strix/tools/terminal/terminal_actions.py +35 -0
  105. strix/tools/terminal/terminal_actions_schema.xml +146 -0
  106. strix/tools/terminal/terminal_manager.py +151 -0
  107. strix/tools/terminal/terminal_session.py +447 -0
  108. strix/tools/thinking/__init__.py +4 -0
  109. strix/tools/thinking/thinking_actions.py +18 -0
  110. strix/tools/thinking/thinking_actions_schema.xml +52 -0
  111. strix/tools/web_search/__init__.py +4 -0
  112. strix/tools/web_search/web_search_actions.py +80 -0
  113. strix/tools/web_search/web_search_actions_schema.xml +83 -0
  114. strix_agent-0.4.0.dist-info/LICENSE +201 -0
  115. strix_agent-0.4.0.dist-info/METADATA +282 -0
  116. strix_agent-0.4.0.dist-info/RECORD +118 -0
  117. strix_agent-0.4.0.dist-info/WHEEL +4 -0
  118. strix_agent-0.4.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,226 @@
1
+ <tools>
2
+ <tool name="agent_finish">
3
+ <description>Mark a subagent's task as completed and optionally report results to parent agent.
4
+
5
+ IMPORTANT: This tool can ONLY be used by subagents (agents with a parent).
6
+ Root/main agents must use finish_scan instead.
7
+
8
+ This tool should be called when a subagent completes its assigned subtask to:
9
+ - Mark the subagent's task as completed
10
+ - Report findings back to the parent agent
11
+
12
+ Use this tool when:
13
+ - You are a subagent working on a specific subtask
14
+ - You have completed your assigned task
15
+ - You want to report your findings to the parent agent
16
+ - You are ready to terminate this subagent's execution</description>
17
+ <details>This tool handles sub-agent completion only; root/main agents must finish with
18
+ the finish_scan tool instead. When a sub-agent finishes, it can report its findings
19
+ back to the parent agent for coordination.</details>
20
+ <parameters>
21
+ <parameter name="result_summary" type="string" required="true">
22
+ <description>Summary of what the agent accomplished and discovered</description>
23
+ </parameter>
24
+ <parameter name="findings" type="string" required="false">
25
+ <description>List of specific findings, vulnerabilities, or discoveries</description>
26
+ </parameter>
27
+ <parameter name="success" type="boolean" required="false">
28
+ <description>Whether the agent's task completed successfully</description>
29
+ </parameter>
30
+ <parameter name="report_to_parent" type="boolean" required="false">
31
+ <description>Whether to send results back to the parent agent</description>
32
+ </parameter>
33
+ <parameter name="final_recommendations" type="string" required="false">
34
+ <description>Recommendations for next steps or follow-up actions</description>
35
+ </parameter>
36
+ </parameters>
37
+ <returns type="Dict[str, Any]">
38
+ <description>Response containing: - agent_completed: Whether the agent was marked as completed - parent_notified: Whether parent was notified (if applicable) - completion_summary: Summary of completion status</description>
39
+ </returns>
40
+ <examples>
41
+ # Sub-agent completing subdomain enumeration task
42
+ <function=agent_finish>
43
+ <parameter=result_summary>Completed comprehensive subdomain enumeration for target.com.
44
+ Discovered 47 subdomains including several interesting ones with admin/dev
45
+ in the name. Found 3 subdomains with exposed services on non-standard
46
+ ports.</parameter>
47
+ <parameter=findings>["admin.target.com - exposed phpMyAdmin",
48
+ "dev-api.target.com - unauth API endpoints",
49
+ "staging.target.com - directory listing enabled",
50
+ "mail.target.com - POP3/IMAP services"]</parameter>
51
+ <parameter=success>true</parameter>
52
+ <parameter=report_to_parent>true</parameter>
53
+ <parameter=final_recommendations>["Prioritize testing admin.target.com for default creds",
54
+ "Enumerate dev-api.target.com API endpoints",
55
+ "Check staging.target.com for sensitive files"]</parameter>
56
+ </function>
57
+ </examples>
58
+ </tool>
59
+ <tool name="create_agent">
60
+ <description>Create and spawn a new agent to handle a specific subtask.
61
+
62
+ Only create a new agent if no existing agent is handling the specific task.</description>
63
+ <details>The new agent inherits the parent's conversation history and context up to the point
64
+ of creation, then continues with its assigned subtask. This enables decomposition
65
+ of complex penetration testing tasks into specialized sub-agents.
66
+
67
+ The agent runs asynchronously and independently, allowing the parent to continue
68
+ immediately while the new agent executes its task in the background.
69
+
70
+ If you, as a parent agent, have absolutely nothing else to do while your subagents are running, you can use the wait_for_message tool. The subagents will continue to run in the background and update you when they are done.
71
+ </details>
72
+ <parameters>
73
+ <parameter name="task" type="string" required="true">
74
+ <description>The specific task/objective for the new agent to accomplish</description>
75
+ </parameter>
76
+ <parameter name="name" type="string" required="true">
77
+ <description>Human-readable name for the agent (for tracking purposes)</description>
78
+ </parameter>
79
+ <parameter name="inherit_context" type="boolean" required="false">
80
+ <description>Whether the new agent should inherit parent's conversation history and context</description>
81
+ </parameter>
82
+ <parameter name="prompt_modules" type="string" required="false">
83
+ <description>Comma-separated list of prompt modules to use for the agent (MAXIMUM 5 modules allowed). Most agents should have at least one module in order to be useful. Agents should be highly specialized - use 1-3 related modules; up to 5 for complex contexts. {{DYNAMIC_MODULES_DESCRIPTION}}</description>
84
+ </parameter>
85
+ </parameters>
86
+ <returns type="Dict[str, Any]">
87
+ <description>Response containing: - agent_id: Unique identifier for the created agent - success: Whether the agent was created successfully - message: Status message - agent_info: Details about the created agent</description>
88
+ </returns>
89
+ <examples>
90
+ # After confirming no SQL testing agent exists, create agent for vulnerability validation
91
+ <function=create_agent>
92
+ <parameter=task>Validate and exploit the suspected SQL injection vulnerability found in
93
+ the login form. Confirm exploitability and document proof of concept.</parameter>
94
+ <parameter=name>SQLi Validator</parameter>
95
+ <parameter=prompt_modules>sql_injection</parameter>
96
+ </function>
97
+
98
+ <function=create_agent>
99
+ <parameter=task>Test authentication mechanisms, JWT implementation, and session management
100
+ for security vulnerabilities and bypass techniques.</parameter>
101
+ <parameter=name>Auth Specialist</parameter>
102
+ <parameter=prompt_modules>authentication_jwt, business_logic</parameter>
103
+ </function>
104
+
105
+ # Example of single-module specialization (most focused)
106
+ <function=create_agent>
107
+ <parameter=task>Perform comprehensive XSS testing including reflected, stored, and DOM-based
108
+ variants across all identified input points.</parameter>
109
+ <parameter=name>XSS Specialist</parameter>
110
+ <parameter=prompt_modules>xss</parameter>
111
+ </function>
112
+
113
+ # Example of up to 5 related modules (borderline acceptable)
114
+ <function=create_agent>
115
+ <parameter=task>Test for server-side vulnerabilities including SSRF, XXE, and potential
116
+ RCE vectors in file upload and XML processing endpoints.</parameter>
117
+ <parameter=name>Server-Side Attack Specialist</parameter>
118
+ <parameter=prompt_modules>ssrf, xxe, rce</parameter>
119
+ </function>
120
+ </examples>
121
+ </tool>
122
+ <tool name="send_message_to_agent">
123
+ <description>Send a message to another agent in the graph for coordination and communication.</description>
124
+ <details>This enables agents to communicate with each other during execution, but should be used only when essential:
125
+ - Sharing discovered information or findings
126
+ - Asking questions or requesting assistance
127
+ - Providing instructions or coordination
128
+ - Reporting status or results
129
+
130
+ Best practices:
131
+ - Avoid routine status updates; batch non-urgent information
132
+ - Prefer parent/child completion flows (agent_finish)
133
+ - Do not message when the context is already known</details>
134
+ <parameters>
135
+ <parameter name="target_agent_id" type="string" required="true">
136
+ <description>ID of the agent to send the message to</description>
137
+ </parameter>
138
+ <parameter name="message" type="string" required="true">
139
+ <description>The message content to send</description>
140
+ </parameter>
141
+ <parameter name="message_type" type="string" required="false">
142
+ <description>Type of message being sent: - "query": Question requiring a response - "instruction": Command or directive for the target agent - "information": Informational message (findings, status, etc.)</description>
143
+ </parameter>
144
+ <parameter name="priority" type="string" required="false">
145
+ <description>Priority level of the message</description>
146
+ </parameter>
147
+ </parameters>
148
+ <returns type="Dict[str, Any]">
149
+ <description>Response containing: - success: Whether the message was sent successfully - message_id: Unique identifier for the message - delivery_status: Status of message delivery</description>
150
+ </returns>
151
+ <examples>
152
+ # Share discovered vulnerability information
153
+ <function=send_message_to_agent>
154
+ <parameter=target_agent_id>agent_abc123</parameter>
155
+ <parameter=message>Found SQL injection vulnerability in /login.php parameter 'username'.
156
+ Payload: admin' OR '1'='1' -- successfully bypassed authentication.
157
+ You should focus your testing on the authenticated areas of the
158
+ application.</parameter>
159
+ <parameter=message_type>information</parameter>
160
+ <parameter=priority>high</parameter>
161
+ </function>
162
+
163
+ # Request assistance from specialist agent
164
+ <function=send_message_to_agent>
165
+ <parameter=target_agent_id>agent_def456</parameter>
166
+ <parameter=message>I've identified what appears to be a custom encryption implementation
167
+ in the API responses. Can you analyze the cryptographic strength and look
168
+ for potential weaknesses?</parameter>
169
+ <parameter=message_type>query</parameter>
170
+ <parameter=priority>normal</parameter>
171
+ </function>
172
+ </examples>
173
+ </tool>
174
+ <tool name="view_agent_graph">
175
+ <description>View the current agent graph showing all agents, their relationships, and status.</description>
176
+ <details>This provides a comprehensive overview of the multi-agent system including:
177
+ - All agent nodes with their tasks, status, and metadata
178
+ - Parent-child relationships between agents
179
+ - Message communication patterns
180
+ - Current execution state</details>
181
+ <returns type="Dict[str, Any]">
182
+ <description>Response containing: - graph_structure: Human-readable representation of the agent graph - summary: High-level statistics about the graph</description>
183
+ </returns>
184
+ </tool>
185
+ <tool name="wait_for_message">
186
+ <description>Pause the agent loop indefinitely until receiving a message from another agent.
187
+
188
+ This tool puts the agent into a waiting state where it remains idle until it receives any form of communication. The agent will automatically resume execution when a message arrives.
189
+
190
+ IMPORTANT: This tool causes the agent to stop all activity until a message is received. Use it when you need to:
191
+ - Wait for subagent completion reports
192
+ - Coordinate with other agents before proceeding
193
+ - Synchronize multi-agent workflows
194
+
195
+ NOTE: If you are waiting for an agent that is NOT your subagent, you must first tell it to message you with updates before you start waiting. Otherwise, you will wait forever!
196
+ </description>
197
+ <details>When this tool is called, the agent (you) enters a waiting state and will not continue execution until:
198
+ - Another agent sends a message via send_message_to_agent
199
+ - Any other form of inter-agent communication occurs
200
+ - Waiting timeout is reached
201
+
202
+ The agent will automatically resume from where it left off once a message is received.
203
+ This is particularly useful for parent agents waiting for subagent results or for coordination points in multi-agent workflows.
204
+ NOTE: If you have finished your task and you do NOT have any child agents running, you should NEVER use this tool; call the appropriate finish tool instead (agent_finish for subagents, finish_scan for the root agent).
205
+ </details>
206
+ <parameters>
207
+ <parameter name="reason" type="string" required="false">
208
+ <description>Explanation for why the agent is waiting (for logging and monitoring purposes)</description>
209
+ </parameter>
210
+ </parameters>
211
+ <returns type="Dict[str, Any]">
212
+ <description>Response containing: - success: Whether the agent successfully entered waiting state - status: Current agent status ("waiting") - reason: The reason for waiting - agent_info: Details about the waiting agent - resume_conditions: List of conditions that will resume the agent</description>
213
+ </returns>
214
+ <examples>
215
+ # Wait for subagents to complete their tasks
216
+ <function=wait_for_message>
217
+ <parameter=reason>Waiting for subdomain enumeration and port scanning subagents to complete their tasks and report findings</parameter>
218
+ </function>
219
+
220
+ # Coordinate with other agents
221
+ <function=wait_for_message>
222
+ <parameter=reason>Waiting for vulnerability assessment agent to share discovered attack vectors before proceeding with exploitation phase</parameter>
223
+ </function>
224
+ </examples>
225
+ </tool>
226
+ </tools>
@@ -0,0 +1,121 @@
1
+ import contextlib
2
+ import inspect
3
+ import json
4
+ import types
5
+ from collections.abc import Callable
6
+ from typing import Any, Union, get_args, get_origin
7
+
8
+
9
+ class ArgumentConversionError(Exception):
10
+ def __init__(self, message: str, param_name: str | None = None) -> None:
11
+ self.param_name = param_name
12
+ super().__init__(message)
13
+
14
+
15
def convert_arguments(func: Callable[..., Any], kwargs: dict[str, Any]) -> dict[str, Any]:
    """Convert string keyword arguments to the types annotated on ``func``.

    A value is passed through unchanged when its name is not a parameter of
    ``func``, when the parameter has no annotation, or when the value is not
    a string (which includes ``None``). Every other value is handed to
    ``convert_string_to_type`` together with the parameter's annotation.

    Args:
        func: Callable whose signature supplies the target types.
        kwargs: Raw keyword arguments keyed by parameter name.

    Returns:
        A new dict with the same keys as ``kwargs`` and converted values.

    Raises:
        ArgumentConversionError: If a single value fails to convert
            (``param_name`` is set), or if inspecting the signature or
            processing the arguments fails with a ValueError, TypeError or
            AttributeError (``param_name`` is ``None``).
    """
    try:
        parameters = inspect.signature(func).parameters
        converted: dict[str, Any] = {}

        for param_name, value in kwargs.items():
            param = parameters.get(param_name)
            passthrough = (
                param is None
                or param.annotation == inspect.Parameter.empty
                or value is None
                or not isinstance(value, str)
            )
            if passthrough:
                converted[param_name] = value
                continue

            param_type = param.annotation
            try:
                converted[param_name] = convert_string_to_type(value, param_type)
            except (ValueError, TypeError, json.JSONDecodeError) as exc:
                raise ArgumentConversionError(
                    f"Failed to convert argument '{param_name}' to type {param_type}: {exc}",
                    param_name=param_name,
                ) from exc

    except (ValueError, TypeError, AttributeError) as exc:
        raise ArgumentConversionError(f"Failed to process function arguments: {exc}") from exc

    return converted
48
+
49
+
50
def convert_string_to_type(value: str, param_type: Any) -> Any:
    """Convert ``value`` to ``param_type``, unwrapping union annotations.

    For a union (``typing.Union`` or ``X | Y``) each non-``None`` member is
    tried in declaration order and the first conversion that does not raise
    wins; if none succeeds the original string is returned. Any other type
    exposing exactly two ``__args__`` of which one is ``NoneType`` is treated
    the same way, using its single non-``None`` argument. Everything else is
    delegated to ``_convert_basic_types``.

    Args:
        value: Raw string to convert.
        param_type: Target annotation.

    Returns:
        The converted value, or ``value`` itself when no union member could
        be converted.
    """
    none_type = type(None)
    recoverable = (ValueError, TypeError, json.JSONDecodeError)
    origin = get_origin(param_type)

    if origin is Union or isinstance(param_type, types.UnionType):
        for member in get_args(param_type):
            if member is none_type:
                continue
            try:
                return convert_string_to_type(value, member)
            except recoverable:
                # This member did not fit; try the next one.
                continue
        return value

    if hasattr(param_type, "__args__"):
        type_args = getattr(param_type, "__args__", ())
        if len(type_args) == 2 and none_type in type_args:
            first, second = type_args
            inner_type = first if second is none_type else second
            try:
                return convert_string_to_type(value, inner_type)
            except recoverable:
                return value

    return _convert_basic_types(value, param_type, origin)
69
+
70
+
71
def _convert_basic_types(value: str, param_type: Any, origin: Any = None) -> Any:
    """Convert ``value`` to a scalar, list or dict according to ``param_type``.

    ``int``, ``float``, ``bool`` and ``str`` use dedicated converters. A
    ``list`` or ``dict`` target (bare, or as the ``origin`` of a generic
    alias) goes through ``_convert_to_list`` / ``_convert_to_dict``. For any
    other type the value is parsed as JSON, and the original string is
    returned when it is not valid JSON.

    Args:
        value: Raw string to convert.
        param_type: Target annotation.
        origin: Result of ``typing.get_origin(param_type)``, if any.

    Returns:
        The converted value.
    """
    scalar_converters: dict[Any, Callable[[str], Any]] = {
        int: int,
        float: float,
        bool: _convert_to_bool,
        str: str,
    }

    converter = scalar_converters.get(param_type)
    if converter is not None:
        return converter(value)

    targets = (origin, param_type)
    if list in targets:
        return _convert_to_list(value)
    if dict in targets:
        return _convert_to_dict(value)

    try:
        return json.loads(value)
    except json.JSONDecodeError:
        return value
90
+
91
+
92
+ def _convert_to_bool(value: str) -> bool:
93
+ if value.lower() in ("true", "1", "yes", "on"):
94
+ return True
95
+ if value.lower() in ("false", "0", "no", "off"):
96
+ return False
97
+ return bool(value)
98
+
99
+
100
+ def _convert_to_list(value: str) -> list[Any]:
101
+ try:
102
+ parsed = json.loads(value)
103
+ if isinstance(parsed, list):
104
+ return parsed
105
+ except json.JSONDecodeError:
106
+ if "," in value:
107
+ return [item.strip() for item in value.split(",")]
108
+ return [value]
109
+ else:
110
+ return [parsed]
111
+
112
+
113
+ def _convert_to_dict(value: str) -> dict[str, Any]:
114
+ try:
115
+ parsed = json.loads(value)
116
+ if isinstance(parsed, dict):
117
+ return parsed
118
+ except json.JSONDecodeError:
119
+ return {}
120
+ else:
121
+ return {}
@@ -0,0 +1,4 @@
1
+ from .browser_actions import browser_action
2
+
3
+
4
+ __all__ = ["browser_action"]
@@ -0,0 +1,236 @@
1
+ from typing import Any, Literal, NoReturn
2
+
3
+ from strix.tools.registry import register_tool
4
+
5
+ from .tab_manager import BrowserTabManager, get_browser_tab_manager
6
+
7
+
8
# Closed set of action names used to annotate the ``action`` parameter of
# ``browser_action``. Member order is kept as originally declared.
BrowserAction = Literal[
    "launch", "goto", "click", "type", "scroll_down", "scroll_up", "back",
    "forward", "new_tab", "switch_tab", "close_tab", "wait", "execute_js",
    "double_click", "hover", "press_key", "save_pdf", "get_console_logs",
    "view_source", "close", "list_tabs",
]
31
+
32
+
33
+ def _validate_url(action_name: str, url: str | None) -> None:
34
+ if not url:
35
+ raise ValueError(f"url parameter is required for {action_name} action")
36
+
37
+
38
+ def _validate_coordinate(action_name: str, coordinate: str | None) -> None:
39
+ if not coordinate:
40
+ raise ValueError(f"coordinate parameter is required for {action_name} action")
41
+
42
+
43
+ def _validate_text(action_name: str, text: str | None) -> None:
44
+ if not text:
45
+ raise ValueError(f"text parameter is required for {action_name} action")
46
+
47
+
48
+ def _validate_tab_id(action_name: str, tab_id: str | None) -> None:
49
+ if not tab_id:
50
+ raise ValueError(f"tab_id parameter is required for {action_name} action")
51
+
52
+
53
+ def _validate_js_code(action_name: str, js_code: str | None) -> None:
54
+ if not js_code:
55
+ raise ValueError(f"js_code parameter is required for {action_name} action")
56
+
57
+
58
+ def _validate_duration(action_name: str, duration: float | None) -> None:
59
+ if duration is None:
60
+ raise ValueError(f"duration parameter is required for {action_name} action")
61
+
62
+
63
+ def _validate_key(action_name: str, key: str | None) -> None:
64
+ if not key:
65
+ raise ValueError(f"key parameter is required for {action_name} action")
66
+
67
+
68
+ def _validate_file_path(action_name: str, file_path: str | None) -> None:
69
+ if not file_path:
70
+ raise ValueError(f"file_path parameter is required for {action_name} action")
71
+
72
+
73
+ def _handle_navigation_actions(
74
+ manager: BrowserTabManager,
75
+ action: str,
76
+ url: str | None = None,
77
+ tab_id: str | None = None,
78
+ ) -> dict[str, Any]:
79
+ if action == "launch":
80
+ return manager.launch_browser(url)
81
+ if action == "goto":
82
+ _validate_url(action, url)
83
+ assert url is not None
84
+ return manager.goto_url(url, tab_id)
85
+ if action == "back":
86
+ return manager.back(tab_id)
87
+ if action == "forward":
88
+ return manager.forward(tab_id)
89
+ raise ValueError(f"Unknown navigation action: {action}")
90
+
91
+
92
def _handle_interaction_actions(
    manager: BrowserTabManager,
    action: str,
    coordinate: str | None = None,
    text: str | None = None,
    key: str | None = None,
    tab_id: str | None = None,
) -> dict[str, Any]:
    """Run a page-interaction action.

    Handles ``click``, ``double_click``, ``hover``, ``scroll_down``,
    ``scroll_up``, ``type`` and ``press_key``.

    Args:
        manager: Tab manager that performs the action.
        action: Interaction action name.
        coordinate: Required for ``click``, ``double_click`` and ``hover``.
        text: Required for ``type``.
        key: Required for ``press_key``.
        tab_id: Tab passed to the manager method.

    Returns:
        Whatever the invoked manager method returns.

    Raises:
        ValueError: If a required parameter is missing, or ``action`` is not
            an interaction action.
    """
    if action in {"click", "double_click", "hover"}:
        _validate_coordinate(action, coordinate)
        # Type narrowing only; the validator has already rejected None.
        assert coordinate is not None
        pointer_handlers = {
            "click": manager.click,
            "double_click": manager.double_click,
            "hover": manager.hover,
        }
        handler = pointer_handlers[action]
        return handler(coordinate, tab_id)

    scroll_direction = {"scroll_down": "down", "scroll_up": "up"}.get(action)
    if scroll_direction is not None:
        return manager.scroll(scroll_direction, tab_id)

    if action == "type":
        _validate_text(action, text)
        assert text is not None
        return manager.type_text(text, tab_id)

    if action == "press_key":
        _validate_key(action, key)
        assert key is not None
        return manager.press_key(key, tab_id)

    raise ValueError(f"Unknown interaction action: {action}")
124
+
125
+
126
+ def _raise_unknown_action(action: str) -> NoReturn:
127
+ raise ValueError(f"Unknown action: {action}")
128
+
129
+
130
+ def _handle_tab_actions(
131
+ manager: BrowserTabManager,
132
+ action: str,
133
+ url: str | None = None,
134
+ tab_id: str | None = None,
135
+ ) -> dict[str, Any]:
136
+ if action == "new_tab":
137
+ return manager.new_tab(url)
138
+ if action == "switch_tab":
139
+ _validate_tab_id(action, tab_id)
140
+ assert tab_id is not None
141
+ return manager.switch_tab(tab_id)
142
+ if action == "close_tab":
143
+ _validate_tab_id(action, tab_id)
144
+ assert tab_id is not None
145
+ return manager.close_tab(tab_id)
146
+ if action == "list_tabs":
147
+ return manager.list_tabs()
148
+ raise ValueError(f"Unknown tab action: {action}")
149
+
150
+
151
+ def _handle_utility_actions(
152
+ manager: BrowserTabManager,
153
+ action: str,
154
+ duration: float | None = None,
155
+ js_code: str | None = None,
156
+ file_path: str | None = None,
157
+ tab_id: str | None = None,
158
+ clear: bool = False,
159
+ ) -> dict[str, Any]:
160
+ if action == "wait":
161
+ _validate_duration(action, duration)
162
+ assert duration is not None
163
+ return manager.wait_browser(duration, tab_id)
164
+ if action == "execute_js":
165
+ _validate_js_code(action, js_code)
166
+ assert js_code is not None
167
+ return manager.execute_js(js_code, tab_id)
168
+ if action == "save_pdf":
169
+ _validate_file_path(action, file_path)
170
+ assert file_path is not None
171
+ return manager.save_pdf(file_path, tab_id)
172
+ if action == "get_console_logs":
173
+ return manager.get_console_logs(tab_id, clear)
174
+ if action == "view_source":
175
+ return manager.view_source(tab_id)
176
+ if action == "close":
177
+ return manager.close_browser()
178
+ raise ValueError(f"Unknown utility action: {action}")
179
+
180
+
181
@register_tool
def browser_action(
    action: BrowserAction,
    url: str | None = None,
    coordinate: str | None = None,
    text: str | None = None,
    tab_id: str | None = None,
    js_code: str | None = None,
    duration: float | None = None,
    key: str | None = None,
    file_path: str | None = None,
    clear: bool = False,
) -> dict[str, Any]:
    """Route a browser action to the handler for its category.

    The action is dispatched to the navigation, interaction, tab or utility
    handler. A ``ValueError`` or ``RuntimeError`` raised while handling the
    action is not propagated; it is reported through the returned dict.

    Args:
        action: Name of the browser action to run.
        url: URL for actions that take one.
        coordinate: Coordinate for pointer actions.
        text: Text for the ``type`` action.
        tab_id: Target tab, where the action accepts one.
        js_code: Script for ``execute_js``.
        duration: Duration for ``wait``.
        key: Key for ``press_key``.
        file_path: Destination for ``save_pdf``.
        clear: Forwarded to ``get_console_logs``.

    Returns:
        The handler's result, or on failure a dict with the keys ``error``,
        ``tab_id``, ``screenshot`` (empty string) and ``is_running``
        (``False``).
    """
    manager = get_browser_tab_manager()

    try:
        if action in {"launch", "goto", "back", "forward"}:
            return _handle_navigation_actions(manager, action, url, tab_id)

        if action in {
            "click",
            "type",
            "double_click",
            "hover",
            "press_key",
            "scroll_down",
            "scroll_up",
        }:
            return _handle_interaction_actions(manager, action, coordinate, text, key, tab_id)

        if action in {"new_tab", "switch_tab", "close_tab", "list_tabs"}:
            return _handle_tab_actions(manager, action, url, tab_id)

        if action in {
            "wait",
            "execute_js",
            "save_pdf",
            "get_console_logs",
            "view_source",
            "close",
        }:
            return _handle_utility_actions(
                manager, action, duration, js_code, file_path, tab_id, clear
            )

        _raise_unknown_action(action)

    except (ValueError, RuntimeError) as exc:
        failure: dict[str, Any] = {
            "error": str(exc),
            "tab_id": tab_id,
            "screenshot": "",
            "is_running": False,
        }
        return failure