strix-agent 0.4.0__py3-none-any.whl → 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- strix/agents/StrixAgent/strix_agent.py +3 -3
- strix/agents/StrixAgent/system_prompt.jinja +30 -26
- strix/agents/base_agent.py +159 -75
- strix/agents/state.py +5 -2
- strix/config/__init__.py +12 -0
- strix/config/config.py +172 -0
- strix/interface/assets/tui_styles.tcss +195 -230
- strix/interface/cli.py +16 -41
- strix/interface/main.py +151 -74
- strix/interface/streaming_parser.py +119 -0
- strix/interface/tool_components/__init__.py +4 -0
- strix/interface/tool_components/agent_message_renderer.py +190 -0
- strix/interface/tool_components/agents_graph_renderer.py +54 -38
- strix/interface/tool_components/base_renderer.py +68 -36
- strix/interface/tool_components/browser_renderer.py +106 -91
- strix/interface/tool_components/file_edit_renderer.py +117 -36
- strix/interface/tool_components/finish_renderer.py +43 -10
- strix/interface/tool_components/notes_renderer.py +63 -38
- strix/interface/tool_components/proxy_renderer.py +133 -92
- strix/interface/tool_components/python_renderer.py +121 -8
- strix/interface/tool_components/registry.py +19 -12
- strix/interface/tool_components/reporting_renderer.py +196 -28
- strix/interface/tool_components/scan_info_renderer.py +22 -19
- strix/interface/tool_components/terminal_renderer.py +270 -90
- strix/interface/tool_components/thinking_renderer.py +8 -6
- strix/interface/tool_components/todo_renderer.py +225 -0
- strix/interface/tool_components/user_message_renderer.py +26 -19
- strix/interface/tool_components/web_search_renderer.py +7 -6
- strix/interface/tui.py +907 -262
- strix/interface/utils.py +236 -4
- strix/llm/__init__.py +6 -2
- strix/llm/config.py +8 -5
- strix/llm/dedupe.py +217 -0
- strix/llm/llm.py +209 -356
- strix/llm/memory_compressor.py +6 -5
- strix/llm/utils.py +17 -8
- strix/runtime/__init__.py +12 -3
- strix/runtime/docker_runtime.py +121 -202
- strix/runtime/tool_server.py +55 -95
- strix/skills/README.md +64 -0
- strix/skills/__init__.py +110 -0
- strix/{prompts → skills}/frameworks/nextjs.jinja +26 -0
- strix/skills/scan_modes/deep.jinja +145 -0
- strix/skills/scan_modes/quick.jinja +63 -0
- strix/skills/scan_modes/standard.jinja +91 -0
- strix/telemetry/README.md +38 -0
- strix/telemetry/__init__.py +7 -1
- strix/telemetry/posthog.py +137 -0
- strix/telemetry/tracer.py +194 -54
- strix/tools/__init__.py +11 -4
- strix/tools/agents_graph/agents_graph_actions.py +20 -21
- strix/tools/agents_graph/agents_graph_actions_schema.xml +8 -8
- strix/tools/browser/browser_actions.py +10 -6
- strix/tools/browser/browser_actions_schema.xml +6 -1
- strix/tools/browser/browser_instance.py +96 -48
- strix/tools/browser/tab_manager.py +121 -102
- strix/tools/context.py +12 -0
- strix/tools/executor.py +63 -4
- strix/tools/file_edit/file_edit_actions.py +6 -3
- strix/tools/file_edit/file_edit_actions_schema.xml +45 -3
- strix/tools/finish/finish_actions.py +80 -105
- strix/tools/finish/finish_actions_schema.xml +121 -14
- strix/tools/notes/notes_actions.py +6 -33
- strix/tools/notes/notes_actions_schema.xml +50 -46
- strix/tools/proxy/proxy_actions.py +14 -2
- strix/tools/proxy/proxy_actions_schema.xml +0 -1
- strix/tools/proxy/proxy_manager.py +28 -16
- strix/tools/python/python_actions.py +2 -2
- strix/tools/python/python_actions_schema.xml +9 -1
- strix/tools/python/python_instance.py +39 -37
- strix/tools/python/python_manager.py +43 -31
- strix/tools/registry.py +73 -12
- strix/tools/reporting/reporting_actions.py +218 -31
- strix/tools/reporting/reporting_actions_schema.xml +256 -8
- strix/tools/terminal/terminal_actions.py +2 -2
- strix/tools/terminal/terminal_actions_schema.xml +6 -0
- strix/tools/terminal/terminal_manager.py +41 -30
- strix/tools/thinking/thinking_actions_schema.xml +27 -25
- strix/tools/todo/__init__.py +18 -0
- strix/tools/todo/todo_actions.py +568 -0
- strix/tools/todo/todo_actions_schema.xml +225 -0
- strix/utils/__init__.py +0 -0
- strix/utils/resource_paths.py +13 -0
- {strix_agent-0.4.0.dist-info → strix_agent-0.6.2.dist-info}/METADATA +90 -65
- strix_agent-0.6.2.dist-info/RECORD +134 -0
- {strix_agent-0.4.0.dist-info → strix_agent-0.6.2.dist-info}/WHEEL +1 -1
- strix/llm/request_queue.py +0 -87
- strix/prompts/README.md +0 -64
- strix/prompts/__init__.py +0 -109
- strix_agent-0.4.0.dist-info/RECORD +0 -118
- /strix/{prompts → skills}/cloud/.gitkeep +0 -0
- /strix/{prompts → skills}/coordination/root_agent.jinja +0 -0
- /strix/{prompts → skills}/custom/.gitkeep +0 -0
- /strix/{prompts → skills}/frameworks/fastapi.jinja +0 -0
- /strix/{prompts → skills}/protocols/graphql.jinja +0 -0
- /strix/{prompts → skills}/reconnaissance/.gitkeep +0 -0
- /strix/{prompts → skills}/technologies/firebase_firestore.jinja +0 -0
- /strix/{prompts → skills}/technologies/supabase.jinja +0 -0
- /strix/{prompts → skills}/vulnerabilities/authentication_jwt.jinja +0 -0
- /strix/{prompts → skills}/vulnerabilities/broken_function_level_authorization.jinja +0 -0
- /strix/{prompts → skills}/vulnerabilities/business_logic.jinja +0 -0
- /strix/{prompts → skills}/vulnerabilities/csrf.jinja +0 -0
- /strix/{prompts → skills}/vulnerabilities/idor.jinja +0 -0
- /strix/{prompts → skills}/vulnerabilities/information_disclosure.jinja +0 -0
- /strix/{prompts → skills}/vulnerabilities/insecure_file_uploads.jinja +0 -0
- /strix/{prompts → skills}/vulnerabilities/mass_assignment.jinja +0 -0
- /strix/{prompts → skills}/vulnerabilities/open_redirect.jinja +0 -0
- /strix/{prompts → skills}/vulnerabilities/path_traversal_lfi_rfi.jinja +0 -0
- /strix/{prompts → skills}/vulnerabilities/race_conditions.jinja +0 -0
- /strix/{prompts → skills}/vulnerabilities/rce.jinja +0 -0
- /strix/{prompts → skills}/vulnerabilities/sql_injection.jinja +0 -0
- /strix/{prompts → skills}/vulnerabilities/ssrf.jinja +0 -0
- /strix/{prompts → skills}/vulnerabilities/subdomain_takeover.jinja +0 -0
- /strix/{prompts → skills}/vulnerabilities/xss.jinja +0 -0
- /strix/{prompts → skills}/vulnerabilities/xxe.jinja +0 -0
- {strix_agent-0.4.0.dist-info → strix_agent-0.6.2.dist-info}/entry_points.txt +0 -0
- {strix_agent-0.4.0.dist-info → strix_agent-0.6.2.dist-info/licenses}/LICENSE +0 -0
|
@@ -104,8 +104,30 @@
|
|
|
104
104
|
# Create a file
|
|
105
105
|
<function=str_replace_editor>
|
|
106
106
|
<parameter=command>create</parameter>
|
|
107
|
-
<parameter=path>/home/user/project/
|
|
108
|
-
<parameter=file_text
|
|
107
|
+
<parameter=path>/home/user/project/exploit.py</parameter>
|
|
108
|
+
<parameter=file_text>#!/usr/bin/env python3
|
|
109
|
+
"""SQL Injection exploit for Acme Corp login endpoint."""
|
|
110
|
+
|
|
111
|
+
import requests
|
|
112
|
+
import sys
|
|
113
|
+
|
|
114
|
+
TARGET = "https://app.acme-corp.com/api/v1/auth/login"
|
|
115
|
+
|
|
116
|
+
def exploit(username: str) -> dict:
|
|
117
|
+
payload = {
|
|
118
|
+
"username": f"{username}'--",
|
|
119
|
+
"password": "anything"
|
|
120
|
+
}
|
|
121
|
+
response = requests.post(TARGET, json=payload, timeout=10)
|
|
122
|
+
return response.json()
|
|
123
|
+
|
|
124
|
+
if __name__ == "__main__":
|
|
125
|
+
if len(sys.argv) < 2:
|
|
126
|
+
print(f"Usage: {sys.argv[0]} <username>")
|
|
127
|
+
sys.exit(1)
|
|
128
|
+
|
|
129
|
+
result = exploit(sys.argv[1])
|
|
130
|
+
print(f"Result: {result}")</parameter>
|
|
109
131
|
</function>
|
|
110
132
|
|
|
111
133
|
# Replace text in file
|
|
@@ -121,7 +143,27 @@
|
|
|
121
143
|
<parameter=command>insert</parameter>
|
|
122
144
|
<parameter=path>/home/user/project/file.py</parameter>
|
|
123
145
|
<parameter=insert_line>10</parameter>
|
|
124
|
-
<parameter=new_str>
|
|
146
|
+
<parameter=new_str>def validate_input(user_input: str) -> bool:
|
|
147
|
+
"""Validate user input to prevent injection attacks."""
|
|
148
|
+
forbidden_chars = ["'", '"', ";", "--", "/*", "*/"]
|
|
149
|
+
for char in forbidden_chars:
|
|
150
|
+
if char in user_input:
|
|
151
|
+
return False
|
|
152
|
+
return True</parameter>
|
|
153
|
+
</function>
|
|
154
|
+
|
|
155
|
+
# Replace code block
|
|
156
|
+
<function=str_replace_editor>
|
|
157
|
+
<parameter=command>str_replace</parameter>
|
|
158
|
+
<parameter=path>/home/user/project/auth.py</parameter>
|
|
159
|
+
<parameter=old_str>def authenticate(username, password):
|
|
160
|
+
query = f"SELECT * FROM users WHERE username = '{username}'"
|
|
161
|
+
result = db.execute(query)
|
|
162
|
+
return result</parameter>
|
|
163
|
+
<parameter=new_str>def authenticate(username, password):
|
|
164
|
+
query = "SELECT * FROM users WHERE username = %s"
|
|
165
|
+
result = db.execute(query, (username,))
|
|
166
|
+
return result</parameter>
|
|
125
167
|
</function>
|
|
126
168
|
</examples>
|
|
127
169
|
</tool>
|
|
@@ -4,49 +4,40 @@ from strix.tools.registry import register_tool
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
def _validate_root_agent(agent_state: Any) -> dict[str, Any] | None:
|
|
7
|
-
if (
|
|
8
|
-
agent_state is not None
|
|
9
|
-
and hasattr(agent_state, "parent_id")
|
|
10
|
-
and agent_state.parent_id is not None
|
|
11
|
-
):
|
|
7
|
+
if agent_state and hasattr(agent_state, "parent_id") and agent_state.parent_id is not None:
|
|
12
8
|
return {
|
|
13
9
|
"success": False,
|
|
14
|
-
"
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
),
|
|
10
|
+
"error": "finish_scan_wrong_agent",
|
|
11
|
+
"message": "This tool can only be used by the root/main agent",
|
|
12
|
+
"suggestion": "If you are a subagent, use agent_finish from agents_graph tool instead",
|
|
18
13
|
}
|
|
19
14
|
return None
|
|
20
15
|
|
|
21
16
|
|
|
22
|
-
def _validate_content(content: str) -> dict[str, Any] | None:
|
|
23
|
-
if not content or not content.strip():
|
|
24
|
-
return {"success": False, "message": "Content cannot be empty"}
|
|
25
|
-
return None
|
|
26
|
-
|
|
27
|
-
|
|
28
17
|
def _check_active_agents(agent_state: Any = None) -> dict[str, Any] | None:
|
|
29
18
|
try:
|
|
30
19
|
from strix.tools.agents_graph.agents_graph_actions import _agent_graph
|
|
31
20
|
|
|
32
|
-
|
|
33
|
-
if agent_state and hasattr(agent_state, "agent_id"):
|
|
21
|
+
if agent_state and agent_state.agent_id:
|
|
34
22
|
current_agent_id = agent_state.agent_id
|
|
23
|
+
else:
|
|
24
|
+
return None
|
|
35
25
|
|
|
36
|
-
|
|
26
|
+
active_agents = []
|
|
37
27
|
stopping_agents = []
|
|
38
28
|
|
|
39
|
-
for agent_id, node in _agent_graph
|
|
29
|
+
for agent_id, node in _agent_graph["nodes"].items():
|
|
40
30
|
if agent_id == current_agent_id:
|
|
41
31
|
continue
|
|
42
32
|
|
|
43
|
-
status = node.get("status", "")
|
|
33
|
+
status = node.get("status", "unknown")
|
|
44
34
|
if status == "running":
|
|
45
|
-
|
|
35
|
+
active_agents.append(
|
|
46
36
|
{
|
|
47
37
|
"id": agent_id,
|
|
48
38
|
"name": node.get("name", "Unknown"),
|
|
49
|
-
"task": node.get("task", "
|
|
39
|
+
"task": node.get("task", "Unknown task")[:300],
|
|
40
|
+
"status": status,
|
|
50
41
|
}
|
|
51
42
|
)
|
|
52
43
|
elif status == "stopping":
|
|
@@ -54,121 +45,105 @@ def _check_active_agents(agent_state: Any = None) -> dict[str, Any] | None:
|
|
|
54
45
|
{
|
|
55
46
|
"id": agent_id,
|
|
56
47
|
"name": node.get("name", "Unknown"),
|
|
48
|
+
"task": node.get("task", "Unknown task")[:300],
|
|
49
|
+
"status": status,
|
|
57
50
|
}
|
|
58
51
|
)
|
|
59
52
|
|
|
60
|
-
if
|
|
61
|
-
|
|
53
|
+
if active_agents or stopping_agents:
|
|
54
|
+
response: dict[str, Any] = {
|
|
55
|
+
"success": False,
|
|
56
|
+
"error": "agents_still_active",
|
|
57
|
+
"message": "Cannot finish scan: agents are still active",
|
|
58
|
+
}
|
|
62
59
|
|
|
63
|
-
if
|
|
64
|
-
|
|
65
|
-
message_parts.extend(
|
|
66
|
-
[
|
|
67
|
-
f" - {agent['name']} ({agent['id']}): {agent['task']}"
|
|
68
|
-
for agent in running_agents
|
|
69
|
-
]
|
|
70
|
-
)
|
|
60
|
+
if active_agents:
|
|
61
|
+
response["active_agents"] = active_agents
|
|
71
62
|
|
|
72
63
|
if stopping_agents:
|
|
73
|
-
|
|
74
|
-
message_parts.extend(
|
|
75
|
-
[f" - {agent['name']} ({agent['id']})" for agent in stopping_agents]
|
|
76
|
-
)
|
|
64
|
+
response["stopping_agents"] = stopping_agents
|
|
77
65
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
"3. Use view_agent_graph to monitor agent status",
|
|
84
|
-
]
|
|
85
|
-
)
|
|
66
|
+
response["suggestions"] = [
|
|
67
|
+
"Use wait_for_message to wait for all agents to complete",
|
|
68
|
+
"Use send_message_to_agent if you need agents to complete immediately",
|
|
69
|
+
"Check agent_status to see current agent states",
|
|
70
|
+
]
|
|
86
71
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
"active_agents": {
|
|
91
|
-
"running": len(running_agents),
|
|
92
|
-
"stopping": len(stopping_agents),
|
|
93
|
-
"details": {
|
|
94
|
-
"running": running_agents,
|
|
95
|
-
"stopping": stopping_agents,
|
|
96
|
-
},
|
|
97
|
-
},
|
|
98
|
-
}
|
|
72
|
+
response["total_active"] = len(active_agents) + len(stopping_agents)
|
|
73
|
+
|
|
74
|
+
return response
|
|
99
75
|
|
|
100
76
|
except ImportError:
|
|
77
|
+
pass
|
|
78
|
+
except Exception:
|
|
101
79
|
import logging
|
|
102
80
|
|
|
103
|
-
logging.
|
|
81
|
+
logging.exception("Error checking active agents")
|
|
104
82
|
|
|
105
83
|
return None
|
|
106
84
|
|
|
107
85
|
|
|
108
|
-
|
|
86
|
+
@register_tool(sandbox_execution=False)
|
|
87
|
+
def finish_scan(
|
|
88
|
+
executive_summary: str,
|
|
89
|
+
methodology: str,
|
|
90
|
+
technical_analysis: str,
|
|
91
|
+
recommendations: str,
|
|
92
|
+
agent_state: Any = None,
|
|
93
|
+
) -> dict[str, Any]:
|
|
94
|
+
validation_error = _validate_root_agent(agent_state)
|
|
95
|
+
if validation_error:
|
|
96
|
+
return validation_error
|
|
97
|
+
|
|
98
|
+
active_agents_error = _check_active_agents(agent_state)
|
|
99
|
+
if active_agents_error:
|
|
100
|
+
return active_agents_error
|
|
101
|
+
|
|
102
|
+
validation_errors = []
|
|
103
|
+
|
|
104
|
+
if not executive_summary or not executive_summary.strip():
|
|
105
|
+
validation_errors.append("Executive summary cannot be empty")
|
|
106
|
+
if not methodology or not methodology.strip():
|
|
107
|
+
validation_errors.append("Methodology cannot be empty")
|
|
108
|
+
if not technical_analysis or not technical_analysis.strip():
|
|
109
|
+
validation_errors.append("Technical analysis cannot be empty")
|
|
110
|
+
if not recommendations or not recommendations.strip():
|
|
111
|
+
validation_errors.append("Recommendations cannot be empty")
|
|
112
|
+
|
|
113
|
+
if validation_errors:
|
|
114
|
+
return {"success": False, "message": "Validation failed", "errors": validation_errors}
|
|
115
|
+
|
|
109
116
|
try:
|
|
110
117
|
from strix.telemetry.tracer import get_global_tracer
|
|
111
118
|
|
|
112
119
|
tracer = get_global_tracer()
|
|
113
120
|
if tracer:
|
|
114
|
-
tracer.
|
|
115
|
-
|
|
116
|
-
|
|
121
|
+
tracer.update_scan_final_fields(
|
|
122
|
+
executive_summary=executive_summary.strip(),
|
|
123
|
+
methodology=methodology.strip(),
|
|
124
|
+
technical_analysis=technical_analysis.strip(),
|
|
125
|
+
recommendations=recommendations.strip(),
|
|
117
126
|
)
|
|
118
127
|
|
|
128
|
+
vulnerability_count = len(tracer.vulnerability_reports)
|
|
129
|
+
|
|
119
130
|
return {
|
|
120
131
|
"success": True,
|
|
121
132
|
"scan_completed": True,
|
|
122
|
-
"message": "Scan completed successfully"
|
|
123
|
-
|
|
124
|
-
else "Scan completed with errors",
|
|
125
|
-
"vulnerabilities_found": len(tracer.vulnerability_reports),
|
|
133
|
+
"message": "Scan completed successfully",
|
|
134
|
+
"vulnerabilities_found": vulnerability_count,
|
|
126
135
|
}
|
|
127
136
|
|
|
128
137
|
import logging
|
|
129
138
|
|
|
130
|
-
logging.warning("
|
|
131
|
-
|
|
132
|
-
return { # noqa: TRY300
|
|
133
|
-
"success": True,
|
|
134
|
-
"scan_completed": True,
|
|
135
|
-
"message": "Scan completed successfully (not persisted)"
|
|
136
|
-
if success
|
|
137
|
-
else "Scan completed with errors (not persisted)",
|
|
138
|
-
"warning": "Final result could not be persisted - tracer unavailable",
|
|
139
|
-
}
|
|
139
|
+
logging.warning("Current tracer not available - scan results not stored")
|
|
140
140
|
|
|
141
|
-
except ImportError:
|
|
141
|
+
except (ImportError, AttributeError) as e:
|
|
142
|
+
return {"success": False, "message": f"Failed to complete scan: {e!s}"}
|
|
143
|
+
else:
|
|
142
144
|
return {
|
|
143
145
|
"success": True,
|
|
144
146
|
"scan_completed": True,
|
|
145
|
-
"message": "Scan completed
|
|
146
|
-
|
|
147
|
-
else "Scan completed with errors (not persisted)",
|
|
148
|
-
"warning": "Final result could not be persisted - tracer module unavailable",
|
|
147
|
+
"message": "Scan completed (not persisted)",
|
|
148
|
+
"warning": "Results could not be persisted - tracer unavailable",
|
|
149
149
|
}
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
@register_tool(sandbox_execution=False)
|
|
153
|
-
def finish_scan(
|
|
154
|
-
content: str,
|
|
155
|
-
success: bool = True,
|
|
156
|
-
agent_state: Any = None,
|
|
157
|
-
) -> dict[str, Any]:
|
|
158
|
-
try:
|
|
159
|
-
validation_error = _validate_root_agent(agent_state)
|
|
160
|
-
if validation_error:
|
|
161
|
-
return validation_error
|
|
162
|
-
|
|
163
|
-
validation_error = _validate_content(content)
|
|
164
|
-
if validation_error:
|
|
165
|
-
return validation_error
|
|
166
|
-
|
|
167
|
-
active_agents_error = _check_active_agents(agent_state)
|
|
168
|
-
if active_agents_error:
|
|
169
|
-
return active_agents_error
|
|
170
|
-
|
|
171
|
-
return _finalize_with_tracer(content, success)
|
|
172
|
-
|
|
173
|
-
except (ValueError, TypeError, KeyError) as e:
|
|
174
|
-
return {"success": False, "message": f"Failed to complete scan: {e!s}"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
<tools>
|
|
2
2
|
<tool name="finish_scan">
|
|
3
|
-
<description>Complete the
|
|
3
|
+
<description>Complete the security scan by providing the final assessment fields as a full penetration test report.
|
|
4
4
|
|
|
5
5
|
IMPORTANT: This tool can ONLY be used by the root/main agent.
|
|
6
6
|
Subagents must use agent_finish from agents_graph tool instead.
|
|
@@ -8,11 +8,20 @@ Subagents must use agent_finish from agents_graph tool instead.
|
|
|
8
8
|
IMPORTANT: This tool will NOT allow finishing if any agents are still running or stopping.
|
|
9
9
|
You must wait for all agents to complete before using this tool.
|
|
10
10
|
|
|
11
|
-
This tool
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
-
|
|
15
|
-
-
|
|
11
|
+
This tool directly updates the scan report data:
|
|
12
|
+
- executive_summary
|
|
13
|
+
- methodology
|
|
14
|
+
- technical_analysis
|
|
15
|
+
- recommendations
|
|
16
|
+
|
|
17
|
+
All fields are REQUIRED and map directly to the final report.
|
|
18
|
+
|
|
19
|
+
This must be the last tool called in the scan. It will:
|
|
20
|
+
1. Verify you are the root agent
|
|
21
|
+
2. Check all subagents have completed
|
|
22
|
+
3. Update the scan with your provided fields
|
|
23
|
+
4. Mark the scan as completed
|
|
24
|
+
5. Stop agent execution
|
|
16
25
|
|
|
17
26
|
Use this tool when:
|
|
18
27
|
- You are the main/root agent conducting the security assessment
|
|
@@ -23,23 +32,121 @@ Use this tool when:
|
|
|
23
32
|
IMPORTANT: Calling this tool multiple times will OVERWRITE any previous scan report.
|
|
24
33
|
Make sure you include ALL findings and details in a single comprehensive report.
|
|
25
34
|
|
|
26
|
-
If agents are still running,
|
|
35
|
+
If agents are still running, the tool will:
|
|
27
36
|
- Show you which agents are still active
|
|
28
37
|
- Suggest using wait_for_message to wait for completion
|
|
29
38
|
- Suggest messaging agents if immediate completion is needed
|
|
30
39
|
|
|
31
|
-
|
|
32
|
-
|
|
40
|
+
NOTE: Make sure the vulnerabilities found were reported with the create_vulnerability_report tool, otherwise they will not be tracked and you will not be rewarded.
|
|
41
|
+
But make sure to not report the same vulnerability multiple times.
|
|
42
|
+
|
|
43
|
+
Professional, customer-facing penetration test report rules (PDF-ready):
|
|
44
|
+
- Do NOT include internal or system details: never mention local/absolute paths (e.g., "/workspace"), internal tools, agents, orchestrators, sandboxes, models, system prompts/instructions, connection/tooling issues, or tester environment details.
|
|
45
|
+
- Tone and style: formal, objective, third-person, concise. No internal checklists or engineering runbooks. Content must read as a polished client deliverable.
|
|
46
|
+
- Structure across fields should align to standard pentest reports:
|
|
47
|
+
- Executive summary: business impact, risk posture, notable criticals, remediation theme.
|
|
48
|
+
- Methodology: industry-standard methods (e.g., OWASP, OSSTMM, NIST), scope, constraints—no internal execution notes.
|
|
49
|
+
- Technical analysis: consolidated findings overview referencing created vulnerability reports; avoid raw logs.
|
|
50
|
+
- Recommendations: prioritized, actionable, aligned to risk and best practices.
|
|
51
|
+
</description>
|
|
33
52
|
<parameters>
|
|
34
|
-
<parameter name="
|
|
35
|
-
<description>
|
|
53
|
+
<parameter name="executive_summary" type="string" required="true">
|
|
54
|
+
<description>High-level summary for executives: key findings, overall security posture, critical risks, business impact</description>
|
|
36
55
|
</parameter>
|
|
37
|
-
<parameter name="
|
|
38
|
-
<description>
|
|
56
|
+
<parameter name="methodology" type="string" required="true">
|
|
57
|
+
<description>Testing methodology: approach, tools used, scope, techniques employed</description>
|
|
58
|
+
</parameter>
|
|
59
|
+
<parameter name="technical_analysis" type="string" required="true">
|
|
60
|
+
<description>Detailed technical findings and security assessment results over the scan</description>
|
|
61
|
+
</parameter>
|
|
62
|
+
<parameter name="recommendations" type="string" required="true">
|
|
63
|
+
<description>Actionable security recommendations and remediation priorities</description>
|
|
39
64
|
</parameter>
|
|
40
65
|
</parameters>
|
|
41
66
|
<returns type="Dict[str, Any]">
|
|
42
|
-
<description>Response containing success status and completion message. If agents are still running, returns details about active agents and suggested actions.</description>
|
|
67
|
+
<description>Response containing success status, vulnerability count, and completion message. If agents are still running, returns details about active agents and suggested actions.</description>
|
|
43
68
|
</returns>
|
|
69
|
+
<examples>
|
|
70
|
+
|
|
71
|
+
<function=finish_scan>
|
|
72
|
+
<parameter=executive_summary>Executive summary
|
|
73
|
+
An external penetration test of the Acme Customer Portal and associated API identified multiple security weaknesses that, if exploited, could result in unauthorized access to customer data, cross-tenant exposure, and access to internal network resources.
|
|
74
|
+
|
|
75
|
+
Overall risk posture: Elevated.
|
|
76
|
+
|
|
77
|
+
Key outcomes
|
|
78
|
+
- Confirmed server-side request forgery (SSRF) in a URL preview capability that enables the application to initiate outbound requests to attacker-controlled destinations and internal network ranges.
|
|
79
|
+
- Identified broken access control patterns in business-critical workflows that can enable cross-tenant data access (tenant isolation failures).
|
|
80
|
+
- Observed session and authorization hardening gaps that materially increase risk when combined with other weaknesses.
|
|
81
|
+
|
|
82
|
+
Business impact
|
|
83
|
+
- Increased likelihood of sensitive data exposure across customers/tenants, including invoices, orders, and account information.
|
|
84
|
+
- Increased risk of internal service exposure through server-side outbound request functionality (including link-local and private network destinations).
|
|
85
|
+
- Increased potential for account compromise and administrative abuse if tokens are stolen or misused.
|
|
86
|
+
|
|
87
|
+
Remediation theme
|
|
88
|
+
Prioritize eliminating SSRF pathways and centralizing authorization enforcement (deny-by-default). Follow with session hardening and monitoring improvements, then validate with a focused retest.</parameter>
|
|
89
|
+
<parameter=methodology>Methodology
|
|
90
|
+
The assessment followed industry-standard penetration testing practices aligned to OWASP Web Security Testing Guide (WSTG) concepts and common web/API security testing methodology.
|
|
91
|
+
|
|
92
|
+
Engagement details
|
|
93
|
+
- Assessment type: External penetration test (black-box with limited gray-box context)
|
|
94
|
+
- Target environment: Production-equivalent staging
|
|
95
|
+
|
|
96
|
+
Scope (in-scope assets)
|
|
97
|
+
- Web application: https://app.acme-corp.com
|
|
98
|
+
- API base: https://app.acme-corp.com/api/v1/
|
|
99
|
+
|
|
100
|
+
High-level testing activities
|
|
101
|
+
- Reconnaissance and attack-surface mapping (routes, parameters, workflows)
|
|
102
|
+
- Authentication and session management review (token handling, session lifetime, sensitive actions)
|
|
103
|
+
- Authorization and tenant-isolation testing (object access and privilege boundaries)
|
|
104
|
+
- Input handling and server-side request testing (URL fetchers, imports, previews, callbacks)
|
|
105
|
+
- File handling and content rendering review (uploads, previews, unsafe content types)
|
|
106
|
+
- Configuration review (transport security, security headers, caching behavior, error handling)
|
|
107
|
+
|
|
108
|
+
Evidence handling and validation standard
|
|
109
|
+
Only validated issues with reproducible impact were treated as findings. Each finding was documented with clear reproduction steps and sufficient evidence to support remediation and verification testing.</parameter>
|
|
110
|
+
<parameter=technical_analysis>Technical analysis
|
|
111
|
+
This section provides a consolidated view of the confirmed findings and observed risk patterns. Detailed reproduction steps and evidence are documented in the individual vulnerability reports.
|
|
112
|
+
|
|
113
|
+
Severity model
|
|
114
|
+
Severity reflects a combination of exploitability and potential impact to confidentiality, integrity, and availability, considering realistic attacker capabilities.
|
|
115
|
+
|
|
116
|
+
Confirmed findings (high level)
|
|
117
|
+
1) Server-side request forgery (SSRF) in URL preview (Critical)
|
|
118
|
+
The application fetches user-supplied URLs server-side to generate previews. Validation controls were insufficient to prevent access to internal and link-local destinations. This creates a pathway to internal network enumeration and potential access to sensitive internal services. Redirect and DNS/normalization bypass risk must be assumed unless controls are comprehensive and applied on every request hop.
|
|
119
|
+
|
|
120
|
+
2) Broken tenant isolation in order/invoice workflows (High)
|
|
121
|
+
Multiple endpoints accepted object identifiers without consistently enforcing tenant ownership. This is indicative of broken function- and object-level authorization checks. In practice, this can enable cross-tenant access to business-critical resources (viewing or modifying data outside the attacker’s tenant boundary).
|
|
122
|
+
|
|
123
|
+
3) Administrative action hardening gaps (Medium)
|
|
124
|
+
Several sensitive actions lacked defense-in-depth controls (e.g., re-authentication for high-risk actions, consistent authorization checks across related endpoints, and protections against session misuse). While not all behaviors were immediately exploitable in isolation, they increase the likelihood and blast radius of account compromise when chained with other vulnerabilities.
|
|
125
|
+
|
|
126
|
+
4) Unsafe file preview/content handling patterns (Medium)
|
|
127
|
+
File preview and rendering behaviors can create exposure to script execution or content-type confusion if unsafe formats are rendered inline. Controls should be consistent: strong content-type validation, forced download where appropriate, and hardening against active content.
|
|
128
|
+
|
|
129
|
+
Systemic themes and root causes
|
|
130
|
+
- Authorization enforcement appears distributed and inconsistent across endpoints instead of centralized and testable.
|
|
131
|
+
- Outbound request functionality lacks a robust, deny-by-default policy for destination validation.
|
|
132
|
+
- Hardening controls (session lifetime, sensitive-action controls, logging) are applied unevenly, increasing the likelihood of successful attack chains.</parameter>
|
|
133
|
+
<parameter=recommendations>Recommendations
|
|
134
|
+
Priority 0
|
|
135
|
+
- Eliminate SSRF by implementing a strict destination allowlist and deny-by-default policy for outbound requests. Block private, loopback, and link-local ranges (IPv4 and IPv6) after DNS resolution. Re-validate on every redirect hop. Apply URL parsing/normalization safeguards against ambiguous encodings and unusual IP notations.
|
|
136
|
+
- Apply network egress controls so the application runtime cannot reach sensitive internal ranges or link-local services. Route necessary outbound requests through a policy-enforcing egress proxy with logging.
|
|
137
|
+
|
|
138
|
+
Priority 1
|
|
139
|
+
- Centralize authorization enforcement for all object access and administrative actions. Implement consistent tenant-ownership checks for every read/write path involving orders, invoices, and account resources. Adopt deny-by-default authorization middleware/policies.
|
|
140
|
+
- Add regression tests for authorization decisions, including cross-tenant negative cases and privilege-boundary testing for administrative endpoints.
|
|
141
|
+
- Harden session management: secure cookie attributes, session rotation after authentication and privilege change events, reduced session lifetime for privileged contexts, and consistent CSRF protections for state-changing actions.
|
|
142
|
+
|
|
143
|
+
Priority 2
|
|
144
|
+
- Harden file handling and preview behaviors: strict content-type allowlists, forced download for active formats, safe rendering pipelines, and scanning/sanitization where applicable.
|
|
145
|
+
- Improve monitoring and detection: alert on high-risk events such as repeated authorization failures, anomalous outbound fetch attempts, sensitive administrative actions, and unusual access patterns to business-critical resources.
|
|
146
|
+
|
|
147
|
+
Follow-up validation
|
|
148
|
+
- Conduct a targeted retest after remediation to confirm SSRF controls, tenant isolation enforcement, and session hardening, and to ensure no bypasses exist via redirects, DNS rebinding, or encoding edge cases.</parameter>
|
|
149
|
+
</function>
|
|
150
|
+
</examples>
|
|
44
151
|
</tool>
|
|
45
152
|
</tools>
|
|
@@ -11,7 +11,6 @@ _notes_storage: dict[str, dict[str, Any]] = {}
|
|
|
11
11
|
def _filter_notes(
|
|
12
12
|
category: str | None = None,
|
|
13
13
|
tags: list[str] | None = None,
|
|
14
|
-
priority: str | None = None,
|
|
15
14
|
search_query: str | None = None,
|
|
16
15
|
) -> list[dict[str, Any]]:
|
|
17
16
|
filtered_notes = []
|
|
@@ -20,9 +19,6 @@ def _filter_notes(
|
|
|
20
19
|
if category and note.get("category") != category:
|
|
21
20
|
continue
|
|
22
21
|
|
|
23
|
-
if priority and note.get("priority") != priority:
|
|
24
|
-
continue
|
|
25
|
-
|
|
26
22
|
if tags:
|
|
27
23
|
note_tags = note.get("tags", [])
|
|
28
24
|
if not any(tag in note_tags for tag in tags):
|
|
@@ -43,13 +39,12 @@ def _filter_notes(
|
|
|
43
39
|
return filtered_notes
|
|
44
40
|
|
|
45
41
|
|
|
46
|
-
@register_tool
|
|
42
|
+
@register_tool(sandbox_execution=False)
|
|
47
43
|
def create_note(
|
|
48
44
|
title: str,
|
|
49
45
|
content: str,
|
|
50
46
|
category: str = "general",
|
|
51
47
|
tags: list[str] | None = None,
|
|
52
|
-
priority: str = "normal",
|
|
53
48
|
) -> dict[str, Any]:
|
|
54
49
|
try:
|
|
55
50
|
if not title or not title.strip():
|
|
@@ -58,7 +53,7 @@ def create_note(
|
|
|
58
53
|
if not content or not content.strip():
|
|
59
54
|
return {"success": False, "error": "Content cannot be empty", "note_id": None}
|
|
60
55
|
|
|
61
|
-
valid_categories = ["general", "findings", "methodology", "
|
|
56
|
+
valid_categories = ["general", "findings", "methodology", "questions", "plan"]
|
|
62
57
|
if category not in valid_categories:
|
|
63
58
|
return {
|
|
64
59
|
"success": False,
|
|
@@ -66,14 +61,6 @@ def create_note(
|
|
|
66
61
|
"note_id": None,
|
|
67
62
|
}
|
|
68
63
|
|
|
69
|
-
valid_priorities = ["low", "normal", "high", "urgent"]
|
|
70
|
-
if priority not in valid_priorities:
|
|
71
|
-
return {
|
|
72
|
-
"success": False,
|
|
73
|
-
"error": f"Invalid priority. Must be one of: {', '.join(valid_priorities)}",
|
|
74
|
-
"note_id": None,
|
|
75
|
-
}
|
|
76
|
-
|
|
77
64
|
note_id = str(uuid.uuid4())[:5]
|
|
78
65
|
timestamp = datetime.now(UTC).isoformat()
|
|
79
66
|
|
|
@@ -82,7 +69,6 @@ def create_note(
|
|
|
82
69
|
"content": content.strip(),
|
|
83
70
|
"category": category,
|
|
84
71
|
"tags": tags or [],
|
|
85
|
-
"priority": priority,
|
|
86
72
|
"created_at": timestamp,
|
|
87
73
|
"updated_at": timestamp,
|
|
88
74
|
}
|
|
@@ -99,17 +85,14 @@ def create_note(
|
|
|
99
85
|
}
|
|
100
86
|
|
|
101
87
|
|
|
102
|
-
@register_tool
|
|
88
|
+
@register_tool(sandbox_execution=False)
|
|
103
89
|
def list_notes(
|
|
104
90
|
category: str | None = None,
|
|
105
91
|
tags: list[str] | None = None,
|
|
106
|
-
priority: str | None = None,
|
|
107
92
|
search: str | None = None,
|
|
108
93
|
) -> dict[str, Any]:
|
|
109
94
|
try:
|
|
110
|
-
filtered_notes = _filter_notes(
|
|
111
|
-
category=category, tags=tags, priority=priority, search_query=search
|
|
112
|
-
)
|
|
95
|
+
filtered_notes = _filter_notes(category=category, tags=tags, search_query=search)
|
|
113
96
|
|
|
114
97
|
return {
|
|
115
98
|
"success": True,
|
|
@@ -126,13 +109,12 @@ def list_notes(
|
|
|
126
109
|
}
|
|
127
110
|
|
|
128
111
|
|
|
129
|
-
@register_tool
|
|
112
|
+
@register_tool(sandbox_execution=False)
|
|
130
113
|
def update_note(
|
|
131
114
|
note_id: str,
|
|
132
115
|
title: str | None = None,
|
|
133
116
|
content: str | None = None,
|
|
134
117
|
tags: list[str] | None = None,
|
|
135
|
-
priority: str | None = None,
|
|
136
118
|
) -> dict[str, Any]:
|
|
137
119
|
try:
|
|
138
120
|
if note_id not in _notes_storage:
|
|
@@ -153,15 +135,6 @@ def update_note(
|
|
|
153
135
|
if tags is not None:
|
|
154
136
|
note["tags"] = tags
|
|
155
137
|
|
|
156
|
-
if priority is not None:
|
|
157
|
-
valid_priorities = ["low", "normal", "high", "urgent"]
|
|
158
|
-
if priority not in valid_priorities:
|
|
159
|
-
return {
|
|
160
|
-
"success": False,
|
|
161
|
-
"error": f"Invalid priority. Must be one of: {', '.join(valid_priorities)}",
|
|
162
|
-
}
|
|
163
|
-
note["priority"] = priority
|
|
164
|
-
|
|
165
138
|
note["updated_at"] = datetime.now(UTC).isoformat()
|
|
166
139
|
|
|
167
140
|
return {
|
|
@@ -173,7 +146,7 @@ def update_note(
|
|
|
173
146
|
return {"success": False, "error": f"Failed to update note: {e}"}
|
|
174
147
|
|
|
175
148
|
|
|
176
|
-
@register_tool
|
|
149
|
+
@register_tool(sandbox_execution=False)
|
|
177
150
|
def delete_note(note_id: str) -> dict[str, Any]:
|
|
178
151
|
try:
|
|
179
152
|
if note_id not in _notes_storage:
|