strix-agent 0.4.0__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. strix/agents/StrixAgent/strix_agent.py +3 -3
  2. strix/agents/StrixAgent/system_prompt.jinja +30 -26
  3. strix/agents/base_agent.py +159 -75
  4. strix/agents/state.py +5 -2
  5. strix/config/__init__.py +12 -0
  6. strix/config/config.py +172 -0
  7. strix/interface/assets/tui_styles.tcss +195 -230
  8. strix/interface/cli.py +16 -41
  9. strix/interface/main.py +151 -74
  10. strix/interface/streaming_parser.py +119 -0
  11. strix/interface/tool_components/__init__.py +4 -0
  12. strix/interface/tool_components/agent_message_renderer.py +190 -0
  13. strix/interface/tool_components/agents_graph_renderer.py +54 -38
  14. strix/interface/tool_components/base_renderer.py +68 -36
  15. strix/interface/tool_components/browser_renderer.py +106 -91
  16. strix/interface/tool_components/file_edit_renderer.py +117 -36
  17. strix/interface/tool_components/finish_renderer.py +43 -10
  18. strix/interface/tool_components/notes_renderer.py +63 -38
  19. strix/interface/tool_components/proxy_renderer.py +133 -92
  20. strix/interface/tool_components/python_renderer.py +121 -8
  21. strix/interface/tool_components/registry.py +19 -12
  22. strix/interface/tool_components/reporting_renderer.py +196 -28
  23. strix/interface/tool_components/scan_info_renderer.py +22 -19
  24. strix/interface/tool_components/terminal_renderer.py +270 -90
  25. strix/interface/tool_components/thinking_renderer.py +8 -6
  26. strix/interface/tool_components/todo_renderer.py +225 -0
  27. strix/interface/tool_components/user_message_renderer.py +26 -19
  28. strix/interface/tool_components/web_search_renderer.py +7 -6
  29. strix/interface/tui.py +907 -262
  30. strix/interface/utils.py +236 -4
  31. strix/llm/__init__.py +6 -2
  32. strix/llm/config.py +8 -5
  33. strix/llm/dedupe.py +217 -0
  34. strix/llm/llm.py +209 -356
  35. strix/llm/memory_compressor.py +6 -5
  36. strix/llm/utils.py +17 -8
  37. strix/runtime/__init__.py +12 -3
  38. strix/runtime/docker_runtime.py +121 -202
  39. strix/runtime/tool_server.py +55 -95
  40. strix/skills/README.md +64 -0
  41. strix/skills/__init__.py +110 -0
  42. strix/{prompts → skills}/frameworks/nextjs.jinja +26 -0
  43. strix/skills/scan_modes/deep.jinja +145 -0
  44. strix/skills/scan_modes/quick.jinja +63 -0
  45. strix/skills/scan_modes/standard.jinja +91 -0
  46. strix/telemetry/README.md +38 -0
  47. strix/telemetry/__init__.py +7 -1
  48. strix/telemetry/posthog.py +137 -0
  49. strix/telemetry/tracer.py +194 -54
  50. strix/tools/__init__.py +11 -4
  51. strix/tools/agents_graph/agents_graph_actions.py +20 -21
  52. strix/tools/agents_graph/agents_graph_actions_schema.xml +8 -8
  53. strix/tools/browser/browser_actions.py +10 -6
  54. strix/tools/browser/browser_actions_schema.xml +6 -1
  55. strix/tools/browser/browser_instance.py +96 -48
  56. strix/tools/browser/tab_manager.py +121 -102
  57. strix/tools/context.py +12 -0
  58. strix/tools/executor.py +63 -4
  59. strix/tools/file_edit/file_edit_actions.py +6 -3
  60. strix/tools/file_edit/file_edit_actions_schema.xml +45 -3
  61. strix/tools/finish/finish_actions.py +80 -105
  62. strix/tools/finish/finish_actions_schema.xml +121 -14
  63. strix/tools/notes/notes_actions.py +6 -33
  64. strix/tools/notes/notes_actions_schema.xml +50 -46
  65. strix/tools/proxy/proxy_actions.py +14 -2
  66. strix/tools/proxy/proxy_actions_schema.xml +0 -1
  67. strix/tools/proxy/proxy_manager.py +28 -16
  68. strix/tools/python/python_actions.py +2 -2
  69. strix/tools/python/python_actions_schema.xml +9 -1
  70. strix/tools/python/python_instance.py +39 -37
  71. strix/tools/python/python_manager.py +43 -31
  72. strix/tools/registry.py +73 -12
  73. strix/tools/reporting/reporting_actions.py +218 -31
  74. strix/tools/reporting/reporting_actions_schema.xml +256 -8
  75. strix/tools/terminal/terminal_actions.py +2 -2
  76. strix/tools/terminal/terminal_actions_schema.xml +6 -0
  77. strix/tools/terminal/terminal_manager.py +41 -30
  78. strix/tools/thinking/thinking_actions_schema.xml +27 -25
  79. strix/tools/todo/__init__.py +18 -0
  80. strix/tools/todo/todo_actions.py +568 -0
  81. strix/tools/todo/todo_actions_schema.xml +225 -0
  82. strix/utils/__init__.py +0 -0
  83. strix/utils/resource_paths.py +13 -0
  84. {strix_agent-0.4.0.dist-info → strix_agent-0.6.2.dist-info}/METADATA +90 -65
  85. strix_agent-0.6.2.dist-info/RECORD +134 -0
  86. {strix_agent-0.4.0.dist-info → strix_agent-0.6.2.dist-info}/WHEEL +1 -1
  87. strix/llm/request_queue.py +0 -87
  88. strix/prompts/README.md +0 -64
  89. strix/prompts/__init__.py +0 -109
  90. strix_agent-0.4.0.dist-info/RECORD +0 -118
  91. /strix/{prompts → skills}/cloud/.gitkeep +0 -0
  92. /strix/{prompts → skills}/coordination/root_agent.jinja +0 -0
  93. /strix/{prompts → skills}/custom/.gitkeep +0 -0
  94. /strix/{prompts → skills}/frameworks/fastapi.jinja +0 -0
  95. /strix/{prompts → skills}/protocols/graphql.jinja +0 -0
  96. /strix/{prompts → skills}/reconnaissance/.gitkeep +0 -0
  97. /strix/{prompts → skills}/technologies/firebase_firestore.jinja +0 -0
  98. /strix/{prompts → skills}/technologies/supabase.jinja +0 -0
  99. /strix/{prompts → skills}/vulnerabilities/authentication_jwt.jinja +0 -0
  100. /strix/{prompts → skills}/vulnerabilities/broken_function_level_authorization.jinja +0 -0
  101. /strix/{prompts → skills}/vulnerabilities/business_logic.jinja +0 -0
  102. /strix/{prompts → skills}/vulnerabilities/csrf.jinja +0 -0
  103. /strix/{prompts → skills}/vulnerabilities/idor.jinja +0 -0
  104. /strix/{prompts → skills}/vulnerabilities/information_disclosure.jinja +0 -0
  105. /strix/{prompts → skills}/vulnerabilities/insecure_file_uploads.jinja +0 -0
  106. /strix/{prompts → skills}/vulnerabilities/mass_assignment.jinja +0 -0
  107. /strix/{prompts → skills}/vulnerabilities/open_redirect.jinja +0 -0
  108. /strix/{prompts → skills}/vulnerabilities/path_traversal_lfi_rfi.jinja +0 -0
  109. /strix/{prompts → skills}/vulnerabilities/race_conditions.jinja +0 -0
  110. /strix/{prompts → skills}/vulnerabilities/rce.jinja +0 -0
  111. /strix/{prompts → skills}/vulnerabilities/sql_injection.jinja +0 -0
  112. /strix/{prompts → skills}/vulnerabilities/ssrf.jinja +0 -0
  113. /strix/{prompts → skills}/vulnerabilities/subdomain_takeover.jinja +0 -0
  114. /strix/{prompts → skills}/vulnerabilities/xss.jinja +0 -0
  115. /strix/{prompts → skills}/vulnerabilities/xxe.jinja +0 -0
  116. {strix_agent-0.4.0.dist-info → strix_agent-0.6.2.dist-info}/entry_points.txt +0 -0
  117. {strix_agent-0.4.0.dist-info → strix_agent-0.6.2.dist-info/licenses}/LICENSE +0 -0
@@ -104,8 +104,30 @@
104
104
  # Create a file
105
105
  <function=str_replace_editor>
106
106
  <parameter=command>create</parameter>
107
- <parameter=path>/home/user/project/new_file.py</parameter>
108
- <parameter=file_text>print("Hello World")</parameter>
107
+ <parameter=path>/home/user/project/exploit.py</parameter>
108
+ <parameter=file_text>#!/usr/bin/env python3
109
+ """SQL Injection exploit for Acme Corp login endpoint."""
110
+
111
+ import requests
112
+ import sys
113
+
114
+ TARGET = "https://app.acme-corp.com/api/v1/auth/login"
115
+
116
+ def exploit(username: str) -> dict:
117
+ payload = {
118
+ "username": f"{username}'--",
119
+ "password": "anything"
120
+ }
121
+ response = requests.post(TARGET, json=payload, timeout=10)
122
+ return response.json()
123
+
124
+ if __name__ == "__main__":
125
+ if len(sys.argv) < 2:
126
+ print(f"Usage: {sys.argv[0]} <username>")
127
+ sys.exit(1)
128
+
129
+ result = exploit(sys.argv[1])
130
+ print(f"Result: {result}")</parameter>
109
131
  </function>
110
132
 
111
133
  # Replace text in file
@@ -121,7 +143,27 @@
121
143
  <parameter=command>insert</parameter>
122
144
  <parameter=path>/home/user/project/file.py</parameter>
123
145
  <parameter=insert_line>10</parameter>
124
- <parameter=new_str>print("Inserted line")</parameter>
146
+ <parameter=new_str>def validate_input(user_input: str) -> bool:
147
+ """Validate user input to prevent injection attacks."""
148
+ forbidden_chars = ["'", '"', ";", "--", "/*", "*/"]
149
+ for char in forbidden_chars:
150
+ if char in user_input:
151
+ return False
152
+ return True</parameter>
153
+ </function>
154
+
155
+ # Replace code block
156
+ <function=str_replace_editor>
157
+ <parameter=command>str_replace</parameter>
158
+ <parameter=path>/home/user/project/auth.py</parameter>
159
+ <parameter=old_str>def authenticate(username, password):
160
+ query = f"SELECT * FROM users WHERE username = '{username}'"
161
+ result = db.execute(query)
162
+ return result</parameter>
163
+ <parameter=new_str>def authenticate(username, password):
164
+ query = "SELECT * FROM users WHERE username = %s"
165
+ result = db.execute(query, (username,))
166
+ return result</parameter>
125
167
  </function>
126
168
  </examples>
127
169
  </tool>
@@ -4,49 +4,40 @@ from strix.tools.registry import register_tool
4
4
 
5
5
 
6
6
  def _validate_root_agent(agent_state: Any) -> dict[str, Any] | None:
7
- if (
8
- agent_state is not None
9
- and hasattr(agent_state, "parent_id")
10
- and agent_state.parent_id is not None
11
- ):
7
+ if agent_state and hasattr(agent_state, "parent_id") and agent_state.parent_id is not None:
12
8
  return {
13
9
  "success": False,
14
- "message": (
15
- "This tool can only be used by the root/main agent. "
16
- "Subagents must use agent_finish instead."
17
- ),
10
+ "error": "finish_scan_wrong_agent",
11
+ "message": "This tool can only be used by the root/main agent",
12
+ "suggestion": "If you are a subagent, use agent_finish from agents_graph tool instead",
18
13
  }
19
14
  return None
20
15
 
21
16
 
22
- def _validate_content(content: str) -> dict[str, Any] | None:
23
- if not content or not content.strip():
24
- return {"success": False, "message": "Content cannot be empty"}
25
- return None
26
-
27
-
28
17
  def _check_active_agents(agent_state: Any = None) -> dict[str, Any] | None:
29
18
  try:
30
19
  from strix.tools.agents_graph.agents_graph_actions import _agent_graph
31
20
 
32
- current_agent_id = None
33
- if agent_state and hasattr(agent_state, "agent_id"):
21
+ if agent_state and agent_state.agent_id:
34
22
  current_agent_id = agent_state.agent_id
23
+ else:
24
+ return None
35
25
 
36
- running_agents = []
26
+ active_agents = []
37
27
  stopping_agents = []
38
28
 
39
- for agent_id, node in _agent_graph.get("nodes", {}).items():
29
+ for agent_id, node in _agent_graph["nodes"].items():
40
30
  if agent_id == current_agent_id:
41
31
  continue
42
32
 
43
- status = node.get("status", "")
33
+ status = node.get("status", "unknown")
44
34
  if status == "running":
45
- running_agents.append(
35
+ active_agents.append(
46
36
  {
47
37
  "id": agent_id,
48
38
  "name": node.get("name", "Unknown"),
49
- "task": node.get("task", "No task description"),
39
+ "task": node.get("task", "Unknown task")[:300],
40
+ "status": status,
50
41
  }
51
42
  )
52
43
  elif status == "stopping":
@@ -54,121 +45,105 @@ def _check_active_agents(agent_state: Any = None) -> dict[str, Any] | None:
54
45
  {
55
46
  "id": agent_id,
56
47
  "name": node.get("name", "Unknown"),
48
+ "task": node.get("task", "Unknown task")[:300],
49
+ "status": status,
57
50
  }
58
51
  )
59
52
 
60
- if running_agents or stopping_agents:
61
- message_parts = ["Cannot finish scan while other agents are still active:"]
53
+ if active_agents or stopping_agents:
54
+ response: dict[str, Any] = {
55
+ "success": False,
56
+ "error": "agents_still_active",
57
+ "message": "Cannot finish scan: agents are still active",
58
+ }
62
59
 
63
- if running_agents:
64
- message_parts.append("\n\nRunning agents:")
65
- message_parts.extend(
66
- [
67
- f" - {agent['name']} ({agent['id']}): {agent['task']}"
68
- for agent in running_agents
69
- ]
70
- )
60
+ if active_agents:
61
+ response["active_agents"] = active_agents
71
62
 
72
63
  if stopping_agents:
73
- message_parts.append("\n\nStopping agents:")
74
- message_parts.extend(
75
- [f" - {agent['name']} ({agent['id']})" for agent in stopping_agents]
76
- )
64
+ response["stopping_agents"] = stopping_agents
77
65
 
78
- message_parts.extend(
79
- [
80
- "\n\nSuggested actions:",
81
- "1. Use wait_for_message to wait for all agents to complete",
82
- "2. Send messages to agents asking them to finish if urgent",
83
- "3. Use view_agent_graph to monitor agent status",
84
- ]
85
- )
66
+ response["suggestions"] = [
67
+ "Use wait_for_message to wait for all agents to complete",
68
+ "Use send_message_to_agent if you need agents to complete immediately",
69
+ "Check agent_status to see current agent states",
70
+ ]
86
71
 
87
- return {
88
- "success": False,
89
- "message": "\n".join(message_parts),
90
- "active_agents": {
91
- "running": len(running_agents),
92
- "stopping": len(stopping_agents),
93
- "details": {
94
- "running": running_agents,
95
- "stopping": stopping_agents,
96
- },
97
- },
98
- }
72
+ response["total_active"] = len(active_agents) + len(stopping_agents)
73
+
74
+ return response
99
75
 
100
76
  except ImportError:
77
+ pass
78
+ except Exception:
101
79
  import logging
102
80
 
103
- logging.warning("Could not check agent graph status - agents_graph module unavailable")
81
+ logging.exception("Error checking active agents")
104
82
 
105
83
  return None
106
84
 
107
85
 
108
- def _finalize_with_tracer(content: str, success: bool) -> dict[str, Any]:
86
+ @register_tool(sandbox_execution=False)
87
+ def finish_scan(
88
+ executive_summary: str,
89
+ methodology: str,
90
+ technical_analysis: str,
91
+ recommendations: str,
92
+ agent_state: Any = None,
93
+ ) -> dict[str, Any]:
94
+ validation_error = _validate_root_agent(agent_state)
95
+ if validation_error:
96
+ return validation_error
97
+
98
+ active_agents_error = _check_active_agents(agent_state)
99
+ if active_agents_error:
100
+ return active_agents_error
101
+
102
+ validation_errors = []
103
+
104
+ if not executive_summary or not executive_summary.strip():
105
+ validation_errors.append("Executive summary cannot be empty")
106
+ if not methodology or not methodology.strip():
107
+ validation_errors.append("Methodology cannot be empty")
108
+ if not technical_analysis or not technical_analysis.strip():
109
+ validation_errors.append("Technical analysis cannot be empty")
110
+ if not recommendations or not recommendations.strip():
111
+ validation_errors.append("Recommendations cannot be empty")
112
+
113
+ if validation_errors:
114
+ return {"success": False, "message": "Validation failed", "errors": validation_errors}
115
+
109
116
  try:
110
117
  from strix.telemetry.tracer import get_global_tracer
111
118
 
112
119
  tracer = get_global_tracer()
113
120
  if tracer:
114
- tracer.set_final_scan_result(
115
- content=content.strip(),
116
- success=success,
121
+ tracer.update_scan_final_fields(
122
+ executive_summary=executive_summary.strip(),
123
+ methodology=methodology.strip(),
124
+ technical_analysis=technical_analysis.strip(),
125
+ recommendations=recommendations.strip(),
117
126
  )
118
127
 
128
+ vulnerability_count = len(tracer.vulnerability_reports)
129
+
119
130
  return {
120
131
  "success": True,
121
132
  "scan_completed": True,
122
- "message": "Scan completed successfully"
123
- if success
124
- else "Scan completed with errors",
125
- "vulnerabilities_found": len(tracer.vulnerability_reports),
133
+ "message": "Scan completed successfully",
134
+ "vulnerabilities_found": vulnerability_count,
126
135
  }
127
136
 
128
137
  import logging
129
138
 
130
- logging.warning("Global tracer not available - final scan result not stored")
131
-
132
- return { # noqa: TRY300
133
- "success": True,
134
- "scan_completed": True,
135
- "message": "Scan completed successfully (not persisted)"
136
- if success
137
- else "Scan completed with errors (not persisted)",
138
- "warning": "Final result could not be persisted - tracer unavailable",
139
- }
139
+ logging.warning("Current tracer not available - scan results not stored")
140
140
 
141
- except ImportError:
141
+ except (ImportError, AttributeError) as e:
142
+ return {"success": False, "message": f"Failed to complete scan: {e!s}"}
143
+ else:
142
144
  return {
143
145
  "success": True,
144
146
  "scan_completed": True,
145
- "message": "Scan completed successfully (not persisted)"
146
- if success
147
- else "Scan completed with errors (not persisted)",
148
- "warning": "Final result could not be persisted - tracer module unavailable",
147
+ "message": "Scan completed (not persisted)",
148
+ "warning": "Results could not be persisted - tracer unavailable",
149
149
  }
150
-
151
-
152
- @register_tool(sandbox_execution=False)
153
- def finish_scan(
154
- content: str,
155
- success: bool = True,
156
- agent_state: Any = None,
157
- ) -> dict[str, Any]:
158
- try:
159
- validation_error = _validate_root_agent(agent_state)
160
- if validation_error:
161
- return validation_error
162
-
163
- validation_error = _validate_content(content)
164
- if validation_error:
165
- return validation_error
166
-
167
- active_agents_error = _check_active_agents(agent_state)
168
- if active_agents_error:
169
- return active_agents_error
170
-
171
- return _finalize_with_tracer(content, success)
172
-
173
- except (ValueError, TypeError, KeyError) as e:
174
- return {"success": False, "message": f"Failed to complete scan: {e!s}"}
@@ -1,6 +1,6 @@
1
1
  <tools>
2
2
  <tool name="finish_scan">
3
- <description>Complete the main security scan and generate final report.
3
+ <description>Complete the security scan by providing the final assessment fields as full penetration test report.
4
4
 
5
5
  IMPORTANT: This tool can ONLY be used by the root/main agent.
6
6
  Subagents must use agent_finish from agents_graph tool instead.
@@ -8,11 +8,20 @@ Subagents must use agent_finish from agents_graph tool instead.
8
8
  IMPORTANT: This tool will NOT allow finishing if any agents are still running or stopping.
9
9
  You must wait for all agents to complete before using this tool.
10
10
 
11
- This tool MUST be called at the very end of the security assessment to:
12
- - Verify all agents have completed their tasks
13
- - Generate the final comprehensive scan report
14
- - Mark the entire scan as completed
15
- - Stop the agent from running
11
+ This tool directly updates the scan report data:
12
+ - executive_summary
13
+ - methodology
14
+ - technical_analysis
15
+ - recommendations
16
+
17
+ All fields are REQUIRED and map directly to the final report.
18
+
19
+ This must be the last tool called in the scan. It will:
20
+ 1. Verify you are the root agent
21
+ 2. Check all subagents have completed
22
+ 3. Update the scan with your provided fields
23
+ 4. Mark the scan as completed
24
+ 5. Stop agent execution
16
25
 
17
26
  Use this tool when:
18
27
  - You are the main/root agent conducting the security assessment
@@ -23,23 +32,121 @@ Use this tool when:
23
32
  IMPORTANT: Calling this tool multiple times will OVERWRITE any previous scan report.
24
33
  Make sure you include ALL findings and details in a single comprehensive report.
25
34
 
26
- If agents are still running, this tool will:
35
+ If agents are still running, the tool will:
27
36
  - Show you which agents are still active
28
37
  - Suggest using wait_for_message to wait for completion
29
38
  - Suggest messaging agents if immediate completion is needed
30
39
 
31
- Put ALL details in the content - methodology, tools used, vulnerability counts, key findings, recommendations,
32
- compliance notes, risk assessments, next steps, etc. Be comprehensive and include everything relevant.</description>
40
+ NOTE: Make sure the vulnerabilities found were reported with create_vulnerability_report tool, otherwise they will not be tracked and you will not be rewarded.
41
+ But make sure to not report the same vulnerability multiple times.
42
+
43
+ Professional, customer-facing penetration test report rules (PDF-ready):
44
+ - Do NOT include internal or system details: never mention local/absolute paths (e.g., "/workspace"), internal tools, agents, orchestrators, sandboxes, models, system prompts/instructions, connection/tooling issues, or tester environment details.
45
+ - Tone and style: formal, objective, third-person, concise. No internal checklists or engineering runbooks. Content must read as a polished client deliverable.
46
+ - Structure across fields should align to standard pentest reports:
47
+ - Executive summary: business impact, risk posture, notable criticals, remediation theme.
48
+ - Methodology: industry-standard methods (e.g., OWASP, OSSTMM, NIST), scope, constraints—no internal execution notes.
49
+ - Technical analysis: consolidated findings overview referencing created vulnerability reports; avoid raw logs.
50
+ - Recommendations: prioritized, actionable, aligned to risk and best practices.
51
+ </description>
33
52
  <parameters>
34
- <parameter name="content" type="string" required="true">
35
- <description>Complete scan report including executive summary, methodology, findings, vulnerability details, recommendations, compliance notes, risk assessment, and conclusions. Include everything relevant to the assessment.</description>
53
+ <parameter name="executive_summary" type="string" required="true">
54
+ <description>High-level summary for executives: key findings, overall security posture, critical risks, business impact</description>
36
55
  </parameter>
37
- <parameter name="success" type="boolean" required="false">
38
- <description>Whether the scan completed successfully without critical errors</description>
56
+ <parameter name="methodology" type="string" required="true">
57
+ <description>Testing methodology: approach, tools used, scope, techniques employed</description>
58
+ </parameter>
59
+ <parameter name="technical_analysis" type="string" required="true">
60
+ <description>Detailed technical findings and security assessment results over the scan</description>
61
+ </parameter>
62
+ <parameter name="recommendations" type="string" required="true">
63
+ <description>Actionable security recommendations and remediation priorities</description>
39
64
  </parameter>
40
65
  </parameters>
41
66
  <returns type="Dict[str, Any]">
42
- <description>Response containing success status and completion message. If agents are still running, returns details about active agents and suggested actions.</description>
67
+ <description>Response containing success status, vulnerability count, and completion message. If agents are still running, returns details about active agents and suggested actions.</description>
43
68
  </returns>
69
+ <examples>
70
+
71
+ <function=finish_scan>
72
+ <parameter=executive_summary>Executive summary
73
+ An external penetration test of the Acme Customer Portal and associated API identified multiple security weaknesses that, if exploited, could result in unauthorized access to customer data, cross-tenant exposure, and access to internal network resources.
74
+
75
+ Overall risk posture: Elevated.
76
+
77
+ Key outcomes
78
+ - Confirmed server-side request forgery (SSRF) in a URL preview capability that enables the application to initiate outbound requests to attacker-controlled destinations and internal network ranges.
79
+ - Identified broken access control patterns in business-critical workflows that can enable cross-tenant data access (tenant isolation failures).
80
+ - Observed session and authorization hardening gaps that materially increase risk when combined with other weaknesses.
81
+
82
+ Business impact
83
+ - Increased likelihood of sensitive data exposure across customers/tenants, including invoices, orders, and account information.
84
+ - Increased risk of internal service exposure through server-side outbound request functionality (including link-local and private network destinations).
85
+ - Increased potential for account compromise and administrative abuse if tokens are stolen or misused.
86
+
87
+ Remediation theme
88
+ Prioritize eliminating SSRF pathways and centralizing authorization enforcement (deny-by-default). Follow with session hardening and monitoring improvements, then validate with a focused retest.</parameter>
89
+ <parameter=methodology>Methodology
90
+ The assessment followed industry-standard penetration testing practices aligned to OWASP Web Security Testing Guide (WSTG) concepts and common web/API security testing methodology.
91
+
92
+ Engagement details
93
+ - Assessment type: External penetration test (black-box with limited gray-box context)
94
+ - Target environment: Production-equivalent staging
95
+
96
+ Scope (in-scope assets)
97
+ - Web application: https://app.acme-corp.com
98
+ - API base: https://app.acme-corp.com/api/v1/
99
+
100
+ High-level testing activities
101
+ - Reconnaissance and attack-surface mapping (routes, parameters, workflows)
102
+ - Authentication and session management review (token handling, session lifetime, sensitive actions)
103
+ - Authorization and tenant-isolation testing (object access and privilege boundaries)
104
+ - Input handling and server-side request testing (URL fetchers, imports, previews, callbacks)
105
+ - File handling and content rendering review (uploads, previews, unsafe content types)
106
+ - Configuration review (transport security, security headers, caching behavior, error handling)
107
+
108
+ Evidence handling and validation standard
109
+ Only validated issues with reproducible impact were treated as findings. Each finding was documented with clear reproduction steps and sufficient evidence to support remediation and verification testing.</parameter>
110
+ <parameter=technical_analysis>Technical analysis
111
+ This section provides a consolidated view of the confirmed findings and observed risk patterns. Detailed reproduction steps and evidence are documented in the individual vulnerability reports.
112
+
113
+ Severity model
114
+ Severity reflects a combination of exploitability and potential impact to confidentiality, integrity, and availability, considering realistic attacker capabilities.
115
+
116
+ Confirmed findings (high level)
117
+ 1) Server-side request forgery (SSRF) in URL preview (Critical)
118
+ The application fetches user-supplied URLs server-side to generate previews. Validation controls were insufficient to prevent access to internal and link-local destinations. This creates a pathway to internal network enumeration and potential access to sensitive internal services. Redirect and DNS/normalization bypass risk must be assumed unless controls are comprehensive and applied on every request hop.
119
+
120
+ 2) Broken tenant isolation in order/invoice workflows (High)
121
+ Multiple endpoints accepted object identifiers without consistently enforcing tenant ownership. This is indicative of broken function- and object-level authorization checks. In practice, this can enable cross-tenant access to business-critical resources (viewing or modifying data outside the attacker’s tenant boundary).
122
+
123
+ 3) Administrative action hardening gaps (Medium)
124
+ Several sensitive actions lacked defense-in-depth controls (e.g., re-authentication for high-risk actions, consistent authorization checks across related endpoints, and protections against session misuse). While not all behaviors were immediately exploitable in isolation, they increase the likelihood and blast radius of account compromise when chained with other vulnerabilities.
125
+
126
+ 4) Unsafe file preview/content handling patterns (Medium)
127
+ File preview and rendering behaviors can create exposure to script execution or content-type confusion if unsafe formats are rendered inline. Controls should be consistent: strong content-type validation, forced download where appropriate, and hardening against active content.
128
+
129
+ Systemic themes and root causes
130
+ - Authorization enforcement appears distributed and inconsistent across endpoints instead of centralized and testable.
131
+ - Outbound request functionality lacks a robust, deny-by-default policy for destination validation.
132
+ - Hardening controls (session lifetime, sensitive-action controls, logging) are applied unevenly, increasing the likelihood of successful attack chains.</parameter>
133
+ <parameter=recommendations>Recommendations
134
+ Priority 0
135
+ - Eliminate SSRF by implementing a strict destination allowlist and deny-by-default policy for outbound requests. Block private, loopback, and link-local ranges (IPv4 and IPv6) after DNS resolution. Re-validate on every redirect hop. Apply URL parsing/normalization safeguards against ambiguous encodings and unusual IP notations.
136
+ - Apply network egress controls so the application runtime cannot reach sensitive internal ranges or link-local services. Route necessary outbound requests through a policy-enforcing egress proxy with logging.
137
+
138
+ Priority 1
139
+ - Centralize authorization enforcement for all object access and administrative actions. Implement consistent tenant-ownership checks for every read/write path involving orders, invoices, and account resources. Adopt deny-by-default authorization middleware/policies.
140
+ - Add regression tests for authorization decisions, including cross-tenant negative cases and privilege-boundary testing for administrative endpoints.
141
+ - Harden session management: secure cookie attributes, session rotation after authentication and privilege change events, reduced session lifetime for privileged contexts, and consistent CSRF protections for state-changing actions.
142
+
143
+ Priority 2
144
+ - Harden file handling and preview behaviors: strict content-type allowlists, forced download for active formats, safe rendering pipelines, and scanning/sanitization where applicable.
145
+ - Improve monitoring and detection: alert on high-risk events such as repeated authorization failures, anomalous outbound fetch attempts, sensitive administrative actions, and unusual access patterns to business-critical resources.
146
+
147
+ Follow-up validation
148
+ - Conduct a targeted retest after remediation to confirm SSRF controls, tenant isolation enforcement, and session hardening, and to ensure no bypasses exist via redirects, DNS rebinding, or encoding edge cases.</parameter>
149
+ </function>
150
+ </examples>
44
151
  </tool>
45
152
  </tools>
@@ -11,7 +11,6 @@ _notes_storage: dict[str, dict[str, Any]] = {}
11
11
  def _filter_notes(
12
12
  category: str | None = None,
13
13
  tags: list[str] | None = None,
14
- priority: str | None = None,
15
14
  search_query: str | None = None,
16
15
  ) -> list[dict[str, Any]]:
17
16
  filtered_notes = []
@@ -20,9 +19,6 @@ def _filter_notes(
20
19
  if category and note.get("category") != category:
21
20
  continue
22
21
 
23
- if priority and note.get("priority") != priority:
24
- continue
25
-
26
22
  if tags:
27
23
  note_tags = note.get("tags", [])
28
24
  if not any(tag in note_tags for tag in tags):
@@ -43,13 +39,12 @@ def _filter_notes(
43
39
  return filtered_notes
44
40
 
45
41
 
46
- @register_tool
42
+ @register_tool(sandbox_execution=False)
47
43
  def create_note(
48
44
  title: str,
49
45
  content: str,
50
46
  category: str = "general",
51
47
  tags: list[str] | None = None,
52
- priority: str = "normal",
53
48
  ) -> dict[str, Any]:
54
49
  try:
55
50
  if not title or not title.strip():
@@ -58,7 +53,7 @@ def create_note(
58
53
  if not content or not content.strip():
59
54
  return {"success": False, "error": "Content cannot be empty", "note_id": None}
60
55
 
61
- valid_categories = ["general", "findings", "methodology", "todo", "questions", "plan"]
56
+ valid_categories = ["general", "findings", "methodology", "questions", "plan"]
62
57
  if category not in valid_categories:
63
58
  return {
64
59
  "success": False,
@@ -66,14 +61,6 @@ def create_note(
66
61
  "note_id": None,
67
62
  }
68
63
 
69
- valid_priorities = ["low", "normal", "high", "urgent"]
70
- if priority not in valid_priorities:
71
- return {
72
- "success": False,
73
- "error": f"Invalid priority. Must be one of: {', '.join(valid_priorities)}",
74
- "note_id": None,
75
- }
76
-
77
64
  note_id = str(uuid.uuid4())[:5]
78
65
  timestamp = datetime.now(UTC).isoformat()
79
66
 
@@ -82,7 +69,6 @@ def create_note(
82
69
  "content": content.strip(),
83
70
  "category": category,
84
71
  "tags": tags or [],
85
- "priority": priority,
86
72
  "created_at": timestamp,
87
73
  "updated_at": timestamp,
88
74
  }
@@ -99,17 +85,14 @@ def create_note(
99
85
  }
100
86
 
101
87
 
102
- @register_tool
88
+ @register_tool(sandbox_execution=False)
103
89
  def list_notes(
104
90
  category: str | None = None,
105
91
  tags: list[str] | None = None,
106
- priority: str | None = None,
107
92
  search: str | None = None,
108
93
  ) -> dict[str, Any]:
109
94
  try:
110
- filtered_notes = _filter_notes(
111
- category=category, tags=tags, priority=priority, search_query=search
112
- )
95
+ filtered_notes = _filter_notes(category=category, tags=tags, search_query=search)
113
96
 
114
97
  return {
115
98
  "success": True,
@@ -126,13 +109,12 @@ def list_notes(
126
109
  }
127
110
 
128
111
 
129
- @register_tool
112
+ @register_tool(sandbox_execution=False)
130
113
  def update_note(
131
114
  note_id: str,
132
115
  title: str | None = None,
133
116
  content: str | None = None,
134
117
  tags: list[str] | None = None,
135
- priority: str | None = None,
136
118
  ) -> dict[str, Any]:
137
119
  try:
138
120
  if note_id not in _notes_storage:
@@ -153,15 +135,6 @@ def update_note(
153
135
  if tags is not None:
154
136
  note["tags"] = tags
155
137
 
156
- if priority is not None:
157
- valid_priorities = ["low", "normal", "high", "urgent"]
158
- if priority not in valid_priorities:
159
- return {
160
- "success": False,
161
- "error": f"Invalid priority. Must be one of: {', '.join(valid_priorities)}",
162
- }
163
- note["priority"] = priority
164
-
165
138
  note["updated_at"] = datetime.now(UTC).isoformat()
166
139
 
167
140
  return {
@@ -173,7 +146,7 @@ def update_note(
173
146
  return {"success": False, "error": f"Failed to update note: {e}"}
174
147
 
175
148
 
176
- @register_tool
149
+ @register_tool(sandbox_execution=False)
177
150
  def delete_note(note_id: str) -> dict[str, Any]:
178
151
  try:
179
152
  if note_id not in _notes_storage: