strix-agent 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- strix/agents/StrixAgent/strix_agent.py +18 -6
- strix/agents/StrixAgent/system_prompt.jinja +29 -203
- strix/agents/base_agent.py +3 -0
- strix/cli/app.py +3 -1
- strix/cli/main.py +95 -8
- strix/cli/tool_components/terminal_renderer.py +92 -60
- strix/llm/config.py +1 -1
- strix/llm/llm.py +66 -2
- strix/llm/memory_compressor.py +1 -1
- strix/prompts/__init__.py +9 -13
- strix/prompts/vulnerabilities/authentication_jwt.jinja +7 -7
- strix/prompts/vulnerabilities/csrf.jinja +1 -1
- strix/prompts/vulnerabilities/idor.jinja +3 -3
- strix/prompts/vulnerabilities/rce.jinja +1 -1
- strix/prompts/vulnerabilities/sql_injection.jinja +3 -3
- strix/prompts/vulnerabilities/xss.jinja +3 -3
- strix/prompts/vulnerabilities/xxe.jinja +1 -1
- strix/runtime/docker_runtime.py +204 -160
- strix/runtime/runtime.py +3 -2
- strix/runtime/tool_server.py +136 -28
- strix/tools/agents_graph/agents_graph_actions.py +4 -10
- strix/tools/agents_graph/agents_graph_actions_schema.xml +18 -12
- strix/tools/argument_parser.py +2 -1
- strix/tools/executor.py +3 -0
- strix/tools/terminal/__init__.py +2 -2
- strix/tools/terminal/terminal_actions.py +22 -40
- strix/tools/terminal/terminal_actions_schema.xml +113 -84
- strix/tools/terminal/terminal_manager.py +83 -123
- strix/tools/terminal/terminal_session.py +447 -0
- {strix_agent-0.1.8.dist-info → strix_agent-0.1.10.dist-info}/METADATA +6 -4
- {strix_agent-0.1.8.dist-info → strix_agent-0.1.10.dist-info}/RECORD +34 -34
- strix/tools/terminal/terminal_instance.py +0 -231
- {strix_agent-0.1.8.dist-info → strix_agent-0.1.10.dist-info}/LICENSE +0 -0
- {strix_agent-0.1.8.dist-info → strix_agent-0.1.10.dist-info}/WHEEL +0 -0
- {strix_agent-0.1.8.dist-info → strix_agent-0.1.10.dist-info}/entry_points.txt +0 -0
strix/runtime/tool_server.py
CHANGED
@@ -1,7 +1,15 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import argparse
|
4
|
+
import asyncio
|
1
5
|
import logging
|
2
6
|
import os
|
7
|
+
import signal
|
8
|
+
import sys
|
9
|
+
from multiprocessing import Process, Queue
|
3
10
|
from typing import Any
|
4
11
|
|
12
|
+
import uvicorn
|
5
13
|
from fastapi import Depends, FastAPI, HTTPException, status
|
6
14
|
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
7
15
|
from pydantic import BaseModel, ValidationError
|
@@ -11,20 +19,25 @@ SANDBOX_MODE = os.getenv("STRIX_SANDBOX_MODE", "false").lower() == "true"
|
|
11
19
|
if not SANDBOX_MODE:
|
12
20
|
raise RuntimeError("Tool server should only run in sandbox mode (STRIX_SANDBOX_MODE=true)")
|
13
21
|
|
14
|
-
|
15
|
-
|
16
|
-
|
22
|
+
parser = argparse.ArgumentParser(description="Start Strix tool server")
|
23
|
+
parser.add_argument("--token", required=True, help="Authentication token")
|
24
|
+
parser.add_argument("--host", default="0.0.0.0", help="Host to bind to") # nosec
|
25
|
+
parser.add_argument("--port", type=int, required=True, help="Port to bind to")
|
26
|
+
|
27
|
+
args = parser.parse_args()
|
28
|
+
EXPECTED_TOKEN = args.token
|
17
29
|
|
18
30
|
app = FastAPI()
|
19
|
-
logger = logging.getLogger(__name__)
|
20
31
|
security = HTTPBearer()
|
21
32
|
|
22
33
|
security_dependency = Depends(security)
|
23
34
|
|
35
|
+
agent_processes: dict[str, dict[str, Any]] = {}
|
36
|
+
agent_queues: dict[str, dict[str, Queue[Any]]] = {}
|
37
|
+
|
24
38
|
|
25
39
|
def verify_token(credentials: HTTPAuthorizationCredentials) -> str:
|
26
40
|
if not credentials or credentials.scheme != "Bearer":
|
27
|
-
logger.warning("Authentication failed: Invalid or missing Bearer token scheme")
|
28
41
|
raise HTTPException(
|
29
42
|
status_code=status.HTTP_401_UNAUTHORIZED,
|
30
43
|
detail="Invalid authentication scheme. Bearer token required.",
|
@@ -32,18 +45,17 @@ def verify_token(credentials: HTTPAuthorizationCredentials) -> str:
|
|
32
45
|
)
|
33
46
|
|
34
47
|
if credentials.credentials != EXPECTED_TOKEN:
|
35
|
-
logger.warning("Authentication failed: Invalid token provided from remote host")
|
36
48
|
raise HTTPException(
|
37
49
|
status_code=status.HTTP_401_UNAUTHORIZED,
|
38
50
|
detail="Invalid authentication token",
|
39
51
|
headers={"WWW-Authenticate": "Bearer"},
|
40
52
|
)
|
41
53
|
|
42
|
-
logger.debug("Authentication successful for tool execution request")
|
43
54
|
return credentials.credentials
|
44
55
|
|
45
56
|
|
46
57
|
# Request body accepted by the /execute endpoint.
class ToolExecutionRequest(BaseModel):
    # Identifier of the calling agent; selects which worker process runs the tool.
    agent_id: str
    # Name the worker uses to look the tool up in the registry.
    tool_name: str
    # Keyword arguments for the tool; the worker converts them before the call.
    kwargs: dict[str, Any]
|
49
61
|
|
@@ -53,45 +65,141 @@ class ToolExecutionResponse(BaseModel):
|
|
53
65
|
error: str | None = None
|
54
66
|
|
55
67
|
|
68
|
+
def agent_worker(_agent_id: str, request_queue: Queue[Any], response_queue: Queue[Any]) -> None:
    """Serve tool-execution requests for one agent; intended as a ``Process`` target.

    Blocks on ``request_queue`` for dicts carrying ``"tool_name"`` and ``"kwargs"``,
    runs the matching registered tool and puts one response dict per request on
    ``response_queue``: ``{"result": ...}`` on success, ``{"error": ...}`` otherwise.
    A ``None`` request is the shutdown sentinel and ends the loop.

    Args:
        _agent_id: Identifier of the agent this worker belongs to (not used here).
        request_queue: Queue delivering request dicts, or ``None`` to stop.
        response_queue: Queue receiving one response dict per request.
    """
    # Silence all logging in the worker: drop existing root handlers and raise the level.
    root_logger = logging.getLogger()
    root_logger.handlers = [logging.NullHandler()]
    root_logger.setLevel(logging.CRITICAL)

    # Imported lazily, after logging has been silenced.
    from strix.tools.argument_parser import ArgumentConversionError, convert_arguments
    from strix.tools.registry import get_tool_by_name

    while True:
        try:
            request = request_queue.get()

            if request is None:
                break

            tool_name = request["tool_name"]
            kwargs = request["kwargs"]

            try:
                tool_func = get_tool_by_name(tool_name)
                if not tool_func:
                    response_queue.put({"error": f"Tool '{tool_name}' not found"})
                    continue

                converted_kwargs = convert_arguments(tool_func, kwargs)
                result = tool_func(**converted_kwargs)

                response_queue.put({"result": result})

            except (ArgumentConversionError, ValidationError) as e:
                response_queue.put({"error": f"Invalid arguments: {e}"})
            except Exception as e:  # noqa: BLE001
                # Process boundary: a tool may raise anything. Without a reply the
                # caller would wait on response_queue forever and the worker would
                # die, so every failure is reported instead of propagating.
                response_queue.put({"error": f"Tool execution error: {e}"})

        except (RuntimeError, ValueError, ImportError) as e:
            response_queue.put({"error": f"Worker error: {e}"})
|
106
|
+
|
107
|
+
|
108
|
+
def ensure_agent_process(agent_id: str) -> tuple[Queue[Any], Queue[Any]]:
    """Return the ``(request, response)`` queues of the worker process for ``agent_id``.

    A worker process with a fresh pair of queues is started when the agent has no
    worker yet, or when its previous worker is no longer alive (requests sent to a
    dead worker would never be answered). The process and its queues are recorded
    in the module-level ``agent_processes`` and ``agent_queues`` registries.

    Args:
        agent_id: Identifier of the agent whose worker is needed.

    Returns:
        The request queue and the response queue, in that order.
    """
    entry = agent_processes.get(agent_id)
    if entry is None or not entry["process"].is_alive():
        request_queue: Queue[Any] = Queue()
        response_queue: Queue[Any] = Queue()

        process = Process(
            target=agent_worker, args=(agent_id, request_queue, response_queue), daemon=True
        )
        process.start()

        agent_processes[agent_id] = {"process": process, "pid": process.pid}
        agent_queues[agent_id] = {"request": request_queue, "response": response_queue}

    queues = agent_queues[agent_id]
    return queues["request"], queues["response"]
|
122
|
+
|
123
|
+
|
56
124
|
@app.post("/execute", response_model=ToolExecutionResponse)
async def execute_tool(
    request: ToolExecutionRequest, credentials: HTTPAuthorizationCredentials = security_dependency
) -> ToolExecutionResponse:
    """Run one tool call in the requesting agent's worker process.

    The bearer token is checked first. The call is then queued for the agent's
    worker (started on demand) and the worker's reply is awaited.

    Args:
        request: Agent id, tool name and keyword arguments of the call.
        credentials: Bearer credentials supplied through the security dependency.

    Returns:
        A response carrying either the tool's ``result`` or an ``error`` message.
    """
    verify_token(credentials)

    request_queue, response_queue = ensure_agent_process(request.agent_id)

    request_queue.put({"tool_name": request.tool_name, "kwargs": request.kwargs})

    try:
        # Queue.get blocks, so it is awaited in the default executor rather than
        # on the event loop. get_running_loop() is the documented way to obtain
        # the loop from inside a coroutine.
        # NOTE(review): the wait has no timeout; if the worker never replies,
        # this request never completes.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, response_queue.get)

        if "error" in response:
            return ToolExecutionResponse(error=response["error"])
        return ToolExecutionResponse(result=response.get("result"))

    except (RuntimeError, ValueError, OSError) as e:
        return ToolExecutionResponse(error=f"Worker error: {e}")
|
73
144
|
|
74
|
-
return ToolExecutionResponse(result=result)
|
75
145
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
return ToolExecutionResponse(error=f"Tool execution error: {e}")
|
85
|
-
except Exception:
|
86
|
-
logger.exception("Unexpected error during tool execution")
|
87
|
-
return ToolExecutionResponse(error="Internal server error")
|
146
|
+
@app.post("/register_agent")
async def register_agent(
    agent_id: str, credentials: HTTPAuthorizationCredentials = security_dependency
) -> dict[str, str]:
    """Start (or reuse) the worker process for ``agent_id`` after checking the token.

    Args:
        agent_id: Identifier of the agent to register.
        credentials: Bearer credentials supplied through the security dependency.

    Returns:
        A dict with ``"status"`` set to ``"registered"`` and the given ``"agent_id"``.
    """
    verify_token(credentials)
    ensure_agent_process(agent_id)

    confirmation = {"status": "registered", "agent_id": agent_id}
    return confirmation
|
88
154
|
|
89
155
|
|
90
156
|
@app.get("/health")
async def health_check() -> dict[str, Any]:
    """Report server status together with the currently registered agents.

    Returns:
        A dict with the fixed ``"status"`` value, the sandbox flags, whether an
        auth token is configured, and the number and ids of known agent workers.
    """
    registered_ids = list(agent_processes.keys())
    environment = "sandbox" if SANDBOX_MODE else "main"
    auth_configured = "true" if EXPECTED_TOKEN else "false"

    return {
        "status": "healthy",
        "sandbox_mode": str(SANDBOX_MODE),
        "environment": environment,
        "auth_configured": auth_configured,
        "active_agents": len(agent_processes),
        "agents": registered_ids,
    }
|
166
|
+
|
167
|
+
|
168
|
+
def cleanup_all_agents() -> None:
    """Stop every registered agent worker and remove it from the registries.

    Each worker is first asked to exit by putting the ``None`` sentinel on its
    request queue and is given one second to finish. A worker that is still alive
    is terminated, given another second, and finally killed. Whatever happens, the
    agent's entries are dropped from ``agent_processes`` and ``agent_queues`` so a
    repeated call does not process the same worker twice.
    """
    log = logging.getLogger(__name__)

    # Iterate over a snapshot because entries are removed inside the loop.
    for agent_id in list(agent_processes):
        try:
            agent_queues[agent_id]["request"].put(None)
            process = agent_processes[agent_id]["process"]

            process.join(timeout=1)

            if process.is_alive():
                process.terminate()
                process.join(timeout=1)

            if process.is_alive():
                process.kill()

        except (BrokenPipeError, EOFError, OSError):
            # Deliberate best effort: these errors are ignored so shutdown continues
            # with the remaining agents.
            pass
        except (RuntimeError, ValueError) as e:
            log.debug("Error during agent cleanup: %s", e)
        finally:
            agent_processes.pop(agent_id, None)
            agent_queues.pop(agent_id, None)
|
187
|
+
|
188
|
+
|
189
|
+
def signal_handler(_signum: int, _frame: Any) -> None:
    """Shut down on a termination signal: stop all agent workers, then exit with status 0.

    Args:
        _signum: Number of the received signal (unused).
        _frame: Stack frame that was interrupted (unused).

    Raises:
        SystemExit: Always, with exit code 0, once cleanup has run.
    """
    # Ignore SIGPIPE, where the platform defines it, before cleanup starts.
    if hasattr(signal, "SIGPIPE"):
        signal.signal(signal.SIGPIPE, signal.SIG_IGN)

    cleanup_all_agents()
    sys.exit(0)
|
193
|
+
|
194
|
+
|
195
|
+
# Ignore SIGPIPE on platforms that define it.
if hasattr(signal, "SIGPIPE"):
    signal.signal(signal.SIGPIPE, signal.SIG_IGN)

# Termination and interrupt signals both go through the shutdown handler.
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)

if __name__ == "__main__":
    try:
        uvicorn.run(app, host=args.host, port=args.port, log_level="info")
    finally:
        # Runs however the server stops, so agent workers are always cleaned up.
        cleanup_all_agents()
|
@@ -57,6 +57,10 @@ def _run_agent_in_thread(
|
|
57
57
|
- Work independently with your own approach
|
58
58
|
- Use agent_finish when complete to report back to parent
|
59
59
|
- You are a SPECIALIST for this specific task
|
60
|
+
- You share the same container as other agents but have your own tool server instance
|
61
|
+
- All agents share /workspace directory and proxy history for better collaboration
|
62
|
+
- You can see files created by other agents and proxy traffic from previous work
|
63
|
+
- Build upon previous work but focus on your specific delegated task
|
60
64
|
</instructions>
|
61
65
|
</agent_delegation>"""
|
62
66
|
|
@@ -192,16 +196,6 @@ def create_agent(
|
|
192
196
|
if prompt_modules:
|
193
197
|
module_list = [m.strip() for m in prompt_modules.split(",") if m.strip()]
|
194
198
|
|
195
|
-
if "root_agent" in module_list:
|
196
|
-
return {
|
197
|
-
"success": False,
|
198
|
-
"error": (
|
199
|
-
"The 'root_agent' module is reserved for the main agent "
|
200
|
-
"and cannot be used by sub-agents"
|
201
|
-
),
|
202
|
-
"agent_id": None,
|
203
|
-
}
|
204
|
-
|
205
199
|
if len(module_list) > 3:
|
206
200
|
return {
|
207
201
|
"success": False,
|
@@ -59,7 +59,7 @@ Use this tool when:
|
|
59
59
|
<tool name="create_agent">
|
60
60
|
<description>Create and spawn a new agent to handle a specific subtask.
|
61
61
|
|
62
|
-
|
62
|
+
Only create a new agent if no existing agent is handling the specific task.</description>
|
63
63
|
<details>The new agent inherits the parent's conversation history and context up to the point
|
64
64
|
of creation, then continues with its assigned subtask. This enables decomposition
|
65
65
|
of complex penetration testing tasks into specialized sub-agents.
|
@@ -67,12 +67,6 @@ MANDATORY REQUIREMENT: You MUST call view_agent_graph FIRST before creating any
|
|
67
67
|
The agent runs asynchronously and independently, allowing the parent to continue
|
68
68
|
immediately while the new agent executes its task in the background.
|
69
69
|
|
70
|
-
CRITICAL: Before calling this tool, you MUST first use view_agent_graph to:
|
71
|
-
- Examine all existing agents and their current tasks
|
72
|
-
- Verify no agent is already working on the same or similar objective
|
73
|
-
- Avoid duplication of effort and resource waste
|
74
|
-
- Ensure efficient coordination across the multi-agent system
|
75
|
-
|
76
70
|
If you as a parent agent don't absolutely have anything to do while your subagents are running, you can use wait_for_message tool. The subagent will continue to run in the background, and update you when it's done.
|
77
71
|
</details>
|
78
72
|
<parameters>
|
@@ -86,16 +80,13 @@ MANDATORY REQUIREMENT: You MUST call view_agent_graph FIRST before creating any
|
|
86
80
|
<description>Whether the new agent should inherit parent's conversation history and context</description>
|
87
81
|
</parameter>
|
88
82
|
<parameter name="prompt_modules" type="string" required="false">
|
89
|
-
<description>Comma-separated list of prompt modules to use for the agent. Most agents should have at least one module in order to be useful. {{DYNAMIC_MODULES_DESCRIPTION}}</description>
|
83
|
+
<description>Comma-separated list of prompt modules to use for the agent (MAXIMUM 3 modules allowed). Most agents should have at least one module in order to be useful. Agents should be highly specialized - use 1-3 related vulnerability modules only. {{DYNAMIC_MODULES_DESCRIPTION}}</description>
|
90
84
|
</parameter>
|
91
85
|
</parameters>
|
92
86
|
<returns type="Dict[str, Any]">
|
93
87
|
<description>Response containing: - agent_id: Unique identifier for the created agent - success: Whether the agent was created successfully - message: Status message - agent_info: Details about the created agent</description>
|
94
88
|
</returns>
|
95
89
|
<examples>
|
96
|
-
# REQUIRED: First check agent graph before creating any new agent
|
97
|
-
<function=view_agent_graph>
|
98
|
-
</function>
|
99
90
|
# REQUIRED: Check agent graph again before creating another agent
|
100
91
|
<function=view_agent_graph>
|
101
92
|
</function>
|
@@ -108,12 +99,27 @@ MANDATORY REQUIREMENT: You MUST call view_agent_graph FIRST before creating any
|
|
108
99
|
<parameter=prompt_modules>sql_injection</parameter>
|
109
100
|
</function>
|
110
101
|
|
111
|
-
# Create specialized authentication testing agent with multiple modules (comma-separated)
|
112
102
|
<function=create_agent>
|
113
103
|
<parameter=task>Test authentication mechanisms, JWT implementation, and session management
|
114
104
|
for security vulnerabilities and bypass techniques.</parameter>
|
115
105
|
<parameter=name>Auth Specialist</parameter>
|
116
106
|
<parameter=prompt_modules>authentication_jwt, business_logic</parameter>
|
107
|
+
</function>
|
108
|
+
|
109
|
+
# Example of single-module specialization (most focused)
|
110
|
+
<function=create_agent>
|
111
|
+
<parameter=task>Perform comprehensive XSS testing including reflected, stored, and DOM-based
|
112
|
+
variants across all identified input points.</parameter>
|
113
|
+
<parameter=name>XSS Specialist</parameter>
|
114
|
+
<parameter=prompt_modules>xss</parameter>
|
115
|
+
</function>
|
116
|
+
|
117
|
+
# Example of maximum 3 related modules (borderline acceptable)
|
118
|
+
<function=create_agent>
|
119
|
+
<parameter=task>Test for server-side vulnerabilities including SSRF, XXE, and potential
|
120
|
+
RCE vectors in file upload and XML processing endpoints.</parameter>
|
121
|
+
<parameter=name>Server-Side Attack Specialist</parameter>
|
122
|
+
<parameter=prompt_modules>ssrf, xxe, rce</parameter>
|
117
123
|
</function>
|
118
124
|
</examples>
|
119
125
|
</tool>
|
strix/tools/argument_parser.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
import contextlib
|
2
2
|
import inspect
|
3
3
|
import json
|
4
|
+
import types
|
4
5
|
from collections.abc import Callable
|
5
6
|
from typing import Any, Union, get_args, get_origin
|
6
7
|
|
@@ -48,7 +49,7 @@ def convert_arguments(func: Callable[..., Any], kwargs: dict[str, Any]) -> dict[
|
|
48
49
|
|
49
50
|
def convert_string_to_type(value: str, param_type: Any) -> Any:
|
50
51
|
origin = get_origin(param_type)
|
51
|
-
if origin is Union or
|
52
|
+
if origin is Union or isinstance(param_type, types.UnionType):
|
52
53
|
args = get_args(param_type)
|
53
54
|
for arg_type in args:
|
54
55
|
if arg_type is not type(None):
|
strix/tools/executor.py
CHANGED
@@ -49,7 +49,10 @@ async def _execute_tool_in_sandbox(tool_name: str, agent_state: Any, **kwargs: A
|
|
49
49
|
server_url = await runtime.get_sandbox_url(agent_state.sandbox_id, tool_server_port)
|
50
50
|
request_url = f"{server_url}/execute"
|
51
51
|
|
52
|
+
agent_id = getattr(agent_state, "agent_id", "unknown")
|
53
|
+
|
52
54
|
request_data = {
|
55
|
+
"agent_id": agent_id,
|
53
56
|
"tool_name": tool_name,
|
54
57
|
"kwargs": kwargs,
|
55
58
|
}
|
strix/tools/terminal/__init__.py
CHANGED
@@ -1,53 +1,35 @@
|
|
1
|
-
from typing import Any
|
1
|
+
from typing import Any
|
2
2
|
|
3
3
|
from strix.tools.registry import register_tool
|
4
4
|
|
5
5
|
from .terminal_manager import get_terminal_manager
|
6
6
|
|
7
7
|
|
8
|
-
TerminalAction = Literal["new_terminal", "send_input", "wait", "close"]
|
9
|
-
|
10
|
-
|
11
8
|
@register_tool
def terminal_execute(
    command: str,
    is_input: bool = False,
    timeout: float | None = None,
    terminal_id: str | None = None,
    no_enter: bool = False,
) -> dict[str, Any]:
    """Run ``command`` through the terminal manager and return the manager's result.

    Every argument is forwarded unchanged to ``execute_command`` on the object
    returned by ``get_terminal_manager()``. A ``ValueError`` or ``RuntimeError``
    raised by that call is turned into an error dict instead of propagating.

    Args:
        command: Forwarded as ``command``.
        is_input: Forwarded as ``is_input``.
        timeout: Forwarded as ``timeout``.
        terminal_id: Forwarded as ``terminal_id``; reported as ``"default"`` in the
            error dict when not given.
        no_enter: Forwarded as ``no_enter``.

    Returns:
        The manager's result, or on failure a dict with ``"status": "error"``, the
        error text, the command, the terminal id, empty ``"content"`` and ``None``
        for ``"exit_code"`` and ``"working_dir"``.
    """
    terminal_manager = get_terminal_manager()
    forwarded = {
        "command": command,
        "is_input": is_input,
        "timeout": timeout,
        "terminal_id": terminal_id,
        "no_enter": no_enter,
    }

    try:
        return terminal_manager.execute_command(**forwarded)
    except (ValueError, RuntimeError) as exc:
        failure: dict[str, Any] = {
            "error": str(exc),
            "command": command,
            "terminal_id": terminal_id or "default",
            "content": "",
            "status": "error",
            "exit_code": None,
            "working_dir": None,
        }
        return failure
|