strix-agent 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35):
  1. strix/agents/StrixAgent/strix_agent.py +18 -6
  2. strix/agents/StrixAgent/system_prompt.jinja +29 -203
  3. strix/agents/base_agent.py +3 -0
  4. strix/cli/app.py +3 -1
  5. strix/cli/main.py +95 -8
  6. strix/cli/tool_components/terminal_renderer.py +92 -60
  7. strix/llm/config.py +1 -1
  8. strix/llm/llm.py +66 -2
  9. strix/llm/memory_compressor.py +1 -1
  10. strix/prompts/__init__.py +9 -13
  11. strix/prompts/vulnerabilities/authentication_jwt.jinja +7 -7
  12. strix/prompts/vulnerabilities/csrf.jinja +1 -1
  13. strix/prompts/vulnerabilities/idor.jinja +3 -3
  14. strix/prompts/vulnerabilities/rce.jinja +1 -1
  15. strix/prompts/vulnerabilities/sql_injection.jinja +3 -3
  16. strix/prompts/vulnerabilities/xss.jinja +3 -3
  17. strix/prompts/vulnerabilities/xxe.jinja +1 -1
  18. strix/runtime/docker_runtime.py +204 -160
  19. strix/runtime/runtime.py +3 -2
  20. strix/runtime/tool_server.py +136 -28
  21. strix/tools/agents_graph/agents_graph_actions.py +4 -10
  22. strix/tools/agents_graph/agents_graph_actions_schema.xml +18 -12
  23. strix/tools/argument_parser.py +2 -1
  24. strix/tools/executor.py +3 -0
  25. strix/tools/terminal/__init__.py +2 -2
  26. strix/tools/terminal/terminal_actions.py +22 -40
  27. strix/tools/terminal/terminal_actions_schema.xml +113 -84
  28. strix/tools/terminal/terminal_manager.py +83 -123
  29. strix/tools/terminal/terminal_session.py +447 -0
  30. {strix_agent-0.1.8.dist-info → strix_agent-0.1.10.dist-info}/METADATA +6 -4
  31. {strix_agent-0.1.8.dist-info → strix_agent-0.1.10.dist-info}/RECORD +34 -34
  32. strix/tools/terminal/terminal_instance.py +0 -231
  33. {strix_agent-0.1.8.dist-info → strix_agent-0.1.10.dist-info}/LICENSE +0 -0
  34. {strix_agent-0.1.8.dist-info → strix_agent-0.1.10.dist-info}/WHEEL +0 -0
  35. {strix_agent-0.1.8.dist-info → strix_agent-0.1.10.dist-info}/entry_points.txt +0 -0
@@ -1,7 +1,15 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import asyncio
1
5
  import logging
2
6
  import os
7
+ import signal
8
+ import sys
9
+ from multiprocessing import Process, Queue
3
10
  from typing import Any
4
11
 
12
+ import uvicorn
5
13
  from fastapi import Depends, FastAPI, HTTPException, status
6
14
  from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
7
15
  from pydantic import BaseModel, ValidationError
@@ -11,20 +19,25 @@ SANDBOX_MODE = os.getenv("STRIX_SANDBOX_MODE", "false").lower() == "true"
11
19
  if not SANDBOX_MODE:
12
20
  raise RuntimeError("Tool server should only run in sandbox mode (STRIX_SANDBOX_MODE=true)")
13
21
 
14
- EXPECTED_TOKEN = os.getenv("STRIX_SANDBOX_TOKEN")
15
- if not EXPECTED_TOKEN:
16
- raise RuntimeError("STRIX_SANDBOX_TOKEN environment variable is required in sandbox mode")
22
+ parser = argparse.ArgumentParser(description="Start Strix tool server")
23
+ parser.add_argument("--token", required=True, help="Authentication token")
24
+ parser.add_argument("--host", default="0.0.0.0", help="Host to bind to") # nosec
25
+ parser.add_argument("--port", type=int, required=True, help="Port to bind to")
26
+
27
+ args = parser.parse_args()
28
+ EXPECTED_TOKEN = args.token
17
29
 
18
30
  app = FastAPI()
19
- logger = logging.getLogger(__name__)
20
31
  security = HTTPBearer()
21
32
 
22
33
  security_dependency = Depends(security)
23
34
 
35
+ agent_processes: dict[str, dict[str, Any]] = {}
36
+ agent_queues: dict[str, dict[str, Queue[Any]]] = {}
37
+
24
38
 
25
39
  def verify_token(credentials: HTTPAuthorizationCredentials) -> str:
26
40
  if not credentials or credentials.scheme != "Bearer":
27
- logger.warning("Authentication failed: Invalid or missing Bearer token scheme")
28
41
  raise HTTPException(
29
42
  status_code=status.HTTP_401_UNAUTHORIZED,
30
43
  detail="Invalid authentication scheme. Bearer token required.",
@@ -32,18 +45,17 @@ def verify_token(credentials: HTTPAuthorizationCredentials) -> str:
32
45
  )
33
46
 
34
47
  if credentials.credentials != EXPECTED_TOKEN:
35
- logger.warning("Authentication failed: Invalid token provided from remote host")
36
48
  raise HTTPException(
37
49
  status_code=status.HTTP_401_UNAUTHORIZED,
38
50
  detail="Invalid authentication token",
39
51
  headers={"WWW-Authenticate": "Bearer"},
40
52
  )
41
53
 
42
- logger.debug("Authentication successful for tool execution request")
43
54
  return credentials.credentials
44
55
 
45
56
 
46
57
  class ToolExecutionRequest(BaseModel):
58
+ agent_id: str
47
59
  tool_name: str
48
60
  kwargs: dict[str, Any]
49
61
 
@@ -53,45 +65,141 @@ class ToolExecutionResponse(BaseModel):
53
65
  error: str | None = None
54
66
 
55
67
 
68
+ def agent_worker(_agent_id: str, request_queue: Queue[Any], response_queue: Queue[Any]) -> None:
69
+ null_handler = logging.NullHandler()
70
+
71
+ root_logger = logging.getLogger()
72
+ root_logger.handlers = [null_handler]
73
+ root_logger.setLevel(logging.CRITICAL)
74
+
75
+ from strix.tools.argument_parser import ArgumentConversionError, convert_arguments
76
+ from strix.tools.registry import get_tool_by_name
77
+
78
+ while True:
79
+ try:
80
+ request = request_queue.get()
81
+
82
+ if request is None:
83
+ break
84
+
85
+ tool_name = request["tool_name"]
86
+ kwargs = request["kwargs"]
87
+
88
+ try:
89
+ tool_func = get_tool_by_name(tool_name)
90
+ if not tool_func:
91
+ response_queue.put({"error": f"Tool '{tool_name}' not found"})
92
+ continue
93
+
94
+ converted_kwargs = convert_arguments(tool_func, kwargs)
95
+ result = tool_func(**converted_kwargs)
96
+
97
+ response_queue.put({"result": result})
98
+
99
+ except (ArgumentConversionError, ValidationError) as e:
100
+ response_queue.put({"error": f"Invalid arguments: {e}"})
101
+ except (RuntimeError, ValueError, ImportError) as e:
102
+ response_queue.put({"error": f"Tool execution error: {e}"})
103
+
104
+ except (RuntimeError, ValueError, ImportError) as e:
105
+ response_queue.put({"error": f"Worker error: {e}"})
106
+
107
+
108
+ def ensure_agent_process(agent_id: str) -> tuple[Queue[Any], Queue[Any]]:
109
+ if agent_id not in agent_processes:
110
+ request_queue: Queue[Any] = Queue()
111
+ response_queue: Queue[Any] = Queue()
112
+
113
+ process = Process(
114
+ target=agent_worker, args=(agent_id, request_queue, response_queue), daemon=True
115
+ )
116
+ process.start()
117
+
118
+ agent_processes[agent_id] = {"process": process, "pid": process.pid}
119
+ agent_queues[agent_id] = {"request": request_queue, "response": response_queue}
120
+
121
+ return agent_queues[agent_id]["request"], agent_queues[agent_id]["response"]
122
+
123
+
56
124
  @app.post("/execute", response_model=ToolExecutionResponse)
57
125
  async def execute_tool(
58
126
  request: ToolExecutionRequest, credentials: HTTPAuthorizationCredentials = security_dependency
59
127
  ) -> ToolExecutionResponse:
60
128
  verify_token(credentials)
61
129
 
62
- from strix.tools.argument_parser import ArgumentConversionError, convert_arguments
63
- from strix.tools.registry import get_tool_by_name
130
+ request_queue, response_queue = ensure_agent_process(request.agent_id)
131
+
132
+ request_queue.put({"tool_name": request.tool_name, "kwargs": request.kwargs})
64
133
 
65
134
  try:
66
- tool_func = get_tool_by_name(request.tool_name)
67
- if not tool_func:
68
- return ToolExecutionResponse(error=f"Tool '{request.tool_name}' not found")
135
+ loop = asyncio.get_event_loop()
136
+ response = await loop.run_in_executor(None, response_queue.get)
69
137
 
70
- converted_kwargs = convert_arguments(tool_func, request.kwargs)
138
+ if "error" in response:
139
+ return ToolExecutionResponse(error=response["error"])
140
+ return ToolExecutionResponse(result=response.get("result"))
71
141
 
72
- result = tool_func(**converted_kwargs)
142
+ except (RuntimeError, ValueError, OSError) as e:
143
+ return ToolExecutionResponse(error=f"Worker error: {e}")
73
144
 
74
- return ToolExecutionResponse(result=result)
75
145
 
76
- except (ArgumentConversionError, ValidationError) as e:
77
- logger.warning("Invalid tool arguments: %s", e)
78
- return ToolExecutionResponse(error=f"Invalid arguments: {e}")
79
- except TypeError as e:
80
- logger.warning("Tool execution type error: %s", e)
81
- return ToolExecutionResponse(error=f"Tool execution error: {e}")
82
- except ValueError as e:
83
- logger.warning("Tool execution value error: %s", e)
84
- return ToolExecutionResponse(error=f"Tool execution error: {e}")
85
- except Exception:
86
- logger.exception("Unexpected error during tool execution")
87
- return ToolExecutionResponse(error="Internal server error")
146
+ @app.post("/register_agent")
147
+ async def register_agent(
148
+ agent_id: str, credentials: HTTPAuthorizationCredentials = security_dependency
149
+ ) -> dict[str, str]:
150
+ verify_token(credentials)
151
+
152
+ ensure_agent_process(agent_id)
153
+ return {"status": "registered", "agent_id": agent_id}
88
154
 
89
155
 
90
156
  @app.get("/health")
91
- async def health_check() -> dict[str, str]:
157
+ async def health_check() -> dict[str, Any]:
92
158
  return {
93
159
  "status": "healthy",
94
160
  "sandbox_mode": str(SANDBOX_MODE),
95
161
  "environment": "sandbox" if SANDBOX_MODE else "main",
96
162
  "auth_configured": "true" if EXPECTED_TOKEN else "false",
163
+ "active_agents": len(agent_processes),
164
+ "agents": list(agent_processes.keys()),
97
165
  }
166
+
167
+
168
+ def cleanup_all_agents() -> None:
169
+ for agent_id in list(agent_processes.keys()):
170
+ try:
171
+ agent_queues[agent_id]["request"].put(None)
172
+ process = agent_processes[agent_id]["process"]
173
+
174
+ process.join(timeout=1)
175
+
176
+ if process.is_alive():
177
+ process.terminate()
178
+ process.join(timeout=1)
179
+
180
+ if process.is_alive():
181
+ process.kill()
182
+
183
+ except (BrokenPipeError, EOFError, OSError):
184
+ pass
185
+ except (RuntimeError, ValueError) as e:
186
+ logging.getLogger(__name__).debug(f"Error during agent cleanup: {e}")
187
+
188
+
189
+ def signal_handler(_signum: int, _frame: Any) -> None:
190
+ signal.signal(signal.SIGPIPE, signal.SIG_IGN) if hasattr(signal, "SIGPIPE") else None
191
+ cleanup_all_agents()
192
+ sys.exit(0)
193
+
194
+
195
+ if hasattr(signal, "SIGPIPE"):
196
+ signal.signal(signal.SIGPIPE, signal.SIG_IGN)
197
+
198
+ signal.signal(signal.SIGTERM, signal_handler)
199
+ signal.signal(signal.SIGINT, signal_handler)
200
+
201
+ if __name__ == "__main__":
202
+ try:
203
+ uvicorn.run(app, host=args.host, port=args.port, log_level="info")
204
+ finally:
205
+ cleanup_all_agents()
@@ -57,6 +57,10 @@ def _run_agent_in_thread(
57
57
  - Work independently with your own approach
58
58
  - Use agent_finish when complete to report back to parent
59
59
  - You are a SPECIALIST for this specific task
60
+ - You share the same container as other agents but have your own tool server instance
61
+ - All agents share /workspace directory and proxy history for better collaboration
62
+ - You can see files created by other agents and proxy traffic from previous work
63
+ - Build upon previous work but focus on your specific delegated task
60
64
  </instructions>
61
65
  </agent_delegation>"""
62
66
 
@@ -192,16 +196,6 @@ def create_agent(
192
196
  if prompt_modules:
193
197
  module_list = [m.strip() for m in prompt_modules.split(",") if m.strip()]
194
198
 
195
- if "root_agent" in module_list:
196
- return {
197
- "success": False,
198
- "error": (
199
- "The 'root_agent' module is reserved for the main agent "
200
- "and cannot be used by sub-agents"
201
- ),
202
- "agent_id": None,
203
- }
204
-
205
199
  if len(module_list) > 3:
206
200
  return {
207
201
  "success": False,
@@ -59,7 +59,7 @@ Use this tool when:
59
59
  <tool name="create_agent">
60
60
  <description>Create and spawn a new agent to handle a specific subtask.
61
61
 
62
- MANDATORY REQUIREMENT: You MUST call view_agent_graph FIRST before creating any new agent to check if there is already an agent working on the same or similar task. Only create a new agent if no existing agent is handling the specific task.</description>
62
+ Only create a new agent if no existing agent is handling the specific task.</description>
63
63
  <details>The new agent inherits the parent's conversation history and context up to the point
64
64
  of creation, then continues with its assigned subtask. This enables decomposition
65
65
  of complex penetration testing tasks into specialized sub-agents.
@@ -67,12 +67,6 @@ MANDATORY REQUIREMENT: You MUST call view_agent_graph FIRST before creating any
67
67
  The agent runs asynchronously and independently, allowing the parent to continue
68
68
  immediately while the new agent executes its task in the background.
69
69
 
70
- CRITICAL: Before calling this tool, you MUST first use view_agent_graph to:
71
- - Examine all existing agents and their current tasks
72
- - Verify no agent is already working on the same or similar objective
73
- - Avoid duplication of effort and resource waste
74
- - Ensure efficient coordination across the multi-agent system
75
-
76
70
  If you as a parent agent don't absolutely have anything to do while your subagents are running, you can use wait_for_message tool. The subagent will continue to run in the background, and update you when it's done.
77
71
  </details>
78
72
  <parameters>
@@ -86,16 +80,13 @@ MANDATORY REQUIREMENT: You MUST call view_agent_graph FIRST before creating any
86
80
  <description>Whether the new agent should inherit parent's conversation history and context</description>
87
81
  </parameter>
88
82
  <parameter name="prompt_modules" type="string" required="false">
89
- <description>Comma-separated list of prompt modules to use for the agent. Most agents should have at least one module in order to be useful. {{DYNAMIC_MODULES_DESCRIPTION}}</description>
83
+ <description>Comma-separated list of prompt modules to use for the agent (MAXIMUM 3 modules allowed). Most agents should have at least one module in order to be useful. Agents should be highly specialized - use 1-3 related vulnerability modules only. {{DYNAMIC_MODULES_DESCRIPTION}}</description>
90
84
  </parameter>
91
85
  </parameters>
92
86
  <returns type="Dict[str, Any]">
93
87
  <description>Response containing: - agent_id: Unique identifier for the created agent - success: Whether the agent was created successfully - message: Status message - agent_info: Details about the created agent</description>
94
88
  </returns>
95
89
  <examples>
96
- # REQUIRED: First check agent graph before creating any new agent
97
- <function=view_agent_graph>
98
- </function>
99
90
  # REQUIRED: Check agent graph again before creating another agent
100
91
  <function=view_agent_graph>
101
92
  </function>
@@ -108,12 +99,27 @@ MANDATORY REQUIREMENT: You MUST call view_agent_graph FIRST before creating any
108
99
  <parameter=prompt_modules>sql_injection</parameter>
109
100
  </function>
110
101
 
111
- # Create specialized authentication testing agent with multiple modules (comma-separated)
112
102
  <function=create_agent>
113
103
  <parameter=task>Test authentication mechanisms, JWT implementation, and session management
114
104
  for security vulnerabilities and bypass techniques.</parameter>
115
105
  <parameter=name>Auth Specialist</parameter>
116
106
  <parameter=prompt_modules>authentication_jwt, business_logic</parameter>
107
+ </function>
108
+
109
+ # Example of single-module specialization (most focused)
110
+ <function=create_agent>
111
+ <parameter=task>Perform comprehensive XSS testing including reflected, stored, and DOM-based
112
+ variants across all identified input points.</parameter>
113
+ <parameter=name>XSS Specialist</parameter>
114
+ <parameter=prompt_modules>xss</parameter>
115
+ </function>
116
+
117
+ # Example of maximum 3 related modules (borderline acceptable)
118
+ <function=create_agent>
119
+ <parameter=task>Test for server-side vulnerabilities including SSRF, XXE, and potential
120
+ RCE vectors in file upload and XML processing endpoints.</parameter>
121
+ <parameter=name>Server-Side Attack Specialist</parameter>
122
+ <parameter=prompt_modules>ssrf, xxe, rce</parameter>
117
123
  </function>
118
124
  </examples>
119
125
  </tool>
@@ -1,6 +1,7 @@
1
1
  import contextlib
2
2
  import inspect
3
3
  import json
4
+ import types
4
5
  from collections.abc import Callable
5
6
  from typing import Any, Union, get_args, get_origin
6
7
 
@@ -48,7 +49,7 @@ def convert_arguments(func: Callable[..., Any], kwargs: dict[str, Any]) -> dict[
48
49
 
49
50
  def convert_string_to_type(value: str, param_type: Any) -> Any:
50
51
  origin = get_origin(param_type)
51
- if origin is Union or origin is type(str | None):
52
+ if origin is Union or isinstance(param_type, types.UnionType):
52
53
  args = get_args(param_type)
53
54
  for arg_type in args:
54
55
  if arg_type is not type(None):
strix/tools/executor.py CHANGED
@@ -49,7 +49,10 @@ async def _execute_tool_in_sandbox(tool_name: str, agent_state: Any, **kwargs: A
49
49
  server_url = await runtime.get_sandbox_url(agent_state.sandbox_id, tool_server_port)
50
50
  request_url = f"{server_url}/execute"
51
51
 
52
+ agent_id = getattr(agent_state, "agent_id", "unknown")
53
+
52
54
  request_data = {
55
+ "agent_id": agent_id,
53
56
  "tool_name": tool_name,
54
57
  "kwargs": kwargs,
55
58
  }
@@ -1,4 +1,4 @@
1
- from .terminal_actions import terminal_action
1
+ from .terminal_actions import terminal_execute
2
2
 
3
3
 
4
- __all__ = ["terminal_action"]
4
+ __all__ = ["terminal_execute"]
@@ -1,53 +1,35 @@
1
- from typing import Any, Literal
1
+ from typing import Any
2
2
 
3
3
  from strix.tools.registry import register_tool
4
4
 
5
5
  from .terminal_manager import get_terminal_manager
6
6
 
7
7
 
8
- TerminalAction = Literal["new_terminal", "send_input", "wait", "close"]
9
-
10
-
11
8
  @register_tool
12
- def terminal_action(
13
- action: TerminalAction,
14
- inputs: list[str] | None = None,
15
- time: float | None = None,
9
+ def terminal_execute(
10
+ command: str,
11
+ is_input: bool = False,
12
+ timeout: float | None = None,
16
13
  terminal_id: str | None = None,
14
+ no_enter: bool = False,
17
15
  ) -> dict[str, Any]:
18
- def _validate_inputs(action_name: str, inputs: list[str] | None) -> None:
19
- if not inputs:
20
- raise ValueError(f"inputs parameter is required for {action_name} action")
21
-
22
- def _validate_time(time_param: float | None) -> None:
23
- if time_param is None:
24
- raise ValueError("time parameter is required for wait action")
25
-
26
- def _validate_action(action_name: str) -> None:
27
- raise ValueError(f"Unknown action: {action_name}")
28
-
29
16
  manager = get_terminal_manager()
30
17
 
31
18
  try:
32
- match action:
33
- case "new_terminal":
34
- return manager.create_terminal(terminal_id, inputs)
35
-
36
- case "send_input":
37
- _validate_inputs(action, inputs)
38
- assert inputs is not None
39
- return manager.send_input(terminal_id, inputs)
40
-
41
- case "wait":
42
- _validate_time(time)
43
- assert time is not None
44
- return manager.wait_terminal(terminal_id, time)
45
-
46
- case "close":
47
- return manager.close_terminal(terminal_id)
48
-
49
- case _:
50
- _validate_action(action) # type: ignore[unreachable]
51
-
19
+ return manager.execute_command(
20
+ command=command,
21
+ is_input=is_input,
22
+ timeout=timeout,
23
+ terminal_id=terminal_id,
24
+ no_enter=no_enter,
25
+ )
52
26
  except (ValueError, RuntimeError) as e:
53
- return {"error": str(e), "terminal_id": terminal_id, "snapshot": "", "is_running": False}
27
+ return {
28
+ "error": str(e),
29
+ "command": command,
30
+ "terminal_id": terminal_id or "default",
31
+ "content": "",
32
+ "status": "error",
33
+ "exit_code": None,
34
+ "working_dir": None,
35
+ }