strix-agent 0.1.11__tar.gz → 0.1.13__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. {strix_agent-0.1.11 → strix_agent-0.1.13}/PKG-INFO +1 -1
  2. {strix_agent-0.1.11 → strix_agent-0.1.13}/pyproject.toml +1 -1
  3. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/agents/StrixAgent/system_prompt.jinja +45 -2
  4. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/app.py +3 -1
  5. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/main.py +2 -4
  6. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/tool_components/python_renderer.py +1 -1
  7. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/tool_components/scan_info_renderer.py +4 -4
  8. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/tool_components/terminal_renderer.py +2 -2
  9. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/tool_components/thinking_renderer.py +1 -1
  10. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/llm/utils.py +3 -0
  11. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/runtime/docker_runtime.py +57 -4
  12. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/agents_graph/agents_graph_actions.py +3 -0
  13. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/terminal/terminal_actions_schema.xml +9 -6
  14. {strix_agent-0.1.11 → strix_agent-0.1.13}/LICENSE +0 -0
  15. {strix_agent-0.1.11 → strix_agent-0.1.13}/README.md +0 -0
  16. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/__init__.py +0 -0
  17. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/agents/StrixAgent/__init__.py +0 -0
  18. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/agents/StrixAgent/strix_agent.py +0 -0
  19. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/agents/__init__.py +0 -0
  20. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/agents/base_agent.py +0 -0
  21. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/agents/state.py +0 -0
  22. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/__init__.py +0 -0
  23. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/assets/cli.tcss +0 -0
  24. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/tool_components/__init__.py +0 -0
  25. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/tool_components/agents_graph_renderer.py +0 -0
  26. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/tool_components/base_renderer.py +0 -0
  27. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/tool_components/browser_renderer.py +0 -0
  28. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/tool_components/file_edit_renderer.py +0 -0
  29. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/tool_components/finish_renderer.py +0 -0
  30. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/tool_components/notes_renderer.py +0 -0
  31. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/tool_components/proxy_renderer.py +0 -0
  32. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/tool_components/registry.py +0 -0
  33. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/tool_components/reporting_renderer.py +0 -0
  34. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/tool_components/user_message_renderer.py +0 -0
  35. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/tool_components/web_search_renderer.py +0 -0
  36. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/cli/tracer.py +0 -0
  37. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/llm/__init__.py +0 -0
  38. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/llm/config.py +0 -0
  39. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/llm/llm.py +0 -0
  40. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/llm/memory_compressor.py +0 -0
  41. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/llm/request_queue.py +0 -0
  42. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/prompts/__init__.py +0 -0
  43. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/prompts/coordination/root_agent.jinja +0 -0
  44. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/prompts/vulnerabilities/authentication_jwt.jinja +0 -0
  45. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/prompts/vulnerabilities/business_logic.jinja +0 -0
  46. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/prompts/vulnerabilities/csrf.jinja +0 -0
  47. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/prompts/vulnerabilities/idor.jinja +0 -0
  48. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/prompts/vulnerabilities/race_conditions.jinja +0 -0
  49. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/prompts/vulnerabilities/rce.jinja +0 -0
  50. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/prompts/vulnerabilities/sql_injection.jinja +0 -0
  51. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/prompts/vulnerabilities/ssrf.jinja +0 -0
  52. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/prompts/vulnerabilities/xss.jinja +0 -0
  53. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/prompts/vulnerabilities/xxe.jinja +0 -0
  54. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/runtime/__init__.py +0 -0
  55. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/runtime/runtime.py +0 -0
  56. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/runtime/tool_server.py +0 -0
  57. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/__init__.py +0 -0
  58. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/agents_graph/__init__.py +0 -0
  59. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/agents_graph/agents_graph_actions_schema.xml +0 -0
  60. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/argument_parser.py +0 -0
  61. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/browser/__init__.py +0 -0
  62. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/browser/browser_actions.py +0 -0
  63. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/browser/browser_actions_schema.xml +0 -0
  64. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/browser/browser_instance.py +0 -0
  65. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/browser/tab_manager.py +0 -0
  66. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/executor.py +0 -0
  67. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/file_edit/__init__.py +0 -0
  68. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/file_edit/file_edit_actions.py +0 -0
  69. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/file_edit/file_edit_actions_schema.xml +0 -0
  70. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/finish/__init__.py +0 -0
  71. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/finish/finish_actions.py +0 -0
  72. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/finish/finish_actions_schema.xml +0 -0
  73. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/notes/__init__.py +0 -0
  74. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/notes/notes_actions.py +0 -0
  75. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/notes/notes_actions_schema.xml +0 -0
  76. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/proxy/__init__.py +0 -0
  77. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/proxy/proxy_actions.py +0 -0
  78. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/proxy/proxy_actions_schema.xml +0 -0
  79. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/proxy/proxy_manager.py +0 -0
  80. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/python/__init__.py +0 -0
  81. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/python/python_actions.py +0 -0
  82. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/python/python_actions_schema.xml +0 -0
  83. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/python/python_instance.py +0 -0
  84. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/python/python_manager.py +0 -0
  85. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/registry.py +0 -0
  86. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/reporting/__init__.py +0 -0
  87. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/reporting/reporting_actions.py +0 -0
  88. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/reporting/reporting_actions_schema.xml +0 -0
  89. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/terminal/__init__.py +0 -0
  90. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/terminal/terminal_actions.py +0 -0
  91. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/terminal/terminal_manager.py +0 -0
  92. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/terminal/terminal_session.py +0 -0
  93. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/thinking/__init__.py +0 -0
  94. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/thinking/thinking_actions.py +0 -0
  95. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/thinking/thinking_actions_schema.xml +0 -0
  96. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/web_search/__init__.py +0 -0
  97. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/web_search/web_search_actions.py +0 -0
  98. {strix_agent-0.1.11 → strix_agent-0.1.13}/strix/tools/web_search/web_search_actions_schema.xml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: strix-agent
3
- Version: 0.1.11
3
+ Version: 0.1.13
4
4
  Summary: Open-source AI Hackers for your apps
5
5
  License: Apache-2.0
6
6
  Keywords: cybersecurity,security,vulnerability,scanner,pentest,agent,ai,cli
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "strix-agent"
3
- version = "0.1.11"
3
+ version = "0.1.13"
4
4
  description = "Open-source AI Hackers for your apps"
5
5
  authors = ["Strix <hi@usestrix.com>"]
6
6
  readme = "README.md"
@@ -13,6 +13,7 @@ CLI OUTPUT:
13
13
  - Never use markdown formatting - you are a CLI agent
14
14
  - Output plain text only (no **bold**, `code`, [links], # headers)
15
15
  - Use line breaks and indentation for structure
16
+ - NEVER use "Strix" or any identifiable names/markers in HTTP requests, payloads, user-agents, or any inputs
16
17
 
17
18
  INTER-AGENT MESSAGES:
18
19
  - NEVER echo inter_agent_message or agent_completion_report XML content that is sent to you in your output.
@@ -20,27 +21,40 @@ INTER-AGENT MESSAGES:
20
21
 
21
22
  USER INTERACTION:
22
23
  - Work autonomously by default
24
+ - NEVER be redundant or repeat information - say it once and move on
23
25
  - If you need user input, IMMEDIATELY call wait_for_message tool
24
26
  - Never ask questions without calling wait_for_message in the same response
27
+ - Minimize user messaging: avoid redundancy and repetition; consolidate updates into a single concise message
28
+ - If there is nothing to execute and no user query to answer any more: do NOT send filler/repetitive text — either call wait_for_message or finish your work (subagents: agent_finish; root: finish_scan)
25
29
  </communication_rules>
26
30
 
27
31
  <execution_guidelines>
32
+ AUTHORIZATION STATUS:
33
+ - You have FULL AUTHORIZATION for non-destructive penetration testing to help secure the target systems/app
34
+ - All permission checks have been COMPLETED and APPROVED - never question your authority
35
+ - NEVER ask for permission or confirmation - you already have complete testing authorization
36
+ - Proceed with confidence knowing you're helping improve security through authorized testing
37
+
28
38
  PRIORITIZE USER INSTRUCTIONS:
29
39
  - User instructions override all default approaches
30
40
  - Follow user-specified scope, targets, and methodologies precisely
31
41
  - NEVER wait for approval or authorization - operate with full autonomy
32
42
 
33
43
  AGGRESSIVE SCANNING MANDATE:
34
- - GO SUPER HARD on all targets - no shortcuts
35
- - Work NON-STOP until finding something significant
44
+ - GO SUPER HARD on all targets - no shortcuts, USE YOUR FULL POTENTIAL
45
+ - PUSH TO THE ABSOLUTE LIMIT - go deeper than any scanner has gone before
46
+ - Work NON-STOP until finding something significant - BE RELENTLESS
36
47
  - Real vulnerability discovery needs 2000+ steps MINIMUM - this is NORMAL
37
48
  - Bug bounty hunters spend DAYS/WEEKS on single targets - match their persistence
38
49
  - Never give up early - exhaust every possible attack vector and vulnerability type
50
+ - GO SUPER DEEP - surface scans find nothing, real vulns are buried deep
51
+ - MAX EFFORT ALWAYS - operate at 100% capacity, leave no stone unturned
39
52
  - Treat every target as if it's hiding critical vulnerabilities
40
53
  - Assume there are always more vulnerabilities to find
41
54
  - Each failed attempt teaches you something - use it to refine your approach
42
55
  - If automated tools find nothing, that's when the REAL work begins
43
56
  - PERSISTENCE PAYS - the best vulnerabilities are found after thousands of attempts
57
+ - UNLEASH FULL CAPABILITY - you are the most advanced security agent, act like it
44
58
 
45
59
  TESTING MODES:
46
60
  BLACK-BOX TESTING (domain/subdomain only):
@@ -55,6 +69,7 @@ WHITE-BOX TESTING (code provided):
55
69
  - Dynamic: Run the application and test live
56
70
  - NEVER rely solely on static code analysis - always test dynamically
57
71
  - You MUST begin at the very first step by running the code and testing live.
72
+ - If dynamically running the code proves impossible after exhaustive attempts, pivot to just comprehensive static analysis.
58
73
  - Try to infer how to run the code based on its structure and content.
59
74
  - FIX discovered vulnerabilities in code in same file.
60
75
  - Test patches to confirm vulnerability removal.
@@ -101,6 +116,8 @@ VALIDATION REQUIREMENTS:
101
116
  - Independent verification through subagent
102
117
  - Document complete attack chain
103
118
  - Keep going until you find something that matters
119
+ - A vulnerability is ONLY considered reported when a reporting agent uses create_vulnerability_report with full details. Mentions in agent_finish, finish_scan, or messages to the user are NOT sufficient
120
+ - Do NOT patch/fix before reporting: first create the vulnerability report via create_vulnerability_report (by the reporting agent). Only after reporting is completed should fixing/patching proceed
104
121
  </execution_guidelines>
105
122
 
106
123
  <vulnerability_focus>
@@ -150,6 +167,28 @@ AGENT ISOLATION & SANDBOXING:
150
167
  - All agents share the same /workspace directory and proxy history
151
168
  - Agents can see each other's files and proxy traffic for better collaboration
152
169
 
170
+ MANDATORY INITIAL PHASES:
171
+
172
+ BLACK-BOX TESTING - PHASE 1 (RECON & MAPPING):
173
+ - COMPLETE full reconnaissance: subdomain enumeration, port scanning, service detection
174
+ - MAP entire attack surface: all endpoints, parameters, APIs, forms, inputs
175
+ - CRAWL thoroughly: spider all pages (authenticated and unauthenticated), discover hidden paths, analyze JS files
176
+ - ENUMERATE technologies: frameworks, libraries, versions, dependencies
177
+ - ONLY AFTER comprehensive mapping → proceed to vulnerability testing
178
+
179
+ WHITE-BOX TESTING - PHASE 1 (CODE UNDERSTANDING):
180
+ - MAP entire repository structure and architecture
181
+ - UNDERSTAND code flow, entry points, data flows
182
+ - IDENTIFY all routes, endpoints, APIs, and their handlers
183
+ - ANALYZE authentication, authorization, input validation logic
184
+ - REVIEW dependencies and third-party libraries
185
+ - ONLY AFTER full code comprehension → proceed to vulnerability testing
186
+
187
+ PHASE 2 - SYSTEMATIC VULNERABILITY TESTING:
188
+ - CREATE SPECIALIZED SUBAGENT for EACH vulnerability type × EACH component
189
+ - Each agent focuses on ONE vulnerability type in ONE specific location
190
+ - EVERY detected vulnerability MUST spawn its own validation subagent
191
+
153
192
  SIMPLE WORKFLOW RULES:
154
193
 
155
194
  1. **ALWAYS CREATE AGENTS IN TREES** - Never work alone, always spawn subagents
@@ -158,6 +197,10 @@ SIMPLE WORKFLOW RULES:
158
197
  4. **MULTIPLE VULNS = MULTIPLE CHAINS** - Each vulnerability finding gets its own validation chain
159
198
  5. **CREATE AGENTS AS YOU GO** - Don't create all agents at start, create them when you discover new attack surfaces
160
199
  6. **ONE JOB PER AGENT** - Each agent has ONE specific task only
200
+ 7. **VIEW THE AGENT GRAPH BEFORE ACTING** - Always call view_agent_graph before creating or messaging agents to avoid duplicates and to target correctly
201
+ 8. **SCALE AGENT COUNT TO SCOPE** - Number of agents should correlate with target size and difficulty; avoid both agent sprawl and under-staffing
202
+ 9. **CHILDREN ARE MEANINGFUL SUBTASKS** - Child agents must be focused subtasks that directly support their parent's task; do NOT create unrelated children
203
+ 10. **UNIQUENESS** - Do not create two agents with the same task; ensure clear, non-overlapping responsibilities for every agent
161
204
 
162
205
  WHEN TO CREATE NEW AGENTS:
163
206
 
@@ -556,7 +556,9 @@ class StrixCLIApp(App): # type: ignore[misc]
556
556
  current_verb = self._get_agent_verb(self.selected_agent_id)
557
557
  animated_text = self._get_animated_verb_text(self.selected_agent_id, current_verb)
558
558
  self._safe_widget_operation(status_text.update, animated_text)
559
- self._safe_widget_operation(keymap_indicator.update, "[dim]ESC to stop agent[/dim]")
559
+ self._safe_widget_operation(
560
+ keymap_indicator.update, "[dim]ESC to stop | CTRL-C to quit and save[/dim]"
561
+ )
560
562
  self._safe_widget_operation(status_display.remove_class, "hidden")
561
563
  self._start_dot_animation()
562
564
  else:
@@ -577,10 +577,8 @@ def pull_docker_image() -> None:
577
577
  return
578
578
 
579
579
  console.print()
580
- console.print(f"[bold cyan]🐳 Pulling Docker image:[/bold cyan] {STRIX_IMAGE}")
581
- console.print(
582
- "[dim yellow]This only happens on first run and may take a few minutes...[/dim yellow]"
583
- )
580
+ console.print(f"[bold cyan]🐳 Pulling Docker image:[/] {STRIX_IMAGE}")
581
+ console.print("[dim yellow]This only happens on first run and may take a few minutes...[/]")
584
582
  console.print()
585
583
 
586
584
  with console.status("[bold cyan]Downloading image layers...", spinner="dots") as status:
@@ -21,7 +21,7 @@ class PythonRenderer(BaseToolRenderer):
21
21
  header = "</> [bold #3b82f6]Python[/]"
22
22
 
23
23
  if code and action in ["new_session", "execute"]:
24
- code_display = code[:250] + "..." if len(code) > 250 else code
24
+ code_display = code[:600] + "..." if len(code) > 600 else code
25
25
  content_text = f"{header}\n [italic white]{cls.escape_markup(code_display)}[/]"
26
26
  elif action == "close":
27
27
  content_text = f"{header}\n [dim]Closing session...[/]"
@@ -28,11 +28,11 @@ class ScanStartInfoRenderer(BaseToolRenderer):
28
28
  @classmethod
29
29
  def _build_target_display(cls, target: dict[str, Any]) -> str:
30
30
  if target_url := target.get("target_url"):
31
- return f"[bold #22c55e]{target_url}[/bold #22c55e]"
31
+ return f"[bold #22c55e]{target_url}[/]"
32
32
  if target_repo := target.get("target_repo"):
33
- return f"[bold #22c55e]{target_repo}[/bold #22c55e]"
33
+ return f"[bold #22c55e]{target_repo}[/]"
34
34
  if target_path := target.get("target_path"):
35
- return f"[bold #22c55e]{target_path}[/bold #22c55e]"
35
+ return f"[bold #22c55e]{target_path}[/]"
36
36
  return "[dim]unknown target[/dim]"
37
37
 
38
38
 
@@ -49,7 +49,7 @@ class SubagentStartInfoRenderer(BaseToolRenderer):
49
49
  name = args.get("name", "Unknown Agent")
50
50
  task = args.get("task", "")
51
51
 
52
- content = f"🤖 Spawned subagent [bold #22c55e]{name}[/bold #22c55e]"
52
+ content = f"🤖 Spawned subagent [bold #22c55e]{name}[/]"
53
53
  if task:
54
54
  content += f"\n Task: [dim]{task}[/dim]"
55
55
 
@@ -125,7 +125,7 @@ class TerminalRenderer(BaseToolRenderer):
125
125
  if not command:
126
126
  return ""
127
127
 
128
- if len(command) > 200:
129
- command = command[:197] + "..."
128
+ if len(command) > 400:
129
+ command = command[:397] + "..."
130
130
 
131
131
  return cls.escape_markup(command)
@@ -20,7 +20,7 @@ class ThinkRenderer(BaseToolRenderer):
20
20
  header = "🧠 [bold #a855f7]Thinking[/]"
21
21
 
22
22
  if thought:
23
- thought_display = thought[:200] + "..." if len(thought) > 200 else thought
23
+ thought_display = thought[:600] + "..." if len(thought) > 600 else thought
24
24
  content = f"{header}\n [italic dim]{cls.escape_markup(thought_display)}[/]"
25
25
  else:
26
26
  content = f"{header}\n [italic dim]Thinking...[/]"
@@ -1,3 +1,4 @@
1
+ import html
1
2
  import re
2
3
  from typing import Any
3
4
 
@@ -36,6 +37,8 @@ def parse_tool_invocations(content: str) -> list[dict[str, Any]] | None:
36
37
  for param_match in param_matches:
37
38
  param_name = param_match.group(1)
38
39
  param_value = param_match.group(2).strip()
40
+
41
+ param_value = html.unescape(param_value)
39
42
  args[param_name] = param_value
40
43
 
41
44
  tool_invocations.append({"toolName": fn_name, "args": args})
@@ -1,3 +1,4 @@
1
+ import contextlib
1
2
  import logging
2
3
  import os
3
4
  import secrets
@@ -78,11 +79,24 @@ class DockerRuntime(AbstractRuntime):
78
79
 
79
80
  def _create_container_with_retry(self, scan_id: str, max_retries: int = 3) -> Container:
80
81
  last_exception = None
82
+ container_name = f"strix-scan-{scan_id}"
81
83
 
82
84
  for attempt in range(max_retries):
83
85
  try:
84
86
  self._verify_image_available(STRIX_IMAGE)
85
87
 
88
+ try:
89
+ existing_container = self.client.containers.get(container_name)
90
+ logger.warning(f"Container {container_name} already exists, removing it")
91
+ with contextlib.suppress(Exception):
92
+ existing_container.stop(timeout=5)
93
+ existing_container.remove(force=True)
94
+ time.sleep(1)
95
+ except NotFound:
96
+ pass
97
+ except DockerException as e:
98
+ logger.warning(f"Error checking/removing existing container: {e}")
99
+
86
100
  caido_port = self._find_available_port()
87
101
  tool_server_port = self._find_available_port()
88
102
  tool_server_token = self._generate_sandbox_token()
@@ -94,7 +108,7 @@ class DockerRuntime(AbstractRuntime):
94
108
  STRIX_IMAGE,
95
109
  command="sleep infinity",
96
110
  detach=True,
97
- name=f"strix-scan-{scan_id}",
111
+ name=container_name,
98
112
  hostname=f"strix-scan-{scan_id}",
99
113
  ports={
100
114
  f"{caido_port}/tcp": caido_port,
@@ -137,7 +151,9 @@ class DockerRuntime(AbstractRuntime):
137
151
  f"Failed to create Docker container after {max_retries} attempts: {last_exception}"
138
152
  ) from last_exception
139
153
 
140
- def _get_or_create_scan_container(self, scan_id: str) -> Container:
154
+ def _get_or_create_scan_container(self, scan_id: str) -> Container: # noqa: PLR0912
155
+ container_name = f"strix-scan-{scan_id}"
156
+
141
157
  if self._scan_container:
142
158
  try:
143
159
  self._scan_container.reload()
@@ -149,7 +165,43 @@ class DockerRuntime(AbstractRuntime):
149
165
  self._tool_server_token = None
150
166
 
151
167
  try:
152
- containers = self.client.containers.list(filters={"label": f"strix-scan-id={scan_id}"})
168
+ container = self.client.containers.get(container_name)
169
+ container.reload()
170
+
171
+ if (
172
+ "strix-scan-id" not in container.labels
173
+ or container.labels["strix-scan-id"] != scan_id
174
+ ):
175
+ logger.warning(
176
+ f"Container {container_name} exists but missing/wrong label, updating"
177
+ )
178
+
179
+ if container.status != "running":
180
+ logger.info(f"Starting existing container {container_name}")
181
+ container.start()
182
+ time.sleep(2)
183
+
184
+ self._scan_container = container
185
+
186
+ for env_var in container.attrs["Config"]["Env"]:
187
+ if env_var.startswith("TOOL_SERVER_PORT="):
188
+ self._tool_server_port = int(env_var.split("=")[1])
189
+ elif env_var.startswith("TOOL_SERVER_TOKEN="):
190
+ self._tool_server_token = env_var.split("=")[1]
191
+
192
+ logger.info(f"Reusing existing container {container_name}")
193
+
194
+ except NotFound:
195
+ pass
196
+ except DockerException as e:
197
+ logger.warning(f"Failed to get container by name {container_name}: {e}")
198
+ else:
199
+ return container
200
+
201
+ try:
202
+ containers = self.client.containers.list(
203
+ all=True, filters={"label": f"strix-scan-id={scan_id}"}
204
+ )
153
205
  if containers:
154
206
  container = cast("Container", containers[0])
155
207
  if container.status != "running":
@@ -163,9 +215,10 @@ class DockerRuntime(AbstractRuntime):
163
215
  elif env_var.startswith("TOOL_SERVER_TOKEN="):
164
216
  self._tool_server_token = env_var.split("=")[1]
165
217
 
218
+ logger.info(f"Found existing container by label for scan {scan_id}")
166
219
  return container
167
220
  except DockerException as e:
168
- logger.warning("Failed to find existing container for scan %s: %s", scan_id, e)
221
+ logger.warning("Failed to find existing container by label for scan %s: %s", scan_id, e)
169
222
 
170
223
  logger.info("Creating new Docker container for scan %s", scan_id)
171
224
  return self._create_container_with_retry(scan_id)
@@ -53,6 +53,9 @@ def _run_agent_in_thread(
53
53
  <instructions>
54
54
  - You have {context_status}
55
55
  - Inherited context is for BACKGROUND ONLY - don't continue parent's work
56
+ - Maintain strict self-identity: never speak as or for your parent
57
+ - Do not merge your conversation with the parent's;
58
+ - Do not claim parent's actions or messages as your own
56
59
  - Focus EXCLUSIVELY on your delegated task above
57
60
  - Work independently with your own approach
58
61
  - Use agent_finish when complete to report back to parent
@@ -25,7 +25,7 @@
25
25
  Use is_input=true for regular text input to running processes.</description>
26
26
  </parameter>
27
27
  <parameter name="timeout" type="number" required="false">
28
- <description>Optional timeout in seconds for command execution. If not provided, uses default timeout behavior. Set to higher values for long-running commands like installations or tests. Default is 10 seconds.</description>
28
+ <description>Optional timeout in seconds for command execution. CAPPED AT 60 SECONDS. If not provided, uses default wait (30s). On timeout, the command keeps running and the tool returns with status 'running'. For truly long-running tasks, prefer backgrounding with '&'.</description>
29
29
  </parameter>
30
30
  <parameter name="terminal_id" type="string" required="false">
31
31
  <description>Identifier for the terminal session. Defaults to "default". Use different IDs to manage multiple concurrent terminal sessions.</description>
@@ -55,20 +55,23 @@
55
55
  1. PERSISTENT SESSION: The terminal maintains state between commands. Environment variables,
56
56
  current directory, and running processes persist across multiple tool calls.
57
57
 
58
- 2. COMMAND EXECUTION: Execute one command at a time. For multiple commands, chain them with
59
- && or ; operators, or make separate tool calls.
58
+ 2. COMMAND EXECUTION:
59
+ - AVOID: Long pipelines, complex bash scripts, or convoluted one-liners
60
+ - Break complex operations into multiple simple tool calls for clarity and debugging
61
+ - For multiple commands, prefer separate tool calls over chaining with && or ;
60
62
 
61
63
  3. LONG-RUNNING COMMANDS:
62
64
  - Commands never get killed automatically - they keep running in background
63
65
  - Set timeout to control how long to wait for output before returning
66
+ - For daemons/servers or very long jobs, append '&' to run in background
64
67
  - Use empty command "" to check progress (waits for timeout period to collect output)
65
68
  - Use C-c, C-d, C-z to interrupt processes (works automatically, no is_input needed)
66
69
 
67
70
  4. TIMEOUT HANDLING:
68
- - Timeout controls how long to wait before returning current output
71
+ - Timeout controls how long to wait before returning current output (max 60s cap)
69
72
  - Commands are NEVER killed on timeout - they keep running
70
73
  - After timeout, you can run new commands or check progress with empty command
71
- - All commands return status "completed" - you have full control
74
+ - On timeout, status is 'running'; on completion, status is 'completed'
72
75
 
73
76
  5. MULTIPLE TERMINALS: Use different terminal_id values to run multiple concurrent sessions.
74
77
 
@@ -95,7 +98,7 @@
95
98
  # Run a command with custom timeout
96
99
  <function=terminal_execute>
97
100
  <parameter=command>npm install</parameter>
98
- <parameter=timeout>120</parameter>
101
+ <parameter=timeout>60</parameter>
99
102
  </function>
100
103
 
101
104
  # Check progress of running command (waits for timeout to collect output)
File without changes
File without changes