souleyez-2.43.29-py3-none-any.whl → souleyez-3.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (358)
  1. souleyez/__init__.py +1 -2
  2. souleyez/ai/__init__.py +21 -15
  3. souleyez/ai/action_mapper.py +249 -150
  4. souleyez/ai/chain_advisor.py +116 -100
  5. souleyez/ai/claude_provider.py +29 -28
  6. souleyez/ai/context_builder.py +80 -62
  7. souleyez/ai/executor.py +158 -117
  8. souleyez/ai/feedback_handler.py +136 -121
  9. souleyez/ai/llm_factory.py +27 -20
  10. souleyez/ai/llm_provider.py +4 -2
  11. souleyez/ai/ollama_provider.py +6 -9
  12. souleyez/ai/ollama_service.py +44 -37
  13. souleyez/ai/path_scorer.py +91 -76
  14. souleyez/ai/recommender.py +176 -144
  15. souleyez/ai/report_context.py +74 -73
  16. souleyez/ai/report_service.py +84 -66
  17. souleyez/ai/result_parser.py +222 -229
  18. souleyez/ai/safety.py +67 -44
  19. souleyez/auth/__init__.py +23 -22
  20. souleyez/auth/audit.py +36 -26
  21. souleyez/auth/engagement_access.py +65 -48
  22. souleyez/auth/permissions.py +14 -3
  23. souleyez/auth/session_manager.py +54 -37
  24. souleyez/auth/user_manager.py +109 -64
  25. souleyez/commands/audit.py +40 -43
  26. souleyez/commands/auth.py +35 -15
  27. souleyez/commands/deliverables.py +55 -50
  28. souleyez/commands/engagement.py +47 -28
  29. souleyez/commands/license.py +32 -23
  30. souleyez/commands/screenshots.py +36 -32
  31. souleyez/commands/user.py +82 -36
  32. souleyez/config.py +52 -44
  33. souleyez/core/credential_tester.py +87 -81
  34. souleyez/core/cve_mappings.py +179 -192
  35. souleyez/core/cve_matcher.py +162 -148
  36. souleyez/core/msf_auto_mapper.py +100 -83
  37. souleyez/core/msf_chain_engine.py +294 -256
  38. souleyez/core/msf_database.py +153 -70
  39. souleyez/core/msf_integration.py +679 -673
  40. souleyez/core/msf_rpc_client.py +40 -42
  41. souleyez/core/msf_rpc_manager.py +77 -79
  42. souleyez/core/msf_sync_manager.py +241 -181
  43. souleyez/core/network_utils.py +22 -15
  44. souleyez/core/parser_handler.py +34 -25
  45. souleyez/core/pending_chains.py +114 -63
  46. souleyez/core/templates.py +158 -107
  47. souleyez/core/tool_chaining.py +9564 -2881
  48. souleyez/core/version_utils.py +79 -94
  49. souleyez/core/vuln_correlation.py +136 -89
  50. souleyez/core/web_utils.py +33 -32
  51. souleyez/data/wordlists/ad_users.txt +378 -0
  52. souleyez/data/wordlists/api_endpoints_large.txt +769 -0
  53. souleyez/data/wordlists/home_dir_sensitive.txt +39 -0
  54. souleyez/data/wordlists/lfi_payloads.txt +82 -0
  55. souleyez/data/wordlists/passwords_brute.txt +1548 -0
  56. souleyez/data/wordlists/passwords_crack.txt +2479 -0
  57. souleyez/data/wordlists/passwords_spray.txt +386 -0
  58. souleyez/data/wordlists/subdomains_large.txt +5057 -0
  59. souleyez/data/wordlists/usernames_common.txt +694 -0
  60. souleyez/data/wordlists/web_dirs_large.txt +4769 -0
  61. souleyez/detection/__init__.py +1 -1
  62. souleyez/detection/attack_signatures.py +12 -17
  63. souleyez/detection/mitre_mappings.py +61 -55
  64. souleyez/detection/validator.py +97 -86
  65. souleyez/devtools.py +23 -10
  66. souleyez/docs/README.md +4 -4
  67. souleyez/docs/api-reference/cli-commands.md +2 -2
  68. souleyez/docs/developer-guide/adding-new-tools.md +562 -0
  69. souleyez/docs/user-guide/auto-chaining.md +30 -8
  70. souleyez/docs/user-guide/getting-started.md +1 -1
  71. souleyez/docs/user-guide/installation.md +26 -3
  72. souleyez/docs/user-guide/metasploit-integration.md +2 -2
  73. souleyez/docs/user-guide/rbac.md +1 -1
  74. souleyez/docs/user-guide/scope-management.md +1 -1
  75. souleyez/docs/user-guide/siem-integration.md +1 -1
  76. souleyez/docs/user-guide/tools-reference.md +1 -8
  77. souleyez/docs/user-guide/worker-management.md +1 -1
  78. souleyez/engine/background.py +1239 -535
  79. souleyez/engine/base.py +4 -1
  80. souleyez/engine/job_status.py +17 -49
  81. souleyez/engine/log_sanitizer.py +103 -77
  82. souleyez/engine/manager.py +38 -7
  83. souleyez/engine/result_handler.py +2200 -1550
  84. souleyez/engine/worker_manager.py +50 -41
  85. souleyez/export/evidence_bundle.py +72 -62
  86. souleyez/feature_flags/features.py +16 -20
  87. souleyez/feature_flags.py +5 -9
  88. souleyez/handlers/__init__.py +11 -0
  89. souleyez/handlers/base.py +188 -0
  90. souleyez/handlers/bash_handler.py +277 -0
  91. souleyez/handlers/bloodhound_handler.py +243 -0
  92. souleyez/handlers/certipy_handler.py +311 -0
  93. souleyez/handlers/crackmapexec_handler.py +486 -0
  94. souleyez/handlers/dnsrecon_handler.py +344 -0
  95. souleyez/handlers/enum4linux_handler.py +400 -0
  96. souleyez/handlers/evil_winrm_handler.py +493 -0
  97. souleyez/handlers/ffuf_handler.py +815 -0
  98. souleyez/handlers/gobuster_handler.py +1114 -0
  99. souleyez/handlers/gpp_extract_handler.py +334 -0
  100. souleyez/handlers/hashcat_handler.py +444 -0
  101. souleyez/handlers/hydra_handler.py +564 -0
  102. souleyez/handlers/impacket_getuserspns_handler.py +343 -0
  103. souleyez/handlers/impacket_psexec_handler.py +222 -0
  104. souleyez/handlers/impacket_secretsdump_handler.py +426 -0
  105. souleyez/handlers/john_handler.py +286 -0
  106. souleyez/handlers/katana_handler.py +425 -0
  107. souleyez/handlers/kerbrute_handler.py +298 -0
  108. souleyez/handlers/ldapsearch_handler.py +636 -0
  109. souleyez/handlers/lfi_extract_handler.py +464 -0
  110. souleyez/handlers/msf_auxiliary_handler.py +409 -0
  111. souleyez/handlers/msf_exploit_handler.py +380 -0
  112. souleyez/handlers/nikto_handler.py +413 -0
  113. souleyez/handlers/nmap_handler.py +821 -0
  114. souleyez/handlers/nuclei_handler.py +359 -0
  115. souleyez/handlers/nxc_handler.py +417 -0
  116. souleyez/handlers/rdp_sec_check_handler.py +353 -0
  117. souleyez/handlers/registry.py +292 -0
  118. souleyez/handlers/responder_handler.py +232 -0
  119. souleyez/handlers/service_explorer_handler.py +434 -0
  120. souleyez/handlers/smbclient_handler.py +344 -0
  121. souleyez/handlers/smbmap_handler.py +510 -0
  122. souleyez/handlers/smbpasswd_handler.py +296 -0
  123. souleyez/handlers/sqlmap_handler.py +1116 -0
  124. souleyez/handlers/theharvester_handler.py +601 -0
  125. souleyez/handlers/web_login_test_handler.py +327 -0
  126. souleyez/handlers/whois_handler.py +277 -0
  127. souleyez/handlers/wpscan_handler.py +554 -0
  128. souleyez/history.py +32 -16
  129. souleyez/importers/msf_importer.py +106 -75
  130. souleyez/importers/smart_importer.py +208 -147
  131. souleyez/integrations/siem/__init__.py +10 -10
  132. souleyez/integrations/siem/base.py +17 -18
  133. souleyez/integrations/siem/elastic.py +108 -122
  134. souleyez/integrations/siem/factory.py +207 -80
  135. souleyez/integrations/siem/googlesecops.py +146 -154
  136. souleyez/integrations/siem/rule_mappings/__init__.py +1 -1
  137. souleyez/integrations/siem/rule_mappings/wazuh_rules.py +8 -5
  138. souleyez/integrations/siem/sentinel.py +107 -109
  139. souleyez/integrations/siem/splunk.py +246 -212
  140. souleyez/integrations/siem/wazuh.py +65 -71
  141. souleyez/integrations/wazuh/__init__.py +5 -5
  142. souleyez/integrations/wazuh/client.py +70 -93
  143. souleyez/integrations/wazuh/config.py +85 -57
  144. souleyez/integrations/wazuh/host_mapper.py +28 -36
  145. souleyez/integrations/wazuh/sync.py +78 -68
  146. souleyez/intelligence/__init__.py +4 -5
  147. souleyez/intelligence/correlation_analyzer.py +309 -295
  148. souleyez/intelligence/exploit_knowledge.py +661 -623
  149. souleyez/intelligence/exploit_suggestions.py +159 -139
  150. souleyez/intelligence/gap_analyzer.py +132 -97
  151. souleyez/intelligence/gap_detector.py +251 -214
  152. souleyez/intelligence/sensitive_tables.py +266 -129
  153. souleyez/intelligence/service_parser.py +137 -123
  154. souleyez/intelligence/surface_analyzer.py +407 -268
  155. souleyez/intelligence/target_parser.py +159 -162
  156. souleyez/licensing/__init__.py +6 -6
  157. souleyez/licensing/validator.py +17 -19
  158. souleyez/log_config.py +79 -54
  159. souleyez/main.py +1505 -687
  160. souleyez/migrations/fix_job_counter.py +16 -14
  161. souleyez/parsers/bloodhound_parser.py +41 -39
  162. souleyez/parsers/crackmapexec_parser.py +178 -111
  163. souleyez/parsers/dalfox_parser.py +72 -77
  164. souleyez/parsers/dnsrecon_parser.py +103 -91
  165. souleyez/parsers/enum4linux_parser.py +183 -153
  166. souleyez/parsers/ffuf_parser.py +29 -25
  167. souleyez/parsers/gobuster_parser.py +301 -41
  168. souleyez/parsers/hashcat_parser.py +324 -79
  169. souleyez/parsers/http_fingerprint_parser.py +350 -103
  170. souleyez/parsers/hydra_parser.py +131 -111
  171. souleyez/parsers/impacket_parser.py +231 -178
  172. souleyez/parsers/john_parser.py +98 -86
  173. souleyez/parsers/katana_parser.py +316 -0
  174. souleyez/parsers/msf_parser.py +943 -498
  175. souleyez/parsers/nikto_parser.py +346 -65
  176. souleyez/parsers/nmap_parser.py +262 -174
  177. souleyez/parsers/nuclei_parser.py +40 -44
  178. souleyez/parsers/responder_parser.py +26 -26
  179. souleyez/parsers/searchsploit_parser.py +74 -74
  180. souleyez/parsers/service_explorer_parser.py +279 -0
  181. souleyez/parsers/smbmap_parser.py +180 -124
  182. souleyez/parsers/sqlmap_parser.py +434 -308
  183. souleyez/parsers/theharvester_parser.py +75 -57
  184. souleyez/parsers/whois_parser.py +135 -94
  185. souleyez/parsers/wpscan_parser.py +278 -190
  186. souleyez/plugins/afp.py +44 -36
  187. souleyez/plugins/afp_brute.py +114 -46
  188. souleyez/plugins/ard.py +48 -37
  189. souleyez/plugins/bloodhound.py +95 -61
  190. souleyez/plugins/certipy.py +303 -0
  191. souleyez/plugins/crackmapexec.py +186 -85
  192. souleyez/plugins/dalfox.py +120 -59
  193. souleyez/plugins/dns_hijack.py +146 -41
  194. souleyez/plugins/dnsrecon.py +97 -61
  195. souleyez/plugins/enum4linux.py +91 -66
  196. souleyez/plugins/evil_winrm.py +291 -0
  197. souleyez/plugins/ffuf.py +166 -90
  198. souleyez/plugins/firmware_extract.py +133 -29
  199. souleyez/plugins/gobuster.py +387 -190
  200. souleyez/plugins/gpp_extract.py +393 -0
  201. souleyez/plugins/hashcat.py +100 -73
  202. souleyez/plugins/http_fingerprint.py +913 -267
  203. souleyez/plugins/hydra.py +566 -200
  204. souleyez/plugins/impacket_getnpusers.py +117 -69
  205. souleyez/plugins/impacket_psexec.py +84 -64
  206. souleyez/plugins/impacket_secretsdump.py +103 -69
  207. souleyez/plugins/impacket_smbclient.py +89 -75
  208. souleyez/plugins/john.py +86 -69
  209. souleyez/plugins/katana.py +313 -0
  210. souleyez/plugins/kerbrute.py +237 -0
  211. souleyez/plugins/lfi_extract.py +541 -0
  212. souleyez/plugins/macos_ssh.py +117 -48
  213. souleyez/plugins/mdns.py +35 -30
  214. souleyez/plugins/msf_auxiliary.py +253 -130
  215. souleyez/plugins/msf_exploit.py +239 -161
  216. souleyez/plugins/nikto.py +134 -78
  217. souleyez/plugins/nmap.py +275 -91
  218. souleyez/plugins/nuclei.py +180 -89
  219. souleyez/plugins/nxc.py +285 -0
  220. souleyez/plugins/plugin_base.py +35 -36
  221. souleyez/plugins/plugin_template.py +13 -5
  222. souleyez/plugins/rdp_sec_check.py +130 -0
  223. souleyez/plugins/responder.py +112 -71
  224. souleyez/plugins/router_http_brute.py +76 -65
  225. souleyez/plugins/router_ssh_brute.py +118 -41
  226. souleyez/plugins/router_telnet_brute.py +124 -42
  227. souleyez/plugins/routersploit.py +91 -59
  228. souleyez/plugins/routersploit_exploit.py +77 -55
  229. souleyez/plugins/searchsploit.py +91 -77
  230. souleyez/plugins/service_explorer.py +1160 -0
  231. souleyez/plugins/smbmap.py +122 -72
  232. souleyez/plugins/smbpasswd.py +215 -0
  233. souleyez/plugins/sqlmap.py +301 -113
  234. souleyez/plugins/theharvester.py +127 -75
  235. souleyez/plugins/tr069.py +79 -57
  236. souleyez/plugins/upnp.py +65 -47
  237. souleyez/plugins/upnp_abuse.py +73 -55
  238. souleyez/plugins/vnc_access.py +129 -42
  239. souleyez/plugins/vnc_brute.py +109 -38
  240. souleyez/plugins/web_login_test.py +417 -0
  241. souleyez/plugins/whois.py +77 -58
  242. souleyez/plugins/wpscan.py +219 -69
  243. souleyez/reporting/__init__.py +2 -1
  244. souleyez/reporting/attack_chain.py +411 -346
  245. souleyez/reporting/charts.py +436 -501
  246. souleyez/reporting/compliance_mappings.py +334 -201
  247. souleyez/reporting/detection_report.py +126 -125
  248. souleyez/reporting/formatters.py +828 -591
  249. souleyez/reporting/generator.py +386 -302
  250. souleyez/reporting/metrics.py +72 -75
  251. souleyez/scanner.py +35 -29
  252. souleyez/security/__init__.py +37 -11
  253. souleyez/security/scope_validator.py +175 -106
  254. souleyez/security/validation.py +237 -149
  255. souleyez/security.py +22 -6
  256. souleyez/storage/credentials.py +247 -186
  257. souleyez/storage/crypto.py +296 -129
  258. souleyez/storage/database.py +73 -50
  259. souleyez/storage/db.py +58 -36
  260. souleyez/storage/deliverable_evidence.py +177 -128
  261. souleyez/storage/deliverable_exporter.py +282 -246
  262. souleyez/storage/deliverable_templates.py +134 -116
  263. souleyez/storage/deliverables.py +135 -130
  264. souleyez/storage/engagements.py +109 -56
  265. souleyez/storage/evidence.py +181 -152
  266. souleyez/storage/execution_log.py +31 -17
  267. souleyez/storage/exploit_attempts.py +93 -57
  268. souleyez/storage/exploits.py +67 -36
  269. souleyez/storage/findings.py +48 -61
  270. souleyez/storage/hosts.py +176 -144
  271. souleyez/storage/migrate_to_engagements.py +43 -19
  272. souleyez/storage/migrations/_001_add_credential_enhancements.py +22 -12
  273. souleyez/storage/migrations/_002_add_status_tracking.py +10 -7
  274. souleyez/storage/migrations/_003_add_execution_log.py +14 -8
  275. souleyez/storage/migrations/_005_screenshots.py +13 -5
  276. souleyez/storage/migrations/_006_deliverables.py +13 -5
  277. souleyez/storage/migrations/_007_deliverable_templates.py +12 -7
  278. souleyez/storage/migrations/_008_add_nuclei_table.py +10 -4
  279. souleyez/storage/migrations/_010_evidence_linking.py +17 -10
  280. souleyez/storage/migrations/_011_timeline_tracking.py +20 -13
  281. souleyez/storage/migrations/_012_team_collaboration.py +34 -21
  282. souleyez/storage/migrations/_013_add_host_tags.py +12 -6
  283. souleyez/storage/migrations/_014_exploit_attempts.py +22 -10
  284. souleyez/storage/migrations/_015_add_mac_os_fields.py +15 -7
  285. souleyez/storage/migrations/_016_add_domain_field.py +10 -4
  286. souleyez/storage/migrations/_017_msf_sessions.py +16 -8
  287. souleyez/storage/migrations/_018_add_osint_target.py +10 -6
  288. souleyez/storage/migrations/_019_add_engagement_type.py +10 -6
  289. souleyez/storage/migrations/_020_add_rbac.py +36 -15
  290. souleyez/storage/migrations/_021_wazuh_integration.py +20 -8
  291. souleyez/storage/migrations/_022_wazuh_indexer_columns.py +6 -4
  292. souleyez/storage/migrations/_023_fix_detection_results_fk.py +16 -6
  293. souleyez/storage/migrations/_024_wazuh_vulnerabilities.py +26 -10
  294. souleyez/storage/migrations/_025_multi_siem_support.py +3 -5
  295. souleyez/storage/migrations/_026_add_engagement_scope.py +31 -12
  296. souleyez/storage/migrations/_027_multi_siem_persistence.py +32 -15
  297. souleyez/storage/migrations/__init__.py +26 -26
  298. souleyez/storage/migrations/migration_manager.py +19 -19
  299. souleyez/storage/msf_sessions.py +100 -65
  300. souleyez/storage/osint.py +17 -24
  301. souleyez/storage/recommendation_engine.py +269 -235
  302. souleyez/storage/screenshots.py +33 -32
  303. souleyez/storage/smb_shares.py +136 -92
  304. souleyez/storage/sqlmap_data.py +183 -128
  305. souleyez/storage/team_collaboration.py +135 -141
  306. souleyez/storage/timeline_tracker.py +122 -94
  307. souleyez/storage/wazuh_vulns.py +64 -66
  308. souleyez/storage/web_paths.py +33 -37
  309. souleyez/testing/credential_tester.py +221 -205
  310. souleyez/ui/__init__.py +1 -1
  311. souleyez/ui/ai_quotes.py +12 -12
  312. souleyez/ui/attack_surface.py +2439 -1516
  313. souleyez/ui/chain_rules_view.py +914 -382
  314. souleyez/ui/correlation_view.py +312 -230
  315. souleyez/ui/dashboard.py +2382 -1130
  316. souleyez/ui/deliverables_view.py +148 -62
  317. souleyez/ui/design_system.py +13 -13
  318. souleyez/ui/errors.py +49 -49
  319. souleyez/ui/evidence_linking_view.py +284 -179
  320. souleyez/ui/evidence_vault.py +393 -285
  321. souleyez/ui/exploit_suggestions_view.py +555 -349
  322. souleyez/ui/export_view.py +100 -66
  323. souleyez/ui/gap_analysis_view.py +315 -171
  324. souleyez/ui/help_system.py +105 -97
  325. souleyez/ui/intelligence_view.py +436 -293
  326. souleyez/ui/interactive.py +23034 -10679
  327. souleyez/ui/interactive_selector.py +75 -68
  328. souleyez/ui/log_formatter.py +47 -39
  329. souleyez/ui/menu_components.py +22 -13
  330. souleyez/ui/msf_auxiliary_menu.py +184 -133
  331. souleyez/ui/pending_chains_view.py +336 -172
  332. souleyez/ui/progress_indicators.py +5 -3
  333. souleyez/ui/recommendations_view.py +195 -137
  334. souleyez/ui/rule_builder.py +343 -225
  335. souleyez/ui/setup_wizard.py +678 -284
  336. souleyez/ui/shortcuts.py +217 -165
  337. souleyez/ui/splunk_gap_analysis_view.py +452 -270
  338. souleyez/ui/splunk_vulns_view.py +139 -86
  339. souleyez/ui/team_dashboard.py +498 -335
  340. souleyez/ui/template_selector.py +196 -105
  341. souleyez/ui/terminal.py +6 -6
  342. souleyez/ui/timeline_view.py +198 -127
  343. souleyez/ui/tool_setup.py +264 -164
  344. souleyez/ui/tutorial.py +202 -72
  345. souleyez/ui/tutorial_state.py +40 -40
  346. souleyez/ui/wazuh_vulns_view.py +235 -141
  347. souleyez/ui/wordlist_browser.py +260 -107
  348. souleyez/ui.py +464 -312
  349. souleyez/utils/tool_checker.py +427 -367
  350. souleyez/utils.py +33 -29
  351. souleyez/wordlists.py +134 -167
  352. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/METADATA +2 -2
  353. souleyez-3.0.0.dist-info/RECORD +443 -0
  354. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/WHEEL +1 -1
  355. souleyez-2.43.29.dist-info/RECORD +0 -379
  356. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/entry_points.txt +0 -0
  357. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/licenses/LICENSE +0 -0
  358. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/top_level.txt +0 -0
@@ -14,25 +14,35 @@ Design notes:
 """
 
 from __future__ import annotations
-import os
-import sys
+
+import fcntl
+import inspect
 import json
-import time
-import signal
-import tempfile
+import os
 import shutil
+import signal
 import subprocess
+import sys
+import tempfile
 import threading
-import inspect
+import time
 import traceback
-import fcntl
-from typing import List, Dict, Optional, Any
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional
+
 from souleyez.log_config import get_logger
-from .log_sanitizer import LogSanitizer
+
 from .job_status import (
-    STATUS_QUEUED, STATUS_RUNNING, STATUS_DONE, STATUS_NO_RESULTS,
-    STATUS_WARNING, STATUS_ERROR, STATUS_KILLED, is_chainable
+    STATUS_DONE,
+    STATUS_ERROR,
+    STATUS_KILLED,
+    STATUS_NO_RESULTS,
+    STATUS_QUEUED,
+    STATUS_RUNNING,
+    STATUS_WARNING,
+    is_chainable,
 )
+from .log_sanitizer import LogSanitizer
 
 logger = get_logger(__name__)
 
@@ -43,15 +53,102 @@ LOGS_DIR = os.path.join(DATA_DIR, "logs")
 JOBS_FILE = os.path.join(JOBS_DIR, "jobs.json")
 WORKER_LOG = os.path.join(LOGS_DIR, "worker.log")
 HEARTBEAT_FILE = os.path.join(JOBS_DIR, ".worker_heartbeat")
+JOBS_LOCK_FILE = os.path.join(JOBS_DIR, ".jobs.lock")  # Cross-process file lock
 JOB_TIMEOUT_SECONDS = 3600  # 1 hour (changed from 300s/5min)
 HEARTBEAT_INTERVAL = 10  # seconds between heartbeat writes
 HEARTBEAT_STALE_THRESHOLD = 30  # seconds before heartbeat considered stale
 JOB_HUNG_THRESHOLD = 300  # 5 minutes with no output = possibly hung
 JOBS_BACKUP_COUNT = 3  # Number of rotating backups to keep
+MAX_RETRIES = 2  # Maximum auto-retries for transient errors
+
+# Patterns indicating transient errors that should trigger auto-retry
+# These are network/timing issues that often succeed on retry
+TRANSIENT_ERROR_PATTERNS = [
+    "NetBIOSTimeout",
+    "connection timed out",
+    "Connection timed out",
+    "NETBIOS connection with the remote host timed out",
+    "Connection reset by peer",
+    "temporarily unavailable",
+    "Resource temporarily unavailable",
+    "SMBTimeout",
+    "timed out while waiting",
+]
 
 _lock = threading.RLock()  # Reentrant lock allows nested acquisition by same thread
 
 
+def _is_transient_error(log_content: str) -> bool:
+    """Check if log content indicates a transient error that should be retried."""
+    if not log_content:
+        return False
+    for pattern in TRANSIENT_ERROR_PATTERNS:
+        if pattern.lower() in log_content.lower():
+            return True
+    return False
+
+
+class _CrossProcessLock:
+    """
+    Cross-process file lock using fcntl.flock().
+
+    This ensures that only one process (UI or worker) can read/write
+    jobs.json at a time, preventing race conditions where one process
+    overwrites another's changes.
+    """
+
+    def __init__(self, lock_file: str, timeout: float = 10.0):
+        self.lock_file = lock_file
+        self.timeout = timeout
+        self._fd = None
+
+    def __enter__(self):
+        import errno
+        import fcntl
+
+        # Ensure lock file directory exists
+        os.makedirs(os.path.dirname(self.lock_file), exist_ok=True)
+
+        # Open lock file (create if doesn't exist)
+        self._fd = open(self.lock_file, "w")
+
+        # Try to acquire lock with timeout
+        start_time = time.time()
+        while True:
+            try:
+                fcntl.flock(self._fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+                return self  # Lock acquired
+            except (IOError, OSError) as e:
+                if e.errno not in (errno.EWOULDBLOCK, errno.EAGAIN):
+                    raise
+                # Lock held by another process, wait and retry
+                if time.time() - start_time > self.timeout:
+                    raise TimeoutError(
+                        f"Could not acquire lock on {self.lock_file} within {self.timeout}s"
+                    )
+                time.sleep(0.05)  # 50ms backoff
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        import fcntl
+
+        if self._fd:
+            try:
+                fcntl.flock(self._fd.fileno(), fcntl.LOCK_UN)
+            except Exception:
+                pass
+            try:
+                self._fd.close()
+            except Exception:
+                pass
+            self._fd = None
+        return False  # Don't suppress exceptions
+
+
+def _jobs_lock():
+    """Get a cross-process lock for jobs.json access."""
+    return _CrossProcessLock(JOBS_LOCK_FILE)
+
+
 def _ensure_dirs():
     os.makedirs(JOBS_DIR, exist_ok=True)
     os.makedirs(LOGS_DIR, exist_ok=True)
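The `_CrossProcessLock` added above is an fcntl-based advisory lock meant to be used as a context manager around every read-modify-write of jobs.json. A minimal usage sketch, assuming the class exactly as shown in the hunk; the file path and helper function below are invented for illustration and are not part of the package:

    import json
    import os

    def bump_shared_counter(path: str) -> int:
        """Illustrative only: serialize a read-modify-write across processes."""
        # The lock file lives next to the data file; a second process blocks
        # for at most `timeout` seconds before a TimeoutError is raised.
        with _CrossProcessLock(path + ".lock", timeout=5.0):
            value = 0
            if os.path.exists(path):
                with open(path, "r", encoding="utf-8") as fh:
                    value = json.load(fh)
            value += 1
            with open(path, "w", encoding="utf-8") as fh:
                json.dump(value, fh)
            return value

As the later hunks show, callers in this module catch `TimeoutError` explicitly and fall back to unlocked I/O rather than dropping the operation.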
@@ -102,11 +199,13 @@ def _recover_from_backup() -> List[Dict[str, Any]]:
             with open(backup_path, "r", encoding="utf-8") as fh:
                 jobs = json.load(fh)
             if isinstance(jobs, list):
-                _append_worker_log(f"recovered {len(jobs)} jobs from backup: {backup_path}")
-                logger.info("Jobs recovered from backup", extra={
-                    "backup_path": backup_path,
-                    "job_count": len(jobs)
-                })
+                _append_worker_log(
+                    f"recovered {len(jobs)} jobs from backup: {backup_path}"
+                )
+                logger.info(
+                    "Jobs recovered from backup",
+                    extra={"backup_path": backup_path, "job_count": len(jobs)},
+                )
                 return jobs
         except Exception as e:
             _append_worker_log(f"backup {backup_path} also corrupt: {e}")
@@ -115,19 +214,33 @@ def _recover_from_backup() -> List[Dict[str, Any]]:
 
 
 def _read_jobs() -> List[Dict[str, Any]]:
+    """
+    Read jobs from jobs.json with cross-process file locking.
+
+    The file lock ensures we don't read while another process is writing,
+    preventing partially-written files from being read.
+    """
     _ensure_dirs()
     if not os.path.exists(JOBS_FILE):
         return []
     try:
-        with open(JOBS_FILE, "r", encoding="utf-8") as fh:
-            return json.load(fh)
+        with _jobs_lock():
+            with open(JOBS_FILE, "r", encoding="utf-8") as fh:
+                return json.load(fh)
+    except TimeoutError:
+        # Lock acquisition timed out - log and try without lock
+        _append_worker_log("jobs.json lock timeout on read, reading anyway")
+        try:
+            with open(JOBS_FILE, "r", encoding="utf-8") as fh:
+                return json.load(fh)
+        except Exception:
+            return []
     except Exception as e:
         # Log corruption event
         _append_worker_log(f"jobs.json corrupt: {e}")
-        logger.error("Jobs file corrupted", extra={
-            "error": str(e),
-            "jobs_file": JOBS_FILE
-        })
+        logger.error(
+            "Jobs file corrupted", extra={"error": str(e), "jobs_file": JOBS_FILE}
+        )
 
         # Try to recover from backup
         recovered_jobs = _recover_from_backup()
@@ -143,7 +256,7 @@ def _read_jobs() -> List[Dict[str, Any]]:
         # If we recovered jobs, write them back
         if recovered_jobs:
             try:
-                _write_jobs(recovered_jobs)
+                _write_jobs_unlocked(recovered_jobs)
                 _append_worker_log(f"restored {len(recovered_jobs)} jobs from backup")
             except Exception as write_err:
                 _append_worker_log(f"failed to restore jobs: {write_err}")
@@ -151,7 +264,19 @@ def _read_jobs() -> List[Dict[str, Any]]:
         return recovered_jobs
 
 
-def _write_jobs(jobs: List[Dict[str, Any]]):
+def _read_jobs_unlocked() -> List[Dict[str, Any]]:
+    """Read jobs without acquiring file lock (for internal use when lock already held)."""
+    if not os.path.exists(JOBS_FILE):
+        return []
+    try:
+        with open(JOBS_FILE, "r", encoding="utf-8") as fh:
+            return json.load(fh)
+    except Exception:
+        return []
+
+
+def _write_jobs_unlocked(jobs: List[Dict[str, Any]]):
+    """Write jobs without acquiring file lock (for internal use when lock already held)."""
     _ensure_dirs()
 
     # Rotate backups before writing (keeps last 3 good copies)
@@ -167,11 +292,29 @@ def _write_jobs(jobs: List[Dict[str, Any]]):
     finally:
         if os.path.exists(tmp.name):
             try:
-                os.remove(tmp.name)
+                os.unlink(tmp.name)
             except Exception:
                 pass
 
 
+def _write_jobs(jobs: List[Dict[str, Any]]):
+    """
+    Write jobs to jobs.json with cross-process file locking.
+
+    The file lock ensures we don't write while another process is reading
+    or writing, preventing race conditions.
+    """
+    _ensure_dirs()
+
+    try:
+        with _jobs_lock():
+            _write_jobs_unlocked(jobs)
+    except TimeoutError:
+        # Lock acquisition timed out - log and write anyway (better than losing data)
+        _append_worker_log("jobs.json lock timeout on write, writing anyway")
+        _write_jobs_unlocked(jobs)
+
+
 def _append_worker_log(msg: str):
     _ensure_dirs()
     ts = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
@@ -184,7 +327,7 @@ def _update_heartbeat():
     """Write current timestamp to heartbeat file for health monitoring."""
     _ensure_dirs()
     try:
-        with open(HEARTBEAT_FILE, 'w') as fh:
+        with open(HEARTBEAT_FILE, "w") as fh:
             fh.write(str(time.time()))
     except Exception:
         pass  # Non-critical, don't crash worker
@@ -199,7 +342,7 @@ def get_heartbeat_age() -> Optional[float]:
     """
     try:
         if os.path.exists(HEARTBEAT_FILE):
-            with open(HEARTBEAT_FILE, 'r') as fh:
+            with open(HEARTBEAT_FILE, "r") as fh:
                 last_beat = float(fh.read().strip())
                 return time.time() - last_beat
         return None
@@ -227,13 +370,13 @@ def _get_process_start_time(pid: int) -> Optional[float]:
         if not os.path.exists(stat_path):
             return None
 
-        with open(stat_path, 'r') as f:
+        with open(stat_path, "r") as f:
             stat = f.read()
 
         # Parse stat file - field 22 is starttime (in clock ticks since boot)
         # Format: pid (comm) state ppid pgrp session tty_nr ... starttime ...
        # Need to handle comm field which may contain spaces/parentheses
-        parts = stat.rsplit(')', 1)
+        parts = stat.rsplit(")", 1)
         if len(parts) < 2:
             return None
 
@@ -241,19 +384,21 @@ def _get_process_start_time(pid: int) -> Optional[float]:
         if len(fields) < 20:
             return None
 
-        starttime_ticks = int(fields[19])  # 0-indexed, field 22 is at index 19 after comm
+        starttime_ticks = int(
+            fields[19]
+        )  # 0-indexed, field 22 is at index 19 after comm
 
         # Convert to timestamp using system boot time and clock ticks per second
-        with open('/proc/stat', 'r') as f:
+        with open("/proc/stat", "r") as f:
             for line in f:
-                if line.startswith('btime'):
+                if line.startswith("btime"):
                     boot_time = int(line.split()[1])
                     break
             else:
                 return None
 
         # Get clock ticks per second (usually 100)
-        ticks_per_sec = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
+        ticks_per_sec = os.sysconf(os.sysconf_names["SC_CLK_TCK"])
 
         return boot_time + (starttime_ticks / ticks_per_sec)
     except Exception:
@@ -275,14 +420,14 @@ def _next_job_id(jobs: List[Dict[str, Any]]) -> int:
     _ensure_dirs()
 
     # Use a separate lock file to allow atomic read-modify-write
-    with open(lock_file, 'w') as lock_fh:
+    with open(lock_file, "w") as lock_fh:
         # Acquire exclusive lock (blocks until available)
         fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX)
 
         try:
             # Read current counter
             if os.path.exists(counter_file):
-                with open(counter_file, 'r') as f:
+                with open(counter_file, "r") as f:
                     next_id = int(f.read().strip())
             else:
                 # Initialize from existing jobs
@@ -296,8 +441,8 @@ def _next_job_id(jobs: List[Dict[str, Any]]) -> int:
                 next_id = maxid + 1
 
             # Write incremented counter atomically
-            tmp_file = counter_file + '.tmp'
-            with open(tmp_file, 'w') as f:
+            tmp_file = counter_file + ".tmp"
+            with open(tmp_file, "w") as f:
                 f.write(str(next_id + 1))
                 f.flush()
                 os.fsync(f.fileno())
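The `_next_job_id()` hunks above keep the pre-existing pattern: a blocking `fcntl.flock()` on a dedicated lock file guards a counter file that is rewritten through a temp file with `fsync`. A condensed, self-contained sketch of that allocation pattern, assuming a final atomic rename; the function and file names below are illustrative, not the package's:

    import fcntl
    import os

    def allocate_next_id(counter_file: str, lock_file: str) -> int:
        """Illustrative sketch of a flock-guarded, crash-safe ID counter."""
        with open(lock_file, "w") as lock_fh:
            fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX)  # blocks until exclusive
            try:
                current = 1
                if os.path.exists(counter_file):
                    with open(counter_file, "r") as f:
                        current = int(f.read().strip())
                tmp = counter_file + ".tmp"
                with open(tmp, "w") as f:
                    f.write(str(current + 1))
                    f.flush()
                    os.fsync(f.fileno())
                os.replace(tmp, counter_file)  # atomic rename on POSIX
                return current
            finally:
                fcntl.flock(lock_fh.fileno(), fcntl.LOCK_UN)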
@@ -321,133 +466,235 @@ def _next_job_id(jobs: List[Dict[str, Any]]) -> int:
     return maxid + 1
 
 
-def enqueue_job(tool: str, target: str, args: List[str], label: str = "", engagement_id: int = None, metadata: Dict[str, Any] = None, parent_id: int = None, reason: str = None, rule_id: int = None, skip_scope_check: bool = False) -> int:
-    with _lock:
-        jobs = _read_jobs()
-        jid = _next_job_id(jobs)
-        now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
+def enqueue_job(
+    tool: str,
+    target: str,
+    args: List[str],
+    label: str = "",
+    engagement_id: int = None,
+    metadata: Dict[str, Any] = None,
+    parent_id: int = None,
+    reason: str = None,
+    rule_id: int = None,
+    skip_scope_check: bool = False,
+) -> int:
+    # Prepare data outside lock to minimize lock hold time
+    now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
 
-        # Get current engagement if not specified
-        if engagement_id is None:
-            try:
-                from souleyez.storage.engagements import EngagementManager
-                em = EngagementManager()
-                current = em.get_current()
-                engagement_id = current['id'] if current else None
-            except BaseException:
-                engagement_id = None
-
-        # Merge parent_id, reason, and rule_id into metadata
-        job_metadata = metadata or {}
-
-        # Scope validation - check if target is within engagement scope
-        if not skip_scope_check and engagement_id:
-            try:
-                from souleyez.security.scope_validator import ScopeValidator, ScopeViolationError
-                validator = ScopeValidator(engagement_id)
-                result = validator.validate_target(target)
-                enforcement = validator.get_enforcement_mode()
-
-                if not result.is_in_scope and validator.has_scope_defined():
-                    if enforcement == 'block':
-                        validator.log_validation(target, result, 'blocked', job_id=jid)
-                        raise ScopeViolationError(
-                            f"Target '{target}' is out of scope. {result.reason}"
+    # Get current engagement if not specified
+    if engagement_id is None:
+        try:
+            from souleyez.storage.engagements import EngagementManager
+
+            em = EngagementManager()
+            current = em.get_current()
+            engagement_id = current["id"] if current else None
+        except BaseException:
+            engagement_id = None
+
+    # Merge parent_id, reason, and rule_id into metadata
+    job_metadata = metadata or {}
+    if parent_id is not None:
+        job_metadata["parent_id"] = parent_id
+    if reason:
+        job_metadata["reason"] = reason
+    if rule_id is not None:
+        job_metadata["rule_id"] = rule_id
+
+    # Atomic read-modify-write with both thread lock and cross-process file lock
+    with _lock:  # Thread safety within this process
+        try:
+            with _jobs_lock():  # Cross-process safety
+                _ensure_dirs()
+                jobs = _read_jobs_unlocked()
+                jid = _next_job_id(jobs)
+
+                # Scope validation - check if target is within engagement scope
+                # Done inside lock because it uses jid for logging
+                if not skip_scope_check and engagement_id:
+                    try:
+                        from souleyez.security.scope_validator import (
+                            ScopeValidator,
+                            ScopeViolationError,
                         )
-                    elif enforcement == 'warn':
-                        validator.log_validation(target, result, 'warned', job_id=jid)
-                        if 'warnings' not in job_metadata:
-                            job_metadata['warnings'] = []
-                        job_metadata['warnings'].append(
-                            f"SCOPE WARNING: {target} may be out of scope. {result.reason}"
+
+                        validator = ScopeValidator(engagement_id)
+                        result = validator.validate_target(target)
+                        enforcement = validator.get_enforcement_mode()
+
+                        if not result.is_in_scope and validator.has_scope_defined():
+                            if enforcement == "block":
+                                validator.log_validation(
+                                    target, result, "blocked", job_id=jid
+                                )
+                                raise ScopeViolationError(
+                                    f"Target '{target}' is out of scope. {result.reason}"
+                                )
+                            elif enforcement == "warn":
+                                validator.log_validation(
+                                    target, result, "warned", job_id=jid
+                                )
+                                if "warnings" not in job_metadata:
+                                    job_metadata["warnings"] = []
+                                job_metadata["warnings"].append(
+                                    f"SCOPE WARNING: {target} may be out of scope. {result.reason}"
+                                )
+                                logger.warning(
+                                    "Out-of-scope target allowed (warn mode)",
+                                    extra={
+                                        "target": target,
+                                        "engagement_id": engagement_id,
+                                        "reason": result.reason,
+                                    },
+                                )
+                            else:
+                                validator.log_validation(
+                                    target, result, "allowed", job_id=jid
+                                )
+                    except ScopeViolationError:
+                        raise  # Re-raise scope violations
+                    except Exception as e:
+                        # Don't block jobs if scope validation fails unexpectedly
+                        logger.warning(
+                            "Scope validation error (allowing job)",
+                            extra={"target": target, "error": str(e)},
                         )
-                        logger.warning("Out-of-scope target allowed (warn mode)", extra={
-                            "target": target,
-                            "engagement_id": engagement_id,
-                            "reason": result.reason
-                        })
-                    else:
-                        validator.log_validation(target, result, 'allowed', job_id=jid)
-            except ScopeViolationError:
-                raise  # Re-raise scope violations
-            except Exception as e:
-                # Don't block jobs if scope validation fails unexpectedly
-                logger.warning("Scope validation error (allowing job)", extra={
+
+                job = {
+                    "id": jid,
+                    "tool": tool,
                     "target": target,
-                    "error": str(e)
-                })
-        if parent_id is not None:
-            job_metadata['parent_id'] = parent_id
-        if reason:
-            job_metadata['reason'] = reason
-        if rule_id is not None:
-            job_metadata['rule_id'] = rule_id
-
-        job = {
-            "id": jid,
+                    "args": args or [],
+                    "label": label or "",
+                    "status": STATUS_QUEUED,
+                    "created_at": now,
+                    "started_at": None,
+                    "finished_at": None,
+                    "result_scan_id": None,
+                    "error": None,
+                    "log": os.path.join(JOBS_DIR, f"{jid}.log"),
+                    "pid": None,
+                    "engagement_id": engagement_id,
+                    "chainable": False,
+                    "chained": False,
+                    "chained_job_ids": [],
+                    "chain_error": None,
+                    "metadata": job_metadata,
+                    "parent_id": parent_id,  # Top-level field for easier querying
+                    "rule_id": rule_id,  # Rule that triggered this job (if auto-chained)
+                }
+                jobs.append(job)
+                _write_jobs_unlocked(jobs)
+        except TimeoutError:
+            # Lock acquisition timed out - fall back to non-locked operation
+            _append_worker_log("jobs.json lock timeout in enqueue_job, using fallback")
+            jobs = _read_jobs()
+            jid = _next_job_id(jobs)
+
+            # Scope validation fallback
+            if not skip_scope_check and engagement_id:
+                try:
+                    from souleyez.security.scope_validator import (
+                        ScopeValidator,
+                        ScopeViolationError,
+                    )
+
+                    validator = ScopeValidator(engagement_id)
+                    result = validator.validate_target(target)
+                    enforcement = validator.get_enforcement_mode()
+
+                    if not result.is_in_scope and validator.has_scope_defined():
+                        if enforcement == "block":
+                            validator.log_validation(
+                                target, result, "blocked", job_id=jid
+                            )
+                            raise ScopeViolationError(
+                                f"Target '{target}' is out of scope. {result.reason}"
+                            )
+                        elif enforcement == "warn":
+                            validator.log_validation(
+                                target, result, "warned", job_id=jid
+                            )
+                            if "warnings" not in job_metadata:
+                                job_metadata["warnings"] = []
+                            job_metadata["warnings"].append(
+                                f"SCOPE WARNING: {target} may be out of scope. {result.reason}"
+                            )
+                        else:
+                            validator.log_validation(target, result, "allowed", job_id=jid)
+                except ScopeViolationError:
+                    raise
+                except Exception:
+                    pass
+
+            job = {
+                "id": jid,
+                "tool": tool,
+                "target": target,
+                "args": args or [],
+                "label": label or "",
+                "status": STATUS_QUEUED,
+                "created_at": now,
+                "started_at": None,
+                "finished_at": None,
+                "result_scan_id": None,
+                "error": None,
+                "log": os.path.join(JOBS_DIR, f"{jid}.log"),
+                "pid": None,
+                "engagement_id": engagement_id,
+                "chainable": False,
+                "chained": False,
+                "chained_job_ids": [],
+                "chain_error": None,
+                "metadata": job_metadata,
+                "parent_id": parent_id,
+                "rule_id": rule_id,
+            }
+            jobs.append(job)
+            _write_jobs(jobs)
+
+    logger.info(
+        "Job enqueued",
+        extra={
+            "event_type": "job_enqueued",
+            "job_id": jid,
             "tool": tool,
             "target": target,
-            "args": args or [],
-            "label": label or "",
-            "status": STATUS_QUEUED,
-            "created_at": now,
-            "started_at": None,
-            "finished_at": None,
-            "result_scan_id": None,
-            "error": None,
-            "log": os.path.join(JOBS_DIR, f"{jid}.log"),
-            "pid": None,
             "engagement_id": engagement_id,
-            "chainable": False,
-            "chained": False,
-            "chained_job_ids": [],
-            "chain_error": None,
-            "metadata": job_metadata,
-            "parent_id": parent_id,  # Top-level field for easier querying
-            "rule_id": rule_id  # Rule that triggered this job (if auto-chained)
-        }
-        jobs.append(job)
-        _write_jobs(jobs)
-
-        logger.info("Job enqueued", extra={
-            "event_type": "job_enqueued",
-            "job_id": jid,
-            "tool": tool,
-            "target": target,
-            "engagement_id": engagement_id,
-            "label": label
-        })
+            "label": label,
+        },
+    )
     _append_worker_log(f"enqueued job {jid}: {tool} {target}")
     return jid
 
 
 def list_jobs(limit: int = 100) -> List[Dict[str, Any]]:
     jobs = _read_jobs()
-    # Sort by job ID ascending (oldest/lowest ID first)
-    return sorted(jobs, key=lambda x: x.get("id", 0), reverse=False)[:limit]
+    # Sort by job ID descending (newest first) so limit cuts old jobs, not new ones
+    return sorted(jobs, key=lambda x: x.get("id", 0), reverse=True)[:limit]
 
 
 def get_active_jobs() -> List[Dict[str, Any]]:
     """Get all running/pending/queued jobs without limit.
-
+
     Returns jobs sorted with running jobs first, then by ID descending.
     """
     jobs = _read_jobs()
-    active = [j for j in jobs if j.get('status') in ('pending', 'running', 'queued')]
-
+    active = [j for j in jobs if j.get("status") in ("pending", "running", "queued")]
+
     # Sort: running jobs first, then by ID descending (newest first)
     def sort_key(j):
-        status = j.get('status', '')
-        status_priority = 0 if status == 'running' else 1
-        job_id = j.get('id', 0)
+        status = j.get("status", "")
+        status_priority = 0 if status == "running" else 1
+        job_id = j.get("id", 0)
         return (status_priority, -job_id)
-
+
     return sorted(active, key=sort_key)
 
 
 def get_all_jobs() -> List[Dict[str, Any]]:
     """Get ALL jobs without any limit.
-
+
     Returns jobs sorted by ID descending (newest first).
     """
     jobs = _read_jobs()
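`enqueue_job()` keeps its public signature through this refactor, so existing callers are unaffected; only the locking around the jobs.json update changes, and `list_jobs()` now returns newest jobs first. A minimal calling sketch; the tool, target, and label values are invented for illustration:

    # Illustrative values only; any registered plugin name could be used.
    job_id = enqueue_job(
        tool="nmap",
        target="10.0.0.5",
        args=["-sV", "-p-"],
        label="initial service scan",
        reason="baseline discovery",
    )
    print(f"queued job {job_id}")
    for job in list_jobs(limit=10):  # newest first after this change
        print(job["id"], job["tool"], job["status"])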
@@ -476,7 +723,7 @@ def kill_job(jid: int) -> bool:
     if not job:
         return False
 
-    status = job.get('status')
+    status = job.get("status")
     now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
 
     # Handle queued jobs - just mark as killed
@@ -493,7 +740,7 @@ def kill_job(jid: int) -> bool:
 
     # Handle running jobs - send signal
     if status == STATUS_RUNNING:
-        pid = job.get('pid')
+        pid = job.get("pid")
         if not pid:
             _update_job(jid, status=STATUS_KILLED, finished_at=now)
             return True
@@ -506,7 +753,8 @@ def kill_job(jid: int) -> bool:
                 pgid = os.getpgid(pid)
             except ProcessLookupError:
                 # Process already dead
-                _update_job(jid, status="killed", finished_at=now, pid=None)
+                _update_job(jid, status=STATUS_KILLED, finished_at=now, pid=None)
+                _append_worker_log(f"job {jid}: process already dead, marked as killed")
                 return True
 
             # Kill entire process group (parent + all children)
@@ -515,10 +763,15 @@ def kill_job(jid: int) -> bool:
                 _append_worker_log(f"job {jid}: sent SIGTERM to process group {pgid}")
             except ProcessLookupError:
                 # Process group already dead
-                _update_job(jid, status="killed", finished_at=now, pid=None)
+                _update_job(jid, status=STATUS_KILLED, finished_at=now, pid=None)
+                _append_worker_log(
+                    f"job {jid}: process group already dead, marked as killed"
+                )
                 return True
             except PermissionError:
-                _append_worker_log(f"job {jid}: permission denied to kill process group {pgid}")
+                _append_worker_log(
+                    f"job {jid}: permission denied to kill process group {pgid}"
+                )
                 return False
 
             # Wait briefly for graceful termination
@@ -533,11 +786,13 @@ def kill_job(jid: int) -> bool:
                 pass  # Already dead, good
 
             # Update job status
-            _update_job(jid, status="killed", finished_at=now, pid=None)
+            _update_job(jid, status=STATUS_KILLED, finished_at=now, pid=None)
+            _append_worker_log(f"job {jid}: killed successfully")
             return True
         except ProcessLookupError:
             # Process already dead
-            _update_job(jid, status="killed", finished_at=now, pid=None)
+            _update_job(jid, status=STATUS_KILLED, finished_at=now, pid=None)
+            _append_worker_log(f"job {jid}: process already dead, marked as killed")
             return True
         except PermissionError:
             _append_worker_log(f"job {jid}: permission denied to kill PID {pid}")
@@ -547,6 +802,7 @@ def kill_job(jid: int) -> bool:
             return False
 
     # Job is in some other state (done, killed, etc.)
+    _append_worker_log(f"job {jid}: cannot kill - status is '{status}'")
     return False
 
 
@@ -554,32 +810,63 @@ def delete_job(jid: int) -> bool:
     """
     Delete a job from the queue (completed jobs only).
 
+    Uses atomic read-modify-write with cross-process file locking.
+
     Args:
         jid: Job ID to delete
 
     Returns:
         True if job was deleted, False if not found or still running
     """
-    job = get_job(jid)
-    if not job:
-        return False
+    with _lock:  # Thread safety within this process
+        try:
+            with _jobs_lock():  # Cross-process safety
+                jobs = _read_jobs_unlocked()
+                job = None
+                for j in jobs:
+                    if j.get("id") == jid:
+                        job = j
+                        break
 
-    # Don't delete running or pending jobs
-    if job.get('status') in ('running', 'pending'):
-        return False
+                if not job:
+                    return False
 
-    with _lock:
-        jobs = _read_jobs()
-        jobs = [j for j in jobs if j.get("id") != jid]
-        _write_jobs(jobs)
+                # Don't delete running or pending jobs
+                if job.get("status") in ("running", "pending"):
+                    return False
 
-    return True
+                jobs = [j for j in jobs if j.get("id") != jid]
+                _write_jobs_unlocked(jobs)
+                return True
+        except TimeoutError:
+            # Fall back to non-locked operation
+            _append_worker_log(
+                f"jobs.json lock timeout in delete_job for {jid}, using fallback"
+            )
+            jobs = _read_jobs()
+            job = None
+            for j in jobs:
+                if j.get("id") == jid:
+                    job = j
+                    break
+
+            if not job:
+                return False
+
+            if job.get("status") in ("running", "pending"):
+                return False
+
+            jobs = [j for j in jobs if j.get("id") != jid]
+            _write_jobs(jobs)
+            return True
 
 
 def purge_jobs(status_filter: List[str] = None, engagement_id: int = None) -> int:
     """
     Purge multiple jobs at once based on filters.
 
+    Uses atomic read-modify-write with cross-process file locking.
+
     Args:
         status_filter: List of statuses to purge (e.g., ['done', 'error', 'killed'])
             If None, purges all non-running jobs
@@ -589,36 +876,46 @@ def purge_jobs(status_filter: List[str] = None, engagement_id: int = None) -> int:
         Number of jobs purged
     """
     if status_filter is None:
-        status_filter = ['done', 'error', 'killed']
-
-    with _lock:
-        jobs = _read_jobs()
-        original_count = len(jobs)
+        status_filter = ["done", "error", "killed"]
 
-        # Filter out jobs to keep
+    def _filter_jobs(jobs):
+        """Filter out jobs to keep based on criteria."""
         kept_jobs = []
         for j in jobs:
             # Keep running/pending jobs always
-            if j.get('status') in ('running', 'pending'):
+            if j.get("status") in ("running", "pending"):
                 kept_jobs.append(j)
                 continue
 
             # Keep if status doesn't match filter
-            if j.get('status') not in status_filter:
+            if j.get("status") not in status_filter:
                 kept_jobs.append(j)
                 continue
 
             # Keep if engagement_id specified and doesn't match
-            if engagement_id is not None and j.get('engagement_id') != engagement_id:
+            if engagement_id is not None and j.get("engagement_id") != engagement_id:
                 kept_jobs.append(j)
                 continue
 
             # Otherwise, purge this job (don't add to kept_jobs)
+        return kept_jobs
 
-        _write_jobs(kept_jobs)
-        purged_count = original_count - len(kept_jobs)
-
-        return purged_count
+    with _lock:  # Thread safety within this process
+        try:
+            with _jobs_lock():  # Cross-process safety
+                jobs = _read_jobs_unlocked()
+                original_count = len(jobs)
+                kept_jobs = _filter_jobs(jobs)
+                _write_jobs_unlocked(kept_jobs)
+                return original_count - len(kept_jobs)
+        except TimeoutError:
+            # Fall back to non-locked operation
+            _append_worker_log("jobs.json lock timeout in purge_jobs, using fallback")
+            jobs = _read_jobs()
+            original_count = len(jobs)
+            kept_jobs = _filter_jobs(jobs)
+            _write_jobs(kept_jobs)
+            return original_count - len(kept_jobs)
 
 
 def purge_all_jobs() -> int:
@@ -629,12 +926,15 @@ def purge_all_jobs() -> int:
     Returns:
         Number of jobs purged
     """
-    return purge_jobs(status_filter=['done', 'error', 'killed'])
+    return purge_jobs(status_filter=["done", "error", "killed"])
 
 
 def _update_job(jid: int, respect_killed: bool = True, **fields):
     """
-    Update job fields atomically.
+    Update job fields atomically with cross-process locking.
+
+    Uses both threading lock (for same-process safety) and file lock
+    (for cross-process safety) to ensure atomic read-modify-write.
 
     Args:
         jid: Job ID to update
@@ -642,29 +942,63 @@ def _update_job(jid: int, respect_killed: bool = True, **fields):
             This prevents race condition where job is killed while completing.
         **fields: Fields to update
     """
-    with _lock:
-        jobs = _read_jobs()
-        changed = False
-        for j in jobs:
-            if j.get("id") == jid:
-                # Race condition protection: don't change status of killed jobs
-                if respect_killed and j.get("status") == STATUS_KILLED and "status" in fields:
-                    # Job was killed - don't overwrite status, but allow other updates
-                    fields_copy = dict(fields)
-                    del fields_copy["status"]
-                    if fields_copy:
-                        j.update(fields_copy)
-                        changed = True
-                    logger.debug("Skipped status update for killed job", extra={
-                        "job_id": jid,
-                        "attempted_status": fields.get("status")
-                    })
-                else:
+    with _lock:  # Thread safety within this process
+        try:
+            with _jobs_lock():  # Cross-process safety
+                # Read directly without going through _read_jobs (we already have lock)
+                _ensure_dirs()
+                jobs = []
+                if os.path.exists(JOBS_FILE):
+                    try:
+                        with open(JOBS_FILE, "r", encoding="utf-8") as fh:
+                            jobs = json.load(fh)
+                    except Exception:
+                        jobs = []
+
+                changed = False
+                for j in jobs:
+                    if j.get("id") == jid:
+                        # Race condition protection: don't change status of killed jobs
+                        if (
+                            respect_killed
+                            and j.get("status") == STATUS_KILLED
+                            and "status" in fields
+                        ):
+                            # Job was killed - don't overwrite status, but allow other updates
+                            fields_copy = dict(fields)
+                            del fields_copy["status"]
+                            if fields_copy:
+                                j.update(fields_copy)
+                                changed = True
+                            logger.debug(
+                                "Skipped status update for killed job",
+                                extra={
+                                    "job_id": jid,
+                                    "attempted_status": fields.get("status"),
+                                },
+                            )
+                        else:
+                            j.update(fields)
+                            changed = True
+                        break
+
+                if changed:
+                    # Write directly without going through _write_jobs (we already have lock)
+                    _write_jobs_unlocked(jobs)
+        except TimeoutError:
+            # Fall back to non-locked operation (better than failing)
+            _append_worker_log(
+                f"jobs.json lock timeout updating job {jid}, using fallback"
+            )
+            jobs = _read_jobs()
+            changed = False
+            for j in jobs:
+                if j.get("id") == jid:
                     j.update(fields)
                     changed = True
-                break
-        if changed:
-            _write_jobs(jobs)
+                    break
+            if changed:
+                _write_jobs(jobs)
 
 
 def _process_pending_chains():
@@ -685,83 +1019,139 @@ def _process_pending_chains():
     try:
         jobs = _read_jobs()
 
+        # Cleanup: Mark jobs stuck in "chaining in progress" for too long (> 5 min) as failed
+        CHAIN_TIMEOUT_SECONDS = 300  # 5 minutes
+        now = datetime.now(timezone.utc)
+        for j in jobs:
+            chaining_started = j.get("chaining_started_at")
+            if chaining_started and not j.get("chained", False):
+                try:
+                    started_at = datetime.fromisoformat(
+                        chaining_started.replace("Z", "+00:00")
+                    )
+                    if (now - started_at).total_seconds() > CHAIN_TIMEOUT_SECONDS:
+                        jid = j.get("id")
+                        _append_worker_log(
+                            f"job {jid}: chaining timed out after {CHAIN_TIMEOUT_SECONDS}s, marking as failed"
+                        )
+                        _update_job(
+                            jid,
+                            chained=True,
+                            chain_error="Chaining timed out",
+                            chaining_started_at=None,
+                        )
+                except Exception:
+                    pass  # Ignore parse errors
+
         # Find jobs ready for chaining
         # Include jobs with chainable statuses: done, no_results, warning
+        # Skip jobs that are currently being chained (chaining_started_at is set)
         chainable_jobs = [
-            j for j in jobs
-            if j.get('chainable', False) == True
-            and j.get('chained', False) == False
-            and is_chainable(j.get('status', ''))
+            j
+            for j in jobs
+            if j.get("chainable", False) == True
+            and j.get("chained", False) == False
+            and is_chainable(j.get("status", ""))
+            and not j.get("chaining_started_at")  # Skip if already being processed
         ]
 
         if not chainable_jobs:
            return 0  # Nothing to process
 
         # Sort by created_at (process oldest first - FIFO)
-        chainable_jobs.sort(key=lambda x: x.get('created_at', ''))
+        chainable_jobs.sort(key=lambda x: x.get("created_at", ""))
         job_to_chain = chainable_jobs[0]
 
-        jid = job_to_chain['id']
-        tool = job_to_chain.get('tool', 'unknown')
+        jid = job_to_chain["id"]
+        tool = job_to_chain.get("tool", "unknown")
 
         _append_worker_log(f"processing chains for job {jid} ({tool})")
-        logger.info("Processing chainable job", extra={
-            "job_id": jid,
-            "tool": tool,
-            "queue_depth": len(chainable_jobs)
-        })
+        logger.info(
+            "Processing chainable job",
+            extra={"job_id": jid, "tool": tool, "queue_depth": len(chainable_jobs)},
+        )
+
+        # Mark job as chaining in progress BEFORE starting (prevents retry loop if auto_chain hangs)
+        chaining_start = datetime.now(timezone.utc).isoformat()
+        _update_job(jid, chaining_started_at=chaining_start)
 
         try:
             from souleyez.core.tool_chaining import ToolChaining
+
             chaining = ToolChaining()
 
             if not chaining.is_enabled():
                 # Chaining was disabled after job marked as chainable
-                _update_job(jid, chained=True)
+                _update_job(jid, chained=True, chaining_started_at=None)
                 _append_worker_log(f"job {jid}: chaining now disabled, skipping")
                 return 1
 
             # Get parse results from job
-            parse_result = job_to_chain.get('parse_result', {})
+            parse_result = job_to_chain.get("parse_result", {})
 
             if not parse_result:
                 # No parse results - this shouldn't happen if job was properly marked chainable
                 # Log warning and store reason for debugging
-                logger.warning("Job marked chainable but has no parse_result", extra={
-                    "job_id": jid,
-                    "tool": tool,
-                    "status": job_to_chain.get('status')
-                })
-                _append_worker_log(f"job {jid}: WARNING - marked chainable but parse_result is empty/missing")
-                _update_job(jid, chained=True, chain_skip_reason="parse_result missing")
+                logger.warning(
+                    "Job marked chainable but has no parse_result",
+                    extra={
+                        "job_id": jid,
+                        "tool": tool,
+                        "status": job_to_chain.get("status"),
+                    },
+                )
+                _append_worker_log(
+                    f"job {jid}: WARNING - marked chainable but parse_result is empty/missing"
+                )
+                _update_job(
+                    jid,
+                    chained=True,
+                    chain_skip_reason="parse_result missing",
+                    chaining_started_at=None,
+                )
                 return 1
 
-            if 'error' in parse_result:
+            if "error" in parse_result:
                 # Parse had an error - log and skip
-                logger.warning("Job has parse error, skipping chaining", extra={
-                    "job_id": jid,
-                    "tool": tool,
-                    "parse_error": parse_result.get('error')
-                })
-                _append_worker_log(f"job {jid}: parse error '{parse_result.get('error')}', skipping chain")
-                _update_job(jid, chained=True, chain_skip_reason=f"parse_error: {parse_result.get('error')}")
+                logger.warning(
+                    "Job has parse error, skipping chaining",
+                    extra={
+                        "job_id": jid,
+                        "tool": tool,
+                        "parse_error": parse_result.get("error"),
+                    },
+                )
+                _append_worker_log(
+                    f"job {jid}: parse error '{parse_result.get('error')}', skipping chain"
+                )
+                _update_job(
+                    jid,
+                    chained=True,
+                    chain_skip_reason=f"parse_error: {parse_result.get('error')}",
+                    chaining_started_at=None,
+                )
                 return 1
 
             # Process auto-chaining
             chained_job_ids = chaining.auto_chain(job_to_chain, parse_result)
 
-            # Update job with chaining results
-            _update_job(jid,
+            # Update job with chaining results (clear chaining_started_at)
+            _update_job(
+                jid,
                 chained=True,
-                chained_job_ids=chained_job_ids or []
+                chained_job_ids=chained_job_ids or [],
+                chaining_started_at=None,
             )
 
             if chained_job_ids:
-                logger.info("Auto-chaining completed", extra={
-                    "job_id": jid,
-                    "chained_jobs": chained_job_ids,
-                    "count": len(chained_job_ids)
-                })
+                logger.info(
+                    "Auto-chaining completed",
+                    extra={
+                        "job_id": jid,
+                        "chained_jobs": chained_job_ids,
+                        "count": len(chained_job_ids),
+                    },
+                )
                 _append_worker_log(
                     f"job {jid}: created {len(chained_job_ids)} chained job(s): {chained_job_ids}"
                 )
@@ -773,29 +1163,33 @@ def _process_pending_chains():
773
1163
  except Exception as chain_err:
774
1164
  # Chaining failed - mark as chained with error to prevent retry loops
775
1165
  error_msg = str(chain_err)
776
- logger.error("Auto-chaining failed", extra={
777
- "job_id": jid,
778
- "error": error_msg,
779
- "traceback": traceback.format_exc()
780
- })
1166
+ logger.error(
1167
+ "Auto-chaining failed",
1168
+ extra={
1169
+ "job_id": jid,
1170
+ "error": error_msg,
1171
+ "traceback": traceback.format_exc(),
1172
+ },
1173
+ )
781
1174
  _append_worker_log(f"job {jid} chain error: {error_msg}")
782
- _update_job(jid,
783
- chained=True,
784
- chain_error=error_msg
1175
+ _update_job(
1176
+ jid, chained=True, chain_error=error_msg, chaining_started_at=None
785
1177
  )
786
1178
  return 1 # Still count as processed (with error)
787
1179
 
788
1180
  except Exception as e:
789
1181
  # Unexpected error in chain processor itself
790
- logger.error("Chain processor error", extra={
791
- "error": str(e),
792
- "traceback": traceback.format_exc()
793
- })
1182
+ logger.error(
1183
+ "Chain processor error",
1184
+ extra={"error": str(e), "traceback": traceback.format_exc()},
1185
+ )
794
1186
  _append_worker_log(f"chain processor error: {e}")
795
1187
  return 0
796
1188
 
797
1189
 
798
- def _try_run_plugin(tool: str, target: str, args: List[str], label: str, log_path: str, jid: int = None) -> tuple:
1190
+ def _try_run_plugin(
1191
+ tool: str, target: str, args: List[str], label: str, log_path: str, jid: int = None
1192
+ ) -> tuple:
799
1193
  try:
800
1194
  from .loader import discover_plugins
801
1195
 
@@ -827,21 +1221,29 @@ def _try_run_plugin(tool: str, target: str, args: List[str], label: str, log_pat
827
1221
  fh.write(f"Target: {target}\n")
828
1222
  fh.write(f"Args: {args}\n")
829
1223
  fh.write(f"Label: {label}\n")
830
- fh.write(f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n\n")
831
-
1224
+ fh.write(
1225
+ f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n\n"
1226
+ )
1227
+
832
1228
  # Build command specification
833
- cmd_spec = build_command_method(target, args or [], label or "", log_path)
834
-
1229
+ cmd_spec = build_command_method(
1230
+ target, args or [], label or "", log_path
1231
+ )
1232
+
835
1233
  if cmd_spec is None:
836
1234
  # build_command returned None - check if this is a deliberate abort
837
1235
  # (e.g., gobuster detected host redirect and aborted to avoid wasted scan)
838
1236
  if os.path.exists(log_path):
839
- with open(log_path, 'r', encoding='utf-8', errors='replace') as fh:
1237
+ with open(
1238
+ log_path, "r", encoding="utf-8", errors="replace"
1239
+ ) as fh:
840
1240
  log_content = fh.read()
841
- if 'HOST_REDIRECT_TARGET:' in log_content:
1241
+ if "HOST_REDIRECT_TARGET:" in log_content:
842
1242
  # Plugin aborted due to host redirect - don't fall through to run()
843
1243
  # Return success (0) so parser can set WARNING status and trigger retry
844
- _append_worker_log(f"job {jid}: gobuster aborted due to host redirect")
1244
+ _append_worker_log(
1245
+ f"job {jid}: gobuster aborted due to host redirect"
1246
+ )
845
1247
  return (True, 0)
846
1248
 
847
1249
  # Otherwise check if plugin has run() method
@@ -854,7 +1256,9 @@ def _try_run_plugin(tool: str, target: str, args: List[str], label: str, log_pat
854
1256
 
855
1257
  try:
856
1258
  if "log_path" in params:
857
- rc = run_method(target, args or [], label or "", log_path)
1259
+ rc = run_method(
1260
+ target, args or [], label or "", log_path
1261
+ )
858
1262
  elif "label" in params:
859
1263
  rc = run_method(target, args or [], label or "")
860
1264
  elif "args" in params:
@@ -863,30 +1267,38 @@ def _try_run_plugin(tool: str, target: str, args: List[str], label: str, log_pat
863
1267
  rc = run_method(target)
864
1268
  return (True, rc if isinstance(rc, int) else 0)
865
1269
  except Exception as e:
866
- with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
1270
+ with open(
1271
+ log_path, "a", encoding="utf-8", errors="replace"
1272
+ ) as fh:
867
1273
  fh.write(f"\n=== PLUGIN RUN ERROR ===\n")
868
1274
  fh.write(f"{type(e).__name__}: {e}\n")
869
1275
  fh.write(f"\n{traceback.format_exc()}\n")
870
1276
  return (True, 1)
871
1277
  else:
872
1278
  # No run() method either - actual validation failure
873
- with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
874
- fh.write("ERROR: Plugin validation failed (build_command returned None)\n")
1279
+ with open(
1280
+ log_path, "a", encoding="utf-8", errors="replace"
1281
+ ) as fh:
1282
+ fh.write(
1283
+ "ERROR: Plugin validation failed (build_command returned None)\n"
1284
+ )
875
1285
  return (True, 1)
876
-
1286
+
877
1287
  # Execute using new subprocess handler with PID tracking
878
- rc = _run_subprocess_with_spec(cmd_spec, log_path, jid=jid, plugin=plugin)
1288
+ rc = _run_subprocess_with_spec(
1289
+ cmd_spec, log_path, jid=jid, plugin=plugin
1290
+ )
879
1291
 
880
1292
  # Completion message already written by _run_subprocess_with_spec
881
1293
  return (True, rc)
882
-
1294
+
883
1295
  except Exception as e:
884
1296
  with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
885
1297
  fh.write("\n=== PLUGIN ERROR ===\n")
886
1298
  fh.write(f"{type(e).__name__}: {e}\n")
887
1299
  fh.write(f"\n{traceback.format_exc()}\n")
888
1300
  return (True, 1)
889
-
1301
+
890
1302
  # FALLBACK: Use old run() method for backward compatibility
891
1303
  run_method = getattr(plugin, "run", None)
892
1304
  if not callable(run_method):
@@ -900,24 +1312,36 @@ def _try_run_plugin(tool: str, target: str, args: List[str], label: str, log_pat
900
1312
  fh.write(f"Target: {target}\n")
901
1313
  fh.write(f"Args: {args}\n")
902
1314
  fh.write(f"Label: {label}\n")
903
- fh.write(f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n\n")
1315
+ fh.write(
1316
+ f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n\n"
1317
+ )
904
1318
 
905
1319
  try:
906
- if 'log_path' in params or len(params) >= 4:
1320
+ if "log_path" in params or len(params) >= 4:
907
1321
  rc = run_method(target, args or [], label or "", log_path)
908
1322
  else:
909
1323
  result = run_method(target, args or [], label or "")
910
1324
 
911
1325
  if isinstance(result, tuple) and len(result) >= 2:
912
1326
  rc, old_logpath = result[0], result[1]
913
- if old_logpath and os.path.exists(old_logpath) and old_logpath != log_path:
1327
+ if (
1328
+ old_logpath
1329
+ and os.path.exists(old_logpath)
1330
+ and old_logpath != log_path
1331
+ ):
914
1332
  try:
915
- with open(old_logpath, "r", encoding="utf-8", errors="replace") as src:
916
- with open(log_path, "a", encoding="utf-8", errors="replace") as dst:
1333
+ with open(
1334
+ old_logpath, "r", encoding="utf-8", errors="replace"
1335
+ ) as src:
1336
+ with open(
1337
+ log_path, "a", encoding="utf-8", errors="replace"
1338
+ ) as dst:
917
1339
  dst.write("\n=== Plugin Output ===\n")
918
1340
  dst.write(src.read())
919
1341
  except Exception as e:
920
- with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
1342
+ with open(
1343
+ log_path, "a", encoding="utf-8", errors="replace"
1344
+ ) as fh:
921
1345
  fh.write(f"\nWarning: Could not copy old log: {e}\n")
922
1346
  elif isinstance(result, int):
923
1347
  rc = result
@@ -941,7 +1365,9 @@ def _try_run_plugin(tool: str, target: str, args: List[str], label: str, log_pat
941
1365
  return (False, 0)
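`_try_run_plugin` accepts two plugin shapes: the newer `build_command()` protocol that returns a command spec dict, and the legacy `run()` method executed directly. A minimal sketch of a plugin the dispatcher above would accept, based only on the call signatures visible in this diff; the class and tool names are hypothetical.

from typing import Any, Dict, List

class ExamplePlugin:
    """Hypothetical plugin matching the dispatch logic in _try_run_plugin."""

    tool = "example_tool"  # hypothetical

    def build_command(
        self, target: str, args: List[str], label: str, log_path: str
    ) -> Dict[str, Any]:
        # Returning a spec dict hands execution to _run_subprocess_with_spec().
        return {
            "cmd": ["example_tool", "-o", log_path, target, *args],
            "timeout": 300,
            "needs_shell": False,
        }

    def run(self, target: str, args: List[str], label: str, log_path: str) -> int:
        # Legacy fallback: do the work directly and return an exit code.
        return 0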
942
1366
 
943
1367
 
944
- def _run_rpc_exploit(cmd_spec: Dict[str, Any], log_path: str, jid: int = None, plugin=None) -> int:
1368
+ def _run_rpc_exploit(
1369
+ cmd_spec: Dict[str, Any], log_path: str, jid: int = None, plugin=None
1370
+ ) -> int:
945
1371
  """
946
1372
  Execute MSF exploit via RPC mode (Pro feature).
947
1373
 
@@ -964,10 +1390,10 @@ def _run_rpc_exploit(cmd_spec: Dict[str, Any], log_path: str, jid: int = None, p
964
1390
  Returns:
965
1391
  Exit code (0 = success with session, non-zero = failure)
966
1392
  """
967
- exploit_path = cmd_spec.get('exploit_path')
968
- target = cmd_spec.get('target')
969
- options = cmd_spec.get('options', {})
970
- payload = cmd_spec.get('payload')
1393
+ exploit_path = cmd_spec.get("exploit_path")
1394
+ target = cmd_spec.get("target")
1395
+ options = cmd_spec.get("options", {})
1396
+ payload = cmd_spec.get("payload")
971
1397
 
972
1398
  _append_worker_log(f"job {jid}: RPC mode exploit - {exploit_path}")
973
1399
 
@@ -975,6 +1401,7 @@ def _run_rpc_exploit(cmd_spec: Dict[str, Any], log_path: str, jid: int = None, p
975
1401
  if plugin is None:
976
1402
  try:
977
1403
  from souleyez.plugins.msf_exploit import MsfExploitPlugin
1404
+
978
1405
  plugin = MsfExploitPlugin()
979
1406
  except Exception as e:
980
1407
  with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
@@ -987,12 +1414,12 @@ def _run_rpc_exploit(cmd_spec: Dict[str, Any], log_path: str, jid: int = None, p
987
1414
  target=target,
988
1415
  options=options,
989
1416
  log_path=log_path,
990
- payload=payload
1417
+ payload=payload,
991
1418
  )
992
1419
 
993
- if result.get('success'):
994
- session_id = result.get('session_id')
995
- session_info = result.get('session_info', {})
1420
+ if result.get("success"):
1421
+ session_id = result.get("session_id")
1422
+ session_info = result.get("session_info", {})
996
1423
 
997
1424
  # Store session in database
998
1425
  try:
@@ -1001,40 +1428,46 @@ def _run_rpc_exploit(cmd_spec: Dict[str, Any], log_path: str, jid: int = None, p
1001
1428
  _append_worker_log(f"job {jid}: failed to store session: {e}")
1002
1429
 
1003
1430
  # Update job with session info
1004
- session_type = session_info.get('type', 'shell')
1431
+ session_type = session_info.get("type", "shell")
1005
1432
  _update_job(
1006
1433
  jid,
1007
1434
  exploitation_detected=True,
1008
- session_info=f"Session {session_id} ({session_type})"
1435
+ session_info=f"Session {session_id} ({session_type})",
1009
1436
  )
1010
1437
 
1011
1438
  return 0
1012
- elif result.get('no_session'):
1439
+ elif result.get("no_session"):
1013
1440
  # Exploit ran but no session opened - this is "no results", not an error
1014
1441
  # Return 1 but let parser set status to no_results
1015
- reason = result.get('reason', 'No session opened')
1442
+ reason = result.get("reason", "No session opened")
1016
1443
  _append_worker_log(f"job {jid}: exploit completed - {reason}")
1017
1444
  return 1
1018
1445
  else:
1019
1446
  # True error (connection failed, RPC error, etc.)
1020
- error = result.get('error', 'Unknown error')
1447
+ error = result.get("error", "Unknown error")
1021
1448
  _append_worker_log(f"job {jid}: RPC exploit failed - {error}")
1022
1449
  return 1
1023
1450
 
1024
1451
 
1025
- def _store_msf_session(jid: int, target: str, exploit_path: str, session_id: str, session_info: Dict[str, Any]):
1452
+ def _store_msf_session(
1453
+ jid: int,
1454
+ target: str,
1455
+ exploit_path: str,
1456
+ session_id: str,
1457
+ session_info: Dict[str, Any],
1458
+ ):
1026
1459
  """Store MSF session in database."""
1027
1460
  try:
1028
- from souleyez.storage.msf_sessions import add_msf_session
1029
1461
  from souleyez.storage.database import get_db
1030
1462
  from souleyez.storage.hosts import HostManager
1463
+ from souleyez.storage.msf_sessions import add_msf_session
1031
1464
 
1032
1465
  # Get job info for engagement_id
1033
1466
  job = get_job(jid)
1034
1467
  if not job:
1035
1468
  return
1036
1469
 
1037
- engagement_id = job.get('engagement_id')
1470
+ engagement_id = job.get("engagement_id")
1038
1471
  if not engagement_id:
1039
1472
  return
1040
1473
 
@@ -1044,7 +1477,7 @@ def _store_msf_session(jid: int, target: str, exploit_path: str, session_id: str
1044
1477
 
1045
1478
  hm = HostManager()
1046
1479
  host = hm.get_host_by_ip(engagement_id, target)
1047
- host_id = host['id'] if host else None
1480
+ host_id = host["id"] if host else None
1048
1481
 
1049
1482
  if host_id:
1050
1483
  add_msf_session(
@@ -1052,15 +1485,15 @@ def _store_msf_session(jid: int, target: str, exploit_path: str, session_id: str
1052
1485
  engagement_id=engagement_id,
1053
1486
  host_id=host_id,
1054
1487
  msf_session_id=int(session_id),
1055
- session_type=session_info.get('type'),
1488
+ session_type=session_info.get("type"),
1056
1489
  via_exploit=exploit_path,
1057
- via_payload=session_info.get('via_payload'),
1058
- platform=session_info.get('platform'),
1059
- arch=session_info.get('arch'),
1060
- username=session_info.get('username'),
1061
- port=session_info.get('target_port'),
1062
- tunnel_peer=session_info.get('tunnel_peer'),
1063
- notes=f"Created by job #{jid}"
1490
+ via_payload=session_info.get("via_payload"),
1491
+ platform=session_info.get("platform"),
1492
+ arch=session_info.get("arch"),
1493
+ username=session_info.get("username"),
1494
+ port=session_info.get("target_port"),
1495
+ tunnel_peer=session_info.get("tunnel_peer"),
1496
+ notes=f"Created by job #{jid}",
1064
1497
  )
1065
1498
  conn.commit()
1066
1499
 
@@ -1079,7 +1512,7 @@ def _is_stdbuf_available() -> bool:
1079
1512
  """Check if stdbuf is available for line-buffered output."""
1080
1513
  global _stdbuf_available
1081
1514
  if _stdbuf_available is None:
1082
- _stdbuf_available = shutil.which('stdbuf') is not None
1515
+ _stdbuf_available = shutil.which("stdbuf") is not None
1083
1516
  return _stdbuf_available
1084
1517
 
1085
1518
 
@@ -1102,7 +1535,7 @@ def _wrap_cmd_for_line_buffering(cmd: List[str]) -> List[str]:
1102
1535
 
1103
1536
  if _is_stdbuf_available():
1104
1537
  # stdbuf -oL = line-buffered stdout, -eL = line-buffered stderr
1105
- return ['stdbuf', '-oL', '-eL'] + cmd
1538
+ return ["stdbuf", "-oL", "-eL"] + cmd
1106
1539
 
1107
1540
  return cmd
1108
1541
 
@@ -1115,12 +1548,14 @@ def _get_subprocess_env() -> Dict[str, str]:
1115
1548
  to prevent interactive terminal issues.
1116
1549
  """
1117
1550
  env = os.environ.copy()
1118
- env['TERM'] = 'dumb' # Prevent stty errors from interactive tools
1119
- env['PYTHONUNBUFFERED'] = '1' # Disable Python output buffering
1551
+ env["TERM"] = "dumb" # Prevent stty errors from interactive tools
1552
+ env["PYTHONUNBUFFERED"] = "1" # Disable Python output buffering
1120
1553
  return env
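Together with `_wrap_cmd_for_line_buffering()` just above, this helper shapes every subprocess launch so output streams into the job log line by line. A minimal usage sketch assuming both helpers as defined in this file; the command and log path are illustrative.

import subprocess

cmd = _wrap_cmd_for_line_buffering(["nmap", "-sV", "10.0.0.5"])
# -> ["stdbuf", "-oL", "-eL", "nmap", "-sV", "10.0.0.5"] when stdbuf is installed,
#    or the original command unchanged otherwise.

with open("/tmp/example-job.log", "a", encoding="utf-8", errors="replace") as fh:
    proc = subprocess.Popen(
        cmd,
        stdout=fh,
        stderr=subprocess.STDOUT,
        env=_get_subprocess_env(),  # TERM=dumb, PYTHONUNBUFFERED=1
    )
    proc.wait()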
1121
1554
 
1122
1555
 
1123
- def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int = None, plugin=None) -> int:
1556
+ def _run_subprocess_with_spec(
1557
+ cmd_spec: Dict[str, Any], log_path: str, jid: int = None, plugin=None
1558
+ ) -> int:
1124
1559
  """
1125
1560
  Execute a command specification with proper PID tracking.
1126
1561
 
@@ -1153,19 +1588,19 @@ def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int
1153
1588
  Exit code (0 = success, non-zero = failure)
1154
1589
  """
1155
1590
  # Check for RPC mode (Pro feature)
1156
- if cmd_spec.get('mode') == 'rpc':
1591
+ if cmd_spec.get("mode") == "rpc":
1157
1592
  return _run_rpc_exploit(cmd_spec, log_path, jid, plugin)
1158
1593
 
1159
- cmd = cmd_spec.get('cmd')
1594
+ cmd = cmd_spec.get("cmd")
1160
1595
  if not cmd:
1161
1596
  with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
1162
1597
  fh.write("ERROR: No command provided in spec\n")
1163
1598
  return 1
1164
1599
 
1165
- timeout = cmd_spec.get('timeout', JOB_TIMEOUT_SECONDS)
1166
- spec_env = cmd_spec.get('env')
1167
- cwd = cmd_spec.get('cwd')
1168
- needs_shell = cmd_spec.get('needs_shell', False)
1600
+ timeout = cmd_spec.get("timeout", JOB_TIMEOUT_SECONDS)
1601
+ spec_env = cmd_spec.get("env")
1602
+ cwd = cmd_spec.get("cwd")
1603
+ needs_shell = cmd_spec.get("needs_shell", False)
1169
1604
 
1170
1605
  _append_worker_log(f"_run_subprocess_with_spec: timeout={timeout}s for job {jid}")
1171
1606
 
@@ -1187,7 +1622,9 @@ def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int
1187
1622
  fh.write(f"Environment: {spec_env}\n")
1188
1623
  if cwd:
1189
1624
  fh.write(f"Working Dir: {cwd}\n")
1190
- fh.write(f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n\n")
1625
+ fh.write(
1626
+ f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n\n"
1627
+ )
1191
1628
  fh.flush()
1192
1629
 
1193
1630
  try:
@@ -1201,9 +1638,9 @@ def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int
1201
1638
  preexec_fn=os.setsid, # Creates new session
1202
1639
  env=proc_env,
1203
1640
  cwd=cwd,
1204
- shell=needs_shell # nosec B602 - intentional for security tool command execution
1641
+ shell=needs_shell, # nosec B602 - intentional for security tool command execution
1205
1642
  )
1206
-
1643
+
1207
1644
  # Store PID and process start time for stale detection
1208
1645
  if jid is not None:
1209
1646
  proc_start_time = _get_process_start_time(proc.pid)
@@ -1225,19 +1662,31 @@ def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int
1225
1662
  # For MSF exploits, check if a session was opened before timeout
1226
1663
  # A timeout with an open session is success, not failure
1227
1664
  session_opened = False
1228
- if hasattr(plugin, 'tool') and plugin.tool in ('msf_exploit', 'msf_auxiliary'):
1665
+ if hasattr(plugin, "tool") and plugin.tool in (
1666
+ "msf_exploit",
1667
+ "msf_auxiliary",
1668
+ ):
1229
1669
  try:
1230
1670
  fh.flush()
1231
- with open(log_path, "r", encoding="utf-8", errors="replace") as rf:
1671
+ with open(
1672
+ log_path, "r", encoding="utf-8", errors="replace"
1673
+ ) as rf:
1232
1674
  content = rf.read()
1233
1675
  import re
1234
- session_opened = bool(re.search(r'session \d+ opened', content, re.IGNORECASE))
1676
+
1677
+ session_opened = bool(
1678
+ re.search(r"session \d+ opened", content, re.IGNORECASE)
1679
+ )
1235
1680
  except Exception:
1236
1681
  pass
1237
1682
 
1238
1683
  if session_opened:
1239
- fh.write(f"\n[*] Session opened successfully (timeout expected - session is active)\n")
1240
- fh.write(f"=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ===\n")
1684
+ fh.write(
1685
+ f"\n[*] Session opened successfully (timeout expected - session is active)\n"
1686
+ )
1687
+ fh.write(
1688
+ f"=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ===\n"
1689
+ )
1241
1690
  return 0
1242
1691
  else:
1243
1692
  fh.write(f"\nERROR: Command timed out after {timeout} seconds\n")
@@ -1247,7 +1696,7 @@ def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int
1247
1696
  # Check if job was killed externally during execution
1248
1697
  if jid is not None:
1249
1698
  job = get_job(jid)
1250
- if job and job.get('status') == 'killed':
1699
+ if job and job.get("status") == "killed":
1251
1700
  fh.write(f"\nINFO: Job was killed externally\n")
1252
1701
  # Process may already be dead, but ensure cleanup
1253
1702
  try:
@@ -1267,9 +1716,12 @@ def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int
1267
1716
  fh.flush()
1268
1717
  return 143 # 128 + 15 (SIGTERM)
1269
1718
 
1270
- fh.write(f"\n=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ===\n")
1719
+ fh.write(
1720
+ f"\n=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ===\n"
1721
+ )
1271
1722
  fh.write(f"Exit Code: {proc.returncode}\n")
1272
1723
  fh.flush()
1724
+ os.fsync(fh.fileno()) # Ensure data is on disk before parsing
1273
1725
  return proc.returncode
1274
1726
 
1275
1727
  except FileNotFoundError:
@@ -1282,7 +1734,14 @@ def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int
1282
1734
  return 1
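A sketch of the plain (non-RPC) spec this function consumes, matching the keys it reads above; every value shown is illustrative.

cmd_spec = {
    "cmd": ["gobuster", "dir", "-u", "http://10.0.0.5", "-w", "wordlist.txt"],
    "timeout": 1800,          # optional; JOB_TIMEOUT_SECONDS is used when omitted
    "env": None,              # optional environment for the subprocess
    "cwd": None,              # optional working directory
    "needs_shell": False,     # only True when a shell is genuinely required
}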
1283
1735
 
1284
1736
 
1285
- def _run_subprocess(tool: str, target: str, args: List[str], log_path: str, jid: int = None, timeout: int = None) -> int:
1737
+ def _run_subprocess(
1738
+ tool: str,
1739
+ target: str,
1740
+ args: List[str],
1741
+ log_path: str,
1742
+ jid: int = None,
1743
+ timeout: int = None,
1744
+ ) -> int:
1286
1745
  # Use None as default and resolve at runtime to avoid Python's early binding issue
1287
1746
  if timeout is None:
1288
1747
  timeout = JOB_TIMEOUT_SECONDS
@@ -1298,11 +1757,13 @@ def _run_subprocess(tool: str, target: str, args: List[str], log_path: str, jid:
1298
1757
 
1299
1758
  with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
1300
1759
  # Log original command (without stdbuf wrapper for clarity)
1301
- original_cmd = cmd[3:] if cmd[:3] == ['stdbuf', '-oL', '-eL'] else cmd
1760
+ original_cmd = cmd[3:] if cmd[:3] == ["stdbuf", "-oL", "-eL"] else cmd
1302
1761
  fh.write("=== Subprocess Execution ===\n")
1303
1762
  fh.write(f"Command: {' '.join(original_cmd)}\n")
1304
1763
  fh.write(f"Timeout: {timeout} seconds\n")
1305
- fh.write(f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n\n")
1764
+ fh.write(
1765
+ f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n\n"
1766
+ )
1306
1767
  fh.flush()
1307
1768
 
1308
1769
  try:
@@ -1317,7 +1778,7 @@ def _run_subprocess(tool: str, target: str, args: List[str], log_path: str, jid:
1317
1778
  stdout=fh,
1318
1779
  stderr=subprocess.STDOUT,
1319
1780
  preexec_fn=os.setsid, # Creates new session
1320
- env=env
1781
+ env=env,
1321
1782
  )
1322
1783
 
1323
1784
  # Store PID and process start time for stale detection
@@ -1344,7 +1805,7 @@ def _run_subprocess(tool: str, target: str, args: List[str], log_path: str, jid:
1344
1805
  # Check if job was killed externally during execution
1345
1806
  if jid is not None:
1346
1807
  job = get_job(jid)
1347
- if job and job.get('status') == 'killed':
1808
+ if job and job.get("status") == "killed":
1348
1809
  fh.write(f"\nINFO: Job was killed externally\n")
1349
1810
  # Process may already be dead, but ensure cleanup
1350
1811
  try:
@@ -1364,9 +1825,12 @@ def _run_subprocess(tool: str, target: str, args: List[str], log_path: str, jid:
1364
1825
  fh.flush()
1365
1826
  return 143 # 128 + 15 (SIGTERM)
1366
1827
 
1367
- fh.write(f"\n=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ===\n")
1828
+ fh.write(
1829
+ f"\n=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ===\n"
1830
+ )
1368
1831
  fh.write(f"Exit Code: {proc.returncode}\n")
1369
1832
  fh.flush()
1833
+ os.fsync(fh.fileno()) # Ensure data is on disk before parsing
1370
1834
  return proc.returncode
1371
1835
 
1372
1836
  except FileNotFoundError:
@@ -1407,7 +1871,23 @@ def _is_true_error_exit_code(rc: int, tool: str) -> bool:
1407
1871
  # msf_exploit returns 1 when no session opened (exploit ran but target not vulnerable)
1408
1872
  # nikto returns non-zero when it finds vulnerabilities (not an error!)
1409
1873
  # dnsrecon returns 1 when crt.sh lookup fails (known bug) but still collects valid DNS data
1410
- tools_with_nonzero_success = ['gobuster', 'hydra', 'medusa', 'msf_exploit', 'nikto', 'dnsrecon']
1874
+ # evil_winrm returns non-zero even on successful auth - let handler parse output
1875
+ # bloodhound exits non-zero on connection errors but still collects AD data
1876
+ # hashcat returns 1 when exhausted (no passwords cracked) - not an error, just no results
1877
+ # bash scripts and web_login_test return 1 when credentials fail - not an error, just invalid creds
1878
+ tools_with_nonzero_success = [
1879
+ "gobuster",
1880
+ "hydra",
1881
+ "medusa",
1882
+ "msf_exploit",
1883
+ "nikto",
1884
+ "dnsrecon",
1885
+ "evil_winrm",
1886
+ "bloodhound",
1887
+ "hashcat",
1888
+ "bash",
1889
+ "web_login_test",
1890
+ ]
1411
1891
 
1412
1892
  if tool.lower() in tools_with_nonzero_success:
1413
1893
  # Let parser determine status
@@ -1418,11 +1898,76 @@ def _is_true_error_exit_code(rc: int, tool: str) -> bool:
1418
1898
 
1419
1899
 
1420
1900
  def run_job(jid: int) -> None:
1421
- job = get_job(jid)
1422
- if not job:
1423
- logger.error("Job not found", extra={"job_id": jid})
1424
- _append_worker_log(f"run_job: job {jid} not found")
1425
- return
1901
+ """
1902
+ Run a job by its ID.
1903
+
1904
+ Uses atomic status transition with cross-process file locking to prevent
1905
+ race conditions with kill/delete and other processes (UI).
1906
+ If job is not in QUEUED status when we try to start it, we abort.
1907
+ """
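The cross-process lock referenced below, `_jobs_lock()`, is defined elsewhere in this module and raises `TimeoutError` when the lock cannot be acquired. A minimal sketch of what such a lock commonly looks like on POSIX, assuming an `fcntl`-based implementation — an assumption for illustration, not the package's actual code.

import fcntl
import time
from contextlib import contextmanager

@contextmanager
def _example_jobs_lock(lock_path="/tmp/jobs.lock", timeout=10):
    """Hypothetical cross-process file lock; the real _jobs_lock lives elsewhere in this module."""
    fh = open(lock_path, "a+")
    deadline = time.monotonic() + timeout
    try:
        while True:
            try:
                fcntl.flock(fh, fcntl.LOCK_EX | fcntl.LOCK_NB)
                break
            except BlockingIOError:
                if time.monotonic() > deadline:
                    raise TimeoutError("could not acquire jobs lock")
                time.sleep(0.05)
        yield
    finally:
        fcntl.flock(fh, fcntl.LOCK_UN)
        fh.close()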
1908
+ # Atomically check status and transition to RUNNING
1909
+ # Both thread lock and file lock ensure no other process/thread can
1910
+ # read/write jobs.json while we're modifying it
1911
+ with _lock: # Thread safety within this process
1912
+ try:
1913
+ with _jobs_lock(): # Cross-process safety
1914
+ jobs = _read_jobs_unlocked()
1915
+ job = None
1916
+ for j in jobs:
1917
+ if j.get("id") == jid:
1918
+ job = j
1919
+ break
1920
+
1921
+ if not job:
1922
+ logger.error("Job not found", extra={"job_id": jid})
1923
+ _append_worker_log(f"run_job: job {jid} not found")
1924
+ return
1925
+
1926
+ current_status = job.get("status")
1927
+ if current_status != STATUS_QUEUED:
1928
+ # Job was killed, deleted, or already running - abort
1929
+ logger.info(
1930
+ "Job not in queued status, skipping",
1931
+ extra={"job_id": jid, "current_status": current_status},
1932
+ )
1933
+ _append_worker_log(
1934
+ f"run_job: job {jid} not queued (status={current_status}), skipping"
1935
+ )
1936
+ return
1937
+
1938
+ # Atomically set to RUNNING while still holding both locks
1939
+ now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
1940
+ job["status"] = STATUS_RUNNING
1941
+ job["started_at"] = now
1942
+ _write_jobs_unlocked(jobs)
1943
+ except TimeoutError:
1944
+ # Fall back to non-locked operation
1945
+ _append_worker_log(
1946
+ f"jobs.json lock timeout in run_job for {jid}, using fallback"
1947
+ )
1948
+ jobs = _read_jobs()
1949
+ job = None
1950
+ for j in jobs:
1951
+ if j.get("id") == jid:
1952
+ job = j
1953
+ break
1954
+
1955
+ if not job:
1956
+ logger.error("Job not found", extra={"job_id": jid})
1957
+ _append_worker_log(f"run_job: job {jid} not found")
1958
+ return
1959
+
1960
+ current_status = job.get("status")
1961
+ if current_status != STATUS_QUEUED:
1962
+ _append_worker_log(
1963
+ f"run_job: job {jid} not queued (status={current_status}), skipping"
1964
+ )
1965
+ return
1966
+
1967
+ now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
1968
+ job["status"] = STATUS_RUNNING
1969
+ job["started_at"] = now
1970
+ _write_jobs(jobs)
1426
1971
 
1427
1972
  log_path = job.get("log") or os.path.join(JOBS_DIR, f"{jid}.log")
1428
1973
  _ensure_dirs()
@@ -1430,18 +1975,18 @@ def run_job(jid: int) -> None:
1430
1975
  log_dir = os.path.dirname(log_path)
1431
1976
  if not os.path.exists(log_dir):
1432
1977
  os.makedirs(log_dir, exist_ok=True)
1433
-
1434
- now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
1435
- _update_job(jid, status=STATUS_RUNNING, started_at=now)
1436
1978
  _append_worker_log(f"job {jid} started: {job.get('tool')} {job.get('target')}")
1437
1979
 
1438
- logger.info("Job started", extra={
1439
- "event_type": "job_started",
1440
- "job_id": jid,
1441
- "tool": job.get("tool"),
1442
- "target": job.get("target"),
1443
- "engagement_id": job.get("engagement_id")
1444
- })
1980
+ logger.info(
1981
+ "Job started",
1982
+ extra={
1983
+ "event_type": "job_started",
1984
+ "job_id": jid,
1985
+ "tool": job.get("tool"),
1986
+ "target": job.get("target"),
1987
+ "engagement_id": job.get("engagement_id"),
1988
+ },
1989
+ )
1445
1990
 
1446
1991
  try:
1447
1992
  tool = job.get("tool", "")
@@ -1452,27 +1997,33 @@ def run_job(jid: int) -> None:
1452
1997
  # Resolve wordlist paths to actual filesystem locations
1453
1998
  try:
1454
1999
  from ..wordlists import resolve_args_wordlists
2000
+
1455
2001
  args = resolve_args_wordlists(args)
1456
2002
  except ImportError:
1457
2003
  pass # Wordlists module not available, use args as-is
1458
2004
 
1459
2005
  start_time = time.perf_counter()
1460
- plugin_executed, rc = _try_run_plugin(tool, target, args, label, log_path, jid=jid)
2006
+ plugin_executed, rc = _try_run_plugin(
2007
+ tool, target, args, label, log_path, jid=jid
2008
+ )
1461
2009
 
1462
2010
  if not plugin_executed:
1463
- _append_worker_log(f"job {jid}: no plugin found for '{tool}', using subprocess")
1464
- logger.info("Using subprocess fallback", extra={
1465
- "job_id": jid,
1466
- "tool": tool
1467
- })
2011
+ _append_worker_log(
2012
+ f"job {jid}: no plugin found for '{tool}', using subprocess"
2013
+ )
2014
+ logger.info(
2015
+ "Using subprocess fallback", extra={"job_id": jid, "tool": tool}
2016
+ )
1468
2017
  rc = _run_subprocess(tool, target, args, log_path, jid=jid)
1469
2018
 
1470
2019
  # Check if job was killed externally while we were running
1471
2020
  job = get_job(jid)
1472
- job_killed = job and job.get('status') == 'killed'
2021
+ job_killed = job and job.get("status") == "killed"
1473
2022
 
1474
2023
  if job_killed:
1475
- _append_worker_log(f"job {jid}: detected external kill signal, skipping post-processing")
2024
+ _append_worker_log(
2025
+ f"job {jid}: detected external kill signal, skipping post-processing"
2026
+ )
1476
2027
  logger.info("Job was killed externally", extra={"job_id": jid})
1477
2028
 
1478
2029
  # ALWAYS update status, finished_at, and pid - even if job was killed
@@ -1493,22 +2044,80 @@ def run_job(jid: int) -> None:
1493
2044
 
1494
2045
  _update_job(jid, status=status, finished_at=now, pid=None)
1495
2046
 
1496
- logger.info("Job completed", extra={
1497
- "event_type": "job_completed",
1498
- "job_id": jid,
1499
- "status": status,
1500
- "exit_code": rc,
1501
- "duration_ms": round(duration_ms, 2)
1502
- })
2047
+ logger.info(
2048
+ "Job completed",
2049
+ extra={
2050
+ "event_type": "job_completed",
2051
+ "job_id": jid,
2052
+ "status": status,
2053
+ "exit_code": rc,
2054
+ "duration_ms": round(duration_ms, 2),
2055
+ },
2056
+ )
1503
2057
 
1504
2058
  # Only do post-processing if job was not killed externally
1505
2059
  if job_killed:
1506
2060
  _append_worker_log(f"job {jid} finished: status={status} rc={rc}")
1507
2061
  return
1508
2062
 
2063
+ # Check for transient errors and auto-retry
2064
+ job = get_job(jid)
2065
+ retry_count = job.get("metadata", {}).get("retry_count", 0)
2066
+ if retry_count < MAX_RETRIES:
2067
+ # Read log to check for transient errors
2068
+ # Note: Check even when rc==0 because tools like nxc may exit 0 but log errors
2069
+ log_path = job.get("log", "")
2070
+ if log_path and os.path.exists(log_path):
2071
+ try:
2072
+ with open(log_path, "r", encoding="utf-8", errors="replace") as f:
2073
+ log_content = f.read()
2074
+ if _is_transient_error(log_content):
2075
+ # Transient error detected - auto-retry
2076
+ logger.info(
2077
+ "Transient error detected, auto-retrying job",
2078
+ extra={"job_id": jid, "retry_count": retry_count + 1},
2079
+ )
2080
+ _append_worker_log(
2081
+ f"job {jid}: transient error detected, auto-retry {retry_count + 1}/{MAX_RETRIES}"
2082
+ )
2083
+
2084
+ # Build new job metadata with incremented retry count
2085
+ new_metadata = job.get("metadata", {}).copy()
2086
+ new_metadata["retry_count"] = retry_count + 1
2087
+ new_metadata["retried_from"] = jid
2088
+
2089
+ # Enqueue retry job
2090
+ retry_jid = enqueue_job(
2091
+ tool=job.get("tool"),
2092
+ target=job.get("target"),
2093
+ args=job.get("args", []),
2094
+ label=job.get("label", ""),
2095
+ engagement_id=job.get("engagement_id"),
2096
+ metadata=new_metadata,
2097
+ parent_id=job.get("metadata", {}).get("parent_id"),
2098
+ reason=f"Auto-retry {retry_count + 1}/{MAX_RETRIES} (transient error)",
2099
+ rule_id=job.get("metadata", {}).get("rule_id"),
2100
+ skip_scope_check=True, # Already validated on first run
2101
+ )
2102
+ _append_worker_log(
2103
+ f"job {jid}: retry enqueued as job #{retry_jid}"
2104
+ )
2105
+
2106
+ # Mark original job as retried (not error)
2107
+ _update_job(
2108
+ jid,
2109
+ status=STATUS_WARNING,
2110
+ chained=True, # Prevent chaining from failed job
2111
+ parse_result={"note": f"Retried as job #{retry_jid}"},
2112
+ )
2113
+ return
2114
+ except Exception as e:
2115
+ logger.warning(f"Failed to check for transient errors: {e}")
2116
+
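`_is_transient_error()` above is defined elsewhere in this module; a hedged sketch of the kind of substring/regex check such a helper typically performs — the actual pattern list is not shown in this diff.

import re

_EXAMPLE_TRANSIENT_PATTERNS = [
    r"connection refused",
    r"connection reset by peer",
    r"temporary failure in name resolution",
    r"network is unreachable",
    r"timed out",
]

def _example_is_transient_error(log_content: str) -> bool:
    """Hypothetical stand-in for _is_transient_error(); patterns are illustrative."""
    lowered = log_content.lower()
    return any(re.search(p, lowered) for p in _EXAMPLE_TRANSIENT_PATTERNS)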
1509
2117
  # Try to parse results into database
1510
2118
  try:
1511
2119
  from .result_handler import handle_job_result
2120
+
1512
2121
  # Re-fetch job to get updated data
1513
2122
  job = get_job(jid)
1514
2123
  parse_result = handle_job_result(job)
@@ -1516,23 +2125,41 @@ def run_job(jid: int) -> None:
1516
2125
  # Handle parse failure cases
1517
2126
  if parse_result is None:
1518
2127
  # Parser returned None - likely missing log file, no parser for tool, or missing engagement
1519
- logger.error("Job parse returned None - results may be lost", extra={
1520
- "job_id": jid,
1521
- "tool": job.get('tool'),
1522
- "log_exists": os.path.exists(job.get('log', '')) if job.get('log') else False
1523
- })
1524
- _append_worker_log(f"job {jid} parse returned None (tool={job.get('tool')}) - check if parser exists")
1525
- # Update job to indicate parse failure
1526
- _update_job(jid, status=STATUS_WARNING, parse_result={'error': 'Parser returned None - no results extracted'})
2128
+ logger.warning(
2129
+ "Job parse returned None - no parser for this tool",
2130
+ extra={
2131
+ "job_id": jid,
2132
+ "tool": job.get("tool"),
2133
+ "log_exists": (
2134
+ os.path.exists(job.get("log", ""))
2135
+ if job.get("log")
2136
+ else False
2137
+ ),
2138
+ },
2139
+ )
2140
+ _append_worker_log(
2141
+ f"job {jid} parse returned None (tool={job.get('tool')}) - check if parser exists"
2142
+ )
2143
+ # Only update status to WARNING if it wasn't already an ERROR
2144
+ # (e.g., exit code 127 = command not found should stay as ERROR)
2145
+ current_status = job.get("status")
2146
+ if current_status != STATUS_ERROR:
2147
+ _update_job(
2148
+ jid,
2149
+ status=STATUS_WARNING,
2150
+ parse_result={
2151
+ "error": "Parser returned None - no results extracted"
2152
+ },
2153
+ )
1527
2154
  # Mark as chained to prevent infinite retry
1528
2155
  _update_job(jid, chained=True)
1529
2156
  return
1530
2157
 
1531
- if 'error' in parse_result:
1532
- logger.error("Job parse error - results may be incomplete", extra={
1533
- "job_id": jid,
1534
- "error": parse_result['error']
1535
- })
2158
+ if "error" in parse_result:
2159
+ logger.error(
2160
+ "Job parse error - results may be incomplete",
2161
+ extra={"job_id": jid, "error": parse_result["error"]},
2162
+ )
1536
2163
  _append_worker_log(f"job {jid} parse error: {parse_result['error']}")
1537
2164
  # Update job status to warning with the error
1538
2165
  _update_job(jid, status=STATUS_WARNING, parse_result=parse_result)
@@ -1541,49 +2168,50 @@ def run_job(jid: int) -> None:
1541
2168
  return
1542
2169
 
1543
2170
  # Parse succeeded
1544
- logger.info("Job parsed successfully", extra={
1545
- "job_id": jid,
1546
- "parse_result": parse_result
1547
- })
2171
+ logger.info(
2172
+ "Job parsed successfully",
2173
+ extra={"job_id": jid, "parse_result": parse_result},
2174
+ )
1548
2175
  _append_worker_log(f"job {jid} parsed: {parse_result}")
1549
2176
 
1550
2177
  # Determine chainable status BEFORE updating to avoid race condition
1551
2178
  # We must set parse_result and chainable in a single atomic update
1552
2179
  try:
1553
2180
  from souleyez.core.tool_chaining import ToolChaining
2181
+
1554
2182
  chaining = ToolChaining()
1555
2183
 
1556
2184
  # Get current job to check status
1557
2185
  job = get_job(jid)
1558
- job_status = job.get('status', STATUS_ERROR)
2186
+ job_status = job.get("status", STATUS_ERROR)
1559
2187
 
1560
2188
  # Determine final status from parser if provided
1561
- final_status = parse_result.get('status', job_status)
2189
+ final_status = parse_result.get("status", job_status)
1562
2190
 
1563
2191
  # Check if job should be chainable
1564
2192
  should_chain = (
1565
- chaining.is_enabled() and
1566
- parse_result and
1567
- 'error' not in parse_result and
1568
- is_chainable(final_status)
2193
+ chaining.is_enabled()
2194
+ and parse_result
2195
+ and "error" not in parse_result
2196
+ and is_chainable(final_status)
1569
2197
  )
1570
2198
 
1571
2199
  # Build update dict - ATOMIC update of parse_result + chainable
1572
- update_fields = {'parse_result': parse_result}
1573
-
1574
- if 'status' in parse_result:
1575
- update_fields['status'] = final_status
1576
- logger.info("Job status updated from parser", extra={
1577
- "job_id": jid,
1578
- "status": final_status
1579
- })
2200
+ update_fields = {"parse_result": parse_result}
2201
+
2202
+ if "status" in parse_result:
2203
+ update_fields["status"] = final_status
2204
+ logger.info(
2205
+ "Job status updated from parser",
2206
+ extra={"job_id": jid, "status": final_status},
2207
+ )
1580
2208
  _append_worker_log(f"job {jid} status updated to: {final_status}")
1581
2209
 
1582
2210
  if should_chain:
1583
- update_fields['chainable'] = True
2211
+ update_fields["chainable"] = True
1584
2212
  else:
1585
2213
  # Not chainable - mark as chained to skip
1586
- update_fields['chained'] = True
2214
+ update_fields["chained"] = True
1587
2215
 
1588
2216
  # Single atomic update to prevent race condition
1589
2217
  _update_job(jid, **update_fields)
@@ -1591,69 +2219,89 @@ def run_job(jid: int) -> None:
1591
2219
  # Log chaining decision
1592
2220
  if should_chain:
1593
2221
  if final_status == STATUS_WARNING:
1594
- logger.info("Job with warning status marked for chaining", extra={
1595
- "job_id": jid,
1596
- "tool": job.get('tool'),
1597
- "wildcard_detected": parse_result.get('wildcard_detected', False)
1598
- })
1599
- _append_worker_log(f"job {jid} (status=warning) marked as chainable")
2222
+ logger.info(
2223
+ "Job with warning status marked for chaining",
2224
+ extra={
2225
+ "job_id": jid,
2226
+ "tool": job.get("tool"),
2227
+ "wildcard_detected": parse_result.get(
2228
+ "wildcard_detected", False
2229
+ ),
2230
+ },
2231
+ )
2232
+ _append_worker_log(
2233
+ f"job {jid} (status=warning) marked as chainable"
2234
+ )
1600
2235
  else:
1601
- logger.info("Job marked as chainable", extra={
1602
- "job_id": jid,
1603
- "tool": job.get('tool'),
1604
- "status": final_status
1605
- })
1606
- _append_worker_log(f"job {jid} marked as chainable (status={final_status})")
2236
+ logger.info(
2237
+ "Job marked as chainable",
2238
+ extra={
2239
+ "job_id": jid,
2240
+ "tool": job.get("tool"),
2241
+ "status": final_status,
2242
+ },
2243
+ )
2244
+ _append_worker_log(
2245
+ f"job {jid} marked as chainable (status={final_status})"
2246
+ )
1607
2247
  else:
1608
2248
  reason = f"chaining_disabled={not chaining.is_enabled()}, has_error={'error' in parse_result}, status={final_status}"
1609
2249
  _append_worker_log(f"job {jid} not chainable ({reason})")
1610
2250
 
1611
2251
  except Exception as chain_err:
1612
- logger.error("Failed to mark job as chainable", extra={
1613
- "job_id": jid,
1614
- "error": str(chain_err)
1615
- })
2252
+ logger.error(
2253
+ "Failed to mark job as chainable",
2254
+ extra={"job_id": jid, "error": str(chain_err)},
2255
+ )
1616
2256
  _append_worker_log(f"job {jid} chainable marking error: {chain_err}")
1617
2257
  # Mark as chained to prevent retry loops
1618
2258
  _update_job(jid, chained=True, chain_error=str(chain_err))
1619
2259
 
1620
2260
  except Exception as e:
1621
- logger.error("Job parse exception", extra={
1622
- "job_id": jid,
1623
- "error": str(e),
1624
- "traceback": traceback.format_exc()
1625
- })
2261
+ logger.error(
2262
+ "Job parse exception",
2263
+ extra={
2264
+ "job_id": jid,
2265
+ "error": str(e),
2266
+ "traceback": traceback.format_exc(),
2267
+ },
2268
+ )
1626
2269
  _append_worker_log(f"job {jid} parse exception: {e}")
1627
2270
 
1628
2271
  # Sanitize log file to remove credentials
1629
2272
  try:
1630
2273
  if os.path.exists(log_path):
1631
- with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
2274
+ with open(log_path, "r", encoding="utf-8", errors="replace") as f:
1632
2275
  original_log = f.read()
1633
-
2276
+
1634
2277
  # Check if encryption is enabled - only sanitize if encryption is on
1635
2278
  from souleyez.storage.crypto import CryptoManager
2279
+
1636
2280
  crypto_mgr = CryptoManager()
1637
-
1638
- if crypto_mgr.is_encryption_enabled() and LogSanitizer.contains_credentials(original_log):
2281
+
2282
+ if (
2283
+ crypto_mgr.is_encryption_enabled()
2284
+ and LogSanitizer.contains_credentials(original_log)
2285
+ ):
1639
2286
  sanitized_log = LogSanitizer.sanitize(original_log)
1640
-
2287
+
1641
2288
  # Write sanitized log back
1642
- with open(log_path, 'w', encoding='utf-8') as f:
2289
+ with open(log_path, "w", encoding="utf-8") as f:
1643
2290
  f.write(sanitized_log)
1644
-
1645
- summary = LogSanitizer.get_redaction_summary(original_log, sanitized_log)
2291
+
2292
+ summary = LogSanitizer.get_redaction_summary(
2293
+ original_log, sanitized_log
2294
+ )
1646
2295
  if summary:
1647
2296
  _append_worker_log(f"job {jid}: {summary}")
1648
- logger.info("Log sanitized", extra={
1649
- "job_id": jid,
1650
- "summary": summary
1651
- })
2297
+ logger.info(
2298
+ "Log sanitized", extra={"job_id": jid, "summary": summary}
2299
+ )
1652
2300
  except Exception as sanitize_err:
1653
- logger.warning("Log sanitization failed", extra={
1654
- "job_id": jid,
1655
- "error": str(sanitize_err)
1656
- })
2301
+ logger.warning(
2302
+ "Log sanitization failed",
2303
+ extra={"job_id": jid, "error": str(sanitize_err)},
2304
+ )
1657
2305
  # Don't fail the job if sanitization fails
1658
2306
 
1659
2307
  _append_worker_log(f"job {jid} finished: status={status} rc={rc}")
@@ -1661,27 +2309,31 @@ def run_job(jid: int) -> None:
1661
2309
  except Exception as e:
1662
2310
  now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
1663
2311
  _update_job(jid, status="error", error=str(e), finished_at=now)
1664
- logger.error("Job crashed", extra={
1665
- "event_type": "job_failed",
1666
- "job_id": jid,
1667
- "error": str(e),
1668
- "traceback": traceback.format_exc()
1669
- })
2312
+ logger.error(
2313
+ "Job crashed",
2314
+ extra={
2315
+ "event_type": "job_failed",
2316
+ "job_id": jid,
2317
+ "error": str(e),
2318
+ "traceback": traceback.format_exc(),
2319
+ },
2320
+ )
1670
2321
  _append_worker_log(f"job {jid} crashed: {e}")
1671
-
2322
+
1672
2323
  # Sanitize log even on error
1673
2324
  try:
1674
2325
  if os.path.exists(log_path):
1675
2326
  from souleyez.storage.crypto import CryptoManager
2327
+
1676
2328
  crypto_mgr = CryptoManager()
1677
-
2329
+
1678
2330
  if crypto_mgr.is_encryption_enabled():
1679
- with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
2331
+ with open(log_path, "r", encoding="utf-8", errors="replace") as f:
1680
2332
  original_log = f.read()
1681
-
2333
+
1682
2334
  if LogSanitizer.contains_credentials(original_log):
1683
2335
  sanitized_log = LogSanitizer.sanitize(original_log)
1684
- with open(log_path, 'w', encoding='utf-8') as f:
2336
+ with open(log_path, "w", encoding="utf-8") as f:
1685
2337
  f.write(sanitized_log)
1686
2338
  except Exception:
1687
2339
  pass # Silently fail sanitization on error
@@ -1714,7 +2366,7 @@ def _check_log_for_completion(log_path: str, tool: str) -> tuple:
1714
2366
  return (False, None)
1715
2367
 
1716
2368
  try:
1717
- with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
2369
+ with open(log_path, "r", encoding="utf-8", errors="replace") as f:
1718
2370
  # Read last 5KB of log (completion markers are at the end)
1719
2371
  f.seek(0, 2) # End of file
1720
2372
  file_size = f.tell()
@@ -1724,26 +2376,26 @@ def _check_log_for_completion(log_path: str, tool: str) -> tuple:
1724
2376
 
1725
2377
  # Tool-specific completion markers
1726
2378
  completion_markers = {
1727
- 'nmap': ['Nmap done:', 'Nmap scan report for'],
1728
- 'gobuster': ['Finished', 'Progress:'],
1729
- 'nikto': ['host(s) tested', 'End Time:'],
1730
- 'nuclei': ['Scan completed', 'matches found', 'No results found'],
1731
- 'sqlmap': ['fetched data logged', 'shutting down'],
1732
- 'hydra': ['valid password', 'host:', 'targets finished'],
1733
- 'ffuf': ['Progress:', 'Duration:'],
1734
- 'default': ['=== Completed:', 'Exit Code:']
2379
+ "nmap": ["Nmap done:", "Nmap scan report for"],
2380
+ "gobuster": ["Finished", "Progress:"],
2381
+ "nikto": ["host(s) tested", "End Time:"],
2382
+ "nuclei": ["Scan completed", "matches found", "No results found"],
2383
+ "sqlmap": ["fetched data logged", "shutting down"],
2384
+ "hydra": ["valid password", "host:", "targets finished"],
2385
+ "ffuf": ["Progress:", "Duration:"],
2386
+ "default": ["=== Completed:", "Exit Code:"],
1735
2387
  }
1736
2388
 
1737
- markers = completion_markers.get(tool.lower(), completion_markers['default'])
2389
+ markers = completion_markers.get(tool.lower(), completion_markers["default"])
1738
2390
 
1739
2391
  for marker in markers:
1740
2392
  if marker in log_tail:
1741
2393
  # Try to extract exit code
1742
2394
  exit_code = None
1743
- if 'Exit Code:' in log_tail:
2395
+ if "Exit Code:" in log_tail:
1744
2396
  try:
1745
- idx = log_tail.index('Exit Code:')
1746
- code_str = log_tail[idx+10:idx+15].strip().split()[0]
2397
+ idx = log_tail.index("Exit Code:")
2398
+ code_str = log_tail[idx + 10 : idx + 15].strip().split()[0]
1747
2399
  exit_code = int(code_str)
1748
2400
  except (ValueError, IndexError):
1749
2401
  exit_code = 0
@@ -1771,14 +2423,14 @@ def _detect_and_recover_stale_jobs() -> int:
1771
2423
 
1772
2424
  try:
1773
2425
  jobs = _read_jobs()
1774
- running_jobs = [j for j in jobs if j.get('status') == STATUS_RUNNING]
2426
+ running_jobs = [j for j in jobs if j.get("status") == STATUS_RUNNING]
1775
2427
 
1776
2428
  for job in running_jobs:
1777
- jid = job.get('id')
1778
- pid = job.get('pid')
1779
- tool = job.get('tool', 'unknown')
1780
- log_path = job.get('log')
1781
- stored_start_time = job.get('process_start_time')
2429
+ jid = job.get("id")
2430
+ pid = job.get("pid")
2431
+ tool = job.get("tool", "unknown")
2432
+ log_path = job.get("log")
2433
+ stored_start_time = job.get("process_start_time")
1782
2434
 
1783
2435
  # Check if PID is alive
1784
2436
  if _is_pid_alive(pid):
@@ -1793,13 +2445,16 @@ def _detect_and_recover_stale_jobs() -> int:
1793
2445
  f"job {jid}: PID {pid} reused (stored start: {stored_start_time:.0f}, "
1794
2446
  f"current: {current_start_time:.0f})"
1795
2447
  )
1796
- logger.warning("PID reuse detected", extra={
1797
- "job_id": jid,
1798
- "tool": tool,
1799
- "pid": pid,
1800
- "stored_start_time": stored_start_time,
1801
- "current_start_time": current_start_time
1802
- })
2448
+ logger.warning(
2449
+ "PID reuse detected",
2450
+ extra={
2451
+ "job_id": jid,
2452
+ "tool": tool,
2453
+ "pid": pid,
2454
+ "stored_start_time": stored_start_time,
2455
+ "current_start_time": current_start_time,
2456
+ },
2457
+ )
1803
2458
  # Fall through to stale job handling
1804
2459
  else:
1805
2460
  # Same process, still running
@@ -1813,11 +2468,10 @@ def _detect_and_recover_stale_jobs() -> int:
1813
2468
  else:
1814
2469
  # PID is dead - definitely stale
1815
2470
  _append_worker_log(f"job {jid}: detected stale (PID {pid} is dead)")
1816
- logger.warning("Stale job detected", extra={
1817
- "job_id": jid,
1818
- "tool": tool,
1819
- "pid": pid
1820
- })
2471
+ logger.warning(
2472
+ "Stale job detected",
2473
+ extra={"job_id": jid, "tool": tool, "pid": pid},
2474
+ )
1821
2475
 
1822
2476
  # Check if log shows completion
1823
2477
  completed, exit_code = _check_log_for_completion(log_path, tool)
@@ -1839,74 +2493,94 @@ def _detect_and_recover_stale_jobs() -> int:
1839
2493
 
1840
2494
  # Try to parse results
1841
2495
  try:
1842
- from .result_handler import handle_job_result
1843
2496
  from souleyez.core.tool_chaining import ToolChaining
1844
2497
 
2498
+ from .result_handler import handle_job_result
2499
+
1845
2500
  job = get_job(jid)
1846
2501
  parse_result = handle_job_result(job)
1847
2502
 
1848
2503
  if parse_result:
1849
- if 'error' in parse_result:
1850
- _append_worker_log(f"job {jid} stale recovery parse error: {parse_result['error']}")
2504
+ if "error" in parse_result:
2505
+ _append_worker_log(
2506
+ f"job {jid} stale recovery parse error: {parse_result['error']}"
2507
+ )
1851
2508
  else:
1852
2509
  # Determine final status and chainable in one check
1853
- final_status = parse_result.get('status', status)
2510
+ final_status = parse_result.get("status", status)
1854
2511
  chaining = ToolChaining()
1855
- should_chain = chaining.is_enabled() and is_chainable(final_status)
2512
+ should_chain = chaining.is_enabled() and is_chainable(
2513
+ final_status
2514
+ )
1856
2515
 
1857
2516
  # Build atomic update - parse_result + status + chainable together
1858
- update_fields = {'parse_result': parse_result}
1859
- if 'status' in parse_result:
1860
- update_fields['status'] = final_status
2517
+ update_fields = {"parse_result": parse_result}
2518
+ if "status" in parse_result:
2519
+ update_fields["status"] = final_status
1861
2520
  if should_chain:
1862
- update_fields['chainable'] = True
2521
+ update_fields["chainable"] = True
1863
2522
 
1864
2523
  # Single atomic update to prevent race condition
1865
2524
  _update_job(jid, **update_fields)
1866
2525
 
1867
- _append_worker_log(f"job {jid} stale recovery parsed: {parse_result.get('findings_added', 0)} findings")
2526
+ _append_worker_log(
2527
+ f"job {jid} stale recovery parsed: {parse_result.get('findings_added', 0)} findings"
2528
+ )
1868
2529
 
1869
- logger.info("Stale job recovered with results", extra={
1870
- "job_id": jid,
1871
- "tool": tool,
1872
- "status": final_status,
1873
- "parse_result": parse_result,
1874
- "chainable": should_chain
1875
- })
2530
+ logger.info(
2531
+ "Stale job recovered with results",
2532
+ extra={
2533
+ "job_id": jid,
2534
+ "tool": tool,
2535
+ "status": final_status,
2536
+ "parse_result": parse_result,
2537
+ "chainable": should_chain,
2538
+ },
2539
+ )
1876
2540
 
1877
2541
  if should_chain:
1878
- _append_worker_log(f"job {jid} stale recovery marked as chainable")
2542
+ _append_worker_log(
2543
+ f"job {jid} stale recovery marked as chainable"
2544
+ )
1879
2545
 
1880
2546
  except Exception as parse_err:
1881
- _append_worker_log(f"job {jid} stale recovery parse exception: {parse_err}")
2547
+ _append_worker_log(
2548
+ f"job {jid} stale recovery parse exception: {parse_err}"
2549
+ )
1882
2550
 
1883
2551
  recovered += 1
1884
2552
 
1885
2553
  else:
1886
2554
  # Process died mid-execution - mark as error
1887
- _append_worker_log(f"job {jid}: process died unexpectedly, marking as error")
1888
- _update_job(jid,
2555
+ _append_worker_log(
2556
+ f"job {jid}: process died unexpectedly, marking as error"
2557
+ )
2558
+ _update_job(
2559
+ jid,
1889
2560
  status=STATUS_ERROR,
1890
2561
  finished_at=now,
1891
2562
  pid=None,
1892
- error="Process terminated unexpectedly (worker restart or crash)"
2563
+ error="Process terminated unexpectedly (worker restart or crash)",
1893
2564
  )
1894
2565
 
1895
- logger.warning("Stale job marked as error", extra={
1896
- "job_id": jid,
1897
- "tool": tool,
1898
- "reason": "process_died_unexpectedly"
1899
- })
2566
+ logger.warning(
2567
+ "Stale job marked as error",
2568
+ extra={
2569
+ "job_id": jid,
2570
+ "tool": tool,
2571
+ "reason": "process_died_unexpectedly",
2572
+ },
2573
+ )
1900
2574
 
1901
2575
  recovered += 1
1902
2576
 
1903
2577
  return recovered
1904
2578
 
1905
2579
  except Exception as e:
1906
- logger.error("Stale job detection error", extra={
1907
- "error": str(e),
1908
- "traceback": traceback.format_exc()
1909
- })
2580
+ logger.error(
2581
+ "Stale job detection error",
2582
+ extra={"error": str(e), "traceback": traceback.format_exc()},
2583
+ )
1910
2584
  _append_worker_log(f"stale job detection error: {e}")
1911
2585
  return 0
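The PID-reuse check above compares a stored process start time against the current start time for the same PID; `_get_process_start_time()` itself is defined elsewhere. One way such a value can be read on Linux is from `/proc/<pid>/stat` — shown here as an assumption about the mechanism, not the package's implementation (psutil's `Process.create_time()` is another common choice).

def _example_process_start_time(pid: int):
    """Hypothetical: start time of a process in clock ticks since boot, from /proc/<pid>/stat."""
    try:
        with open(f"/proc/{pid}/stat", "r") as fh:
            # Strip "pid (comm) " first, since comm may contain spaces, then split the rest.
            fields = fh.read().rsplit(")", 1)[1].split()
        return float(fields[19])  # starttime is field 22 overall, index 19 after the strip
    except (OSError, IndexError, ValueError):
        return None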
1912
2586
 
@@ -1926,10 +2600,11 @@ def _check_msf_exploitation_success():
1926
2600
  try:
1927
2601
  jobs = _read_jobs()
1928
2602
  running_msf = [
1929
- j for j in jobs
1930
- if j.get('status') == STATUS_RUNNING
1931
- and j.get('tool') in ('msfconsole', 'msf')
1932
- and not j.get('exploitation_detected') # Not already detected
2603
+ j
2604
+ for j in jobs
2605
+ if j.get("status") == STATUS_RUNNING
2606
+ and j.get("tool") in ("msfconsole", "msf")
2607
+ and not j.get("exploitation_detected") # Not already detected
1933
2608
  ]
1934
2609
 
1935
2610
  if not running_msf:
@@ -1939,22 +2614,22 @@ def _check_msf_exploitation_success():
1939
2614
 
1940
2615
  # Success patterns from MSF output
1941
2616
  success_patterns = [
1942
- r'\[\*\]\s+Command shell session \d+ opened',
1943
- r'\[\*\]\s+Meterpreter session \d+ opened',
1944
- r'\[\+\]\s+\d+\.\d+\.\d+\.\d+:\d+\s+-\s+Session \d+ created',
1945
- r'\[\+\].*session.*opened',
1946
- r'\[\+\].*session.*created',
2617
+ r"\[\*\]\s+Command shell session \d+ opened",
2618
+ r"\[\*\]\s+Meterpreter session \d+ opened",
2619
+ r"\[\+\]\s+\d+\.\d+\.\d+\.\d+:\d+\s+-\s+Session \d+ created",
2620
+ r"\[\+\].*session.*opened",
2621
+ r"\[\+\].*session.*created",
1947
2622
  ]
1948
2623
 
1949
2624
  for job in running_msf:
1950
- jid = job.get('id')
2625
+ jid = job.get("id")
1951
2626
  log_path = os.path.join(JOBS_DIR, f"{jid}.log")
1952
2627
 
1953
2628
  if not os.path.exists(log_path):
1954
2629
  continue
1955
2630
 
1956
2631
  try:
1957
- with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
2632
+ with open(log_path, "r", encoding="utf-8", errors="replace") as f:
1958
2633
  content = f.read()
1959
2634
 
1960
2635
  # Check for success patterns
@@ -1966,21 +2641,27 @@ def _check_msf_exploitation_success():
  if match:
  session_opened = True
  # Extract session number if available
- session_match = re.search(r'session (\d+)', match.group(), re.IGNORECASE)
+ session_match = re.search(
+ r"session (\d+)", match.group(), re.IGNORECASE
+ )
  if session_match:
  session_info = f"Session {session_match.group(1)}"
  break
 
  if session_opened:
  # Update job with exploitation success
- _update_job(jid, exploitation_detected=True, session_info=session_info)
- _append_worker_log(f"job {jid}: exploitation success detected - {session_info or 'session opened'}")
+ _update_job(
+ jid, exploitation_detected=True, session_info=session_info
+ )
+ _append_worker_log(
+ f"job {jid}: exploitation success detected - {session_info or 'session opened'}"
+ )
 
  # Record exploit attempt as success
- engagement_id = job.get('engagement_id')
- target = job.get('target')
- label = job.get('label', '')
- args = job.get('args', [])
+ engagement_id = job.get("engagement_id")
+ target = job.get("target")
+ label = job.get("label", "")
+ args = job.get("args", [])
 
  if engagement_id and target:
  try:
@@ -1993,35 +2674,49 @@ def _check_msf_exploitation_success():
  if host:
  # Extract port from args (look for "set RPORT X" or "RPORT X")
  port = None
- args_str = ' '.join(args) if args else ''
- port_match = re.search(r'RPORT\s+(\d+)', args_str, re.IGNORECASE)
+ args_str = " ".join(args) if args else ""
+ port_match = re.search(
+ r"RPORT\s+(\d+)", args_str, re.IGNORECASE
+ )
  if port_match:
  port = int(port_match.group(1))
 
  # Find service_id for this port
  service_id = None
  if port:
- services = hm.get_host_services(host['id'])
+ services = hm.get_host_services(host["id"])
  for svc in services:
- if svc.get('port') == port:
- service_id = svc.get('id')
+ if svc.get("port") == port:
+ service_id = svc.get("id")
  break
 
  # Extract exploit identifier from label or args
- exploit_id = label.replace('MSF: ', 'msf:') if label.startswith('MSF:') else f'msf:{label}'
+ exploit_id = (
+ label.replace("MSF: ", "msf:")
+ if label.startswith("MSF:")
+ else f"msf:{label}"
+ )
 
  record_attempt(
  engagement_id=engagement_id,
- host_id=host['id'],
+ host_id=host["id"],
  exploit_identifier=exploit_id,
  exploit_title=label,
- status='success',
+ status="success",
  service_id=service_id,
- notes=f'Session opened - {session_info}' if session_info else 'Session opened'
+ notes=(
+ f"Session opened - {session_info}"
+ if session_info
+ else "Session opened"
+ ),
+ )
+ _append_worker_log(
+ f"job {jid}: recorded exploitation success for {target}:{port or 'unknown'}"
  )
- _append_worker_log(f"job {jid}: recorded exploitation success for {target}:{port or 'unknown'}")
  except Exception as e:
- _append_worker_log(f"job {jid}: failed to record exploit attempt: {e}")
+ _append_worker_log(
+ f"job {jid}: failed to record exploit attempt: {e}"
+ )
 
  detected_count += 1
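The hunk above adds log scanning for Metasploit session activity: each running msfconsole job's log is searched against a list of success regexes, and the session number is extracted when present. A standalone sketch of the same matching idea, assuming a plain-text log; `find_session` is an illustrative helper name, not part of the souleyez API.

```python
import re
from typing import Optional

# Mirrors the success_patterns list in the diff above.
SESSION_PATTERNS = [
    r"\[\*\]\s+Command shell session \d+ opened",
    r"\[\*\]\s+Meterpreter session \d+ opened",
    r"\[\+\]\s+\d+\.\d+\.\d+\.\d+:\d+\s+-\s+Session \d+ created",
    r"\[\+\].*session.*opened",
    r"\[\+\].*session.*created",
]

def find_session(log_text: str) -> Optional[str]:
    """Return e.g. 'Session 2' if any success pattern matches, else None."""
    for pattern in SESSION_PATTERNS:
        match = re.search(pattern, log_text)
        if not match:
            continue
        num = re.search(r"session (\d+)", match.group(), re.IGNORECASE)
        return f"Session {num.group(1)}" if num else "session opened"
    return None

print(find_session("[*] Meterpreter session 2 opened (10.0.0.5:4444 -> 10.0.0.9:49152)"))
# -> Session 2
```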
 
@@ -2045,11 +2740,11 @@ def _update_job_progress():
  """
  try:
  jobs = _read_jobs()
- running_jobs = [j for j in jobs if j.get('status') == STATUS_RUNNING]
+ running_jobs = [j for j in jobs if j.get("status") == STATUS_RUNNING]
 
  for job in running_jobs:
- jid = job.get('id')
- log_path = job.get('log')
+ jid = job.get("id")
+ log_path = job.get("log")
 
  if not log_path or not os.path.exists(log_path):
  continue
@@ -2061,23 +2756,26 @@ def _update_job_progress():
  time_since_output = current_time - mtime
 
  # Update last_output_at in job record
- updates = {'last_output_at': mtime}
+ updates = {"last_output_at": mtime}
 
  # Flag as possibly hung if no output for threshold
- was_hung = job.get('possibly_hung', False)
+ was_hung = job.get("possibly_hung", False)
  is_hung = time_since_output > JOB_HUNG_THRESHOLD
 
  if is_hung != was_hung:
- updates['possibly_hung'] = is_hung
+ updates["possibly_hung"] = is_hung
  if is_hung:
  _append_worker_log(
  f"job {jid}: no output for {int(time_since_output)}s, flagged as possibly hung"
  )
- logger.warning("Job possibly hung", extra={
- "job_id": jid,
- "tool": job.get('tool'),
- "time_since_output": int(time_since_output)
- })
+ logger.warning(
+ "Job possibly hung",
+ extra={
+ "job_id": jid,
+ "tool": job.get("tool"),
+ "time_since_output": int(time_since_output),
+ },
+ )
 
  _update_job(jid, **updates)
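_update_job_progress treats the log file's mtime as a liveness signal: a running job whose log has not changed for longer than JOB_HUNG_THRESHOLD is flagged possibly_hung, and the flag is cleared once output resumes. A minimal sketch of that check, with an assumed threshold value and an illustrative function name:

```python
import os
import time

JOB_HUNG_THRESHOLD = 300  # seconds; illustrative value, not souleyez's actual setting

def is_possibly_hung(log_path: str, threshold: float = JOB_HUNG_THRESHOLD) -> bool:
    """True if the job's log exists but has produced no output for `threshold` seconds."""
    if not os.path.exists(log_path):
        return False
    idle = time.time() - os.path.getmtime(log_path)
    return idle > threshold

# As in the worker, act only on transitions (hung -> not hung or vice versa)
# so the flag is written once rather than on every poll:
# if is_possibly_hung(job["log"]) != job.get("possibly_hung", False):
#     ...update the job record and log the change...
```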
 
@@ -2178,9 +2876,9 @@ def worker_loop(poll_interval: float = 2.0):
  if processed > 0:
  _append_worker_log(f"processed {processed} chainable job(s)")
  except Exception as e:
- logger.error("Chain processing error in worker loop", extra={
- "error": str(e)
- })
+ logger.error(
+ "Chain processing error in worker loop", extra={"error": str(e)}
+ )
  _append_worker_log(f"chain processing error: {e}")
 
  # Sleep before next iteration
@@ -2209,8 +2907,14 @@ def start_worker(detach: bool = True, fg: bool = False):
  else:
  # Running as Python script
  python = exe or "python3"
- cmd = [python, "-u", "-c",
- "import sys; from souleyez.engine.background import worker_loop; worker_loop()"]
-
- subprocess.Popen(cmd, stdout=open(WORKER_LOG, "a"), stderr=subprocess.STDOUT, close_fds=True)
+ cmd = [
+ python,
+ "-u",
+ "-c",
+ "import sys; from souleyez.engine.background import worker_loop; worker_loop()",
+ ]
+
+ subprocess.Popen(
+ cmd, stdout=open(WORKER_LOG, "a"), stderr=subprocess.STDOUT, close_fds=True
+ )
  _append_worker_log("Started background worker (detached)")