souleyez-2.43.29-py3-none-any.whl → souleyez-3.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (358)
  1. souleyez/__init__.py +1 -2
  2. souleyez/ai/__init__.py +21 -15
  3. souleyez/ai/action_mapper.py +249 -150
  4. souleyez/ai/chain_advisor.py +116 -100
  5. souleyez/ai/claude_provider.py +29 -28
  6. souleyez/ai/context_builder.py +80 -62
  7. souleyez/ai/executor.py +158 -117
  8. souleyez/ai/feedback_handler.py +136 -121
  9. souleyez/ai/llm_factory.py +27 -20
  10. souleyez/ai/llm_provider.py +4 -2
  11. souleyez/ai/ollama_provider.py +6 -9
  12. souleyez/ai/ollama_service.py +44 -37
  13. souleyez/ai/path_scorer.py +91 -76
  14. souleyez/ai/recommender.py +176 -144
  15. souleyez/ai/report_context.py +74 -73
  16. souleyez/ai/report_service.py +84 -66
  17. souleyez/ai/result_parser.py +222 -229
  18. souleyez/ai/safety.py +67 -44
  19. souleyez/auth/__init__.py +23 -22
  20. souleyez/auth/audit.py +36 -26
  21. souleyez/auth/engagement_access.py +65 -48
  22. souleyez/auth/permissions.py +14 -3
  23. souleyez/auth/session_manager.py +54 -37
  24. souleyez/auth/user_manager.py +109 -64
  25. souleyez/commands/audit.py +40 -43
  26. souleyez/commands/auth.py +35 -15
  27. souleyez/commands/deliverables.py +55 -50
  28. souleyez/commands/engagement.py +47 -28
  29. souleyez/commands/license.py +32 -23
  30. souleyez/commands/screenshots.py +36 -32
  31. souleyez/commands/user.py +82 -36
  32. souleyez/config.py +52 -44
  33. souleyez/core/credential_tester.py +87 -81
  34. souleyez/core/cve_mappings.py +179 -192
  35. souleyez/core/cve_matcher.py +162 -148
  36. souleyez/core/msf_auto_mapper.py +100 -83
  37. souleyez/core/msf_chain_engine.py +294 -256
  38. souleyez/core/msf_database.py +153 -70
  39. souleyez/core/msf_integration.py +679 -673
  40. souleyez/core/msf_rpc_client.py +40 -42
  41. souleyez/core/msf_rpc_manager.py +77 -79
  42. souleyez/core/msf_sync_manager.py +241 -181
  43. souleyez/core/network_utils.py +22 -15
  44. souleyez/core/parser_handler.py +34 -25
  45. souleyez/core/pending_chains.py +114 -63
  46. souleyez/core/templates.py +158 -107
  47. souleyez/core/tool_chaining.py +9564 -2881
  48. souleyez/core/version_utils.py +79 -94
  49. souleyez/core/vuln_correlation.py +136 -89
  50. souleyez/core/web_utils.py +33 -32
  51. souleyez/data/wordlists/ad_users.txt +378 -0
  52. souleyez/data/wordlists/api_endpoints_large.txt +769 -0
  53. souleyez/data/wordlists/home_dir_sensitive.txt +39 -0
  54. souleyez/data/wordlists/lfi_payloads.txt +82 -0
  55. souleyez/data/wordlists/passwords_brute.txt +1548 -0
  56. souleyez/data/wordlists/passwords_crack.txt +2479 -0
  57. souleyez/data/wordlists/passwords_spray.txt +386 -0
  58. souleyez/data/wordlists/subdomains_large.txt +5057 -0
  59. souleyez/data/wordlists/usernames_common.txt +694 -0
  60. souleyez/data/wordlists/web_dirs_large.txt +4769 -0
  61. souleyez/detection/__init__.py +1 -1
  62. souleyez/detection/attack_signatures.py +12 -17
  63. souleyez/detection/mitre_mappings.py +61 -55
  64. souleyez/detection/validator.py +97 -86
  65. souleyez/devtools.py +23 -10
  66. souleyez/docs/README.md +4 -4
  67. souleyez/docs/api-reference/cli-commands.md +2 -2
  68. souleyez/docs/developer-guide/adding-new-tools.md +562 -0
  69. souleyez/docs/user-guide/auto-chaining.md +30 -8
  70. souleyez/docs/user-guide/getting-started.md +1 -1
  71. souleyez/docs/user-guide/installation.md +26 -3
  72. souleyez/docs/user-guide/metasploit-integration.md +2 -2
  73. souleyez/docs/user-guide/rbac.md +1 -1
  74. souleyez/docs/user-guide/scope-management.md +1 -1
  75. souleyez/docs/user-guide/siem-integration.md +1 -1
  76. souleyez/docs/user-guide/tools-reference.md +1 -8
  77. souleyez/docs/user-guide/worker-management.md +1 -1
  78. souleyez/engine/background.py +1239 -535
  79. souleyez/engine/base.py +4 -1
  80. souleyez/engine/job_status.py +17 -49
  81. souleyez/engine/log_sanitizer.py +103 -77
  82. souleyez/engine/manager.py +38 -7
  83. souleyez/engine/result_handler.py +2200 -1550
  84. souleyez/engine/worker_manager.py +50 -41
  85. souleyez/export/evidence_bundle.py +72 -62
  86. souleyez/feature_flags/features.py +16 -20
  87. souleyez/feature_flags.py +5 -9
  88. souleyez/handlers/__init__.py +11 -0
  89. souleyez/handlers/base.py +188 -0
  90. souleyez/handlers/bash_handler.py +277 -0
  91. souleyez/handlers/bloodhound_handler.py +243 -0
  92. souleyez/handlers/certipy_handler.py +311 -0
  93. souleyez/handlers/crackmapexec_handler.py +486 -0
  94. souleyez/handlers/dnsrecon_handler.py +344 -0
  95. souleyez/handlers/enum4linux_handler.py +400 -0
  96. souleyez/handlers/evil_winrm_handler.py +493 -0
  97. souleyez/handlers/ffuf_handler.py +815 -0
  98. souleyez/handlers/gobuster_handler.py +1114 -0
  99. souleyez/handlers/gpp_extract_handler.py +334 -0
  100. souleyez/handlers/hashcat_handler.py +444 -0
  101. souleyez/handlers/hydra_handler.py +564 -0
  102. souleyez/handlers/impacket_getuserspns_handler.py +343 -0
  103. souleyez/handlers/impacket_psexec_handler.py +222 -0
  104. souleyez/handlers/impacket_secretsdump_handler.py +426 -0
  105. souleyez/handlers/john_handler.py +286 -0
  106. souleyez/handlers/katana_handler.py +425 -0
  107. souleyez/handlers/kerbrute_handler.py +298 -0
  108. souleyez/handlers/ldapsearch_handler.py +636 -0
  109. souleyez/handlers/lfi_extract_handler.py +464 -0
  110. souleyez/handlers/msf_auxiliary_handler.py +409 -0
  111. souleyez/handlers/msf_exploit_handler.py +380 -0
  112. souleyez/handlers/nikto_handler.py +413 -0
  113. souleyez/handlers/nmap_handler.py +821 -0
  114. souleyez/handlers/nuclei_handler.py +359 -0
  115. souleyez/handlers/nxc_handler.py +417 -0
  116. souleyez/handlers/rdp_sec_check_handler.py +353 -0
  117. souleyez/handlers/registry.py +292 -0
  118. souleyez/handlers/responder_handler.py +232 -0
  119. souleyez/handlers/service_explorer_handler.py +434 -0
  120. souleyez/handlers/smbclient_handler.py +344 -0
  121. souleyez/handlers/smbmap_handler.py +510 -0
  122. souleyez/handlers/smbpasswd_handler.py +296 -0
  123. souleyez/handlers/sqlmap_handler.py +1116 -0
  124. souleyez/handlers/theharvester_handler.py +601 -0
  125. souleyez/handlers/web_login_test_handler.py +327 -0
  126. souleyez/handlers/whois_handler.py +277 -0
  127. souleyez/handlers/wpscan_handler.py +554 -0
  128. souleyez/history.py +32 -16
  129. souleyez/importers/msf_importer.py +106 -75
  130. souleyez/importers/smart_importer.py +208 -147
  131. souleyez/integrations/siem/__init__.py +10 -10
  132. souleyez/integrations/siem/base.py +17 -18
  133. souleyez/integrations/siem/elastic.py +108 -122
  134. souleyez/integrations/siem/factory.py +207 -80
  135. souleyez/integrations/siem/googlesecops.py +146 -154
  136. souleyez/integrations/siem/rule_mappings/__init__.py +1 -1
  137. souleyez/integrations/siem/rule_mappings/wazuh_rules.py +8 -5
  138. souleyez/integrations/siem/sentinel.py +107 -109
  139. souleyez/integrations/siem/splunk.py +246 -212
  140. souleyez/integrations/siem/wazuh.py +65 -71
  141. souleyez/integrations/wazuh/__init__.py +5 -5
  142. souleyez/integrations/wazuh/client.py +70 -93
  143. souleyez/integrations/wazuh/config.py +85 -57
  144. souleyez/integrations/wazuh/host_mapper.py +28 -36
  145. souleyez/integrations/wazuh/sync.py +78 -68
  146. souleyez/intelligence/__init__.py +4 -5
  147. souleyez/intelligence/correlation_analyzer.py +309 -295
  148. souleyez/intelligence/exploit_knowledge.py +661 -623
  149. souleyez/intelligence/exploit_suggestions.py +159 -139
  150. souleyez/intelligence/gap_analyzer.py +132 -97
  151. souleyez/intelligence/gap_detector.py +251 -214
  152. souleyez/intelligence/sensitive_tables.py +266 -129
  153. souleyez/intelligence/service_parser.py +137 -123
  154. souleyez/intelligence/surface_analyzer.py +407 -268
  155. souleyez/intelligence/target_parser.py +159 -162
  156. souleyez/licensing/__init__.py +6 -6
  157. souleyez/licensing/validator.py +17 -19
  158. souleyez/log_config.py +79 -54
  159. souleyez/main.py +1505 -687
  160. souleyez/migrations/fix_job_counter.py +16 -14
  161. souleyez/parsers/bloodhound_parser.py +41 -39
  162. souleyez/parsers/crackmapexec_parser.py +178 -111
  163. souleyez/parsers/dalfox_parser.py +72 -77
  164. souleyez/parsers/dnsrecon_parser.py +103 -91
  165. souleyez/parsers/enum4linux_parser.py +183 -153
  166. souleyez/parsers/ffuf_parser.py +29 -25
  167. souleyez/parsers/gobuster_parser.py +301 -41
  168. souleyez/parsers/hashcat_parser.py +324 -79
  169. souleyez/parsers/http_fingerprint_parser.py +350 -103
  170. souleyez/parsers/hydra_parser.py +131 -111
  171. souleyez/parsers/impacket_parser.py +231 -178
  172. souleyez/parsers/john_parser.py +98 -86
  173. souleyez/parsers/katana_parser.py +316 -0
  174. souleyez/parsers/msf_parser.py +943 -498
  175. souleyez/parsers/nikto_parser.py +346 -65
  176. souleyez/parsers/nmap_parser.py +262 -174
  177. souleyez/parsers/nuclei_parser.py +40 -44
  178. souleyez/parsers/responder_parser.py +26 -26
  179. souleyez/parsers/searchsploit_parser.py +74 -74
  180. souleyez/parsers/service_explorer_parser.py +279 -0
  181. souleyez/parsers/smbmap_parser.py +180 -124
  182. souleyez/parsers/sqlmap_parser.py +434 -308
  183. souleyez/parsers/theharvester_parser.py +75 -57
  184. souleyez/parsers/whois_parser.py +135 -94
  185. souleyez/parsers/wpscan_parser.py +278 -190
  186. souleyez/plugins/afp.py +44 -36
  187. souleyez/plugins/afp_brute.py +114 -46
  188. souleyez/plugins/ard.py +48 -37
  189. souleyez/plugins/bloodhound.py +95 -61
  190. souleyez/plugins/certipy.py +303 -0
  191. souleyez/plugins/crackmapexec.py +186 -85
  192. souleyez/plugins/dalfox.py +120 -59
  193. souleyez/plugins/dns_hijack.py +146 -41
  194. souleyez/plugins/dnsrecon.py +97 -61
  195. souleyez/plugins/enum4linux.py +91 -66
  196. souleyez/plugins/evil_winrm.py +291 -0
  197. souleyez/plugins/ffuf.py +166 -90
  198. souleyez/plugins/firmware_extract.py +133 -29
  199. souleyez/plugins/gobuster.py +387 -190
  200. souleyez/plugins/gpp_extract.py +393 -0
  201. souleyez/plugins/hashcat.py +100 -73
  202. souleyez/plugins/http_fingerprint.py +913 -267
  203. souleyez/plugins/hydra.py +566 -200
  204. souleyez/plugins/impacket_getnpusers.py +117 -69
  205. souleyez/plugins/impacket_psexec.py +84 -64
  206. souleyez/plugins/impacket_secretsdump.py +103 -69
  207. souleyez/plugins/impacket_smbclient.py +89 -75
  208. souleyez/plugins/john.py +86 -69
  209. souleyez/plugins/katana.py +313 -0
  210. souleyez/plugins/kerbrute.py +237 -0
  211. souleyez/plugins/lfi_extract.py +541 -0
  212. souleyez/plugins/macos_ssh.py +117 -48
  213. souleyez/plugins/mdns.py +35 -30
  214. souleyez/plugins/msf_auxiliary.py +253 -130
  215. souleyez/plugins/msf_exploit.py +239 -161
  216. souleyez/plugins/nikto.py +134 -78
  217. souleyez/plugins/nmap.py +275 -91
  218. souleyez/plugins/nuclei.py +180 -89
  219. souleyez/plugins/nxc.py +285 -0
  220. souleyez/plugins/plugin_base.py +35 -36
  221. souleyez/plugins/plugin_template.py +13 -5
  222. souleyez/plugins/rdp_sec_check.py +130 -0
  223. souleyez/plugins/responder.py +112 -71
  224. souleyez/plugins/router_http_brute.py +76 -65
  225. souleyez/plugins/router_ssh_brute.py +118 -41
  226. souleyez/plugins/router_telnet_brute.py +124 -42
  227. souleyez/plugins/routersploit.py +91 -59
  228. souleyez/plugins/routersploit_exploit.py +77 -55
  229. souleyez/plugins/searchsploit.py +91 -77
  230. souleyez/plugins/service_explorer.py +1160 -0
  231. souleyez/plugins/smbmap.py +122 -72
  232. souleyez/plugins/smbpasswd.py +215 -0
  233. souleyez/plugins/sqlmap.py +301 -113
  234. souleyez/plugins/theharvester.py +127 -75
  235. souleyez/plugins/tr069.py +79 -57
  236. souleyez/plugins/upnp.py +65 -47
  237. souleyez/plugins/upnp_abuse.py +73 -55
  238. souleyez/plugins/vnc_access.py +129 -42
  239. souleyez/plugins/vnc_brute.py +109 -38
  240. souleyez/plugins/web_login_test.py +417 -0
  241. souleyez/plugins/whois.py +77 -58
  242. souleyez/plugins/wpscan.py +219 -69
  243. souleyez/reporting/__init__.py +2 -1
  244. souleyez/reporting/attack_chain.py +411 -346
  245. souleyez/reporting/charts.py +436 -501
  246. souleyez/reporting/compliance_mappings.py +334 -201
  247. souleyez/reporting/detection_report.py +126 -125
  248. souleyez/reporting/formatters.py +828 -591
  249. souleyez/reporting/generator.py +386 -302
  250. souleyez/reporting/metrics.py +72 -75
  251. souleyez/scanner.py +35 -29
  252. souleyez/security/__init__.py +37 -11
  253. souleyez/security/scope_validator.py +175 -106
  254. souleyez/security/validation.py +237 -149
  255. souleyez/security.py +22 -6
  256. souleyez/storage/credentials.py +247 -186
  257. souleyez/storage/crypto.py +296 -129
  258. souleyez/storage/database.py +73 -50
  259. souleyez/storage/db.py +58 -36
  260. souleyez/storage/deliverable_evidence.py +177 -128
  261. souleyez/storage/deliverable_exporter.py +282 -246
  262. souleyez/storage/deliverable_templates.py +134 -116
  263. souleyez/storage/deliverables.py +135 -130
  264. souleyez/storage/engagements.py +109 -56
  265. souleyez/storage/evidence.py +181 -152
  266. souleyez/storage/execution_log.py +31 -17
  267. souleyez/storage/exploit_attempts.py +93 -57
  268. souleyez/storage/exploits.py +67 -36
  269. souleyez/storage/findings.py +48 -61
  270. souleyez/storage/hosts.py +176 -144
  271. souleyez/storage/migrate_to_engagements.py +43 -19
  272. souleyez/storage/migrations/_001_add_credential_enhancements.py +22 -12
  273. souleyez/storage/migrations/_002_add_status_tracking.py +10 -7
  274. souleyez/storage/migrations/_003_add_execution_log.py +14 -8
  275. souleyez/storage/migrations/_005_screenshots.py +13 -5
  276. souleyez/storage/migrations/_006_deliverables.py +13 -5
  277. souleyez/storage/migrations/_007_deliverable_templates.py +12 -7
  278. souleyez/storage/migrations/_008_add_nuclei_table.py +10 -4
  279. souleyez/storage/migrations/_010_evidence_linking.py +17 -10
  280. souleyez/storage/migrations/_011_timeline_tracking.py +20 -13
  281. souleyez/storage/migrations/_012_team_collaboration.py +34 -21
  282. souleyez/storage/migrations/_013_add_host_tags.py +12 -6
  283. souleyez/storage/migrations/_014_exploit_attempts.py +22 -10
  284. souleyez/storage/migrations/_015_add_mac_os_fields.py +15 -7
  285. souleyez/storage/migrations/_016_add_domain_field.py +10 -4
  286. souleyez/storage/migrations/_017_msf_sessions.py +16 -8
  287. souleyez/storage/migrations/_018_add_osint_target.py +10 -6
  288. souleyez/storage/migrations/_019_add_engagement_type.py +10 -6
  289. souleyez/storage/migrations/_020_add_rbac.py +36 -15
  290. souleyez/storage/migrations/_021_wazuh_integration.py +20 -8
  291. souleyez/storage/migrations/_022_wazuh_indexer_columns.py +6 -4
  292. souleyez/storage/migrations/_023_fix_detection_results_fk.py +16 -6
  293. souleyez/storage/migrations/_024_wazuh_vulnerabilities.py +26 -10
  294. souleyez/storage/migrations/_025_multi_siem_support.py +3 -5
  295. souleyez/storage/migrations/_026_add_engagement_scope.py +31 -12
  296. souleyez/storage/migrations/_027_multi_siem_persistence.py +32 -15
  297. souleyez/storage/migrations/__init__.py +26 -26
  298. souleyez/storage/migrations/migration_manager.py +19 -19
  299. souleyez/storage/msf_sessions.py +100 -65
  300. souleyez/storage/osint.py +17 -24
  301. souleyez/storage/recommendation_engine.py +269 -235
  302. souleyez/storage/screenshots.py +33 -32
  303. souleyez/storage/smb_shares.py +136 -92
  304. souleyez/storage/sqlmap_data.py +183 -128
  305. souleyez/storage/team_collaboration.py +135 -141
  306. souleyez/storage/timeline_tracker.py +122 -94
  307. souleyez/storage/wazuh_vulns.py +64 -66
  308. souleyez/storage/web_paths.py +33 -37
  309. souleyez/testing/credential_tester.py +221 -205
  310. souleyez/ui/__init__.py +1 -1
  311. souleyez/ui/ai_quotes.py +12 -12
  312. souleyez/ui/attack_surface.py +2439 -1516
  313. souleyez/ui/chain_rules_view.py +914 -382
  314. souleyez/ui/correlation_view.py +312 -230
  315. souleyez/ui/dashboard.py +2382 -1130
  316. souleyez/ui/deliverables_view.py +148 -62
  317. souleyez/ui/design_system.py +13 -13
  318. souleyez/ui/errors.py +49 -49
  319. souleyez/ui/evidence_linking_view.py +284 -179
  320. souleyez/ui/evidence_vault.py +393 -285
  321. souleyez/ui/exploit_suggestions_view.py +555 -349
  322. souleyez/ui/export_view.py +100 -66
  323. souleyez/ui/gap_analysis_view.py +315 -171
  324. souleyez/ui/help_system.py +105 -97
  325. souleyez/ui/intelligence_view.py +436 -293
  326. souleyez/ui/interactive.py +23034 -10679
  327. souleyez/ui/interactive_selector.py +75 -68
  328. souleyez/ui/log_formatter.py +47 -39
  329. souleyez/ui/menu_components.py +22 -13
  330. souleyez/ui/msf_auxiliary_menu.py +184 -133
  331. souleyez/ui/pending_chains_view.py +336 -172
  332. souleyez/ui/progress_indicators.py +5 -3
  333. souleyez/ui/recommendations_view.py +195 -137
  334. souleyez/ui/rule_builder.py +343 -225
  335. souleyez/ui/setup_wizard.py +678 -284
  336. souleyez/ui/shortcuts.py +217 -165
  337. souleyez/ui/splunk_gap_analysis_view.py +452 -270
  338. souleyez/ui/splunk_vulns_view.py +139 -86
  339. souleyez/ui/team_dashboard.py +498 -335
  340. souleyez/ui/template_selector.py +196 -105
  341. souleyez/ui/terminal.py +6 -6
  342. souleyez/ui/timeline_view.py +198 -127
  343. souleyez/ui/tool_setup.py +264 -164
  344. souleyez/ui/tutorial.py +202 -72
  345. souleyez/ui/tutorial_state.py +40 -40
  346. souleyez/ui/wazuh_vulns_view.py +235 -141
  347. souleyez/ui/wordlist_browser.py +260 -107
  348. souleyez/ui.py +464 -312
  349. souleyez/utils/tool_checker.py +427 -367
  350. souleyez/utils.py +33 -29
  351. souleyez/wordlists.py +134 -167
  352. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/METADATA +2 -2
  353. souleyez-3.0.0.dist-info/RECORD +443 -0
  354. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/WHEEL +1 -1
  355. souleyez-2.43.29.dist-info/RECORD +0 -379
  356. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/entry_points.txt +0 -0
  357. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/licenses/LICENSE +0 -0
  358. {souleyez-2.43.29.dist-info → souleyez-3.0.0.dist-info}/top_level.txt +0 -0
@@ -14,25 +14,35 @@ Design notes:
 """
 
 from __future__ import annotations
-import os
-import sys
+
+import fcntl
+import inspect
 import json
-import time
-import signal
-import tempfile
+import os
 import shutil
+import signal
 import subprocess
+import sys
+import tempfile
 import threading
-import inspect
+import time
 import traceback
-import fcntl
-from typing import List, Dict, Optional, Any
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional
+
 from souleyez.log_config import get_logger
-from .log_sanitizer import LogSanitizer
+
 from .job_status import (
-    STATUS_QUEUED, STATUS_RUNNING, STATUS_DONE, STATUS_NO_RESULTS,
-    STATUS_WARNING, STATUS_ERROR, STATUS_KILLED, is_chainable
+    STATUS_DONE,
+    STATUS_ERROR,
+    STATUS_KILLED,
+    STATUS_NO_RESULTS,
+    STATUS_QUEUED,
+    STATUS_RUNNING,
+    STATUS_WARNING,
+    is_chainable,
 )
+from .log_sanitizer import LogSanitizer
 
 logger = get_logger(__name__)
 
@@ -43,15 +53,102 @@ LOGS_DIR = os.path.join(DATA_DIR, "logs")
 JOBS_FILE = os.path.join(JOBS_DIR, "jobs.json")
 WORKER_LOG = os.path.join(LOGS_DIR, "worker.log")
 HEARTBEAT_FILE = os.path.join(JOBS_DIR, ".worker_heartbeat")
+JOBS_LOCK_FILE = os.path.join(JOBS_DIR, ".jobs.lock")  # Cross-process file lock
 JOB_TIMEOUT_SECONDS = 3600  # 1 hour (changed from 300s/5min)
 HEARTBEAT_INTERVAL = 10  # seconds between heartbeat writes
 HEARTBEAT_STALE_THRESHOLD = 30  # seconds before heartbeat considered stale
 JOB_HUNG_THRESHOLD = 300  # 5 minutes with no output = possibly hung
 JOBS_BACKUP_COUNT = 3  # Number of rotating backups to keep
+MAX_RETRIES = 2  # Maximum auto-retries for transient errors
+
+# Patterns indicating transient errors that should trigger auto-retry
+# These are network/timing issues that often succeed on retry
+TRANSIENT_ERROR_PATTERNS = [
+    "NetBIOSTimeout",
+    "connection timed out",
+    "Connection timed out",
+    "NETBIOS connection with the remote host timed out",
+    "Connection reset by peer",
+    "temporarily unavailable",
+    "Resource temporarily unavailable",
+    "SMBTimeout",
+    "timed out while waiting",
+]
 
 _lock = threading.RLock()  # Reentrant lock allows nested acquisition by same thread
 
 
+def _is_transient_error(log_content: str) -> bool:
+    """Check if log content indicates a transient error that should be retried."""
+    if not log_content:
+        return False
+    for pattern in TRANSIENT_ERROR_PATTERNS:
+        if pattern.lower() in log_content.lower():
+            return True
+    return False
+
+
+class _CrossProcessLock:
+    """
+    Cross-process file lock using fcntl.flock().
+
+    This ensures that only one process (UI or worker) can read/write
+    jobs.json at a time, preventing race conditions where one process
+    overwrites another's changes.
+    """
+
+    def __init__(self, lock_file: str, timeout: float = 10.0):
+        self.lock_file = lock_file
+        self.timeout = timeout
+        self._fd = None
+
+    def __enter__(self):
+        import errno
+        import fcntl
+
+        # Ensure lock file directory exists
+        os.makedirs(os.path.dirname(self.lock_file), exist_ok=True)
+
+        # Open lock file (create if doesn't exist)
+        self._fd = open(self.lock_file, "w")
+
+        # Try to acquire lock with timeout
+        start_time = time.time()
+        while True:
+            try:
+                fcntl.flock(self._fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+                return self  # Lock acquired
+            except (IOError, OSError) as e:
+                if e.errno not in (errno.EWOULDBLOCK, errno.EAGAIN):
+                    raise
+                # Lock held by another process, wait and retry
+                if time.time() - start_time > self.timeout:
+                    raise TimeoutError(
+                        f"Could not acquire lock on {self.lock_file} within {self.timeout}s"
+                    )
+                time.sleep(0.05)  # 50ms backoff
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        import fcntl
+
+        if self._fd:
+            try:
+                fcntl.flock(self._fd.fileno(), fcntl.LOCK_UN)
+            except Exception:
+                pass
+            try:
+                self._fd.close()
+            except Exception:
+                pass
+            self._fd = None
+        return False  # Don't suppress exceptions
+
+
+def _jobs_lock():
+    """Get a cross-process lock for jobs.json access."""
+    return _CrossProcessLock(JOBS_LOCK_FILE)
+
+
 def _ensure_dirs():
     os.makedirs(JOBS_DIR, exist_ok=True)
     os.makedirs(LOGS_DIR, exist_ok=True)
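The `_CrossProcessLock` added above is an fcntl-based advisory lock meant to be used as a context manager around every read-modify-write of jobs.json. A minimal usage sketch, assuming the class exactly as shown in the hunk; the file path and helper function below are invented for illustration and are not part of the package:

    import json
    import os

    def bump_shared_counter(path: str) -> int:
        """Illustrative only: serialize a read-modify-write across processes."""
        # The lock file lives next to the data file; a second process blocks
        # for at most `timeout` seconds before a TimeoutError is raised.
        with _CrossProcessLock(path + ".lock", timeout=5.0):
            value = 0
            if os.path.exists(path):
                with open(path, "r", encoding="utf-8") as fh:
                    value = json.load(fh)
            value += 1
            with open(path, "w", encoding="utf-8") as fh:
                json.dump(value, fh)
            return value

As the later hunks show, callers in this module catch `TimeoutError` explicitly and fall back to unlocked I/O rather than dropping the operation.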
@@ -102,11 +199,13 @@ def _recover_from_backup() -> List[Dict[str, Any]]:
             with open(backup_path, "r", encoding="utf-8") as fh:
                 jobs = json.load(fh)
             if isinstance(jobs, list):
-                _append_worker_log(f"recovered {len(jobs)} jobs from backup: {backup_path}")
-                logger.info("Jobs recovered from backup", extra={
-                    "backup_path": backup_path,
-                    "job_count": len(jobs)
-                })
+                _append_worker_log(
+                    f"recovered {len(jobs)} jobs from backup: {backup_path}"
+                )
+                logger.info(
+                    "Jobs recovered from backup",
+                    extra={"backup_path": backup_path, "job_count": len(jobs)},
+                )
                 return jobs
         except Exception as e:
             _append_worker_log(f"backup {backup_path} also corrupt: {e}")
@@ -115,19 +214,33 @@ def _recover_from_backup() -> List[Dict[str, Any]]:
 
 
 def _read_jobs() -> List[Dict[str, Any]]:
+    """
+    Read jobs from jobs.json with cross-process file locking.
+
+    The file lock ensures we don't read while another process is writing,
+    preventing partially-written files from being read.
+    """
     _ensure_dirs()
     if not os.path.exists(JOBS_FILE):
         return []
     try:
-        with open(JOBS_FILE, "r", encoding="utf-8") as fh:
-            return json.load(fh)
+        with _jobs_lock():
+            with open(JOBS_FILE, "r", encoding="utf-8") as fh:
+                return json.load(fh)
+    except TimeoutError:
+        # Lock acquisition timed out - log and try without lock
+        _append_worker_log("jobs.json lock timeout on read, reading anyway")
+        try:
+            with open(JOBS_FILE, "r", encoding="utf-8") as fh:
+                return json.load(fh)
+        except Exception:
+            return []
     except Exception as e:
         # Log corruption event
         _append_worker_log(f"jobs.json corrupt: {e}")
-        logger.error("Jobs file corrupted", extra={
-            "error": str(e),
-            "jobs_file": JOBS_FILE
-        })
+        logger.error(
+            "Jobs file corrupted", extra={"error": str(e), "jobs_file": JOBS_FILE}
+        )
 
         # Try to recover from backup
         recovered_jobs = _recover_from_backup()
@@ -143,7 +256,7 @@ def _read_jobs() -> List[Dict[str, Any]]:
         # If we recovered jobs, write them back
         if recovered_jobs:
             try:
-                _write_jobs(recovered_jobs)
+                _write_jobs_unlocked(recovered_jobs)
                 _append_worker_log(f"restored {len(recovered_jobs)} jobs from backup")
             except Exception as write_err:
                 _append_worker_log(f"failed to restore jobs: {write_err}")
@@ -151,7 +264,19 @@ def _read_jobs() -> List[Dict[str, Any]]:
         return recovered_jobs
 
 
-def _write_jobs(jobs: List[Dict[str, Any]]):
+def _read_jobs_unlocked() -> List[Dict[str, Any]]:
+    """Read jobs without acquiring file lock (for internal use when lock already held)."""
+    if not os.path.exists(JOBS_FILE):
+        return []
+    try:
+        with open(JOBS_FILE, "r", encoding="utf-8") as fh:
+            return json.load(fh)
+    except Exception:
+        return []
+
+
+def _write_jobs_unlocked(jobs: List[Dict[str, Any]]):
+    """Write jobs without acquiring file lock (for internal use when lock already held)."""
     _ensure_dirs()
 
     # Rotate backups before writing (keeps last 3 good copies)
@@ -167,11 +292,29 @@ def _write_jobs(jobs: List[Dict[str, Any]]):
     finally:
         if os.path.exists(tmp.name):
             try:
-                os.remove(tmp.name)
+                os.unlink(tmp.name)
             except Exception:
                 pass
 
 
+def _write_jobs(jobs: List[Dict[str, Any]]):
+    """
+    Write jobs to jobs.json with cross-process file locking.
+
+    The file lock ensures we don't write while another process is reading
+    or writing, preventing race conditions.
+    """
+    _ensure_dirs()
+
+    try:
+        with _jobs_lock():
+            _write_jobs_unlocked(jobs)
+    except TimeoutError:
+        # Lock acquisition timed out - log and write anyway (better than losing data)
+        _append_worker_log("jobs.json lock timeout on write, writing anyway")
+        _write_jobs_unlocked(jobs)
+
+
 def _append_worker_log(msg: str):
     _ensure_dirs()
     ts = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
@@ -184,7 +327,7 @@ def _update_heartbeat():
     """Write current timestamp to heartbeat file for health monitoring."""
     _ensure_dirs()
     try:
-        with open(HEARTBEAT_FILE, 'w') as fh:
+        with open(HEARTBEAT_FILE, "w") as fh:
             fh.write(str(time.time()))
     except Exception:
         pass  # Non-critical, don't crash worker
@@ -199,7 +342,7 @@ def get_heartbeat_age() -> Optional[float]:
     """
     try:
         if os.path.exists(HEARTBEAT_FILE):
-            with open(HEARTBEAT_FILE, 'r') as fh:
+            with open(HEARTBEAT_FILE, "r") as fh:
                 last_beat = float(fh.read().strip())
                 return time.time() - last_beat
         return None
@@ -227,13 +370,13 @@ def _get_process_start_time(pid: int) -> Optional[float]:
         if not os.path.exists(stat_path):
             return None
 
-        with open(stat_path, 'r') as f:
+        with open(stat_path, "r") as f:
             stat = f.read()
 
         # Parse stat file - field 22 is starttime (in clock ticks since boot)
         # Format: pid (comm) state ppid pgrp session tty_nr ... starttime ...
        # Need to handle comm field which may contain spaces/parentheses
-        parts = stat.rsplit(')', 1)
+        parts = stat.rsplit(")", 1)
         if len(parts) < 2:
             return None
 
@@ -241,19 +384,21 @@ def _get_process_start_time(pid: int) -> Optional[float]:
         if len(fields) < 20:
             return None
 
-        starttime_ticks = int(fields[19])  # 0-indexed, field 22 is at index 19 after comm
+        starttime_ticks = int(
+            fields[19]
+        )  # 0-indexed, field 22 is at index 19 after comm
 
         # Convert to timestamp using system boot time and clock ticks per second
-        with open('/proc/stat', 'r') as f:
+        with open("/proc/stat", "r") as f:
             for line in f:
-                if line.startswith('btime'):
+                if line.startswith("btime"):
                     boot_time = int(line.split()[1])
                     break
             else:
                 return None
 
         # Get clock ticks per second (usually 100)
-        ticks_per_sec = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
+        ticks_per_sec = os.sysconf(os.sysconf_names["SC_CLK_TCK"])
 
         return boot_time + (starttime_ticks / ticks_per_sec)
     except Exception:
@@ -275,14 +420,14 @@ def _next_job_id(jobs: List[Dict[str, Any]]) -> int:
     _ensure_dirs()
 
     # Use a separate lock file to allow atomic read-modify-write
-    with open(lock_file, 'w') as lock_fh:
+    with open(lock_file, "w") as lock_fh:
         # Acquire exclusive lock (blocks until available)
         fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX)
 
         try:
             # Read current counter
             if os.path.exists(counter_file):
-                with open(counter_file, 'r') as f:
+                with open(counter_file, "r") as f:
                     next_id = int(f.read().strip())
             else:
                 # Initialize from existing jobs
@@ -296,8 +441,8 @@ def _next_job_id(jobs: List[Dict[str, Any]]) -> int:
                 next_id = maxid + 1
 
             # Write incremented counter atomically
-            tmp_file = counter_file + '.tmp'
-            with open(tmp_file, 'w') as f:
+            tmp_file = counter_file + ".tmp"
+            with open(tmp_file, "w") as f:
                 f.write(str(next_id + 1))
                 f.flush()
                 os.fsync(f.fileno())
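The `_next_job_id()` hunks above keep the pre-existing pattern: a blocking `fcntl.flock()` on a dedicated lock file guards a counter file that is rewritten through a temp file with `fsync`. A condensed, self-contained sketch of that allocation pattern, assuming a final atomic rename; the function and file names below are illustrative, not the package's:

    import fcntl
    import os

    def allocate_next_id(counter_file: str, lock_file: str) -> int:
        """Illustrative sketch of a flock-guarded, crash-safe ID counter."""
        with open(lock_file, "w") as lock_fh:
            fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX)  # blocks until exclusive
            try:
                current = 1
                if os.path.exists(counter_file):
                    with open(counter_file, "r") as f:
                        current = int(f.read().strip())
                tmp = counter_file + ".tmp"
                with open(tmp, "w") as f:
                    f.write(str(current + 1))
                    f.flush()
                    os.fsync(f.fileno())
                os.replace(tmp, counter_file)  # atomic rename on POSIX
                return current
            finally:
                fcntl.flock(lock_fh.fileno(), fcntl.LOCK_UN)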
@@ -321,133 +466,235 @@ def _next_job_id(jobs: List[Dict[str, Any]]) -> int:
     return maxid + 1
 
 
-def enqueue_job(tool: str, target: str, args: List[str], label: str = "", engagement_id: int = None, metadata: Dict[str, Any] = None, parent_id: int = None, reason: str = None, rule_id: int = None, skip_scope_check: bool = False) -> int:
-    with _lock:
-        jobs = _read_jobs()
-        jid = _next_job_id(jobs)
-        now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
+def enqueue_job(
+    tool: str,
+    target: str,
+    args: List[str],
+    label: str = "",
+    engagement_id: int = None,
+    metadata: Dict[str, Any] = None,
+    parent_id: int = None,
+    reason: str = None,
+    rule_id: int = None,
+    skip_scope_check: bool = False,
+) -> int:
+    # Prepare data outside lock to minimize lock hold time
+    now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
 
-        # Get current engagement if not specified
-        if engagement_id is None:
-            try:
-                from souleyez.storage.engagements import EngagementManager
-                em = EngagementManager()
-                current = em.get_current()
-                engagement_id = current['id'] if current else None
-            except BaseException:
-                engagement_id = None
-
-        # Merge parent_id, reason, and rule_id into metadata
-        job_metadata = metadata or {}
-
-        # Scope validation - check if target is within engagement scope
-        if not skip_scope_check and engagement_id:
-            try:
-                from souleyez.security.scope_validator import ScopeValidator, ScopeViolationError
-                validator = ScopeValidator(engagement_id)
-                result = validator.validate_target(target)
-                enforcement = validator.get_enforcement_mode()
-
-                if not result.is_in_scope and validator.has_scope_defined():
-                    if enforcement == 'block':
-                        validator.log_validation(target, result, 'blocked', job_id=jid)
-                        raise ScopeViolationError(
-                            f"Target '{target}' is out of scope. {result.reason}"
+    # Get current engagement if not specified
+    if engagement_id is None:
+        try:
+            from souleyez.storage.engagements import EngagementManager
+
+            em = EngagementManager()
+            current = em.get_current()
+            engagement_id = current["id"] if current else None
+        except BaseException:
+            engagement_id = None
+
+    # Merge parent_id, reason, and rule_id into metadata
+    job_metadata = metadata or {}
+    if parent_id is not None:
+        job_metadata["parent_id"] = parent_id
+    if reason:
+        job_metadata["reason"] = reason
+    if rule_id is not None:
+        job_metadata["rule_id"] = rule_id
+
+    # Atomic read-modify-write with both thread lock and cross-process file lock
+    with _lock:  # Thread safety within this process
+        try:
+            with _jobs_lock():  # Cross-process safety
+                _ensure_dirs()
+                jobs = _read_jobs_unlocked()
+                jid = _next_job_id(jobs)
+
+                # Scope validation - check if target is within engagement scope
+                # Done inside lock because it uses jid for logging
+                if not skip_scope_check and engagement_id:
+                    try:
+                        from souleyez.security.scope_validator import (
+                            ScopeValidator,
+                            ScopeViolationError,
                         )
-                    elif enforcement == 'warn':
-                        validator.log_validation(target, result, 'warned', job_id=jid)
-                        if 'warnings' not in job_metadata:
-                            job_metadata['warnings'] = []
-                        job_metadata['warnings'].append(
-                            f"SCOPE WARNING: {target} may be out of scope. {result.reason}"
+
+                        validator = ScopeValidator(engagement_id)
+                        result = validator.validate_target(target)
+                        enforcement = validator.get_enforcement_mode()
+
+                        if not result.is_in_scope and validator.has_scope_defined():
+                            if enforcement == "block":
+                                validator.log_validation(
+                                    target, result, "blocked", job_id=jid
+                                )
+                                raise ScopeViolationError(
+                                    f"Target '{target}' is out of scope. {result.reason}"
+                                )
+                            elif enforcement == "warn":
+                                validator.log_validation(
+                                    target, result, "warned", job_id=jid
+                                )
+                                if "warnings" not in job_metadata:
+                                    job_metadata["warnings"] = []
+                                job_metadata["warnings"].append(
+                                    f"SCOPE WARNING: {target} may be out of scope. {result.reason}"
+                                )
+                                logger.warning(
+                                    "Out-of-scope target allowed (warn mode)",
+                                    extra={
+                                        "target": target,
+                                        "engagement_id": engagement_id,
+                                        "reason": result.reason,
+                                    },
+                                )
+                            else:
+                                validator.log_validation(
+                                    target, result, "allowed", job_id=jid
+                                )
+                    except ScopeViolationError:
+                        raise  # Re-raise scope violations
+                    except Exception as e:
+                        # Don't block jobs if scope validation fails unexpectedly
+                        logger.warning(
+                            "Scope validation error (allowing job)",
+                            extra={"target": target, "error": str(e)},
                         )
-                        logger.warning("Out-of-scope target allowed (warn mode)", extra={
-                            "target": target,
-                            "engagement_id": engagement_id,
-                            "reason": result.reason
-                        })
-                    else:
-                        validator.log_validation(target, result, 'allowed', job_id=jid)
-            except ScopeViolationError:
-                raise  # Re-raise scope violations
-            except Exception as e:
-                # Don't block jobs if scope validation fails unexpectedly
-                logger.warning("Scope validation error (allowing job)", extra={
+
+                job = {
+                    "id": jid,
+                    "tool": tool,
                     "target": target,
-                    "error": str(e)
-                })
-        if parent_id is not None:
-            job_metadata['parent_id'] = parent_id
-        if reason:
-            job_metadata['reason'] = reason
-        if rule_id is not None:
-            job_metadata['rule_id'] = rule_id
-
-        job = {
-            "id": jid,
+                    "args": args or [],
+                    "label": label or "",
+                    "status": STATUS_QUEUED,
+                    "created_at": now,
+                    "started_at": None,
+                    "finished_at": None,
+                    "result_scan_id": None,
+                    "error": None,
+                    "log": os.path.join(JOBS_DIR, f"{jid}.log"),
+                    "pid": None,
+                    "engagement_id": engagement_id,
+                    "chainable": False,
+                    "chained": False,
+                    "chained_job_ids": [],
+                    "chain_error": None,
+                    "metadata": job_metadata,
+                    "parent_id": parent_id,  # Top-level field for easier querying
+                    "rule_id": rule_id,  # Rule that triggered this job (if auto-chained)
+                }
+                jobs.append(job)
+                _write_jobs_unlocked(jobs)
+        except TimeoutError:
+            # Lock acquisition timed out - fall back to non-locked operation
+            _append_worker_log("jobs.json lock timeout in enqueue_job, using fallback")
+            jobs = _read_jobs()
+            jid = _next_job_id(jobs)
+
+            # Scope validation fallback
+            if not skip_scope_check and engagement_id:
+                try:
+                    from souleyez.security.scope_validator import (
+                        ScopeValidator,
+                        ScopeViolationError,
+                    )
+
+                    validator = ScopeValidator(engagement_id)
+                    result = validator.validate_target(target)
+                    enforcement = validator.get_enforcement_mode()
+
+                    if not result.is_in_scope and validator.has_scope_defined():
+                        if enforcement == "block":
+                            validator.log_validation(
+                                target, result, "blocked", job_id=jid
+                            )
+                            raise ScopeViolationError(
+                                f"Target '{target}' is out of scope. {result.reason}"
+                            )
+                        elif enforcement == "warn":
+                            validator.log_validation(
+                                target, result, "warned", job_id=jid
+                            )
+                            if "warnings" not in job_metadata:
+                                job_metadata["warnings"] = []
+                            job_metadata["warnings"].append(
+                                f"SCOPE WARNING: {target} may be out of scope. {result.reason}"
+                            )
+                        else:
+                            validator.log_validation(target, result, "allowed", job_id=jid)
+                except ScopeViolationError:
+                    raise
+                except Exception:
+                    pass
+
+            job = {
+                "id": jid,
+                "tool": tool,
+                "target": target,
+                "args": args or [],
+                "label": label or "",
+                "status": STATUS_QUEUED,
+                "created_at": now,
+                "started_at": None,
+                "finished_at": None,
+                "result_scan_id": None,
+                "error": None,
+                "log": os.path.join(JOBS_DIR, f"{jid}.log"),
+                "pid": None,
+                "engagement_id": engagement_id,
+                "chainable": False,
+                "chained": False,
+                "chained_job_ids": [],
+                "chain_error": None,
+                "metadata": job_metadata,
+                "parent_id": parent_id,
+                "rule_id": rule_id,
+            }
+            jobs.append(job)
+            _write_jobs(jobs)
+
+    logger.info(
+        "Job enqueued",
+        extra={
+            "event_type": "job_enqueued",
+            "job_id": jid,
             "tool": tool,
             "target": target,
-            "args": args or [],
-            "label": label or "",
-            "status": STATUS_QUEUED,
-            "created_at": now,
-            "started_at": None,
-            "finished_at": None,
-            "result_scan_id": None,
-            "error": None,
-            "log": os.path.join(JOBS_DIR, f"{jid}.log"),
-            "pid": None,
             "engagement_id": engagement_id,
-            "chainable": False,
-            "chained": False,
-            "chained_job_ids": [],
-            "chain_error": None,
-            "metadata": job_metadata,
-            "parent_id": parent_id,  # Top-level field for easier querying
-            "rule_id": rule_id  # Rule that triggered this job (if auto-chained)
-        }
-        jobs.append(job)
-        _write_jobs(jobs)
-
-        logger.info("Job enqueued", extra={
-            "event_type": "job_enqueued",
-            "job_id": jid,
-            "tool": tool,
-            "target": target,
-            "engagement_id": engagement_id,
-            "label": label
-        })
+            "label": label,
+        },
+    )
     _append_worker_log(f"enqueued job {jid}: {tool} {target}")
     return jid
 
 
 def list_jobs(limit: int = 100) -> List[Dict[str, Any]]:
     jobs = _read_jobs()
-    # Sort by job ID ascending (oldest/lowest ID first)
-    return sorted(jobs, key=lambda x: x.get("id", 0), reverse=False)[:limit]
+    # Sort by job ID descending (newest first) so limit cuts old jobs, not new ones
+    return sorted(jobs, key=lambda x: x.get("id", 0), reverse=True)[:limit]
 
 
 def get_active_jobs() -> List[Dict[str, Any]]:
     """Get all running/pending/queued jobs without limit.
-
+
     Returns jobs sorted with running jobs first, then by ID descending.
     """
     jobs = _read_jobs()
-    active = [j for j in jobs if j.get('status') in ('pending', 'running', 'queued')]
-
+    active = [j for j in jobs if j.get("status") in ("pending", "running", "queued")]
+
     # Sort: running jobs first, then by ID descending (newest first)
     def sort_key(j):
-        status = j.get('status', '')
-        status_priority = 0 if status == 'running' else 1
-        job_id = j.get('id', 0)
+        status = j.get("status", "")
+        status_priority = 0 if status == "running" else 1
+        job_id = j.get("id", 0)
         return (status_priority, -job_id)
-
+
     return sorted(active, key=sort_key)
 
 
 def get_all_jobs() -> List[Dict[str, Any]]:
     """Get ALL jobs without any limit.
-
+
     Returns jobs sorted by ID descending (newest first).
     """
     jobs = _read_jobs()
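`enqueue_job()` keeps its public signature through this refactor, so existing callers are unaffected; only the locking around the jobs.json update changes, and `list_jobs()` now returns newest jobs first. A minimal calling sketch; the tool, target, and label values are invented for illustration:

    # Illustrative values only; any registered plugin name could be used.
    job_id = enqueue_job(
        tool="nmap",
        target="10.0.0.5",
        args=["-sV", "-p-"],
        label="initial service scan",
        reason="baseline discovery",
    )
    print(f"queued job {job_id}")
    for job in list_jobs(limit=10):  # newest first after this change
        print(job["id"], job["tool"], job["status"])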
@@ -476,7 +723,7 @@ def kill_job(jid: int) -> bool:
     if not job:
         return False
 
-    status = job.get('status')
+    status = job.get("status")
     now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
 
     # Handle queued jobs - just mark as killed
@@ -493,7 +740,7 @@ def kill_job(jid: int) -> bool:
 
     # Handle running jobs - send signal
     if status == STATUS_RUNNING:
-        pid = job.get('pid')
+        pid = job.get("pid")
         if not pid:
             _update_job(jid, status=STATUS_KILLED, finished_at=now)
             return True
@@ -506,7 +753,8 @@ def kill_job(jid: int) -> bool:
                 pgid = os.getpgid(pid)
             except ProcessLookupError:
                 # Process already dead
-                _update_job(jid, status="killed", finished_at=now, pid=None)
+                _update_job(jid, status=STATUS_KILLED, finished_at=now, pid=None)
+                _append_worker_log(f"job {jid}: process already dead, marked as killed")
                 return True
 
             # Kill entire process group (parent + all children)
@@ -515,10 +763,15 @@ def kill_job(jid: int) -> bool:
                 _append_worker_log(f"job {jid}: sent SIGTERM to process group {pgid}")
             except ProcessLookupError:
                 # Process group already dead
-                _update_job(jid, status="killed", finished_at=now, pid=None)
+                _update_job(jid, status=STATUS_KILLED, finished_at=now, pid=None)
+                _append_worker_log(
+                    f"job {jid}: process group already dead, marked as killed"
+                )
                 return True
             except PermissionError:
-                _append_worker_log(f"job {jid}: permission denied to kill process group {pgid}")
+                _append_worker_log(
+                    f"job {jid}: permission denied to kill process group {pgid}"
+                )
                 return False
 
             # Wait briefly for graceful termination
@@ -533,11 +786,13 @@ def kill_job(jid: int) -> bool:
                 pass  # Already dead, good
 
             # Update job status
-            _update_job(jid, status="killed", finished_at=now, pid=None)
+            _update_job(jid, status=STATUS_KILLED, finished_at=now, pid=None)
+            _append_worker_log(f"job {jid}: killed successfully")
             return True
         except ProcessLookupError:
             # Process already dead
-            _update_job(jid, status="killed", finished_at=now, pid=None)
+            _update_job(jid, status=STATUS_KILLED, finished_at=now, pid=None)
+            _append_worker_log(f"job {jid}: process already dead, marked as killed")
             return True
         except PermissionError:
             _append_worker_log(f"job {jid}: permission denied to kill PID {pid}")
@@ -547,6 +802,7 @@ def kill_job(jid: int) -> bool:
             return False
 
     # Job is in some other state (done, killed, etc.)
+    _append_worker_log(f"job {jid}: cannot kill - status is '{status}'")
     return False
 
 
@@ -554,32 +810,63 @@ def delete_job(jid: int) -> bool:
     """
     Delete a job from the queue (completed jobs only).
 
+    Uses atomic read-modify-write with cross-process file locking.
+
     Args:
         jid: Job ID to delete
 
     Returns:
         True if job was deleted, False if not found or still running
     """
-    job = get_job(jid)
-    if not job:
-        return False
+    with _lock:  # Thread safety within this process
+        try:
+            with _jobs_lock():  # Cross-process safety
+                jobs = _read_jobs_unlocked()
+                job = None
+                for j in jobs:
+                    if j.get("id") == jid:
+                        job = j
+                        break
 
-    # Don't delete running or pending jobs
-    if job.get('status') in ('running', 'pending'):
-        return False
+                if not job:
+                    return False
 
-    with _lock:
-        jobs = _read_jobs()
-        jobs = [j for j in jobs if j.get("id") != jid]
-        _write_jobs(jobs)
+                # Don't delete running or pending jobs
+                if job.get("status") in ("running", "pending"):
+                    return False
 
-    return True
+                jobs = [j for j in jobs if j.get("id") != jid]
+                _write_jobs_unlocked(jobs)
+                return True
+        except TimeoutError:
+            # Fall back to non-locked operation
+            _append_worker_log(
+                f"jobs.json lock timeout in delete_job for {jid}, using fallback"
+            )
+            jobs = _read_jobs()
+            job = None
+            for j in jobs:
+                if j.get("id") == jid:
+                    job = j
+                    break
+
+            if not job:
+                return False
+
+            if job.get("status") in ("running", "pending"):
+                return False
+
+            jobs = [j for j in jobs if j.get("id") != jid]
+            _write_jobs(jobs)
+            return True
 
 
 def purge_jobs(status_filter: List[str] = None, engagement_id: int = None) -> int:
     """
     Purge multiple jobs at once based on filters.
 
+    Uses atomic read-modify-write with cross-process file locking.
+
     Args:
         status_filter: List of statuses to purge (e.g., ['done', 'error', 'killed'])
             If None, purges all non-running jobs
@@ -589,36 +876,46 @@ def purge_jobs(status_filter: List[str] = None, engagement_id: int = None) -> int:
         Number of jobs purged
     """
     if status_filter is None:
-        status_filter = ['done', 'error', 'killed']
-
-    with _lock:
-        jobs = _read_jobs()
-        original_count = len(jobs)
+        status_filter = ["done", "error", "killed"]
 
-        # Filter out jobs to keep
+    def _filter_jobs(jobs):
+        """Filter out jobs to keep based on criteria."""
         kept_jobs = []
         for j in jobs:
             # Keep running/pending jobs always
-            if j.get('status') in ('running', 'pending'):
+            if j.get("status") in ("running", "pending"):
                 kept_jobs.append(j)
                 continue
 
             # Keep if status doesn't match filter
-            if j.get('status') not in status_filter:
+            if j.get("status") not in status_filter:
                 kept_jobs.append(j)
                 continue
 
             # Keep if engagement_id specified and doesn't match
-            if engagement_id is not None and j.get('engagement_id') != engagement_id:
+            if engagement_id is not None and j.get("engagement_id") != engagement_id:
                 kept_jobs.append(j)
                 continue
 
             # Otherwise, purge this job (don't add to kept_jobs)
+        return kept_jobs
 
-        _write_jobs(kept_jobs)
-        purged_count = original_count - len(kept_jobs)
-
-        return purged_count
+    with _lock:  # Thread safety within this process
+        try:
+            with _jobs_lock():  # Cross-process safety
+                jobs = _read_jobs_unlocked()
+                original_count = len(jobs)
+                kept_jobs = _filter_jobs(jobs)
+                _write_jobs_unlocked(kept_jobs)
+                return original_count - len(kept_jobs)
+        except TimeoutError:
+            # Fall back to non-locked operation
+            _append_worker_log("jobs.json lock timeout in purge_jobs, using fallback")
+            jobs = _read_jobs()
+            original_count = len(jobs)
+            kept_jobs = _filter_jobs(jobs)
+            _write_jobs(kept_jobs)
+            return original_count - len(kept_jobs)
 
 
 def purge_all_jobs() -> int:
@@ -629,12 +926,15 @@ def purge_all_jobs() -> int:
     Returns:
         Number of jobs purged
     """
-    return purge_jobs(status_filter=['done', 'error', 'killed'])
+    return purge_jobs(status_filter=["done", "error", "killed"])
 
 
 def _update_job(jid: int, respect_killed: bool = True, **fields):
     """
-    Update job fields atomically.
+    Update job fields atomically with cross-process locking.
+
+    Uses both threading lock (for same-process safety) and file lock
+    (for cross-process safety) to ensure atomic read-modify-write.
 
     Args:
         jid: Job ID to update
@@ -642,29 +942,63 @@ def _update_job(jid: int, respect_killed: bool = True, **fields):
             This prevents race condition where job is killed while completing.
         **fields: Fields to update
     """
-    with _lock:
-        jobs = _read_jobs()
-        changed = False
-        for j in jobs:
-            if j.get("id") == jid:
-                # Race condition protection: don't change status of killed jobs
-                if respect_killed and j.get("status") == STATUS_KILLED and "status" in fields:
-                    # Job was killed - don't overwrite status, but allow other updates
-                    fields_copy = dict(fields)
-                    del fields_copy["status"]
-                    if fields_copy:
-                        j.update(fields_copy)
-                        changed = True
-                    logger.debug("Skipped status update for killed job", extra={
-                        "job_id": jid,
-                        "attempted_status": fields.get("status")
-                    })
-                else:
+    with _lock:  # Thread safety within this process
+        try:
+            with _jobs_lock():  # Cross-process safety
+                # Read directly without going through _read_jobs (we already have lock)
+                _ensure_dirs()
+                jobs = []
+                if os.path.exists(JOBS_FILE):
+                    try:
+                        with open(JOBS_FILE, "r", encoding="utf-8") as fh:
+                            jobs = json.load(fh)
+                    except Exception:
+                        jobs = []
+
+                changed = False
+                for j in jobs:
+                    if j.get("id") == jid:
+                        # Race condition protection: don't change status of killed jobs
+                        if (
+                            respect_killed
+                            and j.get("status") == STATUS_KILLED
+                            and "status" in fields
+                        ):
+                            # Job was killed - don't overwrite status, but allow other updates
+                            fields_copy = dict(fields)
+                            del fields_copy["status"]
+                            if fields_copy:
+                                j.update(fields_copy)
+                                changed = True
+                            logger.debug(
+                                "Skipped status update for killed job",
+                                extra={
+                                    "job_id": jid,
+                                    "attempted_status": fields.get("status"),
+                                },
+                            )
+                        else:
+                            j.update(fields)
+                            changed = True
+                        break
+
+                if changed:
+                    # Write directly without going through _write_jobs (we already have lock)
+                    _write_jobs_unlocked(jobs)
+        except TimeoutError:
+            # Fall back to non-locked operation (better than failing)
+            _append_worker_log(
+                f"jobs.json lock timeout updating job {jid}, using fallback"
+            )
+            jobs = _read_jobs()
+            changed = False
+            for j in jobs:
+                if j.get("id") == jid:
                     j.update(fields)
                     changed = True
-                break
-        if changed:
-            _write_jobs(jobs)
+                    break
+            if changed:
+                _write_jobs(jobs)
 
 
 def _process_pending_chains():
@@ -685,83 +1019,139 @@ def _process_pending_chains():
     try:
         jobs = _read_jobs()
 
+        # Cleanup: Mark jobs stuck in "chaining in progress" for too long (> 5 min) as failed
+        CHAIN_TIMEOUT_SECONDS = 300  # 5 minutes
+        now = datetime.now(timezone.utc)
+        for j in jobs:
+            chaining_started = j.get("chaining_started_at")
+            if chaining_started and not j.get("chained", False):
+                try:
+                    started_at = datetime.fromisoformat(
+                        chaining_started.replace("Z", "+00:00")
+                    )
+                    if (now - started_at).total_seconds() > CHAIN_TIMEOUT_SECONDS:
+                        jid = j.get("id")
+                        _append_worker_log(
+                            f"job {jid}: chaining timed out after {CHAIN_TIMEOUT_SECONDS}s, marking as failed"
+                        )
+                        _update_job(
+                            jid,
+                            chained=True,
+                            chain_error="Chaining timed out",
+                            chaining_started_at=None,
+                        )
+                except Exception:
+                    pass  # Ignore parse errors
+
         # Find jobs ready for chaining
         # Include jobs with chainable statuses: done, no_results, warning
+        # Skip jobs that are currently being chained (chaining_started_at is set)
         chainable_jobs = [
-            j for j in jobs
-            if j.get('chainable', False) == True
-            and j.get('chained', False) == False
-            and is_chainable(j.get('status', ''))
+            j
+            for j in jobs
+            if j.get("chainable", False) == True
+            and j.get("chained", False) == False
+            and is_chainable(j.get("status", ""))
+            and not j.get("chaining_started_at")  # Skip if already being processed
         ]
 
         if not chainable_jobs:
            return 0  # Nothing to process
 
         # Sort by created_at (process oldest first - FIFO)
-        chainable_jobs.sort(key=lambda x: x.get('created_at', ''))
+        chainable_jobs.sort(key=lambda x: x.get("created_at", ""))
         job_to_chain = chainable_jobs[0]
 
-        jid = job_to_chain['id']
-        tool = job_to_chain.get('tool', 'unknown')
+        jid = job_to_chain["id"]
+        tool = job_to_chain.get("tool", "unknown")
 
         _append_worker_log(f"processing chains for job {jid} ({tool})")
-        logger.info("Processing chainable job", extra={
-            "job_id": jid,
-            "tool": tool,
-            "queue_depth": len(chainable_jobs)
-        })
+        logger.info(
+            "Processing chainable job",
+            extra={"job_id": jid, "tool": tool, "queue_depth": len(chainable_jobs)},
+        )
+
+        # Mark job as chaining in progress BEFORE starting (prevents retry loop if auto_chain hangs)
+        chaining_start = datetime.now(timezone.utc).isoformat()
+        _update_job(jid, chaining_started_at=chaining_start)
 
         try:
             from souleyez.core.tool_chaining import ToolChaining
+
             chaining = ToolChaining()
 
             if not chaining.is_enabled():
                 # Chaining was disabled after job marked as chainable
-                _update_job(jid, chained=True)
+                _update_job(jid, chained=True, chaining_started_at=None)
                 _append_worker_log(f"job {jid}: chaining now disabled, skipping")
                 return 1
 
             # Get parse results from job
-            parse_result = job_to_chain.get('parse_result', {})
+            parse_result = job_to_chain.get("parse_result", {})
 
             if not parse_result:
                 # No parse results - this shouldn't happen if job was properly marked chainable
                 # Log warning and store reason for debugging
-                logger.warning("Job marked chainable but has no parse_result", extra={
-                    "job_id": jid,
-                    "tool": tool,
-                    "status": job_to_chain.get('status')
-                })
-                _append_worker_log(f"job {jid}: WARNING - marked chainable but parse_result is empty/missing")
-                _update_job(jid, chained=True, chain_skip_reason="parse_result missing")
+                logger.warning(
+                    "Job marked chainable but has no parse_result",
+                    extra={
+                        "job_id": jid,
+                        "tool": tool,
+                        "status": job_to_chain.get("status"),
+                    },
+                )
+                _append_worker_log(
+                    f"job {jid}: WARNING - marked chainable but parse_result is empty/missing"
+                )
+                _update_job(
+                    jid,
+                    chained=True,
+                    chain_skip_reason="parse_result missing",
+                    chaining_started_at=None,
+                )
                 return 1
 
-            if 'error' in parse_result:
+            if "error" in parse_result:
                 # Parse had an error - log and skip
-                logger.warning("Job has parse error, skipping chaining", extra={
-                    "job_id": jid,
-                    "tool": tool,
-                    "parse_error": parse_result.get('error')
-                })
-                _append_worker_log(f"job {jid}: parse error '{parse_result.get('error')}', skipping chain")
-                _update_job(jid, chained=True, chain_skip_reason=f"parse_error: {parse_result.get('error')}")
+                logger.warning(
+                    "Job has parse error, skipping chaining",
+                    extra={
+                        "job_id": jid,
+                        "tool": tool,
+                        "parse_error": parse_result.get("error"),
+                    },
+                )
+                _append_worker_log(
+                    f"job {jid}: parse error '{parse_result.get('error')}', skipping chain"
+                )
+                _update_job(
+                    jid,
+                    chained=True,
+                    chain_skip_reason=f"parse_error: {parse_result.get('error')}",
+                    chaining_started_at=None,
+                )
                 return 1
 
             # Process auto-chaining
             chained_job_ids = chaining.auto_chain(job_to_chain, parse_result)
 
-            # Update job with chaining results
-            _update_job(jid,
+            # Update job with chaining results (clear chaining_started_at)
+            _update_job(
+                jid,
                 chained=True,
-                chained_job_ids=chained_job_ids or []
+                chained_job_ids=chained_job_ids or [],
+                chaining_started_at=None,
             )
 
             if chained_job_ids:
-                logger.info("Auto-chaining completed", extra={
-                    "job_id": jid,
-                    "chained_jobs": chained_job_ids,
-                    "count": len(chained_job_ids)
-                })
+                logger.info(
+                    "Auto-chaining completed",
+                    extra={
+                        "job_id": jid,
+                        "chained_jobs": chained_job_ids,
+                        "count": len(chained_job_ids),
+                    },
+                )
                 _append_worker_log(
                     f"job {jid}: created {len(chained_job_ids)} chained job(s): {chained_job_ids}"
                 )
@@ -773,29 +1163,33 @@ def _process_pending_chains():
773
1163
  except Exception as chain_err:
774
1164
  # Chaining failed - mark as chained with error to prevent retry loops
775
1165
  error_msg = str(chain_err)
776
- logger.error("Auto-chaining failed", extra={
777
- "job_id": jid,
778
- "error": error_msg,
779
- "traceback": traceback.format_exc()
780
- })
1166
+ logger.error(
1167
+ "Auto-chaining failed",
1168
+ extra={
1169
+ "job_id": jid,
1170
+ "error": error_msg,
1171
+ "traceback": traceback.format_exc(),
1172
+ },
1173
+ )
781
1174
  _append_worker_log(f"job {jid} chain error: {error_msg}")
782
- _update_job(jid,
783
- chained=True,
784
- chain_error=error_msg
1175
+ _update_job(
1176
+ jid, chained=True, chain_error=error_msg, chaining_started_at=None
785
1177
  )
786
1178
  return 1 # Still count as processed (with error)
787
1179
 
788
1180
  except Exception as e:
789
1181
  # Unexpected error in chain processor itself
790
- logger.error("Chain processor error", extra={
791
- "error": str(e),
792
- "traceback": traceback.format_exc()
793
- })
1182
+ logger.error(
1183
+ "Chain processor error",
1184
+ extra={"error": str(e), "traceback": traceback.format_exc()},
1185
+ )
794
1186
  _append_worker_log(f"chain processor error: {e}")
795
1187
  return 0
796
1188
 
797
1189
 
798
- def _try_run_plugin(tool: str, target: str, args: List[str], label: str, log_path: str, jid: int = None) -> tuple:
1190
+ def _try_run_plugin(
1191
+ tool: str, target: str, args: List[str], label: str, log_path: str, jid: int = None
1192
+ ) -> tuple:
799
1193
  try:
800
1194
  from .loader import discover_plugins
801
1195
 
@@ -827,21 +1221,29 @@ def _try_run_plugin(tool: str, target: str, args: List[str], label: str, log_pat
827
1221
  fh.write(f"Target: {target}\n")
828
1222
  fh.write(f"Args: {args}\n")
829
1223
  fh.write(f"Label: {label}\n")
830
- fh.write(f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n\n")
831
-
1224
+ fh.write(
1225
+ f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n\n"
1226
+ )
1227
+
832
1228
  # Build command specification
833
- cmd_spec = build_command_method(target, args or [], label or "", log_path)
834
-
1229
+ cmd_spec = build_command_method(
1230
+ target, args or [], label or "", log_path
1231
+ )
1232
+
835
1233
  if cmd_spec is None:
836
1234
  # build_command returned None - check if this is a deliberate abort
837
1235
  # (e.g., gobuster detected host redirect and aborted to avoid wasted scan)
838
1236
  if os.path.exists(log_path):
839
- with open(log_path, 'r', encoding='utf-8', errors='replace') as fh:
1237
+ with open(
1238
+ log_path, "r", encoding="utf-8", errors="replace"
1239
+ ) as fh:
840
1240
  log_content = fh.read()
841
- if 'HOST_REDIRECT_TARGET:' in log_content:
1241
+ if "HOST_REDIRECT_TARGET:" in log_content:
842
1242
  # Plugin aborted due to host redirect - don't fall through to run()
843
1243
  # Return success (0) so parser can set WARNING status and trigger retry
844
- _append_worker_log(f"job {jid}: gobuster aborted due to host redirect")
1244
+ _append_worker_log(
1245
+ f"job {jid}: gobuster aborted due to host redirect"
1246
+ )
845
1247
  return (True, 0)
846
1248
 
847
1249
  # Otherwise check if plugin has run() method
@@ -854,7 +1256,9 @@ def _try_run_plugin(tool: str, target: str, args: List[str], label: str, log_pat
854
1256
 
855
1257
  try:
856
1258
  if "log_path" in params:
857
- rc = run_method(target, args or [], label or "", log_path)
1259
+ rc = run_method(
1260
+ target, args or [], label or "", log_path
1261
+ )
858
1262
  elif "label" in params:
859
1263
  rc = run_method(target, args or [], label or "")
860
1264
  elif "args" in params:
@@ -863,30 +1267,38 @@ def _try_run_plugin(tool: str, target: str, args: List[str], label: str, log_pat
863
1267
  rc = run_method(target)
864
1268
  return (True, rc if isinstance(rc, int) else 0)
865
1269
  except Exception as e:
866
- with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
1270
+ with open(
1271
+ log_path, "a", encoding="utf-8", errors="replace"
1272
+ ) as fh:
867
1273
  fh.write(f"\n=== PLUGIN RUN ERROR ===\n")
868
1274
  fh.write(f"{type(e).__name__}: {e}\n")
869
1275
  fh.write(f"\n{traceback.format_exc()}\n")
870
1276
  return (True, 1)
871
1277
  else:
872
1278
  # No run() method either - actual validation failure
873
- with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
874
- fh.write("ERROR: Plugin validation failed (build_command returned None)\n")
1279
+ with open(
1280
+ log_path, "a", encoding="utf-8", errors="replace"
1281
+ ) as fh:
1282
+ fh.write(
1283
+ "ERROR: Plugin validation failed (build_command returned None)\n"
1284
+ )
875
1285
  return (True, 1)
876
-
1286
+
877
1287
  # Execute using new subprocess handler with PID tracking
878
- rc = _run_subprocess_with_spec(cmd_spec, log_path, jid=jid, plugin=plugin)
1288
+ rc = _run_subprocess_with_spec(
1289
+ cmd_spec, log_path, jid=jid, plugin=plugin
1290
+ )
879
1291
 
880
1292
  # Completion message already written by _run_subprocess_with_spec
881
1293
  return (True, rc)
882
-
1294
+
883
1295
  except Exception as e:
884
1296
  with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
885
1297
  fh.write("\n=== PLUGIN ERROR ===\n")
886
1298
  fh.write(f"{type(e).__name__}: {e}\n")
887
1299
  fh.write(f"\n{traceback.format_exc()}\n")
888
1300
  return (True, 1)
889
-
1301
+
890
1302
  # FALLBACK: Use old run() method for backward compatibility
891
1303
  run_method = getattr(plugin, "run", None)
892
1304
  if not callable(run_method):
@@ -900,24 +1312,36 @@ def _try_run_plugin(tool: str, target: str, args: List[str], label: str, log_pat
900
1312
  fh.write(f"Target: {target}\n")
901
1313
  fh.write(f"Args: {args}\n")
902
1314
  fh.write(f"Label: {label}\n")
903
- fh.write(f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n\n")
1315
+ fh.write(
1316
+ f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n\n"
1317
+ )
904
1318
 
905
1319
  try:
906
- if 'log_path' in params or len(params) >= 4:
1320
+ if "log_path" in params or len(params) >= 4:
907
1321
  rc = run_method(target, args or [], label or "", log_path)
908
1322
  else:
909
1323
  result = run_method(target, args or [], label or "")
910
1324
 
911
1325
  if isinstance(result, tuple) and len(result) >= 2:
912
1326
  rc, old_logpath = result[0], result[1]
913
- if old_logpath and os.path.exists(old_logpath) and old_logpath != log_path:
1327
+ if (
1328
+ old_logpath
1329
+ and os.path.exists(old_logpath)
1330
+ and old_logpath != log_path
1331
+ ):
914
1332
  try:
915
- with open(old_logpath, "r", encoding="utf-8", errors="replace") as src:
916
- with open(log_path, "a", encoding="utf-8", errors="replace") as dst:
1333
+ with open(
1334
+ old_logpath, "r", encoding="utf-8", errors="replace"
1335
+ ) as src:
1336
+ with open(
1337
+ log_path, "a", encoding="utf-8", errors="replace"
1338
+ ) as dst:
917
1339
  dst.write("\n=== Plugin Output ===\n")
918
1340
  dst.write(src.read())
919
1341
  except Exception as e:
920
- with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
1342
+ with open(
1343
+ log_path, "a", encoding="utf-8", errors="replace"
1344
+ ) as fh:
921
1345
  fh.write(f"\nWarning: Could not copy old log: {e}\n")
922
1346
  elif isinstance(result, int):
923
1347
  rc = result
@@ -941,7 +1365,9 @@ def _try_run_plugin(tool: str, target: str, args: List[str], label: str, log_pat
941
1365
  return (False, 0)
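`_try_run_plugin` accepts two plugin shapes: the newer `build_command()` protocol that returns a command spec dict, and the legacy `run()` method executed directly. A minimal sketch of a plugin the dispatcher above would accept, based only on the call signatures visible in this diff; the class and tool names are hypothetical.

from typing import Any, Dict, List

class ExamplePlugin:
    """Hypothetical plugin matching the dispatch logic in _try_run_plugin."""

    tool = "example_tool"  # hypothetical

    def build_command(
        self, target: str, args: List[str], label: str, log_path: str
    ) -> Dict[str, Any]:
        # Returning a spec dict hands execution to _run_subprocess_with_spec().
        return {
            "cmd": ["example_tool", "-o", log_path, target, *args],
            "timeout": 300,
            "needs_shell": False,
        }

    def run(self, target: str, args: List[str], label: str, log_path: str) -> int:
        # Legacy fallback: do the work directly and return an exit code.
        return 0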
942
1366
 
943
1367
 
944
- def _run_rpc_exploit(cmd_spec: Dict[str, Any], log_path: str, jid: int = None, plugin=None) -> int:
1368
+ def _run_rpc_exploit(
1369
+ cmd_spec: Dict[str, Any], log_path: str, jid: int = None, plugin=None
1370
+ ) -> int:
945
1371
  """
946
1372
  Execute MSF exploit via RPC mode (Pro feature).
947
1373
 
@@ -964,10 +1390,10 @@ def _run_rpc_exploit(cmd_spec: Dict[str, Any], log_path: str, jid: int = None, p
964
1390
  Returns:
965
1391
  Exit code (0 = success with session, non-zero = failure)
966
1392
  """
967
- exploit_path = cmd_spec.get('exploit_path')
968
- target = cmd_spec.get('target')
969
- options = cmd_spec.get('options', {})
970
- payload = cmd_spec.get('payload')
1393
+ exploit_path = cmd_spec.get("exploit_path")
1394
+ target = cmd_spec.get("target")
1395
+ options = cmd_spec.get("options", {})
1396
+ payload = cmd_spec.get("payload")
971
1397
 
972
1398
  _append_worker_log(f"job {jid}: RPC mode exploit - {exploit_path}")
973
1399
 
@@ -975,6 +1401,7 @@ def _run_rpc_exploit(cmd_spec: Dict[str, Any], log_path: str, jid: int = None, p
975
1401
  if plugin is None:
976
1402
  try:
977
1403
  from souleyez.plugins.msf_exploit import MsfExploitPlugin
1404
+
978
1405
  plugin = MsfExploitPlugin()
979
1406
  except Exception as e:
980
1407
  with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
@@ -987,12 +1414,12 @@ def _run_rpc_exploit(cmd_spec: Dict[str, Any], log_path: str, jid: int = None, p
987
1414
  target=target,
988
1415
  options=options,
989
1416
  log_path=log_path,
990
- payload=payload
1417
+ payload=payload,
991
1418
  )
992
1419
 
993
- if result.get('success'):
994
- session_id = result.get('session_id')
995
- session_info = result.get('session_info', {})
1420
+ if result.get("success"):
1421
+ session_id = result.get("session_id")
1422
+ session_info = result.get("session_info", {})
996
1423
 
997
1424
  # Store session in database
998
1425
  try:
@@ -1001,40 +1428,46 @@ def _run_rpc_exploit(cmd_spec: Dict[str, Any], log_path: str, jid: int = None, p
1001
1428
  _append_worker_log(f"job {jid}: failed to store session: {e}")
1002
1429
 
1003
1430
  # Update job with session info
1004
- session_type = session_info.get('type', 'shell')
1431
+ session_type = session_info.get("type", "shell")
1005
1432
  _update_job(
1006
1433
  jid,
1007
1434
  exploitation_detected=True,
1008
- session_info=f"Session {session_id} ({session_type})"
1435
+ session_info=f"Session {session_id} ({session_type})",
1009
1436
  )
1010
1437
 
1011
1438
  return 0
1012
- elif result.get('no_session'):
1439
+ elif result.get("no_session"):
1013
1440
  # Exploit ran but no session opened - this is "no results", not an error
1014
1441
  # Return 1 but let parser set status to no_results
1015
- reason = result.get('reason', 'No session opened')
1442
+ reason = result.get("reason", "No session opened")
1016
1443
  _append_worker_log(f"job {jid}: exploit completed - {reason}")
1017
1444
  return 1
1018
1445
  else:
1019
1446
  # True error (connection failed, RPC error, etc.)
1020
- error = result.get('error', 'Unknown error')
1447
+ error = result.get("error", "Unknown error")
1021
1448
  _append_worker_log(f"job {jid}: RPC exploit failed - {error}")
1022
1449
  return 1
1023
1450
 
1024
1451
 
1025
- def _store_msf_session(jid: int, target: str, exploit_path: str, session_id: str, session_info: Dict[str, Any]):
1452
+ def _store_msf_session(
1453
+ jid: int,
1454
+ target: str,
1455
+ exploit_path: str,
1456
+ session_id: str,
1457
+ session_info: Dict[str, Any],
1458
+ ):
1026
1459
  """Store MSF session in database."""
1027
1460
  try:
1028
- from souleyez.storage.msf_sessions import add_msf_session
1029
1461
  from souleyez.storage.database import get_db
1030
1462
  from souleyez.storage.hosts import HostManager
1463
+ from souleyez.storage.msf_sessions import add_msf_session
1031
1464
 
1032
1465
  # Get job info for engagement_id
1033
1466
  job = get_job(jid)
1034
1467
  if not job:
1035
1468
  return
1036
1469
 
1037
- engagement_id = job.get('engagement_id')
1470
+ engagement_id = job.get("engagement_id")
1038
1471
  if not engagement_id:
1039
1472
  return
1040
1473
 
@@ -1044,7 +1477,7 @@ def _store_msf_session(jid: int, target: str, exploit_path: str, session_id: str
1044
1477
 
1045
1478
  hm = HostManager()
1046
1479
  host = hm.get_host_by_ip(engagement_id, target)
1047
- host_id = host['id'] if host else None
1480
+ host_id = host["id"] if host else None
1048
1481
 
1049
1482
  if host_id:
1050
1483
  add_msf_session(
@@ -1052,15 +1485,15 @@ def _store_msf_session(jid: int, target: str, exploit_path: str, session_id: str
1052
1485
  engagement_id=engagement_id,
1053
1486
  host_id=host_id,
1054
1487
  msf_session_id=int(session_id),
1055
- session_type=session_info.get('type'),
1488
+ session_type=session_info.get("type"),
1056
1489
  via_exploit=exploit_path,
1057
- via_payload=session_info.get('via_payload'),
1058
- platform=session_info.get('platform'),
1059
- arch=session_info.get('arch'),
1060
- username=session_info.get('username'),
1061
- port=session_info.get('target_port'),
1062
- tunnel_peer=session_info.get('tunnel_peer'),
1063
- notes=f"Created by job #{jid}"
1490
+ via_payload=session_info.get("via_payload"),
1491
+ platform=session_info.get("platform"),
1492
+ arch=session_info.get("arch"),
1493
+ username=session_info.get("username"),
1494
+ port=session_info.get("target_port"),
1495
+ tunnel_peer=session_info.get("tunnel_peer"),
1496
+ notes=f"Created by job #{jid}",
1064
1497
  )
1065
1498
  conn.commit()
1066
1499
 
@@ -1079,7 +1512,7 @@ def _is_stdbuf_available() -> bool:
1079
1512
  """Check if stdbuf is available for line-buffered output."""
1080
1513
  global _stdbuf_available
1081
1514
  if _stdbuf_available is None:
1082
- _stdbuf_available = shutil.which('stdbuf') is not None
1515
+ _stdbuf_available = shutil.which("stdbuf") is not None
1083
1516
  return _stdbuf_available
1084
1517
 
1085
1518
 
@@ -1102,7 +1535,7 @@ def _wrap_cmd_for_line_buffering(cmd: List[str]) -> List[str]:
1102
1535
 
1103
1536
  if _is_stdbuf_available():
1104
1537
  # stdbuf -oL = line-buffered stdout, -eL = line-buffered stderr
1105
- return ['stdbuf', '-oL', '-eL'] + cmd
1538
+ return ["stdbuf", "-oL", "-eL"] + cmd
1106
1539
 
1107
1540
  return cmd
1108
1541
 
@@ -1115,12 +1548,14 @@ def _get_subprocess_env() -> Dict[str, str]:
1115
1548
  to prevent interactive terminal issues.
1116
1549
  """
1117
1550
  env = os.environ.copy()
1118
- env['TERM'] = 'dumb' # Prevent stty errors from interactive tools
1119
- env['PYTHONUNBUFFERED'] = '1' # Disable Python output buffering
1551
+ env["TERM"] = "dumb" # Prevent stty errors from interactive tools
1552
+ env["PYTHONUNBUFFERED"] = "1" # Disable Python output buffering
1120
1553
  return env
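Together with `_wrap_cmd_for_line_buffering()` just above, this helper shapes every subprocess launch so output streams into the job log line by line. A minimal usage sketch assuming both helpers as defined in this file; the command and log path are illustrative.

import subprocess

cmd = _wrap_cmd_for_line_buffering(["nmap", "-sV", "10.0.0.5"])
# -> ["stdbuf", "-oL", "-eL", "nmap", "-sV", "10.0.0.5"] when stdbuf is installed,
#    or the original command unchanged otherwise.

with open("/tmp/example-job.log", "a", encoding="utf-8", errors="replace") as fh:
    proc = subprocess.Popen(
        cmd,
        stdout=fh,
        stderr=subprocess.STDOUT,
        env=_get_subprocess_env(),  # TERM=dumb, PYTHONUNBUFFERED=1
    )
    proc.wait()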
1121
1554
 
1122
1555
 
1123
- def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int = None, plugin=None) -> int:
1556
+ def _run_subprocess_with_spec(
1557
+ cmd_spec: Dict[str, Any], log_path: str, jid: int = None, plugin=None
1558
+ ) -> int:
1124
1559
  """
1125
1560
  Execute a command specification with proper PID tracking.
1126
1561
 
@@ -1153,19 +1588,19 @@ def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int
1153
1588
  Exit code (0 = success, non-zero = failure)
1154
1589
  """
1155
1590
  # Check for RPC mode (Pro feature)
1156
- if cmd_spec.get('mode') == 'rpc':
1591
+ if cmd_spec.get("mode") == "rpc":
1157
1592
  return _run_rpc_exploit(cmd_spec, log_path, jid, plugin)
1158
1593
 
1159
- cmd = cmd_spec.get('cmd')
1594
+ cmd = cmd_spec.get("cmd")
1160
1595
  if not cmd:
1161
1596
  with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
1162
1597
  fh.write("ERROR: No command provided in spec\n")
1163
1598
  return 1
1164
1599
 
1165
- timeout = cmd_spec.get('timeout', JOB_TIMEOUT_SECONDS)
1166
- spec_env = cmd_spec.get('env')
1167
- cwd = cmd_spec.get('cwd')
1168
- needs_shell = cmd_spec.get('needs_shell', False)
1600
+ timeout = cmd_spec.get("timeout", JOB_TIMEOUT_SECONDS)
1601
+ spec_env = cmd_spec.get("env")
1602
+ cwd = cmd_spec.get("cwd")
1603
+ needs_shell = cmd_spec.get("needs_shell", False)
1169
1604
 
1170
1605
  _append_worker_log(f"_run_subprocess_with_spec: timeout={timeout}s for job {jid}")
1171
1606
 
@@ -1187,7 +1622,9 @@ def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int
1187
1622
  fh.write(f"Environment: {spec_env}\n")
1188
1623
  if cwd:
1189
1624
  fh.write(f"Working Dir: {cwd}\n")
1190
- fh.write(f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n\n")
1625
+ fh.write(
1626
+ f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n\n"
1627
+ )
1191
1628
  fh.flush()
1192
1629
 
1193
1630
  try:
@@ -1201,9 +1638,9 @@ def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int
1201
1638
  preexec_fn=os.setsid, # Creates new session
1202
1639
  env=proc_env,
1203
1640
  cwd=cwd,
1204
- shell=needs_shell # nosec B602 - intentional for security tool command execution
1641
+ shell=needs_shell, # nosec B602 - intentional for security tool command execution
1205
1642
  )
1206
-
1643
+
1207
1644
  # Store PID and process start time for stale detection
1208
1645
  if jid is not None:
1209
1646
  proc_start_time = _get_process_start_time(proc.pid)
@@ -1225,19 +1662,31 @@ def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int
1225
1662
  # For MSF exploits, check if a session was opened before timeout
1226
1663
  # A timeout with an open session is success, not failure
1227
1664
  session_opened = False
1228
- if hasattr(plugin, 'tool') and plugin.tool in ('msf_exploit', 'msf_auxiliary'):
1665
+ if hasattr(plugin, "tool") and plugin.tool in (
1666
+ "msf_exploit",
1667
+ "msf_auxiliary",
1668
+ ):
1229
1669
  try:
1230
1670
  fh.flush()
1231
- with open(log_path, "r", encoding="utf-8", errors="replace") as rf:
1671
+ with open(
1672
+ log_path, "r", encoding="utf-8", errors="replace"
1673
+ ) as rf:
1232
1674
  content = rf.read()
1233
1675
  import re
1234
- session_opened = bool(re.search(r'session \d+ opened', content, re.IGNORECASE))
1676
+
1677
+ session_opened = bool(
1678
+ re.search(r"session \d+ opened", content, re.IGNORECASE)
1679
+ )
1235
1680
  except Exception:
1236
1681
  pass
1237
1682
 
1238
1683
  if session_opened:
1239
- fh.write(f"\n[*] Session opened successfully (timeout expected - session is active)\n")
1240
- fh.write(f"=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ===\n")
1684
+ fh.write(
1685
+ f"\n[*] Session opened successfully (timeout expected - session is active)\n"
1686
+ )
1687
+ fh.write(
1688
+ f"=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ===\n"
1689
+ )
1241
1690
  return 0
1242
1691
  else:
1243
1692
  fh.write(f"\nERROR: Command timed out after {timeout} seconds\n")
@@ -1247,7 +1696,7 @@ def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int
1247
1696
  # Check if job was killed externally during execution
1248
1697
  if jid is not None:
1249
1698
  job = get_job(jid)
1250
- if job and job.get('status') == 'killed':
1699
+ if job and job.get("status") == "killed":
1251
1700
  fh.write(f"\nINFO: Job was killed externally\n")
1252
1701
  # Process may already be dead, but ensure cleanup
1253
1702
  try:
@@ -1267,9 +1716,12 @@ def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int
1267
1716
  fh.flush()
1268
1717
  return 143 # 128 + 15 (SIGTERM)
1269
1718
 
1270
- fh.write(f"\n=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ===\n")
1719
+ fh.write(
1720
+ f"\n=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ===\n"
1721
+ )
1271
1722
  fh.write(f"Exit Code: {proc.returncode}\n")
1272
1723
  fh.flush()
1724
+ os.fsync(fh.fileno()) # Ensure data is on disk before parsing
1273
1725
  return proc.returncode
1274
1726
 
1275
1727
  except FileNotFoundError:
@@ -1282,7 +1734,14 @@ def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int
1282
1734
  return 1
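A sketch of the plain (non-RPC) spec this function consumes, matching the keys it reads above; every value shown is illustrative.

cmd_spec = {
    "cmd": ["gobuster", "dir", "-u", "http://10.0.0.5", "-w", "wordlist.txt"],
    "timeout": 1800,          # optional; JOB_TIMEOUT_SECONDS is used when omitted
    "env": None,              # optional environment for the subprocess
    "cwd": None,              # optional working directory
    "needs_shell": False,     # only True when a shell is genuinely required
}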
1283
1735
 
1284
1736
 
1285
- def _run_subprocess(tool: str, target: str, args: List[str], log_path: str, jid: int = None, timeout: int = None) -> int:
1737
+ def _run_subprocess(
1738
+ tool: str,
1739
+ target: str,
1740
+ args: List[str],
1741
+ log_path: str,
1742
+ jid: int = None,
1743
+ timeout: int = None,
1744
+ ) -> int:
1286
1745
  # Use None as default and resolve at runtime to avoid Python's early binding issue
1287
1746
  if timeout is None:
1288
1747
  timeout = JOB_TIMEOUT_SECONDS
@@ -1298,11 +1757,13 @@ def _run_subprocess(tool: str, target: str, args: List[str], log_path: str, jid:
1298
1757
 
1299
1758
  with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
1300
1759
  # Log original command (without stdbuf wrapper for clarity)
1301
- original_cmd = cmd[3:] if cmd[:3] == ['stdbuf', '-oL', '-eL'] else cmd
1760
+ original_cmd = cmd[3:] if cmd[:3] == ["stdbuf", "-oL", "-eL"] else cmd
1302
1761
  fh.write("=== Subprocess Execution ===\n")
1303
1762
  fh.write(f"Command: {' '.join(original_cmd)}\n")
1304
1763
  fh.write(f"Timeout: {timeout} seconds\n")
1305
- fh.write(f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n\n")
1764
+ fh.write(
1765
+ f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n\n"
1766
+ )
1306
1767
  fh.flush()
1307
1768
 
1308
1769
  try:
@@ -1317,7 +1778,7 @@ def _run_subprocess(tool: str, target: str, args: List[str], log_path: str, jid:
1317
1778
  stdout=fh,
1318
1779
  stderr=subprocess.STDOUT,
1319
1780
  preexec_fn=os.setsid, # Creates new session
1320
- env=env
1781
+ env=env,
1321
1782
  )
1322
1783
 
1323
1784
  # Store PID and process start time for stale detection
@@ -1344,7 +1805,7 @@ def _run_subprocess(tool: str, target: str, args: List[str], log_path: str, jid:
1344
1805
  # Check if job was killed externally during execution
1345
1806
  if jid is not None:
1346
1807
  job = get_job(jid)
1347
- if job and job.get('status') == 'killed':
1808
+ if job and job.get("status") == "killed":
1348
1809
  fh.write(f"\nINFO: Job was killed externally\n")
1349
1810
  # Process may already be dead, but ensure cleanup
1350
1811
  try:
@@ -1364,9 +1825,12 @@ def _run_subprocess(tool: str, target: str, args: List[str], log_path: str, jid:
1364
1825
  fh.flush()
1365
1826
  return 143 # 128 + 15 (SIGTERM)
1366
1827
 
1367
- fh.write(f"\n=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ===\n")
1828
+ fh.write(
1829
+ f"\n=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ===\n"
1830
+ )
1368
1831
  fh.write(f"Exit Code: {proc.returncode}\n")
1369
1832
  fh.flush()
1833
+ os.fsync(fh.fileno()) # Ensure data is on disk before parsing
1370
1834
  return proc.returncode
1371
1835
 
1372
1836
  except FileNotFoundError:
@@ -1407,7 +1871,23 @@ def _is_true_error_exit_code(rc: int, tool: str) -> bool:
1407
1871
  # msf_exploit returns 1 when no session opened (exploit ran but target not vulnerable)
1408
1872
  # nikto returns non-zero when it finds vulnerabilities (not an error!)
1409
1873
  # dnsrecon returns 1 when crt.sh lookup fails (known bug) but still collects valid DNS data
1410
- tools_with_nonzero_success = ['gobuster', 'hydra', 'medusa', 'msf_exploit', 'nikto', 'dnsrecon']
1874
+ # evil_winrm returns non-zero even on successful auth - let handler parse output
1875
+ # bloodhound exits non-zero on connection errors but still collects AD data
1876
+ # hashcat returns 1 when exhausted (no passwords cracked) - not an error, just no results
1877
+ # bash scripts and web_login_test return 1 when credentials fail - not an error, just invalid creds
1878
+ tools_with_nonzero_success = [
1879
+ "gobuster",
1880
+ "hydra",
1881
+ "medusa",
1882
+ "msf_exploit",
1883
+ "nikto",
1884
+ "dnsrecon",
1885
+ "evil_winrm",
1886
+ "bloodhound",
1887
+ "hashcat",
1888
+ "bash",
1889
+ "web_login_test",
1890
+ ]
1411
1891
 
1412
1892
  if tool.lower() in tools_with_nonzero_success:
1413
1893
  # Let parser determine status
@@ -1418,11 +1898,76 @@ def _is_true_error_exit_code(rc: int, tool: str) -> bool:
1418
1898
 
1419
1899
 
1420
1900
  def run_job(jid: int) -> None:
1421
- job = get_job(jid)
1422
- if not job:
1423
- logger.error("Job not found", extra={"job_id": jid})
1424
- _append_worker_log(f"run_job: job {jid} not found")
1425
- return
1901
+ """
1902
+ Run a job by its ID.
1903
+
1904
+ Uses atomic status transition with cross-process file locking to prevent
1905
+ race conditions with kill/delete and other processes (UI).
1906
+ If job is not in QUEUED status when we try to start it, we abort.
1907
+ """
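The cross-process lock referenced below, `_jobs_lock()`, is defined elsewhere in this module and raises `TimeoutError` when the lock cannot be acquired. A minimal sketch of what such a lock commonly looks like on POSIX, assuming an `fcntl`-based implementation — an assumption for illustration, not the package's actual code.

import fcntl
import time
from contextlib import contextmanager

@contextmanager
def _example_jobs_lock(lock_path="/tmp/jobs.lock", timeout=10):
    """Hypothetical cross-process file lock; the real _jobs_lock lives elsewhere in this module."""
    fh = open(lock_path, "a+")
    deadline = time.monotonic() + timeout
    try:
        while True:
            try:
                fcntl.flock(fh, fcntl.LOCK_EX | fcntl.LOCK_NB)
                break
            except BlockingIOError:
                if time.monotonic() > deadline:
                    raise TimeoutError("could not acquire jobs lock")
                time.sleep(0.05)
        yield
    finally:
        fcntl.flock(fh, fcntl.LOCK_UN)
        fh.close()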
1908
+ # Atomically check status and transition to RUNNING
1909
+ # Both thread lock and file lock ensure no other process/thread can
1910
+ # read/write jobs.json while we're modifying it
1911
+ with _lock: # Thread safety within this process
1912
+ try:
1913
+ with _jobs_lock(): # Cross-process safety
1914
+ jobs = _read_jobs_unlocked()
1915
+ job = None
1916
+ for j in jobs:
1917
+ if j.get("id") == jid:
1918
+ job = j
1919
+ break
1920
+
1921
+ if not job:
1922
+ logger.error("Job not found", extra={"job_id": jid})
1923
+ _append_worker_log(f"run_job: job {jid} not found")
1924
+ return
1925
+
1926
+ current_status = job.get("status")
1927
+ if current_status != STATUS_QUEUED:
1928
+ # Job was killed, deleted, or already running - abort
1929
+ logger.info(
1930
+ "Job not in queued status, skipping",
1931
+ extra={"job_id": jid, "current_status": current_status},
1932
+ )
1933
+ _append_worker_log(
1934
+ f"run_job: job {jid} not queued (status={current_status}), skipping"
1935
+ )
1936
+ return
1937
+
1938
+ # Atomically set to RUNNING while still holding both locks
1939
+ now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
1940
+ job["status"] = STATUS_RUNNING
1941
+ job["started_at"] = now
1942
+ _write_jobs_unlocked(jobs)
1943
+ except TimeoutError:
1944
+ # Fall back to non-locked operation
1945
+ _append_worker_log(
1946
+ f"jobs.json lock timeout in run_job for {jid}, using fallback"
1947
+ )
1948
+ jobs = _read_jobs()
1949
+ job = None
1950
+ for j in jobs:
1951
+ if j.get("id") == jid:
1952
+ job = j
1953
+ break
1954
+
1955
+ if not job:
1956
+ logger.error("Job not found", extra={"job_id": jid})
1957
+ _append_worker_log(f"run_job: job {jid} not found")
1958
+ return
1959
+
1960
+ current_status = job.get("status")
1961
+ if current_status != STATUS_QUEUED:
1962
+ _append_worker_log(
1963
+ f"run_job: job {jid} not queued (status={current_status}), skipping"
1964
+ )
1965
+ return
1966
+
1967
+ now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
1968
+ job["status"] = STATUS_RUNNING
1969
+ job["started_at"] = now
1970
+ _write_jobs(jobs)
1426
1971
 
1427
1972
  log_path = job.get("log") or os.path.join(JOBS_DIR, f"{jid}.log")
1428
1973
  _ensure_dirs()
@@ -1430,18 +1975,18 @@ def run_job(jid: int) -> None:
1430
1975
  log_dir = os.path.dirname(log_path)
1431
1976
  if not os.path.exists(log_dir):
1432
1977
  os.makedirs(log_dir, exist_ok=True)
1433
-
1434
- now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
1435
- _update_job(jid, status=STATUS_RUNNING, started_at=now)
1436
1978
  _append_worker_log(f"job {jid} started: {job.get('tool')} {job.get('target')}")
1437
1979
 
1438
- logger.info("Job started", extra={
1439
- "event_type": "job_started",
1440
- "job_id": jid,
1441
- "tool": job.get("tool"),
1442
- "target": job.get("target"),
1443
- "engagement_id": job.get("engagement_id")
1444
- })
1980
+ logger.info(
1981
+ "Job started",
1982
+ extra={
1983
+ "event_type": "job_started",
1984
+ "job_id": jid,
1985
+ "tool": job.get("tool"),
1986
+ "target": job.get("target"),
1987
+ "engagement_id": job.get("engagement_id"),
1988
+ },
1989
+ )
1445
1990
 
1446
1991
  try:
1447
1992
  tool = job.get("tool", "")
@@ -1452,27 +1997,33 @@ def run_job(jid: int) -> None:
1452
1997
  # Resolve wordlist paths to actual filesystem locations
1453
1998
  try:
1454
1999
  from ..wordlists import resolve_args_wordlists
2000
+
1455
2001
  args = resolve_args_wordlists(args)
1456
2002
  except ImportError:
1457
2003
  pass # Wordlists module not available, use args as-is
1458
2004
 
1459
2005
  start_time = time.perf_counter()
1460
- plugin_executed, rc = _try_run_plugin(tool, target, args, label, log_path, jid=jid)
2006
+ plugin_executed, rc = _try_run_plugin(
2007
+ tool, target, args, label, log_path, jid=jid
2008
+ )
1461
2009
 
1462
2010
  if not plugin_executed:
1463
- _append_worker_log(f"job {jid}: no plugin found for '{tool}', using subprocess")
1464
- logger.info("Using subprocess fallback", extra={
1465
- "job_id": jid,
1466
- "tool": tool
1467
- })
2011
+ _append_worker_log(
2012
+ f"job {jid}: no plugin found for '{tool}', using subprocess"
2013
+ )
2014
+ logger.info(
2015
+ "Using subprocess fallback", extra={"job_id": jid, "tool": tool}
2016
+ )
1468
2017
  rc = _run_subprocess(tool, target, args, log_path, jid=jid)
1469
2018
 
1470
2019
  # Check if job was killed externally while we were running
1471
2020
  job = get_job(jid)
1472
- job_killed = job and job.get('status') == 'killed'
2021
+ job_killed = job and job.get("status") == "killed"
1473
2022
 
1474
2023
  if job_killed:
1475
- _append_worker_log(f"job {jid}: detected external kill signal, skipping post-processing")
2024
+ _append_worker_log(
2025
+ f"job {jid}: detected external kill signal, skipping post-processing"
2026
+ )
1476
2027
  logger.info("Job was killed externally", extra={"job_id": jid})
1477
2028
 
1478
2029
  # ALWAYS update status, finished_at, and pid - even if job was killed
@@ -1493,22 +2044,80 @@ def run_job(jid: int) -> None:
1493
2044
 
1494
2045
  _update_job(jid, status=status, finished_at=now, pid=None)
1495
2046
 
1496
- logger.info("Job completed", extra={
1497
- "event_type": "job_completed",
1498
- "job_id": jid,
1499
- "status": status,
1500
- "exit_code": rc,
1501
- "duration_ms": round(duration_ms, 2)
1502
- })
2047
+ logger.info(
2048
+ "Job completed",
2049
+ extra={
2050
+ "event_type": "job_completed",
2051
+ "job_id": jid,
2052
+ "status": status,
2053
+ "exit_code": rc,
2054
+ "duration_ms": round(duration_ms, 2),
2055
+ },
2056
+ )
1503
2057
 
1504
2058
  # Only do post-processing if job was not killed externally
1505
2059
  if job_killed:
1506
2060
  _append_worker_log(f"job {jid} finished: status={status} rc={rc}")
1507
2061
  return
1508
2062
 
2063
+ # Check for transient errors and auto-retry
2064
+ job = get_job(jid)
2065
+ retry_count = job.get("metadata", {}).get("retry_count", 0)
2066
+ if retry_count < MAX_RETRIES:
2067
+ # Read log to check for transient errors
2068
+ # Note: Check even when rc==0 because tools like nxc may exit 0 but log errors
2069
+ log_path = job.get("log", "")
2070
+ if log_path and os.path.exists(log_path):
2071
+ try:
2072
+ with open(log_path, "r", encoding="utf-8", errors="replace") as f:
2073
+ log_content = f.read()
2074
+ if _is_transient_error(log_content):
2075
+ # Transient error detected - auto-retry
2076
+ logger.info(
2077
+ "Transient error detected, auto-retrying job",
2078
+ extra={"job_id": jid, "retry_count": retry_count + 1},
2079
+ )
2080
+ _append_worker_log(
2081
+ f"job {jid}: transient error detected, auto-retry {retry_count + 1}/{MAX_RETRIES}"
2082
+ )
2083
+
2084
+ # Build new job metadata with incremented retry count
2085
+ new_metadata = job.get("metadata", {}).copy()
2086
+ new_metadata["retry_count"] = retry_count + 1
2087
+ new_metadata["retried_from"] = jid
2088
+
2089
+ # Enqueue retry job
2090
+ retry_jid = enqueue_job(
2091
+ tool=job.get("tool"),
2092
+ target=job.get("target"),
2093
+ args=job.get("args", []),
2094
+ label=job.get("label", ""),
2095
+ engagement_id=job.get("engagement_id"),
2096
+ metadata=new_metadata,
2097
+ parent_id=job.get("metadata", {}).get("parent_id"),
2098
+ reason=f"Auto-retry {retry_count + 1}/{MAX_RETRIES} (transient error)",
2099
+ rule_id=job.get("metadata", {}).get("rule_id"),
2100
+ skip_scope_check=True, # Already validated on first run
2101
+ )
2102
+ _append_worker_log(
2103
+ f"job {jid}: retry enqueued as job #{retry_jid}"
2104
+ )
2105
+
2106
+ # Mark original job as retried (not error)
2107
+ _update_job(
2108
+ jid,
2109
+ status=STATUS_WARNING,
2110
+ chained=True, # Prevent chaining from failed job
2111
+ parse_result={"note": f"Retried as job #{retry_jid}"},
2112
+ )
2113
+ return
2114
+ except Exception as e:
2115
+ logger.warning(f"Failed to check for transient errors: {e}")
2116
+
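`_is_transient_error()` above is defined elsewhere in this module; a hedged sketch of the kind of substring/regex check such a helper typically performs — the actual pattern list is not shown in this diff.

import re

_EXAMPLE_TRANSIENT_PATTERNS = [
    r"connection refused",
    r"connection reset by peer",
    r"temporary failure in name resolution",
    r"network is unreachable",
    r"timed out",
]

def _example_is_transient_error(log_content: str) -> bool:
    """Hypothetical stand-in for _is_transient_error(); patterns are illustrative."""
    lowered = log_content.lower()
    return any(re.search(p, lowered) for p in _EXAMPLE_TRANSIENT_PATTERNS)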
1509
2117
  # Try to parse results into database
1510
2118
  try:
1511
2119
  from .result_handler import handle_job_result
2120
+
1512
2121
  # Re-fetch job to get updated data
1513
2122
  job = get_job(jid)
1514
2123
  parse_result = handle_job_result(job)
@@ -1516,23 +2125,41 @@ def run_job(jid: int) -> None:
1516
2125
  # Handle parse failure cases
1517
2126
  if parse_result is None:
1518
2127
  # Parser returned None - likely missing log file, no parser for tool, or missing engagement
1519
- logger.error("Job parse returned None - results may be lost", extra={
1520
- "job_id": jid,
1521
- "tool": job.get('tool'),
1522
- "log_exists": os.path.exists(job.get('log', '')) if job.get('log') else False
1523
- })
1524
- _append_worker_log(f"job {jid} parse returned None (tool={job.get('tool')}) - check if parser exists")
1525
- # Update job to indicate parse failure
1526
- _update_job(jid, status=STATUS_WARNING, parse_result={'error': 'Parser returned None - no results extracted'})
2128
+ logger.warning(
2129
+ "Job parse returned None - no parser for this tool",
2130
+ extra={
2131
+ "job_id": jid,
2132
+ "tool": job.get("tool"),
2133
+ "log_exists": (
2134
+ os.path.exists(job.get("log", ""))
2135
+ if job.get("log")
2136
+ else False
2137
+ ),
2138
+ },
2139
+ )
2140
+ _append_worker_log(
2141
+ f"job {jid} parse returned None (tool={job.get('tool')}) - check if parser exists"
2142
+ )
2143
+ # Only update status to WARNING if it wasn't already an ERROR
2144
+ # (e.g., exit code 127 = command not found should stay as ERROR)
2145
+ current_status = job.get("status")
2146
+ if current_status != STATUS_ERROR:
2147
+ _update_job(
2148
+ jid,
2149
+ status=STATUS_WARNING,
2150
+ parse_result={
2151
+ "error": "Parser returned None - no results extracted"
2152
+ },
2153
+ )
1527
2154
  # Mark as chained to prevent infinite retry
1528
2155
  _update_job(jid, chained=True)
1529
2156
  return
1530
2157
 
1531
- if 'error' in parse_result:
1532
- logger.error("Job parse error - results may be incomplete", extra={
1533
- "job_id": jid,
1534
- "error": parse_result['error']
1535
- })
2158
+ if "error" in parse_result:
2159
+ logger.error(
2160
+ "Job parse error - results may be incomplete",
2161
+ extra={"job_id": jid, "error": parse_result["error"]},
2162
+ )
1536
2163
  _append_worker_log(f"job {jid} parse error: {parse_result['error']}")
1537
2164
  # Update job status to warning with the error
1538
2165
  _update_job(jid, status=STATUS_WARNING, parse_result=parse_result)
@@ -1541,49 +2168,50 @@ def run_job(jid: int) -> None:
1541
2168
  return
1542
2169
 
1543
2170
  # Parse succeeded
1544
- logger.info("Job parsed successfully", extra={
1545
- "job_id": jid,
1546
- "parse_result": parse_result
1547
- })
2171
+ logger.info(
2172
+ "Job parsed successfully",
2173
+ extra={"job_id": jid, "parse_result": parse_result},
2174
+ )
1548
2175
  _append_worker_log(f"job {jid} parsed: {parse_result}")
1549
2176
 
1550
2177
  # Determine chainable status BEFORE updating to avoid race condition
1551
2178
  # We must set parse_result and chainable in a single atomic update
1552
2179
  try:
1553
2180
  from souleyez.core.tool_chaining import ToolChaining
2181
+
1554
2182
  chaining = ToolChaining()
1555
2183
 
1556
2184
  # Get current job to check status
1557
2185
  job = get_job(jid)
1558
- job_status = job.get('status', STATUS_ERROR)
2186
+ job_status = job.get("status", STATUS_ERROR)
1559
2187
 
1560
2188
  # Determine final status from parser if provided
1561
- final_status = parse_result.get('status', job_status)
2189
+ final_status = parse_result.get("status", job_status)
1562
2190
 
1563
2191
  # Check if job should be chainable
1564
2192
  should_chain = (
1565
- chaining.is_enabled() and
1566
- parse_result and
1567
- 'error' not in parse_result and
1568
- is_chainable(final_status)
2193
+ chaining.is_enabled()
2194
+ and parse_result
2195
+ and "error" not in parse_result
2196
+ and is_chainable(final_status)
1569
2197
  )
1570
2198
 
1571
2199
  # Build update dict - ATOMIC update of parse_result + chainable
1572
- update_fields = {'parse_result': parse_result}
1573
-
1574
- if 'status' in parse_result:
1575
- update_fields['status'] = final_status
1576
- logger.info("Job status updated from parser", extra={
1577
- "job_id": jid,
1578
- "status": final_status
1579
- })
2200
+ update_fields = {"parse_result": parse_result}
2201
+
2202
+ if "status" in parse_result:
2203
+ update_fields["status"] = final_status
2204
+ logger.info(
2205
+ "Job status updated from parser",
2206
+ extra={"job_id": jid, "status": final_status},
2207
+ )
1580
2208
  _append_worker_log(f"job {jid} status updated to: {final_status}")
1581
2209
 
1582
2210
  if should_chain:
1583
- update_fields['chainable'] = True
2211
+ update_fields["chainable"] = True
1584
2212
  else:
1585
2213
  # Not chainable - mark as chained to skip
1586
- update_fields['chained'] = True
2214
+ update_fields["chained"] = True
1587
2215
 
1588
2216
  # Single atomic update to prevent race condition
1589
2217
  _update_job(jid, **update_fields)
@@ -1591,69 +2219,89 @@ def run_job(jid: int) -> None:
1591
2219
  # Log chaining decision
1592
2220
  if should_chain:
1593
2221
  if final_status == STATUS_WARNING:
1594
- logger.info("Job with warning status marked for chaining", extra={
1595
- "job_id": jid,
1596
- "tool": job.get('tool'),
1597
- "wildcard_detected": parse_result.get('wildcard_detected', False)
1598
- })
1599
- _append_worker_log(f"job {jid} (status=warning) marked as chainable")
2222
+ logger.info(
2223
+ "Job with warning status marked for chaining",
2224
+ extra={
2225
+ "job_id": jid,
2226
+ "tool": job.get("tool"),
2227
+ "wildcard_detected": parse_result.get(
2228
+ "wildcard_detected", False
2229
+ ),
2230
+ },
2231
+ )
2232
+ _append_worker_log(
2233
+ f"job {jid} (status=warning) marked as chainable"
2234
+ )
1600
2235
  else:
1601
- logger.info("Job marked as chainable", extra={
1602
- "job_id": jid,
1603
- "tool": job.get('tool'),
1604
- "status": final_status
1605
- })
1606
- _append_worker_log(f"job {jid} marked as chainable (status={final_status})")
2236
+ logger.info(
2237
+ "Job marked as chainable",
2238
+ extra={
2239
+ "job_id": jid,
2240
+ "tool": job.get("tool"),
2241
+ "status": final_status,
2242
+ },
2243
+ )
2244
+ _append_worker_log(
2245
+ f"job {jid} marked as chainable (status={final_status})"
2246
+ )
1607
2247
  else:
1608
2248
  reason = f"chaining_disabled={not chaining.is_enabled()}, has_error={'error' in parse_result}, status={final_status}"
1609
2249
  _append_worker_log(f"job {jid} not chainable ({reason})")
1610
2250
 
1611
2251
  except Exception as chain_err:
1612
- logger.error("Failed to mark job as chainable", extra={
1613
- "job_id": jid,
1614
- "error": str(chain_err)
1615
- })
2252
+ logger.error(
2253
+ "Failed to mark job as chainable",
2254
+ extra={"job_id": jid, "error": str(chain_err)},
2255
+ )
1616
2256
  _append_worker_log(f"job {jid} chainable marking error: {chain_err}")
1617
2257
  # Mark as chained to prevent retry loops
1618
2258
  _update_job(jid, chained=True, chain_error=str(chain_err))
1619
2259
 
1620
2260
  except Exception as e:
1621
- logger.error("Job parse exception", extra={
1622
- "job_id": jid,
1623
- "error": str(e),
1624
- "traceback": traceback.format_exc()
1625
- })
2261
+ logger.error(
2262
+ "Job parse exception",
2263
+ extra={
2264
+ "job_id": jid,
2265
+ "error": str(e),
2266
+ "traceback": traceback.format_exc(),
2267
+ },
2268
+ )
1626
2269
  _append_worker_log(f"job {jid} parse exception: {e}")
1627
2270
 
1628
2271
  # Sanitize log file to remove credentials
1629
2272
  try:
1630
2273
  if os.path.exists(log_path):
1631
- with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
2274
+ with open(log_path, "r", encoding="utf-8", errors="replace") as f:
1632
2275
  original_log = f.read()
1633
-
2276
+
1634
2277
  # Check if encryption is enabled - only sanitize if encryption is on
1635
2278
  from souleyez.storage.crypto import CryptoManager
2279
+
1636
2280
  crypto_mgr = CryptoManager()
1637
-
1638
- if crypto_mgr.is_encryption_enabled() and LogSanitizer.contains_credentials(original_log):
2281
+
2282
+ if (
2283
+ crypto_mgr.is_encryption_enabled()
2284
+ and LogSanitizer.contains_credentials(original_log)
2285
+ ):
1639
2286
  sanitized_log = LogSanitizer.sanitize(original_log)
1640
-
2287
+
1641
2288
  # Write sanitized log back
1642
- with open(log_path, 'w', encoding='utf-8') as f:
2289
+ with open(log_path, "w", encoding="utf-8") as f:
1643
2290
  f.write(sanitized_log)
1644
-
1645
- summary = LogSanitizer.get_redaction_summary(original_log, sanitized_log)
2291
+
2292
+ summary = LogSanitizer.get_redaction_summary(
2293
+ original_log, sanitized_log
2294
+ )
1646
2295
  if summary:
1647
2296
  _append_worker_log(f"job {jid}: {summary}")
1648
- logger.info("Log sanitized", extra={
1649
- "job_id": jid,
1650
- "summary": summary
1651
- })
2297
+ logger.info(
2298
+ "Log sanitized", extra={"job_id": jid, "summary": summary}
2299
+ )
1652
2300
  except Exception as sanitize_err:
1653
- logger.warning("Log sanitization failed", extra={
1654
- "job_id": jid,
1655
- "error": str(sanitize_err)
1656
- })
2301
+ logger.warning(
2302
+ "Log sanitization failed",
2303
+ extra={"job_id": jid, "error": str(sanitize_err)},
2304
+ )
1657
2305
  # Don't fail the job if sanitization fails
1658
2306
 
1659
2307
  _append_worker_log(f"job {jid} finished: status={status} rc={rc}")
@@ -1661,27 +2309,31 @@ def run_job(jid: int) -> None:
1661
2309
  except Exception as e:
1662
2310
  now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
1663
2311
  _update_job(jid, status="error", error=str(e), finished_at=now)
1664
- logger.error("Job crashed", extra={
1665
- "event_type": "job_failed",
1666
- "job_id": jid,
1667
- "error": str(e),
1668
- "traceback": traceback.format_exc()
1669
- })
2312
+ logger.error(
2313
+ "Job crashed",
2314
+ extra={
2315
+ "event_type": "job_failed",
2316
+ "job_id": jid,
2317
+ "error": str(e),
2318
+ "traceback": traceback.format_exc(),
2319
+ },
2320
+ )
1670
2321
  _append_worker_log(f"job {jid} crashed: {e}")
1671
-
2322
+
1672
2323
  # Sanitize log even on error
1673
2324
  try:
1674
2325
  if os.path.exists(log_path):
1675
2326
  from souleyez.storage.crypto import CryptoManager
2327
+
1676
2328
  crypto_mgr = CryptoManager()
1677
-
2329
+
1678
2330
  if crypto_mgr.is_encryption_enabled():
1679
- with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
2331
+ with open(log_path, "r", encoding="utf-8", errors="replace") as f:
1680
2332
  original_log = f.read()
1681
-
2333
+
1682
2334
  if LogSanitizer.contains_credentials(original_log):
1683
2335
  sanitized_log = LogSanitizer.sanitize(original_log)
1684
- with open(log_path, 'w', encoding='utf-8') as f:
2336
+ with open(log_path, "w", encoding="utf-8") as f:
1685
2337
  f.write(sanitized_log)
1686
2338
  except Exception:
1687
2339
  pass # Silently fail sanitization on error
@@ -1714,7 +2366,7 @@ def _check_log_for_completion(log_path: str, tool: str) -> tuple:
1714
2366
  return (False, None)
1715
2367
 
1716
2368
  try:
1717
- with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
2369
+ with open(log_path, "r", encoding="utf-8", errors="replace") as f:
1718
2370
  # Read last 5KB of log (completion markers are at the end)
1719
2371
  f.seek(0, 2) # End of file
1720
2372
  file_size = f.tell()
@@ -1724,26 +2376,26 @@ def _check_log_for_completion(log_path: str, tool: str) -> tuple:
1724
2376
 
1725
2377
  # Tool-specific completion markers
1726
2378
  completion_markers = {
1727
- 'nmap': ['Nmap done:', 'Nmap scan report for'],
1728
- 'gobuster': ['Finished', 'Progress:'],
1729
- 'nikto': ['host(s) tested', 'End Time:'],
1730
- 'nuclei': ['Scan completed', 'matches found', 'No results found'],
1731
- 'sqlmap': ['fetched data logged', 'shutting down'],
1732
- 'hydra': ['valid password', 'host:', 'targets finished'],
1733
- 'ffuf': ['Progress:', 'Duration:'],
1734
- 'default': ['=== Completed:', 'Exit Code:']
2379
+ "nmap": ["Nmap done:", "Nmap scan report for"],
2380
+ "gobuster": ["Finished", "Progress:"],
2381
+ "nikto": ["host(s) tested", "End Time:"],
2382
+ "nuclei": ["Scan completed", "matches found", "No results found"],
2383
+ "sqlmap": ["fetched data logged", "shutting down"],
2384
+ "hydra": ["valid password", "host:", "targets finished"],
2385
+ "ffuf": ["Progress:", "Duration:"],
2386
+ "default": ["=== Completed:", "Exit Code:"],
1735
2387
  }
1736
2388
 
1737
- markers = completion_markers.get(tool.lower(), completion_markers['default'])
2389
+ markers = completion_markers.get(tool.lower(), completion_markers["default"])
1738
2390
 
1739
2391
  for marker in markers:
1740
2392
  if marker in log_tail:
1741
2393
  # Try to extract exit code
1742
2394
  exit_code = None
1743
- if 'Exit Code:' in log_tail:
2395
+ if "Exit Code:" in log_tail:
1744
2396
  try:
1745
- idx = log_tail.index('Exit Code:')
1746
- code_str = log_tail[idx+10:idx+15].strip().split()[0]
2397
+ idx = log_tail.index("Exit Code:")
2398
+ code_str = log_tail[idx + 10 : idx + 15].strip().split()[0]
1747
2399
  exit_code = int(code_str)
1748
2400
  except (ValueError, IndexError):
1749
2401
  exit_code = 0
@@ -1771,14 +2423,14 @@ def _detect_and_recover_stale_jobs() -> int:
1771
2423
 
1772
2424
  try:
1773
2425
  jobs = _read_jobs()
1774
- running_jobs = [j for j in jobs if j.get('status') == STATUS_RUNNING]
2426
+ running_jobs = [j for j in jobs if j.get("status") == STATUS_RUNNING]
1775
2427
 
1776
2428
  for job in running_jobs:
1777
- jid = job.get('id')
1778
- pid = job.get('pid')
1779
- tool = job.get('tool', 'unknown')
1780
- log_path = job.get('log')
1781
- stored_start_time = job.get('process_start_time')
2429
+ jid = job.get("id")
2430
+ pid = job.get("pid")
2431
+ tool = job.get("tool", "unknown")
2432
+ log_path = job.get("log")
2433
+ stored_start_time = job.get("process_start_time")
1782
2434
 
1783
2435
  # Check if PID is alive
1784
2436
  if _is_pid_alive(pid):
@@ -1793,13 +2445,16 @@ def _detect_and_recover_stale_jobs() -> int:
1793
2445
  f"job {jid}: PID {pid} reused (stored start: {stored_start_time:.0f}, "
1794
2446
  f"current: {current_start_time:.0f})"
1795
2447
  )
1796
- logger.warning("PID reuse detected", extra={
1797
- "job_id": jid,
1798
- "tool": tool,
1799
- "pid": pid,
1800
- "stored_start_time": stored_start_time,
1801
- "current_start_time": current_start_time
1802
- })
2448
+ logger.warning(
2449
+ "PID reuse detected",
2450
+ extra={
2451
+ "job_id": jid,
2452
+ "tool": tool,
2453
+ "pid": pid,
2454
+ "stored_start_time": stored_start_time,
2455
+ "current_start_time": current_start_time,
2456
+ },
2457
+ )
1803
2458
  # Fall through to stale job handling
1804
2459
  else:
1805
2460
  # Same process, still running
@@ -1813,11 +2468,10 @@ def _detect_and_recover_stale_jobs() -> int:
1813
2468
  else:
1814
2469
  # PID is dead - definitely stale
1815
2470
  _append_worker_log(f"job {jid}: detected stale (PID {pid} is dead)")
1816
- logger.warning("Stale job detected", extra={
1817
- "job_id": jid,
1818
- "tool": tool,
1819
- "pid": pid
1820
- })
2471
+ logger.warning(
2472
+ "Stale job detected",
2473
+ extra={"job_id": jid, "tool": tool, "pid": pid},
2474
+ )
1821
2475
 
1822
2476
  # Check if log shows completion
1823
2477
  completed, exit_code = _check_log_for_completion(log_path, tool)
@@ -1839,74 +2493,94 @@ def _detect_and_recover_stale_jobs() -> int:
1839
2493
 
1840
2494
  # Try to parse results
1841
2495
  try:
1842
- from .result_handler import handle_job_result
1843
2496
  from souleyez.core.tool_chaining import ToolChaining
1844
2497
 
2498
+ from .result_handler import handle_job_result
2499
+
1845
2500
  job = get_job(jid)
1846
2501
  parse_result = handle_job_result(job)
1847
2502
 
1848
2503
  if parse_result:
1849
- if 'error' in parse_result:
1850
- _append_worker_log(f"job {jid} stale recovery parse error: {parse_result['error']}")
2504
+ if "error" in parse_result:
2505
+ _append_worker_log(
2506
+ f"job {jid} stale recovery parse error: {parse_result['error']}"
2507
+ )
1851
2508
  else:
1852
2509
  # Determine final status and chainable in one check
1853
- final_status = parse_result.get('status', status)
2510
+ final_status = parse_result.get("status", status)
1854
2511
  chaining = ToolChaining()
1855
- should_chain = chaining.is_enabled() and is_chainable(final_status)
2512
+ should_chain = chaining.is_enabled() and is_chainable(
2513
+ final_status
2514
+ )
1856
2515
 
1857
2516
  # Build atomic update - parse_result + status + chainable together
1858
- update_fields = {'parse_result': parse_result}
1859
- if 'status' in parse_result:
1860
- update_fields['status'] = final_status
2517
+ update_fields = {"parse_result": parse_result}
2518
+ if "status" in parse_result:
2519
+ update_fields["status"] = final_status
1861
2520
  if should_chain:
1862
- update_fields['chainable'] = True
2521
+ update_fields["chainable"] = True
1863
2522
 
1864
2523
  # Single atomic update to prevent race condition
1865
2524
  _update_job(jid, **update_fields)
1866
2525
 
1867
- _append_worker_log(f"job {jid} stale recovery parsed: {parse_result.get('findings_added', 0)} findings")
2526
+ _append_worker_log(
2527
+ f"job {jid} stale recovery parsed: {parse_result.get('findings_added', 0)} findings"
2528
+ )
1868
2529
 
1869
- logger.info("Stale job recovered with results", extra={
1870
- "job_id": jid,
1871
- "tool": tool,
1872
- "status": final_status,
1873
- "parse_result": parse_result,
1874
- "chainable": should_chain
1875
- })
2530
+ logger.info(
2531
+ "Stale job recovered with results",
2532
+ extra={
2533
+ "job_id": jid,
2534
+ "tool": tool,
2535
+ "status": final_status,
2536
+ "parse_result": parse_result,
2537
+ "chainable": should_chain,
2538
+ },
2539
+ )
1876
2540
 
1877
2541
  if should_chain:
1878
- _append_worker_log(f"job {jid} stale recovery marked as chainable")
2542
+ _append_worker_log(
2543
+ f"job {jid} stale recovery marked as chainable"
2544
+ )
1879
2545
 
1880
2546
  except Exception as parse_err:
1881
- _append_worker_log(f"job {jid} stale recovery parse exception: {parse_err}")
2547
+ _append_worker_log(
2548
+ f"job {jid} stale recovery parse exception: {parse_err}"
2549
+ )
1882
2550
 
1883
2551
  recovered += 1
1884
2552
 
1885
2553
  else:
1886
2554
  # Process died mid-execution - mark as error
1887
- _append_worker_log(f"job {jid}: process died unexpectedly, marking as error")
1888
- _update_job(jid,
2555
+ _append_worker_log(
2556
+ f"job {jid}: process died unexpectedly, marking as error"
2557
+ )
2558
+ _update_job(
2559
+ jid,
1889
2560
  status=STATUS_ERROR,
1890
2561
  finished_at=now,
1891
2562
  pid=None,
1892
- error="Process terminated unexpectedly (worker restart or crash)"
2563
+ error="Process terminated unexpectedly (worker restart or crash)",
1893
2564
  )
1894
2565
 
1895
- logger.warning("Stale job marked as error", extra={
1896
- "job_id": jid,
1897
- "tool": tool,
1898
- "reason": "process_died_unexpectedly"
1899
- })
2566
+ logger.warning(
2567
+ "Stale job marked as error",
2568
+ extra={
2569
+ "job_id": jid,
2570
+ "tool": tool,
2571
+ "reason": "process_died_unexpectedly",
2572
+ },
2573
+ )
1900
2574
 
1901
2575
  recovered += 1
1902
2576
 
1903
2577
  return recovered
1904
2578
 
1905
2579
  except Exception as e:
1906
- logger.error("Stale job detection error", extra={
1907
- "error": str(e),
1908
- "traceback": traceback.format_exc()
1909
- })
2580
+ logger.error(
2581
+ "Stale job detection error",
2582
+ extra={"error": str(e), "traceback": traceback.format_exc()},
2583
+ )
1910
2584
  _append_worker_log(f"stale job detection error: {e}")
1911
2585
  return 0
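The PID-reuse check above compares a stored process start time against the current start time for the same PID; `_get_process_start_time()` itself is defined elsewhere. One way such a value can be read on Linux is from `/proc/<pid>/stat` — shown here as an assumption about the mechanism, not the package's implementation (psutil's `Process.create_time()` is another common choice).

def _example_process_start_time(pid: int):
    """Hypothetical: start time of a process in clock ticks since boot, from /proc/<pid>/stat."""
    try:
        with open(f"/proc/{pid}/stat", "r") as fh:
            # Strip "pid (comm) " first, since comm may contain spaces, then split the rest.
            fields = fh.read().rsplit(")", 1)[1].split()
        return float(fields[19])  # starttime is field 22 overall, index 19 after the strip
    except (OSError, IndexError, ValueError):
        return None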
1912
2586
 
@@ -1926,10 +2600,11 @@ def _check_msf_exploitation_success():
1926
2600
  try:
1927
2601
  jobs = _read_jobs()
1928
2602
  running_msf = [
1929
- j for j in jobs
1930
- if j.get('status') == STATUS_RUNNING
1931
- and j.get('tool') in ('msfconsole', 'msf')
1932
- and not j.get('exploitation_detected') # Not already detected
2603
+ j
2604
+ for j in jobs
2605
+ if j.get("status") == STATUS_RUNNING
2606
+ and j.get("tool") in ("msfconsole", "msf")
2607
+ and not j.get("exploitation_detected") # Not already detected
1933
2608
  ]
1934
2609
 
1935
2610
  if not running_msf:
@@ -1939,22 +2614,22 @@ def _check_msf_exploitation_success():
1939
2614
 
1940
2615
  # Success patterns from MSF output
1941
2616
  success_patterns = [
1942
- r'\[\*\]\s+Command shell session \d+ opened',
1943
- r'\[\*\]\s+Meterpreter session \d+ opened',
1944
- r'\[\+\]\s+\d+\.\d+\.\d+\.\d+:\d+\s+-\s+Session \d+ created',
1945
- r'\[\+\].*session.*opened',
1946
- r'\[\+\].*session.*created',
2617
+ r"\[\*\]\s+Command shell session \d+ opened",
2618
+ r"\[\*\]\s+Meterpreter session \d+ opened",
2619
+ r"\[\+\]\s+\d+\.\d+\.\d+\.\d+:\d+\s+-\s+Session \d+ created",
2620
+ r"\[\+\].*session.*opened",
2621
+ r"\[\+\].*session.*created",
1947
2622
  ]
1948
2623
 
1949
2624
  for job in running_msf:
1950
- jid = job.get('id')
2625
+ jid = job.get("id")
1951
2626
  log_path = os.path.join(JOBS_DIR, f"{jid}.log")
1952
2627
 
1953
2628
  if not os.path.exists(log_path):
1954
2629
  continue
1955
2630
 
1956
2631
  try:
1957
- with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
2632
+ with open(log_path, "r", encoding="utf-8", errors="replace") as f:
1958
2633
  content = f.read()
1959
2634
 
1960
2635
  # Check for success patterns
@@ -1966,21 +2641,27 @@ def _check_msf_exploitation_success():
  if match:
  session_opened = True
  # Extract session number if available
- session_match = re.search(r'session (\d+)', match.group(), re.IGNORECASE)
+ session_match = re.search(
+ r"session (\d+)", match.group(), re.IGNORECASE
+ )
  if session_match:
  session_info = f"Session {session_match.group(1)}"
  break
 
  if session_opened:
  # Update job with exploitation success
- _update_job(jid, exploitation_detected=True, session_info=session_info)
- _append_worker_log(f"job {jid}: exploitation success detected - {session_info or 'session opened'}")
+ _update_job(
+ jid, exploitation_detected=True, session_info=session_info
+ )
+ _append_worker_log(
+ f"job {jid}: exploitation success detected - {session_info or 'session opened'}"
+ )
 
  # Record exploit attempt as success
- engagement_id = job.get('engagement_id')
- target = job.get('target')
- label = job.get('label', '')
- args = job.get('args', [])
+ engagement_id = job.get("engagement_id")
+ target = job.get("target")
+ label = job.get("label", "")
+ args = job.get("args", [])
 
  if engagement_id and target:
  try:
@@ -1993,35 +2674,49 @@ def _check_msf_exploitation_success():
  if host:
  # Extract port from args (look for "set RPORT X" or "RPORT X")
  port = None
- args_str = ' '.join(args) if args else ''
- port_match = re.search(r'RPORT\s+(\d+)', args_str, re.IGNORECASE)
+ args_str = " ".join(args) if args else ""
+ port_match = re.search(
+ r"RPORT\s+(\d+)", args_str, re.IGNORECASE
+ )
  if port_match:
  port = int(port_match.group(1))
 
  # Find service_id for this port
  service_id = None
  if port:
- services = hm.get_host_services(host['id'])
+ services = hm.get_host_services(host["id"])
  for svc in services:
- if svc.get('port') == port:
- service_id = svc.get('id')
+ if svc.get("port") == port:
+ service_id = svc.get("id")
  break
 
  # Extract exploit identifier from label or args
- exploit_id = label.replace('MSF: ', 'msf:') if label.startswith('MSF:') else f'msf:{label}'
+ exploit_id = (
+ label.replace("MSF: ", "msf:")
+ if label.startswith("MSF:")
+ else f"msf:{label}"
+ )
 
  record_attempt(
  engagement_id=engagement_id,
- host_id=host['id'],
+ host_id=host["id"],
  exploit_identifier=exploit_id,
  exploit_title=label,
- status='success',
+ status="success",
  service_id=service_id,
- notes=f'Session opened - {session_info}' if session_info else 'Session opened'
+ notes=(
+ f"Session opened - {session_info}"
+ if session_info
+ else "Session opened"
+ ),
+ )
+ _append_worker_log(
+ f"job {jid}: recorded exploitation success for {target}:{port or 'unknown'}"
  )
- _append_worker_log(f"job {jid}: recorded exploitation success for {target}:{port or 'unknown'}")
  except Exception as e:
- _append_worker_log(f"job {jid}: failed to record exploit attempt: {e}")
+ _append_worker_log(
+ f"job {jid}: failed to record exploit attempt: {e}"
+ )
 
  detected_count += 1
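The hunk above adds log scanning for Metasploit session activity: each running msfconsole job's log is searched against a list of success regexes, and the session number is extracted when present. A standalone sketch of the same matching idea, assuming a plain-text log; `find_session` is an illustrative helper name, not part of the souleyez API.

```python
import re
from typing import Optional

# Mirrors the success_patterns list in the diff above.
SESSION_PATTERNS = [
    r"\[\*\]\s+Command shell session \d+ opened",
    r"\[\*\]\s+Meterpreter session \d+ opened",
    r"\[\+\]\s+\d+\.\d+\.\d+\.\d+:\d+\s+-\s+Session \d+ created",
    r"\[\+\].*session.*opened",
    r"\[\+\].*session.*created",
]

def find_session(log_text: str) -> Optional[str]:
    """Return e.g. 'Session 2' if any success pattern matches, else None."""
    for pattern in SESSION_PATTERNS:
        match = re.search(pattern, log_text)
        if not match:
            continue
        num = re.search(r"session (\d+)", match.group(), re.IGNORECASE)
        return f"Session {num.group(1)}" if num else "session opened"
    return None

print(find_session("[*] Meterpreter session 2 opened (10.0.0.5:4444 -> 10.0.0.9:49152)"))
# -> Session 2
```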
 
@@ -2045,11 +2740,11 @@ def _update_job_progress():
  """
  try:
  jobs = _read_jobs()
- running_jobs = [j for j in jobs if j.get('status') == STATUS_RUNNING]
+ running_jobs = [j for j in jobs if j.get("status") == STATUS_RUNNING]
 
  for job in running_jobs:
- jid = job.get('id')
- log_path = job.get('log')
+ jid = job.get("id")
+ log_path = job.get("log")
 
  if not log_path or not os.path.exists(log_path):
  continue
@@ -2061,23 +2756,26 @@ def _update_job_progress():
  time_since_output = current_time - mtime
 
  # Update last_output_at in job record
- updates = {'last_output_at': mtime}
+ updates = {"last_output_at": mtime}
 
  # Flag as possibly hung if no output for threshold
- was_hung = job.get('possibly_hung', False)
+ was_hung = job.get("possibly_hung", False)
  is_hung = time_since_output > JOB_HUNG_THRESHOLD
 
  if is_hung != was_hung:
- updates['possibly_hung'] = is_hung
+ updates["possibly_hung"] = is_hung
  if is_hung:
  _append_worker_log(
  f"job {jid}: no output for {int(time_since_output)}s, flagged as possibly hung"
  )
- logger.warning("Job possibly hung", extra={
- "job_id": jid,
- "tool": job.get('tool'),
- "time_since_output": int(time_since_output)
- })
+ logger.warning(
+ "Job possibly hung",
+ extra={
+ "job_id": jid,
+ "tool": job.get("tool"),
+ "time_since_output": int(time_since_output),
+ },
+ )
 
  _update_job(jid, **updates)
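_update_job_progress treats the log file's mtime as a liveness signal: a running job whose log has not changed for longer than JOB_HUNG_THRESHOLD is flagged possibly_hung, and the flag is cleared once output resumes. A minimal sketch of that check, with an assumed threshold value and an illustrative function name:

```python
import os
import time

JOB_HUNG_THRESHOLD = 300  # seconds; illustrative value, not souleyez's actual setting

def is_possibly_hung(log_path: str, threshold: float = JOB_HUNG_THRESHOLD) -> bool:
    """True if the job's log exists but has produced no output for `threshold` seconds."""
    if not os.path.exists(log_path):
        return False
    idle = time.time() - os.path.getmtime(log_path)
    return idle > threshold

# As in the worker, act only on transitions (hung -> not hung or vice versa)
# so the flag is written once rather than on every poll:
# if is_possibly_hung(job["log"]) != job.get("possibly_hung", False):
#     ...update the job record and log the change...
```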
 
@@ -2178,9 +2876,9 @@ def worker_loop(poll_interval: float = 2.0):
  if processed > 0:
  _append_worker_log(f"processed {processed} chainable job(s)")
  except Exception as e:
- logger.error("Chain processing error in worker loop", extra={
- "error": str(e)
- })
+ logger.error(
+ "Chain processing error in worker loop", extra={"error": str(e)}
+ )
  _append_worker_log(f"chain processing error: {e}")
 
  # Sleep before next iteration
@@ -2209,8 +2907,14 @@ def start_worker(detach: bool = True, fg: bool = False):
  else:
  # Running as Python script
  python = exe or "python3"
- cmd = [python, "-u", "-c",
- "import sys; from souleyez.engine.background import worker_loop; worker_loop()"]
-
- subprocess.Popen(cmd, stdout=open(WORKER_LOG, "a"), stderr=subprocess.STDOUT, close_fds=True)
+ cmd = [
+ python,
+ "-u",
+ "-c",
+ "import sys; from souleyez.engine.background import worker_loop; worker_loop()",
+ ]
+
+ subprocess.Popen(
+ cmd, stdout=open(WORKER_LOG, "a"), stderr=subprocess.STDOUT, close_fds=True
+ )
  _append_worker_log("Started background worker (detached)")