souleyez-2.26.0-py3-none-any.whl → souleyez-2.28.0-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries, and reflects the changes between those versions as they appear in their respective public registries. It is provided for informational purposes only.
- souleyez/__init__.py +1 -1
- souleyez/core/tool_chaining.py +36 -12
- souleyez/docs/README.md +2 -2
- souleyez/docs/user-guide/configuration.md +1 -1
- souleyez/docs/user-guide/scope-management.md +683 -0
- souleyez/engine/background.py +655 -168
- souleyez/engine/result_handler.py +340 -11
- souleyez/engine/worker_manager.py +98 -2
- souleyez/main.py +222 -1
- souleyez/plugins/http_fingerprint.py +8 -2
- souleyez/plugins/nuclei.py +2 -1
- souleyez/plugins/searchsploit.py +21 -18
- souleyez/security/scope_validator.py +615 -0
- souleyez/storage/hosts.py +87 -2
- souleyez/storage/migrations/_026_add_engagement_scope.py +87 -0
- souleyez/ui/interactive.py +289 -5
- {souleyez-2.26.0.dist-info → souleyez-2.28.0.dist-info}/METADATA +9 -3
- {souleyez-2.26.0.dist-info → souleyez-2.28.0.dist-info}/RECORD +22 -19
- {souleyez-2.26.0.dist-info → souleyez-2.28.0.dist-info}/WHEEL +0 -0
- {souleyez-2.26.0.dist-info → souleyez-2.28.0.dist-info}/entry_points.txt +0 -0
- {souleyez-2.26.0.dist-info → souleyez-2.28.0.dist-info}/licenses/LICENSE +0 -0
- {souleyez-2.26.0.dist-info → souleyez-2.28.0.dist-info}/top_level.txt +0 -0
souleyez/engine/background.py
CHANGED
@@ -25,6 +25,7 @@ import subprocess
 import threading
 import inspect
 import traceback
+import fcntl
 from typing import List, Dict, Optional, Any
 from souleyez.log_config import get_logger
 from .log_sanitizer import LogSanitizer
@@ -41,7 +42,12 @@ JOBS_DIR = os.path.join(DATA_DIR, "jobs")
 LOGS_DIR = os.path.join(DATA_DIR, "logs")
 JOBS_FILE = os.path.join(JOBS_DIR, "jobs.json")
 WORKER_LOG = os.path.join(LOGS_DIR, "worker.log")
+HEARTBEAT_FILE = os.path.join(JOBS_DIR, ".worker_heartbeat")
 JOB_TIMEOUT_SECONDS = 3600  # 1 hour (changed from 300s/5min)
+HEARTBEAT_INTERVAL = 10  # seconds between heartbeat writes
+HEARTBEAT_STALE_THRESHOLD = 30  # seconds before heartbeat considered stale
+JOB_HUNG_THRESHOLD = 300  # 5 minutes with no output = possibly hung
+JOBS_BACKUP_COUNT = 3  # Number of rotating backups to keep
 
 _lock = threading.RLock()  # Reentrant lock allows nested acquisition by same thread
 
@@ -51,6 +57,63 @@ def _ensure_dirs():
     os.makedirs(LOGS_DIR, exist_ok=True)
 
 
+def _get_backup_files() -> List[str]:
+    """Get list of backup files sorted by modification time (newest first)."""
+    backups = []
+    for i in range(1, JOBS_BACKUP_COUNT + 1):
+        backup_path = f"{JOBS_FILE}.bak.{i}"
+        if os.path.exists(backup_path):
+            backups.append((os.path.getmtime(backup_path), backup_path))
+    # Sort by mtime descending (newest first)
+    backups.sort(reverse=True)
+    return [path for _, path in backups]
+
+
+def _rotate_backups():
+    """Rotate backup files, keeping only JOBS_BACKUP_COUNT backups."""
+    # Shift existing backups: .bak.2 -> .bak.3, .bak.1 -> .bak.2
+    for i in range(JOBS_BACKUP_COUNT, 1, -1):
+        src = f"{JOBS_FILE}.bak.{i - 1}"
+        dst = f"{JOBS_FILE}.bak.{i}"
+        if os.path.exists(src):
+            try:
+                shutil.move(src, dst)
+            except Exception:
+                pass
+
+    # Create new .bak.1 from current jobs.json
+    if os.path.exists(JOBS_FILE):
+        try:
+            shutil.copy2(JOBS_FILE, f"{JOBS_FILE}.bak.1")
+        except Exception:
+            pass
+
+
+def _recover_from_backup() -> List[Dict[str, Any]]:
+    """
+    Attempt to recover jobs from backup files.
+
+    Returns:
+        List of jobs from the first valid backup, or empty list if no valid backup found
+    """
+    backups = _get_backup_files()
+    for backup_path in backups:
+        try:
+            with open(backup_path, "r", encoding="utf-8") as fh:
+                jobs = json.load(fh)
+            if isinstance(jobs, list):
+                _append_worker_log(f"recovered {len(jobs)} jobs from backup: {backup_path}")
+                logger.info("Jobs recovered from backup", extra={
+                    "backup_path": backup_path,
+                    "job_count": len(jobs)
+                })
+                return jobs
+        except Exception as e:
+            _append_worker_log(f"backup {backup_path} also corrupt: {e}")
+            continue
+    return []
+
+
 def _read_jobs() -> List[Dict[str, Any]]:
     _ensure_dirs()
     if not os.path.exists(JOBS_FILE):
@@ -58,18 +121,42 @@ def _read_jobs() -> List[Dict[str, Any]]:
     try:
         with open(JOBS_FILE, "r", encoding="utf-8") as fh:
             return json.load(fh)
-    except Exception:
+    except Exception as e:
+        # Log corruption event
+        _append_worker_log(f"jobs.json corrupt: {e}")
+        logger.error("Jobs file corrupted", extra={
+            "error": str(e),
+            "jobs_file": JOBS_FILE
+        })
+
+        # Try to recover from backup
+        recovered_jobs = _recover_from_backup()
+
+        # Move corrupt file aside
         try:
             corrupt = JOBS_FILE + ".corrupt." + str(int(time.time()))
             shutil.move(JOBS_FILE, corrupt)
-            _append_worker_log(f"jobs file
+            _append_worker_log(f"corrupt jobs file moved to {corrupt}")
         except Exception:
             pass
-
+
+        # If we recovered jobs, write them back
+        if recovered_jobs:
+            try:
+                _write_jobs(recovered_jobs)
+                _append_worker_log(f"restored {len(recovered_jobs)} jobs from backup")
+            except Exception as write_err:
+                _append_worker_log(f"failed to restore jobs: {write_err}")
+
+        return recovered_jobs
 
 
 def _write_jobs(jobs: List[Dict[str, Any]]):
     _ensure_dirs()
+
+    # Rotate backups before writing (keeps last 3 good copies)
+    _rotate_backups()
+
     tmp = tempfile.NamedTemporaryFile("w", delete=False, dir=JOBS_DIR, encoding="utf-8")
     try:
         json.dump(jobs, tmp, indent=2, ensure_ascii=False)
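The hunks above add rotating backups for jobs.json: every write first shifts `.bak.2 → .bak.3` and `.bak.1 → .bak.2`, snapshots the current file as `.bak.1`, and a corrupt read falls back to the newest parseable backup. A minimal standalone sketch of the same rotate-then-recover pattern, using a hypothetical `state.json` rather than the package's own paths:

```python
import json
import os
import shutil

STATE_FILE = "state.json"   # assumption: any JSON state file, not souleyez's jobs.json
BACKUP_COUNT = 3            # mirrors JOBS_BACKUP_COUNT from the diff

def rotate_backups() -> None:
    # Shift .bak.2 -> .bak.3, .bak.1 -> .bak.2, then snapshot the live file as .bak.1
    for i in range(BACKUP_COUNT, 1, -1):
        src, dst = f"{STATE_FILE}.bak.{i - 1}", f"{STATE_FILE}.bak.{i}"
        if os.path.exists(src):
            shutil.move(src, dst)
    if os.path.exists(STATE_FILE):
        shutil.copy2(STATE_FILE, f"{STATE_FILE}.bak.1")

def read_with_recovery() -> list:
    # Try the live file first, then fall back to the newest readable backup
    candidates = [STATE_FILE] + [f"{STATE_FILE}.bak.{i}" for i in range(1, BACKUP_COUNT + 1)]
    for path in candidates:
        try:
            with open(path, "r", encoding="utf-8") as fh:
                data = json.load(fh)
            if isinstance(data, list):
                return data
        except (OSError, json.JSONDecodeError):
            continue
    return []
```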
@@ -93,36 +180,135 @@ def _append_worker_log(msg: str):
         fh.write(line)
 
 
+def _update_heartbeat():
+    """Write current timestamp to heartbeat file for health monitoring."""
+    _ensure_dirs()
+    try:
+        with open(HEARTBEAT_FILE, 'w') as fh:
+            fh.write(str(time.time()))
+    except Exception:
+        pass  # Non-critical, don't crash worker
+
+
+def get_heartbeat_age() -> Optional[float]:
+    """
+    Get age of worker heartbeat in seconds.
+
+    Returns:
+        Age in seconds, or None if heartbeat file doesn't exist
+    """
+    try:
+        if os.path.exists(HEARTBEAT_FILE):
+            with open(HEARTBEAT_FILE, 'r') as fh:
+                last_beat = float(fh.read().strip())
+            return time.time() - last_beat
+        return None
+    except Exception:
+        return None
+
+
+def is_heartbeat_stale() -> bool:
+    """Check if worker heartbeat is stale (older than threshold)."""
+    age = get_heartbeat_age()
+    if age is None:
+        return True  # No heartbeat = stale
+    return age > HEARTBEAT_STALE_THRESHOLD
+
+
+def _get_process_start_time(pid: int) -> Optional[float]:
+    """
+    Get process start time from /proc filesystem (Linux only).
+
+    Returns:
+        Process start time as Unix timestamp, or None if not available
+    """
+    try:
+        stat_path = f"/proc/{pid}/stat"
+        if not os.path.exists(stat_path):
+            return None
+
+        with open(stat_path, 'r') as f:
+            stat = f.read()
+
+        # Parse stat file - field 22 is starttime (in clock ticks since boot)
+        # Format: pid (comm) state ppid pgrp session tty_nr ... starttime ...
+        # Need to handle comm field which may contain spaces/parentheses
+        parts = stat.rsplit(')', 1)
+        if len(parts) < 2:
+            return None
+
+        fields = parts[1].split()
+        if len(fields) < 20:
+            return None
+
+        starttime_ticks = int(fields[19])  # 0-indexed, field 22 is at index 19 after comm
+
+        # Convert to timestamp using system boot time and clock ticks per second
+        with open('/proc/stat', 'r') as f:
+            for line in f:
+                if line.startswith('btime'):
+                    boot_time = int(line.split()[1])
+                    break
+            else:
+                return None
+
+        # Get clock ticks per second (usually 100)
+        ticks_per_sec = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
+
+        return boot_time + (starttime_ticks / ticks_per_sec)
+    except Exception:
+        return None
+
+
 def _next_job_id(jobs: List[Dict[str, Any]]) -> int:
     """
-    Get next available job ID.
-
-    Uses a persistent counter to ensure IDs are never
-
+    Get next available job ID with file locking.
+
+    Uses a persistent counter with fcntl locking to ensure IDs are never
+    reused, even across multiple processes. This prevents duplicate job IDs
+    when multiple jobs are enqueued concurrently.
     """
     counter_file = os.path.join(JOBS_DIR, ".job_counter")
-
+    lock_file = os.path.join(JOBS_DIR, ".job_counter.lock")
+
     try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        _ensure_dirs()
+
+        # Use a separate lock file to allow atomic read-modify-write
+        with open(lock_file, 'w') as lock_fh:
+            # Acquire exclusive lock (blocks until available)
+            fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX)
+
+            try:
+                # Read current counter
+                if os.path.exists(counter_file):
+                    with open(counter_file, 'r') as f:
+                        next_id = int(f.read().strip())
+                else:
+                    # Initialize from existing jobs
+                    maxid = 0
+                    for j in jobs:
+                        try:
+                            if isinstance(j.get("id"), int) and j["id"] > maxid:
+                                maxid = j["id"]
+                        except Exception:
+                            continue
+                    next_id = maxid + 1
+
+                # Write incremented counter atomically
+                tmp_file = counter_file + '.tmp'
+                with open(tmp_file, 'w') as f:
+                    f.write(str(next_id + 1))
+                    f.flush()
+                    os.fsync(f.fileno())
+                os.replace(tmp_file, counter_file)
+
+                return next_id
+
+            finally:
+                # Release lock
+                fcntl.flock(lock_fh.fileno(), fcntl.LOCK_UN)
+
     except Exception:
         # Fallback to old behavior if file operations fail
         maxid = 0
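The heartbeat helpers added above are module-level, and `get_heartbeat_age` / `is_heartbeat_stale` are public, so other parts of the tool can poll worker health. An illustrative check; the reporting around it is an assumption, not something this diff shows:

```python
from souleyez.engine.background import get_heartbeat_age, is_heartbeat_stale

age = get_heartbeat_age()
if age is None:
    print("no heartbeat file yet - the background worker has not written one")
elif is_heartbeat_stale():
    print(f"heartbeat is {age:.0f}s old (threshold 30s) - worker looks dead or wedged")
else:
    print(f"worker healthy - last heartbeat {age:.1f}s ago")
```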
@@ -135,7 +321,7 @@ def _next_job_id(jobs: List[Dict[str, Any]]) -> int:
         return maxid + 1
 
 
-def enqueue_job(tool: str, target: str, args: List[str], label: str = "", engagement_id: int = None, metadata: Dict[str, Any] = None, parent_id: int = None, reason: str = None, rule_id: int = None) -> int:
+def enqueue_job(tool: str, target: str, args: List[str], label: str = "", engagement_id: int = None, metadata: Dict[str, Any] = None, parent_id: int = None, reason: str = None, rule_id: int = None, skip_scope_check: bool = False) -> int:
     with _lock:
         jobs = _read_jobs()
         jid = _next_job_id(jobs)
@@ -153,6 +339,43 @@ def enqueue_job(tool: str, target: str, args: List[str], label: str = "", engage
 
         # Merge parent_id, reason, and rule_id into metadata
         job_metadata = metadata or {}
+
+        # Scope validation - check if target is within engagement scope
+        if not skip_scope_check and engagement_id:
+            try:
+                from souleyez.security.scope_validator import ScopeValidator, ScopeViolationError
+                validator = ScopeValidator(engagement_id)
+                result = validator.validate_target(target)
+                enforcement = validator.get_enforcement_mode()
+
+                if not result.is_in_scope and validator.has_scope_defined():
+                    if enforcement == 'block':
+                        validator.log_validation(target, result, 'blocked', job_id=jid)
+                        raise ScopeViolationError(
+                            f"Target '{target}' is out of scope. {result.reason}"
+                        )
+                    elif enforcement == 'warn':
+                        validator.log_validation(target, result, 'warned', job_id=jid)
+                        if 'warnings' not in job_metadata:
+                            job_metadata['warnings'] = []
+                        job_metadata['warnings'].append(
+                            f"SCOPE WARNING: {target} may be out of scope. {result.reason}"
+                        )
+                        logger.warning("Out-of-scope target allowed (warn mode)", extra={
+                            "target": target,
+                            "engagement_id": engagement_id,
+                            "reason": result.reason
+                        })
+                else:
+                    validator.log_validation(target, result, 'allowed', job_id=jid)
+            except ScopeViolationError:
+                raise  # Re-raise scope violations
+            except Exception as e:
+                # Don't block jobs if scope validation fails unexpectedly
+                logger.warning("Scope validation error (allowing job)", extra={
+                    "target": target,
+                    "error": str(e)
+                })
         if parent_id is not None:
             job_metadata['parent_id'] = parent_id
         if reason:
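With the scope check wired into `enqueue_job`, an out-of-scope target raises `ScopeViolationError` when the engagement's enforcement mode is `block`, while `warn` mode lets the job through and appends a warning to its metadata. A hedged sketch of caller-side handling; the tool name, target, and engagement ID below are made up:

```python
from souleyez.engine.background import enqueue_job
from souleyez.security.scope_validator import ScopeViolationError

try:
    jid = enqueue_job("nmap", "203.0.113.50", ["-sV"], engagement_id=7)
    print(f"queued job {jid}")
except ScopeViolationError as exc:
    # Raised only when a scope is defined and enforcement is 'block'
    print(f"refused: {exc}")

# Deliberate out-of-scope work can still bypass the check explicitly:
jid = enqueue_job("nmap", "203.0.113.50", ["-sV"], engagement_id=7, skip_scope_check=True)
```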
@@ -409,14 +632,36 @@ def purge_all_jobs() -> int:
     return purge_jobs(status_filter=['done', 'error', 'killed'])
 
 
-def _update_job(jid: int, **fields):
+def _update_job(jid: int, respect_killed: bool = True, **fields):
+    """
+    Update job fields atomically.
+
+    Args:
+        jid: Job ID to update
+        respect_killed: If True (default), don't overwrite status if job is killed.
+                        This prevents race condition where job is killed while completing.
+        **fields: Fields to update
+    """
     with _lock:
         jobs = _read_jobs()
         changed = False
         for j in jobs:
             if j.get("id") == jid:
-
-
+                # Race condition protection: don't change status of killed jobs
+                if respect_killed and j.get("status") == STATUS_KILLED and "status" in fields:
+                    # Job was killed - don't overwrite status, but allow other updates
+                    fields_copy = dict(fields)
+                    del fields_copy["status"]
+                    if fields_copy:
+                        j.update(fields_copy)
+                    changed = True
+                    logger.debug("Skipped status update for killed job", extra={
+                        "job_id": jid,
+                        "attempted_status": fields.get("status")
+                    })
+                else:
+                    j.update(fields)
+                    changed = True
                 break
         if changed:
             _write_jobs(jobs)
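The `respect_killed` guard closes a race where a job finishes (or gets parsed) just after the user killed it: late writes can still attach fields such as `parse_result`, but they can no longer flip the status away from killed. A self-contained restatement of the guard on a plain dict, purely illustrative:

```python
def apply_update(job: dict, fields: dict, respect_killed: bool = True) -> dict:
    # Drop a late 'status' write if the job is already killed; keep the other fields.
    if respect_killed and job.get("status") == "killed" and "status" in fields:
        fields = {k: v for k, v in fields.items() if k != "status"}
    job.update(fields)
    return job

job = {"id": 42, "status": "killed"}
print(apply_update(job, {"status": "done", "parse_result": {"findings_added": 3}}))
# {'id': 42, 'status': 'killed', 'parse_result': {'findings_added': 3}}
```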
@@ -479,10 +724,27 @@ def _process_pending_chains():
     # Get parse results from job
     parse_result = job_to_chain.get('parse_result', {})
 
-    if not parse_result
-        # No results
-
-
+    if not parse_result:
+        # No parse results - this shouldn't happen if job was properly marked chainable
+        # Log warning and store reason for debugging
+        logger.warning("Job marked chainable but has no parse_result", extra={
+            "job_id": jid,
+            "tool": tool,
+            "status": job_to_chain.get('status')
+        })
+        _append_worker_log(f"job {jid}: WARNING - marked chainable but parse_result is empty/missing")
+        _update_job(jid, chained=True, chain_skip_reason="parse_result missing")
+        return 1
+
+    if 'error' in parse_result:
+        # Parse had an error - log and skip
+        logger.warning("Job has parse error, skipping chaining", extra={
+            "job_id": jid,
+            "tool": tool,
+            "parse_error": parse_result.get('error')
+        })
+        _append_worker_log(f"job {jid}: parse error '{parse_result.get('error')}', skipping chain")
+        _update_job(jid, chained=True, chain_skip_reason=f"parse_error: {parse_result.get('error')}")
         return 1
 
     # Process auto-chaining
@@ -571,10 +833,35 @@ def _try_run_plugin(tool: str, target: str, args: List[str], label: str, log_pat
     cmd_spec = build_command_method(target, args or [], label or "", log_path)
 
     if cmd_spec is None:
-        #
-
-
-
+        # build_command returned None - check if plugin has run() method
+        # This allows plugins to signal "use run() instead" by returning None
+        run_method = getattr(plugin, "run", None)
+        if callable(run_method):
+            # Plugin wants to handle execution itself via run() method
+            sig = inspect.signature(run_method)
+            params = list(sig.parameters.keys())
+
+            try:
+                if "log_path" in params:
+                    rc = run_method(target, args or [], label or "", log_path)
+                elif "label" in params:
+                    rc = run_method(target, args or [], label or "")
+                elif "args" in params:
+                    rc = run_method(target, args or [])
+                else:
+                    rc = run_method(target)
+                return (True, rc if isinstance(rc, int) else 0)
+            except Exception as e:
+                with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
+                    fh.write(f"\n=== PLUGIN RUN ERROR ===\n")
+                    fh.write(f"{type(e).__name__}: {e}\n")
+                    fh.write(f"\n{traceback.format_exc()}\n")
+                return (True, 1)
+        else:
+            # No run() method either - actual validation failure
+            with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
+                fh.write("ERROR: Plugin validation failed (build_command returned None)\n")
+            return (True, 1)
 
     # Execute using new subprocess handler with PID tracking
     rc = _run_subprocess_with_spec(cmd_spec, log_path, jid=jid, plugin=plugin)
@@ -773,6 +1060,55 @@ def _store_msf_session(jid: int, target: str, exploit_path: str, session_id: str
         _append_worker_log(f"job {jid}: session storage error: {e}")
 
 
+# Cache stdbuf availability check
+_stdbuf_available = None
+
+
+def _is_stdbuf_available() -> bool:
+    """Check if stdbuf is available for line-buffered output."""
+    global _stdbuf_available
+    if _stdbuf_available is None:
+        _stdbuf_available = shutil.which('stdbuf') is not None
+    return _stdbuf_available
+
+
+def _wrap_cmd_for_line_buffering(cmd: List[str]) -> List[str]:
+    """
+    Wrap a command with stdbuf for line-buffered output when available.
+
+    This ensures output is written line-by-line instead of in 4-8KB blocks,
+    improving real-time log monitoring and ensuring output is captured
+    before process termination.
+
+    Args:
+        cmd: Command to wrap
+
+    Returns:
+        Command wrapped with stdbuf if available, original command otherwise
+    """
+    if not cmd:
+        return cmd
+
+    if _is_stdbuf_available():
+        # stdbuf -oL = line-buffered stdout, -eL = line-buffered stderr
+        return ['stdbuf', '-oL', '-eL'] + cmd
+
+    return cmd
+
+
+def _get_subprocess_env() -> Dict[str, str]:
+    """
+    Get environment for subprocess with buffering disabled.
+
+    Sets PYTHONUNBUFFERED=1 for Python subprocesses and TERM=dumb
+    to prevent interactive terminal issues.
+    """
+    env = os.environ.copy()
+    env['TERM'] = 'dumb'  # Prevent stty errors from interactive tools
+    env['PYTHONUNBUFFERED'] = '1'  # Disable Python output buffering
+    return env
+
+
 def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int = None, plugin=None) -> int:
     """
     Execute a command specification with proper PID tracking.
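The buffering helpers above are small and deterministic, so their effect is easy to show; the command and host below are placeholders, and the functions are module-private, so this is an illustration rather than a supported API:

```python
from souleyez.engine.background import _get_subprocess_env, _wrap_cmd_for_line_buffering

print(_wrap_cmd_for_line_buffering(["nmap", "-sV", "10.0.0.1"]))
# ['stdbuf', '-oL', '-eL', 'nmap', '-sV', '10.0.0.1']   if stdbuf is on PATH
# ['nmap', '-sV', '10.0.0.1']                           otherwise

env = _get_subprocess_env()
print(env["TERM"], env["PYTHONUNBUFFERED"])  # -> dumb 1
```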
@@ -814,32 +1150,35 @@ def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int
         with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
             fh.write("ERROR: No command provided in spec\n")
         return 1
-
+
     timeout = cmd_spec.get('timeout', JOB_TIMEOUT_SECONDS)
-
+    spec_env = cmd_spec.get('env')
     cwd = cmd_spec.get('cwd')
     needs_shell = cmd_spec.get('needs_shell', False)
-
+
     _append_worker_log(f"_run_subprocess_with_spec: timeout={timeout}s for job {jid}")
-
-    #
-
-
-
-
-
-
+
+    # Wrap command with stdbuf for line-buffered output (unless shell mode)
+    original_cmd = cmd
+    if not needs_shell:
+        cmd = _wrap_cmd_for_line_buffering(cmd)
+
+    # Prepare environment with PYTHONUNBUFFERED=1 and TERM=dumb
+    proc_env = _get_subprocess_env()
+    if spec_env:
+        proc_env.update(spec_env)
+
     with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
         fh.write("=== Command Execution (build_command) ===\n")
-        fh.write(f"Command: {' '.join(
+        fh.write(f"Command: {' '.join(original_cmd)}\n")
         fh.write(f"Timeout: {timeout} seconds\n")
-        if
-            fh.write(f"Environment: {
+        if spec_env:
+            fh.write(f"Environment: {spec_env}\n")
         if cwd:
             fh.write(f"Working Dir: {cwd}\n")
         fh.write(f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n\n")
         fh.flush()
-
+
         try:
             # Create new process group so all children can be killed together
             # Redirect stdin to /dev/null to prevent password prompts from hanging
@@ -849,16 +1188,17 @@ def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int
                 stdout=fh,
                 stderr=subprocess.STDOUT,
                 preexec_fn=os.setsid,  # Creates new session
-                env=proc_env,
+                env=proc_env,
                 cwd=cwd,
                 shell=needs_shell  # nosec B602 - intentional for security tool command execution
             )
 
-            # Store PID
+            # Store PID and process start time for stale detection
            if jid is not None:
-
+                proc_start_time = _get_process_start_time(proc.pid)
+                _update_job(jid, pid=proc.pid, process_start_time=proc_start_time)
                 _append_worker_log(f"job {jid}: running with PID {proc.pid}")
-
+
             # Wait for process with timeout
             try:
                 proc.wait(timeout=timeout)
@@ -890,6 +1230,7 @@ def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int
                     return 0
                 else:
                     fh.write(f"\nERROR: Command timed out after {timeout} seconds\n")
+                    fh.flush()
                     return 124
 
             # Check if job was killed externally during execution
@@ -912,17 +1253,21 @@ def _run_subprocess_with_spec(cmd_spec: Dict[str, Any], log_path: str, jid: int
                     proc.wait(timeout=5)
                 except:
                     pass
+                fh.flush()
                 return 143  # 128 + 15 (SIGTERM)
-
+
             fh.write(f"\n=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ===\n")
             fh.write(f"Exit Code: {proc.returncode}\n")
+            fh.flush()
             return proc.returncode
-
+
         except FileNotFoundError:
             fh.write(f"\nERROR: Tool not found: {cmd[0]}\n")
+            fh.flush()
             return 127
         except Exception as e:
             fh.write(f"\nERROR: {type(e).__name__}: {e}\n")
+            fh.flush()
             return 1
 
 
@@ -937,9 +1282,14 @@ def _run_subprocess(tool: str, target: str, args: List[str], log_path: str, jid:
     cmd = [tool] + (args or [])
     cmd = [c.replace("<target>", target) for c in cmd]
 
+    # Wrap command with stdbuf for line-buffered output
+    cmd = _wrap_cmd_for_line_buffering(cmd)
+
     with open(log_path, "a", encoding="utf-8", errors="replace") as fh:
+        # Log original command (without stdbuf wrapper for clarity)
+        original_cmd = cmd[3:] if cmd[:3] == ['stdbuf', '-oL', '-eL'] else cmd
         fh.write("=== Subprocess Execution ===\n")
-        fh.write(f"Command: {' '.join(
+        fh.write(f"Command: {' '.join(original_cmd)}\n")
         fh.write(f"Timeout: {timeout} seconds\n")
         fh.write(f"Started: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}\n\n")
         fh.flush()
@@ -947,9 +1297,8 @@ def _run_subprocess(tool: str, target: str, args: List[str], log_path: str, jid:
         try:
             # Create new process group so all children can be killed together
             # Redirect stdin to /dev/null to prevent password prompts from hanging
-            #
-            env =
-            env['TERM'] = 'dumb'
+            # Use env with PYTHONUNBUFFERED=1 and TERM=dumb
+            env = _get_subprocess_env()
 
             proc = subprocess.Popen(
                 cmd,
@@ -960,9 +1309,10 @@ def _run_subprocess(tool: str, target: str, args: List[str], log_path: str, jid:
                 env=env
             )
 
-            # Store PID
+            # Store PID and process start time for stale detection
             if jid is not None:
-
+                proc_start_time = _get_process_start_time(proc.pid)
+                _update_job(jid, pid=proc.pid, process_start_time=proc_start_time)
                 _append_worker_log(f"job {jid}: running with PID {proc.pid}")
 
             # Wait for process with timeout
@@ -977,6 +1327,7 @@ def _run_subprocess(tool: str, target: str, args: List[str], log_path: str, jid:
                 proc.kill()  # Fallback to single process
                 proc.wait()
                 fh.write(f"\nERROR: Command timed out after {timeout} seconds\n")
+                fh.flush()
                 return 124
 
             # Check if job was killed externally during execution
@@ -999,17 +1350,21 @@ def _run_subprocess(tool: str, target: str, args: List[str], log_path: str, jid:
                     proc.wait(timeout=5)
                 except:
                     pass
+                fh.flush()
                 return 143  # 128 + 15 (SIGTERM)
 
             fh.write(f"\n=== Completed: {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())} ===\n")
             fh.write(f"Exit Code: {proc.returncode}\n")
+            fh.flush()
             return proc.returncode
 
         except FileNotFoundError:
             fh.write(f"\nERROR: Tool not found: {cmd[0]}\n")
+            fh.flush()
             return 127
         except Exception as e:
             fh.write(f"\nERROR: {type(e).__name__}: {e}\n")
+            fh.flush()
             return 1
 
 
@@ -1145,77 +1500,110 @@ def run_job(jid: int) -> None:
         # Re-fetch job to get updated data
         job = get_job(jid)
         parse_result = handle_job_result(job)
-
-
-
+
+        # Handle parse failure cases
+        if parse_result is None:
+            # Parser returned None - likely missing log file, no parser for tool, or missing engagement
+            logger.error("Job parse returned None - results may be lost", extra={
+                "job_id": jid,
+                "tool": job.get('tool'),
+                "log_exists": os.path.exists(job.get('log', '')) if job.get('log') else False
+            })
+            _append_worker_log(f"job {jid} parse returned None (tool={job.get('tool')}) - check if parser exists")
+            # Update job to indicate parse failure
+            _update_job(jid, status=STATUS_WARNING, parse_result={'error': 'Parser returned None - no results extracted'})
+            # Mark as chained to prevent infinite retry
+            _update_job(jid, chained=True)
+            return
+
+        if 'error' in parse_result:
+            logger.error("Job parse error - results may be incomplete", extra={
+                "job_id": jid,
+                "error": parse_result['error']
+            })
+            _append_worker_log(f"job {jid} parse error: {parse_result['error']}")
+            # Update job status to warning with the error
+            _update_job(jid, status=STATUS_WARNING, parse_result=parse_result)
+            # Mark as chained to prevent infinite retry
+            _update_job(jid, chained=True)
+            return
+
+        # Parse succeeded
+        logger.info("Job parsed successfully", extra={
+            "job_id": jid,
+            "parse_result": parse_result
+        })
+        _append_worker_log(f"job {jid} parsed: {parse_result}")
+
+        # Determine chainable status BEFORE updating to avoid race condition
+        # We must set parse_result and chainable in a single atomic update
+        try:
+            from souleyez.core.tool_chaining import ToolChaining
+            chaining = ToolChaining()
+
+            # Get current job to check status
+            job = get_job(jid)
+            job_status = job.get('status', STATUS_ERROR)
+
+            # Determine final status from parser if provided
+            final_status = parse_result.get('status', job_status)
+
+            # Check if job should be chainable
+            should_chain = (
+                chaining.is_enabled() and
+                parse_result and
+                'error' not in parse_result and
+                is_chainable(final_status)
+            )
+
+            # Build update dict - ATOMIC update of parse_result + chainable
+            update_fields = {'parse_result': parse_result}
+
+            if 'status' in parse_result:
+                update_fields['status'] = final_status
+                logger.info("Job status updated from parser", extra={
                     "job_id": jid,
-                    "
+                    "status": final_status
                 })
-                _append_worker_log(f"job {jid}
+                _append_worker_log(f"job {jid} status updated to: {final_status}")
+
+            if should_chain:
+                update_fields['chainable'] = True
             else:
-
-
-                "parse_result": parse_result
-            })
-            _append_worker_log(f"job {jid} parsed: {parse_result}")
+                # Not chainable - mark as chained to skip
+                update_fields['chained'] = True
 
-
-
-
-
-
+            # Single atomic update to prevent race condition
+            _update_job(jid, **update_fields)
+
+            # Log chaining decision
+            if should_chain:
+                if final_status == STATUS_WARNING:
+                    logger.info("Job with warning status marked for chaining", extra={
                         "job_id": jid,
-                        "
+                        "tool": job.get('tool'),
+                        "wildcard_detected": parse_result.get('wildcard_detected', False)
                     })
-                    _append_worker_log(f"job {jid} status
+                    _append_worker_log(f"job {jid} (status=warning) marked as chainable")
                 else:
-
-            _update_job(jid, parse_result=parse_result)
-
-            # Mark job as chainable instead of chaining immediately
-            # Worker loop will process it when database is idle
-            try:
-                from souleyez.core.tool_chaining import ToolChaining
-                chaining = ToolChaining()
-
-                # Re-fetch job to get updated status
-                job = get_job(jid)
-                job_status = job.get('status', STATUS_ERROR)
-
-                # Check if status is chainable (done, no_results, warning)
-                if chaining.is_enabled() and parse_result and 'error' not in parse_result and is_chainable(job_status):
-                    # Mark for deferred chaining
-                    _update_job(jid, chainable=True)
-
-                    # Log special handling for warning status
-                    if job_status == STATUS_WARNING:
-                        logger.info("Job with warning status marked for chaining", extra={
-                            "job_id": jid,
-                            "tool": job.get('tool'),
-                            "wildcard_detected": parse_result.get('wildcard_detected', False)
-                        })
-                        _append_worker_log(f"job {jid} (status=warning) marked as chainable")
-                    else:
-                        logger.info("Job marked as chainable", extra={
-                            "job_id": jid,
-                            "tool": job.get('tool'),
-                            "status": job_status
-                        })
-                        _append_worker_log(f"job {jid} marked as chainable (status={job_status})")
-                else:
-                    # Chaining disabled or job has errors - mark as chained (skip)
-                    _update_job(jid, chained=True)
-                    reason = f"chaining_disabled={not chaining.is_enabled()}, has_error={'error' in parse_result}, status={job_status}"
-                    _append_worker_log(f"job {jid} not chainable ({reason})")
-
-            except Exception as chain_err:
-                logger.error("Failed to mark job as chainable", extra={
+                    logger.info("Job marked as chainable", extra={
                         "job_id": jid,
-                        "
+                        "tool": job.get('tool'),
+                        "status": final_status
                     })
-                    _append_worker_log(f"job {jid}
-
-
+                    _append_worker_log(f"job {jid} marked as chainable (status={final_status})")
+            else:
+                reason = f"chaining_disabled={not chaining.is_enabled()}, has_error={'error' in parse_result}, status={final_status}"
+                _append_worker_log(f"job {jid} not chainable ({reason})")
+
+        except Exception as chain_err:
+            logger.error("Failed to mark job as chainable", extra={
+                "job_id": jid,
+                "error": str(chain_err)
+            })
+            _append_worker_log(f"job {jid} chainable marking error: {chain_err}")
+            # Mark as chained to prevent retry loops
+            _update_job(jid, chained=True, chain_error=str(chain_err))
 
     except Exception as e:
         logger.error("Job parse exception", extra={
@@ -1378,18 +1766,46 @@ def _detect_and_recover_stale_jobs() -> int:
         pid = job.get('pid')
         tool = job.get('tool', 'unknown')
         log_path = job.get('log')
+        stored_start_time = job.get('process_start_time')
 
-        #
+        # Check if PID is alive
         if _is_pid_alive(pid):
-
-
-
-
-
-
-
-
-
+            # PID is alive - but check for PID reuse
+            if stored_start_time is not None:
+                current_start_time = _get_process_start_time(pid)
+                if current_start_time is not None:
+                    # Allow 2 second tolerance for timing differences
+                    if abs(current_start_time - stored_start_time) > 2:
+                        # PID reused by different process
+                        _append_worker_log(
+                            f"job {jid}: PID {pid} reused (stored start: {stored_start_time:.0f}, "
+                            f"current: {current_start_time:.0f})"
+                        )
+                        logger.warning("PID reuse detected", extra={
+                            "job_id": jid,
+                            "tool": tool,
+                            "pid": pid,
+                            "stored_start_time": stored_start_time,
+                            "current_start_time": current_start_time
+                        })
+                        # Fall through to stale job handling
+                    else:
+                        # Same process, still running
+                        continue
+                else:
+                    # Can't get current start time, assume still valid
+                    continue
+            else:
+                # No stored start time (old job), assume still valid
+                continue
+        else:
+            # PID is dead - definitely stale
+            _append_worker_log(f"job {jid}: detected stale (PID {pid} is dead)")
+            logger.warning("Stale job detected", extra={
+                "job_id": jid,
+                "tool": tool,
+                "pid": pid
+            })
 
         # Check if log shows completion
         completed, exit_code = _check_log_for_completion(log_path, tool)
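Stale-job detection now guards against PID reuse: a live PID only proves the job is still running if the process's /proc start time matches the value recorded at launch, within a 2-second tolerance. A generic restatement of that check, assuming a Linux /proc layout and written independently of the module's private helpers:

```python
import os
from typing import Optional

def proc_start_time(pid: int) -> Optional[float]:
    """Start time of a PID as a Unix timestamp, parsed from /proc/<pid>/stat (field 22)."""
    try:
        with open(f"/proc/{pid}/stat") as f:
            after_comm = f.read().rsplit(")", 1)[1].split()
        ticks = int(after_comm[19])  # starttime, in clock ticks since boot
        with open("/proc/stat") as f:
            boot = next(int(line.split()[1]) for line in f if line.startswith("btime"))
        return boot + ticks / os.sysconf("SC_CLK_TCK")
    except (OSError, IndexError, ValueError, StopIteration):
        return None

def same_process(pid: int, recorded_start: float, tolerance: float = 2.0) -> bool:
    # A reused PID will report a different start time than the one stored with the job.
    current = proc_start_time(pid)
    return current is not None and abs(current - recorded_start) <= tolerance
```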
@@ -1412,6 +1828,8 @@ def _detect_and_recover_stale_jobs() -> int:
             # Try to parse results
             try:
                 from .result_handler import handle_job_result
+                from souleyez.core.tool_chaining import ToolChaining
+
                 job = get_job(jid)
                 parse_result = handle_job_result(job)
 
@@ -1419,36 +1837,34 @@ def _detect_and_recover_stale_jobs() -> int:
                 if 'error' in parse_result:
                     _append_worker_log(f"job {jid} stale recovery parse error: {parse_result['error']}")
                 else:
-                    #
+                    # Determine final status and chainable in one check
+                    final_status = parse_result.get('status', status)
+                    chaining = ToolChaining()
+                    should_chain = chaining.is_enabled() and is_chainable(final_status)
+
+                    # Build atomic update - parse_result + status + chainable together
+                    update_fields = {'parse_result': parse_result}
                     if 'status' in parse_result:
-
-
-
-
+                        update_fields['status'] = final_status
+                    if should_chain:
+                        update_fields['chainable'] = True
+
+                    # Single atomic update to prevent race condition
+                    _update_job(jid, **update_fields)
 
                     _append_worker_log(f"job {jid} stale recovery parsed: {parse_result.get('findings_added', 0)} findings")
 
                     logger.info("Stale job recovered with results", extra={
                         "job_id": jid,
                         "tool": tool,
-                        "status":
-                        "parse_result": parse_result
+                        "status": final_status,
+                        "parse_result": parse_result,
+                        "chainable": should_chain
                     })
 
-
-
-
-                    chaining = ToolChaining()
-                    if chaining.is_enabled() and is_chainable(status):
-                        _update_job(jid, chainable=True)
-                        _append_worker_log(f"job {jid} stale recovery marked as chainable")
-                        logger.info("Stale job marked as chainable", extra={
-                            "job_id": jid,
-                            "tool": tool,
-                            "status": status
-                        })
-                except Exception as chain_err:
-                    _append_worker_log(f"job {jid} stale recovery chainable error: {chain_err}")
+                    if should_chain:
+                        _append_worker_log(f"job {jid} stale recovery marked as chainable")
+
             except Exception as parse_err:
                 _append_worker_log(f"job {jid} stale recovery parse exception: {parse_err}")
 
@@ -1608,26 +2024,85 @@ def _check_msf_exploitation_success():
     return 0
 
 
+def _update_job_progress():
+    """
+    Update progress tracking for running jobs.
+
+    Checks log file modification times and flags jobs with no recent output
+    as possibly hung (no output for JOB_HUNG_THRESHOLD seconds).
+    """
+    try:
+        jobs = _read_jobs()
+        running_jobs = [j for j in jobs if j.get('status') == STATUS_RUNNING]
+
+        for job in running_jobs:
+            jid = job.get('id')
+            log_path = job.get('log')
+
+            if not log_path or not os.path.exists(log_path):
+                continue
+
+            try:
+                # Get log file modification time
+                mtime = os.path.getmtime(log_path)
+                current_time = time.time()
+                time_since_output = current_time - mtime
+
+                # Update last_output_at in job record
+                updates = {'last_output_at': mtime}
+
+                # Flag as possibly hung if no output for threshold
+                was_hung = job.get('possibly_hung', False)
+                is_hung = time_since_output > JOB_HUNG_THRESHOLD
+
+                if is_hung != was_hung:
+                    updates['possibly_hung'] = is_hung
+                    if is_hung:
+                        _append_worker_log(
+                            f"job {jid}: no output for {int(time_since_output)}s, flagged as possibly hung"
+                        )
+                        logger.warning("Job possibly hung", extra={
+                            "job_id": jid,
+                            "tool": job.get('tool'),
+                            "time_since_output": int(time_since_output)
+                        })
+
+                _update_job(jid, **updates)
+
+            except Exception as e:
+                # Non-critical, just skip this job
+                pass
+
+    except Exception as e:
+        logger.error("Job progress tracking error", extra={"error": str(e)})
+
+
 def worker_loop(poll_interval: float = 2.0):
     """
     Main worker loop that processes jobs and handles auto-chaining.
 
     Loop behavior:
-    1.
-    2.
-    3.
-    4.
-    5.
+    1. Update heartbeat for health monitoring
+    2. Detect and recover stale jobs (dead PIDs)
+    3. Update progress tracking for running jobs
+    4. Check for running jobs
+    5. If none running, start next queued job
+    6. Process one chainable job (if any)
+    7. Sleep poll_interval seconds, repeat
 
     Args:
         poll_interval: Seconds to sleep between iterations (default: 2.0)
     """
     _ensure_dirs()
+    _update_heartbeat()  # Initial heartbeat
     _append_worker_log("souleyez background worker: starting loop")
 
-    # Track last stale job check time (check every
+    # Track last stale job check time (check every 15 seconds, not every iteration)
     last_stale_check = 0
-    stale_check_interval =
+    stale_check_interval = 15  # seconds (reduced from 30s for faster detection)
+
+    # Track last heartbeat time
+    last_heartbeat = time.time()
 
     # Run stale job detection on startup
     try:
@@ -1639,8 +2114,14 @@ def worker_loop(poll_interval: float = 2.0):
 
     try:
         while True:
-            # Periodic stale job detection (every 30 seconds)
             current_time = time.time()
+
+            # Update heartbeat every HEARTBEAT_INTERVAL seconds
+            if current_time - last_heartbeat >= HEARTBEAT_INTERVAL:
+                _update_heartbeat()
+                last_heartbeat = current_time
+
+            # Periodic stale job detection (every 15 seconds)
            if current_time - last_stale_check >= stale_check_interval:
                 try:
                     recovered = _detect_and_recover_stale_jobs()
@@ -1650,6 +2131,12 @@ def worker_loop(poll_interval: float = 2.0):
                     _append_worker_log(f"stale job detection error: {e}")
                 last_stale_check = current_time
 
+            # Update progress tracking for running jobs
+            try:
+                _update_job_progress()
+            except Exception as e:
+                _append_worker_log(f"progress tracking error: {e}")
+
             # Check running MSF jobs for exploitation success (every iteration)
             try:
                 detected = _check_msf_exploitation_success()