spatelier-0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analytics/__init__.py +1 -0
- analytics/reporter.py +497 -0
- cli/__init__.py +1 -0
- cli/app.py +147 -0
- cli/audio.py +129 -0
- cli/cli_analytics.py +320 -0
- cli/cli_utils.py +282 -0
- cli/error_handlers.py +122 -0
- cli/files.py +299 -0
- cli/update.py +325 -0
- cli/video.py +823 -0
- cli/worker.py +615 -0
- core/__init__.py +1 -0
- core/analytics_dashboard.py +368 -0
- core/base.py +303 -0
- core/base_service.py +69 -0
- core/config.py +345 -0
- core/database_service.py +116 -0
- core/decorators.py +263 -0
- core/error_handler.py +210 -0
- core/file_tracker.py +254 -0
- core/interactive_cli.py +366 -0
- core/interfaces.py +166 -0
- core/job_queue.py +437 -0
- core/logger.py +79 -0
- core/package_updater.py +469 -0
- core/progress.py +228 -0
- core/service_factory.py +295 -0
- core/streaming.py +299 -0
- core/worker.py +765 -0
- database/__init__.py +1 -0
- database/connection.py +265 -0
- database/metadata.py +516 -0
- database/models.py +288 -0
- database/repository.py +592 -0
- database/transcription_storage.py +219 -0
- modules/__init__.py +1 -0
- modules/audio/__init__.py +5 -0
- modules/audio/converter.py +197 -0
- modules/video/__init__.py +16 -0
- modules/video/converter.py +191 -0
- modules/video/fallback_extractor.py +334 -0
- modules/video/services/__init__.py +18 -0
- modules/video/services/audio_extraction_service.py +274 -0
- modules/video/services/download_service.py +852 -0
- modules/video/services/metadata_service.py +190 -0
- modules/video/services/playlist_service.py +445 -0
- modules/video/services/transcription_service.py +491 -0
- modules/video/transcription_service.py +385 -0
- modules/video/youtube_api.py +397 -0
- spatelier/__init__.py +33 -0
- spatelier-0.3.0.dist-info/METADATA +260 -0
- spatelier-0.3.0.dist-info/RECORD +59 -0
- spatelier-0.3.0.dist-info/WHEEL +5 -0
- spatelier-0.3.0.dist-info/entry_points.txt +2 -0
- spatelier-0.3.0.dist-info/licenses/LICENSE +21 -0
- spatelier-0.3.0.dist-info/top_level.txt +7 -0
- utils/__init__.py +1 -0
- utils/helpers.py +250 -0
core/worker.py
ADDED
@@ -0,0 +1,765 @@
"""
Unified worker for background job processing.

This module provides a single, configurable worker that consolidates all worker
functionality: throttling, stuck job detection, PID tracking, retry logic, and statistics.
"""

import os
import signal
import threading
import time
from datetime import datetime, timedelta
from enum import Enum
from pathlib import Path
from typing import Any, Callable, Dict, Optional

from core.config import Config
from core.job_queue import Job, JobQueue, JobStatus, JobType
from core.logger import get_logger


class WorkerMode(str, Enum):
    """Worker execution mode."""

    THREAD = "thread"  # In-process thread worker
    DAEMON = "daemon"  # System daemon worker
    AUTO = "auto"  # Auto-start/stop worker


class Worker:
    """
    Unified worker for background job processing.

    Consolidates functionality from JobWorker, AutoWorker, DaemonWorker, and WorkerManager.
    Supports multiple execution modes and includes throttling, stuck job detection,
    PID tracking, retry logic, and comprehensive statistics.
    """

    def __init__(
        self,
        config: Config,
        mode: WorkerMode = WorkerMode.THREAD,
        verbose: bool = False,
        max_retries: int = 10,
        min_time_between_jobs: int = 60,
        additional_sleep_time: int = 0,
        poll_interval: int = 30,
        stuck_job_timeout: int = 1800,  # 30 minutes
        services: Optional[Any] = None,
    ):
        """
        Initialize unified worker.

        Args:
            config: Configuration instance
            mode: Worker execution mode (thread, daemon, or auto)
            verbose: Enable verbose logging
            max_retries: Maximum retries for failed jobs
            min_time_between_jobs: Minimum seconds between jobs (throttling)
            additional_sleep_time: Additional sleep time after throttling
            poll_interval: Seconds between queue polls
            stuck_job_timeout: Seconds before a job is considered stuck
            services: Optional service container for job processors
        """
        self.config = config
        self.mode = mode
        self.verbose = verbose
        self.max_retries = max_retries
        self.min_time_between_jobs = min_time_between_jobs
        self.additional_sleep_time = additional_sleep_time
        self.poll_interval = poll_interval
        self.stuck_job_timeout = stuck_job_timeout
        self.services = services

        self.logger = get_logger("Worker", verbose=verbose)

        # Job queue
        self.job_queue = JobQueue(config, verbose=verbose)

        # Worker state
        self.running = False
        self.worker_thread: Optional[threading.Thread] = None
        self.stop_event = threading.Event()
        self.last_job_time: Optional[datetime] = None

        # Job processors
        self.job_processors: Dict[JobType, Callable[[Job], bool]] = {}

        # PID tracking for active jobs
        self.active_jobs: Dict[
            int, Dict[str, Any]
        ] = {}  # job_id -> {"pid": int, "started_at": datetime, "job_type": str}

        # Statistics
        self.stats = {
            "jobs_processed": 0,
            "jobs_failed": 0,
            "jobs_retried": 0,
            "jobs_stuck_detected": 0,
            "jobs_stuck_reset": 0,
            "total_runtime": 0,
            "start_time": None,
        }

        # Daemon management (for daemon mode)
        self.pid_file: Optional[Path] = None
        self.lock_file: Optional[Path] = None

        # Setup signal handlers for daemon mode
        if mode == WorkerMode.DAEMON:
            signal.signal(signal.SIGTERM, self._signal_handler)
            signal.signal(signal.SIGINT, self._signal_handler)
            from core.config import get_default_data_dir

            data_dir = get_default_data_dir()
            self.pid_file = data_dir / "worker.pid"
            self.lock_file = data_dir / "worker.lock"

    def set_throttling(
        self, min_time_seconds: int, additional_sleep_seconds: int = 0
    ) -> None:
        """Set throttling configuration."""
        self.min_time_between_jobs = min_time_seconds
        self.additional_sleep_time = additional_sleep_seconds
        self.logger.info(
            f"Throttling set: min {min_time_seconds}s, additional {additional_sleep_seconds}s"
        )

    def register_processor(
        self, job_type: JobType, processor: Callable[[Job], bool]
    ) -> None:
        """Register a job processor for a specific job type."""
        self.job_processors[job_type] = processor
        self.logger.info(f"Registered processor for {job_type.value}")

    def start(self) -> None:
        """Start the worker."""
        if self.running:
            self.logger.warning("Worker is already running")
            return

        if self.mode == WorkerMode.DAEMON:
            self._start_daemon()
        elif self.mode == WorkerMode.AUTO:
            self._start_auto()
        else:  # THREAD mode
            self._start_thread()

    def stop(self) -> None:
        """Stop the worker."""
        if not self.running:
            self.logger.warning("Worker is not running")
            return

        self.running = False
        self.stop_event.set()

        if self.worker_thread:
            self.worker_thread.join(timeout=10)

        if self.stats["start_time"]:
            self.stats["total_runtime"] = (
                datetime.now() - self.stats["start_time"]
            ).total_seconds()

        # Clean up PID tracking
        self.active_jobs.clear()

        # Clean up daemon files
        if self.mode == WorkerMode.DAEMON:
            self._cleanup_daemon_files()

        self.logger.info("Worker stopped")

    def _start_thread(self) -> None:
        """Start worker in thread mode."""
        self.running = True
        self.stop_event.clear()
        self.stats["start_time"] = datetime.now()

        self.worker_thread = threading.Thread(target=self._worker_loop, daemon=True)
        self.worker_thread.start()

        self.logger.info("Worker started in thread mode")

    def _start_auto(self) -> None:
        """Start worker in auto mode (thread with auto-management)."""
        self._start_thread()
        self.logger.info("Worker started in auto mode")

    def _start_daemon(self) -> None:
        """Start worker in daemon mode (system daemon)."""
        try:
            import psutil
        except ImportError:
            self.logger.error("psutil not available, cannot start daemon mode")
            raise RuntimeError("psutil required for daemon mode")

        # Check if already running
        if self.pid_file and self.pid_file.exists():
            try:
                pid = int(self.pid_file.read_text().strip())
                if psutil.pid_exists(pid):
                    self.logger.warning("Worker daemon is already running")
                    return
            except (ValueError, FileNotFoundError):
                pass

        # Create lock file
        if self.lock_file:
            if self.lock_file.exists():
                self.logger.warning("Lock file exists, worker may be starting")
                return
            self.lock_file.touch()

        # Fork to background
        pid = os.fork()

        if pid == 0:
            # Child process - start worker daemon
            os.setsid()  # Create new session

            # Redirect stdio
            devnull = os.open(os.devnull, os.O_RDWR)
            os.dup2(devnull, 0)  # stdin
            os.dup2(devnull, 1)  # stdout
            os.dup2(devnull, 2)  # stderr

            # Start worker loop
            self.running = True
            self.stop_event.clear()
            self.stats["start_time"] = datetime.now()

            # Write PID file
            if self.pid_file:
                self.pid_file.write_text(str(os.getpid()))

            # Remove lock file
            if self.lock_file:
                self.lock_file.unlink(missing_ok=True)

            self.logger.info("Worker daemon started")
            self._worker_loop()
            os._exit(0)
        else:
            # Parent process
            self.logger.info(f"Started worker daemon with PID {pid}")

            # Write PID file
            if self.pid_file:
                self.pid_file.write_text(str(pid))

            # Remove lock file
            if self.lock_file:
                self.lock_file.unlink(missing_ok=True)

    def _worker_loop(self) -> None:
        """Main worker loop."""
        self.logger.info("Worker loop started")

        while self.running and not self.stop_event.is_set():
            try:
                # Check for stuck jobs first
                stuck_jobs = self._get_stuck_jobs()
                if stuck_jobs:
                    self.logger.warning(f"Found {len(stuck_jobs)} stuck jobs")
                    self._handle_stuck_jobs(stuck_jobs)

                # Check for pending jobs
                jobs = self.job_queue.get_jobs_by_status(JobStatus.PENDING, limit=5)

                # Also retry failed jobs that haven't exceeded max retries
                failed_jobs = self._get_retryable_failed_jobs()
                if failed_jobs:
                    self.logger.info(f"Found {len(failed_jobs)} retryable failed jobs")
                    jobs.extend(failed_jobs)

                if jobs:
                    # Process jobs with throttling
                    for job in jobs:
                        if not self.running:
                            break

                        # Check throttling
                        if self._should_throttle():
                            self.logger.debug("Throttling job processing")
                            break

                        # Process job
                        self._process_job(job)
                        self.last_job_time = datetime.now()
                else:
                    self.logger.debug("No jobs found to process")

                # Sleep before next poll
                self.stop_event.wait(self.poll_interval)

            except Exception as e:
                self.logger.error(f"Error in worker loop: {e}")
                self.stop_event.wait(self.poll_interval)

        self.logger.info("Worker loop ended")

    def _should_throttle(self) -> bool:
        """Check if we should throttle based on timing."""
        if not self.last_job_time:
            return False

        time_since_last = (datetime.now() - self.last_job_time).total_seconds()
        return time_since_last < self.min_time_between_jobs

    def _get_retryable_failed_jobs(self) -> list:
        """Get failed jobs that can be retried."""
        try:
            failed_jobs = self.job_queue.get_jobs_by_status(JobStatus.FAILED, limit=5)

            # Filter for retryable jobs (retry_count < max_retries)
            retryable_jobs = []
            for job in failed_jobs:
                if job.retry_count < self.max_retries:
                    retryable_jobs.append(job)

            return retryable_jobs

        except Exception as e:
            self.logger.error(f"Failed to get retryable failed jobs: {e}")
            return []

    def _get_stuck_jobs(self) -> list:
        """Get jobs that are actually stuck (not just running long)."""
        try:
            running_jobs = self.job_queue.get_jobs_by_status(
                JobStatus.RUNNING, limit=10
            )

            stuck_jobs = []
            cutoff_time = datetime.now() - timedelta(seconds=self.stuck_job_timeout)

            for job in running_jobs:
                # Check if job has been running too long
                job_start_time = job.started_at or job.created_at
                if not job_start_time or job_start_time > cutoff_time:
                    continue  # Job is not old enough to be considered stuck

                # Check if we have PID tracking for this job
                if job.id in self.active_jobs:
                    job_info = self.active_jobs[job.id]
                    pid = job_info.get("pid")

                    if pid and self._is_process_running(pid):
                        # Process is still running, check if it's making progress
                        if self._is_job_making_progress(job, job_info):
                            self.logger.debug(
                                f"Job {job.id} is still running and making progress"
                            )
                            continue
                        else:
                            self.logger.warning(
                                f"Job {job.id} process {pid} is running but not making progress"
                            )
                            stuck_jobs.append(job)
                    else:
                        # Process is not running - job failed silently
                        self.logger.warning(
                            f"Job {job.id} process {pid} is not running - failed silently"
                        )
                        stuck_jobs.append(job)
                else:
                    # No PID tracking - job might be stuck without a process
                    self.logger.warning(
                        f"Job {job.id} has no PID tracking - might be stuck"
                    )
                    stuck_jobs.append(job)

            return stuck_jobs

        except Exception as e:
            self.logger.error(f"Failed to get stuck jobs: {e}")
            return []

    def _is_process_running(self, pid: int) -> bool:
        """Check if a process with the given PID is still running."""
        try:
            os.kill(pid, 0)  # Signal 0 does nothing, but checks if PID exists
            return True
        except (OSError, ProcessLookupError):
            return False

    def _is_job_making_progress(self, job: Job, job_info: Dict[str, Any]) -> bool:
        """Check if a job is making progress (files being created, etc.)."""
        try:
            # For download jobs, check if output files are being created/modified
            if job.job_type.value in ["download_video", "download_playlist"]:
                output_path = Path(job.job_path)

                # Check if any files in the output directory have been modified recently
                if output_path.exists():
                    recent_files = []
                    for file_path in output_path.rglob("*"):
                        if file_path.is_file():
                            # Check if file was modified in the last 5 minutes
                            if time.time() - file_path.stat().st_mtime < 300:
                                recent_files.append(file_path)

                    if recent_files:
                        self.logger.debug(
                            f"Job {job.id} has {len(recent_files)} recently modified files"
                        )
                        return True

                # Check if temp files are being created
                from core.config import get_default_data_dir

                temp_path = get_default_data_dir() / "tmp" / "video" / str(job.id)
                if temp_path.exists():
                    temp_files = list(temp_path.rglob("*"))
                    if temp_files:
                        # Check if any temp files were modified recently
                        recent_temp_files = [
                            f
                            for f in temp_files
                            if time.time() - f.stat().st_mtime < 300
                        ]
                        if recent_temp_files:
                            self.logger.debug(
                                f"Job {job.id} has {len(recent_temp_files)} recently modified temp files"
                            )
                            return True

            # For other job types, assume it's making progress if we can't determine otherwise
            return True

        except Exception as e:
            self.logger.error(f"Failed to check job progress: {e}")
            return False

    def _handle_stuck_jobs(self, stuck_jobs: list) -> None:
        """Handle stuck jobs intelligently based on their actual status."""
        try:
            for job in stuck_jobs:
                self.logger.warning(f"Analyzing stuck job {job.id}")
                self.stats["jobs_stuck_detected"] += 1

                # Check if we have PID tracking for this job
                if job.id in self.active_jobs:
                    job_info = self.active_jobs[job.id]
                    pid = job_info.get("pid")

                    if pid and self._is_process_running(pid):
                        # Process is still running but not making progress
                        self.logger.warning(
                            f"Job {job.id} process {pid} is running but stuck"
                        )

                        # Check if we got any output files
                        if self._check_job_output_success(job):
                            self.logger.info(
                                f"Job {job.id} actually completed successfully, marking as completed"
                            )
                            self.job_queue.update_job_status(
                                job.id, JobStatus.COMPLETED
                            )
                        else:
                            self.logger.warning(
                                f"Job {job.id} is stuck with no output, resetting to pending"
                            )
                            self.job_queue.update_job_status(
                                job.id,
                                JobStatus.PENDING,
                                error_message=f"Job was stuck in running state for {self.stuck_job_timeout}s, reset to pending",
                            )
                            self.stats["jobs_stuck_reset"] += 1
                    else:
                        # Process is not running - job failed silently
                        self.logger.warning(
                            f"Job {job.id} process {pid} is not running - failed silently"
                        )

                        # Check if we got any output files despite the failure
                        if self._check_job_output_success(job):
                            self.logger.info(
                                f"Job {job.id} completed successfully despite process failure, marking as completed"
                            )
                            self.job_queue.update_job_status(
                                job.id, JobStatus.COMPLETED
                            )
                        else:
                            self.logger.warning(
                                f"Job {job.id} failed silently with no output, marking as failed"
                            )
                            self.job_queue.update_job_status(
                                job.id,
                                JobStatus.FAILED,
                                error_message=f"Job failed silently - process not running after {self.stuck_job_timeout}s",
                            )
                else:
                    # No PID tracking - job might be stuck without a process
                    self.logger.warning(
                        f"Job {job.id} has no PID tracking - might be stuck"
                    )

                    # Check if we got any output files
                    if self._check_job_output_success(job):
                        self.logger.info(
                            f"Job {job.id} actually completed successfully, marking as completed"
                        )
                        self.job_queue.update_job_status(job.id, JobStatus.COMPLETED)
                    else:
                        self.logger.warning(
                            f"Job {job.id} is stuck with no output, resetting to pending"
                        )
                        self.job_queue.update_job_status(
                            job.id,
                            JobStatus.PENDING,
                            error_message=f"Job was stuck in running state for {self.stuck_job_timeout}s, reset to pending",
                        )
                        self.stats["jobs_stuck_reset"] += 1

        except Exception as e:
            self.logger.error(f"Failed to handle stuck jobs: {e}")

    def _check_job_output_success(self, job: Job) -> bool:
        """Check if a job actually succeeded by looking for output files."""
        try:
            # For download jobs, check if output files exist
            if job.job_type.value in ["download_video", "download_playlist"]:
                output_path = Path(job.job_path)

                if output_path.exists():
                    # Check if there are any video files in the output directory
                    video_files = (
                        list(output_path.rglob("*.mp4"))
                        + list(output_path.rglob("*.mkv"))
                        + list(output_path.rglob("*.avi"))
                    )
                    if video_files:
                        self.logger.info(
                            f"Job {job.id} has {len(video_files)} video files in output directory"
                        )
                        return True

                    # Check if there are any files at all
                    all_files = list(output_path.rglob("*"))
                    if all_files:
                        self.logger.info(
                            f"Job {job.id} has {len(all_files)} files in output directory"
                        )
                        return True

            return False

        except Exception as e:
            self.logger.error(f"Failed to check job output success: {e}")
            return False

    def _process_job(self, job: Job) -> None:
        """Process a single job."""
        self.logger.info(f"Processing job {job.id}: {job.job_type.value}")

        # Update job status to running
        self.job_queue.update_job_status(job.id, JobStatus.RUNNING)

        # Track this job's PID
        self.active_jobs[job.id] = {
            "pid": os.getpid(),  # Current process PID
            "started_at": datetime.now(),
            "job_type": job.job_type.value,
        }

        try:
            # If this is a retry, increment retry count
            if job.status == JobStatus.FAILED:
                job.retry_count += 1
                self.stats["jobs_retried"] += 1

            # Get processor for job type
            processor = self.job_processors.get(job.job_type)
            if not processor:
                raise ValueError(
                    f"No processor registered for job type: {job.job_type.value}"
                )

            # Process job
            success = processor(job)

            if success:
                self.job_queue.update_job_status(job.id, JobStatus.COMPLETED)
                self.stats["jobs_processed"] += 1
                self.logger.info(f"Job {job.id} completed successfully")
            else:
                self.job_queue.update_job_status(
                    job.id, JobStatus.FAILED, "Processor returned False"
                )
                self.stats["jobs_failed"] += 1
                self.logger.error(f"Job {job.id} failed: Processor returned False")

        except Exception as e:
            error_msg = str(e)
            self.job_queue.update_job_status(job.id, JobStatus.FAILED, error_msg)
            self.stats["jobs_failed"] += 1
            self.logger.error(f"Job {job.id} failed: {error_msg}")

        finally:
            # Clean up PID tracking
            if job.id in self.active_jobs:
                del self.active_jobs[job.id]

    def get_stats(self) -> Dict[str, Any]:
        """Get worker statistics."""
        queue_status = self.job_queue.get_queue_status()

        return {
            "worker_running": self.running,
            "mode": self.mode.value,
            "throttling": {
                "min_time_between_jobs": self.min_time_between_jobs,
                "additional_sleep_time": self.additional_sleep_time,
                "last_job_time": self.last_job_time.isoformat()
                if self.last_job_time
                else None,
            },
            "queue_status": queue_status,
            "worker_stats": self.stats,
            "registered_processors": [jt.value for jt in self.job_processors.keys()],
            "active_jobs": len(self.active_jobs),
            "stuck_jobs_detected": self.stats["jobs_stuck_detected"],
            "stuck_jobs_reset": self.stats["jobs_stuck_reset"],
        }

    def is_running(self) -> bool:
        """Check if worker is running (for daemon mode)."""
        if self.mode != WorkerMode.DAEMON:
            return self.running

        if not self.pid_file or not self.pid_file.exists():
            return False

        try:
            import psutil

            pid = int(self.pid_file.read_text().strip())

            if not psutil.pid_exists(pid):
                self._cleanup_daemon_files()
                return False

            # Check if it's actually our worker process
            try:
                process = psutil.Process(pid)
                if "python" in process.name().lower():
                    return True
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                pass

            return False

        except (ValueError, FileNotFoundError, ImportError):
            return False

    def stop_daemon(self) -> bool:
        """Stop daemon worker (for daemon mode)."""
        if self.mode != WorkerMode.DAEMON:
            return False

        if not self.is_running():
            return True

        try:
            import psutil

            pid = int(self.pid_file.read_text().strip())

            # Send SIGTERM
            os.kill(pid, signal.SIGTERM)

            # Wait for process to stop
            for _ in range(10):
                if not psutil.pid_exists(pid):
                    break
                time.sleep(1)

            # Force kill if still running
            if psutil.pid_exists(pid):
                self.logger.warning(f"Force killing worker PID {pid}")
                os.kill(pid, signal.SIGKILL)

            self._cleanup_daemon_files()
            return True

        except Exception as e:
            self.logger.error(f"Failed to stop daemon: {e}")
            return False

    def _signal_handler(self, signum, frame):
        """Handle shutdown signals."""
        self.logger.info(f"Received signal {signum}, shutting down...")
        self.running = False

    def _cleanup_daemon_files(self):
        """Clean up daemon files."""
        if self.pid_file and self.pid_file.exists():
            self.pid_file.unlink(missing_ok=True)
        if self.lock_file and self.lock_file.exists():
            self.lock_file.unlink(missing_ok=True)


# Helper functions for creating job processors
def create_download_processor(services) -> Callable[[Job], bool]:
    """Create a download job processor."""

    def process_download_job(job: Job) -> bool:
        """Process a download job."""
        try:
            # Extract job data
            job_data = job.job_data
            video_url = job_data.get("url")
            output_path = Path(job.job_path)

            if not video_url:
                raise ValueError("No URL in job data")

            # Create output directory
            output_path.mkdir(parents=True, exist_ok=True)

            # Download video using existing service
            result = services.video_download.download_video(
                video_url, output_path, quality=job_data.get("quality", "1080p")
            )

            return result.is_successful()

        except Exception as e:
            raise Exception(f"Download failed: {e}")

    return process_download_job


def create_playlist_processor(services) -> Callable[[Job], bool]:
    """Create a playlist download processor."""

    def process_playlist_job(job: Job) -> bool:
        """Process a playlist download job."""
        try:
            # Extract job data
            job_data = job.job_data
            playlist_url = job_data.get("url")
            output_path = Path(job.job_path)

            if not playlist_url:
                raise ValueError("No URL in job data")

            # Create output directory
            output_path.mkdir(parents=True, exist_ok=True)

            # Download playlist using existing service
            result = services.playlist.download_playlist(
                playlist_url, output_path, quality=job_data.get("quality", "1080p")
            )

            return result.is_successful()

        except Exception as e:
            raise Exception(f"Playlist download failed: {e}")

    return process_playlist_job