spatelier-0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. analytics/__init__.py +1 -0
  2. analytics/reporter.py +497 -0
  3. cli/__init__.py +1 -0
  4. cli/app.py +147 -0
  5. cli/audio.py +129 -0
  6. cli/cli_analytics.py +320 -0
  7. cli/cli_utils.py +282 -0
  8. cli/error_handlers.py +122 -0
  9. cli/files.py +299 -0
  10. cli/update.py +325 -0
  11. cli/video.py +823 -0
  12. cli/worker.py +615 -0
  13. core/__init__.py +1 -0
  14. core/analytics_dashboard.py +368 -0
  15. core/base.py +303 -0
  16. core/base_service.py +69 -0
  17. core/config.py +345 -0
  18. core/database_service.py +116 -0
  19. core/decorators.py +263 -0
  20. core/error_handler.py +210 -0
  21. core/file_tracker.py +254 -0
  22. core/interactive_cli.py +366 -0
  23. core/interfaces.py +166 -0
  24. core/job_queue.py +437 -0
  25. core/logger.py +79 -0
  26. core/package_updater.py +469 -0
  27. core/progress.py +228 -0
  28. core/service_factory.py +295 -0
  29. core/streaming.py +299 -0
  30. core/worker.py +765 -0
  31. database/__init__.py +1 -0
  32. database/connection.py +265 -0
  33. database/metadata.py +516 -0
  34. database/models.py +288 -0
  35. database/repository.py +592 -0
  36. database/transcription_storage.py +219 -0
  37. modules/__init__.py +1 -0
  38. modules/audio/__init__.py +5 -0
  39. modules/audio/converter.py +197 -0
  40. modules/video/__init__.py +16 -0
  41. modules/video/converter.py +191 -0
  42. modules/video/fallback_extractor.py +334 -0
  43. modules/video/services/__init__.py +18 -0
  44. modules/video/services/audio_extraction_service.py +274 -0
  45. modules/video/services/download_service.py +852 -0
  46. modules/video/services/metadata_service.py +190 -0
  47. modules/video/services/playlist_service.py +445 -0
  48. modules/video/services/transcription_service.py +491 -0
  49. modules/video/transcription_service.py +385 -0
  50. modules/video/youtube_api.py +397 -0
  51. spatelier/__init__.py +33 -0
  52. spatelier-0.3.0.dist-info/METADATA +260 -0
  53. spatelier-0.3.0.dist-info/RECORD +59 -0
  54. spatelier-0.3.0.dist-info/WHEEL +5 -0
  55. spatelier-0.3.0.dist-info/entry_points.txt +2 -0
  56. spatelier-0.3.0.dist-info/licenses/LICENSE +21 -0
  57. spatelier-0.3.0.dist-info/top_level.txt +7 -0
  58. utils/__init__.py +1 -0
  59. utils/helpers.py +250 -0
core/worker.py ADDED
@@ -0,0 +1,765 @@
"""
Unified worker for background job processing.

This module provides a single, configurable worker that consolidates all worker
functionality: throttling, stuck job detection, PID tracking, retry logic, and statistics.
"""

import os
import signal
import threading
import time
from datetime import datetime, timedelta
from enum import Enum
from pathlib import Path
from typing import Any, Callable, Dict, Optional

from core.config import Config
from core.job_queue import Job, JobQueue, JobStatus, JobType
from core.logger import get_logger


class WorkerMode(str, Enum):
    """Worker execution mode."""

    THREAD = "thread"  # In-process thread worker
    DAEMON = "daemon"  # System daemon worker
    AUTO = "auto"  # Auto-start/stop worker


class Worker:
    """
    Unified worker for background job processing.

    Consolidates functionality from JobWorker, AutoWorker, DaemonWorker, and WorkerManager.
    Supports multiple execution modes and includes throttling, stuck job detection,
    PID tracking, retry logic, and comprehensive statistics.
    """

    def __init__(
        self,
        config: Config,
        mode: WorkerMode = WorkerMode.THREAD,
        verbose: bool = False,
        max_retries: int = 10,
        min_time_between_jobs: int = 60,
        additional_sleep_time: int = 0,
        poll_interval: int = 30,
        stuck_job_timeout: int = 1800,  # 30 minutes
        services: Optional[Any] = None,
    ):
        """
        Initialize unified worker.

        Args:
            config: Configuration instance
            mode: Worker execution mode (thread, daemon, or auto)
            verbose: Enable verbose logging
            max_retries: Maximum retries for failed jobs
            min_time_between_jobs: Minimum seconds between jobs (throttling)
            additional_sleep_time: Additional sleep time after throttling
            poll_interval: Seconds between queue polls
            stuck_job_timeout: Seconds before a job is considered stuck
            services: Optional service container for job processors
        """
        self.config = config
        self.mode = mode
        self.verbose = verbose
        self.max_retries = max_retries
        self.min_time_between_jobs = min_time_between_jobs
        self.additional_sleep_time = additional_sleep_time
        self.poll_interval = poll_interval
        self.stuck_job_timeout = stuck_job_timeout
        self.services = services

        self.logger = get_logger("Worker", verbose=verbose)

        # Job queue
        self.job_queue = JobQueue(config, verbose=verbose)

        # Worker state
        self.running = False
        self.worker_thread: Optional[threading.Thread] = None
        self.stop_event = threading.Event()
        self.last_job_time: Optional[datetime] = None

        # Job processors
        self.job_processors: Dict[JobType, Callable[[Job], bool]] = {}

        # PID tracking for active jobs
        # job_id -> {"pid": int, "started_at": datetime, "job_type": str}
        self.active_jobs: Dict[int, Dict[str, Any]] = {}

        # Statistics
        self.stats = {
            "jobs_processed": 0,
            "jobs_failed": 0,
            "jobs_retried": 0,
            "jobs_stuck_detected": 0,
            "jobs_stuck_reset": 0,
            "total_runtime": 0,
            "start_time": None,
        }

        # Daemon management (for daemon mode)
        self.pid_file: Optional[Path] = None
        self.lock_file: Optional[Path] = None

        # Setup signal handlers for daemon mode
        if mode == WorkerMode.DAEMON:
            signal.signal(signal.SIGTERM, self._signal_handler)
            signal.signal(signal.SIGINT, self._signal_handler)
            from core.config import get_default_data_dir

            data_dir = get_default_data_dir()
            self.pid_file = data_dir / "worker.pid"
            self.lock_file = data_dir / "worker.lock"

    def set_throttling(
        self, min_time_seconds: int, additional_sleep_seconds: int = 0
    ) -> None:
        """Set throttling configuration."""
        self.min_time_between_jobs = min_time_seconds
        self.additional_sleep_time = additional_sleep_seconds
        self.logger.info(
            f"Throttling set: min {min_time_seconds}s, additional {additional_sleep_seconds}s"
        )

    def register_processor(
        self, job_type: JobType, processor: Callable[[Job], bool]
    ) -> None:
        """Register a job processor for a specific job type."""
        self.job_processors[job_type] = processor
        self.logger.info(f"Registered processor for {job_type.value}")

    def start(self) -> None:
        """Start the worker."""
        if self.running:
            self.logger.warning("Worker is already running")
            return

        if self.mode == WorkerMode.DAEMON:
            self._start_daemon()
        elif self.mode == WorkerMode.AUTO:
            self._start_auto()
        else:  # THREAD mode
            self._start_thread()

    def stop(self) -> None:
        """Stop the worker."""
        if not self.running:
            self.logger.warning("Worker is not running")
            return

        self.running = False
        self.stop_event.set()

        if self.worker_thread:
            self.worker_thread.join(timeout=10)

        if self.stats["start_time"]:
            self.stats["total_runtime"] = (
                datetime.now() - self.stats["start_time"]
            ).total_seconds()

        # Clean up PID tracking
        self.active_jobs.clear()

        # Clean up daemon files
        if self.mode == WorkerMode.DAEMON:
            self._cleanup_daemon_files()

        self.logger.info("Worker stopped")

    def _start_thread(self) -> None:
        """Start worker in thread mode."""
        self.running = True
        self.stop_event.clear()
        self.stats["start_time"] = datetime.now()

        self.worker_thread = threading.Thread(target=self._worker_loop, daemon=True)
        self.worker_thread.start()

        self.logger.info("Worker started in thread mode")

    def _start_auto(self) -> None:
        """Start worker in auto mode (thread with auto-management)."""
        self._start_thread()
        self.logger.info("Worker started in auto mode")

    def _start_daemon(self) -> None:
        """Start worker in daemon mode (system daemon)."""
        try:
            import psutil
        except ImportError:
            self.logger.error("psutil not available, cannot start daemon mode")
            raise RuntimeError("psutil required for daemon mode")

        # Check if already running
        if self.pid_file and self.pid_file.exists():
            try:
                pid = int(self.pid_file.read_text().strip())
                if psutil.pid_exists(pid):
                    self.logger.warning("Worker daemon is already running")
                    return
            except (ValueError, FileNotFoundError):
                pass

        # Create lock file
        if self.lock_file:
            if self.lock_file.exists():
                self.logger.warning("Lock file exists, worker may be starting")
                return
            self.lock_file.touch()

        # Fork to background
        pid = os.fork()

        if pid == 0:
            # Child process - start worker daemon
            os.setsid()  # Create new session

            # Redirect stdio
            devnull = os.open(os.devnull, os.O_RDWR)
            os.dup2(devnull, 0)  # stdin
            os.dup2(devnull, 1)  # stdout
            os.dup2(devnull, 2)  # stderr

            # Start worker loop
            self.running = True
            self.stop_event.clear()
            self.stats["start_time"] = datetime.now()

            # Write PID file
            if self.pid_file:
                self.pid_file.write_text(str(os.getpid()))

            # Remove lock file
            if self.lock_file:
                self.lock_file.unlink(missing_ok=True)

            self.logger.info("Worker daemon started")
            self._worker_loop()
            os._exit(0)
        else:
            # Parent process
            self.logger.info(f"Started worker daemon with PID {pid}")

            # Write PID file
            if self.pid_file:
                self.pid_file.write_text(str(pid))

            # Remove lock file
            if self.lock_file:
                self.lock_file.unlink(missing_ok=True)

    def _worker_loop(self) -> None:
        """Main worker loop."""
        self.logger.info("Worker loop started")

        while self.running and not self.stop_event.is_set():
            try:
                # Check for stuck jobs first
                stuck_jobs = self._get_stuck_jobs()
                if stuck_jobs:
                    self.logger.warning(f"Found {len(stuck_jobs)} stuck jobs")
                    self._handle_stuck_jobs(stuck_jobs)

                # Check for pending jobs
                jobs = self.job_queue.get_jobs_by_status(JobStatus.PENDING, limit=5)

                # Also retry failed jobs that haven't exceeded max retries
                failed_jobs = self._get_retryable_failed_jobs()
                if failed_jobs:
                    self.logger.info(f"Found {len(failed_jobs)} retryable failed jobs")
                    jobs.extend(failed_jobs)

                if jobs:
                    # Process jobs with throttling
                    for job in jobs:
                        if not self.running:
                            break

                        # Check throttling
                        if self._should_throttle():
                            self.logger.debug("Throttling job processing")
                            break

                        # Process job
                        self._process_job(job)
                        self.last_job_time = datetime.now()
                else:
                    self.logger.debug("No jobs found to process")

                # Sleep before next poll
                self.stop_event.wait(self.poll_interval)

            except Exception as e:
                self.logger.error(f"Error in worker loop: {e}")
                self.stop_event.wait(self.poll_interval)

        self.logger.info("Worker loop ended")

    def _should_throttle(self) -> bool:
        """Check if we should throttle based on timing."""
        if not self.last_job_time:
            return False

        time_since_last = (datetime.now() - self.last_job_time).total_seconds()
        return time_since_last < self.min_time_between_jobs

    def _get_retryable_failed_jobs(self) -> list:
        """Get failed jobs that can be retried."""
        try:
            failed_jobs = self.job_queue.get_jobs_by_status(JobStatus.FAILED, limit=5)

            # Filter for retryable jobs (retry_count < max_retries)
            retryable_jobs = []
            for job in failed_jobs:
                if job.retry_count < self.max_retries:
                    retryable_jobs.append(job)

            return retryable_jobs

        except Exception as e:
            self.logger.error(f"Failed to get retryable failed jobs: {e}")
            return []

    def _get_stuck_jobs(self) -> list:
        """Get jobs that are actually stuck (not just running long)."""
        try:
            running_jobs = self.job_queue.get_jobs_by_status(
                JobStatus.RUNNING, limit=10
            )

            stuck_jobs = []
            cutoff_time = datetime.now() - timedelta(seconds=self.stuck_job_timeout)

            for job in running_jobs:
                # Check if job has been running too long
                job_start_time = job.started_at or job.created_at
                if not job_start_time or job_start_time > cutoff_time:
                    continue  # Job is not old enough to be considered stuck

                # Check if we have PID tracking for this job
                if job.id in self.active_jobs:
                    job_info = self.active_jobs[job.id]
                    pid = job_info.get("pid")

                    if pid and self._is_process_running(pid):
                        # Process is still running, check if it's making progress
                        if self._is_job_making_progress(job, job_info):
                            self.logger.debug(
                                f"Job {job.id} is still running and making progress"
                            )
                            continue
                        else:
                            self.logger.warning(
                                f"Job {job.id} process {pid} is running but not making progress"
                            )
                            stuck_jobs.append(job)
                    else:
                        # Process is not running - job failed silently
                        self.logger.warning(
                            f"Job {job.id} process {pid} is not running - failed silently"
                        )
                        stuck_jobs.append(job)
                else:
                    # No PID tracking - job might be stuck without a process
                    self.logger.warning(
                        f"Job {job.id} has no PID tracking - might be stuck"
                    )
                    stuck_jobs.append(job)

            return stuck_jobs

        except Exception as e:
            self.logger.error(f"Failed to get stuck jobs: {e}")
            return []
    def _is_process_running(self, pid: int) -> bool:
        """Check if a process with the given PID is still running."""
        try:
            os.kill(pid, 0)  # Signal 0 sends nothing, but checks whether the PID exists
        except ProcessLookupError:
            return False
        except PermissionError:
            return True  # Process exists but is owned by another user
        except OSError:
            return False
        return True

    def _is_job_making_progress(self, job: Job, job_info: Dict[str, Any]) -> bool:
        """Check if a job is making progress (files being created, etc.)."""
        try:
            # For download jobs, check if output files are being created/modified
            if job.job_type.value in ["download_video", "download_playlist"]:
                output_path = Path(job.job_path)

                # Check if any files in the output directory have been modified recently
                if output_path.exists():
                    recent_files = []
                    for file_path in output_path.rglob("*"):
                        if file_path.is_file():
                            # Check if file was modified in the last 5 minutes
                            if time.time() - file_path.stat().st_mtime < 300:
                                recent_files.append(file_path)

                    if recent_files:
                        self.logger.debug(
                            f"Job {job.id} has {len(recent_files)} recently modified files"
                        )
                        return True

                # Check if temp files are being created
                from core.config import get_default_data_dir

                temp_path = get_default_data_dir() / "tmp" / "video" / str(job.id)
                if temp_path.exists():
                    temp_files = list(temp_path.rglob("*"))
                    if temp_files:
                        # Check if any temp files were modified recently
                        recent_temp_files = [
                            f
                            for f in temp_files
                            if time.time() - f.stat().st_mtime < 300
                        ]
                        if recent_temp_files:
                            self.logger.debug(
                                f"Job {job.id} has {len(recent_temp_files)} recently modified temp files"
                            )
                            return True

            # For other job types, assume it's making progress if we can't determine otherwise
            return True

        except Exception as e:
            self.logger.error(f"Failed to check job progress: {e}")
            return False

    def _handle_stuck_jobs(self, stuck_jobs: list) -> None:
        """Handle stuck jobs intelligently based on their actual status."""
        try:
            for job in stuck_jobs:
                self.logger.warning(f"Analyzing stuck job {job.id}")
                self.stats["jobs_stuck_detected"] += 1

                # Check if we have PID tracking for this job
                if job.id in self.active_jobs:
                    job_info = self.active_jobs[job.id]
                    pid = job_info.get("pid")

                    if pid and self._is_process_running(pid):
                        # Process is still running but not making progress
                        self.logger.warning(
                            f"Job {job.id} process {pid} is running but stuck"
                        )

                        # Check if we got any output files
                        if self._check_job_output_success(job):
                            self.logger.info(
                                f"Job {job.id} actually completed successfully, marking as completed"
                            )
                            self.job_queue.update_job_status(
                                job.id, JobStatus.COMPLETED
                            )
                        else:
                            self.logger.warning(
                                f"Job {job.id} is stuck with no output, resetting to pending"
                            )
                            self.job_queue.update_job_status(
                                job.id,
                                JobStatus.PENDING,
                                error_message=f"Job was stuck in running state for {self.stuck_job_timeout}s, reset to pending",
                            )
                            self.stats["jobs_stuck_reset"] += 1
                    else:
                        # Process is not running - job failed silently
                        self.logger.warning(
                            f"Job {job.id} process {pid} is not running - failed silently"
                        )

                        # Check if we got any output files despite the failure
                        if self._check_job_output_success(job):
                            self.logger.info(
                                f"Job {job.id} completed successfully despite process failure, marking as completed"
                            )
                            self.job_queue.update_job_status(
                                job.id, JobStatus.COMPLETED
                            )
                        else:
                            self.logger.warning(
                                f"Job {job.id} failed silently with no output, marking as failed"
                            )
                            self.job_queue.update_job_status(
                                job.id,
                                JobStatus.FAILED,
                                error_message=f"Job failed silently - process not running after {self.stuck_job_timeout}s",
                            )
                else:
                    # No PID tracking - job might be stuck without a process
                    self.logger.warning(
                        f"Job {job.id} has no PID tracking - might be stuck"
                    )

                    # Check if we got any output files
                    if self._check_job_output_success(job):
                        self.logger.info(
                            f"Job {job.id} actually completed successfully, marking as completed"
                        )
                        self.job_queue.update_job_status(job.id, JobStatus.COMPLETED)
                    else:
                        self.logger.warning(
                            f"Job {job.id} is stuck with no output, resetting to pending"
                        )
                        self.job_queue.update_job_status(
                            job.id,
                            JobStatus.PENDING,
                            error_message=f"Job was stuck in running state for {self.stuck_job_timeout}s, reset to pending",
                        )
                        self.stats["jobs_stuck_reset"] += 1

        except Exception as e:
            self.logger.error(f"Failed to handle stuck jobs: {e}")

    def _check_job_output_success(self, job: Job) -> bool:
        """Check if a job actually succeeded by looking for output files."""
        try:
            # For download jobs, check if output files exist
            if job.job_type.value in ["download_video", "download_playlist"]:
                output_path = Path(job.job_path)

                if output_path.exists():
                    # Check if there are any video files in the output directory
                    video_files = (
                        list(output_path.rglob("*.mp4"))
                        + list(output_path.rglob("*.mkv"))
                        + list(output_path.rglob("*.avi"))
                    )
                    if video_files:
                        self.logger.info(
                            f"Job {job.id} has {len(video_files)} video files in output directory"
                        )
                        return True

                    # Check if there are any files at all
                    all_files = list(output_path.rglob("*"))
                    if all_files:
                        self.logger.info(
                            f"Job {job.id} has {len(all_files)} files in output directory"
                        )
                        return True

            return False

        except Exception as e:
            self.logger.error(f"Failed to check job output success: {e}")
            return False

    def _process_job(self, job: Job) -> None:
        """Process a single job."""
        self.logger.info(f"Processing job {job.id}: {job.job_type.value}")

        # Update job status to running
        self.job_queue.update_job_status(job.id, JobStatus.RUNNING)

        # Track this job's PID
        self.active_jobs[job.id] = {
            "pid": os.getpid(),  # Current process PID
            "started_at": datetime.now(),
            "job_type": job.job_type.value,
        }

        try:
            # If this is a retry, increment retry count
            if job.status == JobStatus.FAILED:
                job.retry_count += 1
                self.stats["jobs_retried"] += 1

            # Get processor for job type
            processor = self.job_processors.get(job.job_type)
            if not processor:
                raise ValueError(
                    f"No processor registered for job type: {job.job_type.value}"
                )

            # Process job
            success = processor(job)

            if success:
                self.job_queue.update_job_status(job.id, JobStatus.COMPLETED)
                self.stats["jobs_processed"] += 1
                self.logger.info(f"Job {job.id} completed successfully")
            else:
                self.job_queue.update_job_status(
                    job.id, JobStatus.FAILED, "Processor returned False"
                )
                self.stats["jobs_failed"] += 1
                self.logger.error(f"Job {job.id} failed: Processor returned False")

        except Exception as e:
            error_msg = str(e)
            self.job_queue.update_job_status(job.id, JobStatus.FAILED, error_msg)
            self.stats["jobs_failed"] += 1
            self.logger.error(f"Job {job.id} failed: {error_msg}")

        finally:
            # Clean up PID tracking
            if job.id in self.active_jobs:
                del self.active_jobs[job.id]

    def get_stats(self) -> Dict[str, Any]:
        """Get worker statistics."""
        queue_status = self.job_queue.get_queue_status()

        return {
            "worker_running": self.running,
            "mode": self.mode.value,
            "throttling": {
                "min_time_between_jobs": self.min_time_between_jobs,
                "additional_sleep_time": self.additional_sleep_time,
                "last_job_time": self.last_job_time.isoformat()
                if self.last_job_time
                else None,
            },
            "queue_status": queue_status,
            "worker_stats": self.stats,
            "registered_processors": [jt.value for jt in self.job_processors.keys()],
            "active_jobs": len(self.active_jobs),
            "stuck_jobs_detected": self.stats["jobs_stuck_detected"],
            "stuck_jobs_reset": self.stats["jobs_stuck_reset"],
        }

    def is_running(self) -> bool:
        """Check if worker is running (for daemon mode)."""
        if self.mode != WorkerMode.DAEMON:
            return self.running

        if not self.pid_file or not self.pid_file.exists():
            return False

        try:
            import psutil

            pid = int(self.pid_file.read_text().strip())

            if not psutil.pid_exists(pid):
                self._cleanup_daemon_files()
                return False

            # Check if it's actually our worker process
            try:
                process = psutil.Process(pid)
                if "python" in process.name().lower():
                    return True
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                pass

            return False

        except (ValueError, FileNotFoundError, ImportError):
            return False

    def stop_daemon(self) -> bool:
        """Stop daemon worker (for daemon mode)."""
        if self.mode != WorkerMode.DAEMON:
            return False

        if not self.is_running():
            return True

        try:
            import psutil

            pid = int(self.pid_file.read_text().strip())

            # Send SIGTERM
            os.kill(pid, signal.SIGTERM)

            # Wait for process to stop
            for _ in range(10):
                if not psutil.pid_exists(pid):
                    break
                time.sleep(1)

            # Force kill if still running
            if psutil.pid_exists(pid):
                self.logger.warning(f"Force killing worker PID {pid}")
                os.kill(pid, signal.SIGKILL)

            self._cleanup_daemon_files()
            return True

        except Exception as e:
            self.logger.error(f"Failed to stop daemon: {e}")
            return False

    def _signal_handler(self, signum, frame):
        """Handle shutdown signals."""
        self.logger.info(f"Received signal {signum}, shutting down...")
        self.running = False

    def _cleanup_daemon_files(self):
        """Clean up daemon files."""
        if self.pid_file and self.pid_file.exists():
            self.pid_file.unlink(missing_ok=True)
        if self.lock_file and self.lock_file.exists():
            self.lock_file.unlink(missing_ok=True)

# Helper functions for creating job processors
def create_download_processor(services) -> Callable[[Job], bool]:
    """Create a download job processor."""

    def process_download_job(job: Job) -> bool:
        """Process a download job."""
        try:
            # Extract job data
            job_data = job.job_data
            video_url = job_data.get("url")
            output_path = Path(job.job_path)

            if not video_url:
                raise ValueError("No URL in job data")

            # Create output directory
            output_path.mkdir(parents=True, exist_ok=True)

            # Download video using existing service
            result = services.video_download.download_video(
                video_url, output_path, quality=job_data.get("quality", "1080p")
            )

            return result.is_successful()

        except Exception as e:
            raise RuntimeError(f"Download failed: {e}") from e

    return process_download_job


def create_playlist_processor(services) -> Callable[[Job], bool]:
    """Create a playlist download processor."""

    def process_playlist_job(job: Job) -> bool:
        """Process a playlist download job."""
        try:
            # Extract job data
            job_data = job.job_data
            playlist_url = job_data.get("url")
            output_path = Path(job.job_path)

            if not playlist_url:
                raise ValueError("No URL in job data")

            # Create output directory
            output_path.mkdir(parents=True, exist_ok=True)

            # Download playlist using existing service
            result = services.playlist.download_playlist(
                playlist_url, output_path, quality=job_data.get("quality", "1080p")
            )

            return result.is_successful()

        except Exception as e:
            raise RuntimeError(f"Playlist download failed: {e}") from e

    return process_playlist_job
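
Usage sketch (editor's illustration, not part of the published wheel): wiring the Worker to the bundled download processor in thread mode. It assumes Config() can be constructed without arguments, that JobType defines a DOWNLOAD_VIDEO member matching the "download_video" value checked above, and that a service container exposing video_download is available; build_services() below is a hypothetical stand-in for whatever core.service_factory actually provides.

from core.config import Config
from core.job_queue import JobType
from core.worker import Worker, WorkerMode, create_download_processor

config = Config()                  # assumed no-argument constructor
services = build_services(config)  # hypothetical container exposing .video_download

worker = Worker(config, mode=WorkerMode.THREAD, verbose=True, poll_interval=15)
worker.set_throttling(min_time_seconds=60, additional_sleep_seconds=5)
worker.register_processor(JobType.DOWNLOAD_VIDEO, create_download_processor(services))

worker.start()                     # spawns the background thread running _worker_loop()
try:
    print(worker.get_stats())      # queue status, throttling info, processed/failed counts
finally:
    worker.stop()                  # joins the thread and clears PID tracking

In DAEMON mode the same object instead forks a detached process, writes worker.pid and worker.lock under the default data directory, and is then controlled through is_running() and stop_daemon().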