spatelier 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. analytics/__init__.py +1 -0
  2. analytics/reporter.py +497 -0
  3. cli/__init__.py +1 -0
  4. cli/app.py +147 -0
  5. cli/audio.py +129 -0
  6. cli/cli_analytics.py +320 -0
  7. cli/cli_utils.py +282 -0
  8. cli/error_handlers.py +122 -0
  9. cli/files.py +299 -0
  10. cli/update.py +325 -0
  11. cli/video.py +823 -0
  12. cli/worker.py +615 -0
  13. core/__init__.py +1 -0
  14. core/analytics_dashboard.py +368 -0
  15. core/base.py +303 -0
  16. core/base_service.py +69 -0
  17. core/config.py +345 -0
  18. core/database_service.py +116 -0
  19. core/decorators.py +263 -0
  20. core/error_handler.py +210 -0
  21. core/file_tracker.py +254 -0
  22. core/interactive_cli.py +366 -0
  23. core/interfaces.py +166 -0
  24. core/job_queue.py +437 -0
  25. core/logger.py +79 -0
  26. core/package_updater.py +469 -0
  27. core/progress.py +228 -0
  28. core/service_factory.py +295 -0
  29. core/streaming.py +299 -0
  30. core/worker.py +765 -0
  31. database/__init__.py +1 -0
  32. database/connection.py +265 -0
  33. database/metadata.py +516 -0
  34. database/models.py +288 -0
  35. database/repository.py +592 -0
  36. database/transcription_storage.py +219 -0
  37. modules/__init__.py +1 -0
  38. modules/audio/__init__.py +5 -0
  39. modules/audio/converter.py +197 -0
  40. modules/video/__init__.py +16 -0
  41. modules/video/converter.py +191 -0
  42. modules/video/fallback_extractor.py +334 -0
  43. modules/video/services/__init__.py +18 -0
  44. modules/video/services/audio_extraction_service.py +274 -0
  45. modules/video/services/download_service.py +852 -0
  46. modules/video/services/metadata_service.py +190 -0
  47. modules/video/services/playlist_service.py +445 -0
  48. modules/video/services/transcription_service.py +491 -0
  49. modules/video/transcription_service.py +385 -0
  50. modules/video/youtube_api.py +397 -0
  51. spatelier/__init__.py +33 -0
  52. spatelier-0.3.0.dist-info/METADATA +260 -0
  53. spatelier-0.3.0.dist-info/RECORD +59 -0
  54. spatelier-0.3.0.dist-info/WHEEL +5 -0
  55. spatelier-0.3.0.dist-info/entry_points.txt +2 -0
  56. spatelier-0.3.0.dist-info/licenses/LICENSE +21 -0
  57. spatelier-0.3.0.dist-info/top_level.txt +7 -0
  58. utils/__init__.py +1 -0
  59. utils/helpers.py +250 -0
@@ -0,0 +1,852 @@
1
+ """
2
+ Video download service.
3
+
4
+ This module provides focused video downloading functionality,
5
+ separated from transcription and metadata concerns.
6
+ """
7
+
8
+ import subprocess
9
+ import tempfile
10
+ import time
11
+ from pathlib import Path
12
+ from typing import Any, Dict, List, Optional, Union
13
+
14
+ from core.base import BaseDownloader, ProcessingResult
15
+ from core.base_service import BaseService
16
+ from core.config import Config
17
+ from database.metadata import MetadataExtractor, MetadataManager
18
+ from database.models import MediaType, ProcessingStatus
19
+ from modules.video.fallback_extractor import FallbackExtractor
20
+ from utils.helpers import get_file_hash, get_file_type, safe_filename
21
+
22
+
23
+ class VideoDownloadService(BaseService):
24
+ """
25
+ Focused video download service.
26
+
27
+ Handles only video downloading, without transcription or complex metadata processing.
28
+ """
29
+
30
+ def __init__(self, config: Config, verbose: bool = False, db_service=None):
31
+ """Initialize the video download service."""
32
+ # Initialize base service
33
+ super().__init__(config, verbose, db_service)
34
+
35
+ # Service-specific initialization
36
+ self.supported_sites = [
37
+ "youtube.com",
38
+ "youtu.be",
39
+ "vimeo.com",
40
+ "dailymotion.com",
41
+ "twitch.tv",
42
+ "twitter.com",
43
+ "instagram.com",
44
+ "tiktok.com",
45
+ ]
46
+
47
+ # Initialize metadata management
48
+ self.metadata_extractor = MetadataExtractor(config, verbose=verbose)
49
+ self.metadata_manager = MetadataManager(config, verbose=verbose)
50
+
51
+ # Initialize fallback extractor
52
+ try:
53
+ self.fallback_extractor = FallbackExtractor(config)
54
+ except RuntimeError as exc:
55
+ self.fallback_extractor = None
56
+ self.logger.info(f"Fallback extractor disabled: {exc}")
57
+
58
    def download_video(
        self, url: str, output_path: Optional[Union[str, Path]] = None, **kwargs
    ) -> ProcessingResult:
        """
        Download a single video from URL.

        Args:
            url: URL to download from
            output_path: Optional output path
            **kwargs: Additional download options

        Returns:
            ProcessingResult with download details
        """
        # Track download start
        # NOTE(review): this call and the metadata extraction below run before
        # the try block — an exception here propagates to the caller instead
        # of producing a failed ProcessingResult. Confirm that is intended.
        self.repos.analytics.track_event("download_start", event_data={"url": url})

        # Extract metadata before download
        source_metadata = {}
        if "youtube.com" in url or "youtu.be" in url:
            source_metadata = self.metadata_extractor.extract_youtube_metadata(url)
            self.logger.info(
                f"Extracted YouTube metadata: {source_metadata.get('title', 'Unknown')}"
            )

        try:
            # Determine output path.
            # output_file stays None unless the caller passed a path with a
            # file suffix; otherwise the path is treated as a directory.
            output_file = None
            if output_path is None:
                from core.config import get_default_data_dir

                repo_root = get_default_data_dir().parent
                output_dir = self.config.video.output_dir or (repo_root / "downloads")
            else:
                output_path = Path(output_path)
                if output_path.suffix:
                    output_file = output_path
                    output_dir = output_path.parent
                else:
                    output_dir = output_path

            output_dir.mkdir(parents=True, exist_ok=True)

            # Create processing job
            job = self.repos.jobs.create(
                media_file_id=None,  # Will be updated after processing
                job_type="download_video",
                input_path=url,
                output_path=str(output_file or output_dir),
                parameters=str(kwargs),
            )
            self.logger.info(f"Created video processing job: {job.id}")

            # Check if output is on NAS and set up temp processing if needed
            is_nas = self._is_nas_path(output_dir)

            temp_dir = None
            processing_path = output_dir

            if is_nas:
                # Create job-specific temp processing directory so the
                # download happens on local disk before the final move.
                temp_dir = self._get_temp_processing_dir(job.id)
                processing_path = temp_dir
                self.logger.info(f"NAS detected, using temp processing: {temp_dir}")
                self.logger.info(f"Video will be processed in: {processing_path}")

            # Mark job as processing (sets started_at for duration tracking)
            self.repos.jobs.update_status(job.id, ProcessingStatus.PROCESSING)

            # Download using yt-dlp
            downloaded_file = self._download_with_ytdlp(url, processing_path, **kwargs)

            if downloaded_file and downloaded_file.exists():
                # Extract video metadata
                video_id = self._extract_video_id_from_url(url)

                # Create media file record from the downloaded file plus any
                # source metadata gathered before the download.
                media_file = self.repos.media.create(
                    file_path=str(downloaded_file),
                    file_name=downloaded_file.name,
                    file_size=downloaded_file.stat().st_size,
                    file_hash=get_file_hash(downloaded_file),
                    media_type=MediaType.VIDEO,
                    mime_type=get_file_type(downloaded_file),
                    source_url=url,
                    source_platform=(
                        "youtube"
                        if "youtube.com" in url or "youtu.be" in url
                        else "unknown"
                    ),
                    source_id=video_id,
                    title=source_metadata.get("title", downloaded_file.stem),
                    description=source_metadata.get("description"),
                    uploader=source_metadata.get("uploader"),
                    uploader_id=source_metadata.get("uploader_id"),
                    upload_date=source_metadata.get("upload_date"),
                    view_count=source_metadata.get("view_count"),
                    like_count=source_metadata.get("like_count"),
                    duration=source_metadata.get("duration"),
                    language=source_metadata.get("language"),
                )

                # Enrich with additional metadata
                self.metadata_manager.enrich_media_file(
                    media_file, self.repos.media, extract_source_metadata=True
                )

                # Update job with media file ID
                self.repos.jobs.update(
                    job.id,
                    media_file_id=media_file.id,
                    output_path=str(downloaded_file),
                )

                # If we used temp processing, move file to final destination
                if is_nas and temp_dir:
                    self.logger.info("Moving video to NAS destination...")
                    final_file_path = output_file or (output_dir / downloaded_file.name)

                    if self._move_file_to_nas(downloaded_file, final_file_path):
                        self.logger.info(
                            f"Successfully moved video to NAS: {final_file_path}"
                        )

                        # Check if a media file with this path already exists
                        existing_media = self.repos.media.get_by_file_path(
                            str(final_file_path)
                        )
                        if existing_media:
                            # Delete the old record and update the current one
                            self.logger.info(
                                f"Found existing media file {existing_media.id} with same path, updating it"
                            )
                            self.repos.media.delete(existing_media.id)

                        # Update media file record with final path
                        self.repos.media.update(
                            media_file.id,
                            file_path=str(final_file_path),
                            file_name=final_file_path.name,
                        )

                        # Update job status
                        self.repos.jobs.update_status(
                            job.id, ProcessingStatus.COMPLETED
                        )

                        # Clean up temp directory
                        self._cleanup_temp_directory(temp_dir)
                        self.logger.info(f"Cleaned up temp directory: {temp_dir}")

                        return ProcessingResult(
                            success=True,
                            message="Video downloaded and moved to NAS successfully",
                            output_path=str(final_file_path),
                            metadata={
                                "media_file_id": media_file.id,
                                "job_id": job.id,
                                "nas_processing": True,
                            },
                        )
                    else:
                        # NOTE(review): temp_dir is not cleaned up on this
                        # failure path — temp files accumulate. Confirm whether
                        # they are kept deliberately for retry/debugging.
                        self.logger.error("Failed to move video to NAS")
                        self.repos.jobs.update_status(
                            job.id,
                            ProcessingStatus.FAILED,
                            error_message="Failed to move to NAS",
                        )
                        return ProcessingResult(
                            success=False,
                            message="Video downloaded but failed to move to NAS",
                            errors=["Failed to move file to final destination"],
                        )
                else:
                    # For local downloads, update job status
                    self.repos.jobs.update_status(job.id, ProcessingStatus.COMPLETED)

                    # If the caller asked for an exact file path, move/rename
                    # the download there and keep the DB records in sync.
                    final_file_path = downloaded_file
                    if output_file and final_file_path.exists():
                        output_file.parent.mkdir(parents=True, exist_ok=True)
                        if final_file_path.resolve() != output_file.resolve():
                            final_file_path.replace(output_file)
                        self.repos.media.update(
                            media_file.id,
                            file_path=str(output_file),
                            file_name=output_file.name,
                        )
                        self.repos.jobs.update(job.id, output_path=str(output_file))
                        final_file_path = output_file

                    return ProcessingResult(
                        success=True,
                        message="Video downloaded successfully",
                        output_path=str(final_file_path),
                        metadata={
                            "media_file_id": media_file.id,
                            "job_id": job.id,
                            "nas_processing": False,
                        },
                    )
            else:
                self.repos.jobs.update_status(
                    job.id, ProcessingStatus.FAILED, error_message="Download failed"
                )
                return ProcessingResult(
                    success=False,
                    message="Video download failed",
                    errors=["No video file found after download"],
                )

        except Exception as e:
            # NOTE(review): if the job was already created, it is left in
            # PROCESSING here (never marked FAILED), and any temp directory
            # is not cleaned up — verify against job-queue recovery logic.
            self.logger.error(f"Video download failed: {e}")
            return ProcessingResult(
                success=False, message=f"Video download failed: {e}", errors=[str(e)]
            )
273
+
274
+ def _download_with_ytdlp(
275
+ self, url: str, output_path: Path, **kwargs
276
+ ) -> Optional[Path]:
277
+ """Download video using yt-dlp.
278
+
279
+ Automatically refreshes cookies and retries if download fails due to
280
+ authentication issues with age-restricted content.
281
+ """
282
+ try:
283
+ # Build yt-dlp options
284
+ ydl_opts = self._build_ydl_opts(output_path, **kwargs)
285
+
286
+ output_path.mkdir(parents=True, exist_ok=True)
287
+
288
+ # Execute download
289
+ import yt_dlp
290
+
291
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
292
+ info = ydl.extract_info(url, download=True)
293
+ downloaded_file = self._resolve_downloaded_path(ydl, info)
294
+ if (
295
+ downloaded_file
296
+ and downloaded_file.exists()
297
+ and downloaded_file.stat().st_size > 0
298
+ ):
299
+ return downloaded_file
300
+
301
+ # Only fallback to finding latest if we can't resolve the path
302
+ # But validate it matches the expected video ID to avoid picking up old files
303
+ return self._validate_fallback_file(output_path, url)
304
+
305
+ except Exception as e:
306
+ error_msg = str(e)
307
+ # Check if this is a cookie/authentication error
308
+ if any(
309
+ keyword in error_msg.lower()
310
+ for keyword in ["sign in", "age", "cookies", "authentication"]
311
+ ):
312
+ self.logger.warning(
313
+ "Download failed due to authentication - attempting to refresh cookies..."
314
+ )
315
+ # Try to refresh cookies and get cookie file
316
+ cookie_file = self._refresh_youtube_cookies()
317
+ if cookie_file:
318
+ self.logger.info("Retrying download with refreshed cookies...")
319
+ # Retry the download with cookie file
320
+ try:
321
+ ydl_opts = self._build_ydl_opts(output_path, **kwargs)
322
+ # Use the cookie file instead of cookies_from_browser
323
+ ydl_opts["cookies"] = cookie_file
324
+ if "cookies_from_browser" in ydl_opts:
325
+ del ydl_opts["cookies_from_browser"]
326
+
327
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
328
+ info = ydl.extract_info(url, download=True)
329
+ downloaded_file = self._resolve_downloaded_path(ydl, info)
330
+ if (
331
+ downloaded_file
332
+ and downloaded_file.exists()
333
+ and downloaded_file.stat().st_size > 0
334
+ ):
335
+ return downloaded_file
336
+
337
+ # Clean up cookie file
338
+ import os
339
+
340
+ try:
341
+ os.unlink(cookie_file)
342
+ except:
343
+ pass
344
+
345
+ # Validate fallback file matches video ID
346
+ return self._validate_fallback_file(output_path, url)
347
+ except Exception as retry_error:
348
+ # Clean up cookie file
349
+ import os
350
+
351
+ try:
352
+ os.unlink(cookie_file)
353
+ except:
354
+ pass
355
+ self.logger.error(
356
+ f"Download failed after cookie refresh: {retry_error}"
357
+ )
358
+ return None
359
+ else:
360
+ self.logger.error(f"yt-dlp download failed: {e}")
361
+ return None
362
+ else:
363
+ self.logger.error(f"yt-dlp download failed: {e}")
364
+ return self._validate_fallback_file(output_path, url)
365
+
366
    def _refresh_youtube_cookies(self) -> Optional[str]:
        """Refresh YouTube cookies by visiting YouTube and extracting fresh cookies.

        Uses Playwright to launch Chrome with the user's profile, visit YouTube,
        extract the cookies, and save them to a temporary file for yt-dlp to use.

        Note: the caller is responsible for deleting the returned temp file.

        Returns:
            Path to cookie file if successful, None otherwise
        """
        try:
            import os
            import platform
            import tempfile

            from playwright.sync_api import sync_playwright

            system = platform.system().lower()
            if system != "darwin":
                # Only implemented for macOS for now
                return None

            # Get Chrome user data directory
            chrome_user_data = os.path.expanduser(
                "~/Library/Application Support/Google/Chrome"
            )

            # Without an existing Chrome profile there is no session to reuse.
            if not os.path.exists(chrome_user_data):
                return None

            self.logger.info(
                "Refreshing YouTube cookies by visiting YouTube in Chrome..."
            )

            with sync_playwright() as p:
                # Launch Chrome with user's profile; the flag hides the
                # automation fingerprint from sites that block headless bots.
                browser = p.chromium.launch_persistent_context(
                    user_data_dir=chrome_user_data,
                    headless=True,
                    args=["--disable-blink-features=AutomationControlled"],
                )

                # Visit YouTube to refresh session
                page = browser.new_page()
                page.goto(
                    "https://www.youtube.com", wait_until="networkidle", timeout=15000
                )
                # Wait a moment for cookies to be set
                page.wait_for_timeout(3000)

                # Extract cookies from the page
                cookies = browser.cookies()
                browser.close()

                # Filter for YouTube cookies only
                youtube_cookies = [
                    c
                    for c in cookies
                    if "youtube.com" in c.get("domain", "")
                    or ".youtube.com" in c.get("domain", "")
                ]

                if not youtube_cookies:
                    self.logger.warning("No YouTube cookies found after refresh")
                    return None

                # Save cookies to Netscape format file for yt-dlp
                cookie_file = tempfile.NamedTemporaryFile(
                    mode="w", suffix=".txt", delete=False
                )
                cookie_file.write("# Netscape HTTP Cookie File\n")
                cookie_file.write("# This file was generated by spatelier\n\n")

                # One tab-separated Netscape-format line per cookie:
                # domain, include-subdomains flag, path, secure, expiry, name, value.
                for cookie in youtube_cookies:
                    domain = cookie.get("domain", "")
                    domain_flag = "TRUE" if domain.startswith(".") else "FALSE"
                    path = cookie.get("path", "/")
                    secure = "TRUE" if cookie.get("secure", False) else "FALSE"
                    # NOTE(review): Playwright reports -1 for session cookies,
                    # which is written as "-1" here — confirm yt-dlp accepts it.
                    expires = str(int(cookie.get("expires", 0)))
                    name = cookie.get("name", "")
                    value = cookie.get("value", "")

                    cookie_file.write(
                        f"{domain}\t{domain_flag}\t{path}\t{secure}\t{expires}\t{name}\t{value}\n"
                    )

                cookie_file.close()
                self.logger.info(
                    f"YouTube cookies refreshed and saved to: {cookie_file.name}"
                )
                return cookie_file.name

        except Exception as e:
            self.logger.warning(f"Failed to refresh cookies automatically: {e}")
            return None
460
+
461
+ def _resolve_downloaded_path(
462
+ self, ydl, info: Optional[Dict[str, Any]]
463
+ ) -> Optional[Path]:
464
+ """Resolve downloaded file path from yt-dlp info."""
465
+ if not info:
466
+ return None
467
+
468
+ if isinstance(info, dict) and info.get("_type") == "playlist":
469
+ entries = [entry for entry in info.get("entries") or [] if entry]
470
+ if not entries:
471
+ return None
472
+ info = entries[0]
473
+
474
+ if not isinstance(info, dict):
475
+ return None
476
+
477
+ return Path(ydl.prepare_filename(info))
478
+
479
+ def _find_latest_download(self, output_path: Path) -> Optional[Path]:
480
+ """Find the most recently modified downloaded video file."""
481
+ candidates: List[Path] = []
482
+ for ext in self.config.video_extensions:
483
+ candidates.extend(output_path.glob(f"*{ext}"))
484
+
485
+ candidates = [path for path in candidates if path.is_file()]
486
+ if not candidates:
487
+ return None
488
+
489
+ return max(candidates, key=lambda path: path.stat().st_mtime)
490
+
491
+ def _validate_fallback_file(self, output_path: Path, url: str) -> Optional[Path]:
492
+ """Find latest download and validate it matches the expected video ID."""
493
+ fallback_file = self._find_latest_download(output_path)
494
+ if not fallback_file:
495
+ return None
496
+
497
+ # Extract video ID from URL to validate
498
+ import re
499
+
500
+ # Match YouTube URLs including /shorts/, /watch?v=, /v/, /embed/, youtu.be
501
+ video_id_match = re.search(
502
+ r'(?:youtube\.com/(?:shorts/|watch\?v=|v/|embed/|[^/]+/.+/|.*[?&]v=)|youtu\.be/)([^"&?/\s]{11})',
503
+ url,
504
+ )
505
+ if video_id_match:
506
+ expected_id = video_id_match.group(1)
507
+ # Check if the filename contains the expected video ID
508
+ if expected_id in fallback_file.name:
509
+ return fallback_file
510
+ else:
511
+ self.logger.warning(
512
+ f"Found file {fallback_file.name} but it doesn't match expected video ID {expected_id}. "
513
+ "Download may have failed."
514
+ )
515
+ return None
516
+ # If we can't extract video ID, return the file anyway (for non-YouTube URLs)
517
+ return fallback_file
518
+
519
+ def _get_cookies_from_browser(self) -> Optional[tuple]:
520
+ """Try to get cookies from common browsers automatically.
521
+
522
+ Returns a tuple of browsers to try in order. yt-dlp will try each browser
523
+ until one works, or continue without cookies if none are available.
524
+
525
+ Note: On macOS, Chrome is more reliable than Safari for cookie extraction.
526
+ """
527
+ # Try browsers in order of preference
528
+ # On macOS, Chrome is more reliable than Safari (Safari cookies are harder to access)
529
+ # yt-dlp will try each browser until one works
530
+ import platform
531
+
532
+ system = platform.system().lower()
533
+
534
+ if system == "darwin": # macOS - prioritize Chrome over Safari
535
+ browsers = ("chrome", "safari", "firefox", "edge")
536
+ else: # Linux, Windows, etc.
537
+ browsers = ("chrome", "firefox", "safari", "edge")
538
+
539
+ return browsers
540
+
541
    def _build_ydl_opts(self, output_path: Path, **kwargs) -> Dict:
        """Build yt-dlp options.

        Args:
            output_path: Directory the output template points into.
            **kwargs: May carry "quality" and "format" overrides; config
                values are used as defaults.

        Returns:
            Options dict for yt_dlp.YoutubeDL.
        """
        # Output template: "<title> [<video id>].<ext>" inside output_path.
        output_template = str(output_path / "%(title)s [%(id)s].%(ext)s")

        ydl_opts = {
            "outtmpl": output_template,
            "format": self._get_format_selector(
                kwargs.get("quality", self.config.video.quality),
                kwargs.get("format", self.config.video.default_format),
            ),
            "writeinfojson": False,
            "writesubtitles": False,
            "writeautomaticsub": False,
            "no_warnings": not self.verbose,
            "quiet": not self.verbose,
            # Add fallback formats for YouTube SABR streaming issues
            "format_sort": ["res", "ext", "codec", "br", "asr"],
            # Try to use available formats even if preferred format fails
            "ignoreerrors": False,
        }

        # Automatically try to use cookies from browser for age-restricted content
        cookies_browser = self._get_cookies_from_browser()
        if cookies_browser:
            # NOTE(review): "cookies_from_browser" is not a recognized
            # YoutubeDL params key — yt-dlp's option is "cookiesfrombrowser",
            # and it expects a (browser, profile, keyring, container) tuple
            # for ONE browser, not a preference list. As written this entry
            # is likely ignored, so browser cookies are never actually used.
            # Fixing it would change runtime behavior (keychain prompts,
            # failures when the browser is absent) — confirm before changing.
            ydl_opts["cookies_from_browser"] = cookies_browser
            if self.verbose:
                self.logger.info(
                    f"Attempting to use cookies from browsers: {cookies_browser}"
                )

        if self.verbose:
            ydl_opts["verbose"] = True

        return ydl_opts
576
+
577
+ def _get_format_selector(self, quality: str, format: str) -> str:
578
+ """Get format selector for yt-dlp with fallbacks for YouTube issues."""
579
+ if quality == "best":
580
+ # Add fallback chain: preferred format -> any format -> best available
581
+ return f"best[ext={format}]/bestvideo[ext={format}]+bestaudio/best[ext={format}]/best"
582
+ elif quality == "worst":
583
+ return f"worst[ext={format}]/worst"
584
+ else:
585
+ # Extract numeric part from quality (e.g., "1080p" -> "1080")
586
+ try:
587
+ height = quality.replace("p", "")
588
+ # Add fallback chain with height constraint
589
+ return f"best[height<={height}][ext={format}]/bestvideo[height<={height}]+bestaudio/best[height<={height}]/best"
590
+ except:
591
+ # Fallback to simpler selector if parsing fails
592
+ return f"best[ext={format}]/bestvideo+bestaudio/best"
593
+
594
+ def _is_nas_path(self, path: Union[str, Path]) -> bool:
595
+ """Check if path is on NAS."""
596
+ path_str = str(path)
597
+ return any(
598
+ nas_indicator in path_str.lower()
599
+ for nas_indicator in [
600
+ "/volumes/",
601
+ "/mnt/",
602
+ "nas",
603
+ "network",
604
+ "smb://",
605
+ "nfs://",
606
+ ]
607
+ )
608
+
609
+ def _get_temp_processing_dir(self, job_id: int) -> Path:
610
+ """Get temporary processing directory for job."""
611
+ temp_dir = self.config.video.temp_dir / str(job_id)
612
+ temp_dir.mkdir(parents=True, exist_ok=True)
613
+ return temp_dir
614
+
615
+ def _move_file_to_nas(self, source_file: Path, dest_file: Path) -> bool:
616
+ """Move file to NAS destination."""
617
+ try:
618
+ import shutil
619
+
620
+ dest_file.parent.mkdir(parents=True, exist_ok=True)
621
+ shutil.move(str(source_file), str(dest_file))
622
+ return True
623
+ except Exception as e:
624
+ self.logger.error(f"Failed to move file to NAS: {e}")
625
+ return False
626
+
627
+ def _cleanup_temp_directory(self, temp_dir: Path):
628
+ """Clean up temporary directory."""
629
+ try:
630
+ import shutil
631
+
632
+ shutil.rmtree(temp_dir)
633
+ except Exception as e:
634
+ self.logger.warning(f"Failed to clean up temp directory {temp_dir}: {e}")
635
+
636
+ def _extract_video_id_from_url(self, url: str) -> str:
637
+ """Extract video ID from URL."""
638
+ if "youtube.com" in url or "youtu.be" in url:
639
+ if "v=" in url:
640
+ return url.split("v=")[1].split("&")[0]
641
+ elif "youtu.be/" in url:
642
+ return url.split("youtu.be/")[1].split("?")[0]
643
+ return "unknown"
644
+
645
    def _get_playlist_progress(self, playlist_id: str) -> Dict[str, int]:
        """Get playlist download progress.

        A video counts as "completed" only when its file exists on disk AND a
        transcription sidecar is present; otherwise it counts as "failed".

        Args:
            playlist_id: External playlist identifier (not the DB primary key).

        Returns:
            Dict with "total", "completed", "failed" and "remaining" counts
            (all zero when the playlist is unknown or a lookup fails).
        """
        try:
            # Get playlist from database
            playlist = self.repos.playlists.get_by_playlist_id(playlist_id)
            if not playlist:
                return {"total": 0, "completed": 0, "failed": 0, "remaining": 0}

            # Get playlist videos
            playlist_videos = self.repos.playlist_videos.get_by_playlist_id(playlist.id)
            total = len(playlist_videos)

            completed = 0
            failed = 0

            for pv in playlist_videos:
                media_file = self.repos.media.get_by_id(pv.media_file_id)
                if media_file and media_file.file_path:
                    file_path = Path(media_file.file_path)
                    if file_path.exists():
                        # Check if has transcription
                        if self._check_video_has_transcription(media_file):
                            completed += 1
                        else:
                            failed += 1
                    else:
                        failed += 1
                else:
                    failed += 1

            # NOTE(review): every video increments either completed or failed,
            # so remaining is always 0 here — confirm whether "remaining" was
            # meant to count videos not yet attempted.
            remaining = total - completed - failed

            return {
                "total": total,
                "completed": completed,
                "failed": failed,
                "remaining": remaining,
            }

        except Exception as e:
            self.logger.error(f"Failed to get playlist progress: {e}")
            return {"total": 0, "completed": 0, "failed": 0, "remaining": 0}
687
+
688
    def _get_failed_videos(self, playlist_id: str) -> List[Dict[str, Any]]:
        """Get failed videos from playlist.

        Mirrors the classification used by _get_playlist_progress: a video is
        failed when its media record is missing, its file is gone, or it has
        no transcription sidecar.

        Args:
            playlist_id: External playlist identifier.

        Returns:
            List of dicts with "position", "video_title" and "reason" keys;
            empty when the playlist is unknown or a lookup fails.
        """
        try:
            # Get playlist from database
            playlist = self.repos.playlists.get_by_playlist_id(playlist_id)
            if not playlist:
                return []

            # Get playlist videos
            playlist_videos = self.repos.playlist_videos.get_by_playlist_id(playlist.id)
            failed_videos = []

            for pv in playlist_videos:
                media_file = self.repos.media.get_by_id(pv.media_file_id)
                if media_file and media_file.file_path:
                    file_path = Path(media_file.file_path)
                    if not file_path.exists():
                        failed_videos.append(
                            {
                                "position": pv.position,
                                "video_title": pv.video_title or "Unknown",
                                "reason": "File missing",
                            }
                        )
                    elif not self._check_video_has_transcription(media_file):
                        failed_videos.append(
                            {
                                "position": pv.position,
                                "video_title": pv.video_title or "Unknown",
                                "reason": "No transcription",
                            }
                        )
                else:
                    # No media record (or record without a path) for this entry.
                    failed_videos.append(
                        {
                            "position": pv.position,
                            "video_title": pv.video_title or "Unknown",
                            "reason": "Media file not found",
                        }
                    )

            return failed_videos

        except Exception as e:
            self.logger.error(f"Failed to get failed videos: {e}")
            return []
734
+
735
+ def _check_video_has_transcription(self, media_file) -> bool:
736
+ """Check if video has transcription."""
737
+ try:
738
+ if not media_file or not media_file.file_path:
739
+ return False
740
+
741
+ file_path = Path(media_file.file_path)
742
+ if not file_path.exists():
743
+ return False
744
+
745
+ # Check for transcription files
746
+ base_name = file_path.stem
747
+ transcription_files = [
748
+ file_path.parent / f"{base_name}.srt",
749
+ file_path.parent / f"{base_name}.vtt",
750
+ file_path.parent / f"{base_name}.json",
751
+ ]
752
+
753
+ return any(f.exists() for f in transcription_files)
754
+
755
+ except Exception as e:
756
+ self.logger.error(f"Failed to check transcription: {e}")
757
+ return False
758
+
759
+ def download_playlist_with_transcription(
760
+ self,
761
+ url: str,
762
+ output_path: Optional[Union[str, Path]] = None,
763
+ continue_download: bool = True,
764
+ **kwargs,
765
+ ) -> Dict[str, Any]:
766
+ """Download playlist with transcription support."""
767
+ try:
768
+ # This method would integrate with PlaylistService
769
+ # For now, return a placeholder implementation
770
+ from modules.video.services.playlist_service import PlaylistService
771
+
772
+ playlist_service = PlaylistService(
773
+ self.config, verbose=self.verbose, db_service=self.db_factory
774
+ )
775
+ result = playlist_service.download_playlist(url, output_path, **kwargs)
776
+
777
+ # Add transcription logic here if needed
778
+ return result
779
+
780
+ except Exception as e:
781
+ self.logger.error(f"Playlist download with transcription failed: {e}")
782
+ return {
783
+ "success": False,
784
+ "message": f"Playlist download failed: {e}",
785
+ "errors": [str(e)],
786
+ }
787
+
788
+ def _check_existing_video(self, file_path: Path, url: str) -> Dict[str, Any]:
789
+ """Check if video file exists and has subtitles."""
790
+ result = {
791
+ "exists": False,
792
+ "has_subtitles": False,
793
+ "should_overwrite": True,
794
+ "reason": "",
795
+ }
796
+
797
+ if not file_path.exists():
798
+ result["reason"] = f"File {file_path} does not exist"
799
+ return result
800
+
801
+ result["exists"] = True
802
+
803
+ # Check for subtitles
804
+ has_subtitles = self._has_whisper_subtitles(file_path)
805
+ result["has_subtitles"] = has_subtitles
806
+
807
+ if has_subtitles:
808
+ result["should_overwrite"] = False
809
+ result["reason"] = f"File {file_path} exists with WhisperAI subtitles"
810
+ else:
811
+ result["should_overwrite"] = True
812
+ result["reason"] = f"File {file_path} exists without subtitles"
813
+
814
+ return result
815
+
816
+ def _has_whisper_subtitles(self, file_path: Path) -> bool:
817
+ """Check if video file has Whisper subtitles."""
818
+ try:
819
+ # Use ffprobe to check for subtitle tracks
820
+ cmd = [
821
+ "ffprobe",
822
+ "-v",
823
+ "quiet",
824
+ "-print_format",
825
+ "json",
826
+ "-show_streams",
827
+ "-show_format",
828
+ str(file_path),
829
+ ]
830
+
831
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
832
+
833
+ if result.returncode != 0:
834
+ return False
835
+
836
+ import json
837
+
838
+ data = json.loads(result.stdout)
839
+
840
+ # Check for subtitle streams
841
+ for stream in data.get("streams", []):
842
+ if stream.get("codec_type") == "subtitle":
843
+ # Check if it's a Whisper subtitle
844
+ title = stream.get("tags", {}).get("title", "")
845
+ if "whisper" in title.lower() or "whisperai" in title.lower():
846
+ return True
847
+
848
+ return False
849
+
850
+ except Exception as e:
851
+ self.logger.warning(f"Error checking subtitles for {file_path}: {e}")
852
+ return False