spatelier-0.3.0-py3-none-any.whl
This diff shows the content of a publicly available package version as released to its public registry. Every file in the wheel appears as an addition; of the files listed below, the two video service modules, modules/video/services/metadata_service.py (+190) and modules/video/services/playlist_service.py (+445), are reproduced in full afterward.
- analytics/__init__.py +1 -0
- analytics/reporter.py +497 -0
- cli/__init__.py +1 -0
- cli/app.py +147 -0
- cli/audio.py +129 -0
- cli/cli_analytics.py +320 -0
- cli/cli_utils.py +282 -0
- cli/error_handlers.py +122 -0
- cli/files.py +299 -0
- cli/update.py +325 -0
- cli/video.py +823 -0
- cli/worker.py +615 -0
- core/__init__.py +1 -0
- core/analytics_dashboard.py +368 -0
- core/base.py +303 -0
- core/base_service.py +69 -0
- core/config.py +345 -0
- core/database_service.py +116 -0
- core/decorators.py +263 -0
- core/error_handler.py +210 -0
- core/file_tracker.py +254 -0
- core/interactive_cli.py +366 -0
- core/interfaces.py +166 -0
- core/job_queue.py +437 -0
- core/logger.py +79 -0
- core/package_updater.py +469 -0
- core/progress.py +228 -0
- core/service_factory.py +295 -0
- core/streaming.py +299 -0
- core/worker.py +765 -0
- database/__init__.py +1 -0
- database/connection.py +265 -0
- database/metadata.py +516 -0
- database/models.py +288 -0
- database/repository.py +592 -0
- database/transcription_storage.py +219 -0
- modules/__init__.py +1 -0
- modules/audio/__init__.py +5 -0
- modules/audio/converter.py +197 -0
- modules/video/__init__.py +16 -0
- modules/video/converter.py +191 -0
- modules/video/fallback_extractor.py +334 -0
- modules/video/services/__init__.py +18 -0
- modules/video/services/audio_extraction_service.py +274 -0
- modules/video/services/download_service.py +852 -0
- modules/video/services/metadata_service.py +190 -0
- modules/video/services/playlist_service.py +445 -0
- modules/video/services/transcription_service.py +491 -0
- modules/video/transcription_service.py +385 -0
- modules/video/youtube_api.py +397 -0
- spatelier/__init__.py +33 -0
- spatelier-0.3.0.dist-info/METADATA +260 -0
- spatelier-0.3.0.dist-info/RECORD +59 -0
- spatelier-0.3.0.dist-info/WHEEL +5 -0
- spatelier-0.3.0.dist-info/entry_points.txt +2 -0
- spatelier-0.3.0.dist-info/licenses/LICENSE +21 -0
- spatelier-0.3.0.dist-info/top_level.txt +7 -0
- utils/__init__.py +1 -0
- utils/helpers.py +250 -0
modules/video/services/metadata_service.py
@@ -0,0 +1,190 @@
+"""
+Video metadata service.
+
+This module provides focused metadata extraction and management functionality.
+"""
+
+from pathlib import Path
+from typing import Any, Dict, Optional, Union
+
+from core.base_service import BaseService
+from core.config import Config
+from database.metadata import MetadataExtractor, MetadataManager
+
+
+class MetadataService(BaseService):
+    """
+    Focused metadata service.
+
+    Handles metadata extraction, enrichment, and management for video files.
+    """
+
+    def __init__(self, config: Config, verbose: bool = False, db_service=None):
+        """Initialize the metadata service."""
+        # Initialize base service
+        super().__init__(config, verbose, db_service)
+
+        # Initialize metadata management
+        self.metadata_extractor = MetadataExtractor(config, verbose=verbose)
+        self.metadata_manager = MetadataManager(config, verbose=verbose)
+
+    def extract_video_metadata(self, url: str) -> Dict[str, Any]:
+        """
+        Extract metadata from video URL.
+
+        Args:
+            url: Video URL to extract metadata from
+
+        Returns:
+            Dictionary containing extracted metadata
+        """
+        try:
+            if "youtube.com" in url or "youtu.be" in url:
+                metadata = self.metadata_extractor.extract_youtube_metadata(url)
+                self.logger.info(
+                    f"Extracted YouTube metadata: {metadata.get('title', 'Unknown')}"
+                )
+                return metadata
+            else:
+                self.logger.warning(f"Unsupported URL for metadata extraction: {url}")
+                return {}
+        except Exception as e:
+            self.logger.error(f"Failed to extract metadata from {url}: {e}")
+            return {}
+
+    def enrich_media_file(self, media_file_id: int) -> bool:
+        """
+        Enrich media file with additional metadata.
+
+        Args:
+            media_file_id: ID of media file to enrich
+
+        Returns:
+            True if enrichment successful, False otherwise
+        """
+        try:
+            media_file = self.repos.media.get_by_id(media_file_id)
+            if not media_file:
+                self.logger.error(f"Media file not found: {media_file_id}")
+                return False
+
+            # Enrich with additional metadata
+            self.metadata_manager.enrich_media_file(
+                media_file, self.repos.media, extract_source_metadata=True
+            )
+
+            self.logger.info(f"Enriched metadata for media file: {media_file_id}")
+            return True
+
+        except Exception as e:
+            self.logger.error(f"Failed to enrich media file {media_file_id}: {e}")
+            return False
+
+    def update_media_file_metadata(
+        self, media_file_id: int, metadata: Dict[str, Any]
+    ) -> bool:
+        """
+        Update media file with new metadata.
+
+        Args:
+            media_file_id: ID of media file to update
+            metadata: New metadata to apply
+
+        Returns:
+            True if update successful, False otherwise
+        """
+        try:
+            media_file = self.repos.media.get_by_id(media_file_id)
+            if not media_file:
+                self.logger.error(f"Media file not found: {media_file_id}")
+                return False
+
+            # Update media file with new metadata
+            self.repos.media.update(media_file_id, **metadata)
+
+            self.logger.info(f"Updated metadata for media file: {media_file_id}")
+            return True
+
+        except Exception as e:
+            self.logger.error(
+                f"Failed to update metadata for media file {media_file_id}: {e}"
+            )
+            return False
+
+    def get_media_file_metadata(self, media_file_id: int) -> Optional[Dict[str, Any]]:
+        """
+        Get metadata for a media file.
+
+        Args:
+            media_file_id: ID of media file
+
+        Returns:
+            Dictionary containing media file metadata, or None if not found
+        """
+        try:
+            media_file = self.repos.media.get_by_id(media_file_id)
+            if not media_file:
+                return None
+
+            # Convert SQLAlchemy object to dictionary
+            metadata = {
+                "id": media_file.id,
+                "file_path": media_file.file_path,
+                "file_name": media_file.file_name,
+                "file_size": media_file.file_size,
+                "file_hash": media_file.file_hash,
+                "media_type": media_file.media_type,
+                "mime_type": media_file.mime_type,
+                "source_url": media_file.source_url,
+                "source_platform": media_file.source_platform,
+                "source_id": media_file.source_id,
+                "title": media_file.title,
+                "description": media_file.description,
+                "uploader": media_file.uploader,
+                "uploader_id": media_file.uploader_id,
+                "upload_date": media_file.upload_date,
+                "view_count": media_file.view_count,
+                "like_count": media_file.like_count,
+                "duration": media_file.duration,
+                "language": media_file.language,
+                "created_at": media_file.created_at,
+                "updated_at": media_file.updated_at,
+            }
+
+            return metadata
+
+        except Exception as e:
+            self.logger.error(
+                f"Failed to get metadata for media file {media_file_id}: {e}"
+            )
+            return None
+
+    def search_media_files(self, query: str, media_type: Optional[str] = None) -> list:
+        """
+        Search media files by metadata.
+
+        Args:
+            query: Search query
+            media_type: Optional media type filter
+
+        Returns:
+            List of matching media files
+        """
+        try:
+            from database.models import MediaType
+
+            media_type_enum = None
+            if media_type:
+                try:
+                    media_type_enum = MediaType(media_type)
+                except ValueError:
+                    self.logger.warning(f"Invalid media type: {media_type}")
+
+            results = self.repos.media.search(query, media_type_enum)
+
+            self.logger.info(f"Found {len(results)} media files matching '{query}'")
+            return results
+
+        except Exception as e:
+            self.logger.error(f"Failed to search media files: {e}")
+            return []
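For orientation, here is a minimal usage sketch of MetadataService. This is hypothetical caller code, not part of the package: it assumes Config can be default-constructed and that BaseService wires up the self.repos repositories behind the scenes, neither of which is shown in this diff.

    from core.config import Config
    from modules.video.services.metadata_service import MetadataService

    config = Config()  # assumed: default construction; actual setup may differ
    service = MetadataService(config, verbose=True)

    # extract_video_metadata() returns {} for unsupported URLs and on errors
    # instead of raising, so callers can branch on the empty dict.
    meta = service.extract_video_metadata("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
    if meta:
        print(meta.get("title"))

    # Updates are plain keyword passthroughs to the media repository.
    service.update_media_file_metadata(media_file_id=1, metadata={"language": "en"})
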
modules/video/services/playlist_service.py
@@ -0,0 +1,445 @@
+"""
+Playlist service.
+
+This module provides focused playlist management functionality.
+"""
+
+import shutil
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+
+from core.base import ProcessingResult
+from core.base_service import BaseService
+from core.config import Config
+from database.models import MediaType, ProcessingStatus
+
+
+class PlaylistService(BaseService):
+    """
+    Focused playlist service.
+
+    Handles playlist downloading and management without transcription concerns.
+    """
+
+    def __init__(self, config: Config, verbose: bool = False, db_service=None):
+        """Initialize the playlist service."""
+        # Initialize base service
+        super().__init__(config, verbose, db_service)
+
+        # Initialize metadata management
+        from database.metadata import MetadataExtractor, MetadataManager
+
+        self.metadata_extractor = MetadataExtractor(config, verbose=verbose)
+        self.metadata_manager = MetadataManager(config, verbose=verbose)
+
+    def download_playlist(
+        self, url: str, output_path: Optional[Union[str, Path]] = None, **kwargs
+    ) -> ProcessingResult:
+        """
+        Download playlist without transcription.
+
+        Args:
+            url: Playlist URL to download
+            output_path: Optional output directory (will create playlist folder)
+            **kwargs: Additional download options
+
+        Returns:
+            ProcessingResult with download results
+        """
+        try:
+            # Get playlist metadata first
+            playlist_info = self._get_playlist_info(url)
+            if not playlist_info:
+                return ProcessingResult(
+                    success=False,
+                    message="Failed to get playlist information",
+                    errors=["Could not extract playlist metadata"],
+                )
+
+            # Create playlist folder
+            playlist_name = self._sanitize_filename(
+                playlist_info.get("title", "Unknown Playlist")
+            )
+            playlist_id = playlist_info.get("id", "unknown")
+            folder_name = f"{playlist_name} [{playlist_id}]"
+
+            if output_path:
+                playlist_dir = Path(output_path) / folder_name
+            else:
+                from core.config import get_default_data_dir
+
+                repo_root = get_default_data_dir().parent
+                playlist_dir = repo_root / "downloads" / folder_name
+
+            # Check if output is on NAS and set up temp processing if needed
+            is_nas = self._is_nas_path(playlist_dir)
+
+            # Create processing job
+            job = self.repos.jobs.create(
+                media_file_id=None,  # Will be updated after processing
+                job_type="download_playlist",
+                input_path=url,
+                output_path=str(playlist_dir),
+                parameters=str(kwargs),
+            )
+            self.logger.info(f"Created playlist processing job: {job.id}")
+
+            temp_dir = None
+            processing_dir = playlist_dir
+
+            if is_nas:
+                # Create job-specific temp processing directory with playlist folder
+                temp_dir = self._get_temp_processing_dir(job.id)
+                processing_dir = temp_dir / folder_name
+                self.logger.info(
+                    f"NAS detected for playlist, using temp processing: {temp_dir}"
+                )
+                self.logger.info(f"Playlist will be processed in: {processing_dir}")
+
+            processing_dir.mkdir(parents=True, exist_ok=True)
+
+            # Create or update playlist record in database
+            existing_playlist = self.repos.playlists.get_by_playlist_id(playlist_id)
+            if existing_playlist:
+                # Update existing playlist
+                existing_playlist.title = playlist_name
+                existing_playlist.description = playlist_info.get("description")
+                existing_playlist.uploader = playlist_info.get("uploader")
+                existing_playlist.uploader_id = playlist_info.get("uploader_id")
+                existing_playlist.source_url = url
+                existing_playlist.source_platform = "youtube"
+                existing_playlist.video_count = playlist_info.get("playlist_count")
+                existing_playlist.view_count = playlist_info.get("view_count")
+                existing_playlist.thumbnail_url = playlist_info.get("thumbnail")
+                playlist_record = existing_playlist
+            else:
+                playlist_record = self.repos.playlists.create(
+                    playlist_id=playlist_id,
+                    title=playlist_name,
+                    description=playlist_info.get("description"),
+                    uploader=playlist_info.get("uploader"),
+                    uploader_id=playlist_info.get("uploader_id"),
+                    source_url=url,
+                    source_platform="youtube",
+                    video_count=playlist_info.get("playlist_count"),
+                    view_count=playlist_info.get("view_count"),
+                    thumbnail_url=playlist_info.get("thumbnail"),
+                )
+
+            self.logger.info(f"Downloading playlist to: {playlist_dir}")
+            self.logger.info(f"Playlist record: {playlist_record.id}")
+
+            # Mark job as processing (sets started_at for duration tracking)
+            self.repos.jobs.update_status(job.id, ProcessingStatus.PROCESSING)
+
+            # Download playlist using yt-dlp Python package
+            self.logger.info(f"Downloading playlist from: {url}")
+
+            # Build yt-dlp options
+            ydl_opts = self._build_playlist_ydl_opts(processing_dir, **kwargs)
+
+            # Execute download
+            import yt_dlp
+
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                ydl.download([url])
+
+            # Check if download was successful by looking for files
+            downloaded_videos = self._find_playlist_videos(processing_dir)
+            max_videos = kwargs.get("max_videos")
+            if (
+                isinstance(max_videos, int)
+                and max_videos > 0
+                and len(downloaded_videos) > max_videos
+            ):
+                downloaded_videos = sorted(
+                    downloaded_videos,
+                    key=lambda path: path.stat().st_mtime,
+                    reverse=True,
+                )[:max_videos]
+                self.logger.info(
+                    f"Limiting processed videos to most recent {max_videos}"
+                )
+
+            if downloaded_videos:
+                self.logger.info(
+                    f"Processing {len(downloaded_videos)} downloaded videos from playlist"
+                )
+
+                # Process each video (metadata only, no transcription)
+                successful_downloads = []
+                failed_downloads = []
+
+                for position, video_path in enumerate(downloaded_videos, 1):
+                    try:
+                        # Extract video metadata and create/update media file record
+                        video_id = self._extract_video_id_from_path(video_path)
+
+                        # Get source metadata for this video
+                        source_metadata = (
+                            self.metadata_extractor.extract_youtube_metadata(
+                                f"https://www.youtube.com/watch?v={video_id}"
+                            )
+                        )
+
+                        # Create media file record
+                        from utils.helpers import get_file_hash, get_file_type
+
+                        media_file = self.repos.media.create(
+                            file_path=str(video_path),
+                            file_name=video_path.name,
+                            file_size=video_path.stat().st_size,
+                            file_hash=get_file_hash(video_path),
+                            media_type=MediaType.VIDEO,
+                            mime_type=get_file_type(video_path),
+                            source_url=f"https://www.youtube.com/watch?v={video_id}",
+                            source_platform="youtube",
+                            source_id=video_id,
+                            title=source_metadata.get("title", video_path.stem),
+                            description=source_metadata.get("description"),
+                            uploader=source_metadata.get("uploader"),
+                            uploader_id=source_metadata.get("uploader_id"),
+                            upload_date=source_metadata.get("upload_date"),
+                            view_count=source_metadata.get("view_count"),
+                            like_count=source_metadata.get("like_count"),
+                            duration=source_metadata.get("duration"),
+                            language=source_metadata.get("language"),
+                        )
+
+                        # Enrich with additional metadata
+                        self.metadata_manager.enrich_media_file(
+                            media_file, self.repos.media, extract_source_metadata=True
+                        )
+
+                        # Link video to playlist
+                        self.repos.playlist_videos.add_video_to_playlist(
+                            playlist_id=playlist_record.id,
+                            media_file_id=media_file.id,
+                            position=position,
+                            video_title=media_file.title,
+                        )
+
+                        successful_downloads.append(str(video_path))
+
+                    except Exception as e:
+                        self.logger.error(f"Failed to process {video_path.name}: {e}")
+                        failed_downloads.append(str(video_path))
+
+                # If we used temp processing, move entire playlist directory to final destination
+                if is_nas and temp_dir:
+                    self.logger.info("Moving playlist directory to NAS destination...")
+
+                    # Move the entire playlist directory from temp to final destination
+                    if self._move_playlist_to_final_destination(
+                        processing_dir, playlist_dir
+                    ):
+                        self.logger.info(
+                            f"Successfully moved playlist directory to NAS: {playlist_dir}"
+                        )
+
+                        # Update job status to completed
+                        self.repos.jobs.update_status(
+                            job.id, ProcessingStatus.COMPLETED
+                        )
+
+                        # Clean up temp directory after successful move
+                        self._cleanup_temp_directory(temp_dir)
+                        self.logger.info(f"Cleaned up temp directory: {temp_dir}")
+                    else:
+                        self.logger.error("Failed to move playlist directory to NAS")
+                        self.repos.jobs.update_status(
+                            job.id,
+                            ProcessingStatus.FAILED,
+                            error_message="Failed to move playlist to NAS",
+                        )
+                        return ProcessingResult.error_result(
+                            message="Playlist downloaded but failed to move to NAS",
+                            errors=[
+                                "Failed to move playlist directory to final destination"
+                            ],
+                        )
+                else:
+                    # For local downloads, update job status
+                    self.repos.jobs.update_status(job.id, ProcessingStatus.COMPLETED)
+
+                return ProcessingResult.success_result(
+                    message=f"Playlist downloaded successfully: {len(successful_downloads)} videos",
+                    output_path=playlist_dir,
+                    metadata={
+                        "playlist_title": playlist_name,
+                        "playlist_id": playlist_id,
+                        "total_videos": len(downloaded_videos),
+                        "successful_downloads": len(successful_downloads),
+                        "failed_downloads": len(failed_downloads),
+                        "nas_processing": is_nas,
+                        "videos_downloaded": len(successful_downloads),
+                    },
+                )
+            else:
+                return ProcessingResult.error_result(
+                    message="Playlist download completed but no videos found",
+                    errors=["No video files found in download directory"],
+                )
+
+        except Exception as e:
+            self.logger.error(f"Playlist download failed: {e}")
+            return ProcessingResult.error_result(
+                message=f"Playlist download failed: {e}", errors=[str(e)]
+            )
+
+    def _get_playlist_info(self, url: str) -> Optional[Dict[str, Any]]:
+        """Get playlist information."""
+        try:
+            import yt_dlp
+
+            ydl_opts = {
+                "quiet": True,
+                "no_warnings": True,
+                "extract_flat": True,
+            }
+
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                info = ydl.extract_info(url, download=False)
+                return info
+
+        except Exception as e:
+            self.logger.error(f"Failed to get playlist info: {e}")
+            return None
+
+    def _get_cookies_from_browser(self) -> Optional[tuple]:
+        """Try to get cookies from common browsers automatically.
+
+        Returns a tuple of browsers to try in order. yt-dlp will try each browser
+        until one works, or continue without cookies if none are available.
+
+        Note: On macOS, Chrome is more reliable than Safari for cookie extraction.
+        """
+        # Try browsers in order of preference
+        # On macOS, Chrome is more reliable than Safari (Safari cookies are harder to access)
+        # yt-dlp will try each browser until one works
+        import platform
+
+        system = platform.system().lower()
+
+        if system == "darwin":  # macOS - prioritize Chrome over Safari
+            browsers = ("chrome", "safari", "firefox", "edge")
+        else:  # Linux, Windows, etc.
+            browsers = ("chrome", "firefox", "safari", "edge")
+
+        return browsers
+
+    def _build_playlist_ydl_opts(self, output_dir: Path, **kwargs) -> Dict:
+        """Build yt-dlp options for playlist download."""
+        # Output template for playlist
+        output_template = str(output_dir / "%(title)s [%(id)s].%(ext)s")
+
+        ydl_opts = {
+            "outtmpl": output_template,
+            "format": self._get_format_selector(
+                kwargs.get("quality", self.config.video.quality),
+                kwargs.get("format", self.config.video.default_format),
+            ),
+            "writeinfojson": False,  # Don't write info files
+            "writesubtitles": False,  # We handle subtitles separately
+            "writeautomaticsub": False,
+            "ignoreerrors": True,  # Continue on individual video errors
+            "no_warnings": not self.verbose,
+            "quiet": not self.verbose,
+            "extract_flat": False,  # We want to download, not just extract info
+        }
+
+        max_videos = kwargs.get("max_videos")
+        if isinstance(max_videos, int) and max_videos > 0:
+            ydl_opts["playlistend"] = max_videos
+
+        # Automatically try to use cookies from browser for age-restricted content
+        cookies_browser = self._get_cookies_from_browser()
+        if cookies_browser:
+            ydl_opts["cookies_from_browser"] = cookies_browser
+
+        # Additional options
+        if self.verbose:
+            ydl_opts["verbose"] = True
+
+        return ydl_opts
+
+    def _get_format_selector(self, quality: str, format: str) -> str:
+        """Get format selector for yt-dlp."""
+        if quality == "best":
+            return f"best[ext={format}]/best"
+        elif quality == "worst":
+            return f"worst[ext={format}]/worst"
+        else:
+            return f"best[height<={quality}][ext={format}]/best"
+
+    def _find_playlist_videos(self, directory: Path) -> List[Path]:
+        """Find downloaded video files in playlist directory."""
+        video_extensions = self.config.video_extensions
+        video_files = []
+
+        for ext in video_extensions:
+            video_files.extend(directory.rglob(f"*{ext}"))
+
+        return video_files
+
+    def _extract_video_id_from_path(self, video_path: Path) -> str:
+        """Extract video ID from file path."""
+        # Look for [video_id] pattern in filename
+        import re
+
+        match = re.search(r"\[([a-zA-Z0-9_-]{11})\]", video_path.name)
+        if match:
+            return match.group(1)
+        return "unknown"
+
+    def _sanitize_filename(self, filename: str) -> str:
+        """Sanitize filename for filesystem."""
+        import re
+
+        # Remove or replace invalid characters
+        filename = re.sub(r'[<>:"/\\|?*]', "_", filename)
+        # Limit length
+        max_length = self.config.max_filename_length
+        if len(filename) > max_length:
+            filename = filename[:max_length]
+        return filename
+
+    def _is_nas_path(self, path: Union[str, Path]) -> bool:
+        """Check if path is on NAS."""
+        path_str = str(path)
+        return any(
+            nas_indicator in path_str.lower()
+            for nas_indicator in [
+                "/volumes/",
+                "/mnt/",
+                "nas",
+                "network",
+                "smb://",
+                "nfs://",
+            ]
+        )
+
+    def _get_temp_processing_dir(self, job_id: int) -> Path:
+        """Get temporary processing directory for job."""
+        temp_dir = self.config.video.temp_dir / str(job_id)
+        temp_dir.mkdir(parents=True, exist_ok=True)
+        return temp_dir
+
+    def _move_playlist_to_final_destination(
+        self, source_dir: Path, dest_dir: Path
+    ) -> bool:
+        """Move playlist directory to final destination."""
+        try:
+            dest_dir.parent.mkdir(parents=True, exist_ok=True)
+            shutil.move(str(source_dir), str(dest_dir))
+            return True
+        except Exception as e:
+            self.logger.error(f"Failed to move playlist directory: {e}")
+            return False
+
+    def _cleanup_temp_directory(self, temp_dir: Path):
+        """Clean up temporary directory."""
+        try:
+            shutil.rmtree(temp_dir)
+        except Exception as e:
+            self.logger.warning(f"Failed to clean up temp directory {temp_dir}: {e}")
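A matching sketch for PlaylistService, under the same Config assumption. Note that download_playlist reports failure through the returned ProcessingResult (whose success, output_path, metadata, and errors fields appear to mirror the constructor arguments used in the module above) rather than by raising, so callers inspect the result:

    from core.config import Config
    from modules.video.services.playlist_service import PlaylistService

    config = Config()  # assumed: default construction; actual setup may differ
    service = PlaylistService(config, verbose=True)

    result = service.download_playlist(
        "https://www.youtube.com/playlist?list=PLxxxxxxxx",  # hypothetical playlist URL
        output_path="downloads",
        max_videos=5,  # forwarded to yt-dlp as playlistend, then used to trim results
    )
    if result.success:
        print(f"{result.metadata['videos_downloaded']} videos in {result.output_path}")
    else:
        print(f"Failed: {result.errors}")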