PyPI - spatelier - Versions diffs - 0.3.0__py3-none-any.whl - Mend

spatelier 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

analytics/__init__.py +1 -0
analytics/reporter.py +497 -0
cli/__init__.py +1 -0
cli/app.py +147 -0
cli/audio.py +129 -0
cli/cli_analytics.py +320 -0
cli/cli_utils.py +282 -0
cli/error_handlers.py +122 -0
cli/files.py +299 -0
cli/update.py +325 -0
cli/video.py +823 -0
cli/worker.py +615 -0
core/__init__.py +1 -0
core/analytics_dashboard.py +368 -0
core/base.py +303 -0
core/base_service.py +69 -0
core/config.py +345 -0
core/database_service.py +116 -0
core/decorators.py +263 -0
core/error_handler.py +210 -0
core/file_tracker.py +254 -0
core/interactive_cli.py +366 -0
core/interfaces.py +166 -0
core/job_queue.py +437 -0
core/logger.py +79 -0
core/package_updater.py +469 -0
core/progress.py +228 -0
core/service_factory.py +295 -0
core/streaming.py +299 -0
core/worker.py +765 -0
database/__init__.py +1 -0
database/connection.py +265 -0
database/metadata.py +516 -0
database/models.py +288 -0
database/repository.py +592 -0
database/transcription_storage.py +219 -0
modules/__init__.py +1 -0
modules/audio/__init__.py +5 -0
modules/audio/converter.py +197 -0
modules/video/__init__.py +16 -0
modules/video/converter.py +191 -0
modules/video/fallback_extractor.py +334 -0
modules/video/services/__init__.py +18 -0
modules/video/services/audio_extraction_service.py +274 -0
modules/video/services/download_service.py +852 -0
modules/video/services/metadata_service.py +190 -0
modules/video/services/playlist_service.py +445 -0
modules/video/services/transcription_service.py +491 -0
modules/video/transcription_service.py +385 -0
modules/video/youtube_api.py +397 -0
spatelier/__init__.py +33 -0
spatelier-0.3.0.dist-info/METADATA +260 -0
spatelier-0.3.0.dist-info/RECORD +59 -0
spatelier-0.3.0.dist-info/WHEEL +5 -0
spatelier-0.3.0.dist-info/entry_points.txt +2 -0
spatelier-0.3.0.dist-info/licenses/LICENSE +21 -0
spatelier-0.3.0.dist-info/top_level.txt +7 -0
utils/__init__.py +1 -0
utils/helpers.py +250 -0

modules/video/fallback_extractor.py ADDED Viewed

@@ -0,0 +1,334 @@
+"""
+Fallback URL extractor for video downloads.
+This module provides fallback functionality when yt-dlp fails to download a video.
+It attempts to extract video URLs directly from the webpage source code.
+"""
+import re
+import time
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+from urllib.parse import urljoin, urlparse
+try:
+    import requests
+    from bs4 import BeautifulSoup
+except ImportError:  # pragma: no cover - optional dependency
+    requests = None
+    BeautifulSoup = None
+from loguru import logger
+from core.config import Config
+class FallbackExtractor:
+    """
+    Fallback video URL extractor.
+    Extracts video URLs from webpage source when yt-dlp fails.
+    Includes safety limits to prevent runaway downloads.
+    """
+    def __init__(self, config: Config):
+        """
+        Initialize the fallback extractor.
+        Args:
+            config: Main configuration instance
+        """
+        if requests is None or BeautifulSoup is None:
+            raise RuntimeError(
+                "Fallback extraction requires 'web' dependencies. Install the 'web' extra to enable it."
+            )
+        self.config = config
+        # Use flattened config properties
+        self.max_files = config.fallback_max_files
+        self.max_total_size_mb = (
+            1000 * 1024 * 1024
+        )  # 1GB default (fallback_max_total_size_mb removed, use constant)
+        self.timeout = config.fallback_timeout_seconds
+        self.session = requests.Session()
+        self.session.headers.update(
+            {
+                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
+            }
+        )
+        # Video file extensions to look for
+        self.video_extensions = set(config.video_extensions)
+        # Common video URL patterns
+        extensions_pattern = "|".join(config.video_extensions).replace(".", "")
+        self.video_patterns = [
+            f'https?://[^"\\s]+\\.(?:{extensions_pattern})(?:\\?[^"\\s]*)?',
+            f'https?://[^"\\s]*video[^"\\s]*\\.(?:{extensions_pattern})(?:\\?[^"\\s]*)?',
+            f'https?://[^"\\s]*stream[^"\\s]*\\.(?:{extensions_pattern})(?:\\?[^"\\s]*)?',
+        ]
+    def extract_video_urls(self, url: str) -> List[str]:
+        """
+        Extract potential video URLs from a webpage.
+        Args:
+            url: The webpage URL to analyze
+        Returns:
+            List of potential video URLs
+        """
+        try:
+            logger.info(f"Extracting video URLs from: {url}")
+            # Get the webpage content
+            response = self.session.get(url, timeout=self.timeout)
+            response.raise_for_status()
+            # Parse HTML
+            soup = BeautifulSoup(response.content, "html.parser")
+            # Find video URLs using multiple methods
+            video_urls = set()
+            # Method 1: Look for direct video links in href/src attributes
+            for tag in soup.find_all(["a", "source", "video"]):
+                for attr in ["href", "src", "data-src"]:
+                    if tag.get(attr):
+                        full_url = urljoin(url, tag[attr])
+                        if self._is_video_url(full_url):
+                            video_urls.add(full_url)
+            # Method 2: Search for video URLs in script tags and page content
+            page_text = response.text
+            for pattern in self.video_patterns:
+                matches = re.findall(pattern, page_text, re.IGNORECASE)
+                for match in matches:
+                    full_url = urljoin(url, match)
+                    if self._is_video_url(full_url):
+                        video_urls.add(full_url)
+            # Method 3: Look for common video hosting patterns
+            video_urls.update(self._extract_hosting_urls(soup, url))
+            logger.info(f"Found {len(video_urls)} potential video URLs")
+            return list(video_urls)
+        except Exception as e:
+            logger.error(f"Failed to extract video URLs from {url}: {e}")
+            return []
+    def _is_video_url(self, url: str) -> bool:
+        """Check if a URL points to a video file."""
+        try:
+            parsed = urlparse(url)
+            path = Path(parsed.path)
+            # Check file extension
+            if path.suffix.lower() in self.video_extensions:
+                return True
+            # Check for common video hosting domains
+            video_domains = {
+                "youtube.com",
+                "youtu.be",
+                "vimeo.com",
+                "dailymotion.com",
+                "twitch.tv",
+                "streamable.com",
+                "gfycat.com",
+            }
+            if any(domain in parsed.netloc.lower() for domain in video_domains):
+                return True
+            return False
+        except Exception:
+            return False
+    def _extract_hosting_urls(self, soup: BeautifulSoup, base_url: str) -> set:
+        """Extract URLs from common video hosting platforms."""
+        urls = set()
+        # Look for iframe sources (common for embedded videos)
+        for iframe in soup.find_all("iframe"):
+            src = iframe.get("src")
+            if src:
+                full_url = urljoin(base_url, src)
+                urls.add(full_url)
+        # Look for video elements
+        for video in soup.find_all("video"):
+            for source in video.find_all("source"):
+                src = source.get("src")
+                if src:
+                    full_url = urljoin(base_url, src)
+                    urls.add(full_url)
+        return urls
+    def download_video(
+        self, video_url: str, output_path: Path
+    ) -> Tuple[bool, str, int]:
+        """
+        Download a video from URL with safety checks.
+        Args:
+            video_url: URL of the video to download
+            output_path: Path to save the video
+        Returns:
+            Tuple of (success, message, file_size_bytes)
+        """
+        try:
+            logger.info(f"Attempting to download: {video_url}")
+            # Get file info first (HEAD request)
+            head_response = self.session.head(video_url, timeout=self.timeout)
+            head_response.raise_for_status()
+            # Check content type
+            content_type = head_response.headers.get("content-type", "").lower()
+            if not any(
+                video_type in content_type
+                for video_type in ["video/", "application/octet-stream"]
+            ):
+                return (
+                    False,
+                    f"URL does not appear to be a video (content-type: {content_type})",
+                    0,
+                )
+            # Check file size
+            content_length = head_response.headers.get("content-length")
+            if content_length:
+                file_size = int(content_length)
+                if file_size > self.max_total_size_mb:
+                    return (
+                        False,
+                        f"File too large: {file_size / (1024*1024):.1f}MB",
+                        file_size,
+                    )
+            else:
+                # If we can't determine size, we'll check during download
+                file_size = 0
+            # Download the file
+            response = self.session.get(video_url, stream=True, timeout=self.timeout)
+            response.raise_for_status()
+            # Ensure output directory exists
+            output_path.parent.mkdir(parents=True, exist_ok=True)
+            # Download with size checking
+            downloaded_size = 0
+            with open(output_path, "wb") as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        f.write(chunk)
+                        downloaded_size += len(chunk)
+                        # Check size limit during download
+                        if downloaded_size > self.max_total_size_mb:
+                            output_path.unlink()  # Remove partial file
+                            return (
+                                False,
+                                f"Download exceeded size limit: {downloaded_size / (1024*1024):.1f}MB",
+                                downloaded_size,
+                            )
+            logger.info(
+                f"Successfully downloaded: {output_path} ({downloaded_size / (1024*1024):.1f}MB)"
+            )
+            return True, "Download successful", downloaded_size
+        except Exception as e:
+            logger.error(f"Failed to download {video_url}: {e}")
+            return False, str(e), 0
+    def fallback_download(self, url: str, output_dir: Path) -> List[Dict]:
+        """
+        Attempt fallback download when yt-dlp fails.
+        Args:
+            url: Original URL that failed
+            output_dir: Directory to save downloaded files
+        Returns:
+            List of download results with success status and file paths
+        """
+        logger.info(f"Starting fallback download for: {url}")
+        # Extract potential video URLs
+        video_urls = self.extract_video_urls(url)
+        if not video_urls:
+            logger.warning("No video URLs found in fallback extraction")
+            return [
+                {"success": False, "message": "No video URLs found", "file_path": None}
+            ]
+        # Limit number of URLs to try
+        video_urls = video_urls[: self.max_files]
+        logger.info(f"Attempting to download {len(video_urls)} potential videos")
+        results = []
+        total_downloaded = 0
+        for i, video_url in enumerate(video_urls):
+            if total_downloaded >= self.max_total_size_mb:
+                logger.warning(
+                    f"Reached total size limit: {total_downloaded / (1024*1024):.1f}MB"
+                )
+                break
+            # Create output filename
+            parsed_url = urlparse(video_url)
+            filename = Path(parsed_url.path).name
+            if not filename or "." not in filename:
+                filename = f"video_{i+1}.mp4"
+            output_path = output_dir / filename
+            # Skip if file already exists
+            if output_path.exists():
+                logger.info(f"File already exists, skipping: {output_path}")
+                results.append(
+                    {
+                        "success": True,
+                        "message": "File already exists",
+                        "file_path": output_path,
+                        "size": output_path.stat().st_size,
+                    }
+                )
+                continue
+            # Attempt download
+            success, message, file_size = self.download_video(video_url, output_path)
+            results.append(
+                {
+                    "success": success,
+                    "message": message,
+                    "file_path": output_path if success else None,
+                    "size": file_size,
+                }
+            )
+            if success:
+                total_downloaded += file_size
+                logger.info(f"Downloaded {i+1}/{len(video_urls)}: {output_path}")
+            else:
+                logger.warning(f"Failed to download {i+1}/{len(video_urls)}: {message}")
+            # Small delay between downloads
+            time.sleep(1)
+        successful_downloads = [r for r in results if r["success"]]
+        logger.info(
+            f"Fallback download completed: {len(successful_downloads)}/{len(results)} successful"
+        )
+        return results

modules/video/services/__init__.py ADDED Viewed

@@ -0,0 +1,18 @@
+"""
+Video services module.
+This module provides focused services for video processing,
+separated by concern for better maintainability.
+"""
+from .download_service import VideoDownloadService
+from .metadata_service import MetadataService
+from .playlist_service import PlaylistService
+from .transcription_service import TranscriptionService
+# Names re-exported by `from modules.video.services import *`.
+# NOTE(review): AudioExtractionService (audio_extraction_service.py in this
+# package) is neither imported nor listed here — confirm that is intentional.
+__all__ = [
+    "VideoDownloadService",
+    "MetadataService",
+    "TranscriptionService",
+    "PlaylistService",
+]

modules/video/services/audio_extraction_service.py ADDED Viewed

@@ -0,0 +1,274 @@
+"""
+Audio extraction service for YouTube videos.
+This service provides clean, object-oriented audio extraction from YouTube videos
+with proper separation of concerns and error handling.
+"""
+import shutil
+import tempfile
+from pathlib import Path
+from typing import Any, Dict, Optional
+from core.base import ProcessingResult
+from core.base_service import BaseService
+from core.progress import track_progress
+from modules.audio.converter import AudioConverter
+from modules.video.services.download_service import VideoDownloadService
+class AudioExtractionService(BaseService):
+    """
+    Service for extracting audio from YouTube videos.
+    Provides clean, object-oriented audio extraction with proper error handling,
+    progress tracking, and resource management.
+    """
+    def __init__(self, config, verbose: bool = False, db_service=None):
+        """
+        Initialize audio extraction service.
+        Args:
+            config: Configuration instance
+            verbose: Enable verbose logging
+            db_service: Optional database service instance
+        """
+        super().__init__(config, verbose, db_service)
+        self.logger = self.logger.bind(service="AudioExtractionService")
+        # Initialize dependencies
+        self._audio_converter: Optional[AudioConverter] = None
+        self._download_service: Optional[VideoDownloadService] = None
+    @property
+    def audio_converter(self) -> AudioConverter:
+        """Get audio converter service (lazy initialization)."""
+        if self._audio_converter is None:
+            self._audio_converter = AudioConverter(
+                self.config, verbose=self.verbose, db_service=self.db_factory
+            )
+        return self._audio_converter
+    @property
+    def download_service(self) -> VideoDownloadService:
+        """Get video download service (lazy initialization)."""
+        if self._download_service is None:
+            self._download_service = VideoDownloadService(
+                self.config, verbose=self.verbose, db_service=self.db_factory
+            )
+        return self._download_service
+    def extract_audio_from_url(
+        self, url: str, output_dir: Path, format: str = "mp3", bitrate: int = 320
+    ) -> ProcessingResult:
+        """
+        Extract audio from YouTube URL.
+        Args:
+            url: YouTube video URL
+            output_dir: Output directory for audio file
+            format: Audio format (mp3, wav, flac, aac, ogg, m4a)
+            bitrate: Audio bitrate in kbps
+        Returns:
+            ProcessingResult with extraction details
+        """
+        self.logger.info(f"Starting audio extraction from URL: {url}")
+        # Validate inputs
+        validation_result = self._validate_inputs(url, output_dir, format, bitrate)
+        if not validation_result.is_successful():
+            return validation_result
+        # Create output directory
+        output_dir.mkdir(parents=True, exist_ok=True)
+        temp_dir = None
+        try:
+            with track_progress(
+                "Extracting audio from video...", verbose=self.verbose
+            ) as progress:
+                # Step 1: Download video to temporary location
+                temp_dir = self._create_temp_directory()
+                download_result = self._download_video(url, temp_dir)
+                if not download_result.is_successful():
+                    return ProcessingResult.fail(
+                        f"Failed to download video: {download_result.message}",
+                        errors=download_result.errors,
+                    )
+                progress.update(0.3, "Video downloaded, extracting audio...")
+                # Step 2: Find downloaded file
+                input_file = self._find_downloaded_file(temp_dir)
+                if not input_file:
+                    return ProcessingResult.fail("No audio file found after download")
+                progress.update(0.2, "Converting audio format...")
+                # Step 3: Convert to desired format
+                output_file = self._generate_output_path(input_file, output_dir, format)
+                conversion_result = self._convert_audio(
+                    input_file, output_file, format, bitrate
+                )
+                if not conversion_result.is_successful():
+                    return ProcessingResult.fail(
+                        f"Audio conversion failed: {conversion_result.message}",
+                        errors=conversion_result.errors,
+                    )
+                progress.update(0.5, "Audio extraction completed!")
+                # Step 4: Prepare success result
+                return self._create_success_result(output_file, format, bitrate)
+        except Exception as e:
+            self.logger.error(f"Audio extraction failed: {e}")
+            return ProcessingResult.fail(f"Audio extraction failed: {e}")
+        finally:
+            # Clean up temporary directory
+            if temp_dir and temp_dir.exists():
+                self._cleanup_temp_directory(temp_dir)
+    def _validate_inputs(
+        self, url: str, output_dir: Path, format: str, bitrate: int
+    ) -> ProcessingResult:
+        """Validate input parameters."""
+        errors = []
+        if not url or not url.strip():
+            errors.append("URL cannot be empty")
+        if not url.startswith(("http://", "https://")):
+            errors.append("URL must start with http:// or https://")
+        if format.lower() not in ["mp3", "wav", "flac", "aac", "ogg", "m4a"]:
+            errors.append(f"Unsupported format: {format}")
+        if bitrate < 64 or bitrate > 512:
+            errors.append("Bitrate must be between 64 and 512 kbps")
+        if errors:
+            return ProcessingResult.fail("Input validation failed", errors=errors)
+        return ProcessingResult.success("Input validation passed")
+    def _create_temp_directory(self) -> Path:
+        """Create temporary directory for processing."""
+        temp_dir = Path(tempfile.mkdtemp(prefix="spatelier_audio_"))
+        self.logger.debug(f"Created temp directory: {temp_dir}")
+        return temp_dir
+    def _download_video(self, url: str, temp_dir: Path) -> ProcessingResult:
+        """Download video to temporary directory."""
+        try:
+            self.logger.info(f"Downloading video to: {temp_dir}")
+            result = self.download_service.download_video(
+                url=url,
+                output_path=temp_dir,
+                quality="bestaudio",  # Get best audio quality
+                format="bestaudio/best",  # Prefer audio-only formats
+            )
+            return result
+        except Exception as e:
+            self.logger.error(f"Video download failed: {e}")
+            return ProcessingResult.fail(f"Video download failed: {e}")
+    def _find_downloaded_file(self, temp_dir: Path) -> Optional[Path]:
+        """Find the downloaded audio/video file."""
+        # Look for common audio/video extensions
+        extensions = [".mp4", ".webm", ".mkv", ".avi", ".mov", ".m4a", ".mp3", ".wav"]
+        for ext in extensions:
+            files = list(temp_dir.glob(f"*{ext}"))
+            if files:
+                file_path = files[0]
+                self.logger.info(f"Found downloaded file: {file_path}")
+                return file_path
+        self.logger.warning("No audio/video file found after download")
+        return None
+    def _generate_output_path(
+        self, input_file: Path, output_dir: Path, format: str
+    ) -> Path:
+        """Generate output file path."""
+        output_filename = f"{input_file.stem}.{format}"
+        return output_dir / output_filename
+    def _convert_audio(
+        self, input_file: Path, output_file: Path, format: str, bitrate: int
+    ) -> ProcessingResult:
+        """Convert audio to desired format."""
+        try:
+            self.logger.info(f"Converting audio: {input_file} -> {output_file}")
+            result = self.audio_converter.convert(
+                input_path=input_file,
+                output_path=output_file,
+                bitrate=bitrate,
+                format=format,
+            )
+            return result
+        except Exception as e:
+            self.logger.error(f"Audio conversion failed: {e}")
+            return ProcessingResult.fail(f"Audio conversion failed: {e}")
+    def _create_success_result(
+        self, output_file: Path, format: str, bitrate: int
+    ) -> ProcessingResult:
+        """Create success result with metadata."""
+        try:
+            file_size = output_file.stat().st_size
+            file_size_mb = file_size / (1024 * 1024)
+            metadata = {
+                "file_size": file_size,
+                "file_size_mb": file_size_mb,
+                "format": format,
+                "bitrate": bitrate,
+                "output_path": str(output_file),
+            }
+            return ProcessingResult.success(
+                message=f"Audio extracted successfully: {output_file.name}",
+                output_path=output_file,
+                metadata=metadata,
+            )
+        except Exception as e:
+            self.logger.error(f"Failed to create success result: {e}")
+            return ProcessingResult.fail(f"Failed to create success result: {e}")
+    def _cleanup_temp_directory(self, temp_dir: Path):
+        """Clean up temporary directory."""
+        try:
+            if temp_dir.exists():
+                shutil.rmtree(temp_dir)
+                self.logger.debug(f"Cleaned up temp directory: {temp_dir}")
+        except Exception as e:
+            self.logger.warning(f"Failed to cleanup temp directory {temp_dir}: {e}")
+    def get_supported_formats(self) -> list[str]:
+        """Get list of supported audio formats."""
+        return ["mp3", "wav", "flac", "aac", "ogg", "m4a"]
+    def get_supported_bitrates(self) -> Dict[str, Any]:
+        """Get supported bitrate ranges by format."""
+        return {
+            "mp3": {"min": 64, "max": 320, "default": 320},
+            "wav": {"min": 128, "max": 1536, "default": 1411},
+            "flac": {"min": 128, "max": 1536, "default": 1411},
+            "aac": {"min": 64, "max": 320, "default": 256},
+            "ogg": {"min": 64, "max": 320, "default": 192},
+            "m4a": {"min": 64, "max": 320, "default": 256},
+        }