PyPI - spatelier - Versions diffs - 0.3.0__py3-none-any.whl - Mend

spatelier 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59)

analytics/__init__.py +1 -0
analytics/reporter.py +497 -0
cli/__init__.py +1 -0
cli/app.py +147 -0
cli/audio.py +129 -0
cli/cli_analytics.py +320 -0
cli/cli_utils.py +282 -0
cli/error_handlers.py +122 -0
cli/files.py +299 -0
cli/update.py +325 -0
cli/video.py +823 -0
cli/worker.py +615 -0
core/__init__.py +1 -0
core/analytics_dashboard.py +368 -0
core/base.py +303 -0
core/base_service.py +69 -0
core/config.py +345 -0
core/database_service.py +116 -0
core/decorators.py +263 -0
core/error_handler.py +210 -0
core/file_tracker.py +254 -0
core/interactive_cli.py +366 -0
core/interfaces.py +166 -0
core/job_queue.py +437 -0
core/logger.py +79 -0
core/package_updater.py +469 -0
core/progress.py +228 -0
core/service_factory.py +295 -0
core/streaming.py +299 -0
core/worker.py +765 -0
database/__init__.py +1 -0
database/connection.py +265 -0
database/metadata.py +516 -0
database/models.py +288 -0
database/repository.py +592 -0
database/transcription_storage.py +219 -0
modules/__init__.py +1 -0
modules/audio/__init__.py +5 -0
modules/audio/converter.py +197 -0
modules/video/__init__.py +16 -0
modules/video/converter.py +191 -0
modules/video/fallback_extractor.py +334 -0
modules/video/services/__init__.py +18 -0
modules/video/services/audio_extraction_service.py +274 -0
modules/video/services/download_service.py +852 -0
modules/video/services/metadata_service.py +190 -0
modules/video/services/playlist_service.py +445 -0
modules/video/services/transcription_service.py +491 -0
modules/video/transcription_service.py +385 -0
modules/video/youtube_api.py +397 -0
spatelier/__init__.py +33 -0
spatelier-0.3.0.dist-info/METADATA +260 -0
spatelier-0.3.0.dist-info/RECORD +59 -0
spatelier-0.3.0.dist-info/WHEEL +5 -0
spatelier-0.3.0.dist-info/entry_points.txt +2 -0
spatelier-0.3.0.dist-info/licenses/LICENSE +21 -0
spatelier-0.3.0.dist-info/top_level.txt +7 -0
utils/__init__.py +1 -0
utils/helpers.py +250 -0

database/transcription_storage.py ADDED Viewed

@@ -0,0 +1,219 @@
+"""
+SQLite-backed transcription storage (JSON + FTS5).
+Stores full transcription segments as JSON, with a flattened full_text column
+indexed by SQLite FTS5 for search.
+"""
+import json
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+from sqlalchemy import text
+from sqlalchemy.orm import Session
+from database.models import Transcription
+class SQLiteTranscriptionStorage:
+    """SQLite transcription storage adapter."""
+    def __init__(self, session: Session):
+        """
+        Initialize SQLite transcription storage.
+        Args:
+            session: SQLAlchemy session bound to SQLite
+        """
+        self.session = session
+    def store_transcription(
+        self, video_id: Union[str, int], transcription_data: Dict[str, Any]
+    ) -> int:
+        """
+        Store transcription data in SQLite.
+        Args:
+            video_id: ID of the video file (converted to int)
+            transcription_data: Transcription results with segments
+        Returns:
+            SQLite record ID
+        """
+        video_id_int = int(video_id) if isinstance(video_id, (str, int)) else video_id
+        segments = transcription_data.get("segments", [])
+        full_text = " ".join([seg.get("text", "").strip() for seg in segments]).strip()
+        record = Transcription(
+            media_file_id=video_id_int,
+            language=transcription_data.get("language"),
+            duration=transcription_data.get("duration"),
+            processing_time=transcription_data.get("processing_time"),
+            model_used=transcription_data.get("model_used"),
+            segments_json=segments,
+            full_text=full_text,
+        )
+        self.session.add(record)
+        self.session.commit()
+        self.session.refresh(record)
+        return record.id
+    def get_transcription(self, video_id: Union[str, int]) -> Optional[Dict[str, Any]]:
+        """
+        Get transcription for a video.
+        Args:
+            video_id: Video ID
+        Returns:
+            Transcription dictionary or None
+        """
+        video_id_int = int(video_id) if isinstance(video_id, (str, int)) else video_id
+        record = (
+            self.session.query(Transcription)
+            .filter(Transcription.media_file_id == video_id_int)
+            .order_by(Transcription.id.desc())
+            .first()
+        )
+        if not record:
+            return None
+        return {
+            "id": record.id,
+            "video_id": record.media_file_id,
+            "language": record.language,
+            "duration": record.duration,
+            "processing_time": record.processing_time,
+            "model_used": record.model_used,
+            "segments": record.segments_json,
+            "full_text": record.full_text,
+            "created_at": record.created_at,
+        }
+    def search_transcriptions(
+        self, query: str, limit: int = 10
+    ) -> List[Dict[str, Any]]:
+        """
+        Search transcriptions by text content using SQLite FTS5.
+        Args:
+            query: Search query
+            limit: Maximum number of results
+        Returns:
+            List of matching transcription dictionaries
+        """
+        sql = text(
+            """
+            SELECT t.id, t.media_file_id, t.language, t.duration, t.processing_time,
+                   t.model_used, t.segments_json, t.full_text, t.created_at
+            FROM transcriptions t
+            JOIN transcriptions_fts fts ON fts.rowid = t.id
+            WHERE fts.full_text MATCH :query
+            ORDER BY bm25(transcriptions_fts)
+            LIMIT :limit
+            """
+        )
+        rows = self.session.execute(sql, {"query": query, "limit": limit}).fetchall()
+        results: List[Dict[str, Any]] = []
+        for row in rows:
+            segments = row.segments_json
+            if isinstance(segments, str):
+                try:
+                    segments = json.loads(segments)
+                except json.JSONDecodeError:
+                    segments = []
+            results.append(
+                {
+                    "id": row.id,
+                    "video_id": row.media_file_id,
+                    "language": row.language,
+                    "duration": row.duration,
+                    "processing_time": row.processing_time,
+                    "model_used": row.model_used,
+                    "segments": segments,
+                    "full_text": row.full_text,
+                    "created_at": row.created_at,
+                }
+            )
+        return results
+    def generate_srt_subtitle(
+        self, transcription_data: Dict[str, Any], output_path: Path
+    ) -> bool:
+        """
+        Generate SRT subtitle file from transcription data.
+        Args:
+            transcription_data: Transcription data with segments
+            output_path: Path to save SRT file
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            segments = transcription_data.get("segments", [])
+            if not segments:
+                return False
+            with open(output_path, "w", encoding="utf-8") as f:
+                for i, segment in enumerate(segments, 1):
+                    start_time = self._format_srt_time(segment.get("start", 0.0))
+                    end_time = self._format_srt_time(segment.get("end", 0.0))
+                    text_value = segment.get("text", "").strip()
+                    f.write(f"{i}\n")
+                    f.write(f"{start_time} --> {end_time}\n")
+                    f.write(f"{text_value}\n\n")
+            return True
+        except Exception:
+            return False
+    def generate_vtt_subtitle(
+        self, transcription_data: Dict[str, Any], output_path: Path
+    ) -> bool:
+        """
+        Generate VTT subtitle file from transcription data.
+        Args:
+            transcription_data: Transcription data with segments
+            output_path: Path to save VTT file
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            segments = transcription_data.get("segments", [])
+            if not segments:
+                return False
+            with open(output_path, "w", encoding="utf-8") as f:
+                f.write("WEBVTT\n\n")
+                for segment in segments:
+                    start_time = self._format_vtt_time(segment.get("start", 0.0))
+                    end_time = self._format_vtt_time(segment.get("end", 0.0))
+                    text_value = segment.get("text", "").strip()
+                    f.write(f"{start_time} --> {end_time}\n")
+                    f.write(f"{text_value}\n\n")
+            return True
+        except Exception:
+            return False
+    def _format_srt_time(self, seconds: float) -> str:
+        """Format time for SRT format (HH:MM:SS,mmm)."""
+        hours = int(seconds // 3600)
+        minutes = int((seconds % 3600) // 60)
+        secs = int(seconds % 60)
+        millisecs = int((seconds % 1) * 1000)
+        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millisecs:03d}"  # noqa: E231
+    def _format_vtt_time(self, seconds: float) -> str:
+        """Format time for VTT format (HH:MM:SS.mmm)."""
+        hours = int(seconds // 3600)
+        minutes = int((seconds % 3600) // 60)
+        secs = int(seconds % 60)
+        millisecs = int((seconds % 1) * 1000)
+        return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millisecs:03d}"  # noqa: E231

modules/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""Processing modules for different media types."""

modules/audio/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Audio processing modules."""
+# Re-export AudioConverter so it can be imported from the package itself.
+from .converter import AudioConverter
+# AudioConverter is the only name exported by a star-import of this package.
+__all__ = ['AudioConverter']

modules/audio/converter.py ADDED Viewed

@@ -0,0 +1,197 @@
+"""
+Audio conversion service.
+This module provides audio conversion functionality using FFmpeg.
+"""
+import tempfile
+from pathlib import Path
+from typing import Any, Dict, Optional, Union
+import ffmpeg
+from core.base import ProcessingResult
+from core.base_service import BaseService
+from core.config import Config
+from utils.helpers import safe_filename
+class AudioConverter(BaseService):
+    """
+    Audio conversion service using FFmpeg.
+    Handles audio format conversion, quality adjustment, and basic processing.
+    """
+    def __init__(self, config: Config, verbose: bool = False, db_service=None):
+        """Initialize the audio converter."""
+        super().__init__(config, verbose, db_service)
+        # Supported formats
+        self.supported_formats = {
+            "mp3": {"codec": "libmp3lame", "ext": ".mp3"},
+            "wav": {"codec": "pcm_s16le", "ext": ".wav"},
+            "flac": {"codec": "flac", "ext": ".flac"},
+            "aac": {"codec": "aac", "ext": ".aac"},
+            "ogg": {"codec": "libvorbis", "ext": ".ogg"},
+            "m4a": {"codec": "aac", "ext": ".m4a"},
+        }
+    def convert(
+        self,
+        input_file: Union[str, Path],
+        output_file: Union[str, Path],
+        format: str = "mp3",
+        bitrate: int = 320,
+        **kwargs,
+    ) -> ProcessingResult:
+        """
+        Convert audio file to different format.
+        Args:
+            input_file: Path to input audio file
+            output_file: Path to output audio file
+            format: Output format (mp3, wav, flac, aac, ogg, m4a)
+            bitrate: Audio bitrate in kbps
+            **kwargs: Additional conversion options
+        Returns:
+            ProcessingResult with conversion details
+        """
+        input_path = Path(input_file)
+        output_path = Path(output_file)
+        # Validate input file
+        if not input_path.exists():
+            raise FileNotFoundError(f"Input file not found: {input_path}")
+        if not input_path.is_file():
+            raise ValueError(f"Input path is not a file: {input_path}")
+        # Validate format
+        if format.lower() not in self.supported_formats:
+            raise ValueError(
+                f"Unsupported format: {format}. Supported: {list(self.supported_formats.keys())}"
+            )
+        # Ensure output directory exists
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        try:
+            self.logger.info(f"Converting {input_path} to {output_path}")
+            # Build FFmpeg stream
+            stream = ffmpeg.input(str(input_path))
+            # Apply audio codec and bitrate
+            format_info = self.supported_formats[format.lower()]
+            stream = ffmpeg.output(
+                stream,
+                str(output_path),
+                acodec=format_info["codec"],
+                audio_bitrate=f"{bitrate}k",
+            )
+            # Add additional options
+            if "start_time" in kwargs:
+                stream = stream.overwrite_output()
+            if "duration" in kwargs:
+                stream = stream.overwrite_output()
+            if "sample_rate" in kwargs:
+                stream = stream.overwrite_output()
+            if "channels" in kwargs:
+                stream = stream.overwrite_output()
+            # Run conversion
+            ffmpeg.run(stream, overwrite_output=True, quiet=True)
+            # Verify output file was created
+            if not output_path.exists():
+                raise RuntimeError("Output file was not created")
+            # Get file info
+            input_size = input_path.stat().st_size
+            output_size = output_path.stat().st_size
+            self.logger.info(
+                f"Conversion successful: {input_size:,} -> {output_size:,} bytes"
+            )
+            return ProcessingResult(
+                success=True,
+                message=f"Converted {input_path.name} to {output_path.name}",
+                input_file=str(input_path),
+                output_file=str(output_path),
+                duration_seconds=0,  # Could be calculated from metadata
+                metadata={
+                    "input_size": input_size,
+                    "output_size": output_size,
+                    "format": format,
+                    "bitrate": bitrate,
+                    "compression_ratio": round(output_size / input_size, 2)
+                    if input_size > 0
+                    else 0,
+                },
+            )
+        except ffmpeg.Error as e:
+            # Clean up output file if it exists
+            if output_path.exists():
+                output_path.unlink()
+            raise RuntimeError(f"FFmpeg conversion failed: {e}")
+        except Exception as e:
+            # Clean up output file if it exists
+            if output_path.exists():
+                output_path.unlink()
+            raise RuntimeError(f"Audio conversion failed: {e}")
+    def get_audio_info(self, file_path: Union[str, Path]) -> Dict[str, Any]:
+        """
+        Get audio file information using FFprobe.
+        Args:
+            file_path: Path to audio file
+        Returns:
+            Dictionary with audio information
+        """
+        file_path = Path(file_path)
+        if not file_path.exists():
+            raise FileNotFoundError(f"File not found: {file_path}")
+        try:
+            # Use ffmpeg-python to probe the file
+            probe = ffmpeg.probe(str(file_path))
+            # Extract format info
+            format_info = probe.get("format", {})
+            # Extract audio stream info
+            audio_stream = None
+            for stream in probe.get("streams", []):
+                if stream.get("codec_type") == "audio":
+                    audio_stream = stream
+                    break
+            if not audio_stream:
+                raise ValueError("No audio stream found in file")
+            return {
+                "format": format_info.get("format_name", "unknown"),
+                "duration": float(format_info.get("duration", 0)),
+                "size": int(format_info.get("size", 0)),
+                "bitrate": int(format_info.get("bit_rate", 0)),
+                "codec": audio_stream.get("codec_name", "unknown"),
+                "sample_rate": int(audio_stream.get("sample_rate", 0)),
+                "channels": int(audio_stream.get("channels", 0)),
+                "channel_layout": audio_stream.get("channel_layout", "unknown"),
+            }
+        except ffmpeg.Error as e:
+            raise RuntimeError(f"FFprobe failed: {e}")
+        except Exception as e:
+            raise RuntimeError(f"Audio analysis failed: {e}")

modules/video/__init__.py ADDED Viewed

@@ -0,0 +1,16 @@
+"""Video processing modules."""
+# Re-export the converter and the service classes so they can be imported
+# from the package itself.
+from .converter import VideoConverter
+from .services.download_service import VideoDownloadService
+from .services.metadata_service import MetadataService
+from .services.playlist_service import PlaylistService
+from .services.transcription_service import TranscriptionService
+# Names exported by a star-import of this package.
+__all__ = [
+    "VideoDownloadService",
+    "PlaylistService",
+    "MetadataService",
+    "TranscriptionService",
+    "VideoConverter",
+]

modules/video/converter.py ADDED Viewed

@@ -0,0 +1,191 @@
+"""
+Video converter module.
+This module provides video format conversion functionality using FFmpeg.
+"""
+import subprocess
+from pathlib import Path
+from typing import Union
+from core.base import BaseConverter, ProcessingResult
+from core.config import Config
+from core.logger import get_logger
+class VideoConverter(BaseConverter):
+    """
+    Video converter using FFmpeg.
+    Supports various input and output formats.
+    """
+    def __init__(self, config: Config, verbose: bool = False):
+        """Initialize the video converter."""
+        super().__init__(config, verbose)
+        self.supported_input_formats = [
+            "mp4",
+            "avi",
+            "mkv",
+            "mov",
+            "wmv",
+            "flv",
+            "webm",
+            "m4v",
+            "3gp",
+        ]
+        self.supported_output_formats = [
+            "mp4",
+            "avi",
+            "mkv",
+            "mov",
+            "wmv",
+            "flv",
+            "webm",
+            "m4v",
+            "3gp",
+        ]
+        self.logger = get_logger("VideoConverter", verbose=verbose)
+    def convert(
+        self, input_path: Union[str, Path], output_path: Union[str, Path], **kwargs
+    ) -> ProcessingResult:
+        """
+        Convert video from one format to another.
+        Args:
+            input_path: Path to input file
+            output_path: Path to output file
+            **kwargs: Additional conversion options
+        Returns:
+            ProcessingResult with conversion details
+        """
+        try:
+            input_path = Path(input_path).expanduser().resolve()
+            output_path = Path(output_path).expanduser().resolve()
+            # Validate input
+            if not self.validate_input(input_path):
+                return ProcessingResult(
+                    success=False,
+                    message=f"Invalid input file: {input_path}",
+                    errors=[f"Input file not found or invalid: {input_path}"],
+                )
+            # Validate formats
+            if not self.is_supported_format(input_path, is_input=True):
+                return ProcessingResult(
+                    success=False,
+                    message=f"Unsupported input format: {input_path.suffix}",
+                    errors=[f"Unsupported input format: {input_path.suffix}"],
+                )
+            if not self.is_supported_format(output_path, is_input=False):
+                return ProcessingResult(
+                    success=False,
+                    message=f"Unsupported output format: {output_path.suffix}",
+                    errors=[f"Unsupported output format: {output_path.suffix}"],
+                )
+            # Ensure output directory exists
+            if not self.ensure_output_dir(output_path):
+                return ProcessingResult(
+                    success=False,
+                    message=f"Failed to create output directory: {output_path.parent}",
+                    errors=[f"Cannot create output directory: {output_path.parent}"],
+                )
+            # Build FFmpeg command
+            cmd = self._build_command(input_path, output_path, **kwargs)
+            self.logger.info(f"Converting video: {input_path} -> {output_path}")
+            self.logger.debug(f"Command: {' '.join(cmd)}")
+            # Execute conversion
+            result = subprocess.run(cmd, capture_output=True, text=True)
+            if result.returncode == 0 and output_path.exists():
+                return ProcessingResult(
+                    success=True,
+                    message=f"Video converted successfully: {output_path}",
+                    output_path=output_path,
+                    metadata={
+                        "input_file": str(input_path),
+                        "output_file": str(output_path),
+                        "input_size": input_path.stat().st_size,
+                        "output_size": output_path.stat().st_size,
+                        "command": " ".join(cmd),
+                    },
+                )
+            else:
+                return ProcessingResult(
+                    success=False,
+                    message=f"Conversion failed: {result.stderr}",
+                    errors=[result.stderr],
+                )
+        except Exception as e:
+            self.logger.error(f"Conversion failed: {e}")
+            return ProcessingResult(
+                success=False, message=f"Conversion failed: {str(e)}", errors=[str(e)]
+            )
+    def _build_command(
+        self,
+        input_path: Path,
+        output_path: Path,
+        quality: str = "medium",
+        codec: str = "auto",
+        **kwargs,
+    ) -> list:
+        """
+        Build FFmpeg command.
+        Args:
+            input_path: Input file path
+            output_path: Output file path
+            quality: Output quality
+            codec: Video codec
+            **kwargs: Additional options
+        Returns:
+            Command list for subprocess
+        """
+        cmd = ["ffmpeg", "-i", str(input_path)]
+        # Video codec
+        if codec == "auto":
+            if output_path.suffix.lower() == ".mp4":
+                cmd.extend(["-c:v", "libx264"])
+            elif output_path.suffix.lower() == ".webm":
+                cmd.extend(["-c:v", "libvpx-vp9"])
+            else:
+                cmd.extend(["-c:v", "libx264"])
+        else:
+            cmd.extend(["-c:v", codec])
+        # Quality settings
+        if quality == "high":
+            cmd.extend(["-crf", "18", "-preset", "slow"])
+        elif quality == "medium":
+            cmd.extend(["-crf", "23", "-preset", "medium"])
+        elif quality == "low":
+            cmd.extend(["-crf", "28", "-preset", "fast"])
+        else:
+            cmd.extend(["-crf", "23", "-preset", "medium"])
+        # Audio codec
+        cmd.extend(["-c:a", "aac"])
+        # Additional options
+        if self.verbose:
+            cmd.append("-v")
+            cmd.append("info")
+        else:
+            cmd.extend(["-v", "quiet"])
+        # Output file
+        cmd.append(str(output_path))
+        return cmd