spatelier-0.3.0-py3-none-any.whl
This diff represents the content of a publicly available package version released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- analytics/__init__.py +1 -0
- analytics/reporter.py +497 -0
- cli/__init__.py +1 -0
- cli/app.py +147 -0
- cli/audio.py +129 -0
- cli/cli_analytics.py +320 -0
- cli/cli_utils.py +282 -0
- cli/error_handlers.py +122 -0
- cli/files.py +299 -0
- cli/update.py +325 -0
- cli/video.py +823 -0
- cli/worker.py +615 -0
- core/__init__.py +1 -0
- core/analytics_dashboard.py +368 -0
- core/base.py +303 -0
- core/base_service.py +69 -0
- core/config.py +345 -0
- core/database_service.py +116 -0
- core/decorators.py +263 -0
- core/error_handler.py +210 -0
- core/file_tracker.py +254 -0
- core/interactive_cli.py +366 -0
- core/interfaces.py +166 -0
- core/job_queue.py +437 -0
- core/logger.py +79 -0
- core/package_updater.py +469 -0
- core/progress.py +228 -0
- core/service_factory.py +295 -0
- core/streaming.py +299 -0
- core/worker.py +765 -0
- database/__init__.py +1 -0
- database/connection.py +265 -0
- database/metadata.py +516 -0
- database/models.py +288 -0
- database/repository.py +592 -0
- database/transcription_storage.py +219 -0
- modules/__init__.py +1 -0
- modules/audio/__init__.py +5 -0
- modules/audio/converter.py +197 -0
- modules/video/__init__.py +16 -0
- modules/video/converter.py +191 -0
- modules/video/fallback_extractor.py +334 -0
- modules/video/services/__init__.py +18 -0
- modules/video/services/audio_extraction_service.py +274 -0
- modules/video/services/download_service.py +852 -0
- modules/video/services/metadata_service.py +190 -0
- modules/video/services/playlist_service.py +445 -0
- modules/video/services/transcription_service.py +491 -0
- modules/video/transcription_service.py +385 -0
- modules/video/youtube_api.py +397 -0
- spatelier/__init__.py +33 -0
- spatelier-0.3.0.dist-info/METADATA +260 -0
- spatelier-0.3.0.dist-info/RECORD +59 -0
- spatelier-0.3.0.dist-info/WHEEL +5 -0
- spatelier-0.3.0.dist-info/entry_points.txt +2 -0
- spatelier-0.3.0.dist-info/licenses/LICENSE +21 -0
- spatelier-0.3.0.dist-info/top_level.txt +7 -0
- utils/__init__.py +1 -0
- utils/helpers.py +250 -0
core/interfaces.py
ADDED
@@ -0,0 +1,166 @@

```python
"""
Core interfaces for dependency injection and service layer.

This module defines abstract interfaces for the service layer,
enabling dependency injection and better testability.
"""

from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

from core.base import ProcessingResult
from core.config import Config


class IDatabaseService(ABC):
    """Interface for database services."""

    @abstractmethod
    def initialize(self) -> "IRepositoryContainer":
        """Initialize database connections and return repository container."""
        pass

    @abstractmethod
    def close_connections(self):
        """Close all database connections."""
        pass


class IRepositoryContainer(ABC):
    """Interface for repository container."""

    @property
    @abstractmethod
    def media(self):
        """Media file repository."""
        pass

    @property
    @abstractmethod
    def jobs(self):
        """Processing job repository."""
        pass

    @property
    @abstractmethod
    def analytics(self):
        """Analytics repository."""
        pass

    @property
    @abstractmethod
    def playlists(self):
        """Playlist repository."""
        pass

    @property
    @abstractmethod
    def playlist_videos(self):
        """Playlist video repository."""
        pass


class IVideoDownloadService(ABC):
    """Interface for video download service."""

    @abstractmethod
    def download_video(
        self, url: str, output_path: Optional[Union[str, Path]] = None, **kwargs
    ) -> ProcessingResult:
        """Download a single video from URL."""
        pass


class IMetadataService(ABC):
    """Interface for metadata service."""

    @abstractmethod
    def extract_video_metadata(self, url: str) -> Dict[str, Any]:
        """Extract metadata from video URL."""
        pass

    @abstractmethod
    def enrich_media_file(self, media_file_id: int) -> bool:
        """Enrich media file with additional metadata."""
        pass

    @abstractmethod
    def get_media_file_metadata(self, media_file_id: int) -> Optional[Dict[str, Any]]:
        """Get metadata for a media file."""
        pass


class ITranscriptionService(ABC):
    """Interface for transcription service."""

    @abstractmethod
    def transcribe_video(
        self,
        video_path: Union[str, Path],
        media_file_id: Optional[int] = None,
        language: Optional[str] = None,
        model_size: Optional[str] = None,
    ) -> bool:
        """Transcribe a video file."""
        pass

    @abstractmethod
    def embed_subtitles(
        self,
        video_path: Union[str, Path],
        output_path: Union[str, Path],
        media_file_id: Optional[int] = None,
    ) -> bool:
        """Embed subtitles into video file."""
        pass


class IPlaylistService(ABC):
    """Interface for playlist service."""

    @abstractmethod
    def download_playlist(
        self, url: str, output_path: Optional[Union[str, Path]] = None, **kwargs
    ) -> Dict[str, Any]:
        """Download playlist without transcription."""
        pass


class IServiceFactory(ABC):
    """Interface for service factory."""

    @abstractmethod
    def create_database_service(
        self, config: Config, verbose: bool = False
    ) -> IDatabaseService:
        """Create database service."""
        pass

    @abstractmethod
    def create_video_download_service(
        self, config: Config, verbose: bool = False
    ) -> IVideoDownloadService:
        """Create video download service."""
        pass

    @abstractmethod
    def create_metadata_service(
        self, config: Config, verbose: bool = False
    ) -> IMetadataService:
        """Create metadata service."""
        pass

    @abstractmethod
    def create_transcription_service(
        self, config: Config, verbose: bool = False
    ) -> ITranscriptionService:
        """Create transcription service."""
        pass

    @abstractmethod
    def create_playlist_service(
        self, config: Config, verbose: bool = False
    ) -> IPlaylistService:
        """Create playlist service."""
        pass
```
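Because every service is specified as an ABC, callers can be written against the interface and handed any concrete implementation, which is the seam these abstractions exist for. Below is a minimal sketch of that pattern using `IMetadataService`, whose method signatures are fully self-contained in the file above; `StaticMetadataService` and `describe` are hypothetical names introduced here for illustration, not part of the package.

```python
from typing import Any, Dict, Optional

from core.interfaces import IMetadataService


class StaticMetadataService(IMetadataService):
    """Hypothetical in-memory implementation, usable as a test double."""

    def __init__(self) -> None:
        self._store: Dict[int, Dict[str, Any]] = {}

    def extract_video_metadata(self, url: str) -> Dict[str, Any]:
        # No network access: return a canned record for the given URL.
        return {"url": url, "title": "unknown"}

    def enrich_media_file(self, media_file_id: int) -> bool:
        self._store.setdefault(media_file_id, {})["enriched"] = True
        return True

    def get_media_file_metadata(self, media_file_id: int) -> Optional[Dict[str, Any]]:
        return self._store.get(media_file_id)


def describe(media_file_id: int, metadata: IMetadataService) -> str:
    """Caller depends only on the ABC, so any implementation can be injected."""
    info = metadata.get_media_file_metadata(media_file_id)
    return "no metadata" if info is None else str(info)
```

In the package itself, concrete instances are presumably produced through an `IServiceFactory` implementation (see core/service_factory.py in the listing above), so wiring code never has to name a concrete class.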
core/job_queue.py
ADDED
@@ -0,0 +1,437 @@

```python
"""
Generic job queue system with SQLite persistence.

This module provides a flexible job queue system that can handle any type of job,
with configurable throttling, persistent storage, and background processing.
"""

import json
import sqlite3
import threading
import time
from dataclasses import asdict, dataclass
from datetime import datetime, timedelta
from enum import Enum
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional

from core.config import Config
from core.logger import get_logger


class JobStatus(Enum):
    """Job status enumeration."""

    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"


class JobType(Enum):
    """Job type enumeration."""

    DOWNLOAD_VIDEO = "download_video"
    DOWNLOAD_PLAYLIST = "download_playlist"
    TRANSCRIBE_VIDEO = "transcribe_video"
    PROCESS_AUDIO = "process_audio"
    CUSTOM = "custom"


@dataclass
class Job:
    """Generic job definition."""

    id: Optional[int] = None
    job_type: JobType = JobType.CUSTOM
    job_data: Dict[str, Any] = None
    job_path: str = ""
    status: JobStatus = JobStatus.PENDING
    priority: int = 0
    created_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    error_message: Optional[str] = None
    retry_count: int = 0
    max_retries: int = 3

    def __post_init__(self):
        if self.job_data is None:
            self.job_data = {}
        if self.created_at is None:
            self.created_at = datetime.now()

    @property
    def duration(self) -> Optional[float]:
        """Get job duration in seconds."""
        if self.started_at and self.completed_at:
            return (self.completed_at - self.started_at).total_seconds()
        return None

    @property
    def is_finished(self) -> bool:
        """Check if job is in a finished state."""
        return self.status in [
            JobStatus.COMPLETED,
            JobStatus.FAILED,
            JobStatus.CANCELLED,
        ]

    def to_dict(self) -> Dict[str, Any]:
        """Convert job to dictionary for JSON serialization."""
        return {
            "id": self.id,
            "job_type": self.job_type.value,
            "job_data": self.job_data,
            "job_path": self.job_path,
            "status": self.status.value,
            "created_at": self.created_at.isoformat() if self.created_at else None,
            "priority": self.priority,
            "started_at": self.started_at.isoformat() if self.started_at else None,
            "completed_at": self.completed_at.isoformat()
            if self.completed_at
            else None,
            "error_message": self.error_message,
            "retry_count": self.retry_count,
            "max_retries": self.max_retries,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Job":
        """Create job from dictionary."""
        job = cls()
        job.id = data.get("id")
        job.job_type = JobType(data.get("job_type", "custom"))
        job.job_data = data.get("job_data", {})
        job.job_path = data.get("job_path", "")
        job.status = JobStatus(data.get("status", "pending"))
        job.priority = data.get("priority", 0)
        job.created_at = (
            datetime.fromisoformat(data["created_at"])
            if data.get("created_at")
            else None
        )
        job.started_at = (
            datetime.fromisoformat(data["started_at"])
            if data.get("started_at")
            else None
        )
        job.completed_at = (
            datetime.fromisoformat(data["completed_at"])
            if data.get("completed_at")
            else None
        )
        job.error_message = data.get("error_message")
        job.retry_count = data.get("retry_count", 0)
        job.max_retries = data.get("max_retries", 3)
        return job


class JobQueue:
    """Generic job queue with SQLite persistence."""

    def __init__(self, config: Config, verbose: bool = False):
        """Initialize job queue."""
        self.config = config
        self.verbose = verbose
        self.logger = get_logger("JobQueue", verbose=verbose)

        # Database connection
        self.db_path = Path(config.database.sqlite_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)

        # Threading
        self._lock = threading.Lock()

        # Initialize database
        self._init_database()

    def _init_database(self) -> None:
        """Initialize job queue database."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS jobs (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    job_type TEXT NOT NULL,
                    job_data TEXT NOT NULL,
                    job_path TEXT NOT NULL,
                    status TEXT DEFAULT 'pending',
                    priority INTEGER DEFAULT 0,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    started_at TIMESTAMP,
                    completed_at TIMESTAMP,
                    error_message TEXT,
                    retry_count INTEGER DEFAULT 0,
                    max_retries INTEGER DEFAULT 3
                )
                """
            )

            # Create indexes for performance
            conn.execute("CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status)")
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_jobs_priority ON jobs(priority DESC)"
            )
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_jobs_created_at ON jobs(created_at)"
            )

            conn.commit()

    def add_job(self, job: Job) -> int:
        """Add job to queue."""
        with self._lock:
            with sqlite3.connect(self.db_path) as conn:
                cursor = conn.execute(
                    """
                    INSERT INTO jobs (job_type, job_data, job_path, status, priority, created_at, retry_count, max_retries)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    (
                        job.job_type.value,
                        json.dumps(job.job_data),
                        job.job_path,
                        job.status.value,
                        job.priority,
                        job.created_at.isoformat(),
                        job.retry_count,
                        job.max_retries,
                    ),
                )

                job_id = cursor.lastrowid
                self.logger.info(f"Added job {job_id} to queue: {job.job_type.value}")
                return job_id

    def get_next_job(self) -> Optional[Job]:
        """Get next job to process (highest priority, oldest first)."""
        with self._lock:
            with sqlite3.connect(self.db_path) as conn:
                cursor = conn.execute(
                    """
                    SELECT * FROM jobs
                    WHERE status = 'pending'
                    ORDER BY priority DESC, created_at ASC
                    LIMIT 1
                    """
                )

                row = cursor.fetchone()
                if not row:
                    return None

                # Convert row to job
                job_data = {
                    "id": row[0],
                    "job_type": row[1],
                    "job_data": json.loads(row[2]),
                    "job_path": row[3],
                    "status": row[4],
                    "priority": row[5],
                    "created_at": row[6],
                    "started_at": row[7],
                    "completed_at": row[8],
                    "error_message": row[9],
                    "retry_count": row[10],
                    "max_retries": row[11],
                }

                return Job.from_dict(job_data)

    def update_job_status(
        self, job_id: int, status: JobStatus, error_message: Optional[str] = None
    ) -> bool:
        """Update job status."""
        with self._lock:
            with sqlite3.connect(self.db_path) as conn:
                if status == JobStatus.RUNNING:
                    conn.execute(
                        """
                        UPDATE jobs
                        SET status = ?, started_at = CURRENT_TIMESTAMP
                        WHERE id = ?
                        """,
                        (status.value, job_id),
                    )
                elif status in [JobStatus.COMPLETED, JobStatus.FAILED]:
                    conn.execute(
                        """
                        UPDATE jobs
                        SET status = ?, completed_at = CURRENT_TIMESTAMP, error_message = ?
                        WHERE id = ?
                        """,
                        (status.value, error_message, job_id),
                    )
                else:
                    conn.execute(
                        """
                        UPDATE jobs
                        SET status = ?
                        WHERE id = ?
                        """,
                        (status.value, job_id),
                    )

                conn.commit()
                return True

    def get_job(self, job_id: int) -> Optional[Job]:
        """Get job by ID."""
        with self._lock:
            with sqlite3.connect(self.db_path) as conn:
                cursor = conn.execute("SELECT * FROM jobs WHERE id = ?", (job_id,))
                row = cursor.fetchone()

                if not row:
                    return None

                job_data = {
                    "id": row[0],
                    "job_type": row[1],
                    "job_data": json.loads(row[2]),
                    "job_path": row[3],
                    "status": row[4],
                    "priority": row[5],
                    "created_at": row[6],
                    "started_at": row[7],
                    "completed_at": row[8],
                    "error_message": row[9],
                    "retry_count": row[10],
                    "max_retries": row[11],
                }

                return Job.from_dict(job_data)

    def get_queue_status(self) -> Dict[str, int]:
        """Get queue status summary."""
        with self._lock:
            with sqlite3.connect(self.db_path) as conn:
                cursor = conn.execute(
                    """
                    SELECT status, COUNT(*)
                    FROM jobs
                    GROUP BY status
                    """
                )

                status_counts = dict(cursor.fetchall())

                return {
                    "pending": status_counts.get("pending", 0),
                    "running": status_counts.get("running", 0),
                    "completed": status_counts.get("completed", 0),
                    "failed": status_counts.get("failed", 0),
                    "cancelled": status_counts.get("cancelled", 0),
                }

    def get_jobs_by_status(
        self, status: JobStatus, limit: Optional[int] = None
    ) -> List[Job]:
        """Get jobs by status."""
        with self._lock:
            with sqlite3.connect(self.db_path) as conn:
                query = "SELECT * FROM jobs WHERE status = ? ORDER BY created_at DESC"
                params = [status.value]

                if limit:
                    query += " LIMIT ?"
                    params.append(limit)

                cursor = conn.execute(query, params)
                rows = cursor.fetchall()

                jobs = []
                for row in rows:
                    job_data = {
                        "id": row[0],
                        "job_type": row[1],
                        "job_data": json.loads(row[2]),
                        "job_path": row[3],
                        "status": row[4],
                        "priority": row[5],
                        "created_at": row[6],
                        "started_at": row[7],
                        "completed_at": row[8],
                        "error_message": row[9],
                        "retry_count": row[10],
                        "max_retries": row[11],
                    }
                    jobs.append(Job.from_dict(job_data))

                return jobs

    def get_all_jobs(self) -> List[Job]:
        """Get all jobs from queue."""
        with self._lock:
            with sqlite3.connect(self.db_path) as conn:
                cursor = conn.execute("SELECT * FROM jobs ORDER BY created_at DESC")
                rows = cursor.fetchall()

                jobs = []
                for row in rows:
                    job_data = {
                        "id": row[0],
                        "job_type": row[1],
                        "job_data": json.loads(row[2]),
                        "job_path": row[3],
                        "status": row[4],
                        "priority": row[5],
                        "created_at": row[6],
                        "started_at": row[7],
                        "completed_at": row[8],
                        "error_message": row[9],
                        "retry_count": row[10],
                        "max_retries": row[11],
                    }
                    jobs.append(Job.from_dict(job_data))

                return jobs

    def cancel_job(self, job_id: int) -> bool:
        """Cancel a job."""
        return self.update_job_status(job_id, JobStatus.CANCELLED)

    def retry_failed_jobs(self) -> int:
        """Retry failed jobs that haven't exceeded max retries."""
        with self._lock:
            with sqlite3.connect(self.db_path) as conn:
                cursor = conn.execute(
                    """
                    UPDATE jobs
                    SET status = 'pending', retry_count = retry_count + 1
                    WHERE status = 'failed' AND retry_count < max_retries
                    """
                )

                retry_count = cursor.rowcount
                conn.commit()

                if retry_count > 0:
                    self.logger.info(f"Retrying {retry_count} failed jobs")

                return retry_count

    def cleanup_old_jobs(self, max_age_days: int = 30) -> int:
        """Clean up old completed jobs."""
        cutoff_date = datetime.now() - timedelta(days=max_age_days)

        with self._lock:
            with sqlite3.connect(self.db_path) as conn:
                cursor = conn.execute(
                    """
                    DELETE FROM jobs
                    WHERE status IN ('completed', 'failed', 'cancelled')
                    AND completed_at < ?
                    """,
                    (cutoff_date.isoformat(),),
                )

                deleted_count = cursor.rowcount
                conn.commit()

                if deleted_count > 0:
                    self.logger.info(f"Cleaned up {deleted_count} old jobs")

                return deleted_count
```
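As a usage sketch of the queue API above: enqueue a job, then poll and process until the queue drains. core/config.py is not reproduced in this section, so the snippet fakes the single attribute `JobQueue` reads (`config.database.sqlite_path`) with a `SimpleNamespace` stand-in; real callers would pass the package's own `Config`. Note also that `get_next_job` only selects the oldest highest-priority pending row, it does not claim it, so the caller must flip the job to RUNNING before doing work or the same row will come back on the next poll.

```python
from types import SimpleNamespace

from core.job_queue import Job, JobQueue, JobStatus, JobType

# Stand-in for core.config.Config: JobQueue only reads database.sqlite_path here.
config = SimpleNamespace(database=SimpleNamespace(sqlite_path="data/jobs.db"))

queue = JobQueue(config, verbose=True)
queue.add_job(
    Job(
        job_type=JobType.DOWNLOAD_VIDEO,
        job_data={"url": "https://example.com/v/1"},  # illustrative payload
        job_path="downloads/",
        priority=5,
    )
)

# Minimal polling worker: claim the job, run it, record the outcome.
while (job := queue.get_next_job()) is not None:
    queue.update_job_status(job.id, JobStatus.RUNNING)
    try:
        print(f"processing {job.job_type.value}: {job.job_data}")  # real work here
        queue.update_job_status(job.id, JobStatus.COMPLETED)
    except Exception as exc:  # sketch only; a real worker would be more selective
        queue.update_job_status(job.id, JobStatus.FAILED, error_message=str(exc))

print(queue.get_queue_status())
```

Failed jobs stay in the table; `retry_failed_jobs()` flips them back to pending until `max_retries` is exhausted, and `cleanup_old_jobs()` prunes finished rows older than the cutoff.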
core/logger.py
ADDED
@@ -0,0 +1,79 @@

```python
"""
Logging configuration and utilities.

This module provides centralized logging configuration using loguru.
"""

import sys
from pathlib import Path
from typing import Optional

from loguru import logger


def get_logger(
    name: Optional[str] = None,
    verbose: bool = False,
    log_file: Optional[Path] = None,
    level: str = "INFO",
) -> "logger":
    """
    Get a configured logger instance.

    Args:
        name: Logger name (optional)
        verbose: Enable verbose logging
        log_file: Path to log file (optional)
        level: Logging level

    Returns:
        Configured logger instance
    """
    # Remove default handler
    logger.remove()

    # Set log level
    log_level = "DEBUG" if verbose else level

    # Console handler with colors
    logger.add(
        sys.stderr,
        level=log_level,
        format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | "
        "<level>{level: <8}</level> | "
        "<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - "
        "<level>{message}</level>",
        colorize=True,
    )

    # File handler (if specified)
    if log_file:
        log_file.parent.mkdir(parents=True, exist_ok=True)
        logger.add(
            log_file,
            level=log_level,
            format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {name}:{function}:{line} - {message}",
            rotation="10 MB",
            retention="30 days",
            compression="zip",
        )

    # Set the name attribute for compatibility
    if name:
        logger.name = name

    return logger


def setup_logging(
    verbose: bool = False, log_file: Optional[Path] = None, level: str = "INFO"
) -> None:
    """
    Set up global logging configuration.

    Args:
        verbose: Enable verbose logging
        log_file: Path to log file (optional)
        level: Logging level
    """
    get_logger(verbose=verbose, log_file=log_file, level=level)
```
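A short usage sketch for the logging helpers. Because loguru exposes one process-wide logger and `get_logger` begins with `logger.remove()`, each call wipes all previously configured sinks and installs fresh ones: it behaves as global (re)configuration rather than per-module logger creation, and the last call wins. The file path and logger name below are illustrative.

```python
from pathlib import Path

from core.logger import get_logger, setup_logging

# One-time global setup: colored DEBUG console output plus a rotating,
# compressed log file (10 MB rotation, 30-day retention per the module above).
setup_logging(verbose=True, log_file=Path("logs/spatelier.log"))

# Later calls reconfigure the same global logger; "Worker" is a hypothetical name.
log = get_logger("Worker", verbose=True, log_file=Path("logs/spatelier.log"))
log.info("transcription job queued")
log.debug("model_size=base language=en")
```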