spatelier 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. analytics/__init__.py +1 -0
  2. analytics/reporter.py +497 -0
  3. cli/__init__.py +1 -0
  4. cli/app.py +147 -0
  5. cli/audio.py +129 -0
  6. cli/cli_analytics.py +320 -0
  7. cli/cli_utils.py +282 -0
  8. cli/error_handlers.py +122 -0
  9. cli/files.py +299 -0
  10. cli/update.py +325 -0
  11. cli/video.py +823 -0
  12. cli/worker.py +615 -0
  13. core/__init__.py +1 -0
  14. core/analytics_dashboard.py +368 -0
  15. core/base.py +303 -0
  16. core/base_service.py +69 -0
  17. core/config.py +345 -0
  18. core/database_service.py +116 -0
  19. core/decorators.py +263 -0
  20. core/error_handler.py +210 -0
  21. core/file_tracker.py +254 -0
  22. core/interactive_cli.py +366 -0
  23. core/interfaces.py +166 -0
  24. core/job_queue.py +437 -0
  25. core/logger.py +79 -0
  26. core/package_updater.py +469 -0
  27. core/progress.py +228 -0
  28. core/service_factory.py +295 -0
  29. core/streaming.py +299 -0
  30. core/worker.py +765 -0
  31. database/__init__.py +1 -0
  32. database/connection.py +265 -0
  33. database/metadata.py +516 -0
  34. database/models.py +288 -0
  35. database/repository.py +592 -0
  36. database/transcription_storage.py +219 -0
  37. modules/__init__.py +1 -0
  38. modules/audio/__init__.py +5 -0
  39. modules/audio/converter.py +197 -0
  40. modules/video/__init__.py +16 -0
  41. modules/video/converter.py +191 -0
  42. modules/video/fallback_extractor.py +334 -0
  43. modules/video/services/__init__.py +18 -0
  44. modules/video/services/audio_extraction_service.py +274 -0
  45. modules/video/services/download_service.py +852 -0
  46. modules/video/services/metadata_service.py +190 -0
  47. modules/video/services/playlist_service.py +445 -0
  48. modules/video/services/transcription_service.py +491 -0
  49. modules/video/transcription_service.py +385 -0
  50. modules/video/youtube_api.py +397 -0
  51. spatelier/__init__.py +33 -0
  52. spatelier-0.3.0.dist-info/METADATA +260 -0
  53. spatelier-0.3.0.dist-info/RECORD +59 -0
  54. spatelier-0.3.0.dist-info/WHEEL +5 -0
  55. spatelier-0.3.0.dist-info/entry_points.txt +2 -0
  56. spatelier-0.3.0.dist-info/licenses/LICENSE +21 -0
  57. spatelier-0.3.0.dist-info/top_level.txt +7 -0
  58. utils/__init__.py +1 -0
  59. utils/helpers.py +250 -0
@@ -0,0 +1,219 @@
1
+ """
2
+ SQLite-backed transcription storage (JSON + FTS5).
3
+
4
+ Stores full transcription segments as JSON, with a flattened full_text column
5
+ indexed by SQLite FTS5 for search.
6
+ """
7
+
8
+ import json
9
+ from pathlib import Path
10
+ from typing import Any, Dict, List, Optional, Union
11
+
12
+ from sqlalchemy import text
13
+ from sqlalchemy.orm import Session
14
+
15
+ from database.models import Transcription
16
+
17
+
18
class SQLiteTranscriptionStorage:
    """SQLite transcription storage adapter.

    Persists transcriptions through the ``Transcription`` ORM model, searches
    them through the ``transcriptions_fts`` FTS5 table, and renders segment
    lists as SRT / WebVTT subtitle files.
    """

    def __init__(self, session: Session):
        """
        Initialize SQLite transcription storage.

        Args:
            session: SQLAlchemy session bound to SQLite
        """
        self.session = session

    def store_transcription(
        self, video_id: Union[str, int], transcription_data: Dict[str, Any]
    ) -> int:
        """
        Store transcription data in SQLite.

        The ``full_text`` column is built by joining the stripped, non-empty
        ``text`` of every segment with single spaces.

        Args:
            video_id: ID of the video file (converted to int)
            transcription_data: Transcription results; reads the keys
                ``segments``, ``language``, ``duration``, ``processing_time``
                and ``model_used``

        Returns:
            SQLite record ID

        Raises:
            ValueError: If ``video_id`` is a string that is not an integer.
            Exception: Whatever the commit raised; the session is rolled back
                first so it stays usable.
        """
        video_id_int = int(video_id) if isinstance(video_id, (str, int)) else video_id
        # ``segments`` / ``text`` may be present but None; treat both as empty
        # instead of failing on iteration or ``None.strip()``.
        segments = transcription_data.get("segments") or []
        stripped = ((seg.get("text") or "").strip() for seg in segments)
        full_text = " ".join(piece for piece in stripped if piece)

        record = Transcription(
            media_file_id=video_id_int,
            language=transcription_data.get("language"),
            duration=transcription_data.get("duration"),
            processing_time=transcription_data.get("processing_time"),
            model_used=transcription_data.get("model_used"),
            segments_json=segments,
            full_text=full_text,
        )
        self.session.add(record)
        try:
            self.session.commit()
        except Exception:
            # A failed commit leaves the session in a state that rejects
            # further work until it is rolled back.
            self.session.rollback()
            raise
        self.session.refresh(record)
        return record.id

    def get_transcription(self, video_id: Union[str, int]) -> Optional[Dict[str, Any]]:
        """
        Get the most recent transcription for a video.

        Args:
            video_id: Video ID

        Returns:
            Transcription dictionary (highest record id for the video) or None
        """
        video_id_int = int(video_id) if isinstance(video_id, (str, int)) else video_id
        record = (
            self.session.query(Transcription)
            .filter(Transcription.media_file_id == video_id_int)
            .order_by(Transcription.id.desc())
            .first()
        )
        if not record:
            return None
        return self._row_to_dict(record)

    def search_transcriptions(
        self, query: str, limit: int = 10
    ) -> List[Dict[str, Any]]:
        """
        Search transcriptions by text content using SQLite FTS5.

        Args:
            query: Search query; it is bound as-is to ``MATCH``, so FTS5 query
                syntax applies
            limit: Maximum number of results

        Returns:
            List of matching transcription dictionaries ordered by bm25 rank;
            empty when ``query`` is empty or whitespace
        """
        # An empty MATCH expression is an FTS5 syntax error, and it can never
        # match anything anyway.
        if not query or not query.strip():
            return []

        sql = text(
            """
            SELECT t.id, t.media_file_id, t.language, t.duration, t.processing_time,
                   t.model_used, t.segments_json, t.full_text, t.created_at
            FROM transcriptions t
            JOIN transcriptions_fts fts ON fts.rowid = t.id
            WHERE fts.full_text MATCH :query
            ORDER BY bm25(transcriptions_fts)
            LIMIT :limit
            """
        )
        rows = self.session.execute(sql, {"query": query, "limit": limit}).fetchall()
        return [self._row_to_dict(row) for row in rows]

    def generate_srt_subtitle(
        self, transcription_data: Dict[str, Any], output_path: Path
    ) -> bool:
        """
        Generate SRT subtitle file from transcription data.

        Args:
            transcription_data: Transcription data with segments
            output_path: Path to save SRT file

        Returns:
            True if successful, False otherwise (including when there are no
            segments)
        """
        return self._write_subtitle_file(
            transcription_data, output_path, self._format_srt_time, numbered=True
        )

    def generate_vtt_subtitle(
        self, transcription_data: Dict[str, Any], output_path: Path
    ) -> bool:
        """
        Generate VTT subtitle file from transcription data.

        Args:
            transcription_data: Transcription data with segments
            output_path: Path to save VTT file

        Returns:
            True if successful, False otherwise (including when there are no
            segments)
        """
        return self._write_subtitle_file(
            transcription_data, output_path, self._format_vtt_time, header="WEBVTT\n\n"
        )

    def _write_subtitle_file(
        self,
        transcription_data: Dict[str, Any],
        output_path: Path,
        format_time,
        header: str = "",
        numbered: bool = False,
    ) -> bool:
        """Render all cues in memory, then write them to ``output_path`` once.

        Args:
            transcription_data: Transcription data with segments
            output_path: Destination file
            format_time: Callable turning seconds into a cue timestamp
            header: Text written before the first cue
            numbered: Whether each cue is preceded by its 1-based index

        Returns:
            True if the file was written, False on empty input or any error
        """
        try:
            segments = transcription_data.get("segments", [])
            if not segments:
                return False

            parts = [header]
            for index, segment in enumerate(segments, 1):
                start_time = format_time(segment.get("start", 0.0))
                end_time = format_time(segment.get("end", 0.0))
                text_value = segment.get("text", "").strip()
                if numbered:
                    parts.append(f"{index}\n")
                parts.append(f"{start_time} --> {end_time}\n{text_value}\n\n")

            # The file is opened only after every cue rendered, so a malformed
            # segment cannot leave a truncated subtitle file behind.
            with open(output_path, "w", encoding="utf-8") as f:
                f.write("".join(parts))
            return True
        except Exception:
            # Best-effort contract: callers get False instead of an exception.
            return False

    @staticmethod
    def _row_to_dict(row: Any) -> Dict[str, Any]:
        """Map an ORM record or raw result row to the public dictionary shape.

        ``segments_json`` is decoded when it arrives as a JSON string (as it
        does from the raw SQL search); undecodable JSON becomes an empty list.
        """
        segments = row.segments_json
        if isinstance(segments, str):
            try:
                segments = json.loads(segments)
            except json.JSONDecodeError:
                segments = []
        return {
            "id": row.id,
            "video_id": row.media_file_id,
            "language": row.language,
            "duration": row.duration,
            "processing_time": row.processing_time,
            "model_used": row.model_used,
            "segments": segments,
            "full_text": row.full_text,
            "created_at": row.created_at,
        }

    @staticmethod
    def _format_timestamp(seconds: float, separator: str) -> str:
        """Format seconds as ``HH:MM:SS<separator>mmm``.

        Works on whole milliseconds, rounded to nearest, so values such as
        1.001 do not lose a millisecond to float truncation. Negative input is
        clamped to zero.
        """
        total_ms = max(0, int(round(float(seconds) * 1000)))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millisecs = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d}{separator}{millisecs:03d}"

    def _format_srt_time(self, seconds: float) -> str:
        """Format time for SRT format (HH:MM:SS,mmm)."""
        return self._format_timestamp(seconds, ",")

    def _format_vtt_time(self, seconds: float) -> str:
        """Format time for VTT format (HH:MM:SS.mmm)."""
        return self._format_timestamp(seconds, ".")
modules/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Processing modules for different media types."""
@@ -0,0 +1,5 @@
1
+ """Audio processing modules."""
2
+
3
+ from .converter import AudioConverter
4
+
5
+ __all__ = ['AudioConverter']
@@ -0,0 +1,197 @@
1
+ """
2
+ Audio conversion service.
3
+
4
+ This module provides audio conversion functionality using FFmpeg.
5
+ """
6
+
7
+ import tempfile
8
+ from pathlib import Path
9
+ from typing import Any, Dict, Optional, Union
10
+
11
+ import ffmpeg
12
+
13
+ from core.base import ProcessingResult
14
+ from core.base_service import BaseService
15
+ from core.config import Config
16
+ from utils.helpers import safe_filename
17
+
18
+
19
class AudioConverter(BaseService):
    """
    Audio conversion service using FFmpeg.

    Handles audio format conversion, quality adjustment, and basic processing
    (trimming, resampling, channel count) through the ffmpeg-python bindings.
    """

    def __init__(self, config: Config, verbose: bool = False, db_service=None):
        """Initialize the audio converter."""
        super().__init__(config, verbose, db_service)

        # Supported formats: output format name -> FFmpeg encoder and extension
        self.supported_formats = {
            "mp3": {"codec": "libmp3lame", "ext": ".mp3"},
            "wav": {"codec": "pcm_s16le", "ext": ".wav"},
            "flac": {"codec": "flac", "ext": ".flac"},
            "aac": {"codec": "aac", "ext": ".aac"},
            "ogg": {"codec": "libvorbis", "ext": ".ogg"},
            "m4a": {"codec": "aac", "ext": ".m4a"},
        }

    def convert(
        self,
        input_file: Union[str, Path],
        output_file: Union[str, Path],
        format: str = "mp3",
        bitrate: int = 320,
        **kwargs,
    ) -> ProcessingResult:
        """
        Convert audio file to different format.

        Args:
            input_file: Path to input audio file
            output_file: Path to output audio file
            format: Output format (mp3, wav, flac, aac, ogg, m4a)
            bitrate: Audio bitrate in kbps
            **kwargs: Additional conversion options. Recognized keys:
                ``start_time`` (seek position, FFmpeg ``-ss``),
                ``duration`` (length to convert, FFmpeg ``-t``),
                ``sample_rate`` (FFmpeg ``-ar``) and
                ``channels`` (FFmpeg ``-ac``). Keys set to None are ignored.

        Returns:
            ProcessingResult with conversion details

        Raises:
            FileNotFoundError: If the input file does not exist.
            ValueError: If the input is not a file, the format is unsupported,
                or the output path refers to the input file itself.
            RuntimeError: If FFmpeg fails or the output file is not produced.
        """
        input_path = Path(input_file)
        output_path = Path(output_file)

        # Validate input file
        if not input_path.exists():
            raise FileNotFoundError(f"Input file not found: {input_path}")

        if not input_path.is_file():
            raise ValueError(f"Input path is not a file: {input_path}")

        # Validate format
        format_key = format.lower()
        if format_key not in self.supported_formats:
            raise ValueError(
                f"Unsupported format: {format}. Supported: {list(self.supported_formats.keys())}"
            )

        # Converting a file onto itself would let FFmpeg overwrite the input
        # while reading it, and the failure cleanup below would then delete it.
        if output_path.exists() and output_path.samefile(input_path):
            raise ValueError(f"Output path must differ from input path: {input_path}")

        # Ensure output directory exists
        output_path.parent.mkdir(parents=True, exist_ok=True)

        input_options, output_options = self._split_ffmpeg_options(kwargs)

        try:
            self.logger.info(f"Converting {input_path} to {output_path}")

            # Build FFmpeg stream; seek/trim options apply to the input side
            stream = ffmpeg.input(str(input_path), **input_options)

            # Apply audio codec, bitrate and any resampling options
            format_info = self.supported_formats[format_key]
            stream = ffmpeg.output(
                stream,
                str(output_path),
                acodec=format_info["codec"],
                audio_bitrate=f"{bitrate}k",
                **output_options,
            )

            # Run conversion
            ffmpeg.run(stream, overwrite_output=True, quiet=True)

            # Verify output file was created
            if not output_path.exists():
                raise RuntimeError("Output file was not created")

            # Get file info
            input_size = input_path.stat().st_size
            output_size = output_path.stat().st_size

            self.logger.info(
                f"Conversion successful: {input_size:,} -> {output_size:,} bytes"
            )

            return ProcessingResult(
                success=True,
                message=f"Converted {input_path.name} to {output_path.name}",
                input_file=str(input_path),
                output_file=str(output_path),
                duration_seconds=0,  # Could be calculated from metadata
                metadata={
                    "input_size": input_size,
                    "output_size": output_size,
                    "format": format,
                    "bitrate": bitrate,
                    "compression_ratio": round(output_size / input_size, 2)
                    if input_size > 0
                    else 0,
                },
            )

        except ffmpeg.Error as e:
            self._remove_partial_output(output_path)
            raise RuntimeError(
                f"FFmpeg conversion failed: {self._ffmpeg_error_detail(e)}"
            ) from e
        except Exception as e:
            self._remove_partial_output(output_path)
            raise RuntimeError(f"Audio conversion failed: {e}") from e

    def get_audio_info(self, file_path: Union[str, Path]) -> Dict[str, Any]:
        """
        Get audio file information using FFprobe.

        Args:
            file_path: Path to audio file

        Returns:
            Dictionary with the keys ``format``, ``duration``, ``size``,
            ``bitrate``, ``codec``, ``sample_rate``, ``channels`` and
            ``channel_layout``, taken from the container and the first audio
            stream

        Raises:
            FileNotFoundError: If the file does not exist.
            RuntimeError: If FFprobe fails, the file has no audio stream, or
                a probed value cannot be converted.
        """
        file_path = Path(file_path)

        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        try:
            # Use ffmpeg-python to probe the file
            probe = ffmpeg.probe(str(file_path))

            # Extract format info
            format_info = probe.get("format", {})

            # Extract audio stream info (first audio stream wins)
            audio_stream = next(
                (
                    stream
                    for stream in probe.get("streams", [])
                    if stream.get("codec_type") == "audio"
                ),
                None,
            )

            if not audio_stream:
                raise ValueError("No audio stream found in file")

            return {
                "format": format_info.get("format_name", "unknown"),
                "duration": float(format_info.get("duration", 0)),
                "size": int(format_info.get("size", 0)),
                "bitrate": int(format_info.get("bit_rate", 0)),
                "codec": audio_stream.get("codec_name", "unknown"),
                "sample_rate": int(audio_stream.get("sample_rate", 0)),
                "channels": int(audio_stream.get("channels", 0)),
                "channel_layout": audio_stream.get("channel_layout", "unknown"),
            }

        except ffmpeg.Error as e:
            raise RuntimeError(f"FFprobe failed: {self._ffmpeg_error_detail(e)}") from e
        except Exception as e:
            raise RuntimeError(f"Audio analysis failed: {e}") from e

    @staticmethod
    def _split_ffmpeg_options(options: Dict[str, Any]):
        """Translate convert() keyword options into FFmpeg input/output options.

        Returns:
            Tuple ``(input_options, output_options)`` of keyword dictionaries
            for ``ffmpeg.input`` and ``ffmpeg.output``; unset or None options
            are omitted.
        """
        input_map = {"start_time": "ss", "duration": "t"}
        output_map = {"sample_rate": "ar", "channels": "ac"}
        input_options = {
            flag: options[name]
            for name, flag in input_map.items()
            if options.get(name) is not None
        }
        output_options = {
            flag: options[name]
            for name, flag in output_map.items()
            if options.get(name) is not None
        }
        return input_options, output_options

    @staticmethod
    def _remove_partial_output(output_path: Path) -> None:
        """Delete the output file left behind by a failed conversion, if any."""
        if output_path.exists():
            output_path.unlink()

    @staticmethod
    def _ffmpeg_error_detail(error: Exception) -> str:
        """Return FFmpeg's captured stderr when available, else ``str(error)``.

        In quiet mode the diagnostics live in the exception's ``stderr``
        attribute rather than in its message.
        """
        stderr = getattr(error, "stderr", None)
        if isinstance(stderr, bytes):
            stderr = stderr.decode("utf-8", errors="replace")
        detail = (stderr or "").strip()
        return detail or str(error)
@@ -0,0 +1,16 @@
1
+ """Video processing modules."""
2
+
3
+ # Import the new service modules
4
+ from .converter import VideoConverter
5
+ from .services.download_service import VideoDownloadService
6
+ from .services.metadata_service import MetadataService
7
+ from .services.playlist_service import PlaylistService
8
+ from .services.transcription_service import TranscriptionService
9
+
10
+ __all__ = [
11
+ "VideoDownloadService",
12
+ "PlaylistService",
13
+ "MetadataService",
14
+ "TranscriptionService",
15
+ "VideoConverter",
16
+ ]
@@ -0,0 +1,191 @@
1
+ """
2
+ Video converter module.
3
+
4
+ This module provides video format conversion functionality using FFmpeg.
5
+ """
6
+
7
+ import subprocess
8
+ from pathlib import Path
9
+ from typing import Union
10
+
11
+ from core.base import BaseConverter, ProcessingResult
12
+ from core.config import Config
13
+ from core.logger import get_logger
14
+
15
+
16
class VideoConverter(BaseConverter):
    """
    Video converter using FFmpeg.

    Supports various input and output formats. Conversion runs the ``ffmpeg``
    executable as a subprocess and reports the outcome as a ProcessingResult
    instead of raising.
    """

    def __init__(self, config: Config, verbose: bool = False):
        """Initialize the video converter."""
        super().__init__(config, verbose)
        container_formats = (
            "mp4",
            "avi",
            "mkv",
            "mov",
            "wmv",
            "flv",
            "webm",
            "m4v",
            "3gp",
        )
        # Separate list objects so one can be changed without the other.
        self.supported_input_formats = list(container_formats)
        self.supported_output_formats = list(container_formats)
        self.logger = get_logger("VideoConverter", verbose=verbose)

    def convert(
        self, input_path: Union[str, Path], output_path: Union[str, Path], **kwargs
    ) -> ProcessingResult:
        """
        Convert video from one format to another.

        Args:
            input_path: Path to input file
            output_path: Path to output file
            **kwargs: Additional conversion options, forwarded to
                ``_build_command`` (``quality``, ``codec``, ``overwrite``)

        Returns:
            ProcessingResult with conversion details; failures are reported
            with ``success=False`` and an ``errors`` list rather than raised
        """
        try:
            input_path = Path(input_path).expanduser().resolve()
            output_path = Path(output_path).expanduser().resolve()

            # Validate input
            if not self.validate_input(input_path):
                return ProcessingResult(
                    success=False,
                    message=f"Invalid input file: {input_path}",
                    errors=[f"Input file not found or invalid: {input_path}"],
                )

            # Validate formats
            if not self.is_supported_format(input_path, is_input=True):
                return ProcessingResult(
                    success=False,
                    message=f"Unsupported input format: {input_path.suffix}",
                    errors=[f"Unsupported input format: {input_path.suffix}"],
                )

            if not self.is_supported_format(output_path, is_input=False):
                return ProcessingResult(
                    success=False,
                    message=f"Unsupported output format: {output_path.suffix}",
                    errors=[f"Unsupported output format: {output_path.suffix}"],
                )

            # Ensure output directory exists
            if not self.ensure_output_dir(output_path):
                return ProcessingResult(
                    success=False,
                    message=f"Failed to create output directory: {output_path.parent}",
                    errors=[f"Cannot create output directory: {output_path.parent}"],
                )

            # Build FFmpeg command
            cmd = self._build_command(input_path, output_path, **kwargs)

            self.logger.info(f"Converting video: {input_path} -> {output_path}")
            self.logger.debug(f"Command: {' '.join(cmd)}")

            # Execute conversion. stdin is detached so FFmpeg can never wait
            # on a prompt whose text is hidden in the captured output.
            result = subprocess.run(
                cmd, capture_output=True, text=True, stdin=subprocess.DEVNULL
            )

            if result.returncode == 0 and output_path.exists():
                return ProcessingResult(
                    success=True,
                    message=f"Video converted successfully: {output_path}",
                    output_path=output_path,
                    metadata={
                        "input_file": str(input_path),
                        "output_file": str(output_path),
                        "input_size": input_path.stat().st_size,
                        "output_size": output_path.stat().st_size,
                        "command": " ".join(cmd),
                    },
                )

            # Fall back to the exit code when FFmpeg printed nothing.
            detail = (result.stderr or "").strip() or (
                f"FFmpeg exited with code {result.returncode}"
            )
            return ProcessingResult(
                success=False,
                message=f"Conversion failed: {detail}",
                errors=[detail],
            )

        except Exception as e:
            self.logger.error(f"Conversion failed: {e}")
            return ProcessingResult(
                success=False, message=f"Conversion failed: {str(e)}", errors=[str(e)]
            )

    def _build_command(
        self,
        input_path: Path,
        output_path: Path,
        quality: str = "medium",
        codec: str = "auto",
        overwrite: bool = True,
        **kwargs,
    ) -> list:
        """
        Build FFmpeg command.

        Args:
            input_path: Input file path
            output_path: Output file path
            quality: Output quality ("high", "medium" or "low"; anything else
                is treated as "medium")
            codec: Video codec, or "auto" to pick one from the output suffix
            overwrite: Replace an existing output file (``-y``) when True,
                otherwise make FFmpeg exit without writing (``-n``)
            **kwargs: Additional options (currently unused)

        Returns:
            Command list for subprocess
        """
        suffix = output_path.suffix.lower()

        # Answer the overwrite question up front; the command runs with
        # captured output, so an interactive prompt would be invisible.
        cmd = ["ffmpeg", "-y" if overwrite else "-n", "-i", str(input_path)]

        # Video codec
        if codec == "auto":
            video_codec = "libvpx-vp9" if suffix == ".webm" else "libx264"
        else:
            video_codec = codec
        cmd.extend(["-c:v", video_codec])

        # Quality settings: (CRF, preset) per quality name
        quality_presets = {
            "high": ("18", "slow"),
            "medium": ("23", "medium"),
            "low": ("28", "fast"),
        }
        crf, preset = quality_presets.get(quality, quality_presets["medium"])
        cmd.extend(["-crf", crf, "-preset", preset])

        # Audio codec: the WebM muxer rejects AAC, so WebM output gets Opus.
        audio_codec = "libopus" if suffix == ".webm" else "aac"
        cmd.extend(["-c:a", audio_codec])

        # Log level: "error" rather than "quiet" so a failed run still leaves
        # a reason in stderr for the caller to report.
        cmd.extend(["-v", "info" if self.verbose else "error"])

        # Output file
        cmd.append(str(output_path))

        return cmd