spatelier-0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. analytics/__init__.py +1 -0
  2. analytics/reporter.py +497 -0
  3. cli/__init__.py +1 -0
  4. cli/app.py +147 -0
  5. cli/audio.py +129 -0
  6. cli/cli_analytics.py +320 -0
  7. cli/cli_utils.py +282 -0
  8. cli/error_handlers.py +122 -0
  9. cli/files.py +299 -0
  10. cli/update.py +325 -0
  11. cli/video.py +823 -0
  12. cli/worker.py +615 -0
  13. core/__init__.py +1 -0
  14. core/analytics_dashboard.py +368 -0
  15. core/base.py +303 -0
  16. core/base_service.py +69 -0
  17. core/config.py +345 -0
  18. core/database_service.py +116 -0
  19. core/decorators.py +263 -0
  20. core/error_handler.py +210 -0
  21. core/file_tracker.py +254 -0
  22. core/interactive_cli.py +366 -0
  23. core/interfaces.py +166 -0
  24. core/job_queue.py +437 -0
  25. core/logger.py +79 -0
  26. core/package_updater.py +469 -0
  27. core/progress.py +228 -0
  28. core/service_factory.py +295 -0
  29. core/streaming.py +299 -0
  30. core/worker.py +765 -0
  31. database/__init__.py +1 -0
  32. database/connection.py +265 -0
  33. database/metadata.py +516 -0
  34. database/models.py +288 -0
  35. database/repository.py +592 -0
  36. database/transcription_storage.py +219 -0
  37. modules/__init__.py +1 -0
  38. modules/audio/__init__.py +5 -0
  39. modules/audio/converter.py +197 -0
  40. modules/video/__init__.py +16 -0
  41. modules/video/converter.py +191 -0
  42. modules/video/fallback_extractor.py +334 -0
  43. modules/video/services/__init__.py +18 -0
  44. modules/video/services/audio_extraction_service.py +274 -0
  45. modules/video/services/download_service.py +852 -0
  46. modules/video/services/metadata_service.py +190 -0
  47. modules/video/services/playlist_service.py +445 -0
  48. modules/video/services/transcription_service.py +491 -0
  49. modules/video/transcription_service.py +385 -0
  50. modules/video/youtube_api.py +397 -0
  51. spatelier/__init__.py +33 -0
  52. spatelier-0.3.0.dist-info/METADATA +260 -0
  53. spatelier-0.3.0.dist-info/RECORD +59 -0
  54. spatelier-0.3.0.dist-info/WHEEL +5 -0
  55. spatelier-0.3.0.dist-info/entry_points.txt +2 -0
  56. spatelier-0.3.0.dist-info/licenses/LICENSE +21 -0
  57. spatelier-0.3.0.dist-info/top_level.txt +7 -0
  58. utils/__init__.py +1 -0
  59. utils/helpers.py +250 -0
modules/video/services/transcription_service.py
@@ -0,0 +1,491 @@
+"""
+Unified transcription service for video files.
+
+This module provides automatic transcription capabilities using OpenAI Whisper,
+with database integration, analytics tracking, and subtitle embedding.
+"""
+
+import time
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+
+try:
+    import whisper
+    from faster_whisper import WhisperModel
+
+    WHISPER_AVAILABLE = True
+except ImportError:
+    WHISPER_AVAILABLE = False
+
+from core.base_service import BaseService
+from core.config import Config
+from database.models import MediaType
+from database.transcription_storage import SQLiteTranscriptionStorage
+from utils.helpers import get_file_hash, get_file_type
+
+# Global model cache to avoid reloading models
+_MODEL_CACHE = {}
+
+
+class TranscriptionService(BaseService):
+    """
+    Unified transcription service using OpenAI Whisper.
+
+    Supports both openai-whisper and faster-whisper for different speed/accuracy needs.
+    Includes database integration, analytics tracking, and subtitle embedding.
+    """
+
+    def __init__(self, config: Config, verbose: bool = False, db_service=None):
+        """
+        Initialize the transcription service.
+
+        Args:
+            config: Configuration instance
+            verbose: Enable verbose logging
+            db_service: Optional database service instance
+        """
+        super().__init__(config, verbose, db_service)
+
+        # Transcription configuration
+        self.model_size = self.config.transcription.default_model
+        self.use_faster_whisper = self.config.transcription.use_faster_whisper
+        self.device = self.config.transcription.device
+        self.compute_type = self.config.transcription.compute_type
+
+        # Model and storage (lazy-loaded)
+        self.model = None
+        self.transcription_storage = None
+
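The constructor reads its settings from `config.transcription`, and the transcription paths below also read `default_language`. A hypothetical sketch of that config section, with field names mirroring the attributes the code accesses and defaults that are purely illustrative:

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class TranscriptionConfig:
        # Field names mirror what TranscriptionService reads; the default
        # values are illustrative assumptions, not the package's actual ones.
        default_model: str = "base"              # Whisper model size
        use_faster_whisper: bool = True          # backend selection
        device: str = "cpu"                      # "cpu" or "cuda"
        compute_type: str = "int8"               # faster-whisper quantization
        default_language: Optional[str] = None   # None = let Whisper auto-detect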
+    def _initialize_transcription(self, model_size: Optional[str] = None):
+        """Initialize transcription service if not already done."""
+        if self.model is None:
+            if not WHISPER_AVAILABLE:
+                raise ImportError(
+                    "Whisper dependencies not available. Install with: pip install spatelier[transcription]"
+                )
+
+            model_size = model_size or self.model_size
+
+            # Load model with caching
+            cache_key = f"{model_size}_{self.device}_{self.compute_type}_{self.use_faster_whisper}"
+
+            if cache_key in _MODEL_CACHE:
+                self.logger.info(f"Using cached Whisper model: {model_size}")
+                self.model = _MODEL_CACHE[cache_key]
+            else:
+                if self.use_faster_whisper:
+                    self.logger.info(f"Loading faster-whisper model: {model_size}")
+                    self.model = WhisperModel(
+                        model_size, device=self.device, compute_type=self.compute_type
+                    )
+                else:
+                    self.logger.info(f"Loading openai-whisper model: {model_size}")
+                    self.model = whisper.load_model(model_size)
+
+                _MODEL_CACHE[cache_key] = self.model
+                self.logger.info("Whisper model loaded and cached successfully")
+
+        # Initialize storage
+        if self.transcription_storage is None:
+            if self.db_manager is None:
+                self.db_manager = self.db_factory.get_db_manager()
+
+            session = self.db_manager.get_sqlite_session()
+            self.transcription_storage = SQLiteTranscriptionStorage(session)
+            self.logger.info("SQLite transcription storage initialized")
+
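The cache key folds in every setting that affects the loaded weights (model size, device, compute type, backend), so any two service instances configured identically share one in-memory model for the lifetime of the process. The same pattern in isolation, with illustrative names:

    from faster_whisper import WhisperModel

    _CACHE = {}

    def get_cached_model(size: str, device: str, compute_type: str) -> WhisperModel:
        key = f"{size}_{device}_{compute_type}"
        if key not in _CACHE:
            # Only the first caller pays the model-load cost.
            _CACHE[key] = WhisperModel(size, device=device, compute_type=compute_type)
        return _CACHE[key]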
+    def transcribe_video(
+        self,
+        video_path: Union[str, Path],
+        media_file_id: Optional[int] = None,
+        language: Optional[str] = None,
+        model_size: Optional[str] = None,
+    ) -> bool:
+        """
+        Transcribe a video file.
+
+        Args:
+            video_path: Path to video file
+            media_file_id: Optional media file ID for database tracking
+            language: Language code for transcription
+            model_size: Whisper model size
+
+        Returns:
+            True if transcription successful, False otherwise
+        """
+        try:
+            video_path = Path(video_path)
+            if not video_path.exists():
+                self.logger.error(f"Video file not found: {video_path}")
+                return False
+
+            if media_file_id is None:
+                existing_media = self.repos.media.get_by_file_path(str(video_path))
+                if existing_media:
+                    media_file_id = existing_media.id
+                else:
+                    created_media = self.repos.media.create(
+                        file_path=video_path,
+                        file_name=video_path.name,
+                        file_size=video_path.stat().st_size,
+                        file_hash=get_file_hash(video_path),
+                        media_type=MediaType.VIDEO,
+                        mime_type=get_file_type(video_path),
+                        title=video_path.stem,
+                        source_platform="local",
+                        source_id=None,
+                        source_url=None,
+                    )
+                    media_file_id = created_media.id
+
+            # Initialize transcription service
+            effective_model_size = model_size or self.model_size
+            self._initialize_transcription(effective_model_size)
+
+            # Get language
+            language = language or self.config.transcription.default_language
+
+            # Track transcription start
+            self.repos.analytics.track_event(
+                "transcription_start",
+                event_data={
+                    "video_path": str(video_path),
+                    "media_file_id": media_file_id,
+                    "language": language,
+                },
+            )
+
+            # Transcribe video
+            self.logger.info(f"Starting transcription of: {video_path}")
+            start_time = time.time()
+
+            if self.use_faster_whisper:
+                result = self._transcribe_with_faster_whisper(video_path, language)
+            else:
+                result = self._transcribe_with_openai_whisper(video_path, language)
+
+            processing_time = time.time() - start_time
+            result["processing_time"] = processing_time
+            result["model_used"] = f"whisper-{effective_model_size}"
+            # Keep the detected language when no explicit language was requested
+            result["language"] = language or result.get("language")
+
+            self.logger.info(f"Transcription completed in {processing_time:.1f}s")
+
+            if result and "segments" in result:
+                # Store transcription in database
+                transcription_id = self.transcription_storage.store_transcription(
+                    media_file_id, result
+                )
+
+                if transcription_id:
+                    self.logger.info(
+                        f"Transcription stored with ID: {transcription_id}"
+                    )
+
+                    # Track successful transcription
+                    self.repos.analytics.track_event(
+                        "transcription_completed",
+                        event_data={
+                            "video_path": str(video_path),
+                            "media_file_id": media_file_id,
+                            "transcription_id": transcription_id,
+                            "segments_count": len(result["segments"]),
+                        },
+                    )
+
+                    return True
+                else:
+                    self.logger.error("Failed to store transcription in database")
+                    return False
+            else:
+                self.logger.error("Transcription failed - no segments generated")
+                return False
+
+        except Exception as e:
+            self.logger.error(f"Transcription failed: {e}")
+
+            # Track transcription error
+            self.repos.analytics.track_event(
+                "transcription_error",
+                event_data={
+                    "video_path": str(video_path),
+                    "media_file_id": media_file_id,
+                    "error": str(e),
+                },
+            )
+
+            return False
+
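A typical call, assuming an application-supplied `Config` (its construction here is hypothetical) and the wheel's package layout for the import path. One call resolves the media record, loads the model, transcribes, stores the segments, and emits the analytics events:

    from core.config import Config
    from modules.video.services.transcription_service import TranscriptionService

    config = Config()  # hypothetical: real construction depends on the application
    service = TranscriptionService(config, verbose=True)

    if service.transcribe_video("talks/keynote.mp4", language="en", model_size="base"):
        print("transcription stored")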
+    def _transcribe_with_faster_whisper(self, video_path: Path, language: Optional[str]) -> Dict:
+        """Transcribe using faster-whisper (CTranslate2 backend: faster, lighter on memory)."""
+        result = self.model.transcribe(
+            str(video_path), language=language, word_timestamps=True
+        )
+
+        # faster-whisper returns a (segments, info) tuple; segments is a lazy generator
+        segments, info = result
+
+        # Convert segments to our format (iterating drives the actual decoding)
+        transcription_segments = []
+        for segment in segments:
+            transcription_segments.append(
+                {
+                    "start": segment.start,
+                    "end": segment.end,
+                    "text": segment.text.strip(),
+                    "confidence": getattr(segment, "avg_logprob", 0.0),
+                }
+            )
+
+        return {
+            "segments": transcription_segments,
+            "language": info.language,
+            "language_probability": info.language_probability,
+            "duration": info.duration,
+        }
+
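One subtlety: faster-whisper yields its segments as a lazy generator, so the conversion loop above is what actually drives decoding. Used directly, the library looks like this (file name illustrative):

    from faster_whisper import WhisperModel

    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, info = model.transcribe("clip.mp4", word_timestamps=True)
    print(info.language, info.language_probability, info.duration)
    for seg in segments:  # decoding happens during this iteration
        print(f"[{seg.start:7.2f} -> {seg.end:7.2f}] {seg.text.strip()}")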
+    def _transcribe_with_openai_whisper(self, video_path: Path, language: Optional[str]) -> Dict:
+        """Transcribe using openai-whisper (the reference implementation)."""
+        result = self.model.transcribe(
+            str(video_path), language=language, word_timestamps=True
+        )
+
+        # Convert to our format
+        transcription_segments = []
+        for segment in result["segments"]:
+            transcription_segments.append(
+                {
+                    "start": segment["start"],
+                    "end": segment["end"],
+                    "text": segment["text"].strip(),
+                    "confidence": segment.get("avg_logprob", 0.0),
+                }
+            )
+
+        return {
+            "segments": transcription_segments,
+            "language": result.get("language", language),
+            "language_probability": 1.0,  # openai-whisper doesn't provide this
+            # openai-whisper doesn't report a duration either; derive it from the last segment
+            "duration": transcription_segments[-1]["end"] if transcription_segments else 0.0,
+        }
+
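For comparison, the reference openai-whisper API is eager and returns a plain dict; per-segment avg_logprob is available, but no overall duration, which is why the method above derives it from the final segment. A direct-usage sketch (file name illustrative):

    import whisper

    model = whisper.load_model("base")
    result = model.transcribe("clip.mp4", language="en", word_timestamps=True)
    print(result["language"], len(result["segments"]))
    for seg in result["segments"]:
        print(seg["start"], seg["end"], seg["text"].strip())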
+    def embed_subtitles(
+        self,
+        video_path: Union[str, Path],
+        output_path: Union[str, Path],
+        media_file_id: Optional[int] = None,
+    ) -> bool:
+        """
+        Embed subtitles into video file.
+
+        Args:
+            video_path: Path to input video file
+            output_path: Path for output video with subtitles
+            media_file_id: Optional media file ID for database tracking
+
+        Returns:
+            True if embedding successful, False otherwise
+        """
+        try:
+            video_path = Path(video_path)
+            output_path = Path(output_path)
+
+            if not video_path.exists():
+                self.logger.error(f"Video file not found: {video_path}")
+                return False
+
+            if media_file_id is None:
+                existing_media = self.repos.media.get_by_file_path(str(video_path))
+                if existing_media:
+                    media_file_id = existing_media.id
+                else:
+                    created_media = self.repos.media.create(
+                        file_path=video_path,
+                        file_name=video_path.name,
+                        file_size=video_path.stat().st_size,
+                        file_hash=get_file_hash(video_path),
+                        media_type=MediaType.VIDEO,
+                        mime_type=get_file_type(video_path),
+                        title=video_path.stem,
+                        source_platform="local",
+                        source_id=None,
+                        source_url=None,
+                    )
+                    media_file_id = created_media.id
+
+            # Initialize transcription service
+            self._initialize_transcription()
+
+            # Get transcription data
+            transcription_data = self._get_transcription_data(video_path, media_file_id)
+
+            if not transcription_data or "segments" not in transcription_data:
+                self.logger.error("No transcription data available for embedding")
+                return False
+
+            # Embed subtitles
+            success = self._embed_subtitles_into_video(
+                video_path, output_path, transcription_data
+            )
+
+            if success:
+                self.logger.info(
+                    f"Successfully embedded subtitles into video: {output_path}"
+                )
+
+                # Track successful embedding
+                self.repos.analytics.track_event(
+                    "subtitle_embedding_completed",
+                    event_data={
+                        "input_path": str(video_path),
+                        "output_path": str(output_path),
+                        "media_file_id": media_file_id,
+                    },
+                )
+
+                return True
+            else:
+                self.logger.error("Failed to embed subtitles")
+                return False
+
+        except Exception as e:
+            self.logger.error(f"Subtitle embedding failed: {e}")
+
+            # Track embedding error
+            self.repos.analytics.track_event(
+                "subtitle_embedding_error",
+                event_data={
+                    "input_path": str(video_path),
+                    "output_path": str(output_path),
+                    "media_file_id": media_file_id,
+                    "error": str(e),
+                },
+            )
+
+            return False
+
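In-place embedding is supported: when `output_path` resolves to the same file as `video_path`, the helper below writes to a temporary file and atomically swaps it in. Continuing the earlier usage sketch (paths illustrative):

    # Write to a new file:
    service.embed_subtitles("talks/keynote.mp4", "talks/keynote_subtitled.mp4")

    # Overwrite in place (routed through a temporary file internally):
    service.embed_subtitles("talks/keynote.mp4", "talks/keynote.mp4")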
+    def _get_transcription_data(
+        self, video_path: Path, media_file_id: Optional[int] = None
+    ) -> Optional[Dict]:
+        """Get transcription data from database or transcribe if not found."""
+        # Try to get from database first
+        if media_file_id and self.transcription_storage:
+            transcription = self.transcription_storage.get_transcription(media_file_id)
+            if transcription:
+                return {
+                    "segments": transcription.get("segments", []),
+                    "language": transcription.get("language", "en"),
+                    "duration": transcription.get("duration", 0.0),
+                }
+
+        # If not in database, transcribe now
+        self.logger.info("Transcription not found in database, transcribing now...")
+        language = self.config.transcription.default_language
+
+        if self.use_faster_whisper:
+            result = self._transcribe_with_faster_whisper(video_path, language)
+        else:
+            result = self._transcribe_with_openai_whisper(video_path, language)
+
+        return result
+
+    def _embed_subtitles_into_video(
+        self, video_path: Path, output_path: Path, transcription_data: Dict[str, Any]
+    ) -> bool:
+        """Embed subtitles into video file."""
+        import ffmpeg
+
+        subtitle_file = video_path.parent / f"{video_path.stem}_temp.srt"
+        temp_output_path = None
+        try:
+            # Create subtitle file
+            self._create_srt_file(subtitle_file, transcription_data["segments"])
+
+            final_output_path = output_path
+            if output_path.resolve() == video_path.resolve():
+                temp_output_path = video_path.with_name(
+                    f"{video_path.stem}_subs_tmp{video_path.suffix}"
+                )
+                final_output_path = temp_output_path
+
+            # Embed subtitles using ffmpeg
+            video_input = ffmpeg.input(str(video_path))
+            subtitle_input = ffmpeg.input(str(subtitle_file))
+            (
+                ffmpeg.output(
+                    video_input,
+                    subtitle_input,
+                    str(final_output_path),
+                    vcodec="copy",
+                    acodec="copy",
+                    scodec="mov_text",
+                    **{"metadata:s:s:0": "language=eng"},
+                )
+                .overwrite_output()
+                .run(quiet=True)
+            )
+
+            if temp_output_path:
+                temp_output_path.replace(output_path)
+
+            return True
+        except Exception as e:
+            self.logger.error(f"Failed to embed subtitles: {e}")
+            return False
+        finally:
+            subtitle_file.unlink(missing_ok=True)
+            if temp_output_path and temp_output_path.exists():
+                temp_output_path.unlink()
+
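The ffmpeg-python pipeline above stream-copies the video and audio and muxes the SRT text in as a mov_text track, the MP4-native subtitle codec. It compiles to roughly the following command; a subprocess sketch of the equivalent (paths illustrative, and the exact argument order ffmpeg-python emits may differ):

    import subprocess

    subprocess.run(
        [
            "ffmpeg", "-y",
            "-i", "input.mp4",               # source video/audio
            "-i", "input_temp.srt",          # generated subtitles
            "-c:v", "copy", "-c:a", "copy",  # no re-encoding
            "-c:s", "mov_text",              # MP4-compatible subtitle codec
            "-metadata:s:s:0", "language=eng",
            "output.mp4",
        ],
        check=True,
    )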
+    def _create_srt_file(self, subtitle_file: Path, segments: list):
+        """Create SRT subtitle file from segments."""
+        with open(subtitle_file, "w", encoding="utf-8") as f:
+            for i, segment in enumerate(segments, 1):
+                start_time = self._format_timestamp(segment["start"])
+                end_time = self._format_timestamp(segment["end"])
+                text = segment["text"].strip()
+
+                f.write(f"{i}\n")
+                f.write(f"{start_time} --> {end_time}\n")
+                f.write(f"{text}\n\n")
+
+    def _format_timestamp(self, seconds: float) -> str:
+        """Format timestamp for SRT format."""
+        hours = int(seconds // 3600)
+        minutes = int((seconds % 3600) // 60)
+        secs = seconds % 60
+        return f"{hours:02d}:{minutes:02d}:{secs:06.3f}".replace(".", ",")
+
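SRT timestamps use `HH:MM:SS,mmm` with a comma before the milliseconds, hence the final replace. A standalone copy of the same logic with worked examples:

    def format_timestamp(seconds: float) -> str:
        # Mirrors TranscriptionService._format_timestamp
        hours = int(seconds // 3600)
        minutes = int((seconds % 3600) // 60)
        secs = seconds % 60
        return f"{hours:02d}:{minutes:02d}:{secs:06.3f}".replace(".", ",")

    assert format_timestamp(65.25) == "00:01:05,250"
    assert format_timestamp(3661.5) == "01:01:01,500"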
+    def get_transcription(self, media_file_id: int) -> Optional[Dict[str, Any]]:
+        """
+        Get transcription data for a media file.
+
+        Args:
+            media_file_id: Media file ID
+
+        Returns:
+            Transcription data or None if not found
+        """
+        try:
+            if self.transcription_storage is None:
+                self._initialize_transcription()
+
+            return self.transcription_storage.get_transcription(media_file_id)
+
+        except Exception as e:
+            self.logger.error(
+                f"Failed to get transcription for media file {media_file_id}: {e}"
+            )
+            return None
+
+    def get_available_models(self) -> List[str]:
+        """Get list of available Whisper models."""
+        return ["tiny", "base", "small", "medium", "large"]
+
+    def get_model_info(self) -> Dict:
+        """Get information about the current model."""
+        return {
+            "model_size": self.model_size,
+            "use_faster_whisper": self.use_faster_whisper,
+            "available_models": self.get_available_models(),
+        }