spatelier-0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. analytics/__init__.py +1 -0
  2. analytics/reporter.py +497 -0
  3. cli/__init__.py +1 -0
  4. cli/app.py +147 -0
  5. cli/audio.py +129 -0
  6. cli/cli_analytics.py +320 -0
  7. cli/cli_utils.py +282 -0
  8. cli/error_handlers.py +122 -0
  9. cli/files.py +299 -0
  10. cli/update.py +325 -0
  11. cli/video.py +823 -0
  12. cli/worker.py +615 -0
  13. core/__init__.py +1 -0
  14. core/analytics_dashboard.py +368 -0
  15. core/base.py +303 -0
  16. core/base_service.py +69 -0
  17. core/config.py +345 -0
  18. core/database_service.py +116 -0
  19. core/decorators.py +263 -0
  20. core/error_handler.py +210 -0
  21. core/file_tracker.py +254 -0
  22. core/interactive_cli.py +366 -0
  23. core/interfaces.py +166 -0
  24. core/job_queue.py +437 -0
  25. core/logger.py +79 -0
  26. core/package_updater.py +469 -0
  27. core/progress.py +228 -0
  28. core/service_factory.py +295 -0
  29. core/streaming.py +299 -0
  30. core/worker.py +765 -0
  31. database/__init__.py +1 -0
  32. database/connection.py +265 -0
  33. database/metadata.py +516 -0
  34. database/models.py +288 -0
  35. database/repository.py +592 -0
  36. database/transcription_storage.py +219 -0
  37. modules/__init__.py +1 -0
  38. modules/audio/__init__.py +5 -0
  39. modules/audio/converter.py +197 -0
  40. modules/video/__init__.py +16 -0
  41. modules/video/converter.py +191 -0
  42. modules/video/fallback_extractor.py +334 -0
  43. modules/video/services/__init__.py +18 -0
  44. modules/video/services/audio_extraction_service.py +274 -0
  45. modules/video/services/download_service.py +852 -0
  46. modules/video/services/metadata_service.py +190 -0
  47. modules/video/services/playlist_service.py +445 -0
  48. modules/video/services/transcription_service.py +491 -0
  49. modules/video/transcription_service.py +385 -0
  50. modules/video/youtube_api.py +397 -0
  51. spatelier/__init__.py +33 -0
  52. spatelier-0.3.0.dist-info/METADATA +260 -0
  53. spatelier-0.3.0.dist-info/RECORD +59 -0
  54. spatelier-0.3.0.dist-info/WHEEL +5 -0
  55. spatelier-0.3.0.dist-info/entry_points.txt +2 -0
  56. spatelier-0.3.0.dist-info/licenses/LICENSE +21 -0
  57. spatelier-0.3.0.dist-info/top_level.txt +7 -0
  58. utils/__init__.py +1 -0
  59. utils/helpers.py +250 -0
modules/video/transcription_service.py
@@ -0,0 +1,385 @@
+ """
+ Transcription service for video files.
+
+ This module provides automatic transcription capabilities using OpenAI Whisper.
+ Supports multiple models for speed vs accuracy tradeoffs.
+ """
+
+ import json
+ import time
+ from pathlib import Path
+ from typing import Dict, List, Optional, Tuple, Union
+
+ from loguru import logger
+
+ from core.config import Config, TranscriptionConfig
+
+ try:
+     import whisper
+     from faster_whisper import WhisperModel
+
+     WHISPER_AVAILABLE = True
+ except ImportError:
+     WHISPER_AVAILABLE = False
+
+ # Global model cache to avoid reloading models
+ _MODEL_CACHE = {}
+
+
+ class TranscriptionService:
+     """
+     Video transcription service using OpenAI Whisper.
+
+     Supports both openai-whisper and faster-whisper for different speed/accuracy needs.
+     """
+
+     def __init__(
+         self, config: Config, transcription_config: Optional[TranscriptionConfig] = None
+     ):
+         """
+         Initialize the transcription service.
+
+         Args:
+             config: Main configuration instance
+             transcription_config: Transcription-specific configuration (optional)
+         """
+         self.config = config
+         self.transcription_config = transcription_config or config.transcription
+
+         self.model_size = self.transcription_config.default_model
+         self.use_faster_whisper = self.transcription_config.use_faster_whisper
+         self.device = self.transcription_config.device
+         self.compute_type = self.transcription_config.compute_type
+         self.model = None
+         self._load_model()
+
+     def _load_model(self):
+         """Load the Whisper model with caching."""
+         if not WHISPER_AVAILABLE:
+             raise ImportError(
+                 "Whisper dependencies not available. Install with: pip install spatelier[transcription]"
+             )
+
+         try:
+             # Create cache key based on model configuration
+             cache_key = f"{self.model_size}_{self.device}_{self.compute_type}_{self.use_faster_whisper}"
+
+             # Check if model is already cached
+             if cache_key in _MODEL_CACHE:
+                 logger.info(f"Using cached Whisper model: {self.model_size}")
+                 self.model = _MODEL_CACHE[cache_key]
+                 return
+
+             # Load new model
+             if self.use_faster_whisper:
+                 logger.info(f"Loading faster-whisper model: {self.model_size}")
+                 self.model = WhisperModel(
+                     self.model_size, device=self.device, compute_type=self.compute_type
+                 )
+             else:
+                 logger.info(f"Loading openai-whisper model: {self.model_size}")
+                 self.model = whisper.load_model(self.model_size)
+
+             # Cache the model
+             _MODEL_CACHE[cache_key] = self.model
+             logger.info("Whisper model loaded and cached successfully")
+
+         except Exception as e:
+             logger.error(f"Failed to load Whisper model: {e}")
+             raise
+
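Because `_MODEL_CACHE` lives at module level and the cache key covers model size, device, compute type, and backend flag, every `TranscriptionService` built with the same configuration shares one loaded model for the life of the process. A minimal sketch of the effect, assuming a valid `Config` instance (the variable names are illustrative):

    svc_a = TranscriptionService(config)  # first construction loads and caches the model
    svc_b = TranscriptionService(config)  # identical cache key, so the cached model is reused
    assert svc_a.model is svc_b.model     # one model instance per configuration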
+     def transcribe_video(self, video_path: Path, language: str = "en") -> Dict:
+         """
+         Transcribe a video file.
+
+         Args:
+             video_path: Path to the video file
+             language: Language code (e.g., 'en', 'es', 'fr')
+
+         Returns:
+             Dictionary with transcription results
+         """
+         if not video_path.exists():
+             raise FileNotFoundError(f"Video file not found: {video_path}")
+
+         logger.info(f"Starting transcription of: {video_path}")
+         start_time = time.time()
+
+         try:
+             if self.use_faster_whisper:
+                 result = self._transcribe_with_faster_whisper(video_path, language)
+             else:
+                 result = self._transcribe_with_openai_whisper(video_path, language)
+
+             processing_time = time.time() - start_time
+             result["processing_time"] = processing_time
+             result["model_used"] = f"whisper-{self.model_size}"
+             # Note: this overwrites the backend-detected language with the requested code
+             result["language"] = language
+
+             logger.info(f"Transcription completed in {processing_time:.1f}s")
+             return result
+
+         except Exception as e:
+             logger.error(f"Transcription failed: {e}")
+             raise
+
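A caller only needs a `Config` and a path. A short usage sketch; the import path follows this file's location, while the `Config()` construction and the file name are hypothetical:

    from pathlib import Path

    from core.config import Config
    from modules.video.transcription_service import TranscriptionService

    config = Config()  # hypothetical: build however spatelier normally constructs Config
    service = TranscriptionService(config)
    result = service.transcribe_video(Path("talk.mp4"), language="en")
    print(result["model_used"], f"{result['processing_time']:.1f}s", len(result["segments"]))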
+     def _transcribe_with_faster_whisper(self, video_path: Path, language: str) -> Dict:
+         """Transcribe using faster-whisper (CTranslate2 backend: faster, comparable accuracy)."""
+         result = self.model.transcribe(
+             str(video_path), language=language, word_timestamps=True
+         )
+
+         # faster-whisper returns a (segments, info) tuple; segments is a lazy generator
+         segments, info = result
+
+         # Convert segments to our format
+         transcription_segments = []
+         for segment in segments:
+             transcription_segments.append(
+                 {
+                     "start": segment.start,
+                     "end": segment.end,
+                     "text": segment.text.strip(),
+                     # avg_logprob is stored as a confidence proxy, not a true probability
+                     "confidence": getattr(segment, "avg_logprob", 0.0),
+                 }
+             )
+
+         return {
+             "segments": transcription_segments,
+             "language": info.language,
+             "language_probability": info.language_probability,
+             "duration": info.duration,
+         }
+
+     def _transcribe_with_openai_whisper(self, video_path: Path, language: str) -> Dict:
+         """Transcribe using openai-whisper (reference implementation; slower)."""
+         result = self.model.transcribe(
+             str(video_path), language=language, word_timestamps=True
+         )
+
+         # Convert to our format
+         transcription_segments = []
+         for segment in result["segments"]:
+             transcription_segments.append(
+                 {
+                     "start": segment["start"],
+                     "end": segment["end"],
+                     "text": segment["text"].strip(),
+                     "confidence": segment.get("avg_logprob", 0.0),
+                 }
+             )
+
+         return {
+             "segments": transcription_segments,
+             "language": result.get("language", language),
+             "language_probability": 1.0,  # openai-whisper doesn't provide this
+             "duration": result.get("duration", 0.0),  # also not set by openai-whisper; falls back to 0.0
+         }
+
+     def get_available_models(self) -> List[str]:
+         """Get list of available Whisper models."""
+         return ["tiny", "base", "small", "medium", "large"]
+
+     def get_model_info(self) -> Dict:
+         """Get information about the current model."""
+         return {
+             "model_size": self.model_size,
+             "use_faster_whisper": self.use_faster_whisper,
+             "available_models": self.get_available_models(),
+         }
+
+
+ class TranscriptionStorage:
+     """
+     Handles storage and retrieval of transcriptions in MongoDB.
+     """
+
+     def __init__(self, mongo_db):
+         """
+         Initialize transcription storage.
+
+         Args:
+             mongo_db: MongoDB database instance
+         """
+         self.db = mongo_db
+         self.collection = self.db.transcriptions
+
+     def store_transcription(
+         self, video_id: Union[str, int], transcription_data: Dict
+     ) -> str:
+         """
+         Store transcription data in MongoDB.
+
+         Args:
+             video_id: ID of the video file (will be converted to int for consistency)
+             transcription_data: Transcription results from Whisper
+
+         Returns:
+             MongoDB document ID
+         """
+         # Ensure video_id is always stored as an integer for consistency
+         video_id_int = int(video_id) if isinstance(video_id, (str, int)) else video_id
+
+         document = {
+             "video_id": video_id_int,
+             "created_at": time.time(),
+             "segments": transcription_data["segments"],
+             "language": transcription_data["language"],
+             "language_probability": transcription_data.get("language_probability", 1.0),
+             "duration": transcription_data.get("duration", 0.0),
+             "model_used": transcription_data.get("model_used", "unknown"),
+             "processing_time": transcription_data.get("processing_time", 0.0),
+             "total_segments": len(transcription_data["segments"]),
+             "full_text": " ".join(
+                 [seg["text"] for seg in transcription_data["segments"]]
+             ),
+         }
+
+         result = self.collection.insert_one(document)
+         logger.info(f"Stored transcription for video {video_id}: {result.inserted_id}")
+         return str(result.inserted_id)
+
+     def get_transcription(self, video_id: Union[str, int]) -> Optional[Dict]:
+         """Get transcription for a video."""
+         # Ensure consistent integer lookup
+         video_id_int = int(video_id) if isinstance(video_id, (str, int)) else video_id
+         return self.collection.find_one({"video_id": video_id_int})
+
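Together with `TranscriptionService`, this gives a simple store-and-retrieve round trip. A sketch continuing the earlier usage example, assuming a reachable MongoDB and pymongo installed (the database name and video id are illustrative):

    from pymongo import MongoClient

    db = MongoClient()["spatelier"]  # illustrative database name
    storage = TranscriptionStorage(db)

    doc_id = storage.store_transcription(video_id=42, transcription_data=result)
    stored = storage.get_transcription("42")  # str or int: both normalize to the integer key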
+     def search_transcriptions(self, query: str, limit: int = 10) -> List[Dict]:
+         """
+         Search transcriptions by text content.
+
+         Args:
+             query: Search query
+             limit: Maximum number of results
+
+         Returns:
+             List of matching transcriptions
+         """
+         # Create text index if needed (create_index is a no-op for an identical
+         # existing index; the guard covers conflicting index definitions)
+         try:
+             self.collection.create_index([("full_text", "text")])
+         except Exception:
+             pass
+
+         # Search using MongoDB text search, ordered by relevance score
+         results = (
+             self.collection.find(
+                 {"$text": {"$search": query}}, {"score": {"$meta": "textScore"}}
+             )
+             .sort([("score", {"$meta": "textScore"})])
+             .limit(limit)
+         )
+
+         return list(results)
+
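Since the `score` field is projected via `$meta`, callers can see relevance directly. A brief usage sketch (query text illustrative):

    hits = storage.search_transcriptions("model caching", limit=5)
    for hit in hits:
        print(round(hit["score"], 2), hit["full_text"][:60])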
+     def generate_srt_subtitle(
+         self, transcription_data: Dict, output_path: Path
+     ) -> bool:
+         """
+         Generate SRT subtitle file from transcription data.
+
+         Args:
+             transcription_data: Transcription data with segments
+             output_path: Path to save SRT file
+
+         Returns:
+             True if successful, False otherwise
+         """
+         try:
+             segments = transcription_data.get("segments", [])
+             if not segments:
+                 logger.warning("No segments found in transcription data")
+                 return False
+
+             with open(output_path, "w", encoding="utf-8") as f:
+                 for i, segment in enumerate(segments, 1):
+                     start_time = self._format_srt_time(segment["start"])
+                     end_time = self._format_srt_time(segment["end"])
+                     text = segment["text"].strip()
+
+                     f.write(f"{i}\n")
+                     f.write(f"{start_time} --> {end_time}\n")
+                     f.write(f"{text}\n\n")
+
+             logger.info(f"Generated SRT subtitle file: {output_path}")
+             return True
+
+         except Exception as e:
+             logger.error(f"Failed to generate SRT subtitle: {e}")
+             return False
+
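For two illustrative segments spanning 0.0-2.5s and 2.5-5.0s, the writer above emits standard numbered SRT cues:

    1
    00:00:00,000 --> 00:00:02,500
    First segment text

    2
    00:00:02,500 --> 00:00:05,000
    Second segment text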
+     def generate_vtt_subtitle(
+         self, transcription_data: Dict, output_path: Path
+     ) -> bool:
+         """
+         Generate VTT subtitle file from transcription data.
+
+         Args:
+             transcription_data: Transcription data with segments
+             output_path: Path to save VTT file
+
+         Returns:
+             True if successful, False otherwise
+         """
+         try:
+             segments = transcription_data.get("segments", [])
+             if not segments:
+                 logger.warning("No segments found in transcription data")
+                 return False
+
+             with open(output_path, "w", encoding="utf-8") as f:
+                 f.write("WEBVTT\n\n")
+
+                 for segment in segments:
+                     start_time = self._format_vtt_time(segment["start"])
+                     end_time = self._format_vtt_time(segment["end"])
+                     text = segment["text"].strip()
+
+                     f.write(f"{start_time} --> {end_time}\n")
+                     f.write(f"{text}\n\n")
+
+             logger.info(f"Generated VTT subtitle file: {output_path}")
+             return True
+
+         except Exception as e:
+             logger.error(f"Failed to generate VTT subtitle: {e}")
+             return False
+
+     def _format_srt_time(self, seconds: float) -> str:
+         """Format time for SRT format (HH:MM:SS,mmm)."""
+         hours = int(seconds // 3600)
+         minutes = int((seconds % 3600) // 60)
+         secs = int(seconds % 60)
+         millisecs = int((seconds % 1) * 1000)
+         return f"{hours:02d}:{minutes:02d}:{secs:02d},{millisecs:03d}"
+
+     def _format_vtt_time(self, seconds: float) -> str:
+         """Format time for VTT format (HH:MM:SS.mmm)."""
+         hours = int(seconds // 3600)
+         minutes = int((seconds % 3600) // 60)
+         secs = int(seconds % 60)
+         millisecs = int((seconds % 1) * 1000)
+         return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millisecs:03d}"
+
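As a worked example, 3661.5 seconds splits as 3661.5 // 3600 = 1 hour, (3661.5 % 3600) // 60 = 1 minute, 1 second, and 500 milliseconds, giving 01:01:01,500 in SRT and 01:01:01.500 in VTT; the two formats differ only in the comma versus the period before the milliseconds. Note that the millisecond term truncates rather than rounds, so float noise such as 0.5369999 becomes 536, not 537.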
+     def get_analytics(self) -> Dict:
+         """Get analytics about stored transcriptions."""
+         pipeline = [
+             {
+                 "$group": {
+                     "_id": None,
+                     "total_transcriptions": {"$sum": 1},
+                     "total_duration": {"$sum": "$duration"},
+                     "avg_processing_time": {"$avg": "$processing_time"},
+                     "languages": {"$addToSet": "$language"},
+                     "models_used": {"$addToSet": "$model_used"},
+                 }
+             }
+         ]
+
+         result = list(self.collection.aggregate(pipeline))
+         if result:
+             return result[0]
+         return {}
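Because the `$group` stage uses `_id: None`, the whole collection collapses into a single summary document. An illustrative return value (all numbers made up):

    {
        "_id": None,
        "total_transcriptions": 12,
        "total_duration": 5421.7,
        "avg_processing_time": 48.3,
        "languages": ["en", "es"],
        "models_used": ["whisper-base", "whisper-small"],
    }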