spatelier 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. analytics/__init__.py +1 -0
  2. analytics/reporter.py +497 -0
  3. cli/__init__.py +1 -0
  4. cli/app.py +147 -0
  5. cli/audio.py +129 -0
  6. cli/cli_analytics.py +320 -0
  7. cli/cli_utils.py +282 -0
  8. cli/error_handlers.py +122 -0
  9. cli/files.py +299 -0
  10. cli/update.py +325 -0
  11. cli/video.py +823 -0
  12. cli/worker.py +615 -0
  13. core/__init__.py +1 -0
  14. core/analytics_dashboard.py +368 -0
  15. core/base.py +303 -0
  16. core/base_service.py +69 -0
  17. core/config.py +345 -0
  18. core/database_service.py +116 -0
  19. core/decorators.py +263 -0
  20. core/error_handler.py +210 -0
  21. core/file_tracker.py +254 -0
  22. core/interactive_cli.py +366 -0
  23. core/interfaces.py +166 -0
  24. core/job_queue.py +437 -0
  25. core/logger.py +79 -0
  26. core/package_updater.py +469 -0
  27. core/progress.py +228 -0
  28. core/service_factory.py +295 -0
  29. core/streaming.py +299 -0
  30. core/worker.py +765 -0
  31. database/__init__.py +1 -0
  32. database/connection.py +265 -0
  33. database/metadata.py +516 -0
  34. database/models.py +288 -0
  35. database/repository.py +592 -0
  36. database/transcription_storage.py +219 -0
  37. modules/__init__.py +1 -0
  38. modules/audio/__init__.py +5 -0
  39. modules/audio/converter.py +197 -0
  40. modules/video/__init__.py +16 -0
  41. modules/video/converter.py +191 -0
  42. modules/video/fallback_extractor.py +334 -0
  43. modules/video/services/__init__.py +18 -0
  44. modules/video/services/audio_extraction_service.py +274 -0
  45. modules/video/services/download_service.py +852 -0
  46. modules/video/services/metadata_service.py +190 -0
  47. modules/video/services/playlist_service.py +445 -0
  48. modules/video/services/transcription_service.py +491 -0
  49. modules/video/transcription_service.py +385 -0
  50. modules/video/youtube_api.py +397 -0
  51. spatelier/__init__.py +33 -0
  52. spatelier-0.3.0.dist-info/METADATA +260 -0
  53. spatelier-0.3.0.dist-info/RECORD +59 -0
  54. spatelier-0.3.0.dist-info/WHEEL +5 -0
  55. spatelier-0.3.0.dist-info/entry_points.txt +2 -0
  56. spatelier-0.3.0.dist-info/licenses/LICENSE +21 -0
  57. spatelier-0.3.0.dist-info/top_level.txt +7 -0
  58. utils/__init__.py +1 -0
  59. utils/helpers.py +250 -0
database/repository.py ADDED
@@ -0,0 +1,592 @@
1
+ """
2
+ Database repository for data operations.
3
+
4
+ This module provides repository classes for database operations on both SQLite and MongoDB.
5
+ """
6
+
7
+ import json
8
+ from datetime import datetime, timedelta
9
+ from pathlib import Path
10
+ from typing import Any, Dict, List, Optional, Union
11
+
12
+ from sqlalchemy import and_, desc, func, or_
13
+ from sqlalchemy.orm import Session
14
+
15
+ from core.logger import get_logger
16
+ from database.models import (
17
+ AnalyticsEvent,
18
+ DownloadSource,
19
+ MediaFile,
20
+ MediaType,
21
+ Playlist,
22
+ PlaylistVideo,
23
+ ProcessingJob,
24
+ ProcessingStatus,
25
+ UserPreference,
26
+ )
27
+
28
+
29
class MediaFileRepository:
    """Repository for media file operations.

    Wraps a SQLAlchemy session and exposes create/read/update/delete helpers
    plus a few aggregate queries over ``MediaFile`` rows. Every write method
    commits on the supplied session before returning.
    """

    # Escape character used when turning user text into a literal LIKE pattern.
    _LIKE_ESCAPE = "\\"

    def __init__(self, session: Session, verbose: bool = False):
        """Initialize media file repository.

        Args:
            session: SQLAlchemy session used for every query and commit.
            verbose: Forwarded to ``get_logger`` and kept on the instance.
        """
        self.session = session
        self.verbose = verbose
        self.logger = get_logger("MediaFileRepository", verbose=verbose)

    @classmethod
    def _escape_like(cls, text: str) -> str:
        """Escape LIKE wildcards so *text* is matched literally."""
        esc = cls._LIKE_ESCAPE
        # The escape character itself must be doubled first, otherwise the
        # escapes added for "%" and "_" would be escaped again.
        return (
            text.replace(esc, esc + esc)
            .replace("%", esc + "%")
            .replace("_", esc + "_")
        )

    def create(self, file_path: Union[str, Path], **kwargs) -> MediaFile:
        """
        Create a new media file record.

        Args:
            file_path: Path to media file
            **kwargs: Additional media file attributes. ``file_name`` defaults
                to the last path component; ``file_size`` defaults to the
                on-disk size, or 0 when the file cannot be stat'ed.

        Returns:
            Created MediaFile instance
        """
        file_path = Path(file_path)

        # Extract file_name from kwargs if provided, otherwise use file_path.name
        file_name = kwargs.pop("file_name", file_path.name)

        # Only touch the filesystem when the caller did not supply a size, and
        # do it with a single stat() call so there is no exists()/stat() race.
        if "file_size" in kwargs:
            file_size = kwargs.pop("file_size")
        else:
            try:
                file_size = file_path.stat().st_size
            except (OSError, ValueError):
                # Missing/unreadable file or an invalid path: record size 0.
                file_size = 0

        media_file = MediaFile(
            file_path=str(file_path), file_name=file_name, file_size=file_size, **kwargs
        )

        self.session.add(media_file)
        self.session.commit()
        # Reload so database-generated values are present on the instance.
        self.session.refresh(media_file)

        self.logger.info(f"Created media file record: {file_path}")
        return media_file

    def get_by_id(self, file_id: int) -> Optional[MediaFile]:
        """Get media file by ID, or None when no row matches."""
        return self.session.query(MediaFile).filter(MediaFile.id == file_id).first()

    def get_by_path(self, file_path: Union[str, Path]) -> Optional[MediaFile]:
        """Get the first media file whose stored path equals ``str(file_path)``."""
        return (
            self.session.query(MediaFile)
            .filter(MediaFile.file_path == str(file_path))
            .first()
        )

    def get_by_hash(self, file_hash: str) -> Optional[MediaFile]:
        """Get the first media file with the given hash, or None."""
        return (
            self.session.query(MediaFile)
            .filter(MediaFile.file_hash == file_hash)
            .first()
        )

    def list_by_type(self, media_type: MediaType, limit: int = 100) -> List[MediaFile]:
        """List up to ``limit`` media files of a type, newest first."""
        return (
            self.session.query(MediaFile)
            .filter(MediaFile.media_type == media_type)
            .order_by(desc(MediaFile.created_at))
            .limit(limit)
            .all()
        )

    def search(
        self, query: str, media_type: Optional[MediaType] = None
    ) -> List[MediaFile]:
        """Search media files whose name or path contains ``query``.

        The match is a case-insensitive substring match. ``%`` and ``_`` in
        ``query`` are escaped so they match literally instead of acting as
        LIKE wildcards.

        Args:
            query: Text to look for in ``file_name`` or ``file_path``.
            media_type: When truthy, restrict results to this media type.

        Returns:
            Matching media files, newest first.
        """
        pattern = f"%{self._escape_like(query)}%"
        filters = [
            or_(
                MediaFile.file_name.ilike(pattern, escape=self._LIKE_ESCAPE),
                MediaFile.file_path.ilike(pattern, escape=self._LIKE_ESCAPE),
            )
        ]

        if media_type:
            filters.append(MediaFile.media_type == media_type)

        return (
            self.session.query(MediaFile)
            .filter(and_(*filters))
            .order_by(desc(MediaFile.created_at))
            .all()
        )

    def get_by_file_path(self, file_path: str) -> Optional[MediaFile]:
        """Get media file by file path (alias of :meth:`get_by_path`)."""
        return self.get_by_path(file_path)

    def delete(self, media_file_id: int) -> bool:
        """Delete media file by ID.

        Returns:
            True when a row was found and deleted, False otherwise.
        """
        media_file = self.get_by_id(media_file_id)
        if not media_file:
            return False

        self.session.delete(media_file)
        self.session.commit()
        self.logger.info(f"Deleted media file {media_file_id}")
        return True

    def get_by_source_id(self, source_id: str) -> List[MediaFile]:
        """Get media files by source ID (e.g., YouTube video ID)."""
        return (
            self.session.query(MediaFile).filter(MediaFile.source_id == source_id).all()
        )

    def update(self, media_file_id: int, **kwargs) -> MediaFile:
        """Update media file with new data.

        Args:
            media_file_id: ID of the row to update.
            **kwargs: Attribute values to set. Keys that are not attributes
                of the model instance are silently ignored.

        Returns:
            The updated MediaFile instance.

        Raises:
            ValueError: If no media file has the given ID.
        """
        media_file = self.get_by_id(media_file_id)
        if not media_file:
            raise ValueError(f"Media file with ID {media_file_id} not found")

        # Update fields that exist on the model
        for key, value in kwargs.items():
            if hasattr(media_file, key):
                setattr(media_file, key, value)

        media_file.updated_at = datetime.now()
        self.session.commit()
        return media_file

    def get_statistics(self) -> Dict[str, Any]:
        """Get media file statistics.

        Returns:
            Dict with ``files_by_type`` (media type -> row count),
            ``size_by_type`` (media type -> summed ``file_size``) and
            ``recent_files`` (rows created within the last 30 days, measured
            from ``datetime.now()``).
        """
        stats: Dict[str, Any] = {}

        # Total files by type
        type_counts = (
            self.session.query(MediaFile.media_type, func.count(MediaFile.id))
            .group_by(MediaFile.media_type)
            .all()
        )
        stats["files_by_type"] = dict(type_counts)

        # Total file size by type
        size_by_type = (
            self.session.query(MediaFile.media_type, func.sum(MediaFile.file_size))
            .group_by(MediaFile.media_type)
            .all()
        )
        stats["size_by_type"] = dict(size_by_type)

        # Recent files (last 30 days)
        thirty_days_ago = datetime.now() - timedelta(days=30)
        stats["recent_files"] = (
            self.session.query(MediaFile)
            .filter(MediaFile.created_at >= thirty_days_ago)
            .count()
        )

        return stats
192
+
193
+
194
class ProcessingJobRepository:
    """Repository for processing job operations.

    Wraps a SQLAlchemy session for creating, updating and querying
    ``ProcessingJob`` rows. Write methods commit on the supplied session.
    """

    def __init__(self, session: Session, verbose: bool = False):
        """Initialize processing job repository.

        Args:
            session: SQLAlchemy session used for every query and commit.
            verbose: Forwarded to ``get_logger`` and kept on the instance.
        """
        self.session = session
        self.verbose = verbose
        self.logger = get_logger("ProcessingJobRepository", verbose=verbose)

    def create(
        self, media_file_id: int, job_type: str, input_path: str, **kwargs
    ) -> ProcessingJob:
        """
        Create a new processing job.

        The job is always created with ``ProcessingStatus.PENDING``.

        Args:
            media_file_id: ID of associated media file
            job_type: Type of processing job
            input_path: Path to input file
            **kwargs: Additional job attributes

        Returns:
            Created ProcessingJob instance
        """
        job = ProcessingJob(
            media_file_id=media_file_id,
            job_type=job_type,
            input_path=input_path,
            status=ProcessingStatus.PENDING,
            **kwargs,
        )

        self.session.add(job)
        self.session.commit()
        # Reload so database-generated values are present on the instance.
        self.session.refresh(job)

        self.logger.info(f"Created processing job: {job_type} for {input_path}")
        return job

    def update_status(
        self,
        job_id: int,
        status: ProcessingStatus,
        output_path: Optional[str] = None,
        error_message: Optional[str] = None,
    ) -> Optional[ProcessingJob]:
        """Update processing job status and the timestamps derived from it.

        Args:
            job_id: ID of the job to update.
            status: New status. ``PROCESSING`` stamps ``started_at`` once;
                ``COMPLETED``/``FAILED`` stamp ``completed_at`` and compute
                ``duration_seconds``.
            output_path: Stored on the job when non-empty.
            error_message: Stored on the job when non-empty.

        Returns:
            The updated job, or None when no job has the given ID.
        """
        job = self.get_by_id(job_id)
        if not job:
            return None

        job.status = status
        if output_path:
            job.output_path = output_path
        if error_message:
            job.error_message = error_message

        if status == ProcessingStatus.PROCESSING:
            # Only set started_at if not already set
            if not job.started_at:
                job.started_at = datetime.now()
        elif status in (ProcessingStatus.COMPLETED, ProcessingStatus.FAILED):
            job.completed_at = datetime.now()
            # Measure from started_at; fall back to created_at if the job
            # never went through PROCESSING.
            # NOTE(review): assumes created_at is stored in the same (naive,
            # local) clock as datetime.now() - confirm against the model.
            baseline = job.started_at or job.created_at
            if baseline:
                job.duration_seconds = (job.completed_at - baseline).total_seconds()

        self.session.commit()
        self.logger.info(f"Updated job {job_id} status to {status}")
        return job

    def get_by_id(self, job_id: int) -> Optional[ProcessingJob]:
        """Get processing job by ID, or None when no row matches."""
        return (
            self.session.query(ProcessingJob).filter(ProcessingJob.id == job_id).first()
        )

    def update(self, job_id: int, **kwargs) -> Optional[ProcessingJob]:
        """Update processing job with given fields.

        Args:
            job_id: ID of the job to update.
            **kwargs: Attribute values to set. Keys that are not attributes
                of the job instance are ignored.

        Returns:
            The updated job, or None when no job has the given ID.
        """
        job = self.get_by_id(job_id)
        if not job:
            return None

        # Track what was really written so the log does not claim that
        # ignored (unknown) keys were updated.
        applied = []
        for key, value in kwargs.items():
            if hasattr(job, key):
                setattr(job, key, value)
                applied.append(key)

        self.session.commit()
        self.logger.info(f"Updated job {job_id} with fields: {applied}")
        return job

    def get_by_status(self, status: ProcessingStatus) -> List[ProcessingJob]:
        """Get all jobs with the given status, newest first."""
        return (
            self.session.query(ProcessingJob)
            .filter(ProcessingJob.status == status)
            .order_by(desc(ProcessingJob.created_at))
            .all()
        )

    def get_job_statistics(self) -> Dict[str, Any]:
        """Get processing job statistics.

        Returns:
            Dict with ``jobs_by_status`` (status -> count), ``jobs_by_type``
            (job type -> count) and ``avg_processing_time`` (average of the
            non-null ``duration_seconds`` values as returned by the query).
        """
        stats: Dict[str, Any] = {}

        # Jobs by status
        status_counts = (
            self.session.query(ProcessingJob.status, func.count(ProcessingJob.id))
            .group_by(ProcessingJob.status)
            .all()
        )
        stats["jobs_by_status"] = dict(status_counts)

        # Jobs by type
        type_counts = (
            self.session.query(ProcessingJob.job_type, func.count(ProcessingJob.id))
            .group_by(ProcessingJob.job_type)
            .all()
        )
        stats["jobs_by_type"] = dict(type_counts)

        # Average processing time
        stats["avg_processing_time"] = (
            self.session.query(func.avg(ProcessingJob.duration_seconds))
            .filter(ProcessingJob.duration_seconds.isnot(None))
            .scalar()
        )

        return stats
334
+
335
+
336
class AnalyticsRepository:
    """Data-access helper for recording and summarising analytics events."""

    def __init__(self, session: Session, verbose: bool = False):
        """Keep the session and build a logger named after the repository."""
        self.session = session
        self.verbose = verbose
        self.logger = get_logger("AnalyticsRepository", verbose=verbose)

    def track_event(
        self,
        event_type: str,
        media_file_id: Optional[int] = None,
        processing_job_id: Optional[int] = None,
        event_data: Optional[Dict[str, Any]] = None,
        user_id: Optional[str] = None,
        session_id: Optional[str] = None,
    ) -> AnalyticsEvent:
        """
        Persist one analytics event and return it.

        Args:
            event_type: Type of event
            media_file_id: Associated media file ID
            processing_job_id: Associated processing job ID
            event_data: Extra payload; stored as a JSON string
            user_id: User ID
            session_id: Session ID

        Returns:
            The stored AnalyticsEvent, refreshed from the database
        """
        # A falsy payload (None or an empty dict) is stored as None.
        serialized = json.dumps(event_data) if event_data else None

        record = AnalyticsEvent(
            event_type=event_type,
            media_file_id=media_file_id,
            processing_job_id=processing_job_id,
            event_data=serialized,
            user_id=user_id,
            session_id=session_id,
        )

        db = self.session
        db.add(record)
        db.commit()
        db.refresh(record)

        self.logger.debug(f"Tracked event: {event_type}")
        return record

    def get_events_by_type(
        self, event_type: str, days: int = 30
    ) -> List[AnalyticsEvent]:
        """Return events of one type from the last ``days`` days, newest first."""
        cutoff = datetime.now() - timedelta(days=days)

        matching = self.session.query(AnalyticsEvent).filter(
            AnalyticsEvent.event_type == event_type,
            AnalyticsEvent.timestamp >= cutoff,
        )
        return matching.order_by(desc(AnalyticsEvent.timestamp)).all()

    def get_usage_statistics(self, days: int = 30) -> Dict[str, Any]:
        """Summarise event activity over the last ``days`` days.

        Returns:
            Dict with ``events_by_type`` (event type -> count) and
            ``daily_activity`` (list of ``{"date", "count"}`` dicts ordered
            by date).
        """
        cutoff = datetime.now() - timedelta(days=days)

        # Events by type
        per_type_rows = (
            self.session.query(AnalyticsEvent.event_type, func.count(AnalyticsEvent.id))
            .filter(AnalyticsEvent.timestamp >= cutoff)
            .group_by(AnalyticsEvent.event_type)
            .all()
        )

        # Daily activity
        event_day = func.date(AnalyticsEvent.timestamp)
        per_day_rows = (
            self.session.query(
                event_day.label("date"),
                func.count(AnalyticsEvent.id).label("count"),
            )
            .filter(AnalyticsEvent.timestamp >= cutoff)
            .group_by(func.date(AnalyticsEvent.timestamp))
            .order_by("date")
            .all()
        )

        # Each row is (date, count) in the order the columns were selected.
        return {
            "events_by_type": dict(per_type_rows),
            "daily_activity": [
                {"date": str(day), "count": total} for day, total in per_day_rows
            ],
        }
433
+
434
+
435
class PlaylistRepository:
    """Data-access helper for ``Playlist`` rows."""

    def __init__(self, session: Session, verbose: bool = False):
        """Keep the session and build a logger named after the repository."""
        self.session = session
        self.verbose = verbose
        self.logger = get_logger("PlaylistRepository", verbose=verbose)

    def create(self, **kwargs) -> Playlist:
        """Build a playlist from ``kwargs``, persist it and return it."""
        new_playlist = Playlist(**kwargs)

        db = self.session
        db.add(new_playlist)
        db.commit()
        # Reload so database-generated values (such as the id logged below)
        # are present on the instance.
        db.refresh(new_playlist)

        self.logger.info(f"Created playlist: {new_playlist.id}")
        return new_playlist

    def get_by_id(self, playlist_id: int) -> Optional[Playlist]:
        """Look a playlist up by its primary key; None when absent."""
        lookup = self.session.query(Playlist).filter(Playlist.id == playlist_id)
        return lookup.first()

    def get_by_playlist_id(self, playlist_id: str) -> Optional[Playlist]:
        """Look a playlist up by its external ID (e.g., YouTube playlist ID)."""
        lookup = self.session.query(Playlist).filter(
            Playlist.playlist_id == playlist_id
        )
        return lookup.first()

    def get_all(self, limit: int = 100, offset: int = 0) -> List[Playlist]:
        """Return one page of playlists, newest first."""
        newest_first = self.session.query(Playlist).order_by(desc(Playlist.created_at))
        return newest_first.offset(offset).limit(limit).all()

    def update(self, playlist_id: int, **kwargs) -> Optional[Playlist]:
        """Apply ``kwargs`` to a playlist and commit.

        Keys that are not attributes of the playlist are skipped. Returns the
        updated playlist, or None when no playlist has the given ID.
        """
        target = self.get_by_id(playlist_id)
        if target:
            for field_name, new_value in kwargs.items():
                if hasattr(target, field_name):
                    setattr(target, field_name, new_value)

            target.updated_at = datetime.now()
            self.session.commit()
            return target
        return None

    def delete(self, playlist_id: int) -> bool:
        """Delete a playlist; True when a row was found and removed.

        NOTE(review): whether associated playlist_videos rows are removed as
        well depends on the cascade configured on the model - confirm there.
        """
        target = self.get_by_id(playlist_id)
        if target:
            self.session.delete(target)
            self.session.commit()
            self.logger.info(f"Deleted playlist: {playlist_id}")
            return True
        return False
499
+
500
+
501
class PlaylistVideoRepository:
    """Data-access helper for playlist/video link rows (``PlaylistVideo``)."""

    def __init__(self, session: Session, verbose: bool = False):
        """Keep the session and build a logger named after the repository."""
        self.session = session
        self.verbose = verbose
        self.logger = get_logger("PlaylistVideoRepository", verbose=verbose)

    def _find_link(self, playlist_id: int, media_file_id: int) -> Optional[PlaylistVideo]:
        """Return the first link row for this playlist/video pair, if any."""
        return (
            self.session.query(PlaylistVideo)
            .filter(
                PlaylistVideo.playlist_id == playlist_id,
                PlaylistVideo.media_file_id == media_file_id,
            )
            .first()
        )

    def add_video_to_playlist(
        self,
        playlist_id: int,
        media_file_id: int,
        position: Optional[int] = None,
        video_title: Optional[str] = None,
    ) -> PlaylistVideo:
        """Create a link between a playlist and a media file and return it."""
        link = PlaylistVideo(
            playlist_id=playlist_id,
            media_file_id=media_file_id,
            position=position,
            video_title=video_title,
        )

        db = self.session
        db.add(link)
        db.commit()
        db.refresh(link)

        self.logger.info(f"Added video {media_file_id} to playlist {playlist_id}")
        return link

    def remove_video_from_playlist(self, playlist_id: int, media_file_id: int) -> bool:
        """Delete the link for this pair; True when one existed."""
        link = self._find_link(playlist_id, media_file_id)
        if link:
            self.session.delete(link)
            self.session.commit()
            self.logger.info(
                f"Removed video {media_file_id} from playlist {playlist_id}"
            )
            return True
        return False

    def get_playlist_videos(self, playlist_id: int) -> List[PlaylistVideo]:
        """Return a playlist's links ordered by position, then by added_at."""
        in_playlist = self.session.query(PlaylistVideo).filter(
            PlaylistVideo.playlist_id == playlist_id
        )
        return in_playlist.order_by(
            PlaylistVideo.position, PlaylistVideo.added_at
        ).all()

    def get_by_playlist_id(self, playlist_id: int) -> List[PlaylistVideo]:
        """Alias of ``get_playlist_videos``."""
        return self.get_playlist_videos(playlist_id)

    def get_video_playlists(self, media_file_id: int) -> List[PlaylistVideo]:
        """Return every link row that references the given media file."""
        for_video = self.session.query(PlaylistVideo).filter(
            PlaylistVideo.media_file_id == media_file_id
        )
        return for_video.all()

    def update_video_position(
        self, playlist_id: int, media_file_id: int, position: int
    ) -> bool:
        """Set the position on the link for this pair; True when one existed."""
        link = self._find_link(playlist_id, media_file_id)
        if link:
            link.position = position
            self.session.commit()
            return True
        return False