spatelier 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. analytics/__init__.py +1 -0
  2. analytics/reporter.py +497 -0
  3. cli/__init__.py +1 -0
  4. cli/app.py +147 -0
  5. cli/audio.py +129 -0
  6. cli/cli_analytics.py +320 -0
  7. cli/cli_utils.py +282 -0
  8. cli/error_handlers.py +122 -0
  9. cli/files.py +299 -0
  10. cli/update.py +325 -0
  11. cli/video.py +823 -0
  12. cli/worker.py +615 -0
  13. core/__init__.py +1 -0
  14. core/analytics_dashboard.py +368 -0
  15. core/base.py +303 -0
  16. core/base_service.py +69 -0
  17. core/config.py +345 -0
  18. core/database_service.py +116 -0
  19. core/decorators.py +263 -0
  20. core/error_handler.py +210 -0
  21. core/file_tracker.py +254 -0
  22. core/interactive_cli.py +366 -0
  23. core/interfaces.py +166 -0
  24. core/job_queue.py +437 -0
  25. core/logger.py +79 -0
  26. core/package_updater.py +469 -0
  27. core/progress.py +228 -0
  28. core/service_factory.py +295 -0
  29. core/streaming.py +299 -0
  30. core/worker.py +765 -0
  31. database/__init__.py +1 -0
  32. database/connection.py +265 -0
  33. database/metadata.py +516 -0
  34. database/models.py +288 -0
  35. database/repository.py +592 -0
  36. database/transcription_storage.py +219 -0
  37. modules/__init__.py +1 -0
  38. modules/audio/__init__.py +5 -0
  39. modules/audio/converter.py +197 -0
  40. modules/video/__init__.py +16 -0
  41. modules/video/converter.py +191 -0
  42. modules/video/fallback_extractor.py +334 -0
  43. modules/video/services/__init__.py +18 -0
  44. modules/video/services/audio_extraction_service.py +274 -0
  45. modules/video/services/download_service.py +852 -0
  46. modules/video/services/metadata_service.py +190 -0
  47. modules/video/services/playlist_service.py +445 -0
  48. modules/video/services/transcription_service.py +491 -0
  49. modules/video/transcription_service.py +385 -0
  50. modules/video/youtube_api.py +397 -0
  51. spatelier/__init__.py +33 -0
  52. spatelier-0.3.0.dist-info/METADATA +260 -0
  53. spatelier-0.3.0.dist-info/RECORD +59 -0
  54. spatelier-0.3.0.dist-info/WHEEL +5 -0
  55. spatelier-0.3.0.dist-info/entry_points.txt +2 -0
  56. spatelier-0.3.0.dist-info/licenses/LICENSE +21 -0
  57. spatelier-0.3.0.dist-info/top_level.txt +7 -0
  58. utils/__init__.py +1 -0
  59. utils/helpers.py +250 -0
core/base_service.py ADDED
@@ -0,0 +1,69 @@
1
+ """
2
+ Base service class for Spatelier services.
3
+
4
+ This module provides a common base class that eliminates duplication
5
+ in service initialization patterns across all service classes.
6
+ """
7
+
8
+ from abc import ABC
9
+ from typing import Any, Optional
10
+
11
+ from core.config import Config
12
+ from core.database_service import DatabaseServiceFactory
13
+ from core.logger import get_logger
14
+
15
+
16
class BaseService(ABC):
    """
    Base class for all Spatelier services.

    Stores the configuration, the verbosity flag and a logger named after the
    concrete subclass, then wires up the database factory, the repository
    container and the database manager so subclasses do not repeat that setup.
    """

    def __init__(
        self, config: Config, verbose: bool = False, db_service: Optional[Any] = None
    ):
        """
        Initialize the base service.

        Args:
            config: Configuration instance
            verbose: Enable verbose logging
            db_service: Optional database service instance. When it is None a
                DatabaseServiceFactory is created from ``config``.
        """
        self.config = config
        self.verbose = verbose
        self.logger = get_logger(self.__class__.__name__, verbose=verbose)

        # Initialize database service
        self._init_database_service(db_service)

    def _init_database_service(self, db_service: Optional[Any] = None):
        """
        Initialize database service and repositories.

        Sets ``self.db_factory``, ``self.repos`` (the result of the factory's
        ``initialize()``) and ``self.db_manager``.

        Args:
            db_service: Optional database service instance. It must provide
                ``initialize()``; ``get_db_manager()`` is optional and
                ``self.db_manager`` is None when it is missing.
        """
        # Compare against None explicitly: an injected service that happens to
        # be falsy (e.g. it defines __len__ or __bool__) must still be used
        # rather than silently replaced by a freshly built factory.
        if db_service is not None:
            self.db_factory = db_service
        else:
            # Fallback for backward compatibility
            self.db_factory = DatabaseServiceFactory(self.config, verbose=self.verbose)

        self.repos = self.db_factory.initialize()
        # Injected services are not required to expose get_db_manager().
        self.db_manager = getattr(self.db_factory, "get_db_manager", lambda: None)()

    def get_database_service(self):
        """Get the database service factory."""
        return self.db_factory

    def get_repositories(self):
        """Get the repository container."""
        return self.repos

    def get_db_manager(self):
        """Get the database manager."""
        return self.db_manager
core/config.py ADDED
@@ -0,0 +1,345 @@
1
+ """
2
+ Configuration management for Spatelier.
3
+
4
+ This module handles all configuration loading, validation, and management.
5
+ Simplified to reduce unnecessary nesting while maintaining essential structure.
6
+ """
7
+
8
+ import os
9
+ from pathlib import Path
10
+ from typing import Any, Dict, List, Optional, Union
11
+
12
+ import yaml
13
+ from pydantic import BaseModel, Field, field_validator, model_validator
14
+
15
+
16
+ def _find_repo_root() -> Optional[Path]:
17
+ """Find repository root by looking for pyproject.toml."""
18
+ current = Path(__file__).resolve()
19
+ for parent in current.parents:
20
+ if (parent / "pyproject.toml").exists():
21
+ return parent
22
+ return None
23
+
24
+
25
def get_default_data_dir() -> Path:
    """
    Get default data directory.

    When a repository root is found (see ``_find_repo_root``), the result is
    ``<repo>/.data``; this branch does not create the directory.

    Otherwise the per-user location is used and created if missing:
    ``~/Library/Application Support/spatelier`` when ``platform.system()``
    reports "Darwin", ``~/.local/share/spatelier`` on every other platform.
    """
    checkout_root = _find_repo_root()
    if checkout_root:
        # Running from development repo
        return checkout_root / ".data"

    # Running from installed location - use user data directory
    import platform

    home = Path.home()
    if platform.system() == "Darwin":  # macOS
        user_data_dir = home / "Library" / "Application Support" / "spatelier"
    else:
        # Linux/Unix
        user_data_dir = home / ".local" / "share" / "spatelier"

    # Create directory if it doesn't exist
    user_data_dir.mkdir(parents=True, exist_ok=True)
    return user_data_dir
49
+
50
+
51
class VideoConfig(BaseModel):
    """Video processing configuration: container format, quality and working directories."""

    default_format: str = "mp4"
    quality: str = "best"
    output_dir: Optional[Path] = None
    # Defaults to <data dir>/tmp/video, resolved when the model is instantiated.
    temp_dir: Path = Field(
        default_factory=lambda: get_default_data_dir() / "tmp" / "video"
    )

    @field_validator("default_format")
    @classmethod
    def validate_format(cls, v):
        """Return the lower-cased format, rejecting unsupported containers."""
        allowed = ("mp4", "mkv", "webm", "avi", "mov", "m4v", "flv")
        normalized = v.lower()
        if normalized in allowed:
            return normalized
        raise ValueError(
            f"Invalid format '{v}'. Must be one of: {', '.join(allowed)}"
        )

    @field_validator("quality")
    @classmethod
    def validate_quality(cls, v):
        """Return the lower-cased quality; accepts a named preset or an all-digit string."""
        presets = ("best", "worst", "720p", "1080p", "480p", "360p", "240p")
        normalized = v.lower()
        if normalized in presets or v.isdigit():
            return normalized
        raise ValueError(
            f"Invalid quality '{v}'. Must be one of: {', '.join(presets)} or a number"
        )

    @model_validator(mode="after")
    def ensure_paths_exist(self):
        """Create temp_dir and, when set, output_dir (including missing parents)."""
        self.temp_dir.mkdir(parents=True, exist_ok=True)
        destination = self.output_dir
        if destination is not None:
            destination.mkdir(parents=True, exist_ok=True)
        return self
90
+
91
+
92
class AudioConfig(BaseModel):
    """Audio processing configuration: output format, bitrate and working directories."""

    default_format: str = "mp3"
    bitrate: int = 320
    output_dir: Optional[Path] = None
    # Defaults to <data dir>/tmp/audio, resolved when the model is instantiated.
    temp_dir: Path = Field(
        default_factory=lambda: get_default_data_dir() / "tmp" / "audio"
    )

    @field_validator("default_format")
    @classmethod
    def validate_format(cls, v):
        """Return the lower-cased format, rejecting unsupported audio formats."""
        allowed = ("mp3", "wav", "flac", "aac", "ogg", "m4a", "wma")
        normalized = v.lower()
        if normalized in allowed:
            return normalized
        raise ValueError(
            f"Invalid format '{v}'. Must be one of: {', '.join(allowed)}"
        )

    @field_validator("bitrate")
    @classmethod
    def validate_bitrate(cls, v):
        """Accept only integer bitrates in the inclusive range 64..320."""
        if isinstance(v, int) and 64 <= v <= 320:
            return v
        raise ValueError(f"Invalid bitrate '{v}'. Must be between 64 and 320")

    @model_validator(mode="after")
    def ensure_paths_exist(self):
        """Create temp_dir and, when set, output_dir (including missing parents)."""
        self.temp_dir.mkdir(parents=True, exist_ok=True)
        destination = self.output_dir
        if destination is not None:
            destination.mkdir(parents=True, exist_ok=True)
        return self
128
+
129
+
130
class DatabaseConfig(BaseModel):
    """Database configuration for the SQLite store and the optional MongoDB connection."""

    # SQLite file; defaults to spatelier.db inside the default data directory.
    sqlite_path: Path = Field(default_factory=lambda: get_default_data_dir() / "spatelier.db")

    # MongoDB connection settings and the switch that enables them.
    mongodb_url: str = "mongodb://localhost:27017"
    mongodb_database: str = "spatelier"
    enable_mongodb: bool = False

    # Retention period (in days) and analytics toggle.
    retention_days: int = 365
    enable_analytics: bool = True
141
+
142
+
143
class TranscriptionConfig(BaseModel):
    """Transcription configuration: model size, language, backend and hardware selection."""

    default_model: str = "small"  # Changed from "large" - faster, good accuracy
    default_language: str = "en"
    use_faster_whisper: bool = True
    device: str = "auto"
    compute_type: str = "auto"

    @field_validator("default_model")
    @classmethod
    def validate_model(cls, v):
        """Return the lower-cased model name, rejecting unknown Whisper model sizes."""
        known_models = ("tiny", "base", "small", "medium", "large")
        normalized = v.lower()
        if normalized in known_models:
            return normalized
        raise ValueError(
            f"Invalid model '{v}'. Must be one of: {', '.join(known_models)}"
        )

    @field_validator("device")
    @classmethod
    def validate_device(cls, v):
        """Return the lower-cased device name, rejecting unknown devices."""
        known_devices = ("auto", "cpu", "cuda", "mps")
        normalized = v.lower()
        if normalized in known_devices:
            return normalized
        raise ValueError(
            f"Invalid device '{v}'. Must be one of: {', '.join(known_devices)}"
        )

    @field_validator("compute_type")
    @classmethod
    def validate_compute_type(cls, v):
        """Return the lower-cased compute type, rejecting unknown values."""
        known_types = ("auto", "int8", "int8_float16", "int16", "float16", "float32")
        normalized = v.lower()
        if normalized in known_types:
            return normalized
        raise ValueError(
            f"Invalid compute type '{v}'. Must be one of: {', '.join(known_types)}"
        )
184
+
185
+
186
class Config(BaseModel):
    """
    Main configuration class for Spatelier.

    Combines the nested video/audio/database/transcription sections with flat
    logging, file-handling, fallback and global settings, and provides helpers
    to load from YAML or environment variables, save to YAML, and check that
    the configured directories exist.
    """

    # Essential nested configurations
    video: VideoConfig = Field(default_factory=VideoConfig)
    audio: AudioConfig = Field(default_factory=AudioConfig)
    database: DatabaseConfig = Field(default_factory=DatabaseConfig)
    transcription: TranscriptionConfig = Field(default_factory=TranscriptionConfig)

    # Flattened simple settings (previously nested)
    log_level: str = "INFO"
    log_file: Optional[Path] = None

    # File processing settings (flattened)
    video_extensions: List[str] = Field(
        default_factory=lambda: [
            ".mp4",
            ".webm",
            ".avi",
            ".mov",
            ".mkv",
            ".m4v",
            ".flv",
        ]
    )
    audio_extensions: List[str] = Field(
        default_factory=lambda: [".mp3", ".wav", ".flac", ".aac", ".ogg", ".m4a"]
    )
    max_filename_length: int = 255

    # Fallback settings (flattened)
    fallback_max_files: int = 10
    fallback_timeout_seconds: int = 30

    # Global settings
    verbose: bool = False
    debug: bool = False

    model_config = {"arbitrary_types_allowed": True}

    @field_validator("log_level")
    @classmethod
    def validate_log_level(cls, v):
        """Return the upper-cased log level, rejecting unknown level names."""
        valid_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
        if v.upper() not in valid_levels:
            raise ValueError(
                f"Invalid log level '{v}'. Must be one of: {', '.join(valid_levels)}"
            )
        return v.upper()

    @field_validator("max_filename_length")
    @classmethod
    def validate_filename_length(cls, v):
        """Accept only integer lengths in the inclusive range 1..1000."""
        if not isinstance(v, int) or v < 1 or v > 1000:
            raise ValueError(
                f"Invalid max_filename_length '{v}'. Must be between 1 and 1000"
            )
        return v

    @classmethod
    def load_from_file(cls, config_file: Union[str, Path]) -> "Config":
        """
        Load configuration from YAML file.

        Args:
            config_file: Path to the YAML configuration file

        Returns:
            Config built from the file's top-level mapping. An empty or
            comment-only file yields a Config with all defaults.

        Raises:
            FileNotFoundError: If the file does not exist
            TypeError: If the YAML document is not a mapping
        """
        config_path = Path(config_file)

        if not config_path.exists():
            raise FileNotFoundError(f"Configuration file not found: {config_path}")

        with open(config_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)

        if data is None:
            # yaml.safe_load returns None for an empty document; cls(**None)
            # would fail with an unhelpful TypeError, so fall back to defaults.
            data = {}
        elif not isinstance(data, dict):
            raise TypeError(
                f"Configuration file must contain a mapping at the top level: {config_path}"
            )

        return cls(**data)

    @classmethod
    def load_from_env(cls) -> "Config":
        """
        Load configuration from environment variables.

        Reads SPATELIER_VERBOSE and SPATELIER_DEBUG (true only when the value
        equals "true", case-insensitively) and SPATELIER_LOG_LEVEL (default
        "INFO"); every other setting keeps its default.
        """
        return cls(
            verbose=os.getenv("SPATELIER_VERBOSE", "false").lower() == "true",
            debug=os.getenv("SPATELIER_DEBUG", "false").lower() == "true",
            log_level=os.getenv("SPATELIER_LOG_LEVEL", "INFO"),
        )

    def save_to_file(self, config_file: Union[str, Path]) -> None:
        """
        Save configuration to YAML file.

        Parent directories are created as needed and Path values are written
        as strings.

        Args:
            config_file: Destination path of the YAML file
        """
        config_path = Path(config_file)
        config_path.parent.mkdir(parents=True, exist_ok=True)

        def convert_paths(obj):
            # Recursively replace Path objects with plain strings so the dump
            # contains only basic YAML types.
            if isinstance(obj, dict):
                return {k: convert_paths(v) for k, v in obj.items()}
            elif isinstance(obj, list):
                return [convert_paths(item) for item in obj]
            elif isinstance(obj, Path):
                return str(obj)
            else:
                return obj

        # Build the serializable data before opening the file: opening in "w"
        # mode truncates it, so a failure while dumping the model must not
        # leave an existing config file empty.
        serializable = convert_paths(self.model_dump())

        with open(config_path, "w", encoding="utf-8") as f:
            yaml.dump(serializable, f, default_flow_style=False, indent=2)

    def get_default_config_path(self) -> Path:
        """Get the default configuration file path (config.yaml in the default data directory)."""
        return get_default_data_dir() / "config.yaml"

    def ensure_default_config(self) -> None:
        """Write this configuration to the default path if no file exists there yet."""
        default_path = self.get_default_config_path()

        if not default_path.exists():
            default_path.parent.mkdir(parents=True, exist_ok=True)
            self.save_to_file(default_path)

    def validate_config(self) -> List[str]:
        """
        Validate configuration and return list of issues.

        Checks that the configured video and audio output directories (when
        set) and the parent directory of the SQLite file exist. Errors raised
        while checking are reported as issues rather than propagated.

        Returns:
            List of validation issues (empty if valid)
        """
        issues = []

        # Validate video config
        try:
            if self.video.output_dir and not self.video.output_dir.exists():
                issues.append(
                    f"Video output directory does not exist: {self.video.output_dir}"
                )
        except Exception as e:
            issues.append(f"Video output directory error: {e}")

        # Validate audio config
        try:
            if self.audio.output_dir and not self.audio.output_dir.exists():
                issues.append(
                    f"Audio output directory does not exist: {self.audio.output_dir}"
                )
        except Exception as e:
            issues.append(f"Audio output directory error: {e}")

        # Validate database config
        try:
            if not self.database.sqlite_path.parent.exists():
                issues.append(
                    f"Database directory does not exist: {self.database.sqlite_path.parent}"
                )
        except Exception as e:
            issues.append(f"Database directory error: {e}")

        return issues

    def is_valid(self) -> bool:
        """Check if configuration is valid (validate_config() reports no issues)."""
        return not self.validate_config()
core/database_service.py ADDED
@@ -0,0 +1,116 @@
1
+ """
2
+ Database service factory for centralized database management.
3
+
4
+ This module provides the database service factory, separated from the main
5
+ service factory to avoid circular imports.
6
+ """
7
+
8
+ from typing import Optional
9
+
10
+ from sqlalchemy.orm import Session
11
+
12
+ from core.config import Config
13
+ from core.logger import get_logger
14
+ from database.connection import DatabaseManager
15
+ from database.repository import (
16
+ AnalyticsRepository,
17
+ MediaFileRepository,
18
+ PlaylistRepository,
19
+ PlaylistVideoRepository,
20
+ ProcessingJobRepository,
21
+ )
22
+
23
+
24
class RepositoryContainer:
    """Bundle of repository objects that all share one database session."""

    def __init__(self, session: Session, verbose: bool = False):
        """
        Build one repository of each kind on top of ``session``.

        Args:
            session: SQLAlchemy session handed to every repository
            verbose: Verbosity flag forwarded to every repository
        """
        self.verbose = verbose
        self.session = session

        # One repository per entity type, all created with the same arguments.
        self.media = MediaFileRepository(session, verbose)
        self.jobs = ProcessingJobRepository(session, verbose)
        self.analytics = AnalyticsRepository(session, verbose)
        self.playlists = PlaylistRepository(session, verbose)
        self.playlist_videos = PlaylistVideoRepository(session, verbose)
38
+
39
+
40
class DatabaseServiceFactory:
    """Factory that owns a DatabaseManager and lazily builds the repository container."""

    def __init__(self, config: Config, verbose: bool = False):
        """
        Initialize database service factory.

        Creates the DatabaseManager; the repository container is only built on
        the first call to initialize().

        Args:
            config: Configuration instance
            verbose: Enable verbose logging
        """
        self.config = config
        self.verbose = verbose
        self.logger = get_logger("DatabaseServiceFactory", verbose=verbose)

        # Database manager
        self.db_manager = DatabaseManager(config, verbose=verbose)
        self._repositories: Optional[RepositoryContainer] = None

    def initialize(self) -> RepositoryContainer:
        """
        Initialize database connections and return repository container.

        The first call connects to SQLite (and to MongoDB when
        ``config.database.enable_mongodb`` is set) and builds the container;
        later calls return the container that already exists.

        Returns:
            RepositoryContainer with all repositories
        """
        if self._repositories is not None:
            return self._repositories

        # Connect to databases
        manager = self.db_manager
        manager.connect_sqlite()
        if self.config.database.enable_mongodb:
            manager.connect_mongodb()

        # Create repository container
        sqlite_session = manager.get_sqlite_session()
        self._repositories = RepositoryContainer(sqlite_session, self.verbose)

        self.logger.info("Database services initialized")
        return self._repositories

    def get_repositories(self) -> RepositoryContainer:
        """
        Get repository container.

        Returns:
            RepositoryContainer with all repositories

        Raises:
            RuntimeError: If database not initialized
        """
        container = self._repositories
        if container is None:
            raise RuntimeError("Database not initialized. Call initialize() first.")
        return container

    def get_db_manager(self) -> DatabaseManager:
        """
        Get database manager.

        Returns:
            DatabaseManager instance
        """
        return self.db_manager

    def close_connections(self):
        """Close all database connections and drop the cached repository container."""
        if self.db_manager:
            self.db_manager.close_connections()
        # Forget the container so a later initialize() builds a fresh one.
        self._repositories = None
        self.logger.info("Database connections closed")

    def __enter__(self):
        """Context manager entry: initialize and return the repository container."""
        return self.initialize()

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: close all connections."""
        self.close_connections()