spatelier 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analytics/__init__.py +1 -0
- analytics/reporter.py +497 -0
- cli/__init__.py +1 -0
- cli/app.py +147 -0
- cli/audio.py +129 -0
- cli/cli_analytics.py +320 -0
- cli/cli_utils.py +282 -0
- cli/error_handlers.py +122 -0
- cli/files.py +299 -0
- cli/update.py +325 -0
- cli/video.py +823 -0
- cli/worker.py +615 -0
- core/__init__.py +1 -0
- core/analytics_dashboard.py +368 -0
- core/base.py +303 -0
- core/base_service.py +69 -0
- core/config.py +345 -0
- core/database_service.py +116 -0
- core/decorators.py +263 -0
- core/error_handler.py +210 -0
- core/file_tracker.py +254 -0
- core/interactive_cli.py +366 -0
- core/interfaces.py +166 -0
- core/job_queue.py +437 -0
- core/logger.py +79 -0
- core/package_updater.py +469 -0
- core/progress.py +228 -0
- core/service_factory.py +295 -0
- core/streaming.py +299 -0
- core/worker.py +765 -0
- database/__init__.py +1 -0
- database/connection.py +265 -0
- database/metadata.py +516 -0
- database/models.py +288 -0
- database/repository.py +592 -0
- database/transcription_storage.py +219 -0
- modules/__init__.py +1 -0
- modules/audio/__init__.py +5 -0
- modules/audio/converter.py +197 -0
- modules/video/__init__.py +16 -0
- modules/video/converter.py +191 -0
- modules/video/fallback_extractor.py +334 -0
- modules/video/services/__init__.py +18 -0
- modules/video/services/audio_extraction_service.py +274 -0
- modules/video/services/download_service.py +852 -0
- modules/video/services/metadata_service.py +190 -0
- modules/video/services/playlist_service.py +445 -0
- modules/video/services/transcription_service.py +491 -0
- modules/video/transcription_service.py +385 -0
- modules/video/youtube_api.py +397 -0
- spatelier/__init__.py +33 -0
- spatelier-0.3.0.dist-info/METADATA +260 -0
- spatelier-0.3.0.dist-info/RECORD +59 -0
- spatelier-0.3.0.dist-info/WHEEL +5 -0
- spatelier-0.3.0.dist-info/entry_points.txt +2 -0
- spatelier-0.3.0.dist-info/licenses/LICENSE +21 -0
- spatelier-0.3.0.dist-info/top_level.txt +7 -0
- utils/__init__.py +1 -0
- utils/helpers.py +250 -0
core/service_factory.py
ADDED
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Consolidated service factory for dependency injection.
|
|
3
|
+
|
|
4
|
+
This module provides a single factory for creating and managing all services,
|
|
5
|
+
eliminating duplication and ensuring consistent service lifecycle management.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import TYPE_CHECKING, Optional
|
|
9
|
+
|
|
10
|
+
from sqlalchemy.orm import Session
|
|
11
|
+
|
|
12
|
+
from core.config import Config
|
|
13
|
+
from core.database_service import DatabaseServiceFactory, RepositoryContainer
|
|
14
|
+
from core.interfaces import (
|
|
15
|
+
IDatabaseService,
|
|
16
|
+
IMetadataService,
|
|
17
|
+
IPlaylistService,
|
|
18
|
+
IRepositoryContainer,
|
|
19
|
+
IServiceFactory,
|
|
20
|
+
ITranscriptionService,
|
|
21
|
+
IVideoDownloadService,
|
|
22
|
+
)
|
|
23
|
+
from core.logger import get_logger
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from core.job_queue import JobQueue
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ServiceFactory(IServiceFactory):
    """
    Consolidated factory for creating and managing all services.

    Supports context manager usage and lazy-loaded service properties.
    Replaces ServiceContainer with a cleaner, more direct approach.

    Every service is created at most once per factory and then cached; the
    cache is cleared by ``close_all_services`` (also called on context exit).
    """

    def __init__(self, config: Config, verbose: bool = False):
        """
        Initialize service factory.

        Args:
            config: Configuration instance
            verbose: Enable verbose logging
        """
        self.config = config
        self.verbose = verbose
        self.logger = get_logger("ServiceFactory", verbose=verbose)

        # Services will be created lazily
        self._database_service: Optional[IDatabaseService] = None
        self._repositories: Optional[IRepositoryContainer] = None
        self._video_download_service: Optional[IVideoDownloadService] = None
        self._metadata_service: Optional[IMetadataService] = None
        self._transcription_service: Optional[ITranscriptionService] = None
        self._playlist_service: Optional[IPlaylistService] = None
        self._job_queue: Optional["JobQueue"] = None

    def _resolve_overrides(self, config: Optional[Config], verbose: Optional[bool]):
        """Return ``(config, verbose)``, using the instance value wherever None was passed."""
        return (
            self.config if config is None else config,
            self.verbose if verbose is None else verbose,
        )

    def create_database_service(
        self, config: Optional[Config] = None, verbose: Optional[bool] = None
    ) -> IDatabaseService:
        """
        Create database service.

        The service is cached: the overrides only take effect on the call
        that actually builds it; later calls return the cached instance.

        Args:
            config: Optional config override (defaults to instance config)
            verbose: Optional verbose override (defaults to instance verbose)

        Returns:
            The cached (or newly created) database service.
        """
        if self._database_service is None:
            use_config, use_verbose = self._resolve_overrides(config, verbose)
            self._database_service = DatabaseServiceFactory(
                use_config, verbose=use_verbose
            )
        return self._database_service

    def create_video_download_service(
        self, config: Optional[Config] = None, verbose: Optional[bool] = None
    ) -> IVideoDownloadService:
        """
        Create video download service.

        The service is cached: the overrides only take effect on the call
        that actually builds it; later calls return the cached instance.

        Args:
            config: Optional config override (defaults to instance config)
            verbose: Optional verbose override (defaults to instance verbose)

        Returns:
            The cached (or newly created) video download service.
        """
        if self._video_download_service is None:
            use_config, use_verbose = self._resolve_overrides(config, verbose)
            # Get database service for dependency injection
            db_service = self.create_database_service(use_config, use_verbose)
            # Import here to avoid circular imports
            from modules.video.services import VideoDownloadService

            self._video_download_service = VideoDownloadService(
                use_config, verbose=use_verbose, db_service=db_service
            )
        return self._video_download_service

    def create_metadata_service(
        self, config: Optional[Config] = None, verbose: Optional[bool] = None
    ) -> IMetadataService:
        """
        Create metadata service.

        The service is cached: the overrides only take effect on the call
        that actually builds it; later calls return the cached instance.

        Args:
            config: Optional config override (defaults to instance config)
            verbose: Optional verbose override (defaults to instance verbose)

        Returns:
            The cached (or newly created) metadata service.
        """
        if self._metadata_service is None:
            use_config, use_verbose = self._resolve_overrides(config, verbose)
            # Get database service for dependency injection
            db_service = self.create_database_service(use_config, use_verbose)
            # Import here to avoid circular imports
            from modules.video.services import MetadataService

            self._metadata_service = MetadataService(
                use_config, verbose=use_verbose, db_service=db_service
            )
        return self._metadata_service

    def create_transcription_service(
        self, config: Optional[Config] = None, verbose: Optional[bool] = None
    ) -> ITranscriptionService:
        """
        Create transcription service.

        The service is cached: the overrides only take effect on the call
        that actually builds it; later calls return the cached instance.

        Args:
            config: Optional config override (defaults to instance config)
            verbose: Optional verbose override (defaults to instance verbose)

        Returns:
            The cached (or newly created) transcription service.
        """
        if self._transcription_service is None:
            use_config, use_verbose = self._resolve_overrides(config, verbose)
            # Get database service for dependency injection
            db_service = self.create_database_service(use_config, use_verbose)
            # Import here to avoid circular imports
            from modules.video.services.transcription_service import (
                TranscriptionService,
            )

            self._transcription_service = TranscriptionService(
                use_config, verbose=use_verbose, db_service=db_service
            )
        return self._transcription_service

    def create_playlist_service(
        self, config: Optional[Config] = None, verbose: Optional[bool] = None
    ) -> IPlaylistService:
        """
        Create playlist service.

        The service is cached: the overrides only take effect on the call
        that actually builds it; later calls return the cached instance.

        Args:
            config: Optional config override (defaults to instance config)
            verbose: Optional verbose override (defaults to instance verbose)

        Returns:
            The cached (or newly created) playlist service.
        """
        if self._playlist_service is None:
            use_config, use_verbose = self._resolve_overrides(config, verbose)
            # Get database service for dependency injection
            db_service = self.create_database_service(use_config, use_verbose)
            # Import here to avoid circular imports
            from modules.video.services import PlaylistService

            self._playlist_service = PlaylistService(
                use_config, verbose=use_verbose, db_service=db_service
            )
        return self._playlist_service

    # Property-based access for convenience (replaces ServiceContainer properties)
    @property
    def database(self) -> IDatabaseService:
        """Get database service (lazy-loaded)."""
        return self.create_database_service()

    @property
    def repositories(self) -> IRepositoryContainer:
        """Get repository container (lazy-loaded, initializes database)."""
        if self._repositories is None:
            self._repositories = self.database.initialize()
        return self._repositories

    @property
    def video_download(self) -> IVideoDownloadService:
        """Get video download service (lazy-loaded)."""
        return self.create_video_download_service()

    @property
    def metadata(self) -> IMetadataService:
        """Get metadata service (lazy-loaded)."""
        return self.create_metadata_service()

    @property
    def transcription(self) -> ITranscriptionService:
        """Get transcription service (lazy-loaded)."""
        return self.create_transcription_service()

    @property
    def playlist(self) -> IPlaylistService:
        """Get playlist service (lazy-loaded)."""
        return self.create_playlist_service()

    @property
    def job_queue(self) -> "JobQueue":
        """Get job queue (lazy-loaded)."""
        if self._job_queue is None:
            from core.job_queue import JobQueue

            self._job_queue = JobQueue(self.config, self.verbose)
        return self._job_queue

    def initialize_database(self) -> IRepositoryContainer:
        """
        Initialize database and return repositories.

        Returns:
            RepositoryContainer with all repositories
        """
        return self.repositories

    def close_all_services(self) -> None:
        """
        Close all services and connections.

        The cached references are cleared even when closing the database
        connections raises, so the factory never keeps handing out services
        bound to a half-closed database. Any such exception still propagates.
        """
        db_service = self._database_service
        try:
            if db_service is not None:
                db_service.close_connections()
        finally:
            # Reset all services
            self._database_service = None
            self._repositories = None
            self._video_download_service = None
            self._metadata_service = None
            self._transcription_service = None
            self._playlist_service = None
            self._job_queue = None

    def get_database_service(self) -> Optional[IDatabaseService]:
        """Get existing database service (None if not created yet)."""
        return self._database_service

    def get_video_download_service(self) -> Optional[IVideoDownloadService]:
        """Get existing video download service (None if not created yet)."""
        return self._video_download_service

    def get_metadata_service(self) -> Optional[IMetadataService]:
        """Get existing metadata service (None if not created yet)."""
        return self._metadata_service

    def get_transcription_service(self) -> Optional[ITranscriptionService]:
        """Get existing transcription service (None if not created yet)."""
        return self._transcription_service

    def get_playlist_service(self) -> Optional[IPlaylistService]:
        """Get existing playlist service (None if not created yet)."""
        return self._playlist_service

    def reset_services(self) -> None:
        """Reset all services (useful for testing)."""
        self.close_all_services()

    # Context manager support
    def __enter__(self) -> "ServiceFactory":
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Context manager exit: close everything; exceptions are not suppressed."""
        self.close_all_services()
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
# Legacy module-level factory, kept only so older call sites keep working
# during the transition.
# Note: deprecated and scheduled for removal. Construct
# ServiceFactory(config, verbose) directly instead.
_service_factory: Optional[ServiceFactory] = None


def get_service_factory() -> ServiceFactory:
    """
    Return the process-wide service factory, creating it on first use.

    DEPRECATED: Use ServiceFactory(config, verbose) directly instead.
    This function exists only for backward compatibility during transition.

    Returns:
        The shared ServiceFactory instance.
    """
    global _service_factory
    factory = _service_factory
    if factory is None:
        # Built from a default Config, non-verbose: not ideal, but it keeps
        # legacy callers that never passed a config working.
        factory = ServiceFactory(Config(), verbose=False)
        _service_factory = factory
    return factory
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def reset_service_factory():
    """
    Drop the cached global service factory (useful for testing).

    Only the module-level reference is cleared; the previous factory, if
    any, is not closed here.

    DEPRECATED: Use ServiceFactory(config, verbose) directly instead.
    """
    global _service_factory
    _service_factory = None
|
core/streaming.py
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Streaming video processing utilities.
|
|
3
|
+
|
|
4
|
+
This module provides streaming capabilities for processing large video files
|
|
5
|
+
without loading them entirely into memory.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import tempfile
|
|
11
|
+
from contextlib import contextmanager
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any, Callable, Dict, Iterator, Optional
|
|
14
|
+
|
|
15
|
+
import ffmpeg
|
|
16
|
+
|
|
17
|
+
from core.logger import get_logger
|
|
18
|
+
from core.progress import track_progress
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class VideoStreamProcessor:
    """
    Stream-based video processor for large files.

    Cuts a video into fixed-length segment files with ffmpeg so callers can
    work on one segment at a time.
    """

    def __init__(self, config, verbose: bool = False):
        """
        Initialize streaming video processor.

        Args:
            config: Configuration instance
            verbose: Enable verbose logging
        """
        self.config = config
        self.verbose = verbose
        self.logger = get_logger("VideoStreamProcessor", verbose=verbose)

    def get_video_info(self, video_path: Path) -> Dict[str, Any]:
        """
        Get video information using ffmpeg-python without loading the entire file.

        Args:
            video_path: Path to video file

        Returns:
            Dictionary with video metadata (the raw ``ffmpeg.probe`` result)

        Raises:
            RuntimeError: If ffmpeg reports a probe error (chained to the
                original ``ffmpeg.Error``).
        """
        try:
            return ffmpeg.probe(str(video_path))
        except ffmpeg.Error as e:
            self.logger.error(f"FFmpeg probe failed: {e}")
            raise RuntimeError(f"Failed to probe video: {e}") from e
        except Exception as e:
            self.logger.error(f"Failed to get video info: {e}")
            raise

    @staticmethod
    def _segment_count(duration: float, segment_duration: int) -> int:
        """Return the number of segments needed to cover ``duration`` seconds (at least 1)."""
        import math

        # Ceiling division: a duration that is an exact multiple of the
        # segment length must not produce a trailing empty segment.
        return max(1, math.ceil(duration / segment_duration))

    @staticmethod
    def _unlink_if_present(path: Path) -> None:
        """Delete ``path`` if it exists; a missing file is not an error."""
        try:
            path.unlink()
        except FileNotFoundError:
            pass

    def stream_video_segments(
        self, video_path: Path, segment_duration: int = 60
    ) -> Iterator[Path]:
        """
        Stream video in segments for processing.

        Segments are written to a fresh temporary directory. The directory is
        removed entirely when streaming fails, and removed once it is empty
        when the generator finishes or is closed; segment files the consumer
        has not deleted are left in place for the consumer to clean up.

        Args:
            video_path: Path to video file
            segment_duration: Duration of each segment in seconds (must be > 0)

        Yields:
            Path to temporary segment files

        Raises:
            ValueError: If ``segment_duration`` is not positive.
        """
        if segment_duration <= 0:
            raise ValueError(
                f"segment_duration must be positive, got {segment_duration}"
            )

        temp_dir = Path(tempfile.mkdtemp(prefix="spatelier_segments_"))

        try:
            # Get video duration
            info = self.get_video_info(video_path)
            duration = float(info["format"]["duration"])

            self.logger.info(
                f"Streaming video: {duration:.1f}s total, {segment_duration}s segments"
            )

            segment_count = self._segment_count(duration, segment_duration)

            with track_progress(
                f"Creating {segment_count} video segments", total=segment_count
            ) as progress:
                for i in range(segment_count):
                    start_time = i * segment_duration
                    segment_path = temp_dir / f"segment_{i:03d}.mp4"

                    try:
                        # Extract segment using ffmpeg-python
                        (
                            ffmpeg.input(
                                str(video_path), ss=start_time, t=segment_duration
                            )
                            .output(
                                str(segment_path),
                                c="copy",
                                avoid_negative_ts="make_zero",
                            )
                            .overwrite_output()
                            .run(quiet=True)
                        )
                    except ffmpeg.Error as e:
                        self.logger.warning(f"Failed to create segment {i}: {e}")
                        # The path is never handed out, so drop any partial file.
                        self._unlink_if_present(segment_path)
                        continue

                    progress.update(1, f"Created segment {i + 1}/{segment_count}")
                    yield segment_path

        except Exception as e:
            self.logger.error(f"Streaming failed: {e}")
            # Cleanup on error
            self._cleanup_temp_dir(temp_dir)
            raise
        finally:
            # Runs on normal completion and on early close as well; only an
            # empty directory is removed so unconsumed segments survive.
            self._remove_dir_if_empty(temp_dir)

    def process_video_stream(
        self,
        video_path: Path,
        processor_func: Callable[[Path], Any],
        segment_duration: int = 60,
    ) -> Iterator[Any]:
        """
        Process video in streaming fashion.

        Each segment file is deleted once the consumer has received its
        result and resumed (or closed) the generator.

        Args:
            video_path: Path to video file
            processor_func: Function to process each segment
            segment_duration: Duration of each segment in seconds

        Yields:
            Results from processing each segment; ``None`` for a segment
            whose ``processor_func`` call raised.
        """
        try:
            for segment_path in self.stream_video_segments(
                video_path, segment_duration
            ):
                try:
                    # Only the processing call is guarded, so exceptions the
                    # consumer throws in at the yield are not swallowed.
                    try:
                        result = processor_func(segment_path)
                    except Exception as e:
                        self.logger.error(
                            f"Failed to process segment {segment_path}: {e}"
                        )
                        result = None
                    yield result
                finally:
                    # Clean up segment file
                    self._unlink_if_present(segment_path)

        except Exception as e:
            self.logger.error(f"Stream processing failed: {e}")
            raise

    def _remove_dir_if_empty(self, temp_dir: Path) -> None:
        """Remove ``temp_dir`` only when it is empty; otherwise leave it alone."""
        try:
            temp_dir.rmdir()
        except OSError:
            # Not empty (the consumer still owns segment files) or already
            # removed by the error path - both are fine.
            pass

    def _cleanup_temp_dir(self, temp_dir: Path):
        """Clean up temporary directory (best effort; failures are only logged)."""
        try:
            import shutil

            if temp_dir.exists():
                shutil.rmtree(temp_dir)
        except Exception as e:
            self.logger.warning(f"Failed to cleanup temp dir {temp_dir}: {e}")
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class AudioStreamProcessor:
    """
    Stream-based audio processor for large files.

    Cuts an audio file into fixed-length WAV chunks with ffmpeg so callers
    can work on one chunk at a time.
    """

    def __init__(self, config, verbose: bool = False):
        """
        Initialize streaming audio processor.

        Args:
            config: Configuration instance
            verbose: Enable verbose logging
        """
        self.config = config
        self.verbose = verbose
        self.logger = get_logger("AudioStreamProcessor", verbose=verbose)

    @staticmethod
    def _chunk_count(duration: float, chunk_duration: int) -> int:
        """Return the number of chunks needed to cover ``duration`` seconds (at least 1)."""
        import math

        # Ceiling division: a duration that is an exact multiple of the
        # chunk length must not produce a trailing empty chunk.
        return max(1, math.ceil(duration / chunk_duration))

    @staticmethod
    def _unlink_if_present(path: Path) -> None:
        """Delete ``path`` if it exists; a missing file is not an error."""
        try:
            path.unlink()
        except FileNotFoundError:
            pass

    def stream_audio_chunks(
        self, audio_path: Path, chunk_duration: int = 30
    ) -> Iterator[Path]:
        """
        Stream audio in chunks for processing.

        Chunks are written as 16-bit PCM WAV files to a fresh temporary
        directory. The directory is removed entirely when streaming fails,
        and removed once it is empty when the generator finishes or is
        closed; chunk files the consumer has not deleted are left in place
        for the consumer to clean up.

        Args:
            audio_path: Path to audio file
            chunk_duration: Duration of each chunk in seconds (must be > 0)

        Yields:
            Path to temporary chunk files

        Raises:
            ValueError: If ``chunk_duration`` is not positive.
        """
        if chunk_duration <= 0:
            raise ValueError(f"chunk_duration must be positive, got {chunk_duration}")

        temp_dir = Path(tempfile.mkdtemp(prefix="spatelier_audio_chunks_"))

        try:
            # Get audio duration using ffmpeg-python
            info = ffmpeg.probe(str(audio_path))
            duration = float(info["format"]["duration"])

            self.logger.info(
                f"Streaming audio: {duration:.1f}s total, {chunk_duration}s chunks"
            )

            chunk_count = self._chunk_count(duration, chunk_duration)

            with track_progress(
                f"Creating {chunk_count} audio chunks", total=chunk_count
            ) as progress:
                for i in range(chunk_count):
                    start_time = i * chunk_duration
                    chunk_path = temp_dir / f"chunk_{i:03d}.wav"

                    try:
                        # Extract chunk using ffmpeg-python
                        (
                            ffmpeg.input(
                                str(audio_path), ss=start_time, t=chunk_duration
                            )
                            .output(str(chunk_path), acodec="pcm_s16le")
                            .overwrite_output()
                            .run(quiet=True)
                        )
                    except ffmpeg.Error as e:
                        self.logger.warning(f"Failed to create chunk {i}: {e}")
                        # The path is never handed out, so drop any partial file.
                        self._unlink_if_present(chunk_path)
                        continue

                    progress.update(1, f"Created chunk {i + 1}/{chunk_count}")
                    yield chunk_path

        except Exception as e:
            self.logger.error(f"Audio streaming failed: {e}")
            # Cleanup on error
            self._cleanup_temp_dir(temp_dir)
            raise
        finally:
            # Runs on normal completion and on early close as well; only an
            # empty directory is removed so unconsumed chunks survive.
            self._remove_dir_if_empty(temp_dir)

    def _remove_dir_if_empty(self, temp_dir: Path) -> None:
        """Remove ``temp_dir`` only when it is empty; otherwise leave it alone."""
        try:
            temp_dir.rmdir()
        except OSError:
            # Not empty (the consumer still owns chunk files) or already
            # removed by the error path - both are fine.
            pass

    def _cleanup_temp_dir(self, temp_dir: Path):
        """Clean up temporary directory (best effort; failures are only logged)."""
        try:
            import shutil

            if temp_dir.exists():
                shutil.rmtree(temp_dir)
        except Exception as e:
            self.logger.warning(f"Failed to cleanup temp dir {temp_dir}: {e}")
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
@contextmanager
def stream_video_processing(video_path: Path, segment_duration: int = 60):
    """
    Context manager for streaming video processing.

    Builds a ``VideoStreamProcessor`` from a default ``Config`` and attaches
    a lazy ``segments`` iterator for ``video_path`` to it. Nothing is probed
    or written until ``segments`` is first iterated. The iterator is closed
    when the ``with`` block exits.

    Args:
        video_path: Path to video file
        segment_duration: Duration of each segment in seconds

    Yields:
        The processor, with ``processor.segments`` yielding segment paths.

    Usage:
        with stream_video_processing(video_path) as processor:
            for segment in processor.segments:
                # Process segment
                result = process_segment(segment)
    """
    from core.config import Config

    config = Config()
    processor = VideoStreamProcessor(config)
    # Provide the attribute the documented usage relies on; calling the
    # generator method only creates the generator, it does not start it.
    segments = processor.stream_video_segments(video_path, segment_duration)
    processor.segments = segments

    try:
        yield processor
    finally:
        # Closing an unstarted or exhausted generator is a no-op; closing a
        # suspended one lets it run its own cleanup.
        segments.close()
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
@contextmanager
def stream_audio_processing(audio_path: Path, chunk_duration: int = 30):
    """
    Context manager for streaming audio processing.

    Builds an ``AudioStreamProcessor`` from a default ``Config`` and attaches
    a lazy ``chunks`` iterator for ``audio_path`` to it. Nothing is probed or
    written until ``chunks`` is first iterated. The iterator is closed when
    the ``with`` block exits.

    Args:
        audio_path: Path to audio file
        chunk_duration: Duration of each chunk in seconds

    Yields:
        The processor, with ``processor.chunks`` yielding chunk paths.

    Usage:
        with stream_audio_processing(audio_path) as processor:
            for chunk in processor.chunks:
                # Process chunk
                result = process_chunk(chunk)
    """
    from core.config import Config

    config = Config()
    processor = AudioStreamProcessor(config)
    # Provide the attribute the documented usage relies on; calling the
    # generator method only creates the generator, it does not start it.
    chunks = processor.stream_audio_chunks(audio_path, chunk_duration)
    processor.chunks = chunks

    try:
        yield processor
    finally:
        # Closing an unstarted or exhausted generator is a no-op; closing a
        # suspended one lets it run its own cleanup.
        chunks.close()
|