spatelier 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analytics/__init__.py +1 -0
- analytics/reporter.py +497 -0
- cli/__init__.py +1 -0
- cli/app.py +147 -0
- cli/audio.py +129 -0
- cli/cli_analytics.py +320 -0
- cli/cli_utils.py +282 -0
- cli/error_handlers.py +122 -0
- cli/files.py +299 -0
- cli/update.py +325 -0
- cli/video.py +823 -0
- cli/worker.py +615 -0
- core/__init__.py +1 -0
- core/analytics_dashboard.py +368 -0
- core/base.py +303 -0
- core/base_service.py +69 -0
- core/config.py +345 -0
- core/database_service.py +116 -0
- core/decorators.py +263 -0
- core/error_handler.py +210 -0
- core/file_tracker.py +254 -0
- core/interactive_cli.py +366 -0
- core/interfaces.py +166 -0
- core/job_queue.py +437 -0
- core/logger.py +79 -0
- core/package_updater.py +469 -0
- core/progress.py +228 -0
- core/service_factory.py +295 -0
- core/streaming.py +299 -0
- core/worker.py +765 -0
- database/__init__.py +1 -0
- database/connection.py +265 -0
- database/metadata.py +516 -0
- database/models.py +288 -0
- database/repository.py +592 -0
- database/transcription_storage.py +219 -0
- modules/__init__.py +1 -0
- modules/audio/__init__.py +5 -0
- modules/audio/converter.py +197 -0
- modules/video/__init__.py +16 -0
- modules/video/converter.py +191 -0
- modules/video/fallback_extractor.py +334 -0
- modules/video/services/__init__.py +18 -0
- modules/video/services/audio_extraction_service.py +274 -0
- modules/video/services/download_service.py +852 -0
- modules/video/services/metadata_service.py +190 -0
- modules/video/services/playlist_service.py +445 -0
- modules/video/services/transcription_service.py +491 -0
- modules/video/transcription_service.py +385 -0
- modules/video/youtube_api.py +397 -0
- spatelier/__init__.py +33 -0
- spatelier-0.3.0.dist-info/METADATA +260 -0
- spatelier-0.3.0.dist-info/RECORD +59 -0
- spatelier-0.3.0.dist-info/WHEEL +5 -0
- spatelier-0.3.0.dist-info/entry_points.txt +2 -0
- spatelier-0.3.0.dist-info/licenses/LICENSE +21 -0
- spatelier-0.3.0.dist-info/top_level.txt +7 -0
- utils/__init__.py +1 -0
- utils/helpers.py +250 -0
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Analytics dashboard for Spatelier.
|
|
3
|
+
|
|
4
|
+
This module provides a web-based analytics dashboard for viewing
|
|
5
|
+
processing statistics, usage metrics, and system health.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import time
|
|
10
|
+
from dataclasses import asdict, dataclass
|
|
11
|
+
from datetime import datetime, timedelta
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any, Dict, List, Optional
|
|
14
|
+
|
|
15
|
+
from rich.console import Console
|
|
16
|
+
from rich.layout import Layout
|
|
17
|
+
from rich.live import Live
|
|
18
|
+
from rich.panel import Panel
|
|
19
|
+
from rich.table import Table
|
|
20
|
+
from rich.text import Text
|
|
21
|
+
|
|
22
|
+
from core.config import Config
|
|
23
|
+
from core.logger import get_logger
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class ProcessingStats:
    """Aggregate processing metrics shown on the analytics dashboard.

    Every field defaults to zero so that an unavailable or empty data
    source still yields a valid (all-zero) stats object.
    """

    total_videos: int = 0  # videos processed overall
    total_audio: int = 0  # audio files processed overall
    total_playlists: int = 0  # playlists processed overall
    total_duration: float = 0.0  # cumulative media duration, seconds
    total_size: int = 0  # cumulative output size, bytes
    success_rate: float = 0.0  # percentage of successful jobs
    avg_processing_time: float = 0.0  # mean per-item processing time, seconds
    last_24h_videos: int = 0  # videos processed in the last 24 hours
    last_24h_audio: int = 0  # audio files processed in the last 24 hours
    last_24h_playlists: int = 0  # playlists processed in the last 24 hours
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
class SystemHealth:
    """Point-in-time snapshot of host resource usage and job activity."""

    cpu_usage: float = 0.0  # percent
    memory_usage: float = 0.0  # percent
    disk_usage: float = 0.0  # percent
    active_jobs: int = 0  # jobs currently running
    queue_size: int = 0  # jobs waiting to run
    last_activity: Optional[datetime] = None  # most recent activity timestamp, if known
    uptime: float = 0.0  # host uptime, seconds
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class AnalyticsDashboard:
    """Rich-based terminal analytics dashboard for Spatelier.

    Renders processing statistics and system health in a live-updating
    terminal UI, and can export the same data as JSON.
    """

    def __init__(self, config: Config, verbose: bool = False):
        """
        Initialize analytics dashboard.

        Args:
            config: Configuration instance
            verbose: Enable verbose logging
        """
        self.config = config
        self.verbose = verbose
        self.logger = get_logger("AnalyticsDashboard", verbose=verbose)
        self.console = Console()

    def get_processing_stats(self) -> ProcessingStats:
        """Get current processing statistics.

        Returns:
            ProcessingStats snapshot; an all-zero ProcessingStats on failure.
        """
        try:
            # This would typically query the database
            # For now, return mock data
            return ProcessingStats(
                total_videos=1250,
                total_audio=890,
                total_playlists=45,
                total_duration=125000.0,  # seconds
                total_size=2.5 * 1024**3,  # 2.5 GB
                success_rate=94.5,
                avg_processing_time=45.2,
                last_24h_videos=23,
                last_24h_audio=15,
                last_24h_playlists=3,
            )
        except Exception as e:
            self.logger.error(f"Failed to get processing stats: {e}")
            return ProcessingStats()

    def get_system_health(self) -> SystemHealth:
        """Get current system health metrics.

        Uses psutil when available; falls back to mock data when psutil is
        not installed, and to an all-zero SystemHealth on any other error.
        """
        try:
            # psutil is optional, so import lazily and degrade gracefully.
            import psutil

            return SystemHealth(
                cpu_usage=psutil.cpu_percent(),
                memory_usage=psutil.virtual_memory().percent,
                disk_usage=psutil.disk_usage("/").percent,
                # NOTE(review): job counts are placeholders, not live queue data.
                active_jobs=5,
                queue_size=12,
                last_activity=datetime.now() - timedelta(minutes=5),
                uptime=time.time() - psutil.boot_time(),
            )
        except ImportError:
            self.logger.warning("psutil not available, using mock health data")
            return SystemHealth(
                cpu_usage=25.5,
                memory_usage=68.2,
                disk_usage=45.8,
                active_jobs=3,
                queue_size=8,
                last_activity=datetime.now() - timedelta(minutes=2),
                uptime=86400.0,  # 1 day
            )
        except Exception as e:
            self.logger.error(f"Failed to get system health: {e}")
            return SystemHealth()

    def create_stats_table(self, stats: ProcessingStats) -> Table:
        """Create processing statistics table.

        Args:
            stats: Statistics snapshot to render

        Returns:
            Rich Table with one row per metric.
        """
        table = Table(
            title="📊 Processing Statistics",
            show_header=True,
            header_style="bold magenta",
        )

        table.add_column("Metric", style="cyan", no_wrap=True)
        table.add_column("Value", style="green")
        table.add_column("Last 24h", style="yellow")

        # Format file sizes (values below 1 MiB are always shown in KB).
        def format_size(size_bytes):
            if size_bytes < 1024**2:
                return f"{size_bytes / 1024:.1f} KB"
            elif size_bytes < 1024**3:
                return f"{size_bytes / (1024**2):.1f} MB"
            else:
                return f"{size_bytes / (1024**3):.1f} GB"

        # Render seconds as "Hh Mm"; sub-minute remainders are dropped.
        def format_duration(seconds):
            hours = int(seconds // 3600)
            minutes = int((seconds % 3600) // 60)
            return f"{hours}h {minutes}m"

        table.add_row(
            "Videos Processed", str(stats.total_videos), str(stats.last_24h_videos)
        )
        table.add_row("Audio Files", str(stats.total_audio), str(stats.last_24h_audio))
        table.add_row(
            "Playlists", str(stats.total_playlists), str(stats.last_24h_playlists)
        )
        table.add_row("Total Duration", format_duration(stats.total_duration), "")
        table.add_row("Total Size", format_size(stats.total_size), "")
        table.add_row("Success Rate", f"{stats.success_rate:.1f}%", "")
        table.add_row("Avg Processing Time", f"{stats.avg_processing_time:.1f}s", "")

        return table

    def create_health_table(self, health: SystemHealth) -> Table:
        """Create system health table.

        Args:
            health: Health snapshot to render

        Returns:
            Rich Table with one row per health metric plus a status label.
        """
        table = Table(
            title="🏥 System Health", show_header=True, header_style="bold red"
        )

        table.add_column("Metric", style="cyan", no_wrap=True)
        table.add_column("Value", style="green")
        table.add_column("Status", style="yellow")

        # Map a value to one of three labels using two ascending thresholds:
        # value < thresholds[0] -> labels[0], < thresholds[1] -> labels[1],
        # otherwise labels[2].
        def get_status_text(value, thresholds, labels):
            if value < thresholds[0]:
                return labels[0]
            elif value < thresholds[1]:
                return labels[1]
            else:
                return labels[2]

        # CPU usage
        cpu_status = get_status_text(
            health.cpu_usage, [50, 80], ["Good", "Warning", "Critical"]
        )
        table.add_row("CPU Usage", f"{health.cpu_usage:.1f}%", cpu_status)

        # Memory usage
        memory_status = get_status_text(
            health.memory_usage, [70, 90], ["Good", "Warning", "Critical"]
        )
        table.add_row("Memory Usage", f"{health.memory_usage:.1f}%", memory_status)

        # Disk usage
        disk_status = get_status_text(
            health.disk_usage, [80, 95], ["Good", "Warning", "Critical"]
        )
        table.add_row("Disk Usage", f"{health.disk_usage:.1f}%", disk_status)

        # Active jobs
        table.add_row(
            "Active Jobs",
            str(health.active_jobs),
            "Running" if health.active_jobs > 0 else "Idle",
        )

        # Queue size
        queue_status = get_status_text(
            health.queue_size, [5, 20], ["Good", "Busy", "Overloaded"]
        )
        table.add_row("Queue Size", str(health.queue_size), queue_status)

        # Last activity (row omitted entirely when the timestamp is unknown)
        if health.last_activity:
            time_ago = datetime.now() - health.last_activity
            minutes_ago = int(time_ago.total_seconds() / 60)
            table.add_row(
                "Last Activity",
                f"{minutes_ago}m ago",
                "Active" if minutes_ago < 10 else "Idle",
            )

        # Uptime
        uptime_hours = health.uptime / 3600
        table.add_row(
            "Uptime",
            f"{uptime_hours:.1f}h",
            "Stable" if uptime_hours > 24 else "Recent",
        )

        return table

    def create_dashboard_layout(self) -> Layout:
        """Create the main dashboard layout.

        Returns:
            Layout with named regions: "header", "main" (split into
            "stats" and "health"), and "footer".
        """
        layout = Layout()

        # Split into header, main body, and footer
        layout.split_column(
            Layout(name="header", size=3),
            Layout(name="main"),
            Layout(name="footer", size=3),
        )

        # Split main into stats and health side by side
        layout["main"].split_row(Layout(name="stats"), Layout(name="health"))

        return layout

    def render_dashboard(self):
        """Render the complete analytics dashboard.

        Returns:
            A fully populated Layout ready to be printed or passed to Live.
        """
        stats = self.get_processing_stats()
        health = self.get_system_health()

        layout = self.create_dashboard_layout()

        # Header
        header_text = Text("🚀 Spatelier Analytics Dashboard", style="bold blue")
        layout["header"].update(Panel(header_text, border_style="blue"))

        # Stats section
        stats_table = self.create_stats_table(stats)
        layout["stats"].update(
            Panel(stats_table, title="Processing Overview", border_style="green")
        )

        # Health section
        health_table = self.create_health_table(health)
        layout["health"].update(
            Panel(health_table, title="System Status", border_style="red")
        )

        # Footer with the render timestamp
        footer_text = Text(
            f"Last updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", style="dim"
        )
        layout["footer"].update(Panel(footer_text, border_style="dim"))

        return layout

    def show_dashboard(self, refresh_interval: int = 5):
        """
        Show live updating dashboard.

        Blocks until interrupted with Ctrl-C.

        Args:
            refresh_interval: Refresh interval in seconds; must be > 0
                (a zero value would cause a ZeroDivisionError below)
        """
        try:
            with Live(
                self.render_dashboard(),
                refresh_per_second=1 / refresh_interval,
                screen=True,
            ) as live:
                # Re-render on a fixed cadence until the user interrupts.
                while True:
                    time.sleep(refresh_interval)
                    live.update(self.render_dashboard())

        except KeyboardInterrupt:
            self.logger.info("Dashboard closed by user")
        except Exception as e:
            self.logger.error(f"Dashboard error: {e}")

    def export_stats(self, output_path: Path) -> bool:
        """
        Export statistics to JSON file.

        Args:
            output_path: Path to output JSON file

        Returns:
            True if successful, False otherwise
        """
        try:
            stats = self.get_processing_stats()
            health = self.get_system_health()

            data = {
                "timestamp": datetime.now().isoformat(),
                "processing_stats": asdict(stats),
                "system_health": asdict(health),
            }

            with open(output_path, "w") as f:
                # default=str handles non-JSON types (e.g. datetime) by
                # stringifying them.
                json.dump(data, f, indent=2, default=str)

            self.logger.info(f"Statistics exported to {output_path}")
            return True

        except Exception as e:
            self.logger.error(f"Failed to export stats: {e}")
            return False
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def show_analytics_dashboard(
    config: Config, verbose: bool = False, refresh_interval: int = 5
):
    """
    Launch the live analytics dashboard (blocks until interrupted).

    Args:
        config: Configuration instance
        verbose: Enable verbose logging
        refresh_interval: Refresh interval in seconds
    """
    AnalyticsDashboard(config, verbose).show_dashboard(refresh_interval)
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def export_analytics_data(
    config: Config, output_path: Path, verbose: bool = False
) -> bool:
    """
    Export the current analytics snapshot to a file.

    Args:
        config: Configuration instance
        output_path: Path to output file
        verbose: Enable verbose logging

    Returns:
        True if successful, False otherwise
    """
    return AnalyticsDashboard(config, verbose).export_stats(output_path)
|
core/base.py
ADDED
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base classes for Spatelier modules.
|
|
3
|
+
|
|
4
|
+
This module provides base classes that all processing modules should inherit from.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Dict, List, Optional, Union
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel, Field
|
|
12
|
+
|
|
13
|
+
from core.config import Config
|
|
14
|
+
from core.logger import get_logger
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ProcessingResult(BaseModel):
    """Base class for processing results with enhanced error handling.

    Carries a success flag and message plus optional output path,
    metadata, accumulated errors/warnings, a temp directory, and timing.
    """

    success: bool = Field(description="Whether the operation was successful")
    message: str = Field(description="Human-readable message about the operation")
    output_path: Optional[Path] = Field(default=None, description="Path to output file")
    metadata: Dict[str, Any] = Field(
        default_factory=dict, description="Additional metadata"
    )
    errors: List[str] = Field(
        default_factory=list, description="List of errors encountered"
    )
    warnings: List[str] = Field(
        default_factory=list, description="List of warnings encountered"
    )
    temp_dir: Optional[Path] = Field(
        default=None, description="Temporary directory used for processing"
    )
    duration_seconds: Optional[float] = Field(
        default=None, description="Processing duration in seconds"
    )

    @classmethod
    def success_result(
        cls,
        message: str,
        output_path: Optional[Union[str, Path]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        warnings: Optional[List[str]] = None,
    ) -> "ProcessingResult":
        """Create a successful processing result."""
        return cls(
            success=True,
            message=message,
            output_path=Path(output_path) if output_path else None,
            metadata=metadata or {},
            warnings=warnings or [],
        )

    @classmethod
    def error_result(
        cls,
        message: str,
        errors: Optional[List[str]] = None,
        warnings: Optional[List[str]] = None,
    ) -> "ProcessingResult":
        """Create an error processing result."""
        return cls(
            success=False, message=message, errors=errors or [], warnings=warnings or []
        )

    @classmethod
    def warning_result(
        cls,
        message: str,
        warnings: List[str],
        output_path: Optional[Union[str, Path]] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> "ProcessingResult":
        """Create a result with warnings but still successful."""
        return cls(
            success=True,
            message=message,
            output_path=Path(output_path) if output_path else None,
            metadata=metadata or {},
            warnings=warnings,
        )

    def add_error(self, error: str):
        """Add an error to the result and mark it as failed."""
        self.errors.append(error)
        # Any recorded error makes the overall result unsuccessful.
        self.success = False

    def add_warning(self, warning: str):
        """Add a warning to the result (does not affect success)."""
        self.warnings.append(warning)

    def add_metadata(self, key: str, value: Any):
        """Add metadata to the result."""
        self.metadata[key] = value

    def has_errors(self) -> bool:
        """Check if result has errors."""
        return len(self.errors) > 0

    def has_warnings(self) -> bool:
        """Check if result has warnings."""
        return len(self.warnings) > 0

    def is_successful(self) -> bool:
        """Check if result is successful (no errors)."""
        return self.success and not self.has_errors()

    def get_summary(self) -> str:
        """Get a one-line summary of the result."""
        summary = f"Success: {self.success}, Message: {self.message}"
        if self.has_errors():
            summary += f", Errors: {len(self.errors)}"
        if self.has_warnings():
            summary += f", Warnings: {len(self.warnings)}"
        # Compare against None explicitly so a legitimate 0.0-second
        # duration is still reported (0.0 is falsy and was skipped before).
        if self.duration_seconds is not None:
            summary += f", Duration: {self.duration_seconds:.2f}s"
        return summary
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class BaseProcessor(ABC):
    """
    Base class for all processors.

    Provides shared configuration handling, logging, and file-system
    validation helpers that every concrete processor inherits.
    """

    def __init__(self, config: Config, verbose: bool = False):
        """
        Initialize the processor.

        Args:
            config: Configuration instance
            verbose: Enable verbose logging
        """
        self.config = config
        self.verbose = verbose
        # Logger is named after the concrete subclass for clearer log output.
        self.logger = get_logger(self.__class__.__name__, verbose=verbose)

    @abstractmethod
    def process(self, input_path: Union[str, Path], **kwargs) -> ProcessingResult:
        """
        Process the input and return a result.

        Args:
            input_path: Path to input file
            **kwargs: Additional processing options

        Returns:
            ProcessingResult with operation details
        """

    def validate_input(self, input_path: Union[str, Path]) -> bool:
        """
        Validate that the input file exists and is accessible.

        Args:
            input_path: Path to input file

        Returns:
            True if valid, False otherwise
        """
        path = Path(input_path)
        if not path.exists():
            self.logger.error(f"Input file does not exist: {path}")
            return False
        if not path.is_file():
            self.logger.error(f"Input path is not a file: {path}")
            return False
        return True

    def ensure_output_dir(self, output_path: Union[str, Path]) -> bool:
        """
        Ensure the output directory exists.

        Args:
            output_path: Path to output file

        Returns:
            True if directory exists or was created, False otherwise
        """
        output_dir = Path(output_path).parent
        try:
            output_dir.mkdir(parents=True, exist_ok=True)
        except Exception as e:
            self.logger.error(f"Failed to create output directory {output_dir}: {e}")
            return False
        return True
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
class BaseDownloader(BaseProcessor):
    """
    Base class for download processors.

    Extends BaseProcessor with download-specific functionality such as
    URL support checks.
    """

    def __init__(self, config: Config, verbose: bool = False):
        """Initialize the downloader."""
        super().__init__(config, verbose)
        # Substrings identifying sites this downloader handles;
        # subclasses are expected to populate this list.
        self.supported_sites = []

    @abstractmethod
    def download(
        self, url: str, output_path: Optional[Union[str, Path]] = None, **kwargs
    ) -> ProcessingResult:
        """
        Download content from URL.

        Args:
            url: URL to download from
            output_path: Optional output path
            **kwargs: Additional download options

        Returns:
            ProcessingResult with download details
        """

    def process(self, input_data: Any, **kwargs) -> ProcessingResult:
        """Process method implementation for downloaders."""
        # Downloaders only accept URL strings; reject anything else early.
        if not isinstance(input_data, str):
            raise ValueError("Downloaders expect URL strings as input")
        return self.download(input_data, **kwargs)

    def is_supported(self, url: str) -> bool:
        """
        Check if the URL is supported by this downloader.

        Args:
            url: URL to check

        Returns:
            True if supported, False otherwise
        """
        # Basic implementation - subclasses should override
        lowered = url.lower()
        return any(site in lowered for site in self.supported_sites)
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
class BaseConverter(BaseProcessor):
    """
    Base class for format converters.

    Extends BaseProcessor with conversion-specific functionality such as
    input/output format support checks.
    """

    def __init__(self, config: Config, verbose: bool = False):
        """Initialize the converter."""
        super().__init__(config, verbose)
        # File extensions (without the leading dot) that this converter
        # accepts as input / produces as output; subclasses populate these.
        self.supported_input_formats = []
        self.supported_output_formats = []

    @abstractmethod
    def convert(
        self, input_path: Union[str, Path], output_path: Union[str, Path], **kwargs
    ) -> ProcessingResult:
        """
        Convert file from one format to another.

        Args:
            input_path: Path to input file
            output_path: Path to output file
            **kwargs: Additional conversion options

        Returns:
            ProcessingResult with conversion details
        """

    def process(self, input_path: Union[str, Path], **kwargs) -> ProcessingResult:
        """Process method implementation for converters."""
        output_path = kwargs.pop("output_path", None)
        if not output_path:
            raise ValueError("Converters require output_path in kwargs")
        return self.convert(input_path, output_path, **kwargs)

    def is_supported_format(
        self, file_path: Union[str, Path], is_input: bool = True
    ) -> bool:
        """
        Check if the file format is supported.

        Args:
            file_path: Path to file
            is_input: Whether this is an input file (True) or output file (False)

        Returns:
            True if format is supported, False otherwise
        """
        suffix = Path(file_path).suffix.lower().lstrip(".")
        # Choose the relevant format list once, then test membership.
        formats = (
            self.supported_input_formats if is_input else self.supported_output_formats
        )
        return suffix in formats
|