supervoxtral 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- supervoxtral-0.1.0.dist-info/METADATA +23 -0
- supervoxtral-0.1.0.dist-info/RECORD +18 -0
- supervoxtral-0.1.0.dist-info/WHEEL +4 -0
- supervoxtral-0.1.0.dist-info/entry_points.txt +2 -0
- supervoxtral-0.1.0.dist-info/licenses/LICENSE +21 -0
- svx/__init__.py +28 -0
- svx/cli.py +264 -0
- svx/core/__init__.py +92 -0
- svx/core/audio.py +256 -0
- svx/core/clipboard.py +122 -0
- svx/core/config.py +400 -0
- svx/core/pipeline.py +260 -0
- svx/core/prompt.py +165 -0
- svx/core/storage.py +118 -0
- svx/providers/__init__.py +88 -0
- svx/providers/base.py +83 -0
- svx/providers/mistral.py +189 -0
- svx/ui/qt_app.py +491 -0
svx/core/config.py
ADDED
|
@@ -0,0 +1,400 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core configuration utilities for SuperVoxtral.
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
- Resolves a per-user configuration directory (cross-platform).
|
|
6
|
+
|
|
7
|
+
- Exposes project path constants (ROOT_DIR, RECORDINGS_DIR, TRANSCRIPTS_DIR, LOGS_DIR)
|
|
8
|
+
as well as user-scoped paths (USER_CONFIG_DIR, USER_PROMPT_DIR).
|
|
9
|
+
- Configures logging and ensures required directories exist.
|
|
10
|
+
|
|
11
|
+
Design:
|
|
12
|
+
- User config is optional and lives in a platform-standard location:
|
|
13
|
+
- Linux: ${XDG_CONFIG_HOME:-~/.config}/supervoxtral
|
|
14
|
+
- macOS: ~/Library/Application Support/SuperVoxtral
|
|
15
|
+
- Windows: %APPDATA%/SuperVoxtral
|
|
16
|
+
|
|
17
|
+
- User config file: config.toml (TOML), parsed with the stdlib `tomllib`.
|
|
18
|
+
Python 3.11+ is required by the project, so no `tomli` fallback
|
|
19
|
+
is needed.
|
|
20
|
+
|
|
21
|
+
This module aims to remain small and import-safe.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import logging
|
|
27
|
+
import os
|
|
28
|
+
import sys
|
|
29
|
+
from dataclasses import asdict, dataclass, field
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
from typing import Any, Final
|
|
32
|
+
|
|
33
|
+
# Use stdlib tomllib (Python >= 3.11 required by project)
|
|
34
|
+
import tomllib
|
|
35
|
+
|
|
36
|
+
# Project paths (relative to current working directory)
|
|
37
|
+
# ROOT_DIR is captured from the process's working directory when this module
# is first imported, so the three output directories below are anchored to the
# place the program was launched from. Nothing is created on disk here.
ROOT_DIR: Final[Path] = Path.cwd()
RECORDINGS_DIR: Final[Path] = ROOT_DIR.joinpath("recordings")
TRANSCRIPTS_DIR: Final[Path] = ROOT_DIR.joinpath("transcripts")
LOGS_DIR: Final[Path] = ROOT_DIR.joinpath("logs")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# User config (platform standard)
|
|
44
|
+
def get_user_config_dir() -> Path:
    """
    Resolve the per-user configuration directory for SuperVoxtral.

    Resolution order:
    - Windows: ``%APPDATA%/SuperVoxtral``, or ``~/AppData/Roaming/SuperVoxtral``
      when ``APPDATA`` is unset or empty.
    - macOS: ``~/Library/Application Support/SuperVoxtral``.
    - Anything else: ``$XDG_CONFIG_HOME/supervoxtral`` when the variable holds
      an absolute path, otherwise ``~/.config/supervoxtral``.

    Returns:
        The directory path. It is not created here and may not exist yet.
    """
    if sys.platform.startswith("win"):
        appdata = os.environ.get("APPDATA")
        if appdata:
            return Path(appdata) / "SuperVoxtral"
        # APPDATA missing: fall back to its conventional location under home.
        return Path.home() / "AppData" / "Roaming" / "SuperVoxtral"

    if sys.platform == "darwin":
        return Path.home() / "Library" / "Application Support" / "SuperVoxtral"

    # The XDG Base Directory spec requires absolute paths and says relative
    # values must be treated as invalid and ignored; honoring one would make
    # the config location depend on the current working directory.
    xdg = os.environ.get("XDG_CONFIG_HOME")
    if xdg and Path(xdg).is_absolute():
        return Path(xdg) / "supervoxtral"
    return Path.home() / ".config" / "supervoxtral"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# Resolved once, at import time, from the platform rules implemented by
# get_user_config_dir(). None of these paths is created by these assignments.
USER_CONFIG_DIR: Final[Path] = get_user_config_dir()
USER_PROMPT_DIR: Final[Path] = USER_CONFIG_DIR.joinpath("prompt")
USER_CONFIG_FILE: Final[Path] = USER_CONFIG_DIR.joinpath("config.toml")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _get_log_level(level: str) -> int:
    """
    Convert a log level name to the matching ``logging`` constant.

    The lookup is case-insensitive and ignores surrounding whitespace.

    Args:
        level: Level name such as "DEBUG", "info" or "Warning".

    Returns:
        The integer level, or ``logging.INFO`` when the name is unknown, is not
        a string, or resolves to a ``logging`` attribute that is not an integer
        level (for example ``BASIC_FORMAT``, which is a format string).
    """
    try:
        candidate = getattr(logging, level.strip().upper())
    except AttributeError:
        # Raised both for unknown names and for non-string input (no .strip()).
        return logging.INFO
    # Uppercase attributes of `logging` are not all levels; only accept real
    # integers (bool is excluded because it is an int subclass).
    if isinstance(candidate, int) and not isinstance(candidate, bool):
        return candidate
    return logging.INFO
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _configure_logging(level: str) -> None:
    """
    Configure the root logger with a stdout handler and a file handler.

    Existing root handlers are removed and closed first, so calling this more
    than once neither duplicates log lines nor leaks the descriptor held by a
    previous file handler.

    Args:
        level: Level name; resolved through `_get_log_level`.

    Side effects:
        Creates LOGS_DIR if needed and opens ``LOGS_DIR / "app.log"``.
    """
    log_level = _get_log_level(level)

    # The directory must exist before FileHandler tries to open the file.
    LOGS_DIR.mkdir(parents=True, exist_ok=True)

    root_logger = logging.getLogger()
    root_logger.setLevel(log_level)
    # Iterate over a copy: removeHandler mutates root_logger.handlers.
    for stale in list(root_logger.handlers):
        root_logger.removeHandler(stale)
        stale.close()

    formatter = logging.Formatter("%(asctime)s | %(levelname)s | %(name)s | %(message)s")
    fresh_handlers = (
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(LOGS_DIR / "app.log", encoding="utf-8"),
    )
    for handler in fresh_handlers:
        handler.setLevel(log_level)
        handler.setFormatter(formatter)
        root_logger.addHandler(handler)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def setup_environment(log_level: str = "INFO") -> None:
    """
    Ensure the user config directories exist and set up console logging.

    - Creates USER_PROMPT_DIR and USER_CONFIG_DIR if missing; existing files
      are left untouched.
    - Replaces every handler on the root logger with a single stdout handler
      at `log_level`. Removed handlers are closed so repeated calls do not
      leak open log files.

    Project output directories (recordings/, transcripts/, logs/) and file
    logging are NOT handled here.

    Args:
        log_level: Level name; resolved through `_get_log_level`.
    """
    USER_PROMPT_DIR.mkdir(parents=True, exist_ok=True)
    USER_CONFIG_DIR.mkdir(parents=True, exist_ok=True)

    log_level_int = _get_log_level(log_level)
    root_logger = logging.getLogger()
    root_logger.setLevel(log_level_int)
    # Iterate over a copy: removeHandler mutates root_logger.handlers.
    for stale in list(root_logger.handlers):
        root_logger.removeHandler(stale)
        stale.close()

    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setLevel(log_level_int)
    stream_handler.setFormatter(
        logging.Formatter("%(asctime)s | %(levelname)s | %(name)s | %(message)s")
    )
    root_logger.addHandler(stream_handler)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _read_toml(path: Path) -> dict[str, Any]:
|
|
143
|
+
"""
|
|
144
|
+
Read a TOML file and return its contents as a dict using stdlib tomllib.
|
|
145
|
+
If reading/parsing fails, return an empty dict.
|
|
146
|
+
"""
|
|
147
|
+
try:
|
|
148
|
+
text = path.read_text(encoding="utf-8")
|
|
149
|
+
return tomllib.loads(text)
|
|
150
|
+
except Exception:
|
|
151
|
+
return {}
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def load_user_config() -> dict[str, Any]:
    """
    Return the parsed contents of USER_CONFIG_FILE as a dictionary.

    An empty dict is returned when the file does not exist, and also when
    `_read_toml` cannot read or parse it.

    Illustrative layout:

        [providers.mistral]
        api_key = "..."

        [defaults]
        provider = "mistral"
        format = "mp3"
        model = "voxtral-small-latest"
        language = "fr"
        rate = 16000
        channels = 1
        device = ""
        keep_audio_files = false
        keep_transcript_files = false
        keep_log_files = false
        copy = true
        log_level = "INFO"

        [prompt]
        # optional: either file or text
        file = "~/path/to/user.md"
        text = "inline prompt text (less recommended)"
    """
    return _read_toml(USER_CONFIG_FILE) if USER_CONFIG_FILE.exists() else {}
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def init_user_config(force: bool = False, prompt_file: Path | None = None) -> Path:
    """
    Initialize the user's config.toml with commented example content.

    - Ensures USER_CONFIG_DIR exists.
    - Writes USER_CONFIG_FILE when it is missing, or unconditionally when
      `force` is True.
    - The example's ``[prompt] file`` entry points at `prompt_file`
      (``USER_PROMPT_DIR / "user.md"`` by default).

    Args:
        force: Overwrite an existing config file.
        prompt_file: Prompt file to reference from the generated config.

    Returns:
        USER_CONFIG_FILE, whether or not the write succeeded. A failed write
        is best-effort: it is logged as a warning and not raised.
    """
    USER_CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    if prompt_file is None:
        prompt_file = USER_PROMPT_DIR / "user.md"

    # The path is embedded in a TOML basic ("double-quoted") string, where
    # backslashes and double quotes are escape syntax. Without escaping, a
    # Windows path such as C:\Users\me\... yields a file that cannot be parsed.
    prompt_path_toml = str(prompt_file).replace("\\", "\\\\").replace('"', '\\"')

    example_toml = (
        "# SuperVoxtral - User configuration\n"
        "#\n"
        "# Basics:\n"
        "# - This configuration controls the default behavior of `svx record`.\n"
        "# - The parameters below override the binary's built-in defaults.\n"
        "# - You can override a few options at runtime via the CLI:\n"
        "# --prompt / --prompt-file (set a one-off prompt for this run)\n"
        "# --log-level (debugging)\n"
        "# --outfile-prefix (one-off output naming)\n"
        "#\n"
        "# Output persistence:\n"
        "# - Set keep_* = true to create and save files to project\n"
        "# directories (recordings/, transcripts/, logs/).\n"
        "# - false (default): use temp files/console only (no disk\n"
        "# footprint in project dir).\n"
        "#\n"
        "# Authentication:\n"
        "# - API keys are defined in provider-specific sections in this file.\n"
        "[providers.mistral]\n"
        '# api_key = ""\n\n'
        "[defaults]\n"
        '# Provider to use (currently supported: "mistral")\n'
        'provider = "mistral"\n\n'
        '# File format sent to the provider: "wav" | "mp3" | "opus"\n'
        '# Recording is always WAV; conversion is applied if "mp3" or "opus"\n'
        'format = "opus"\n\n'
        "# Model to use on the provider side (example for Mistral Voxtral)\n"
        'model = "voxtral-mini-latest"\n\n'
        "# Language hint (may help the provider)\n"
        'language = "fr"\n\n'
        "# Audio recording parameters\n"
        "rate = 16000\n"
        "channels = 1\n"
        'device = ""\n\n'
        "# Output persistence:\n"
        "# - keep_audio_files: false uses temp files (no recordings/ dir),\n"
        "# true saves to recordings/\n"
        "keep_audio_files = false\n"
        "# - keep_transcript_files: false prints/copies only (no\n"
        "# transcripts/ dir), true saves to transcripts/\n"
        "keep_transcript_files = false\n"
        "# - keep_log_files: false console only (no logs/ dir), true\n"
        "# saves to logs/app.log\n"
        "keep_log_files = false\n\n"
        "# Automatically copy the transcribed text to the system clipboard\n"
        "copy = true\n\n"
        '# Log level: "DEBUG" | "INFO" | "WARNING" | "ERROR"\n'
        'log_level = "INFO"\n\n'
        "[prompt]\n"
        "# Default user prompt source:\n"
        "# - Option 1: Use a file (recommended)\n"
        f'file = "{prompt_path_toml}"\n'
        "#\n"
        "# - Option 2: Inline prompt (less recommended for long text)\n"
        '# text = "Please transcribe the audio and provide a concise summary in French."\n'
    )

    if force or not USER_CONFIG_FILE.exists():
        try:
            USER_CONFIG_FILE.write_text(example_toml, encoding="utf-8")
        except (OSError, UnicodeError) as exc:
            # Surface the failure at a visible level: the caller still receives
            # the path and would otherwise assume the file was written.
            logging.warning("Could not write user config file: %s (%s)", USER_CONFIG_FILE, exc)
    return USER_CONFIG_FILE
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
@dataclass
class ProviderConfig:
    """Settings for one provider, from a ``[providers.<name>]`` config table."""

    # API key for the provider; None when no key has been configured.
    api_key: str | None = None
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
@dataclass
class DefaultsConfig:
    """Default recording/transcription options (the ``[defaults]`` table)."""

    # Provider name used to look up the transcription backend.
    provider: str = "mistral"
    # Audio format sent to the provider: "wav", "mp3" or "opus".
    format: str = "opus"
    # Model identifier passed to the provider.
    model: str = "voxtral-mini-latest"
    # Optional language hint passed to the provider.
    language: str | None = None
    # Recording sample rate and channel count.
    rate: int = 16000
    channels: int = 1
    # Optional input device passed to the recorder.
    device: str | None = None
    # Persistence switches for recordings, transcripts and the log file.
    keep_audio_files: bool = False
    keep_transcript_files: bool = False
    keep_log_files: bool = False
    # Copy the transcription to the clipboard when True.
    copy: bool = True
    # Log level name, e.g. "INFO".
    log_level: str = "INFO"
    # Optional base name for output files.
    outfile_prefix: str | None = None
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
@dataclass
class PromptConfig:
    """User prompt source (the ``[prompt]`` table): inline text and/or a file."""

    # Inline prompt text, if configured.
    text: str | None = None
    # Path (as written in the config) of a prompt file, if configured.
    file: str | None = None
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
@dataclass
class Config:
    """
    Fully resolved application configuration.

    Combines the user's config.toml (providers, defaults, prompt) with the
    project- and user-scoped paths defined in this module. Build one with
    `Config.load()`.
    """

    providers: dict[str, ProviderConfig] = field(default_factory=dict)
    defaults: DefaultsConfig = field(default_factory=DefaultsConfig)
    prompt: PromptConfig = field(default_factory=PromptConfig)
    recordings_dir: Path = RECORDINGS_DIR
    transcripts_dir: Path = TRANSCRIPTS_DIR
    logs_dir: Path = LOGS_DIR
    user_prompt_dir: Path = USER_PROMPT_DIR
    user_config_file: Path = USER_CONFIG_FILE

    @staticmethod
    def _as_table(value: Any) -> dict[str, Any]:
        """Return `value` if it is a TOML table (dict), else an empty dict."""
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _as_bool(value: Any) -> bool:
        """
        Coerce a config value to bool.

        Quoted booleans such as "false" or "no" are understood; plain
        ``bool("false")`` would be True. Anything else uses normal truthiness.
        """
        if isinstance(value, str):
            token = value.strip().lower()
            if token in {"1", "true", "yes", "on"}:
                return True
            if token in {"0", "false", "no", "off", ""}:
                return False
        return bool(value)

    @classmethod
    def load(cls, log_level: str = "INFO") -> Config:
        """
        Build a Config from the user's config file and set up the environment.

        Steps, in order: `setup_environment(log_level)`, read config.toml,
        coerce and validate ``[defaults]``, create the output directories that
        the ``keep_*`` flags enable, apply the effective log level, attach a
        file log handler when ``keep_log_files`` is set, then read
        ``[providers.*]`` and ``[prompt]``.

        Args:
            log_level: Level used for the initial console logging and as the
                fallback when ``[defaults] log_level`` is absent.

        Returns:
            The populated Config.

        Raises:
            ValueError: If ``channels`` is not 1 or 2, ``rate`` is not > 0,
                ``format`` is not one of wav|mp3|opus, or ``rate``/``channels``
                cannot be converted to int.
        """
        setup_environment(log_level)
        user_config = load_user_config()

        # A section that is not a table (e.g. `defaults = 1`) is treated as
        # absent instead of crashing on `.get`.
        user_defaults_raw = cls._as_table(user_config.get("defaults", {}))
        defaults_data = {
            "provider": str(user_defaults_raw.get("provider", "mistral")),
            "format": str(user_defaults_raw.get("format", "opus")),
            "model": str(user_defaults_raw.get("model", "voxtral-mini-latest")),
            "language": user_defaults_raw.get("language"),
            "rate": int(user_defaults_raw.get("rate", 16000)),
            "channels": int(user_defaults_raw.get("channels", 1)),
            "device": user_defaults_raw.get("device"),
            "keep_audio_files": cls._as_bool(user_defaults_raw.get("keep_audio_files", False)),
            "keep_transcript_files": cls._as_bool(
                user_defaults_raw.get("keep_transcript_files", False)
            ),
            "keep_log_files": cls._as_bool(user_defaults_raw.get("keep_log_files", False)),
            "copy": cls._as_bool(user_defaults_raw.get("copy", True)),
            "log_level": str(user_defaults_raw.get("log_level", log_level)),
            "outfile_prefix": user_defaults_raw.get("outfile_prefix"),
        }

        # Validate before anything is created on disk.
        if defaults_data["channels"] not in (1, 2):
            raise ValueError("channels must be 1 or 2")
        if defaults_data["rate"] <= 0:
            raise ValueError("rate must be > 0")
        if defaults_data["format"] not in {"wav", "mp3", "opus"}:
            raise ValueError("format must be one of wav|mp3|opus")
        defaults = DefaultsConfig(**defaults_data)

        # Output directories exist only when the matching keep_* flag is on.
        if defaults.keep_audio_files:
            RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)
        if defaults.keep_transcript_files:
            TRANSCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
        if defaults.keep_log_files:
            LOGS_DIR.mkdir(parents=True, exist_ok=True)

        # Apply the effective level (config value, else the `log_level`
        # argument). Handlers installed by setup_environment carry their own
        # level, so they must be updated too; otherwise a more verbose
        # configured level would never reach the console.
        effective_level = _get_log_level(defaults.log_level)
        root_logger = logging.getLogger()
        root_logger.setLevel(effective_level)
        for handler in root_logger.handlers:
            handler.setLevel(effective_level)

        if defaults.keep_log_files:
            file_handler = logging.FileHandler(LOGS_DIR / "app.log", encoding="utf-8")
            file_handler.setLevel(effective_level)
            file_handler.setFormatter(
                logging.Formatter("%(asctime)s | %(levelname)s | %(name)s | %(message)s")
            )
            root_logger.addHandler(file_handler)

        # Providers: only table-valued entries are kept.
        providers_raw = cls._as_table(user_config.get("providers", {}))
        providers_data = {
            name: ProviderConfig(api_key=str(prov_raw.get("api_key", "")))
            for name, prov_raw in providers_raw.items()
            if isinstance(prov_raw, dict)
        }

        # Prompt: non-string values are ignored.
        prompt_raw = cls._as_table(user_config.get("prompt", {}))
        prompt_text = prompt_raw.get("text")
        prompt_file = prompt_raw.get("file")
        prompt = PromptConfig(
            text=prompt_text if isinstance(prompt_text, str) else None,
            file=prompt_file if isinstance(prompt_file, str) else None,
        )

        return cls(
            defaults=defaults,
            providers=providers_data,
            prompt=prompt,
            recordings_dir=RECORDINGS_DIR,
            transcripts_dir=TRANSCRIPTS_DIR,
            logs_dir=LOGS_DIR,
            user_prompt_dir=USER_PROMPT_DIR,
            user_config_file=USER_CONFIG_FILE,
        )

    def resolve_prompt(self, inline: str | None = None, file_path: Path | None = None) -> str:
        """
        Resolve the user prompt by delegating to `svx.core.prompt`.

        Args:
            inline: Inline prompt text for this call, if any.
            file_path: Prompt file for this call, if any.

        Returns:
            Whatever `resolve_user_prompt` returns for this config, the two
            arguments and `self.user_prompt_dir`.
        """
        # Imported lazily, at call time rather than at module import.
        from svx.core.prompt import resolve_user_prompt

        return resolve_user_prompt(self, inline, file_path, self.user_prompt_dir)

    def get_provider_config(self, name: str) -> dict[str, Any]:
        """
        Return the settings of provider `name` as a plain dict.

        An unknown provider yields the dict form of a default ProviderConfig
        (``{"api_key": None}``).
        """
        return asdict(self.providers.get(name, ProviderConfig()))
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
# Public API of this module: path constants, environment/config helpers, and
# the configuration dataclasses.
__all__ = [
    "ROOT_DIR",
    "RECORDINGS_DIR",
    "TRANSCRIPTS_DIR",
    "LOGS_DIR",
    "USER_CONFIG_DIR",
    "USER_PROMPT_DIR",
    "USER_CONFIG_FILE",
    "setup_environment",
    "load_user_config",
    "init_user_config",
    "Config",
    "ProviderConfig",
    "DefaultsConfig",
    "PromptConfig",
]
|
svx/core/pipeline.py
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import tempfile
|
|
5
|
+
import threading
|
|
6
|
+
from collections.abc import Callable
|
|
7
|
+
from logging import FileHandler
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
import svx.core.config as config
|
|
12
|
+
from svx.core.audio import convert_audio, record_wav, timestamp
|
|
13
|
+
from svx.core.clipboard import copy_to_clipboard
|
|
14
|
+
from svx.core.config import Config
|
|
15
|
+
from svx.core.storage import save_transcript
|
|
16
|
+
from svx.providers import get_provider
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class RecordingPipeline:
    """
    Centralized pipeline: record audio, optionally convert it, transcribe it
    through the configured provider, optionally save the transcript, and
    optionally copy the text to the clipboard.

    When audio files are not kept, recording and conversion happen inside a
    temporary directory that is removed before `run` returns.

    Options:
    - save_all: force all keep_* flags on for this run and enable file logging.
    - progress_callback: receives each status message (e.g. for a GUI).
    - transcribe_mode: transcribe without a prompt, always using the
      "voxtral-mini-latest" model.
    """

    def __init__(
        self,
        cfg: Config,
        user_prompt: str | None = None,
        user_prompt_file: Path | None = None,
        save_all: bool = False,
        outfile_prefix: str | None = None,
        progress_callback: Callable[[str], None] | None = None,
        transcribe_mode: bool = False,
    ) -> None:
        """Store the configuration and per-run overrides; performs no I/O."""
        self.cfg = cfg
        self.user_prompt = user_prompt
        self.user_prompt_file = user_prompt_file
        self.save_all = save_all
        self.outfile_prefix = outfile_prefix
        self.progress_callback = progress_callback
        self.transcribe_mode = transcribe_mode

    def _status(self, msg: str) -> None:
        """Send `msg` to the progress callback (if any) and log it at INFO."""
        if self.progress_callback:
            self.progress_callback(msg)
        logging.info(msg)

    def _setup_save_all(self) -> None:
        """
        Apply the save_all override; a no-op when `save_all` is False.

        Sets the three keep_* flags on `self.cfg.defaults` (mutating the shared
        config object), creates the project output directories, and attaches a
        file handler to the root logger unless one is already present.
        """
        if not self.save_all:
            return

        self.cfg.defaults.keep_audio_files = True
        self.cfg.defaults.keep_transcript_files = True
        self.cfg.defaults.keep_log_files = True

        config.RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)
        config.TRANSCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
        config.LOGS_DIR.mkdir(parents=True, exist_ok=True)

        # Avoid stacking a second file handler if logging to file is already on.
        root_logger = logging.getLogger()
        if not any(isinstance(h, FileHandler) for h in root_logger.handlers):  # type: ignore[reportUnknownMemberType]
            from svx.core.config import _get_log_level

            log_level_int = _get_log_level(self.cfg.defaults.log_level)
            formatter = logging.Formatter("%(asctime)s | %(levelname)s | %(name)s | %(message)s")
            file_handler = logging.FileHandler(config.LOGS_DIR / "app.log", encoding="utf-8")
            file_handler.setLevel(log_level_int)
            file_handler.setFormatter(formatter)
            root_logger.addHandler(file_handler)
            logging.info("File logging enabled for this run")

    def _record_and_transcribe(
        self,
        audio_dir: Path,
        base: str,
        *,
        persist_audio: bool,
        model: str,
        final_user_prompt: str | None,
        stop_event: threading.Event,
        paths: dict[str, Path | None],
    ) -> dict[str, Any]:
        """
        Record into `audio_dir`, then convert, transcribe, save and copy.

        Shared by the kept-audio and temporary-directory paths of `run`.
        `paths` is filled in place; "wav" and "converted" are only recorded
        when `persist_audio` is True, because temporary files no longer exist
        once `run` returns.
        """
        defaults = self.cfg.defaults
        provider = defaults.provider
        audio_format = defaults.format

        wav_path = audio_dir / f"{base}.wav"
        duration = record_wav(
            wav_path,
            samplerate=defaults.rate,
            channels=defaults.channels,
            device=defaults.device,
            stop_event=stop_event,
        )
        to_send_path = wav_path
        if persist_audio:
            paths["wav"] = wav_path

        # Recording is WAV; convert only when another format is requested.
        if audio_format in {"mp3", "opus"}:
            self._status("Converting...")
            to_send_path = convert_audio(wav_path, audio_format)
            logging.info("Converted %s -> %s", wav_path, to_send_path)
            if persist_audio:
                paths["converted"] = to_send_path

        self._status("Transcribing...")
        prov = get_provider(provider, cfg=self.cfg)
        result = prov.transcribe(
            to_send_path,
            user_prompt=final_user_prompt,
            model=model,
            language=defaults.language,
            transcribe_mode=self.transcribe_mode,
        )
        text = result["text"]
        raw = result["raw"]

        if defaults.keep_transcript_files:
            self.cfg.transcripts_dir.mkdir(parents=True, exist_ok=True)
            txt_path, json_path = save_transcript(
                self.cfg.transcripts_dir, base, provider, text, raw
            )
            paths["txt"] = txt_path
            paths["json"] = json_path
        else:
            paths["txt"] = None
            paths["json"] = None

        if defaults.copy:
            # Clipboard access is best-effort: a failure must not lose the
            # transcription that was just produced.
            try:
                copy_to_clipboard(text)
                logging.info("Copied transcription to clipboard")
            except Exception as e:
                logging.warning("Failed to copy to clipboard: %s", e)

        logging.info("Pipeline finished (%.2fs)", duration)
        return {
            "text": text,
            "raw": raw,
            "duration": duration,
            "paths": paths,
        }

    def run(self, stop_event: threading.Event | None = None) -> dict[str, Any]:
        """
        Execute the full pipeline.

        Args:
            stop_event: Optional event to signal recording stop (e.g., for
                GUI). A fresh, unset event is used when omitted.

        Returns:
            Dict with 'text', 'raw', 'duration' and 'paths'. 'paths' always
            has 'txt' and 'json' (None when transcripts are not kept); 'wav'
            and 'converted' appear only when audio files are kept.

        Raises:
            ValueError: If channels, rate or format in the config are invalid.
            Exception: Any recording, conversion or transcription error is
                logged with its traceback and re-raised unchanged.
        """
        self._setup_save_all()

        defaults = self.cfg.defaults
        model = defaults.model
        if self.transcribe_mode:
            # Transcribe mode always uses the mini model; warn on an override.
            original_model = model
            model = "voxtral-mini-latest"
            if original_model != "voxtral-mini-latest":
                logging.warning(
                    "Mode Transcribe : modèle override de '%s' vers 'voxtral-mini-latest' "
                    "(optimisé pour la transcription).",
                    original_model,
                )

        base = self.outfile_prefix or f"rec_{timestamp()}"
        if self.transcribe_mode:
            final_user_prompt = None
            self._status("Mode Transcribe activated: no prompt used.")
        else:
            final_user_prompt = self.cfg.resolve_prompt(self.user_prompt, self.user_prompt_file)
        keep_audio = defaults.keep_audio_files

        # Validation (fail fast, before any recording starts)
        if defaults.channels not in (1, 2):
            raise ValueError("channels must be 1 or 2")
        if defaults.rate <= 0:
            raise ValueError("rate must be > 0")
        if defaults.format not in {"wav", "mp3", "opus"}:
            raise ValueError("format must be one of wav|mp3|opus")

        paths: dict[str, Path | None] = {}
        stop_for_recording = stop_event or threading.Event()

        try:
            self._status("Recording...")
            if keep_audio:
                self.cfg.recordings_dir.mkdir(parents=True, exist_ok=True)
                return self._record_and_transcribe(
                    self.cfg.recordings_dir,
                    base,
                    persist_audio=True,
                    model=model,
                    final_user_prompt=final_user_prompt,
                    stop_event=stop_for_recording,
                    paths=paths,
                )
            # Not keeping audio: everything must finish inside the temporary
            # directory, which is deleted when the `with` block exits.
            with tempfile.TemporaryDirectory() as tmpdir:
                return self._record_and_transcribe(
                    Path(tmpdir),
                    base,
                    persist_audio=False,
                    model=model,
                    final_user_prompt=final_user_prompt,
                    stop_event=stop_for_recording,
                    paths=paths,
                )
        except Exception:
            logging.exception("Pipeline failed")
            raise
|