supervoxtral 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
svx/core/config.py ADDED
@@ -0,0 +1,400 @@
1
+ """
2
+ Core configuration utilities for SuperVoxtral.
3
+
4
+
5
+ - Resolves a per-user configuration directory (cross-platform).
6
+
7
+ - Exposes project path constants (ROOT_DIR, RECORDINGS_DIR, TRANSCRIPTS_DIR, LOGS_DIR)
8
+ as well as user-scoped paths (USER_CONFIG_DIR, USER_PROMPT_DIR).
9
+ - Configures logging and ensures required directories exist.
10
+
11
+ Design:
12
+ - User config is optional and lives in a platform-standard location:
13
+ - Linux: ${XDG_CONFIG_HOME:-~/.config}/supervoxtral
14
+ - macOS: ~/Library/Application Support/SuperVoxtral
15
+ - Windows: %APPDATA%/SuperVoxtral
16
+
17
+ - User config file: config.toml (TOML), parsed with the stdlib `tomllib`
18
+ module, so Python 3.11+ is required (this module has no `tomli`
19
+ fallback for older interpreters).
20
+
21
+ This module aims to remain small and import-safe.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import logging
27
+ import os
28
+ import sys
29
+ from dataclasses import asdict, dataclass, field
30
+ from pathlib import Path
31
+ from typing import Any, Final
32
+
33
+ # Use stdlib tomllib (Python >= 3.11 required by project)
34
+ import tomllib
35
+
36
# Project paths (relative to current working directory).
# NOTE: Path.cwd() is evaluated once, when this module is imported, so all
# four constants are anchored to the directory the process was started in.
ROOT_DIR: Final[Path] = Path.cwd()
RECORDINGS_DIR: Final[Path] = ROOT_DIR / "recordings"
TRANSCRIPTS_DIR: Final[Path] = ROOT_DIR / "transcripts"
LOGS_DIR: Final[Path] = ROOT_DIR / "logs"
41
+
42
+
43
+ # User config (platform standard)
44
def get_user_config_dir() -> Path:
    """
    Return the per-user SuperVoxtral configuration directory for this platform.

    The path is only computed, never created, so it may not exist yet.
    """
    env = os.environ

    if sys.platform.startswith("win"):
        # Windows: prefer %APPDATA%, otherwise the conventional roaming profile.
        roaming = env.get("APPDATA")
        base = Path(roaming) if roaming else Path.home() / "AppData" / "Roaming"
        return base / "SuperVoxtral"

    if sys.platform == "darwin":
        # macOS: ~/Library/Application Support/SuperVoxtral
        return Path.home().joinpath("Library", "Application Support", "SuperVoxtral")

    # Linux/Unix: $XDG_CONFIG_HOME when set and non-empty, else ~/.config
    xdg_home = env.get("XDG_CONFIG_HOME")
    base = Path(xdg_home) if xdg_home else Path.home() / ".config"
    return base / "supervoxtral"
67
+
68
+
69
# User-scoped locations, resolved once at import time from get_user_config_dir().
# Nothing is created on disk here; the paths may not exist yet.
USER_CONFIG_DIR: Final[Path] = get_user_config_dir()
USER_PROMPT_DIR: Final[Path] = USER_CONFIG_DIR / "prompt"
USER_CONFIG_FILE: Final[Path] = USER_CONFIG_DIR / "config.toml"
72
+
73
+
74
+ def _get_log_level(level: str) -> int:
75
+ """
76
+ Convert a string log level to logging module constant, defaulting to INFO.
77
+ """
78
+ try:
79
+ return getattr(logging, level.upper())
80
+ except AttributeError:
81
+ return logging.INFO
82
+
83
+
84
def _configure_logging(level: str) -> None:
    """
    Configure the root logger with a stdout handler and a file handler.

    Handlers already attached to the root logger are detached and closed
    first, so calling this repeatedly neither duplicates log lines nor leaks
    the open handle on logs/app.log.
    """
    log_level = _get_log_level(level)

    # The FileHandler opens its file eagerly, so the directory must exist.
    LOGS_DIR.mkdir(parents=True, exist_ok=True)

    root_logger = logging.getLogger()
    root_logger.setLevel(log_level)

    # Iterate over a copy: removeHandler() mutates root_logger.handlers.
    for stale in list(root_logger.handlers):
        root_logger.removeHandler(stale)
        stale.close()

    formatter = logging.Formatter("%(asctime)s | %(levelname)s | %(name)s | %(message)s")
    new_handlers: tuple[logging.Handler, ...] = (
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(LOGS_DIR / "app.log", encoding="utf-8"),
    )
    for handler in new_handlers:
        handler.setLevel(log_level)
        handler.setFormatter(formatter)
        root_logger.addHandler(handler)
113
+
114
+
115
def setup_environment(log_level: str = "INFO") -> None:
    """
    Ensure the user config directories exist and set up console logging.

    - Creates USER_PROMPT_DIR (and therefore USER_CONFIG_DIR); existing files
      are left untouched.
    - Resets the root logger to a single stdout handler at `log_level`.

    Project output directories (recordings/, transcripts/, logs/) and file
    logging are NOT set up here; Config.load() adds them conditionally.
    """
    # parents=True also creates USER_CONFIG_DIR, the parent of the prompt dir.
    USER_PROMPT_DIR.mkdir(parents=True, exist_ok=True)

    log_level_int = _get_log_level(log_level)
    root_logger = logging.getLogger()
    root_logger.setLevel(log_level_int)

    # Detach AND close previous handlers so repeated calls do not leak open
    # log files. Iterate over a copy because removeHandler() mutates the list.
    for stale in list(root_logger.handlers):
        root_logger.removeHandler(stale)
        stale.close()

    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setLevel(log_level_int)
    stream_handler.setFormatter(
        logging.Formatter("%(asctime)s | %(levelname)s | %(name)s | %(message)s")
    )
    root_logger.addHandler(stream_handler)
140
+
141
+
142
+ def _read_toml(path: Path) -> dict[str, Any]:
143
+ """
144
+ Read a TOML file and return its contents as a dict using stdlib tomllib.
145
+ If reading/parsing fails, return an empty dict.
146
+ """
147
+ try:
148
+ text = path.read_text(encoding="utf-8")
149
+ return tomllib.loads(text)
150
+ except Exception:
151
+ return {}
152
+
153
+
154
def load_user_config() -> dict[str, Any]:
    """
    Return the parsed contents of USER_CONFIG_FILE as a dictionary.

    An empty dict is returned when the file is absent; parse and read
    failures are handled by _read_toml.

    Example layout:

        [defaults]
        provider = "mistral"
        format = "mp3"
        model = "voxtral-small-latest"
        language = "fr"
        rate = 16000
        channels = 1
        device = ""
        keep_audio_files = false
        copy = true
        log_level = "INFO"

        [prompt]
        # optional: either file or text
        file = "~/path/to/user.md"
        text = "inline prompt text (less recommended)"
    """
    config_path = USER_CONFIG_FILE
    return _read_toml(config_path) if config_path.exists() else {}
182
+
183
+
184
def init_user_config(force: bool = False, prompt_file: Path | None = None) -> Path:
    """
    Initialize the user's config.toml with example content.

    - Ensures USER_CONFIG_DIR exists.
    - Writes USER_CONFIG_FILE with example content if missing or force=True.
    - The example references the provided prompt_file (or USER_PROMPT_DIR/'user.md' by default).

    Returns USER_CONFIG_FILE. A failed write is logged as a warning and does
    not raise, so the returned path is not guaranteed to exist.
    """
    USER_CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    if prompt_file is None:
        prompt_file = USER_PROMPT_DIR / "user.md"

    # The path is embedded in a TOML basic ("double-quoted") string, where a
    # backslash starts an escape sequence. Escape backslashes and quotes so
    # Windows paths such as C:\Users\... still yield a parseable file.
    prompt_file_toml = str(prompt_file).replace("\\", "\\\\").replace('"', '\\"')

    example_toml = (
        "# SuperVoxtral - User configuration\n"
        "#\n"
        "# Basics:\n"
        "# - This configuration controls the default behavior of `svx record`.\n"
        "# - The parameters below override the binary's built-in defaults.\n"
        "# - You can override a few options at runtime via the CLI:\n"
        "# --prompt / --prompt-file (set a one-off prompt for this run)\n"
        "# --log-level (debugging)\n"
        "# --outfile-prefix (one-off output naming)\n"
        "#\n"
        "# Output persistence:\n"
        "# - Set keep_* = true to create and save files to project\n"
        "# directories (recordings/, transcripts/, logs/).\n"
        "# - false (default): use temp files/console only (no disk\n"
        "# footprint in project dir).\n"
        "#\n"
        "# Authentication:\n"
        "# - API keys are defined in provider-specific sections in this file.\n"
        "[providers.mistral]\n"
        '# api_key = ""\n\n'
        "[defaults]\n"
        '# Provider to use (currently supported: "mistral")\n'
        'provider = "mistral"\n\n'
        '# File format sent to the provider: "wav" | "mp3" | "opus"\n'
        '# Recording is always WAV; conversion is applied if "mp3" or "opus"\n'
        'format = "opus"\n\n'
        "# Model to use on the provider side (example for Mistral Voxtral)\n"
        'model = "voxtral-mini-latest"\n\n'
        "# Language hint (may help the provider)\n"
        'language = "fr"\n\n'
        "# Audio recording parameters\n"
        "rate = 16000\n"
        "channels = 1\n"
        'device = ""\n\n'
        "# Output persistence:\n"
        "# - keep_audio_files: false uses temp files (no recordings/ dir),\n"
        "# true saves to recordings/\n"
        "keep_audio_files = false\n"
        "# - keep_transcript_files: false prints/copies only (no\n"
        "# transcripts/ dir), true saves to transcripts/\n"
        "keep_transcript_files = false\n"
        "# - keep_log_files: false console only (no logs/ dir), true\n"
        "# saves to logs/app.log\n"
        "keep_log_files = false\n\n"
        "# Automatically copy the transcribed text to the system clipboard\n"
        "copy = true\n\n"
        '# Log level: "DEBUG" | "INFO" | "WARNING" | "ERROR"\n'
        'log_level = "INFO"\n\n'
        "[prompt]\n"
        "# Default user prompt source:\n"
        "# - Option 1: Use a file (recommended)\n"
        f'file = "{prompt_file_toml}"\n'
        "#\n"
        "# - Option 2: Inline prompt (less recommended for long text)\n"
        '# text = "Please transcribe the audio and provide a concise summary in French."\n'
    )

    if force or not USER_CONFIG_FILE.exists():
        try:
            USER_CONFIG_FILE.write_text(example_toml, encoding="utf-8")
        except (OSError, UnicodeError) as exc:
            # Best-effort: keep going, but make the failure visible at the
            # default log level instead of hiding it at DEBUG.
            logging.warning("Could not write user config file: %s (%s)", USER_CONFIG_FILE, exc)
    return USER_CONFIG_FILE
260
+
261
+
262
@dataclass
class ProviderConfig:
    """Settings for one provider, as found under a `[providers.<name>]` table."""

    # API key for the provider. None is the dataclass default; Config.load()
    # stores str(api_key), i.e. "" when the key is absent from the table.
    api_key: str | None = None
265
+
266
+
267
@dataclass
class DefaultsConfig:
    """Values of the `[defaults]` table, with built-in fallbacks."""

    # Provider name (the generated example config lists "mistral" as supported).
    provider: str = "mistral"
    # File format sent to the provider; Config.load() accepts wav, mp3 or opus.
    format: str = "opus"
    # Model name used on the provider side.
    model: str = "voxtral-mini-latest"
    # Optional language hint for the provider.
    language: str | None = None
    # Recording sample rate; Config.load() requires a value > 0.
    rate: int = 16000
    # Recording channel count; Config.load() requires 1 or 2.
    channels: int = 1
    # Audio input device selector. NOTE(review): interpretation is up to the
    # recorder, which is not visible in this module.
    device: str | None = None
    # Persist recordings to recordings/ instead of using temp files.
    keep_audio_files: bool = False
    # Persist transcripts to transcripts/.
    keep_transcript_files: bool = False
    # Also log to logs/app.log.
    keep_log_files: bool = False
    # Copy the transcribed text to the system clipboard.
    copy: bool = True
    # Log level name, resolved through _get_log_level().
    log_level: str = "INFO"
    # Optional base name for output files.
    outfile_prefix: str | None = None
282
+
283
+
284
@dataclass
class PromptConfig:
    """Default user prompt source from the `[prompt]` table."""

    # Inline prompt text, if configured.
    text: str | None = None
    # Path to a prompt file, kept as the raw string from the config.
    file: str | None = None
288
+
289
+
290
@dataclass
class Config:
    """
    Resolved runtime configuration: provider settings, defaults, prompt source
    and the directories used for outputs and user files.
    """

    providers: dict[str, ProviderConfig] = field(default_factory=dict)
    defaults: DefaultsConfig = field(default_factory=DefaultsConfig)
    prompt: PromptConfig = field(default_factory=PromptConfig)
    recordings_dir: Path = RECORDINGS_DIR
    transcripts_dir: Path = TRANSCRIPTS_DIR
    logs_dir: Path = LOGS_DIR
    user_prompt_dir: Path = USER_PROMPT_DIR
    user_config_file: Path = USER_CONFIG_FILE

    @classmethod
    def load(cls, log_level: str = "INFO") -> Config:
        """
        Build a Config from the user's config.toml, applying built-in defaults.

        Side effects: sets up the environment and console logging, creates the
        output directories whose keep_* flag is enabled, and attaches a file
        handler on logs/app.log when keep_log_files is true.

        Args:
            log_level: Level used for the initial console logging and as the
                fallback when the user config does not define `log_level`.

        Raises:
            ValueError: If channels is not 1 or 2, rate is not > 0, format is
                not one of wav|mp3|opus, or rate/channels cannot be converted
                to int.
        """
        setup_environment(log_level)
        user_config = load_user_config()

        def _table(name: str) -> dict[str, Any]:
            # A section that is present but not a table (e.g. `defaults = 1`)
            # is treated as absent instead of crashing on `.get`/`.items`.
            section = user_config.get(name, {})
            return section if isinstance(section, dict) else {}

        def _optional_str(value: Any) -> str | None:
            return value if isinstance(value, str) else None

        user_defaults_raw = _table("defaults")
        # Coerce defaults
        defaults_data = {
            "provider": str(user_defaults_raw.get("provider", "mistral")),
            "format": str(user_defaults_raw.get("format", "opus")),
            "model": str(user_defaults_raw.get("model", "voxtral-mini-latest")),
            "language": user_defaults_raw.get("language"),
            "rate": int(user_defaults_raw.get("rate", 16000)),
            "channels": int(user_defaults_raw.get("channels", 1)),
            "device": user_defaults_raw.get("device"),
            "keep_audio_files": bool(user_defaults_raw.get("keep_audio_files", False)),
            "keep_transcript_files": bool(user_defaults_raw.get("keep_transcript_files", False)),
            "keep_log_files": bool(user_defaults_raw.get("keep_log_files", False)),
            "copy": bool(user_defaults_raw.get("copy", True)),
            "log_level": str(user_defaults_raw.get("log_level", log_level)),
            "outfile_prefix": user_defaults_raw.get("outfile_prefix"),
        }
        if defaults_data["channels"] not in (1, 2):
            raise ValueError("channels must be 1 or 2")
        if defaults_data["rate"] <= 0:
            raise ValueError("rate must be > 0")
        if defaults_data["format"] not in {"wav", "mp3", "opus"}:
            raise ValueError("format must be one of wav|mp3|opus")
        defaults = DefaultsConfig(**defaults_data)

        # Conditional output directories
        if defaults.keep_audio_files:
            RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)
        if defaults.keep_transcript_files:
            TRANSCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
        if defaults.keep_log_files:
            LOGS_DIR.mkdir(parents=True, exist_ok=True)

        # Update logging to the effective level (user config, else the
        # `log_level` argument). The handlers installed by setup_environment()
        # carry their own level, so they must be updated too; otherwise a more
        # verbose user level would never reach the console.
        effective_level = _get_log_level(defaults.log_level)
        root_logger = logging.getLogger()
        root_logger.setLevel(effective_level)
        for handler in root_logger.handlers:
            handler.setLevel(effective_level)

        # Add file handler if enabled
        if defaults.keep_log_files:
            file_handler = logging.FileHandler(LOGS_DIR / "app.log", encoding="utf-8")
            file_handler.setLevel(effective_level)
            file_handler.setFormatter(
                logging.Formatter("%(asctime)s | %(levelname)s | %(name)s | %(message)s")
            )
            root_logger.addHandler(file_handler)

        # Providers: only table-valued entries are kept.
        providers_data = {
            name: ProviderConfig(api_key=str(prov_raw.get("api_key", "")))
            for name, prov_raw in _table("providers").items()
            if isinstance(prov_raw, dict)
        }

        # Prompt: non-string values are ignored.
        prompt_raw = _table("prompt")
        prompt = PromptConfig(
            text=_optional_str(prompt_raw.get("text")),
            file=_optional_str(prompt_raw.get("file")),
        )

        return cls(
            defaults=defaults,
            providers=providers_data,
            prompt=prompt,
            recordings_dir=RECORDINGS_DIR,
            transcripts_dir=TRANSCRIPTS_DIR,
            logs_dir=LOGS_DIR,
            user_prompt_dir=USER_PROMPT_DIR,
            user_config_file=USER_CONFIG_FILE,
        )

    def resolve_prompt(self, inline: str | None = None, file_path: Path | None = None) -> str:
        """Resolve the user prompt by delegating to svx.core.prompt.resolve_user_prompt."""
        # Imported lazily, inside the method rather than at module import time.
        from svx.core.prompt import resolve_user_prompt

        return resolve_user_prompt(self, inline, file_path, self.user_prompt_dir)

    def get_provider_config(self, name: str) -> dict[str, Any]:
        """Return the settings of provider `name` as a dict (defaults if unknown)."""
        return asdict(self.providers.get(name, ProviderConfig()))
383
+
384
+
385
# Public API of this module. Underscore-prefixed helpers (_get_log_level,
# _configure_logging, _read_toml) are intentionally not exported.
__all__ = [
    "ROOT_DIR",
    "RECORDINGS_DIR",
    "TRANSCRIPTS_DIR",
    "LOGS_DIR",
    "USER_CONFIG_DIR",
    "USER_PROMPT_DIR",
    "USER_CONFIG_FILE",
    "setup_environment",
    "load_user_config",
    "init_user_config",
    "Config",
    "ProviderConfig",
    "DefaultsConfig",
    "PromptConfig",
]
svx/core/pipeline.py ADDED
@@ -0,0 +1,260 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import tempfile
5
+ import threading
6
+ from collections.abc import Callable
7
+ from logging import FileHandler
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ import svx.core.config as config
12
+ from svx.core.audio import convert_audio, record_wav, timestamp
13
+ from svx.core.clipboard import copy_to_clipboard
14
+ from svx.core.config import Config
15
+ from svx.core.storage import save_transcript
16
+ from svx.providers import get_provider
17
+
18
+
19
class RecordingPipeline:
    """
    Centralized pipeline for recording audio, transcribing via provider, saving outputs,
    and copying to clipboard. Handles temporary files when not keeping audio.

    Supports runtime overrides like save_all for keeping all files and adding log handlers.
    Optional progress_callback for status updates (e.g., for GUI).
    Supports transcribe_mode for pure transcription without prompt using dedicated endpoint.
    """

    def __init__(
        self,
        cfg: Config,
        user_prompt: str | None = None,
        user_prompt_file: Path | None = None,
        save_all: bool = False,
        outfile_prefix: str | None = None,
        progress_callback: Callable[[str], None] | None = None,
        transcribe_mode: bool = False,
    ) -> None:
        """Store the configuration and per-run overrides; no work is done here."""
        self.cfg = cfg
        self.user_prompt = user_prompt
        self.user_prompt_file = user_prompt_file
        self.save_all = save_all
        self.outfile_prefix = outfile_prefix
        self.progress_callback = progress_callback
        self.transcribe_mode = transcribe_mode

    def _status(self, msg: str) -> None:
        """Emit status update via callback if provided, and log it at INFO."""
        if self.progress_callback:
            self.progress_callback(msg)
        logging.info(msg)

    def _setup_save_all(self) -> None:
        """Apply save_all overrides: set keeps to True, create dirs, add file logging."""
        if not self.save_all:
            return

        # Override config defaults (this mutates the Config passed to __init__).
        self.cfg.defaults.keep_audio_files = True
        self.cfg.defaults.keep_transcript_files = True
        self.cfg.defaults.keep_log_files = True

        # Ensure directories
        config.RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)
        config.TRANSCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
        config.LOGS_DIR.mkdir(parents=True, exist_ok=True)

        # Add file handler if not present
        root_logger = logging.getLogger()
        if not any(isinstance(h, FileHandler) for h in root_logger.handlers):  # type: ignore[reportUnknownMemberType]
            from svx.core.config import _get_log_level

            log_level_int = _get_log_level(self.cfg.defaults.log_level)
            formatter = logging.Formatter("%(asctime)s | %(levelname)s | %(name)s | %(message)s")
            file_handler = logging.FileHandler(config.LOGS_DIR / "app.log", encoding="utf-8")
            file_handler.setLevel(log_level_int)
            file_handler.setFormatter(formatter)
            root_logger.addHandler(file_handler)
            logging.info("File logging enabled for this run")

    def run(self, stop_event: threading.Event | None = None) -> dict[str, Any]:
        """
        Execute the full pipeline.

        Args:
            stop_event: Optional event to signal recording stop (e.g., for GUI).

        Returns:
            Dict with 'text' (str), 'raw' (dict), 'duration' (float),
            'paths' (dict of Path or None).

        Raises:
            ValueError: If channels, rate or format are invalid.
            Exception: On recording, conversion, or transcription errors
                (logged, then re-raised unchanged).
        """
        self._setup_save_all()

        # Resolve parameters once, up front, so the whole run uses one snapshot.
        defaults = self.cfg.defaults
        provider = defaults.provider
        audio_format = defaults.format
        model = defaults.model
        if self.transcribe_mode:
            original_model = model
            model = "voxtral-mini-latest"
            if original_model != "voxtral-mini-latest":
                logging.warning(
                    "Mode Transcribe : modèle override de '%s' vers 'voxtral-mini-latest' "
                    "(optimisé pour la transcription).",
                    original_model,
                )
        language = defaults.language
        rate = defaults.rate
        channels = defaults.channels
        device = defaults.device
        base = self.outfile_prefix or f"rec_{timestamp()}"
        if self.transcribe_mode:
            final_user_prompt = None
            self._status("Mode Transcribe activated: no prompt used.")
        else:
            final_user_prompt = self.cfg.resolve_prompt(self.user_prompt, self.user_prompt_file)
        keep_audio = defaults.keep_audio_files
        keep_transcript = defaults.keep_transcript_files
        copy_to_clip = defaults.copy

        # Validation (fail fast)
        if channels not in (1, 2):
            raise ValueError("channels must be 1 or 2")
        if rate <= 0:
            raise ValueError("rate must be > 0")
        if audio_format not in {"wav", "mp3", "opus"}:
            raise ValueError("format must be one of wav|mp3|opus")

        paths: dict[str, Path | None] = {}
        stop_for_recording = stop_event if stop_event is not None else threading.Event()

        def _process(audio_dir: Path, persist_audio: bool) -> dict[str, Any]:
            """Record into audio_dir, then convert, transcribe, save and copy."""
            wav_path = audio_dir / f"{base}.wav"
            duration = record_wav(
                wav_path,
                samplerate=rate,
                channels=channels,
                device=device,
                stop_event=stop_for_recording,
            )
            to_send_path = wav_path
            if persist_audio:
                paths["wav"] = wav_path

            # Convert if needed
            if audio_format in {"mp3", "opus"}:
                self._status("Converting...")
                to_send_path = convert_audio(wav_path, audio_format)
                logging.info("Converted %s -> %s", wav_path, to_send_path)
                if persist_audio:
                    # Audio paths are reported only when the files are kept.
                    paths["converted"] = to_send_path

            # Transcribe
            self._status("Transcribing...")
            prov = get_provider(provider, cfg=self.cfg)
            result = prov.transcribe(
                to_send_path,
                user_prompt=final_user_prompt,
                model=model,
                language=language,
                transcribe_mode=self.transcribe_mode,
            )
            text = result["text"]
            raw = result["raw"]

            # Save if keeping transcripts
            if keep_transcript:
                self.cfg.transcripts_dir.mkdir(parents=True, exist_ok=True)
                txt_path, json_path = save_transcript(
                    self.cfg.transcripts_dir, base, provider, text, raw
                )
                paths["txt"] = txt_path
                paths["json"] = json_path
            else:
                paths["txt"] = None
                paths["json"] = None

            # Copy to clipboard (best-effort: a failure must not fail the run)
            if copy_to_clip:
                try:
                    copy_to_clipboard(text)
                    logging.info("Copied transcription to clipboard")
                except Exception as e:
                    logging.warning("Failed to copy to clipboard: %s", e)

            logging.info("Pipeline finished (%.2fs)", duration)
            return {
                "text": text,
                "raw": raw,
                "duration": duration,
                "paths": paths,
            }

        try:
            self._status("Recording...")
            if keep_audio:
                self.cfg.recordings_dir.mkdir(parents=True, exist_ok=True)
                return _process(self.cfg.recordings_dir, persist_audio=True)

            # Not keeping audio: the whole run happens inside the temporary
            # directory, so the audio files are still present while they are
            # converted and sent.
            with tempfile.TemporaryDirectory() as tmpdir:
                return _process(Path(tmpdir), persist_audio=False)

        except Exception:
            logging.exception("Pipeline failed")
            raise