supervoxtral 0.1.1__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/AGENTS.md +2 -2
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/PKG-INFO +1 -1
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/README.md +6 -5
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/notes.md +0 -1
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/pyproject.toml +1 -1
- supervoxtral-0.1.2/supervoxtral.gif +0 -0
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/svx/cli.py +5 -1
- supervoxtral-0.1.2/svx/core/pipeline.py +286 -0
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/svx/ui/qt_app.py +95 -45
- supervoxtral-0.1.1/supervoxtral.png +0 -0
- supervoxtral-0.1.1/svx/core/pipeline.py +0 -260
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/.gitignore +0 -0
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/LICENSE +0 -0
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/logs/.gitkeep +0 -0
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/macos-shortcut.png +0 -0
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/prompt/.gitkeep +0 -0
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/recordings/.gitkeep +0 -0
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/svx/__init__.py +0 -0
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/svx/core/__init__.py +0 -0
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/svx/core/audio.py +0 -0
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/svx/core/clipboard.py +0 -0
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/svx/core/config.py +0 -0
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/svx/core/prompt.py +0 -0
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/svx/core/storage.py +0 -0
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/svx/providers/__init__.py +0 -0
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/svx/providers/base.py +0 -0
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/svx/providers/mistral.py +0 -0
- {supervoxtral-0.1.1 → supervoxtral-0.1.2}/transcripts/.gitkeep +0 -0
|
@@ -34,9 +34,9 @@ supervoxtral/
|
|
|
34
34
|
|
|
35
35
|
- **Entry**: `svx/cli.py` Typer `record` command parses args (e.g., --prompt, --save-all, --gui, --transcribe).
|
|
36
36
|
- **Config & Prompt**: Load `Config` via `Config.load()` (`core/config.py`); if transcribe_mode, skip prompt resolution; else resolve prompt with `cfg.resolve_prompt()` (`core/prompt.py`).
|
|
37
|
-
- **Pipeline**: Run `RecordingPipeline` (`core/pipeline.py`): record WAV/stop (`core/audio.py`), optional conversion (ffmpeg), get provider/init (`providers/__init__.py`, e.g., `mistral.py` from `cfg`); if transcribe_mode: no prompt, model override to voxtral-mini-latest (with warning if changed), pass transcribe_mode to provider.transcribe; transcribe, conditional save (`core/storage.py` based on `keep_*`/`save_all`), clipboard copy, logging setup.
|
|
37
|
+
- **Pipeline**: Run `RecordingPipeline` (`core/pipeline.py`): record WAV/stop (`core/audio.py`), optional conversion (ffmpeg), get provider/init (`providers/__init__.py`, e.g., `mistral.py` from `cfg`); if transcribe_mode (CLI only): no prompt, model override to voxtral-mini-latest (with warning if changed), pass transcribe_mode to provider.transcribe; for GUI: --transcribe ignored (warning), recording starts immediately, uses modular record()/process()/clean() with dynamic mode (Transcribe: no prompt, model override; Prompt: resolved prompt); transcribe, conditional save (`core/storage.py` based on `keep_*`/`save_all`), clipboard copy, logging setup.
|
|
38
38
|
- **Cleanup**: Temp files auto-deleted (tempfile) if `keep_*=false`; dirs created only if persistence enabled.
|
|
39
|
-
- **End**: Return `{"text": str, "raw": dict, "duration": float, "paths": dict}`; CLI prints result, GUI emits progress/updates via callback.
|
|
39
|
+
- **End**: Return `{"text": str, "raw": dict, "duration": float, "paths": dict}`; CLI prints result, GUI emits progress/updates via callback (buttons: 'Transcribe' for stop/transcribe without prompt; 'Prompt' for stop/use resolved prompt; 'Cancel', Esc, or closing the window stops the recording and cancels without processing).
|
|
40
40
|
|
|
41
41
|
## Build & test
|
|
42
42
|
```bash
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# supervoxtral
|
|
2
2
|
|
|
3
|
-

|
|
4
4
|
|
|
5
5
|
SuperVoxtral is a lightweight Python CLI/GUI utility for recording microphone audio and integrating with Mistral's Voxtral APIs for transcription or audio-enabled chat.
|
|
6
6
|
|
|
@@ -85,8 +85,8 @@ To get started quickly with SuperVoxtral:
|
|
|
85
85
|
api_key = "your_mistral_api_key_here"
|
|
86
86
|
```
|
|
87
87
|
|
|
88
|
-
3. Launch the GUI
|
|
89
|
-
This opens the minimal GUI, starts recording
|
|
88
|
+
3. Launch the GUI: `svx record --gui`
|
|
89
|
+
This opens the minimal GUI, starts recording immediately; click 'Transcribe' for pure transcription (no prompt) or 'Prompt' for prompted transcription (resolved prompt); --transcribe ignored with warning (results copied to clipboard).
|
|
90
90
|
|
|
91
91
|
### macOS Shortcuts Integration
|
|
92
92
|
|
|
@@ -206,7 +206,7 @@ svx record [OPTIONS]
|
|
|
206
206
|
- `--user-prompt-file PATH` (or `--prompt-file PATH`): Path to a markdown file with the user prompt.
|
|
207
207
|
- `--transcribe`: Enable pure transcription mode (ignores prompts; uses dedicated endpoint).
|
|
208
208
|
- `--outfile-prefix PREFIX`: Custom prefix for output files (default: timestamp).
|
|
209
|
-
- `--gui`: Launch the GUI frontend (respects config and other CLI options).
|
|
209
|
+
- `--gui`: Launch the GUI frontend (interactive: recording starts immediately; buttons 'Transcribe' (pure, no prompt) or 'Prompt' (with resolved prompt); respects config and other CLI options; --transcribe ignored with warning).
|
|
210
210
|
- `--save-all`: Override config to keep audio, transcripts, and logs for this run.
|
|
211
211
|
- `--log-level LEVEL`: Set logging level (DEBUG, INFO, WARNING, ERROR; default: INFO).
|
|
212
212
|
|
|
@@ -218,7 +218,7 @@ svx record [OPTIONS]
|
|
|
218
218
|
- Transcribe only: `svx record --transcribe`
|
|
219
219
|
- No prompt; direct transcription. Add `--save-all` to persist.
|
|
220
220
|
- Launch GUI: `svx record --gui`
|
|
221
|
-
- GUI respects config.toml and CLI flags (e.g., `--gui --save-all`).
|
|
221
|
+
- Interactive mode: recording starts immediately; click 'Transcribe' (pure transcription, no prompt) or 'Prompt' (with resolved prompt); --transcribe ignored with warning. GUI respects config.toml and CLI flags (e.g., `--gui --save-all`).
|
|
222
222
|
|
|
223
223
|
**Prompt Resolution Priority** (for non-transcribe mode):
|
|
224
224
|
1. CLI `--user-prompt` or `--user-prompt-file`
|
|
@@ -228,6 +228,7 @@ svx record [OPTIONS]
|
|
|
228
228
|
|
|
229
229
|
## Changelog
|
|
230
230
|
|
|
231
|
+
- 0.1.2: Interactive mode in GUI (choose transcribe / prompt / cancel while recording)
|
|
231
232
|
- 0.1.1: Minor updates to default config and default prompt
|
|
232
233
|
|
|
233
234
|
## License
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "supervoxtral"
|
|
7
|
-
version = "0.1.1"
|
|
7
|
+
version = "0.1.2"
|
|
8
8
|
description = "CLI/GUI audio recorder and transcription client using Mistral Voxtral (chat with audio and transcription)."
|
|
9
9
|
requires-python = ">=3.11"
|
|
10
10
|
license = { text = "MIT" }
|
|
Binary file
|
|
@@ -191,6 +191,11 @@ def record(
|
|
|
191
191
|
user_prompt = None
|
|
192
192
|
user_prompt_file = None
|
|
193
193
|
|
|
194
|
+
if gui and transcribe:
|
|
195
|
+
console.print("[yellow]Warning: --transcribe has no effect in GUI mode.[/yellow]")
|
|
196
|
+
console.print("[yellow]Use the 'Transcribe' or 'Prompt' buttons in the interface.[/yellow]")
|
|
197
|
+
transcribe = False
|
|
198
|
+
|
|
194
199
|
# If GUI requested, launch GUI with the resolved parameters and exit.
|
|
195
200
|
if gui:
|
|
196
201
|
from svx.ui.qt_app import run_gui
|
|
@@ -202,7 +207,6 @@ def record(
|
|
|
202
207
|
user_prompt_file=user_prompt_file,
|
|
203
208
|
save_all=save_all,
|
|
204
209
|
outfile_prefix=outfile_prefix,
|
|
205
|
-
transcribe_mode=transcribe,
|
|
206
210
|
)
|
|
207
211
|
return
|
|
208
212
|
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import tempfile
|
|
5
|
+
import threading
|
|
6
|
+
from collections.abc import Callable
|
|
7
|
+
from logging import FileHandler
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
import svx.core.config as config
|
|
12
|
+
from svx.core.audio import convert_audio, record_wav, timestamp
|
|
13
|
+
from svx.core.clipboard import copy_to_clipboard
|
|
14
|
+
from svx.core.config import Config
|
|
15
|
+
from svx.core.storage import save_transcript
|
|
16
|
+
from svx.providers import get_provider
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class RecordingPipeline:
|
|
20
|
+
"""
|
|
21
|
+
Centralized pipeline for recording audio, transcribing via provider, saving outputs,
|
|
22
|
+
and copying to clipboard. Handles temporary files when not keeping audio.
|
|
23
|
+
|
|
24
|
+
Supports runtime overrides like save_all for keeping all files and adding log handlers.
|
|
25
|
+
Optional progress_callback for status updates (e.g., for GUI).
|
|
26
|
+
Supports transcribe_mode for pure transcription without prompt using dedicated endpoint.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
def __init__(
|
|
30
|
+
self,
|
|
31
|
+
cfg: Config,
|
|
32
|
+
user_prompt: str | None = None,
|
|
33
|
+
user_prompt_file: Path | None = None,
|
|
34
|
+
save_all: bool = False,
|
|
35
|
+
outfile_prefix: str | None = None,
|
|
36
|
+
progress_callback: Callable[[str], None] | None = None,
|
|
37
|
+
transcribe_mode: bool = False,
|
|
38
|
+
) -> None:
|
|
39
|
+
self.cfg = cfg
|
|
40
|
+
self.user_prompt = user_prompt
|
|
41
|
+
self.user_prompt_file = user_prompt_file
|
|
42
|
+
self.save_all = save_all
|
|
43
|
+
self.outfile_prefix = outfile_prefix
|
|
44
|
+
self.progress_callback = progress_callback
|
|
45
|
+
self.transcribe_mode = transcribe_mode
|
|
46
|
+
|
|
47
|
+
def _status(self, msg: str) -> None:
|
|
48
|
+
"""Emit status update via callback if provided."""
|
|
49
|
+
if self.progress_callback:
|
|
50
|
+
self.progress_callback(msg)
|
|
51
|
+
logging.info(msg)
|
|
52
|
+
|
|
53
|
+
def record(self, stop_event: threading.Event | None = None) -> tuple[Path, float]:
|
|
54
|
+
"""
|
|
55
|
+
Record audio and return wav_path, duration.
|
|
56
|
+
|
|
57
|
+
Returns:
|
|
58
|
+
tuple[Path, float]: wav_path, duration.
|
|
59
|
+
"""
|
|
60
|
+
# Resolve parameters
|
|
61
|
+
_provider = self.cfg.defaults.provider
|
|
62
|
+
audio_format = self.cfg.defaults.format
|
|
63
|
+
model = self.cfg.defaults.model
|
|
64
|
+
_original_model = model
|
|
65
|
+
_language = self.cfg.defaults.language
|
|
66
|
+
rate = self.cfg.defaults.rate
|
|
67
|
+
channels = self.cfg.defaults.channels
|
|
68
|
+
device = self.cfg.defaults.device
|
|
69
|
+
base = self.outfile_prefix or f"rec_{timestamp()}"
|
|
70
|
+
keep_audio = self.save_all or self.cfg.defaults.keep_audio_files
|
|
71
|
+
|
|
72
|
+
# Validation (fail fast)
|
|
73
|
+
if channels not in (1, 2):
|
|
74
|
+
raise ValueError("channels must be 1 or 2")
|
|
75
|
+
if rate <= 0:
|
|
76
|
+
raise ValueError("rate must be > 0")
|
|
77
|
+
if audio_format not in {"wav", "mp3", "opus"}:
|
|
78
|
+
raise ValueError("format must be one of wav|mp3|opus")
|
|
79
|
+
|
|
80
|
+
stop_for_recording = stop_event or threading.Event()
|
|
81
|
+
|
|
82
|
+
self._status("Recording...")
|
|
83
|
+
if keep_audio:
|
|
84
|
+
self.cfg.recordings_dir.mkdir(parents=True, exist_ok=True)
|
|
85
|
+
wav_path = self.cfg.recordings_dir / f"{base}.wav"
|
|
86
|
+
duration = record_wav(
|
|
87
|
+
wav_path,
|
|
88
|
+
samplerate=rate,
|
|
89
|
+
channels=channels,
|
|
90
|
+
device=device,
|
|
91
|
+
stop_event=stop_for_recording,
|
|
92
|
+
)
|
|
93
|
+
else:
|
|
94
|
+
# Use mktemp for temp wav_path
|
|
95
|
+
wav_path = Path(tempfile.mktemp(suffix=".wav"))
|
|
96
|
+
duration = record_wav(
|
|
97
|
+
wav_path,
|
|
98
|
+
samplerate=rate,
|
|
99
|
+
channels=channels,
|
|
100
|
+
device=device,
|
|
101
|
+
stop_event=stop_for_recording,
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
self._status("Recording completed.")
|
|
105
|
+
return wav_path, duration
|
|
106
|
+
|
|
107
|
+
def _setup_save_all(self) -> None:
|
|
108
|
+
"""Apply save_all overrides: set keeps to True, create dirs, add file logging."""
|
|
109
|
+
if not self.save_all:
|
|
110
|
+
return
|
|
111
|
+
|
|
112
|
+
# Override config defaults
|
|
113
|
+
self.cfg.defaults.keep_audio_files = True
|
|
114
|
+
self.cfg.defaults.keep_transcript_files = True
|
|
115
|
+
self.cfg.defaults.keep_log_files = True
|
|
116
|
+
|
|
117
|
+
# Ensure directories
|
|
118
|
+
config.RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)
|
|
119
|
+
config.TRANSCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
|
|
120
|
+
config.LOGS_DIR.mkdir(parents=True, exist_ok=True)
|
|
121
|
+
|
|
122
|
+
# Add file handler if not present
|
|
123
|
+
root_logger = logging.getLogger()
|
|
124
|
+
if not any(isinstance(h, FileHandler) for h in root_logger.handlers): # type: ignore[reportUnknownMemberType]
|
|
125
|
+
from svx.core.config import _get_log_level
|
|
126
|
+
|
|
127
|
+
log_level_int = _get_log_level(self.cfg.defaults.log_level)
|
|
128
|
+
formatter = logging.Formatter("%(asctime)s | %(levelname)s | %(name)s | %(message)s")
|
|
129
|
+
file_handler = logging.FileHandler(config.LOGS_DIR / "app.log", encoding="utf-8")
|
|
130
|
+
file_handler.setLevel(log_level_int)
|
|
131
|
+
file_handler.setFormatter(formatter)
|
|
132
|
+
root_logger.addHandler(file_handler)
|
|
133
|
+
logging.info("File logging enabled for this run")
|
|
134
|
+
|
|
135
|
+
def process(
|
|
136
|
+
self, wav_path: Path, duration: float, transcribe_mode: bool, user_prompt: str | None = None
|
|
137
|
+
) -> dict[str, Any]:
|
|
138
|
+
"""
|
|
139
|
+
Process recorded audio: convert if needed, transcribe, save, copy.
|
|
140
|
+
|
|
141
|
+
Args:
|
|
142
|
+
wav_path: Path to the recorded WAV file.
|
|
143
|
+
duration: Recording duration in seconds.
|
|
144
|
+
transcribe_mode: Whether to use pure transcription mode.
|
|
145
|
+
user_prompt: User prompt to use (None for transcribe_mode).
|
|
146
|
+
|
|
147
|
+
Returns:
|
|
148
|
+
Dict with 'text' (str), 'raw' (dict), 'duration' (float),
|
|
149
|
+
'paths' (dict of Path or None).
|
|
150
|
+
"""
|
|
151
|
+
# Resolve parameters
|
|
152
|
+
provider = self.cfg.defaults.provider
|
|
153
|
+
audio_format = self.cfg.defaults.format
|
|
154
|
+
model = self.cfg.defaults.model
|
|
155
|
+
original_model = model
|
|
156
|
+
if transcribe_mode:
|
|
157
|
+
model = "voxtral-mini-latest"
|
|
158
|
+
if original_model != "voxtral-mini-latest":
|
|
159
|
+
logging.warning(
|
|
160
|
+
"Transcribe mode: model override from '%s' to 'voxtral-mini-latest'\n"
|
|
161
|
+
"(optimized for transcription).",
|
|
162
|
+
original_model,
|
|
163
|
+
)
|
|
164
|
+
language = self.cfg.defaults.language
|
|
165
|
+
if wav_path.stem.endswith(".wav"):
|
|
166
|
+
base = wav_path.stem.replace(".wav", "")
|
|
167
|
+
else:
|
|
168
|
+
base = wav_path.stem
|
|
169
|
+
keep_transcript = self.save_all or self.cfg.defaults.keep_transcript_files
|
|
170
|
+
copy_to_clip = self.cfg.defaults.copy
|
|
171
|
+
|
|
172
|
+
# Resolve user prompt if not provided
|
|
173
|
+
final_user_prompt = None
|
|
174
|
+
if not transcribe_mode:
|
|
175
|
+
if user_prompt is None:
|
|
176
|
+
final_user_prompt = self.cfg.resolve_prompt(self.user_prompt, self.user_prompt_file)
|
|
177
|
+
else:
|
|
178
|
+
final_user_prompt = user_prompt
|
|
179
|
+
self._status("Transcribe mode not activated: using prompt.")
|
|
180
|
+
else:
|
|
181
|
+
self._status("Transcribe mode activated: no prompt used.")
|
|
182
|
+
|
|
183
|
+
paths: dict[str, Path | None] = {"wav": wav_path}
|
|
184
|
+
|
|
185
|
+
# Convert if needed
|
|
186
|
+
to_send_path = wav_path
|
|
187
|
+
_converted = False
|
|
188
|
+
if audio_format in {"mp3", "opus"}:
|
|
189
|
+
self._status("Converting...")
|
|
190
|
+
to_send_path = convert_audio(wav_path, audio_format)
|
|
191
|
+
logging.info("Converted %s -> %s", wav_path, to_send_path)
|
|
192
|
+
paths["converted"] = to_send_path
|
|
193
|
+
_converted = True
|
|
194
|
+
|
|
195
|
+
# Transcribe
|
|
196
|
+
self._status("Transcribing...")
|
|
197
|
+
prov = get_provider(provider, cfg=self.cfg)
|
|
198
|
+
result = prov.transcribe(
|
|
199
|
+
to_send_path,
|
|
200
|
+
user_prompt=final_user_prompt,
|
|
201
|
+
model=model,
|
|
202
|
+
language=language,
|
|
203
|
+
transcribe_mode=transcribe_mode,
|
|
204
|
+
)
|
|
205
|
+
text = result["text"]
|
|
206
|
+
raw = result["raw"]
|
|
207
|
+
|
|
208
|
+
# Save if keeping transcripts
|
|
209
|
+
if keep_transcript:
|
|
210
|
+
self.cfg.transcripts_dir.mkdir(parents=True, exist_ok=True)
|
|
211
|
+
txt_path, json_path = save_transcript(
|
|
212
|
+
self.cfg.transcripts_dir, base, provider, text, raw
|
|
213
|
+
)
|
|
214
|
+
paths["txt"] = txt_path
|
|
215
|
+
paths["json"] = json_path
|
|
216
|
+
else:
|
|
217
|
+
paths["txt"] = None
|
|
218
|
+
paths["json"] = None
|
|
219
|
+
|
|
220
|
+
# Copy to clipboard
|
|
221
|
+
if copy_to_clip:
|
|
222
|
+
try:
|
|
223
|
+
copy_to_clipboard(text)
|
|
224
|
+
logging.info("Copied transcription to clipboard")
|
|
225
|
+
except Exception as e:
|
|
226
|
+
logging.warning("Failed to copy to clipboard: %s", e)
|
|
227
|
+
|
|
228
|
+
logging.info("Processing finished (%.2fs)", duration)
|
|
229
|
+
return {
|
|
230
|
+
"text": text,
|
|
231
|
+
"raw": raw,
|
|
232
|
+
"duration": duration,
|
|
233
|
+
"paths": paths,
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
def clean(self, wav_path: Path, paths: dict[str, Path | None], keep_audio: bool) -> None:
|
|
237
|
+
"""
|
|
238
|
+
Clean up temporary files.
|
|
239
|
+
|
|
240
|
+
Args:
|
|
241
|
+
wav_path: The original WAV path.
|
|
242
|
+
paths: The paths dict from process().
|
|
243
|
+
keep_audio: Whether to keep audio files (if True, no deletion).
|
|
244
|
+
"""
|
|
245
|
+
if not keep_audio and wav_path.exists():
|
|
246
|
+
wav_path.unlink()
|
|
247
|
+
logging.info("Deleted temp WAV: %s", wav_path)
|
|
248
|
+
|
|
249
|
+
if "converted" in paths and paths["converted"] and paths["converted"] != wav_path:
|
|
250
|
+
if paths["converted"].exists():
|
|
251
|
+
paths["converted"].unlink()
|
|
252
|
+
logging.info("Deleted temp converted: %s", paths["converted"])
|
|
253
|
+
|
|
254
|
+
self._status("Cleanup completed.")
|
|
255
|
+
|
|
256
|
+
def run(self, stop_event: threading.Event | None = None) -> dict[str, Any]:
|
|
257
|
+
"""
|
|
258
|
+
Execute the full pipeline.
|
|
259
|
+
|
|
260
|
+
Args:
|
|
261
|
+
stop_event: Optional event to signal recording stop (e.g., for GUI).
|
|
262
|
+
|
|
263
|
+
Returns:
|
|
264
|
+
Dict with 'text' (str), 'raw' (dict), 'duration' (float),
|
|
265
|
+
'paths' (dict of Path or None).
|
|
266
|
+
|
|
267
|
+
Raises:
|
|
268
|
+
Exception: On recording, conversion, or transcription errors.
|
|
269
|
+
"""
|
|
270
|
+
self._setup_save_all()
|
|
271
|
+
|
|
272
|
+
wav_path, duration = self.record(stop_event)
|
|
273
|
+
keep_audio = self.save_all or self.cfg.defaults.keep_audio_files
|
|
274
|
+
|
|
275
|
+
if self.transcribe_mode:
|
|
276
|
+
final_user_prompt = None
|
|
277
|
+
self._status("Mode Transcribe activated: no prompt used.")
|
|
278
|
+
else:
|
|
279
|
+
final_user_prompt = self.cfg.resolve_prompt(self.user_prompt, self.user_prompt_file)
|
|
280
|
+
|
|
281
|
+
result = self.process(wav_path, duration, self.transcribe_mode, final_user_prompt)
|
|
282
|
+
|
|
283
|
+
self.clean(wav_path, result["paths"], keep_audio=keep_audio)
|
|
284
|
+
|
|
285
|
+
logging.info("Pipeline finished (%.2fs)", duration)
|
|
286
|
+
return result
|
|
@@ -19,12 +19,14 @@ from __future__ import annotations
|
|
|
19
19
|
|
|
20
20
|
import logging
|
|
21
21
|
import threading
|
|
22
|
+
import time
|
|
22
23
|
from pathlib import Path
|
|
23
24
|
|
|
24
25
|
from PySide6.QtCore import QObject, QPoint, Qt, QTimer, Signal
|
|
25
26
|
from PySide6.QtGui import QAction, QFont, QFontDatabase, QKeySequence
|
|
26
27
|
from PySide6.QtWidgets import (
|
|
27
28
|
QApplication,
|
|
29
|
+
QHBoxLayout,
|
|
28
30
|
QLabel,
|
|
29
31
|
QMessageBox,
|
|
30
32
|
QPushButton,
|
|
@@ -79,6 +81,18 @@ QPushButton:hover {
|
|
|
79
81
|
background-color: #2a78ff;
|
|
80
82
|
}
|
|
81
83
|
|
|
84
|
+
/* Cancel button */
|
|
85
|
+
QPushButton#cancel_btn {
|
|
86
|
+
background-color: #da3633;
|
|
87
|
+
}
|
|
88
|
+
QPushButton#cancel_btn:hover {
|
|
89
|
+
background-color: #f85149;
|
|
90
|
+
}
|
|
91
|
+
QPushButton#cancel_btn:disabled {
|
|
92
|
+
background-color: #8b0000;
|
|
93
|
+
color: #9fb8e6;
|
|
94
|
+
}
|
|
95
|
+
|
|
82
96
|
/* Small window border effect (subtle) */
|
|
83
97
|
QWidget#recorder_window {
|
|
84
98
|
border: 1px solid #203040;
|
|
@@ -189,12 +203,12 @@ class RecorderWorker(QObject):
|
|
|
189
203
|
status (str): human-readable status updates for the UI.
|
|
190
204
|
done (str): emitted with the final transcription text on success.
|
|
191
205
|
error (str): emitted with an error message on failure.
|
|
192
|
-
Supports transcribe_mode for pure transcription without prompt.
|
|
193
206
|
"""
|
|
194
207
|
|
|
195
208
|
status = Signal(str)
|
|
196
209
|
done = Signal(str)
|
|
197
210
|
error = Signal(str)
|
|
211
|
+
canceled = Signal()
|
|
198
212
|
|
|
199
213
|
def __init__(
|
|
200
214
|
self,
|
|
@@ -203,7 +217,6 @@ class RecorderWorker(QObject):
|
|
|
203
217
|
user_prompt_file: Path | None = None,
|
|
204
218
|
save_all: bool = False,
|
|
205
219
|
outfile_prefix: str | None = None,
|
|
206
|
-
transcribe_mode: bool = False,
|
|
207
220
|
) -> None:
|
|
208
221
|
super().__init__()
|
|
209
222
|
self.cfg = cfg
|
|
@@ -211,13 +224,21 @@ class RecorderWorker(QObject):
|
|
|
211
224
|
self.user_prompt_file = user_prompt_file
|
|
212
225
|
self.save_all = save_all
|
|
213
226
|
self.outfile_prefix = outfile_prefix
|
|
214
|
-
self.transcribe_mode = transcribe_mode
|
|
227
|
+
self.mode: str | None = None
|
|
228
|
+
self.cancel_requested: bool = False
|
|
215
229
|
self._stop_event = threading.Event()
|
|
216
230
|
|
|
231
|
+
def set_mode(self, mode: str) -> None:
|
|
232
|
+
self.mode = mode
|
|
233
|
+
|
|
217
234
|
def stop(self) -> None:
|
|
218
235
|
"""Request the recording to stop."""
|
|
219
236
|
self._stop_event.set()
|
|
220
237
|
|
|
238
|
+
def cancel(self) -> None:
|
|
239
|
+
self.cancel_requested = True
|
|
240
|
+
self._stop_event.set()
|
|
241
|
+
|
|
221
242
|
def _resolve_user_prompt(self) -> str:
|
|
222
243
|
"""
|
|
223
244
|
Determine the final user prompt using the shared resolver.
|
|
@@ -227,14 +248,12 @@ class RecorderWorker(QObject):
|
|
|
227
248
|
def run(self) -> None:
|
|
228
249
|
"""
|
|
229
250
|
Execute the pipeline:
|
|
230
|
-
-
|
|
231
|
-
-
|
|
232
|
-
-
|
|
233
|
-
-
|
|
234
|
-
- copy_to_clipboard
|
|
235
|
-
- optionally delete audio files
|
|
236
|
-
Supports transcribe_mode for pure transcription without prompt.
|
|
251
|
+
- record (until stop)
|
|
252
|
+
- wait for mode
|
|
253
|
+
- process
|
|
254
|
+
- clean
|
|
237
255
|
"""
|
|
256
|
+
|
|
238
257
|
try:
|
|
239
258
|
pipeline = RecordingPipeline(
|
|
240
259
|
cfg=self.cfg,
|
|
@@ -242,10 +261,24 @@ class RecorderWorker(QObject):
|
|
|
242
261
|
user_prompt_file=self.user_prompt_file,
|
|
243
262
|
save_all=self.save_all,
|
|
244
263
|
outfile_prefix=self.outfile_prefix,
|
|
245
|
-
transcribe_mode=self.transcribe_mode,
|
|
246
264
|
progress_callback=self.status.emit,
|
|
247
265
|
)
|
|
248
|
-
|
|
266
|
+
self.status.emit("Recording in progress...")
|
|
267
|
+
wav_path, duration = pipeline.record(self._stop_event)
|
|
268
|
+
self.status.emit("Recording finished.")
|
|
269
|
+
if self.cancel_requested:
|
|
270
|
+
keep_audio = self.save_all or self.cfg.defaults.keep_audio_files
|
|
271
|
+
pipeline.clean(wav_path, {"wav": wav_path}, keep_audio)
|
|
272
|
+
self.canceled.emit()
|
|
273
|
+
return
|
|
274
|
+
self.status.emit("Processing in progress...")
|
|
275
|
+
while self.mode is None:
|
|
276
|
+
time.sleep(0.05)
|
|
277
|
+
transcribe_mode = self.mode == "transcribe"
|
|
278
|
+
user_prompt = None if transcribe_mode else self._resolve_user_prompt()
|
|
279
|
+
result = pipeline.process(wav_path, duration, transcribe_mode, user_prompt)
|
|
280
|
+
keep_audio = self.save_all or self.cfg.defaults.keep_audio_files
|
|
281
|
+
pipeline.clean(wav_path, result["paths"], keep_audio)
|
|
249
282
|
self.done.emit(result["text"])
|
|
250
283
|
except Exception as e:
|
|
251
284
|
logging.exception("Pipeline failed")
|
|
@@ -254,13 +287,12 @@ class RecorderWorker(QObject):
|
|
|
254
287
|
|
|
255
288
|
class RecorderWindow(QWidget):
|
|
256
289
|
"""
|
|
257
|
-
Frameless always-on-top window with
|
|
290
|
+
Frameless always-on-top window with Transcribe and Prompt buttons.
|
|
258
291
|
|
|
259
292
|
Launching this window will immediately start the recording in a background thread.
|
|
260
293
|
|
|
261
294
|
Window can be dragged by clicking anywhere on the widget background.
|
|
262
|
-
Pressing Esc triggers
|
|
263
|
-
Supports transcribe_mode for pure transcription without prompt.
|
|
295
|
+
Pressing Esc cancels the recording without processing.
|
|
264
296
|
"""
|
|
265
297
|
|
|
266
298
|
def __init__(
|
|
@@ -270,7 +302,6 @@ class RecorderWindow(QWidget):
|
|
|
270
302
|
user_prompt_file: Path | None = None,
|
|
271
303
|
save_all: bool = False,
|
|
272
304
|
outfile_prefix: str | None = None,
|
|
273
|
-
transcribe_mode: bool = False,
|
|
274
305
|
) -> None:
|
|
275
306
|
super().__init__()
|
|
276
307
|
|
|
@@ -279,7 +310,16 @@ class RecorderWindow(QWidget):
|
|
|
279
310
|
self.user_prompt_file = user_prompt_file
|
|
280
311
|
self.save_all = save_all
|
|
281
312
|
self.outfile_prefix = outfile_prefix
|
|
282
|
-
|
|
313
|
+
|
|
314
|
+
# Background worker (create early for signal connections)
|
|
315
|
+
self._worker = RecorderWorker(
|
|
316
|
+
cfg=self.cfg,
|
|
317
|
+
user_prompt=user_prompt,
|
|
318
|
+
user_prompt_file=user_prompt_file,
|
|
319
|
+
save_all=save_all,
|
|
320
|
+
outfile_prefix=outfile_prefix,
|
|
321
|
+
)
|
|
322
|
+
self._thread = threading.Thread(target=self._worker.run, daemon=True)
|
|
283
323
|
|
|
284
324
|
# Environment and prompt files
|
|
285
325
|
|
|
@@ -313,14 +353,9 @@ class RecorderWindow(QWidget):
|
|
|
313
353
|
"</span>"
|
|
314
354
|
)
|
|
315
355
|
format_html = f"<span style='color:#ffa657'>{self.cfg.defaults.format}</span>"
|
|
316
|
-
if self.transcribe_mode:
|
|
317
|
-
mode_html = "<span style='color:#ff7b72'>Transcribe</span>"
|
|
318
|
-
else:
|
|
319
|
-
mode_html = "<span style='color:#7ee787'>Completion</span>"
|
|
320
356
|
parts = [
|
|
321
357
|
prov_model_html,
|
|
322
358
|
format_html,
|
|
323
|
-
mode_html,
|
|
324
359
|
]
|
|
325
360
|
if self.cfg.defaults.language:
|
|
326
361
|
lang_html = f"<span style='color:#c9b4ff'>{self.cfg.defaults.language}</span>"
|
|
@@ -337,34 +372,42 @@ class RecorderWindow(QWidget):
|
|
|
337
372
|
self._info_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
|
|
338
373
|
layout.addWidget(self._info_label)
|
|
339
374
|
|
|
340
|
-
self._status_label = QLabel("Recording
|
|
375
|
+
self._status_label = QLabel("Recording in progress...")
|
|
341
376
|
self._status_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
|
|
342
377
|
layout.addWidget(self._status_label)
|
|
343
378
|
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
379
|
+
# Buttons layout
|
|
380
|
+
button_layout = QHBoxLayout()
|
|
381
|
+
button_layout.addStretch()
|
|
382
|
+
self._transcribe_btn = QPushButton("Transcribe")
|
|
383
|
+
self._transcribe_btn.setToolTip("Stop and transcribe without prompt")
|
|
384
|
+
self._transcribe_btn.clicked.connect(lambda: self._on_button_clicked("transcribe"))
|
|
385
|
+
button_layout.addWidget(self._transcribe_btn)
|
|
386
|
+
self._prompt_btn = QPushButton("Prompt")
|
|
387
|
+
self._prompt_btn.setToolTip("Stop and transcribe with prompt")
|
|
388
|
+
self._prompt_btn.clicked.connect(lambda: self._on_button_clicked("prompt"))
|
|
389
|
+
button_layout.addWidget(self._prompt_btn)
|
|
390
|
+
self._cancel_btn = QPushButton("Cancel")
|
|
391
|
+
self._cancel_btn.setObjectName("cancel_btn")
|
|
392
|
+
self._cancel_btn.setToolTip("Stop recording and quit without processing")
|
|
393
|
+
self._cancel_btn.clicked.connect(self._on_cancel_clicked)
|
|
394
|
+
button_layout.addWidget(self._cancel_btn)
|
|
395
|
+
button_layout.addStretch()
|
|
396
|
+
button_widget = QWidget()
|
|
397
|
+
button_widget.setLayout(button_layout)
|
|
398
|
+
layout.addWidget(button_widget, 0, Qt.AlignmentFlag.AlignCenter)
|
|
347
399
|
|
|
348
400
|
# Keyboard shortcut: Esc to stop
|
|
349
401
|
stop_action = QAction(self)
|
|
350
402
|
stop_action.setShortcut(QKeySequence.StandardKey.Cancel) # Esc
|
|
351
|
-
stop_action.triggered.connect(self.
|
|
403
|
+
stop_action.triggered.connect(lambda: self._worker.cancel())
|
|
352
404
|
self.addAction(stop_action)
|
|
353
405
|
|
|
354
|
-
# Background worker
|
|
355
|
-
self._worker = RecorderWorker(
|
|
356
|
-
cfg=self.cfg,
|
|
357
|
-
user_prompt=user_prompt,
|
|
358
|
-
user_prompt_file=user_prompt_file,
|
|
359
|
-
save_all=save_all,
|
|
360
|
-
outfile_prefix=outfile_prefix,
|
|
361
|
-
)
|
|
362
|
-
self._thread = threading.Thread(target=self._worker.run, daemon=True)
|
|
363
|
-
|
|
364
406
|
# Signals wiring
|
|
365
407
|
self._worker.status.connect(self._on_status)
|
|
366
408
|
self._worker.done.connect(self._on_done)
|
|
367
409
|
self._worker.error.connect(self._on_error)
|
|
410
|
+
self._worker.canceled.connect(self._close_soon)
|
|
368
411
|
|
|
369
412
|
# Apply stylesheet to the application for consistent appearance
|
|
370
413
|
app = QApplication.instance()
|
|
@@ -410,14 +453,24 @@ class RecorderWindow(QWidget):
|
|
|
410
453
|
|
|
411
454
|
def closeEvent(self, event) -> None: # type: ignore[override]
|
|
412
455
|
# Attempt to stop recording if the user closes the window via window controls.
|
|
413
|
-
self._worker.
|
|
456
|
+
self._worker.cancel()
|
|
414
457
|
super().closeEvent(event)
|
|
415
458
|
|
|
416
|
-
def
|
|
417
|
-
self.
|
|
418
|
-
self.
|
|
459
|
+
def _on_button_clicked(self, mode: str) -> None:
|
|
460
|
+
self._transcribe_btn.setEnabled(False)
|
|
461
|
+
self._prompt_btn.setEnabled(False)
|
|
462
|
+
self._cancel_btn.setEnabled(False)
|
|
463
|
+
self._status_label.setText("Stopping and processing...")
|
|
464
|
+
self._worker.set_mode(mode)
|
|
419
465
|
self._worker.stop()
|
|
420
466
|
|
|
467
|
+
def _on_cancel_clicked(self) -> None:
|
|
468
|
+
self._transcribe_btn.setEnabled(False)
|
|
469
|
+
self._prompt_btn.setEnabled(False)
|
|
470
|
+
self._cancel_btn.setEnabled(False)
|
|
471
|
+
self._status_label.setText("Canceling...")
|
|
472
|
+
self._worker.cancel()
|
|
473
|
+
|
|
421
474
|
# --- Drag handling for frameless window ---
|
|
422
475
|
def mousePressEvent(self, event) -> None: # type: ignore[override]
|
|
423
476
|
if event.button() == Qt.MouseButton.LeftButton:
|
|
@@ -447,7 +500,7 @@ class RecorderWindow(QWidget):
|
|
|
447
500
|
def keyPressEvent(self, event) -> None: # type: ignore[override]
|
|
448
501
|
# Qt.Key_Escape is a safety stop
|
|
449
502
|
if event.key() == Qt.Key.Key_Escape:
|
|
450
|
-
self.
|
|
503
|
+
self._worker.cancel()
|
|
451
504
|
else:
|
|
452
505
|
super().keyPressEvent(event)
|
|
453
506
|
|
|
@@ -458,14 +511,12 @@ def run_gui(
|
|
|
458
511
|
user_prompt_file: Path | None = None,
|
|
459
512
|
save_all: bool = False,
|
|
460
513
|
outfile_prefix: str | None = None,
|
|
461
|
-
transcribe_mode: bool = False,
|
|
462
514
|
log_level: str = "INFO",
|
|
463
515
|
) -> None:
|
|
464
516
|
if cfg is None:
|
|
465
517
|
cfg = Config.load(log_level=log_level)
|
|
466
518
|
"""
|
|
467
519
|
Launch the PySide6 app with the minimal recorder window.
|
|
468
|
-
Supports transcribe_mode for pure transcription without prompt.
|
|
469
520
|
"""
|
|
470
521
|
config.setup_environment(log_level=log_level)
|
|
471
522
|
|
|
@@ -485,7 +536,6 @@ def run_gui(
|
|
|
485
536
|
user_prompt_file=user_prompt_file,
|
|
486
537
|
save_all=save_all,
|
|
487
538
|
outfile_prefix=outfile_prefix,
|
|
488
|
-
transcribe_mode=transcribe_mode,
|
|
489
539
|
)
|
|
490
540
|
window.show()
|
|
491
541
|
app.exec()
|
|
Binary file
|
|
@@ -1,260 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import logging
|
|
4
|
-
import tempfile
|
|
5
|
-
import threading
|
|
6
|
-
from collections.abc import Callable
|
|
7
|
-
from logging import FileHandler
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
from typing import Any
|
|
10
|
-
|
|
11
|
-
import svx.core.config as config
|
|
12
|
-
from svx.core.audio import convert_audio, record_wav, timestamp
|
|
13
|
-
from svx.core.clipboard import copy_to_clipboard
|
|
14
|
-
from svx.core.config import Config
|
|
15
|
-
from svx.core.storage import save_transcript
|
|
16
|
-
from svx.providers import get_provider
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class RecordingPipeline:
|
|
20
|
-
"""
|
|
21
|
-
Centralized pipeline for recording audio, transcribing via provider, saving outputs,
|
|
22
|
-
and copying to clipboard. Handles temporary files when not keeping audio.
|
|
23
|
-
|
|
24
|
-
Supports runtime overrides like save_all for keeping all files and adding log handlers.
|
|
25
|
-
Optional progress_callback for status updates (e.g., for GUI).
|
|
26
|
-
Supports transcribe_mode for pure transcription without prompt using dedicated endpoint.
|
|
27
|
-
"""
|
|
28
|
-
|
|
29
|
-
def __init__(
|
|
30
|
-
self,
|
|
31
|
-
cfg: Config,
|
|
32
|
-
user_prompt: str | None = None,
|
|
33
|
-
user_prompt_file: Path | None = None,
|
|
34
|
-
save_all: bool = False,
|
|
35
|
-
outfile_prefix: str | None = None,
|
|
36
|
-
progress_callback: Callable[[str], None] | None = None,
|
|
37
|
-
transcribe_mode: bool = False,
|
|
38
|
-
) -> None:
|
|
39
|
-
self.cfg = cfg
|
|
40
|
-
self.user_prompt = user_prompt
|
|
41
|
-
self.user_prompt_file = user_prompt_file
|
|
42
|
-
self.save_all = save_all
|
|
43
|
-
self.outfile_prefix = outfile_prefix
|
|
44
|
-
self.progress_callback = progress_callback
|
|
45
|
-
self.transcribe_mode = transcribe_mode
|
|
46
|
-
|
|
47
|
-
def _status(self, msg: str) -> None:
|
|
48
|
-
"""Emit status update via callback if provided."""
|
|
49
|
-
if self.progress_callback:
|
|
50
|
-
self.progress_callback(msg)
|
|
51
|
-
logging.info(msg)
|
|
52
|
-
|
|
53
|
-
def _setup_save_all(self) -> None:
|
|
54
|
-
"""Apply save_all overrides: set keeps to True, create dirs, add file logging."""
|
|
55
|
-
if not self.save_all:
|
|
56
|
-
return
|
|
57
|
-
|
|
58
|
-
# Override config defaults
|
|
59
|
-
self.cfg.defaults.keep_audio_files = True
|
|
60
|
-
self.cfg.defaults.keep_transcript_files = True
|
|
61
|
-
self.cfg.defaults.keep_log_files = True
|
|
62
|
-
|
|
63
|
-
# Ensure directories
|
|
64
|
-
config.RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)
|
|
65
|
-
config.TRANSCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
|
|
66
|
-
config.LOGS_DIR.mkdir(parents=True, exist_ok=True)
|
|
67
|
-
|
|
68
|
-
# Add file handler if not present
|
|
69
|
-
root_logger = logging.getLogger()
|
|
70
|
-
if not any(isinstance(h, FileHandler) for h in root_logger.handlers): # type: ignore[reportUnknownMemberType]
|
|
71
|
-
from svx.core.config import _get_log_level
|
|
72
|
-
|
|
73
|
-
log_level_int = _get_log_level(self.cfg.defaults.log_level)
|
|
74
|
-
formatter = logging.Formatter("%(asctime)s | %(levelname)s | %(name)s | %(message)s")
|
|
75
|
-
file_handler = logging.FileHandler(config.LOGS_DIR / "app.log", encoding="utf-8")
|
|
76
|
-
file_handler.setLevel(log_level_int)
|
|
77
|
-
file_handler.setFormatter(formatter)
|
|
78
|
-
root_logger.addHandler(file_handler)
|
|
79
|
-
logging.info("File logging enabled for this run")
|
|
80
|
-
|
|
81
|
-
def run(self, stop_event: threading.Event | None = None) -> dict[str, Any]:
|
|
82
|
-
"""
|
|
83
|
-
Execute the full pipeline.
|
|
84
|
-
|
|
85
|
-
Args:
|
|
86
|
-
stop_event: Optional event to signal recording stop (e.g., for GUI).
|
|
87
|
-
|
|
88
|
-
Returns:
|
|
89
|
-
Dict with 'text' (str), 'raw' (dict), 'duration' (float),
|
|
90
|
-
'paths' (dict of Path or None).
|
|
91
|
-
|
|
92
|
-
Raises:
|
|
93
|
-
Exception: On recording, conversion, or transcription errors.
|
|
94
|
-
"""
|
|
95
|
-
self._setup_save_all()
|
|
96
|
-
|
|
97
|
-
# Resolve parameters
|
|
98
|
-
provider = self.cfg.defaults.provider
|
|
99
|
-
audio_format = self.cfg.defaults.format
|
|
100
|
-
model = self.cfg.defaults.model
|
|
101
|
-
original_model = model
|
|
102
|
-
if self.transcribe_mode:
|
|
103
|
-
model = "voxtral-mini-latest"
|
|
104
|
-
if original_model != "voxtral-mini-latest":
|
|
105
|
-
logging.warning(
|
|
106
|
-
"Mode Transcribe : modèle override de '%s' vers 'voxtral-mini-latest' "
|
|
107
|
-
"(optimisé pour la transcription).",
|
|
108
|
-
original_model,
|
|
109
|
-
)
|
|
110
|
-
language = self.cfg.defaults.language
|
|
111
|
-
rate = self.cfg.defaults.rate
|
|
112
|
-
channels = self.cfg.defaults.channels
|
|
113
|
-
device = self.cfg.defaults.device
|
|
114
|
-
base = self.outfile_prefix or f"rec_{timestamp()}"
|
|
115
|
-
if self.transcribe_mode:
|
|
116
|
-
final_user_prompt = None
|
|
117
|
-
self._status("Mode Transcribe activated: no prompt used.")
|
|
118
|
-
else:
|
|
119
|
-
final_user_prompt = self.cfg.resolve_prompt(self.user_prompt, self.user_prompt_file)
|
|
120
|
-
keep_audio = self.cfg.defaults.keep_audio_files
|
|
121
|
-
keep_transcript = self.cfg.defaults.keep_transcript_files
|
|
122
|
-
copy_to_clip = self.cfg.defaults.copy
|
|
123
|
-
|
|
124
|
-
# Validation (fail fast)
|
|
125
|
-
if channels not in (1, 2):
|
|
126
|
-
raise ValueError("channels must be 1 or 2")
|
|
127
|
-
if rate <= 0:
|
|
128
|
-
raise ValueError("rate must be > 0")
|
|
129
|
-
if audio_format not in {"wav", "mp3", "opus"}: # noqa: E501
|
|
130
|
-
raise ValueError("format must be one of wav|mp3|opus")
|
|
131
|
-
|
|
132
|
-
paths: dict[str, Path | None] = {}
|
|
133
|
-
stop_for_recording = stop_event or threading.Event()
|
|
134
|
-
|
|
135
|
-
try:
|
|
136
|
-
self._status("Recording...")
|
|
137
|
-
if keep_audio:
|
|
138
|
-
self.cfg.recordings_dir.mkdir(parents=True, exist_ok=True)
|
|
139
|
-
wav_path = self.cfg.recordings_dir / f"{base}.wav"
|
|
140
|
-
duration = record_wav(
|
|
141
|
-
wav_path,
|
|
142
|
-
samplerate=rate,
|
|
143
|
-
channels=channels,
|
|
144
|
-
device=device,
|
|
145
|
-
stop_event=stop_for_recording,
|
|
146
|
-
)
|
|
147
|
-
to_send_path = wav_path
|
|
148
|
-
paths["wav"] = wav_path
|
|
149
|
-
else:
|
|
150
|
-
with tempfile.TemporaryDirectory() as tmpdir:
|
|
151
|
-
tmp_path = Path(tmpdir)
|
|
152
|
-
wav_path = tmp_path / f"{base}.wav"
|
|
153
|
-
duration = record_wav(
|
|
154
|
-
wav_path,
|
|
155
|
-
samplerate=rate,
|
|
156
|
-
channels=channels,
|
|
157
|
-
device=device,
|
|
158
|
-
stop_event=stop_for_recording,
|
|
159
|
-
)
|
|
160
|
-
to_send_path = wav_path
|
|
161
|
-
|
|
162
|
-
# Convert if needed
|
|
163
|
-
if audio_format in {"mp3", "opus"}:
|
|
164
|
-
self._status("Converting...")
|
|
165
|
-
to_send_path = convert_audio(wav_path, audio_format)
|
|
166
|
-
logging.info("Converted %s -> %s", wav_path, to_send_path)
|
|
167
|
-
|
|
168
|
-
# Transcribe
|
|
169
|
-
self._status("Transcribing...")
|
|
170
|
-
prov = get_provider(provider, cfg=self.cfg)
|
|
171
|
-
result = prov.transcribe(
|
|
172
|
-
to_send_path,
|
|
173
|
-
user_prompt=final_user_prompt,
|
|
174
|
-
model=model,
|
|
175
|
-
language=language,
|
|
176
|
-
transcribe_mode=self.transcribe_mode,
|
|
177
|
-
)
|
|
178
|
-
text = result["text"]
|
|
179
|
-
raw = result["raw"]
|
|
180
|
-
|
|
181
|
-
# Save if keeping transcripts
|
|
182
|
-
if keep_transcript:
|
|
183
|
-
self.cfg.transcripts_dir.mkdir(parents=True, exist_ok=True)
|
|
184
|
-
txt_path, json_path = save_transcript(
|
|
185
|
-
self.cfg.transcripts_dir, base, provider, text, raw
|
|
186
|
-
)
|
|
187
|
-
paths["txt"] = txt_path
|
|
188
|
-
paths["json"] = json_path
|
|
189
|
-
else:
|
|
190
|
-
paths["txt"] = None
|
|
191
|
-
paths["json"] = None
|
|
192
|
-
|
|
193
|
-
# Copy to clipboard
|
|
194
|
-
if copy_to_clip:
|
|
195
|
-
try:
|
|
196
|
-
copy_to_clipboard(text)
|
|
197
|
-
logging.info("Copied transcription to clipboard")
|
|
198
|
-
except Exception as e:
|
|
199
|
-
logging.warning("Failed to copy to clipboard: %s", e)
|
|
200
|
-
|
|
201
|
-
logging.info("Pipeline finished (%.2fs)", duration)
|
|
202
|
-
return {
|
|
203
|
-
"text": text,
|
|
204
|
-
"raw": raw,
|
|
205
|
-
"duration": duration,
|
|
206
|
-
"paths": paths,
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
# For keep_audio=True: continue outside tempdir
|
|
210
|
-
# Convert if needed
|
|
211
|
-
if audio_format in {"mp3", "opus"}:
|
|
212
|
-
self._status("Converting...")
|
|
213
|
-
to_send_path = convert_audio(wav_path, audio_format)
|
|
214
|
-
logging.info("Converted %s -> %s", wav_path, to_send_path)
|
|
215
|
-
paths["converted"] = to_send_path
|
|
216
|
-
|
|
217
|
-
# Transcribe
|
|
218
|
-
self._status("Transcribing...")
|
|
219
|
-
prov = get_provider(provider, cfg=self.cfg)
|
|
220
|
-
result = prov.transcribe(
|
|
221
|
-
to_send_path,
|
|
222
|
-
user_prompt=final_user_prompt,
|
|
223
|
-
model=model,
|
|
224
|
-
language=language,
|
|
225
|
-
transcribe_mode=self.transcribe_mode,
|
|
226
|
-
)
|
|
227
|
-
text = result["text"]
|
|
228
|
-
raw = result["raw"]
|
|
229
|
-
|
|
230
|
-
# Save if keeping transcripts
|
|
231
|
-
if keep_transcript:
|
|
232
|
-
self.cfg.transcripts_dir.mkdir(parents=True, exist_ok=True)
|
|
233
|
-
txt_path, json_path = save_transcript(
|
|
234
|
-
self.cfg.transcripts_dir, base, provider, text, raw
|
|
235
|
-
)
|
|
236
|
-
paths["txt"] = txt_path
|
|
237
|
-
paths["json"] = json_path
|
|
238
|
-
else:
|
|
239
|
-
paths["txt"] = None
|
|
240
|
-
paths["json"] = None
|
|
241
|
-
|
|
242
|
-
# Copy to clipboard
|
|
243
|
-
if copy_to_clip:
|
|
244
|
-
try:
|
|
245
|
-
copy_to_clipboard(text)
|
|
246
|
-
logging.info("Copied transcription to clipboard")
|
|
247
|
-
except Exception as e:
|
|
248
|
-
logging.warning("Failed to copy to clipboard: %s", e)
|
|
249
|
-
|
|
250
|
-
logging.info("Pipeline finished (%.2fs)", duration)
|
|
251
|
-
return {
|
|
252
|
-
"text": text,
|
|
253
|
-
"raw": raw,
|
|
254
|
-
"duration": duration,
|
|
255
|
-
"paths": paths,
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
except Exception:
|
|
259
|
-
logging.exception("Pipeline failed")
|
|
260
|
-
raise
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|