supervoxtral 0.1.3__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/AGENTS.md +4 -4
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/PKG-INFO +1 -1
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/README.md +17 -7
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/pyproject.toml +1 -1
- supervoxtral-0.1.5/supervoxtral.gif +0 -0
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/cli.py +1 -1
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/core/config.py +43 -8
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/core/pipeline.py +2 -0
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/core/prompt.py +69 -23
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/ui/qt_app.py +52 -14
- supervoxtral-0.1.3/supervoxtral.gif +0 -0
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/.gitignore +0 -0
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/LICENSE +0 -0
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/logs/.gitkeep +0 -0
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/macos-shortcut.png +0 -0
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/notes.md +0 -0
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/prompt/.gitkeep +0 -0
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/recordings/.gitkeep +0 -0
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/__init__.py +0 -0
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/core/__init__.py +0 -0
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/core/audio.py +0 -0
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/core/clipboard.py +0 -0
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/core/storage.py +0 -0
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/providers/__init__.py +0 -0
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/providers/base.py +0 -0
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/providers/mistral.py +0 -0
- {supervoxtral-0.1.3 → supervoxtral-0.1.5}/transcripts/.gitkeep +0 -0
|
@@ -13,7 +13,7 @@ supervoxtral/
|
|
|
13
13
|
│ │ ├── audio.py # Recording, ffmpeg detection/conversion
|
|
14
14
|
│ │ ├── config.py # Structured Config dataclasses, loading, resolution, logging setup
|
|
15
15
|
│ │ ├── pipeline.py # Centralized RecordingPipeline for CLI/GUI unification
|
|
16
|
-
│ │ ├── prompt.py # Prompt resolution (via Config)
|
|
16
|
+
│ │ ├── prompt.py # Prompt resolution (supports multiple prompts via Config dict, key-based)
|
|
17
17
|
│ │ └── storage.py # Save transcripts and raw JSON (conditional on keep_transcript_files)
|
|
18
18
|
│ ├── providers/ # API integrations
|
|
19
19
|
│ │ ├── __init__.py # Provider registry (get_provider with Config support)
|
|
@@ -33,10 +33,10 @@ supervoxtral/
|
|
|
33
33
|
## Typical Execution Flow
|
|
34
34
|
|
|
35
35
|
- **Entry**: `svx/cli.py` Typer `record` command parses args (e.g., --prompt, --save-all, --gui, --transcribe).
|
|
36
|
-
- **Config & Prompt**: Load `Config` via `Config.load()` (`core/config.py`); if transcribe_mode, skip prompt resolution; else resolve prompt with `cfg.resolve_prompt()` (`core/prompt.py`).
|
|
37
|
-
- **Pipeline**: Run `RecordingPipeline` (`core/pipeline.py`): record WAV/stop (`core/audio.py`), optional conversion (ffmpeg), get provider/init (`providers/__init__.py`, e.g., `mistral.py` from `cfg`); if transcribe_mode (CLI only): no prompt, model override to voxtral-mini-latest (with warning if changed), pass transcribe_mode to provider.transcribe; for GUI: --transcribe ignored (warning), recording starts immediately, uses modular record()/process()/clean() with dynamic mode (Transcribe: no prompt, model override; Prompt: resolved prompt); transcribe, conditional save (`core/storage.py` based on `keep_*`/`save_all`), clipboard copy, logging setup.
|
|
36
|
+
- **Config & Prompt**: Load `Config` via `Config.load()` (`core/config.py`); supports dict of prompts in config.toml (e.g., [prompt.default], [prompt.other]); if transcribe_mode, skip prompt resolution; else resolve prompt with `cfg.resolve_prompt(key="default" for CLI, or selected key for GUI)` (`core/prompt.py`).
|
|
37
|
+
- **Pipeline**: Run `RecordingPipeline` (`core/pipeline.py`): record WAV/stop (`core/audio.py`), optional conversion (ffmpeg), get provider/init (`providers/__init__.py`, e.g., `mistral.py` from `cfg`); if transcribe_mode (CLI only): no prompt, model override to voxtral-mini-latest (with warning if changed), pass transcribe_mode to provider.transcribe; for GUI: --transcribe ignored (warning), recording starts immediately, uses modular record()/process()/clean() with dynamic mode (Transcribe: no prompt, model override; Prompt key: resolved prompt for selected key); transcribe, conditional save (`core/storage.py` based on `keep_*`/`save_all`), clipboard copy, logging setup.
|
|
38
38
|
- **Cleanup**: Temp files auto-deleted (tempfile) if `keep_*=false`; dirs created only if persistence enabled.
|
|
39
|
-
- **End**: Return `{"text": str, "raw": dict, "duration": float, "paths": dict}`; CLI prints result, GUI emits progress/updates via callback (buttons: 'Transcribe' for
|
|
39
|
+
- **End**: Return `{"text": str, "raw": dict, "duration": float, "paths": dict}`; CLI prints result (uses "default" prompt), GUI emits progress/updates via callback (buttons: 'Transcribe' for no prompt; capitalized prompt keys (e.g., 'Default', 'Test') for selected prompt; 'Cancel'; Esc/close cancels).
|
|
40
40
|
|
|
41
41
|
## Build & test
|
|
42
42
|
```bash
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
SuperVoxtral is a lightweight Python CLI/GUI utility for recording microphone audio and integrating with Mistral's Voxtral APIs for transcription or audio-enabled chat.
|
|
6
6
|
|
|
7
|
-
Voxtral models, such as `voxtral-mini-latest` and `voxtral-small-latest`, deliver fast inference times, high transcription accuracy across languages and accents, and minimal API costs.
|
|
7
|
+
Voxtral models, such as `voxtral-mini-latest` and `voxtral-small-latest`, deliver fast inference times, high transcription accuracy across languages and accents, and minimal API costs. Voxtral supports two modes: pure transcription via a dedicated endpoint (no prompts needed) or chat mode, where audio input combines with text prompts for refined outputs—like error correction or contextual summarization—without invoking a separate LLM.
|
|
8
8
|
|
|
9
9
|
For instance, use a prompt like: "_Transcribe this audio precisely and remove all minor speech hesitations: "um", "uh", "er", "euh", "ben", etc._"
|
|
10
10
|
|
|
@@ -26,6 +26,7 @@ The package is available on PyPI. We recommend using `uv` (a fast Python package
|
|
|
26
26
|
- For GUI support (includes PySide6):
|
|
27
27
|
```
|
|
28
28
|
uv tool install "supervoxtral[gui]"
|
|
29
|
+
# to update: uv tool upgrade supervoxtral
|
|
29
30
|
```
|
|
30
31
|
|
|
31
32
|
- For core CLI only functionality:
|
|
@@ -84,7 +85,7 @@ To get started quickly with SuperVoxtral:
|
|
|
84
85
|
```
|
|
85
86
|
|
|
86
87
|
3. Launch the GUI: `svx record --gui`
|
|
87
|
-
This opens the minimal GUI, starts recording immediately; click 'Transcribe' for pure transcription (no prompt) or '
|
|
88
|
+
This opens the minimal GUI and starts recording immediately. Click 'Transcribe' for pure transcription (no prompt), or click the button for one of your configured prompts (e.g., 'Default', 'Mail', 'Translate') for prompted transcription using the selected prompt. In GUI mode, --transcribe is ignored with a warning. Results are copied to the clipboard.
|
|
88
89
|
|
|
89
90
|
### macOS Shortcuts Integration
|
|
90
91
|
|
|
@@ -171,13 +172,18 @@ copy = true
|
|
|
171
172
|
# Log level: "DEBUG" | "INFO" | "WARNING" | "ERROR"
|
|
172
173
|
log_level = "INFO"
|
|
173
174
|
|
|
174
|
-
[prompt]
|
|
175
|
+
[prompt.default]
|
|
175
176
|
# Default user prompt source:
|
|
176
177
|
# - Option 1: Use a file (recommended)
|
|
177
178
|
file = "~/.config/supervoxtral/prompt/user.md"
|
|
178
179
|
#
|
|
179
180
|
# - Option 2: Inline prompt (less recommended for long text)
|
|
180
181
|
# text = "Please transcribe the audio and provide a concise summary in French."
|
|
182
|
+
|
|
183
|
+
[prompt.test]
|
|
184
|
+
# Example additional prompt
|
|
185
|
+
# file = "/path/to/another_prompt.md"
|
|
186
|
+
# text = "Summarize the meeting in bullet points."
|
|
181
187
|
```
|
|
182
188
|
|
|
183
189
|
**Configuration is centralized via a structured `Config` object loaded from your user configuration file (`config.toml`). CLI arguments override select values (e.g., prompt, log level), but most defaults (provider, model, keep flags) come from `config.toml`. No environment variables are used for API keys or settings.**
|
|
@@ -219,14 +225,18 @@ svx record [OPTIONS]
|
|
|
219
225
|
- Interactive mode: recording starts immediately; click 'Transcribe' (pure transcription, no prompt) or one of the prompt buttons, one per key configured under `[prompt.*]` in config.toml (e.g., 'Default'), to transcribe with that prompt; --transcribe ignored with warning. GUI respects config.toml and CLI flags (e.g., `--gui --save-all`).
|
|
220
226
|
|
|
221
227
|
**Prompt Resolution Priority** (for non-transcribe mode):
|
|
228
|
+
In the CLI, the 'default' prompt from config.toml ([prompt.default]) is used unless it is overridden. Prompt sources are tried in the following order, and the first non-empty one wins:
|
|
222
229
|
1. CLI `--user-prompt` or `--user-prompt-file`
|
|
223
|
-
2.
|
|
224
|
-
3.
|
|
225
|
-
4.
|
|
230
|
+
2. Specified prompt key (future: via --prompt-key; currently implicit 'default')
|
|
231
|
+
3. config.toml [prompt.default] (text or file)
|
|
232
|
+
4. User prompt file (user.md in config dir)
|
|
233
|
+
5. Fallback: "What's in this audio?"
|
|
226
234
|
|
|
227
235
|
## Changelog
|
|
228
236
|
|
|
229
|
-
- 0.1.
|
|
237
|
+
- 0.1.5: Fix a bug in prompt selection
|
|
238
|
+
- 0.1.4: Support for multiple prompts in config.toml with dynamic GUI buttons for each prompt key
|
|
239
|
+
- 0.1.3: Minor style update
|
|
230
240
|
- 0.1.2: Interactive mode in GUI (choose transcribe / prompt / cancel while recording)
|
|
231
241
|
- 0.1.1: Minor updates to default config and default prompt
|
|
232
242
|
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "supervoxtral"
|
|
7
|
-
version = "0.1.
|
|
7
|
+
version = "0.1.5"
|
|
8
8
|
description = "CLI/GUI audio recorder and transcription client using Mistral Voxtral (chat with audio and transcription)."
|
|
9
9
|
requires-python = ">=3.11"
|
|
10
10
|
license = { text = "MIT" }
|
|
Binary file
|
|
@@ -72,7 +72,7 @@ def config_show() -> None:
|
|
|
72
72
|
user_prompt_file = cfg.user_prompt_dir / "user.md"
|
|
73
73
|
|
|
74
74
|
defaults_section = asdict(cfg.defaults)
|
|
75
|
-
prompt_section = asdict(cfg.prompt)
|
|
75
|
+
prompt_section = {k: asdict(e) for k, e in cfg.prompt.prompts.items()}
|
|
76
76
|
|
|
77
77
|
# Resolve prompt source (same logic as record command, but read-only)
|
|
78
78
|
resolved_prompt = cfg.resolve_prompt(None, None)
|
|
@@ -242,13 +242,15 @@ def init_user_config(force: bool = False, prompt_file: Path | None = None) -> Pa
|
|
|
242
242
|
"copy = true\n\n"
|
|
243
243
|
'# Log level: "DEBUG" | "INFO" | "WARNING" | "ERROR"\n'
|
|
244
244
|
'log_level = "INFO"\n\n'
|
|
245
|
-
"[prompt]\n"
|
|
245
|
+
"[prompt.default]\n"
|
|
246
246
|
"# Default user prompt source:\n"
|
|
247
247
|
"# - Option 1: Use a file (recommended)\n"
|
|
248
248
|
f'file = "{str(prompt_file)}"\n'
|
|
249
249
|
"#\n"
|
|
250
250
|
"# - Option 2: Inline prompt (less recommended for long text)\n"
|
|
251
251
|
'# text = "Please transcribe the audio and provide a concise summary in French."\n'
|
|
252
|
+
"#\n"
|
|
253
|
+
"# For multiple prompts in future, add [prompt.other] sections.\n"
|
|
252
254
|
)
|
|
253
255
|
|
|
254
256
|
if not USER_CONFIG_FILE.exists() or force:
|
|
@@ -282,11 +284,16 @@ class DefaultsConfig:
|
|
|
282
284
|
|
|
283
285
|
|
|
284
286
|
@dataclass
|
|
285
|
-
class
|
|
287
|
+
class PromptEntry:
|
|
286
288
|
text: str | None = None
|
|
287
289
|
file: str | None = None
|
|
288
290
|
|
|
289
291
|
|
|
292
|
+
@dataclass
|
|
293
|
+
class PromptConfig:
|
|
294
|
+
prompts: dict[str, PromptEntry] = field(default_factory=lambda: {"default": PromptEntry()})
|
|
295
|
+
|
|
296
|
+
|
|
290
297
|
@dataclass
|
|
291
298
|
class Config:
|
|
292
299
|
providers: dict[str, ProviderConfig] = field(default_factory=dict)
|
|
@@ -356,11 +363,39 @@ class Config:
|
|
|
356
363
|
providers_data[name] = ProviderConfig(api_key=api_key)
|
|
357
364
|
# Prompt
|
|
358
365
|
prompt_raw = user_config.get("prompt", {})
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
366
|
+
prompts_data: dict[str, PromptEntry] = {}
|
|
367
|
+
if isinstance(prompt_raw, dict):
|
|
368
|
+
if any(k in prompt_raw for k in ["text", "file"]): # old flat style
|
|
369
|
+
logging.warning(
|
|
370
|
+
"Old [prompt] format detected in %s; "
|
|
371
|
+
"please migrate to [prompt.default] manually.",
|
|
372
|
+
USER_CONFIG_FILE,
|
|
373
|
+
)
|
|
374
|
+
entry = PromptEntry(
|
|
375
|
+
text=prompt_raw.get("text")
|
|
376
|
+
if isinstance(prompt_raw.get("text"), str)
|
|
377
|
+
else None,
|
|
378
|
+
file=prompt_raw.get("file")
|
|
379
|
+
if isinstance(prompt_raw.get("file"), str)
|
|
380
|
+
else None,
|
|
381
|
+
)
|
|
382
|
+
prompts_data["default"] = entry
|
|
383
|
+
else: # new nested style
|
|
384
|
+
for key, entry_raw in prompt_raw.items():
|
|
385
|
+
if isinstance(entry_raw, dict):
|
|
386
|
+
entry = PromptEntry(
|
|
387
|
+
text=entry_raw.get("text")
|
|
388
|
+
if isinstance(entry_raw.get("text"), str)
|
|
389
|
+
else None,
|
|
390
|
+
file=entry_raw.get("file")
|
|
391
|
+
if isinstance(entry_raw.get("file"), str)
|
|
392
|
+
else None,
|
|
393
|
+
)
|
|
394
|
+
prompts_data[key] = entry
|
|
395
|
+
# Ensure "default" always exists
|
|
396
|
+
if "default" not in prompts_data:
|
|
397
|
+
prompts_data["default"] = PromptEntry()
|
|
398
|
+
prompt = PromptConfig(prompts=prompts_data)
|
|
364
399
|
data = {
|
|
365
400
|
"defaults": defaults,
|
|
366
401
|
"providers": providers_data,
|
|
@@ -376,7 +411,7 @@ class Config:
|
|
|
376
411
|
def resolve_prompt(self, inline: str | None = None, file_path: Path | None = None) -> str:
|
|
377
412
|
from svx.core.prompt import resolve_user_prompt
|
|
378
413
|
|
|
379
|
-
return resolve_user_prompt(self, inline, file_path, self.user_prompt_dir)
|
|
414
|
+
return resolve_user_prompt(self, inline, file_path, self.user_prompt_dir, key="default")
|
|
380
415
|
|
|
381
416
|
def get_provider_config(self, name: str) -> dict[str, Any]:
|
|
382
417
|
return asdict(self.providers.get(name, ProviderConfig()))
|
|
@@ -180,6 +180,8 @@ class RecordingPipeline:
|
|
|
180
180
|
else:
|
|
181
181
|
self._status("Transcribe mode activated: no prompt used.")
|
|
182
182
|
|
|
183
|
+
logging.debug(f"Applied prompt: {final_user_prompt or 'None (transcribe mode)'}")
|
|
184
|
+
|
|
183
185
|
paths: dict[str, Path | None] = {"wav": wav_path}
|
|
184
186
|
|
|
185
187
|
# Convert if needed
|
|
@@ -12,9 +12,10 @@ Intended to be small and dependency-light so it can be imported broadly.
|
|
|
12
12
|
from __future__ import annotations
|
|
13
13
|
|
|
14
14
|
import logging
|
|
15
|
+
from collections.abc import Callable
|
|
15
16
|
from pathlib import Path
|
|
16
17
|
|
|
17
|
-
from .config import USER_PROMPT_DIR, Config
|
|
18
|
+
from .config import USER_PROMPT_DIR, Config, PromptEntry
|
|
18
19
|
|
|
19
20
|
__all__ = [
|
|
20
21
|
"read_text_file",
|
|
@@ -68,16 +69,16 @@ def resolve_user_prompt(
|
|
|
68
69
|
inline: str | None = None,
|
|
69
70
|
file: Path | None = None,
|
|
70
71
|
user_prompt_dir: Path | None = None,
|
|
72
|
+
key: str | None = None,
|
|
71
73
|
) -> str:
|
|
72
74
|
"""
|
|
73
75
|
Resolve the effective user prompt from multiple sources, by priority:
|
|
74
76
|
|
|
75
77
|
1) inline text (CLI --user-prompt)
|
|
76
78
|
2) explicit file (CLI --user-prompt-file)
|
|
77
|
-
3) user config
|
|
78
|
-
4) user
|
|
79
|
-
5)
|
|
80
|
-
6) literal fallback: "What's in this audio?"
|
|
79
|
+
3) user config prompt for key (cfg.prompt.prompts[key or "default"])
|
|
80
|
+
4) user prompt dir file (user_prompt_dir / 'user.md')
|
|
81
|
+
5) literal fallback: "What's in this audio?"
|
|
81
82
|
|
|
82
83
|
Returns the first non-empty string after stripping.
|
|
83
84
|
"""
|
|
@@ -94,17 +95,18 @@ def resolve_user_prompt(
|
|
|
94
95
|
logging.warning("Failed to read user prompt file: %s", p)
|
|
95
96
|
return ""
|
|
96
97
|
|
|
97
|
-
def _from_user_cfg() -> str:
|
|
98
|
+
def _from_user_cfg(key: str) -> str:
|
|
98
99
|
try:
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
100
|
+
entry = cfg.prompt.prompts.get(key, PromptEntry())
|
|
101
|
+
if entry.text and entry.text.strip():
|
|
102
|
+
return entry.text.strip()
|
|
103
|
+
if entry.file:
|
|
104
|
+
file_path = Path(entry.file).expanduser()
|
|
105
|
+
if not file_path.is_absolute():
|
|
106
|
+
file_path = (user_prompt_dir or cfg.user_prompt_dir) / entry.file
|
|
107
|
+
return read_text_file(file_path).strip()
|
|
106
108
|
except Exception:
|
|
107
|
-
logging.debug("User config prompt processing failed.", exc_info=True)
|
|
109
|
+
logging.debug("User config prompt processing failed for key '%s'.", key, exc_info=True)
|
|
108
110
|
return ""
|
|
109
111
|
|
|
110
112
|
def _from_user_prompt_dir() -> str:
|
|
@@ -119,22 +121,46 @@ def resolve_user_prompt(
|
|
|
119
121
|
)
|
|
120
122
|
return ""
|
|
121
123
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
124
|
+
key = key or "default"
|
|
125
|
+
|
|
126
|
+
# Suppliers annotated with a name for tracing which one returned the prompt.
|
|
127
|
+
named_suppliers: list[tuple[str, Callable[[], str]]] = [
|
|
128
|
+
("inline", lambda: _strip(inline)),
|
|
129
|
+
("file", lambda: _read(file)),
|
|
130
|
+
(f"prompt_config[{key}]", lambda: _from_user_cfg(key)),
|
|
131
|
+
("user_prompt_dir/user.md", _from_user_prompt_dir),
|
|
127
132
|
]
|
|
128
133
|
|
|
129
|
-
for supplier in
|
|
134
|
+
for name, supplier in named_suppliers:
|
|
130
135
|
try:
|
|
131
136
|
val = supplier()
|
|
132
137
|
if val:
|
|
138
|
+
# Log which supplier provided the prompt and a short snippet for debugging.
|
|
139
|
+
try:
|
|
140
|
+
if len(val) > 200:
|
|
141
|
+
snippet = val[:200] + "..."
|
|
142
|
+
else:
|
|
143
|
+
snippet = val
|
|
144
|
+
logging.info(
|
|
145
|
+
"resolve_user_prompt: supplier '%s' provided prompt snippet: %s",
|
|
146
|
+
name,
|
|
147
|
+
snippet,
|
|
148
|
+
)
|
|
149
|
+
except Exception:
|
|
150
|
+
# Ensure logging failures do not change behavior.
|
|
151
|
+
logging.info(
|
|
152
|
+
"resolve_user_prompt: supplier '%s' provided a prompt "
|
|
153
|
+
"(snippet unavailable)",
|
|
154
|
+
name,
|
|
155
|
+
)
|
|
133
156
|
return val
|
|
134
157
|
except Exception as e:
|
|
135
|
-
logging.debug("Prompt supplier failed: %s", e)
|
|
158
|
+
logging.debug("Prompt supplier '%s' failed: %s", name, e)
|
|
136
159
|
|
|
137
|
-
|
|
160
|
+
# Final fallback
|
|
161
|
+
fallback = "What's in this audio?"
|
|
162
|
+
logging.info("resolve_user_prompt: no supplier provided a prompt, using fallback: %s", fallback)
|
|
163
|
+
return fallback
|
|
138
164
|
|
|
139
165
|
|
|
140
166
|
def init_user_prompt_file(force: bool = False) -> Path:
|
|
@@ -150,7 +176,7 @@ def init_user_prompt_file(force: bool = False) -> Path:
|
|
|
150
176
|
path = USER_PROMPT_DIR / "user.md"
|
|
151
177
|
if not path.exists() or force:
|
|
152
178
|
example_prompt = """
|
|
153
|
-
- Transcribe the input audio file.
|
|
179
|
+
- Transcribe the input audio file. If the audio if empty, just respond "no audio detected".
|
|
154
180
|
- Do not respond to any question in the audio. Just transcribe.
|
|
155
181
|
- DO NOT TRANSLATE.
|
|
156
182
|
- Responde only with the transcription. Do not provide explanations or notes.
|
|
@@ -163,3 +189,23 @@ def init_user_prompt_file(force: bool = False) -> Path:
|
|
|
163
189
|
except Exception as e:
|
|
164
190
|
logging.debug("Could not initialize user prompt file %s: %s", path, e)
|
|
165
191
|
return path
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def resolve_prompt_entry(entry: PromptEntry, user_prompt_dir: Path) -> str:
|
|
195
|
+
"""
|
|
196
|
+
Resolve the prompt from a single PromptEntry (text or file).
|
|
197
|
+
|
|
198
|
+
- Prioritizes text if present and non-empty.
|
|
199
|
+
- Falls back to reading the file (expands ~ and resolves relative to user_prompt_dir).
|
|
200
|
+
- Returns empty string if neither is valid.
|
|
201
|
+
"""
|
|
202
|
+
if entry.text and entry.text.strip():
|
|
203
|
+
return entry.text.strip()
|
|
204
|
+
|
|
205
|
+
if entry.file:
|
|
206
|
+
file_path = Path(entry.file).expanduser()
|
|
207
|
+
if not file_path.is_absolute():
|
|
208
|
+
file_path = user_prompt_dir / entry.file
|
|
209
|
+
return read_text_file(file_path).strip()
|
|
210
|
+
|
|
211
|
+
return ""
|
|
@@ -37,6 +37,7 @@ from PySide6.QtWidgets import (
|
|
|
37
37
|
import svx.core.config as config
|
|
38
38
|
from svx.core.config import Config
|
|
39
39
|
from svx.core.pipeline import RecordingPipeline
|
|
40
|
+
from svx.core.prompt import resolve_user_prompt
|
|
40
41
|
|
|
41
42
|
__all__ = ["RecorderWindow", "run_gui"]
|
|
42
43
|
|
|
@@ -239,11 +240,11 @@ class RecorderWorker(QObject):
|
|
|
239
240
|
self.cancel_requested = True
|
|
240
241
|
self._stop_event.set()
|
|
241
242
|
|
|
242
|
-
def _resolve_user_prompt(self) -> str:
|
|
243
|
+
def _resolve_user_prompt(self, key: str) -> str:
|
|
243
244
|
"""
|
|
244
|
-
Determine the final user prompt using the shared resolver.
|
|
245
|
+
Determine the final user prompt using the shared resolver for the given key.
|
|
245
246
|
"""
|
|
246
|
-
return self.cfg
|
|
247
|
+
return resolve_user_prompt(self.cfg, None, None, self.cfg.user_prompt_dir, key=key)
|
|
247
248
|
|
|
248
249
|
def run(self) -> None:
|
|
249
250
|
"""
|
|
@@ -272,10 +273,39 @@ class RecorderWorker(QObject):
|
|
|
272
273
|
self.canceled.emit()
|
|
273
274
|
return
|
|
274
275
|
self.status.emit("Processing in progress...")
|
|
276
|
+
# Wait for user to select mode in the GUI
|
|
275
277
|
while self.mode is None:
|
|
276
278
|
time.sleep(0.05)
|
|
279
|
+
|
|
280
|
+
# Log the selected mode/key for debugging prompt application
|
|
281
|
+
try:
|
|
282
|
+
logging.info("RecorderWorker: selected mode/key: %s", self.mode)
|
|
283
|
+
except Exception:
|
|
284
|
+
# ensure failures in logging don't break the worker
|
|
285
|
+
pass
|
|
286
|
+
|
|
277
287
|
transcribe_mode = self.mode == "transcribe"
|
|
278
|
-
|
|
288
|
+
if transcribe_mode:
|
|
289
|
+
user_prompt = None
|
|
290
|
+
else:
|
|
291
|
+
# Resolve the user prompt for the selected key and log a short snippet
|
|
292
|
+
user_prompt = self._resolve_user_prompt(self.mode)
|
|
293
|
+
try:
|
|
294
|
+
if user_prompt:
|
|
295
|
+
snippet = (
|
|
296
|
+
user_prompt[:200] + "..." if len(user_prompt) > 200 else user_prompt
|
|
297
|
+
)
|
|
298
|
+
else:
|
|
299
|
+
snippet = "<EMPTY>"
|
|
300
|
+
logging.info(
|
|
301
|
+
"RecorderWorker: resolved prompt snippet for key '%s': %s",
|
|
302
|
+
self.mode,
|
|
303
|
+
snippet,
|
|
304
|
+
)
|
|
305
|
+
except Exception:
|
|
306
|
+
# avoid breaking the flow on logging errors
|
|
307
|
+
pass
|
|
308
|
+
|
|
279
309
|
result = pipeline.process(wav_path, duration, transcribe_mode, user_prompt)
|
|
280
310
|
keep_audio = self.save_all or self.cfg.defaults.keep_audio_files
|
|
281
311
|
pipeline.clean(wav_path, result["paths"], keep_audio)
|
|
@@ -310,6 +340,7 @@ class RecorderWindow(QWidget):
|
|
|
310
340
|
self.user_prompt_file = user_prompt_file
|
|
311
341
|
self.save_all = save_all
|
|
312
342
|
self.outfile_prefix = outfile_prefix
|
|
343
|
+
self.prompt_keys = sorted(self.cfg.prompt.prompts.keys())
|
|
313
344
|
|
|
314
345
|
# Background worker (create early for signal connections)
|
|
315
346
|
self._worker = RecorderWorker(
|
|
@@ -381,12 +412,17 @@ class RecorderWindow(QWidget):
|
|
|
381
412
|
button_layout.addStretch()
|
|
382
413
|
self._transcribe_btn = QPushButton("Transcribe")
|
|
383
414
|
self._transcribe_btn.setToolTip("Stop and transcribe without prompt")
|
|
384
|
-
self._transcribe_btn.clicked.connect(
|
|
415
|
+
self._transcribe_btn.clicked.connect(
|
|
416
|
+
lambda checked=False, m="transcribe": self._on_mode_selected(m)
|
|
417
|
+
)
|
|
385
418
|
button_layout.addWidget(self._transcribe_btn)
|
|
386
|
-
self.
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
419
|
+
self._prompt_buttons: dict[str, QPushButton] = {}
|
|
420
|
+
for key in self.prompt_keys:
|
|
421
|
+
btn = QPushButton(key.capitalize())
|
|
422
|
+
btn.setToolTip(f"Stop and transcribe with '{key}' prompt")
|
|
423
|
+
btn.clicked.connect(lambda checked=False, k=key: self._on_mode_selected(k))
|
|
424
|
+
self._prompt_buttons[key] = btn
|
|
425
|
+
button_layout.addWidget(btn)
|
|
390
426
|
self._cancel_btn = QPushButton("Cancel")
|
|
391
427
|
self._cancel_btn.setObjectName("cancel_btn")
|
|
392
428
|
self._cancel_btn.setToolTip("Stop recording and quit without processing")
|
|
@@ -397,6 +433,8 @@ class RecorderWindow(QWidget):
|
|
|
397
433
|
button_widget.setLayout(button_layout)
|
|
398
434
|
layout.addWidget(button_widget, 0, Qt.AlignmentFlag.AlignCenter)
|
|
399
435
|
|
|
436
|
+
self._action_buttons = [self._transcribe_btn] + list(self._prompt_buttons.values())
|
|
437
|
+
|
|
400
438
|
# Keyboard shortcut: Esc to stop
|
|
401
439
|
stop_action = QAction(self)
|
|
402
440
|
stop_action.setShortcut(QKeySequence.StandardKey.Cancel) # Esc
|
|
@@ -456,17 +494,17 @@ class RecorderWindow(QWidget):
|
|
|
456
494
|
self._worker.cancel()
|
|
457
495
|
super().closeEvent(event)
|
|
458
496
|
|
|
459
|
-
def
|
|
460
|
-
self.
|
|
461
|
-
|
|
497
|
+
def _on_mode_selected(self, mode: str) -> None:
|
|
498
|
+
for btn in self._action_buttons:
|
|
499
|
+
btn.setEnabled(False)
|
|
462
500
|
self._cancel_btn.setEnabled(False)
|
|
463
501
|
self._status_label.setText("Stopping and processing...")
|
|
464
502
|
self._worker.set_mode(mode)
|
|
465
503
|
self._worker.stop()
|
|
466
504
|
|
|
467
505
|
def _on_cancel_clicked(self) -> None:
|
|
468
|
-
self.
|
|
469
|
-
|
|
506
|
+
for btn in self._action_buttons:
|
|
507
|
+
btn.setEnabled(False)
|
|
470
508
|
self._cancel_btn.setEnabled(False)
|
|
471
509
|
self._status_label.setText("Canceling...")
|
|
472
510
|
self._worker.cancel()
|
|
Binary file
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|