supervoxtral 0.1.2__tar.gz → 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/AGENTS.md +4 -4
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/PKG-INFO +1 -1
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/README.md +20 -13
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/pyproject.toml +1 -1
- supervoxtral-0.1.4/supervoxtral.gif +0 -0
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/svx/cli.py +1 -1
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/svx/core/config.py +43 -8
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/svx/core/prompt.py +38 -16
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/svx/ui/qt_app.py +32 -25
- supervoxtral-0.1.2/supervoxtral.gif +0 -0
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/.gitignore +0 -0
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/LICENSE +0 -0
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/logs/.gitkeep +0 -0
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/macos-shortcut.png +0 -0
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/notes.md +0 -0
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/prompt/.gitkeep +0 -0
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/recordings/.gitkeep +0 -0
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/svx/__init__.py +0 -0
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/svx/core/__init__.py +0 -0
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/svx/core/audio.py +0 -0
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/svx/core/clipboard.py +0 -0
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/svx/core/pipeline.py +0 -0
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/svx/core/storage.py +0 -0
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/svx/providers/__init__.py +0 -0
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/svx/providers/base.py +0 -0
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/svx/providers/mistral.py +0 -0
- {supervoxtral-0.1.2 → supervoxtral-0.1.4}/transcripts/.gitkeep +0 -0
|
@@ -13,7 +13,7 @@ supervoxtral/
|
|
|
13
13
|
│ │ ├── audio.py # Recording, ffmpeg detection/conversion
|
|
14
14
|
│ │ ├── config.py # Structured Config dataclasses, loading, resolution, logging setup
|
|
15
15
|
│ │ ├── pipeline.py # Centralized RecordingPipeline for CLI/GUI unification
|
|
16
|
-
│ │ ├── prompt.py # Prompt resolution (via Config)
|
|
16
|
+
│ │ ├── prompt.py # Prompt resolution (supports multiple prompts via Config dict, key-based)
|
|
17
17
|
│ │ └── storage.py # Save transcripts and raw JSON (conditional on keep_transcript_files)
|
|
18
18
|
│ ├── providers/ # API integrations
|
|
19
19
|
│ │ ├── __init__.py # Provider registry (get_provider with Config support)
|
|
@@ -33,10 +33,10 @@ supervoxtral/
|
|
|
33
33
|
## Typical Execution Flow
|
|
34
34
|
|
|
35
35
|
- **Entry**: `svx/cli.py` Typer `record` command parses args (e.g., --prompt, --save-all, --gui, --transcribe).
|
|
36
|
-
- **Config & Prompt**: Load `Config` via `Config.load()` (`core/config.py`); if transcribe_mode, skip prompt resolution; else resolve prompt with `cfg.resolve_prompt()` (`core/prompt.py`).
|
|
37
|
-
- **Pipeline**: Run `RecordingPipeline` (`core/pipeline.py`): record WAV/stop (`core/audio.py`), optional conversion (ffmpeg), get provider/init (`providers/__init__.py`, e.g., `mistral.py` from `cfg`); if transcribe_mode (CLI only): no prompt, model override to voxtral-mini-latest (with warning if changed), pass transcribe_mode to provider.transcribe; for GUI: --transcribe ignored (warning), recording starts immediately, uses modular record()/process()/clean() with dynamic mode (Transcribe: no prompt, model override; Prompt: resolved prompt); transcribe, conditional save (`core/storage.py` based on `keep_*`/`save_all`), clipboard copy, logging setup.
|
|
36
|
+
- **Config & Prompt**: Load `Config` via `Config.load()` (`core/config.py`), which supports a dict of prompts in config.toml (e.g., `[prompt.default]`, `[prompt.other]`); if transcribe_mode, skip prompt resolution; otherwise resolve the prompt by key (`core/prompt.py`): the CLI calls `cfg.resolve_prompt()`, which always uses the `"default"` key, while the GUI resolves the key of the clicked prompt button.
|
|
37
|
+
- **Pipeline**: Run `RecordingPipeline` (`core/pipeline.py`): record WAV/stop (`core/audio.py`), optional conversion (ffmpeg), get provider/init (`providers/__init__.py`, e.g., `mistral.py` from `cfg`); if transcribe_mode (CLI only): no prompt, model override to voxtral-mini-latest (with warning if changed), pass transcribe_mode to provider.transcribe; for GUI: --transcribe ignored (warning), recording starts immediately, uses modular record()/process()/clean() with dynamic mode (Transcribe: no prompt, model override; Prompt key: resolved prompt for selected key); transcribe, conditional save (`core/storage.py` based on `keep_*`/`save_all`), clipboard copy, logging setup.
|
|
38
38
|
- **Cleanup**: Temp files auto-deleted (tempfile) if `keep_*=false`; dirs created only if persistence enabled.
|
|
39
|
-
- **End**: Return `{"text": str, "raw": dict, "duration": float, "paths": dict}`; CLI prints result, GUI emits progress/updates via callback (buttons: 'Transcribe' for
|
|
39
|
+
- **End**: Return `{"text": str, "raw": dict, "duration": float, "paths": dict}`; CLI prints result (uses "default" prompt), GUI emits progress/updates via callback (buttons: 'Transcribe' for no prompt; capitalized prompt keys (e.g., 'Default', 'Test') for selected prompt; 'Cancel'; Esc/close cancels).
|
|
40
40
|
|
|
41
41
|
## Build & test
|
|
42
42
|
```bash
|
|
@@ -22,16 +22,17 @@ The GUI is minimal, launches fast, and can be bound to a system hotkey. Upon sto
|
|
|
22
22
|
|
|
23
23
|
The package is available on PyPI. We recommend using `uv` (a fast Python package installer) for a simple, global tool installation—no virtual environment setup required.
|
|
24
24
|
|
|
25
|
-
- For core CLI functionality:
|
|
26
|
-
```
|
|
27
|
-
uv tool install supervoxtral
|
|
28
|
-
```
|
|
29
25
|
|
|
30
26
|
- For GUI support (includes PySide6):
|
|
31
27
|
```
|
|
32
28
|
uv tool install "supervoxtral[gui]"
|
|
33
29
|
```
|
|
34
30
|
|
|
31
|
+
- For core CLI-only functionality:
|
|
32
|
+
```
|
|
33
|
+
uv tool install supervoxtral
|
|
34
|
+
```
|
|
35
|
+
|
|
35
36
|
This installs the `svx` command globally. If you don't have `uv`, install it first via `curl -LsSf https://astral.sh/uv/install.sh | sh` (or from https://docs.astral.sh/uv/getting-started/installation/).
|
|
36
37
|
|
|
37
38
|
**Alternative: Using pip with a virtual environment**
|
|
@@ -53,13 +54,10 @@ If you prefer not to use uv, you can install via pip in a virtual environment:
|
|
|
53
54
|
```
|
|
54
55
|
|
|
55
56
|
2. Install the package:
|
|
56
|
-
```
|
|
57
|
-
pip install supervoxtral
|
|
58
|
-
```
|
|
59
57
|
|
|
60
58
|
For GUI support (includes PySide6):
|
|
61
59
|
```
|
|
62
|
-
pip install supervoxtral[gui]
|
|
60
|
+
pip install "supervoxtral[gui]"
|
|
63
61
|
```
|
|
64
62
|
|
|
65
63
|
This installs the `svx` command within the virtual environment. Make sure to activate the environment before running `svx`.
|
|
@@ -86,7 +84,7 @@ To get started quickly with SuperVoxtral:
|
|
|
86
84
|
```
|
|
87
85
|
|
|
88
86
|
3. Launch the GUI: `svx record --gui`
|
|
89
|
-
This opens the minimal GUI, starts recording immediately; click 'Transcribe' for pure transcription (no prompt) or '
|
|
87
|
+
This opens the minimal GUI and starts recording immediately. Click 'Transcribe' for pure transcription (no prompt), or click the button for one of your configured prompts (e.g., 'Default', 'Mail', 'Translate') to transcribe with that prompt. Results are copied to the clipboard. The `--transcribe` flag is ignored in GUI mode, with a warning.
|
|
90
88
|
|
|
91
89
|
### macOS Shortcuts Integration
|
|
92
90
|
|
|
@@ -173,13 +171,18 @@ copy = true
|
|
|
173
171
|
# Log level: "DEBUG" | "INFO" | "WARNING" | "ERROR"
|
|
174
172
|
log_level = "INFO"
|
|
175
173
|
|
|
176
|
-
[prompt]
|
|
174
|
+
[prompt.default]
|
|
177
175
|
# Default user prompt source:
|
|
178
176
|
# - Option 1: Use a file (recommended)
|
|
179
177
|
file = "~/.config/supervoxtral/prompt/user.md"
|
|
180
178
|
#
|
|
181
179
|
# - Option 2: Inline prompt (less recommended for long text)
|
|
182
180
|
# text = "Please transcribe the audio and provide a concise summary in French."
|
|
181
|
+
|
|
182
|
+
[prompt.test]
|
|
183
|
+
# Example additional prompt
|
|
184
|
+
# file = "/path/to/another_prompt.md"
|
|
185
|
+
# text = "Summarize the meeting in bullet points."
|
|
183
186
|
```
|
|
184
187
|
|
|
185
188
|
**Configuration is centralized via a structured `Config` object loaded from your user configuration file (`config.toml`). CLI arguments override select values (e.g., prompt, log level), but most defaults (provider, model, keep flags) come from `config.toml`. No environment variables are used for API keys or settings.**
|
|
@@ -221,13 +224,17 @@ svx record [OPTIONS]
|
|
|
221
224
|
- Interactive mode: recording starts immediately; click 'Transcribe' (pure transcription, no prompt) or one of the prompt buttons (one per prompt key in config.toml, e.g., 'Default'; uses that key's resolved prompt); `--transcribe` is ignored with a warning. The GUI respects config.toml and CLI flags (e.g., `--gui --save-all`).
|
|
222
225
|
|
|
223
226
|
**Prompt Resolution Priority** (for non-transcribe mode):
|
|
227
|
+
By default, the CLI uses the 'default' prompt from config.toml (`[prompt.default]`). The effective prompt is the first non-empty source, checked in this order:
|
|
224
228
|
1. CLI `--user-prompt` or `--user-prompt-file`
|
|
225
|
-
2.
|
|
226
|
-
3.
|
|
227
|
-
4.
|
|
229
|
+
2. Specified prompt key (future: via --prompt-key; currently implicit 'default')
|
|
230
|
+
3. config.toml [prompt.default] (text or file)
|
|
231
|
+
4. User prompt file (user.md in config dir)
|
|
232
|
+
5. Fallback: "What's in this audio?"
|
|
228
233
|
|
|
229
234
|
## Changelog
|
|
230
235
|
|
|
236
|
+
- 0.1.4: Support for multiple prompts in config.toml with dynamic GUI buttons for each prompt key
|
|
237
|
+
- 0.1.3: Minor style update
|
|
231
238
|
- 0.1.2: Interactive mode in GUI (choose transcribe / prompt / cancel while recording)
|
|
232
239
|
- 0.1.1: Minor updates to default config and default prompt
|
|
233
240
|
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "supervoxtral"
|
|
7
|
-
version = "0.1.2"
|
|
7
|
+
version = "0.1.4"
|
|
8
8
|
description = "CLI/GUI audio recorder and transcription client using Mistral Voxtral (chat with audio and transcription)."
|
|
9
9
|
requires-python = ">=3.11"
|
|
10
10
|
license = { text = "MIT" }
|
|
Binary file
|
|
@@ -72,7 +72,7 @@ def config_show() -> None:
|
|
|
72
72
|
user_prompt_file = cfg.user_prompt_dir / "user.md"
|
|
73
73
|
|
|
74
74
|
defaults_section = asdict(cfg.defaults)
|
|
75
|
-
prompt_section = asdict(cfg.prompt)
|
|
75
|
+
prompt_section = {k: asdict(e) for k, e in cfg.prompt.prompts.items()}
|
|
76
76
|
|
|
77
77
|
# Resolve prompt source (same logic as record command, but read-only)
|
|
78
78
|
resolved_prompt = cfg.resolve_prompt(None, None)
|
|
@@ -242,13 +242,15 @@ def init_user_config(force: bool = False, prompt_file: Path | None = None) -> Pa
|
|
|
242
242
|
"copy = true\n\n"
|
|
243
243
|
'# Log level: "DEBUG" | "INFO" | "WARNING" | "ERROR"\n'
|
|
244
244
|
'log_level = "INFO"\n\n'
|
|
245
|
-
"[prompt]\n"
|
|
245
|
+
"[prompt.default]\n"
|
|
246
246
|
"# Default user prompt source:\n"
|
|
247
247
|
"# - Option 1: Use a file (recommended)\n"
|
|
248
248
|
f'file = "{str(prompt_file)}"\n'
|
|
249
249
|
"#\n"
|
|
250
250
|
"# - Option 2: Inline prompt (less recommended for long text)\n"
|
|
251
251
|
'# text = "Please transcribe the audio and provide a concise summary in French."\n'
|
|
252
|
+
"#\n"
|
|
253
|
+
"# For multiple prompts in future, add [prompt.other] sections.\n"
|
|
252
254
|
)
|
|
253
255
|
|
|
254
256
|
if not USER_CONFIG_FILE.exists() or force:
|
|
@@ -282,11 +284,16 @@ class DefaultsConfig:
|
|
|
282
284
|
|
|
283
285
|
|
|
284
286
|
@dataclass
|
|
285
|
-
class
|
|
287
|
+
class PromptEntry:
|
|
286
288
|
text: str | None = None
|
|
287
289
|
file: str | None = None
|
|
288
290
|
|
|
289
291
|
|
|
292
|
+
@dataclass
|
|
293
|
+
class PromptConfig:
|
|
294
|
+
prompts: dict[str, PromptEntry] = field(default_factory=lambda: {"default": PromptEntry()})
|
|
295
|
+
|
|
296
|
+
|
|
290
297
|
@dataclass
|
|
291
298
|
class Config:
|
|
292
299
|
providers: dict[str, ProviderConfig] = field(default_factory=dict)
|
|
@@ -356,11 +363,39 @@ class Config:
|
|
|
356
363
|
providers_data[name] = ProviderConfig(api_key=api_key)
|
|
357
364
|
# Prompt
|
|
358
365
|
prompt_raw = user_config.get("prompt", {})
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
366
|
+
prompts_data: dict[str, PromptEntry] = {}
|
|
367
|
+
if isinstance(prompt_raw, dict):
|
|
368
|
+
if any(k in prompt_raw for k in ["text", "file"]): # old flat style
|
|
369
|
+
logging.warning(
|
|
370
|
+
"Old [prompt] format detected in %s; "
|
|
371
|
+
"please migrate to [prompt.default] manually.",
|
|
372
|
+
USER_CONFIG_FILE,
|
|
373
|
+
)
|
|
374
|
+
entry = PromptEntry(
|
|
375
|
+
text=prompt_raw.get("text")
|
|
376
|
+
if isinstance(prompt_raw.get("text"), str)
|
|
377
|
+
else None,
|
|
378
|
+
file=prompt_raw.get("file")
|
|
379
|
+
if isinstance(prompt_raw.get("file"), str)
|
|
380
|
+
else None,
|
|
381
|
+
)
|
|
382
|
+
prompts_data["default"] = entry
|
|
383
|
+
else: # new nested style
|
|
384
|
+
for key, entry_raw in prompt_raw.items():
|
|
385
|
+
if isinstance(entry_raw, dict):
|
|
386
|
+
entry = PromptEntry(
|
|
387
|
+
text=entry_raw.get("text")
|
|
388
|
+
if isinstance(entry_raw.get("text"), str)
|
|
389
|
+
else None,
|
|
390
|
+
file=entry_raw.get("file")
|
|
391
|
+
if isinstance(entry_raw.get("file"), str)
|
|
392
|
+
else None,
|
|
393
|
+
)
|
|
394
|
+
prompts_data[key] = entry
|
|
395
|
+
# Ensure "default" always exists
|
|
396
|
+
if "default" not in prompts_data:
|
|
397
|
+
prompts_data["default"] = PromptEntry()
|
|
398
|
+
prompt = PromptConfig(prompts=prompts_data)
|
|
364
399
|
data = {
|
|
365
400
|
"defaults": defaults,
|
|
366
401
|
"providers": providers_data,
|
|
@@ -376,7 +411,7 @@ class Config:
|
|
|
376
411
|
def resolve_prompt(self, inline: str | None = None, file_path: Path | None = None) -> str:
|
|
377
412
|
from svx.core.prompt import resolve_user_prompt
|
|
378
413
|
|
|
379
|
-
return resolve_user_prompt(self, inline, file_path, self.user_prompt_dir)
|
|
414
|
+
return resolve_user_prompt(self, inline, file_path, self.user_prompt_dir, key="default")
|
|
380
415
|
|
|
381
416
|
def get_provider_config(self, name: str) -> dict[str, Any]:
|
|
382
417
|
return asdict(self.providers.get(name, ProviderConfig()))
|
|
@@ -14,7 +14,7 @@ from __future__ import annotations
|
|
|
14
14
|
import logging
|
|
15
15
|
from pathlib import Path
|
|
16
16
|
|
|
17
|
-
from .config import USER_PROMPT_DIR, Config
|
|
17
|
+
from .config import USER_PROMPT_DIR, Config, PromptEntry
|
|
18
18
|
|
|
19
19
|
__all__ = [
|
|
20
20
|
"read_text_file",
|
|
@@ -68,16 +68,16 @@ def resolve_user_prompt(
|
|
|
68
68
|
inline: str | None = None,
|
|
69
69
|
file: Path | None = None,
|
|
70
70
|
user_prompt_dir: Path | None = None,
|
|
71
|
+
key: str | None = None,
|
|
71
72
|
) -> str:
|
|
72
73
|
"""
|
|
73
74
|
Resolve the effective user prompt from multiple sources, by priority:
|
|
74
75
|
|
|
75
76
|
1) inline text (CLI --user-prompt)
|
|
76
77
|
2) explicit file (CLI --user-prompt-file)
|
|
77
|
-
3) user config
|
|
78
|
-
4) user
|
|
79
|
-
5)
|
|
80
|
-
6) literal fallback: "What's in this audio?"
|
|
78
|
+
3) user config prompt for key (cfg.prompt.prompts[key or "default"])
|
|
79
|
+
4) user prompt dir file (user_prompt_dir / 'user.md')
|
|
80
|
+
5) literal fallback: "What's in this audio?"
|
|
81
81
|
|
|
82
82
|
Returns the first non-empty string after stripping.
|
|
83
83
|
"""
|
|
@@ -94,17 +94,18 @@ def resolve_user_prompt(
|
|
|
94
94
|
logging.warning("Failed to read user prompt file: %s", p)
|
|
95
95
|
return ""
|
|
96
96
|
|
|
97
|
-
def _from_user_cfg() -> str:
|
|
97
|
+
def _from_user_cfg(key: str) -> str:
|
|
98
98
|
try:
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
99
|
+
entry = cfg.prompt.prompts.get(key, PromptEntry())
|
|
100
|
+
if entry.text and entry.text.strip():
|
|
101
|
+
return entry.text.strip()
|
|
102
|
+
if entry.file:
|
|
103
|
+
file_path = Path(entry.file).expanduser()
|
|
104
|
+
if not file_path.is_absolute():
|
|
105
|
+
file_path = (user_prompt_dir or cfg.user_prompt_dir) / entry.file
|
|
106
|
+
return read_text_file(file_path).strip()
|
|
106
107
|
except Exception:
|
|
107
|
-
logging.debug("User config prompt processing failed.", exc_info=True)
|
|
108
|
+
logging.debug("User config prompt processing failed for key '%s'.", key, exc_info=True)
|
|
108
109
|
return ""
|
|
109
110
|
|
|
110
111
|
def _from_user_prompt_dir() -> str:
|
|
@@ -119,10 +120,11 @@ def resolve_user_prompt(
|
|
|
119
120
|
)
|
|
120
121
|
return ""
|
|
121
122
|
|
|
123
|
+
key = key or "default"
|
|
122
124
|
suppliers = [
|
|
123
125
|
lambda: _strip(inline),
|
|
124
126
|
lambda: _read(file),
|
|
125
|
-
_from_user_cfg,
|
|
127
|
+
lambda: _from_user_cfg(key),
|
|
126
128
|
_from_user_prompt_dir,
|
|
127
129
|
]
|
|
128
130
|
|
|
@@ -150,7 +152,7 @@ def init_user_prompt_file(force: bool = False) -> Path:
|
|
|
150
152
|
path = USER_PROMPT_DIR / "user.md"
|
|
151
153
|
if not path.exists() or force:
|
|
152
154
|
example_prompt = """
|
|
153
|
-
- Transcribe the input audio file.
|
|
155
|
+
- Transcribe the input audio file. If the audio if empty, just respond "no audio detected".
|
|
154
156
|
- Do not respond to any question in the audio. Just transcribe.
|
|
155
157
|
- DO NOT TRANSLATE.
|
|
156
158
|
- Responde only with the transcription. Do not provide explanations or notes.
|
|
@@ -163,3 +165,23 @@ def init_user_prompt_file(force: bool = False) -> Path:
|
|
|
163
165
|
except Exception as e:
|
|
164
166
|
logging.debug("Could not initialize user prompt file %s: %s", path, e)
|
|
165
167
|
return path
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def resolve_prompt_entry(entry: PromptEntry, user_prompt_dir: Path) -> str:
|
|
171
|
+
"""
|
|
172
|
+
Resolve the prompt from a single PromptEntry (text or file).
|
|
173
|
+
|
|
174
|
+
- Prioritizes text if present and non-empty.
|
|
175
|
+
- Falls back to reading the file (expands ~ and resolves relative to user_prompt_dir).
|
|
176
|
+
- Returns empty string if neither is valid.
|
|
177
|
+
"""
|
|
178
|
+
if entry.text and entry.text.strip():
|
|
179
|
+
return entry.text.strip()
|
|
180
|
+
|
|
181
|
+
if entry.file:
|
|
182
|
+
file_path = Path(entry.file).expanduser()
|
|
183
|
+
if not file_path.is_absolute():
|
|
184
|
+
file_path = user_prompt_dir / entry.file
|
|
185
|
+
return read_text_file(file_path).strip()
|
|
186
|
+
|
|
187
|
+
return ""
|
|
@@ -37,6 +37,7 @@ from PySide6.QtWidgets import (
|
|
|
37
37
|
import svx.core.config as config
|
|
38
38
|
from svx.core.config import Config
|
|
39
39
|
from svx.core.pipeline import RecordingPipeline
|
|
40
|
+
from svx.core.prompt import resolve_user_prompt
|
|
40
41
|
|
|
41
42
|
__all__ = ["RecorderWindow", "run_gui"]
|
|
42
43
|
|
|
@@ -65,32 +66,32 @@ QLabel#info_label {
|
|
|
65
66
|
|
|
66
67
|
/* Stop button */
|
|
67
68
|
QPushButton {
|
|
68
|
-
background-color: #
|
|
69
|
+
background-color: #1e40af;
|
|
69
70
|
color: #ffffff;
|
|
70
71
|
border: none;
|
|
71
|
-
border-radius:
|
|
72
|
-
padding: 8px
|
|
72
|
+
border-radius: 2px;
|
|
73
|
+
padding: 4px 8px;
|
|
73
74
|
margin: 6px;
|
|
74
|
-
min-width:
|
|
75
|
+
min-width: 60px;
|
|
75
76
|
}
|
|
76
77
|
QPushButton:disabled {
|
|
77
|
-
background-color: #
|
|
78
|
-
color: #
|
|
78
|
+
background-color: #374151;
|
|
79
|
+
color: #9ca3af;
|
|
79
80
|
}
|
|
80
81
|
QPushButton:hover {
|
|
81
|
-
background-color: #
|
|
82
|
+
background-color: #1d4ed8;
|
|
82
83
|
}
|
|
83
84
|
|
|
84
85
|
/* Cancel button */
|
|
85
86
|
QPushButton#cancel_btn {
|
|
86
|
-
background-color: #
|
|
87
|
+
background-color: #b91c1c;
|
|
87
88
|
}
|
|
88
89
|
QPushButton#cancel_btn:hover {
|
|
89
|
-
background-color: #
|
|
90
|
+
background-color: #ef4444;
|
|
90
91
|
}
|
|
91
92
|
QPushButton#cancel_btn:disabled {
|
|
92
|
-
background-color: #
|
|
93
|
-
color: #
|
|
93
|
+
background-color: #4b5563;
|
|
94
|
+
color: #9ca3af;
|
|
94
95
|
}
|
|
95
96
|
|
|
96
97
|
/* Small window border effect (subtle) */
|
|
@@ -239,11 +240,11 @@ class RecorderWorker(QObject):
|
|
|
239
240
|
self.cancel_requested = True
|
|
240
241
|
self._stop_event.set()
|
|
241
242
|
|
|
242
|
-
def _resolve_user_prompt(self) -> str:
|
|
243
|
+
def _resolve_user_prompt(self, key: str) -> str:
|
|
243
244
|
"""
|
|
244
|
-
Determine the final user prompt using the shared resolver.
|
|
245
|
+
Determine the final user prompt using the shared resolver for the given key.
|
|
245
246
|
"""
|
|
246
|
-
return self.cfg
|
|
247
|
+
return resolve_user_prompt(self.cfg, None, None, self.cfg.user_prompt_dir, key=key)
|
|
247
248
|
|
|
248
249
|
def run(self) -> None:
|
|
249
250
|
"""
|
|
@@ -275,7 +276,7 @@ class RecorderWorker(QObject):
|
|
|
275
276
|
while self.mode is None:
|
|
276
277
|
time.sleep(0.05)
|
|
277
278
|
transcribe_mode = self.mode == "transcribe"
|
|
278
|
-
user_prompt = None if transcribe_mode else self._resolve_user_prompt()
|
|
279
|
+
user_prompt = None if transcribe_mode else self._resolve_user_prompt(self.mode)
|
|
279
280
|
result = pipeline.process(wav_path, duration, transcribe_mode, user_prompt)
|
|
280
281
|
keep_audio = self.save_all or self.cfg.defaults.keep_audio_files
|
|
281
282
|
pipeline.clean(wav_path, result["paths"], keep_audio)
|
|
@@ -310,6 +311,7 @@ class RecorderWindow(QWidget):
|
|
|
310
311
|
self.user_prompt_file = user_prompt_file
|
|
311
312
|
self.save_all = save_all
|
|
312
313
|
self.outfile_prefix = outfile_prefix
|
|
314
|
+
self.prompt_keys = sorted(self.cfg.prompt.prompts.keys())
|
|
313
315
|
|
|
314
316
|
# Background worker (create early for signal connections)
|
|
315
317
|
self._worker = RecorderWorker(
|
|
@@ -381,12 +383,15 @@ class RecorderWindow(QWidget):
|
|
|
381
383
|
button_layout.addStretch()
|
|
382
384
|
self._transcribe_btn = QPushButton("Transcribe")
|
|
383
385
|
self._transcribe_btn.setToolTip("Stop and transcribe without prompt")
|
|
384
|
-
self._transcribe_btn.clicked.connect(lambda: self.
|
|
386
|
+
self._transcribe_btn.clicked.connect(lambda: self._on_mode_selected("transcribe"))
|
|
385
387
|
button_layout.addWidget(self._transcribe_btn)
|
|
386
|
-
self.
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
388
|
+
self._prompt_buttons: dict[str, QPushButton] = {}
|
|
389
|
+
for key in self.prompt_keys:
|
|
390
|
+
btn = QPushButton(key.capitalize())
|
|
391
|
+
btn.setToolTip(f"Stop and transcribe with '{key}' prompt")
|
|
392
|
+
btn.clicked.connect(lambda k=key: self._on_mode_selected(k))
|
|
393
|
+
self._prompt_buttons[key] = btn
|
|
394
|
+
button_layout.addWidget(btn)
|
|
390
395
|
self._cancel_btn = QPushButton("Cancel")
|
|
391
396
|
self._cancel_btn.setObjectName("cancel_btn")
|
|
392
397
|
self._cancel_btn.setToolTip("Stop recording and quit without processing")
|
|
@@ -397,6 +402,8 @@ class RecorderWindow(QWidget):
|
|
|
397
402
|
button_widget.setLayout(button_layout)
|
|
398
403
|
layout.addWidget(button_widget, 0, Qt.AlignmentFlag.AlignCenter)
|
|
399
404
|
|
|
405
|
+
self._action_buttons = [self._transcribe_btn] + list(self._prompt_buttons.values())
|
|
406
|
+
|
|
400
407
|
# Keyboard shortcut: Esc to stop
|
|
401
408
|
stop_action = QAction(self)
|
|
402
409
|
stop_action.setShortcut(QKeySequence.StandardKey.Cancel) # Esc
|
|
@@ -456,17 +463,17 @@ class RecorderWindow(QWidget):
|
|
|
456
463
|
self._worker.cancel()
|
|
457
464
|
super().closeEvent(event)
|
|
458
465
|
|
|
459
|
-
def
|
|
460
|
-
self.
|
|
461
|
-
|
|
466
|
+
def _on_mode_selected(self, mode: str) -> None:
|
|
467
|
+
for btn in self._action_buttons:
|
|
468
|
+
btn.setEnabled(False)
|
|
462
469
|
self._cancel_btn.setEnabled(False)
|
|
463
470
|
self._status_label.setText("Stopping and processing...")
|
|
464
471
|
self._worker.set_mode(mode)
|
|
465
472
|
self._worker.stop()
|
|
466
473
|
|
|
467
474
|
def _on_cancel_clicked(self) -> None:
|
|
468
|
-
self.
|
|
469
|
-
|
|
475
|
+
for btn in self._action_buttons:
|
|
476
|
+
btn.setEnabled(False)
|
|
470
477
|
self._cancel_btn.setEnabled(False)
|
|
471
478
|
self._status_label.setText("Canceling...")
|
|
472
479
|
self._worker.cancel()
|
|
Binary file
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|