supervoxtral 0.1.3__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/AGENTS.md +4 -4
  2. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/PKG-INFO +1 -1
  3. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/README.md +17 -7
  4. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/pyproject.toml +1 -1
  5. supervoxtral-0.1.5/supervoxtral.gif +0 -0
  6. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/cli.py +1 -1
  7. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/core/config.py +43 -8
  8. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/core/pipeline.py +2 -0
  9. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/core/prompt.py +69 -23
  10. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/ui/qt_app.py +52 -14
  11. supervoxtral-0.1.3/supervoxtral.gif +0 -0
  12. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/.gitignore +0 -0
  13. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/LICENSE +0 -0
  14. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/logs/.gitkeep +0 -0
  15. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/macos-shortcut.png +0 -0
  16. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/notes.md +0 -0
  17. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/prompt/.gitkeep +0 -0
  18. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/recordings/.gitkeep +0 -0
  19. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/__init__.py +0 -0
  20. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/core/__init__.py +0 -0
  21. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/core/audio.py +0 -0
  22. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/core/clipboard.py +0 -0
  23. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/core/storage.py +0 -0
  24. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/providers/__init__.py +0 -0
  25. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/providers/base.py +0 -0
  26. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/svx/providers/mistral.py +0 -0
  27. {supervoxtral-0.1.3 → supervoxtral-0.1.5}/transcripts/.gitkeep +0 -0
@@ -13,7 +13,7 @@ supervoxtral/
13
13
  │ │ ├── audio.py # Recording, ffmpeg detection/conversion
14
14
  │ │ ├── config.py # Structured Config dataclasses, loading, resolution, logging setup
15
15
  │ │ ├── pipeline.py # Centralized RecordingPipeline for CLI/GUI unification
16
- │ │ ├── prompt.py # Prompt resolution (via Config)
16
+ │ │ ├── prompt.py # Prompt resolution (supports multiple prompts via Config dict, key-based)
17
17
  │ │ └── storage.py # Save transcripts and raw JSON (conditional on keep_transcript_files)
18
18
  │ ├── providers/ # API integrations
19
19
  │ │ ├── __init__.py # Provider registry (get_provider with Config support)
@@ -33,10 +33,10 @@ supervoxtral/
33
33
  ## Typical Execution Flow
34
34
 
35
35
  - **Entry**: `svx/cli.py` Typer `record` command parses args (e.g., --prompt, --save-all, --gui, --transcribe).
36
- - **Config & Prompt**: Load `Config` via `Config.load()` (`core/config.py`); if transcribe_mode, skip prompt resolution; else resolve prompt with `cfg.resolve_prompt()` (`core/prompt.py`).
37
- - **Pipeline**: Run `RecordingPipeline` (`core/pipeline.py`): record WAV/stop (`core/audio.py`), optional conversion (ffmpeg), get provider/init (`providers/__init__.py`, e.g., `mistral.py` from `cfg`); if transcribe_mode (CLI only): no prompt, model override to voxtral-mini-latest (with warning if changed), pass transcribe_mode to provider.transcribe; for GUI: --transcribe ignored (warning), recording starts immediately, uses modular record()/process()/clean() with dynamic mode (Transcribe: no prompt, model override; Prompt: resolved prompt); transcribe, conditional save (`core/storage.py` based on `keep_*`/`save_all`), clipboard copy, logging setup.
36
+ - **Config & Prompt**: Load `Config` via `Config.load()` (`core/config.py`); supports a dict of prompts in config.toml (e.g., [prompt.default], [prompt.other]); if transcribe_mode, skip prompt resolution; else the CLI resolves the prompt with `cfg.resolve_prompt()` (always the "default" key), while the GUI calls `resolve_user_prompt(..., key=<selected key>)` directly (`core/prompt.py`).
37
+ - **Pipeline**: Run `RecordingPipeline` (`core/pipeline.py`): record WAV/stop (`core/audio.py`), optional conversion (ffmpeg), get provider/init (`providers/__init__.py`, e.g., `mistral.py` from `cfg`); if transcribe_mode (CLI only): no prompt, model override to voxtral-mini-latest (with warning if changed), pass transcribe_mode to provider.transcribe; for GUI: --transcribe ignored (warning), recording starts immediately, uses modular record()/process()/clean() with dynamic mode (Transcribe: no prompt, model override; Prompt key: resolved prompt for selected key); transcribe, conditional save (`core/storage.py` based on `keep_*`/`save_all`), clipboard copy, logging setup.
38
38
  - **Cleanup**: Temp files auto-deleted (tempfile) if `keep_*=false`; dirs created only if persistence enabled.
39
- - **End**: Return `{"text": str, "raw": dict, "duration": float, "paths": dict}`; CLI prints result, GUI emits progress/updates via callback (buttons: 'Transcribe' for stop/transcribe without prompt; 'Prompt' for stop/use resolved prompt; default 'Prompt' on Esc/close).
39
+ - **End**: Return `{"text": str, "raw": dict, "duration": float, "paths": dict}`; CLI prints result (uses "default" prompt), GUI emits progress/updates via callback (buttons: 'Transcribe' for no prompt; capitalized prompt keys (e.g., 'Default', 'Test') for selected prompt; 'Cancel'; Esc/close cancels).
40
40
 
41
41
  ## Build & test
42
42
  ```bash
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: supervoxtral
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: CLI/GUI audio recorder and transcription client using Mistral Voxtral (chat with audio and transcription).
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -4,7 +4,7 @@
4
4
 
5
5
  SuperVoxtral is a lightweight Python CLI/GUI utility for recording microphone audio and integrating with Mistral's Voxtral APIs for transcription or audio-enabled chat.
6
6
 
7
- Voxtral models, such as `voxtral-mini-latest` and `voxtral-small-latest`, deliver fast inference times, high transcription accuracy across languages and accents, and minimal API costs. In contrast to OpenAI's Whisper, which performs only standalone transcription, Voxtral supports two modes: pure transcription via a dedicated endpoint (no prompts needed) or chat mode, where audio input combines with text prompts for refined outputs—like error correction or contextual summarization—without invoking a separate LLM.
7
+ Voxtral models, such as `voxtral-mini-latest` and `voxtral-small-latest`, deliver fast inference times, high transcription accuracy across languages and accents, and minimal API costs. Voxtral supports two modes: pure transcription via a dedicated endpoint (no prompts needed) or chat mode, where audio input combines with text prompts for refined outputs—like error correction or contextual summarization—without invoking a separate LLM.
8
8
 
9
9
  For instance, use a prompt like: "_Transcribe this audio precisely and remove all minor speech hesitations: "um", "uh", "er", "euh", "ben", etc._"
10
10
 
@@ -26,6 +26,7 @@ The package is available on PyPI. We recommend using `uv` (a fast Python package
26
26
  - For GUI support (includes PySide6):
27
27
  ```
28
28
  uv tool install "supervoxtral[gui]"
29
+ # to update: uv tool upgrade supervoxtral
29
30
  ```
30
31
 
31
32
  - For core CLI only functionality:
@@ -84,7 +85,7 @@ To get started quickly with SuperVoxtral:
84
85
  ```
85
86
 
86
87
  3. Launch the GUI: `svx record --gui`
87
- This opens the minimal GUI, starts recording immediately; click 'Transcribe' for pure transcription (no prompt) or 'Prompt' for prompted transcription (resolved prompt); --transcribe ignored with warning (results copied to clipboard).
88
+ This opens the minimal GUI, starts recording immediately; click 'Transcribe' for pure transcription (no prompt) or a button for each configured prompt (e.g., 'Default', 'Mail', 'Translate') for prompted transcription using the selected prompt; --transcribe ignored with warning (results copied to clipboard).
88
89
 
89
90
  ### macOS Shortcuts Integration
90
91
 
@@ -171,13 +172,18 @@ copy = true
171
172
  # Log level: "DEBUG" | "INFO" | "WARNING" | "ERROR"
172
173
  log_level = "INFO"
173
174
 
174
- [prompt]
175
+ [prompt.default]
175
176
  # Default user prompt source:
176
177
  # - Option 1: Use a file (recommended)
177
178
  file = "~/.config/supervoxtral/prompt/user.md"
178
179
  #
179
180
  # - Option 2: Inline prompt (less recommended for long text)
180
181
  # text = "Please transcribe the audio and provide a concise summary in French."
182
+
183
+ [prompt.test]
184
+ # Example additional prompt
185
+ # file = "/path/to/another_prompt.md"
186
+ # text = "Summarize the meeting in bullet points."
181
187
  ```
182
188
 
183
189
  **Configuration is centralized via a structured `Config` object loaded from your user configuration file (`config.toml`). CLI arguments override select values (e.g., prompt, log level), but most defaults (provider, model, keep flags) come from `config.toml`. No environment variables are used for API keys or settings.**
@@ -219,14 +225,18 @@ svx record [OPTIONS]
219
225
  - Interactive mode: recording starts immediately; click 'Transcribe' (pure transcription, no prompt) or one of the prompt buttons, one per configured prompt key (e.g., 'Default'), to use that key's resolved prompt; --transcribe ignored with warning. GUI respects config.toml and CLI flags (e.g., `--gui --save-all`).
220
226
 
221
227
  **Prompt Resolution Priority** (for non-transcribe mode):
228
+ By default, the CLI uses the 'default' prompt from config.toml (`[prompt.default]`). Prompt sources are tried in this order, and the first non-empty one wins:
222
229
  1. CLI `--user-prompt` or `--user-prompt-file`
223
- 2. config.toml [prompt] section (text or file)
224
- 3. User prompt file (user.md in config dir)
225
- 4. Fallback: "What's in this audio?"
230
+ 2. Specified prompt key (future: via --prompt-key; currently implicit 'default')
231
+ 3. config.toml [prompt.default] (text or file)
232
+ 4. User prompt file (user.md in config dir)
233
+ 5. Fallback: "What's in this audio?"
226
234
 
227
235
  ## Changelog
228
236
 
229
- - 0.1.3: Minor sytle update
237
+ - 0.1.5: Fix bug on prompt selecting
238
+ - 0.1.4: Support for multiple prompts in config.toml with dynamic GUI buttons for each prompt key
239
+ - 0.1.3: Minor style update
230
240
  - 0.1.2: Interactive mode in GUI (choose transcribe / prompt / cancel while recording)
231
241
  - 0.1.1: Minor updates to default config and default prompt
232
242
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "supervoxtral"
7
- version = "0.1.3"
7
+ version = "0.1.5"
8
8
  description = "CLI/GUI audio recorder and transcription client using Mistral Voxtral (chat with audio and transcription)."
9
9
  requires-python = ">=3.11"
10
10
  license = { text = "MIT" }
Binary file
@@ -72,7 +72,7 @@ def config_show() -> None:
72
72
  user_prompt_file = cfg.user_prompt_dir / "user.md"
73
73
 
74
74
  defaults_section = asdict(cfg.defaults)
75
- prompt_section = asdict(cfg.prompt)
75
+ prompt_section = {k: asdict(e) for k, e in cfg.prompt.prompts.items()}
76
76
 
77
77
  # Resolve prompt source (same logic as record command, but read-only)
78
78
  resolved_prompt = cfg.resolve_prompt(None, None)
@@ -242,13 +242,15 @@ def init_user_config(force: bool = False, prompt_file: Path | None = None) -> Pa
242
242
  "copy = true\n\n"
243
243
  '# Log level: "DEBUG" | "INFO" | "WARNING" | "ERROR"\n'
244
244
  'log_level = "INFO"\n\n'
245
- "[prompt]\n"
245
+ "[prompt.default]\n"
246
246
  "# Default user prompt source:\n"
247
247
  "# - Option 1: Use a file (recommended)\n"
248
248
  f'file = "{str(prompt_file)}"\n'
249
249
  "#\n"
250
250
  "# - Option 2: Inline prompt (less recommended for long text)\n"
251
251
  '# text = "Please transcribe the audio and provide a concise summary in French."\n'
252
+ "#\n"
253
+ "# For multiple prompts in future, add [prompt.other] sections.\n"
252
254
  )
253
255
 
254
256
  if not USER_CONFIG_FILE.exists() or force:
@@ -282,11 +284,16 @@ class DefaultsConfig:
282
284
 
283
285
 
284
286
  @dataclass
285
- class PromptConfig:
287
+ class PromptEntry:
286
288
  text: str | None = None
287
289
  file: str | None = None
288
290
 
289
291
 
292
+ @dataclass
293
+ class PromptConfig:
294
+ prompts: dict[str, PromptEntry] = field(default_factory=lambda: {"default": PromptEntry()})
295
+
296
+
290
297
  @dataclass
291
298
  class Config:
292
299
  providers: dict[str, ProviderConfig] = field(default_factory=dict)
@@ -356,11 +363,39 @@ class Config:
356
363
  providers_data[name] = ProviderConfig(api_key=api_key)
357
364
  # Prompt
358
365
  prompt_raw = user_config.get("prompt", {})
359
- prompt_data = {
360
- "text": prompt_raw.get("text") if isinstance(prompt_raw.get("text"), str) else None,
361
- "file": prompt_raw.get("file") if isinstance(prompt_raw.get("file"), str) else None,
362
- }
363
- prompt = PromptConfig(**prompt_data)
366
+ prompts_data: dict[str, PromptEntry] = {}
367
+ if isinstance(prompt_raw, dict):
368
+ if any(k in prompt_raw for k in ["text", "file"]): # old flat style
369
+ logging.warning(
370
+ "Old [prompt] format detected in %s; "
371
+ "please migrate to [prompt.default] manually.",
372
+ USER_CONFIG_FILE,
373
+ )
374
+ entry = PromptEntry(
375
+ text=prompt_raw.get("text")
376
+ if isinstance(prompt_raw.get("text"), str)
377
+ else None,
378
+ file=prompt_raw.get("file")
379
+ if isinstance(prompt_raw.get("file"), str)
380
+ else None,
381
+ )
382
+ prompts_data["default"] = entry
383
+ else: # new nested style
384
+ for key, entry_raw in prompt_raw.items():
385
+ if isinstance(entry_raw, dict):
386
+ entry = PromptEntry(
387
+ text=entry_raw.get("text")
388
+ if isinstance(entry_raw.get("text"), str)
389
+ else None,
390
+ file=entry_raw.get("file")
391
+ if isinstance(entry_raw.get("file"), str)
392
+ else None,
393
+ )
394
+ prompts_data[key] = entry
395
+ # Ensure "default" always exists
396
+ if "default" not in prompts_data:
397
+ prompts_data["default"] = PromptEntry()
398
+ prompt = PromptConfig(prompts=prompts_data)
364
399
  data = {
365
400
  "defaults": defaults,
366
401
  "providers": providers_data,
@@ -376,7 +411,7 @@ class Config:
376
411
  def resolve_prompt(self, inline: str | None = None, file_path: Path | None = None) -> str:
377
412
  from svx.core.prompt import resolve_user_prompt
378
413
 
379
- return resolve_user_prompt(self, inline, file_path, self.user_prompt_dir)
414
+ return resolve_user_prompt(self, inline, file_path, self.user_prompt_dir, key="default")
380
415
 
381
416
  def get_provider_config(self, name: str) -> dict[str, Any]:
382
417
  return asdict(self.providers.get(name, ProviderConfig()))
@@ -180,6 +180,8 @@ class RecordingPipeline:
180
180
  else:
181
181
  self._status("Transcribe mode activated: no prompt used.")
182
182
 
183
+ logging.debug(f"Applied prompt: {final_user_prompt or 'None (transcribe mode)'}")
184
+
183
185
  paths: dict[str, Path | None] = {"wav": wav_path}
184
186
 
185
187
  # Convert if needed
@@ -12,9 +12,10 @@ Intended to be small and dependency-light so it can be imported broadly.
12
12
  from __future__ import annotations
13
13
 
14
14
  import logging
15
+ from collections.abc import Callable
15
16
  from pathlib import Path
16
17
 
17
- from .config import USER_PROMPT_DIR, Config
18
+ from .config import USER_PROMPT_DIR, Config, PromptEntry
18
19
 
19
20
  __all__ = [
20
21
  "read_text_file",
@@ -68,16 +69,16 @@ def resolve_user_prompt(
68
69
  inline: str | None = None,
69
70
  file: Path | None = None,
70
71
  user_prompt_dir: Path | None = None,
72
+ key: str | None = None,
71
73
  ) -> str:
72
74
  """
73
75
  Resolve the effective user prompt from multiple sources, by priority:
74
76
 
75
77
  1) inline text (CLI --user-prompt)
76
78
  2) explicit file (CLI --user-prompt-file)
77
- 3) user config inline text (cfg.prompt.text)
78
- 4) user config file path (cfg.prompt.file)
79
- 5) user prompt dir file (user_prompt_dir / 'user.md')
80
- 6) literal fallback: "What's in this audio?"
79
+ 3) user config prompt for key (cfg.prompt.prompts[key or "default"])
80
+ 4) user prompt dir file (user_prompt_dir / 'user.md')
81
+ 5) literal fallback: "What's in this audio?"
81
82
 
82
83
  Returns the first non-empty string after stripping.
83
84
  """
@@ -94,17 +95,18 @@ def resolve_user_prompt(
94
95
  logging.warning("Failed to read user prompt file: %s", p)
95
96
  return ""
96
97
 
97
- def _from_user_cfg() -> str:
98
+ def _from_user_cfg(key: str) -> str:
98
99
  try:
99
- cfg_prompt = cfg.prompt
100
- cfg_text = cfg_prompt.text
101
- if isinstance(cfg_text, str) and cfg_text.strip():
102
- return cfg_text.strip()
103
- cfg_file = cfg_prompt.file
104
- if isinstance(cfg_file, str) and cfg_file.strip():
105
- return read_text_file(Path(cfg_file).expanduser()).strip()
100
+ entry = cfg.prompt.prompts.get(key, PromptEntry())
101
+ if entry.text and entry.text.strip():
102
+ return entry.text.strip()
103
+ if entry.file:
104
+ file_path = Path(entry.file).expanduser()
105
+ if not file_path.is_absolute():
106
+ file_path = (user_prompt_dir or cfg.user_prompt_dir) / entry.file
107
+ return read_text_file(file_path).strip()
106
108
  except Exception:
107
- logging.debug("User config prompt processing failed.", exc_info=True)
109
+ logging.debug("User config prompt processing failed for key '%s'.", key, exc_info=True)
108
110
  return ""
109
111
 
110
112
  def _from_user_prompt_dir() -> str:
@@ -119,22 +121,46 @@ def resolve_user_prompt(
119
121
  )
120
122
  return ""
121
123
 
122
- suppliers = [
123
- lambda: _strip(inline),
124
- lambda: _read(file),
125
- _from_user_cfg,
126
- _from_user_prompt_dir,
124
+ key = key or "default"
125
+
126
+ # Suppliers annotated with a name for tracing which one returned the prompt.
127
+ named_suppliers: list[tuple[str, Callable[[], str]]] = [
128
+ ("inline", lambda: _strip(inline)),
129
+ ("file", lambda: _read(file)),
130
+ (f"prompt_config[{key}]", lambda: _from_user_cfg(key)),
131
+ ("user_prompt_dir/user.md", _from_user_prompt_dir),
127
132
  ]
128
133
 
129
- for supplier in suppliers:
134
+ for name, supplier in named_suppliers:
130
135
  try:
131
136
  val = supplier()
132
137
  if val:
138
+ # Log which supplier provided the prompt and a short snippet for debugging.
139
+ try:
140
+ if len(val) > 200:
141
+ snippet = val[:200] + "..."
142
+ else:
143
+ snippet = val
144
+ logging.info(
145
+ "resolve_user_prompt: supplier '%s' provided prompt snippet: %s",
146
+ name,
147
+ snippet,
148
+ )
149
+ except Exception:
150
+ # Ensure logging failures do not change behavior.
151
+ logging.info(
152
+ "resolve_user_prompt: supplier '%s' provided a prompt "
153
+ "(snippet unavailable)",
154
+ name,
155
+ )
133
156
  return val
134
157
  except Exception as e:
135
- logging.debug("Prompt supplier failed: %s", e)
158
+ logging.debug("Prompt supplier '%s' failed: %s", name, e)
136
159
 
137
- return "What's in this audio?"
160
+ # Final fallback
161
+ fallback = "What's in this audio?"
162
+ logging.info("resolve_user_prompt: no supplier provided a prompt, using fallback: %s", fallback)
163
+ return fallback
138
164
 
139
165
 
140
166
  def init_user_prompt_file(force: bool = False) -> Path:
@@ -150,7 +176,7 @@ def init_user_prompt_file(force: bool = False) -> Path:
150
176
  path = USER_PROMPT_DIR / "user.md"
151
177
  if not path.exists() or force:
152
178
  example_prompt = """
153
- - Transcribe the input audio file.
179
+ - Transcribe the input audio file. If the audio if empty, just respond "no audio detected".
154
180
  - Do not respond to any question in the audio. Just transcribe.
155
181
  - DO NOT TRANSLATE.
156
182
  - Responde only with the transcription. Do not provide explanations or notes.
@@ -163,3 +189,23 @@ def init_user_prompt_file(force: bool = False) -> Path:
163
189
  except Exception as e:
164
190
  logging.debug("Could not initialize user prompt file %s: %s", path, e)
165
191
  return path
192
+
193
+
194
+ def resolve_prompt_entry(entry: PromptEntry, user_prompt_dir: Path) -> str:
195
+ """
196
+ Resolve the prompt from a single PromptEntry (text or file).
197
+
198
+ - Prioritizes text if present and non-empty.
199
+ - Falls back to reading the file (expands ~ and resolves relative to user_prompt_dir).
200
+ - Returns empty string if neither is valid.
201
+ """
202
+ if entry.text and entry.text.strip():
203
+ return entry.text.strip()
204
+
205
+ if entry.file:
206
+ file_path = Path(entry.file).expanduser()
207
+ if not file_path.is_absolute():
208
+ file_path = user_prompt_dir / entry.file
209
+ return read_text_file(file_path).strip()
210
+
211
+ return ""
@@ -37,6 +37,7 @@ from PySide6.QtWidgets import (
37
37
  import svx.core.config as config
38
38
  from svx.core.config import Config
39
39
  from svx.core.pipeline import RecordingPipeline
40
+ from svx.core.prompt import resolve_user_prompt
40
41
 
41
42
  __all__ = ["RecorderWindow", "run_gui"]
42
43
 
@@ -239,11 +240,11 @@ class RecorderWorker(QObject):
239
240
  self.cancel_requested = True
240
241
  self._stop_event.set()
241
242
 
242
- def _resolve_user_prompt(self) -> str:
243
+ def _resolve_user_prompt(self, key: str) -> str:
243
244
  """
244
- Determine the final user prompt using the shared resolver.
245
+ Determine the final user prompt using the shared resolver for the given key.
245
246
  """
246
- return self.cfg.resolve_prompt(self.user_prompt, self.user_prompt_file)
247
+ return resolve_user_prompt(self.cfg, None, None, self.cfg.user_prompt_dir, key=key)
247
248
 
248
249
  def run(self) -> None:
249
250
  """
@@ -272,10 +273,39 @@ class RecorderWorker(QObject):
272
273
  self.canceled.emit()
273
274
  return
274
275
  self.status.emit("Processing in progress...")
276
+ # Wait for user to select mode in the GUI
275
277
  while self.mode is None:
276
278
  time.sleep(0.05)
279
+
280
+ # Log the selected mode/key for debugging prompt application
281
+ try:
282
+ logging.info("RecorderWorker: selected mode/key: %s", self.mode)
283
+ except Exception:
284
+ # ensure failures in logging don't break the worker
285
+ pass
286
+
277
287
  transcribe_mode = self.mode == "transcribe"
278
- user_prompt = None if transcribe_mode else self._resolve_user_prompt()
288
+ if transcribe_mode:
289
+ user_prompt = None
290
+ else:
291
+ # Resolve the user prompt for the selected key and log a short snippet
292
+ user_prompt = self._resolve_user_prompt(self.mode)
293
+ try:
294
+ if user_prompt:
295
+ snippet = (
296
+ user_prompt[:200] + "..." if len(user_prompt) > 200 else user_prompt
297
+ )
298
+ else:
299
+ snippet = "<EMPTY>"
300
+ logging.info(
301
+ "RecorderWorker: resolved prompt snippet for key '%s': %s",
302
+ self.mode,
303
+ snippet,
304
+ )
305
+ except Exception:
306
+ # avoid breaking the flow on logging errors
307
+ pass
308
+
279
309
  result = pipeline.process(wav_path, duration, transcribe_mode, user_prompt)
280
310
  keep_audio = self.save_all or self.cfg.defaults.keep_audio_files
281
311
  pipeline.clean(wav_path, result["paths"], keep_audio)
@@ -310,6 +340,7 @@ class RecorderWindow(QWidget):
310
340
  self.user_prompt_file = user_prompt_file
311
341
  self.save_all = save_all
312
342
  self.outfile_prefix = outfile_prefix
343
+ self.prompt_keys = sorted(self.cfg.prompt.prompts.keys())
313
344
 
314
345
  # Background worker (create early for signal connections)
315
346
  self._worker = RecorderWorker(
@@ -381,12 +412,17 @@ class RecorderWindow(QWidget):
381
412
  button_layout.addStretch()
382
413
  self._transcribe_btn = QPushButton("Transcribe")
383
414
  self._transcribe_btn.setToolTip("Stop and transcribe without prompt")
384
- self._transcribe_btn.clicked.connect(lambda: self._on_button_clicked("transcribe"))
415
+ self._transcribe_btn.clicked.connect(
416
+ lambda checked=False, m="transcribe": self._on_mode_selected(m)
417
+ )
385
418
  button_layout.addWidget(self._transcribe_btn)
386
- self._prompt_btn = QPushButton("Prompt")
387
- self._prompt_btn.setToolTip("Stop and transcribe with prompt")
388
- self._prompt_btn.clicked.connect(lambda: self._on_button_clicked("prompt"))
389
- button_layout.addWidget(self._prompt_btn)
419
+ self._prompt_buttons: dict[str, QPushButton] = {}
420
+ for key in self.prompt_keys:
421
+ btn = QPushButton(key.capitalize())
422
+ btn.setToolTip(f"Stop and transcribe with '{key}' prompt")
423
+ btn.clicked.connect(lambda checked=False, k=key: self._on_mode_selected(k))
424
+ self._prompt_buttons[key] = btn
425
+ button_layout.addWidget(btn)
390
426
  self._cancel_btn = QPushButton("Cancel")
391
427
  self._cancel_btn.setObjectName("cancel_btn")
392
428
  self._cancel_btn.setToolTip("Stop recording and quit without processing")
@@ -397,6 +433,8 @@ class RecorderWindow(QWidget):
397
433
  button_widget.setLayout(button_layout)
398
434
  layout.addWidget(button_widget, 0, Qt.AlignmentFlag.AlignCenter)
399
435
 
436
+ self._action_buttons = [self._transcribe_btn] + list(self._prompt_buttons.values())
437
+
400
438
  # Keyboard shortcut: Esc to stop
401
439
  stop_action = QAction(self)
402
440
  stop_action.setShortcut(QKeySequence.StandardKey.Cancel) # Esc
@@ -456,17 +494,17 @@ class RecorderWindow(QWidget):
456
494
  self._worker.cancel()
457
495
  super().closeEvent(event)
458
496
 
459
- def _on_button_clicked(self, mode: str) -> None:
460
- self._transcribe_btn.setEnabled(False)
461
- self._prompt_btn.setEnabled(False)
497
+ def _on_mode_selected(self, mode: str) -> None:
498
+ for btn in self._action_buttons:
499
+ btn.setEnabled(False)
462
500
  self._cancel_btn.setEnabled(False)
463
501
  self._status_label.setText("Stopping and processing...")
464
502
  self._worker.set_mode(mode)
465
503
  self._worker.stop()
466
504
 
467
505
  def _on_cancel_clicked(self) -> None:
468
- self._transcribe_btn.setEnabled(False)
469
- self._prompt_btn.setEnabled(False)
506
+ for btn in self._action_buttons:
507
+ btn.setEnabled(False)
470
508
  self._cancel_btn.setEnabled(False)
471
509
  self._status_label.setText("Canceling...")
472
510
  self._worker.cancel()
Binary file
File without changes
File without changes
File without changes
File without changes