supervoxtral 0.1.4__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/PKG-INFO +1 -1
  2. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/README.md +3 -1
  3. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/pyproject.toml +1 -1
  4. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/core/pipeline.py +2 -0
  5. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/core/prompt.py +32 -8
  6. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/ui/qt_app.py +34 -3
  7. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/.gitignore +0 -0
  8. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/AGENTS.md +0 -0
  9. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/LICENSE +0 -0
  10. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/logs/.gitkeep +0 -0
  11. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/macos-shortcut.png +0 -0
  12. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/notes.md +0 -0
  13. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/prompt/.gitkeep +0 -0
  14. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/recordings/.gitkeep +0 -0
  15. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/supervoxtral.gif +0 -0
  16. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/__init__.py +0 -0
  17. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/cli.py +0 -0
  18. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/core/__init__.py +0 -0
  19. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/core/audio.py +0 -0
  20. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/core/clipboard.py +0 -0
  21. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/core/config.py +0 -0
  22. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/core/storage.py +0 -0
  23. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/providers/__init__.py +0 -0
  24. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/providers/base.py +0 -0
  25. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/providers/mistral.py +0 -0
  26. {supervoxtral-0.1.4 → supervoxtral-0.1.5}/transcripts/.gitkeep +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: supervoxtral
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: CLI/GUI audio recorder and transcription client using Mistral Voxtral (chat with audio and transcription).
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -4,7 +4,7 @@
4
4
 
5
5
  SuperVoxtral is a lightweight Python CLI/GUI utility for recording microphone audio and integrating with Mistral's Voxtral APIs for transcription or audio-enabled chat.
6
6
 
7
- Voxtral models, such as `voxtral-mini-latest` and `voxtral-small-latest`, deliver fast inference times, high transcription accuracy across languages and accents, and minimal API costs. In contrast to OpenAI's Whisper, which performs only standalone transcription, Voxtral supports two modes: pure transcription via a dedicated endpoint (no prompts needed) or chat mode, where audio input combines with text prompts for refined outputs—like error correction or contextual summarization—without invoking a separate LLM.
7
+ Voxtral models, such as `voxtral-mini-latest` and `voxtral-small-latest`, deliver fast inference times, high transcription accuracy across languages and accents, and minimal API costs. Voxtral supports two modes: pure transcription via a dedicated endpoint (no prompts needed) or chat mode, where audio input combines with text prompts for refined outputs—like error correction or contextual summarization—without invoking a separate LLM.
8
8
 
9
9
  For instance, use a prompt like: "_Transcribe this audio precisely and remove all minor speech hesitations: "um", "uh", "er", "euh", "ben", etc._"
10
10
 
@@ -26,6 +26,7 @@ The package is available on PyPI. We recommend using `uv` (a fast Python package
26
26
  - For GUI support (includes PySide6):
27
27
  ```
28
28
  uv tool install "supervoxtral[gui]"
29
+ # to upgrade: uv tool upgrade supervoxtral
29
30
  ```
30
31
 
31
32
  - For core CLI only functionality:
@@ -233,6 +234,7 @@ By default in CLI, uses the 'default' prompt from config.toml [prompt.default].
233
234
 
234
235
  ## Changelog
235
236
 
237
+ - 0.1.5: Fix bug in prompt selection (GUI prompt buttons now pass the selected prompt key)
236
238
  - 0.1.4: Support for multiple prompts in config.toml with dynamic GUI buttons for each prompt key
237
239
  - 0.1.3: Minor style update
238
240
  - 0.1.2: Interactive mode in GUI (choose transcribe / prompt / cancel while recording)
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "supervoxtral"
7
- version = "0.1.4"
7
+ version = "0.1.5"
8
8
  description = "CLI/GUI audio recorder and transcription client using Mistral Voxtral (chat with audio and transcription)."
9
9
  requires-python = ">=3.11"
10
10
  license = { text = "MIT" }
@@ -180,6 +180,8 @@ class RecordingPipeline:
180
180
  else:
181
181
  self._status("Transcribe mode activated: no prompt used.")
182
182
 
183
+ logging.debug(f"Applied prompt: {final_user_prompt or 'None (transcribe mode)'}")
184
+
183
185
  paths: dict[str, Path | None] = {"wav": wav_path}
184
186
 
185
187
  # Convert if needed
@@ -12,6 +12,7 @@ Intended to be small and dependency-light so it can be imported broadly.
12
12
  from __future__ import annotations
13
13
 
14
14
  import logging
15
+ from collections.abc import Callable
15
16
  from pathlib import Path
16
17
 
17
18
  from .config import USER_PROMPT_DIR, Config, PromptEntry
@@ -121,22 +122,45 @@ def resolve_user_prompt(
121
122
  return ""
122
123
 
123
124
  key = key or "default"
124
- suppliers = [
125
- lambda: _strip(inline),
126
- lambda: _read(file),
127
- lambda: _from_user_cfg(key),
128
- _from_user_prompt_dir,
125
+
126
+ # Suppliers annotated with a name for tracing which one returned the prompt.
127
+ named_suppliers: list[tuple[str, Callable[[], str]]] = [
128
+ ("inline", lambda: _strip(inline)),
129
+ ("file", lambda: _read(file)),
130
+ (f"prompt_config[{key}]", lambda: _from_user_cfg(key)),
131
+ ("user_prompt_dir/user.md", _from_user_prompt_dir),
129
132
  ]
130
133
 
131
- for supplier in suppliers:
134
+ for name, supplier in named_suppliers:
132
135
  try:
133
136
  val = supplier()
134
137
  if val:
138
+ # Log which supplier provided the prompt and a short snippet for debugging.
139
+ try:
140
+ if len(val) > 200:
141
+ snippet = val[:200] + "..."
142
+ else:
143
+ snippet = val
144
+ logging.info(
145
+ "resolve_user_prompt: supplier '%s' provided prompt snippet: %s",
146
+ name,
147
+ snippet,
148
+ )
149
+ except Exception:
150
+ # Ensure logging failures do not change behavior.
151
+ logging.info(
152
+ "resolve_user_prompt: supplier '%s' provided a prompt "
153
+ "(snippet unavailable)",
154
+ name,
155
+ )
135
156
  return val
136
157
  except Exception as e:
137
- logging.debug("Prompt supplier failed: %s", e)
158
+ logging.debug("Prompt supplier '%s' failed: %s", name, e)
138
159
 
139
- return "What's in this audio?"
160
+ # Final fallback
161
+ fallback = "What's in this audio?"
162
+ logging.info("resolve_user_prompt: no supplier provided a prompt, using fallback: %s", fallback)
163
+ return fallback
140
164
 
141
165
 
142
166
  def init_user_prompt_file(force: bool = False) -> Path:
@@ -273,10 +273,39 @@ class RecorderWorker(QObject):
273
273
  self.canceled.emit()
274
274
  return
275
275
  self.status.emit("Processing in progress...")
276
+ # Wait for user to select mode in the GUI
276
277
  while self.mode is None:
277
278
  time.sleep(0.05)
279
+
280
+ # Log the selected mode/key for debugging prompt application
281
+ try:
282
+ logging.info("RecorderWorker: selected mode/key: %s", self.mode)
283
+ except Exception:
284
+ # ensure failures in logging don't break the worker
285
+ pass
286
+
278
287
  transcribe_mode = self.mode == "transcribe"
279
- user_prompt = None if transcribe_mode else self._resolve_user_prompt(self.mode)
288
+ if transcribe_mode:
289
+ user_prompt = None
290
+ else:
291
+ # Resolve the user prompt for the selected key and log a short snippet
292
+ user_prompt = self._resolve_user_prompt(self.mode)
293
+ try:
294
+ if user_prompt:
295
+ snippet = (
296
+ user_prompt[:200] + "..." if len(user_prompt) > 200 else user_prompt
297
+ )
298
+ else:
299
+ snippet = "<EMPTY>"
300
+ logging.info(
301
+ "RecorderWorker: resolved prompt snippet for key '%s': %s",
302
+ self.mode,
303
+ snippet,
304
+ )
305
+ except Exception:
306
+ # avoid breaking the flow on logging errors
307
+ pass
308
+
280
309
  result = pipeline.process(wav_path, duration, transcribe_mode, user_prompt)
281
310
  keep_audio = self.save_all or self.cfg.defaults.keep_audio_files
282
311
  pipeline.clean(wav_path, result["paths"], keep_audio)
@@ -383,13 +412,15 @@ class RecorderWindow(QWidget):
383
412
  button_layout.addStretch()
384
413
  self._transcribe_btn = QPushButton("Transcribe")
385
414
  self._transcribe_btn.setToolTip("Stop and transcribe without prompt")
386
- self._transcribe_btn.clicked.connect(lambda: self._on_mode_selected("transcribe"))
415
+ self._transcribe_btn.clicked.connect(
416
+ lambda checked=False, m="transcribe": self._on_mode_selected(m)
417
+ )
387
418
  button_layout.addWidget(self._transcribe_btn)
388
419
  self._prompt_buttons: dict[str, QPushButton] = {}
389
420
  for key in self.prompt_keys:
390
421
  btn = QPushButton(key.capitalize())
391
422
  btn.setToolTip(f"Stop and transcribe with '{key}' prompt")
392
- btn.clicked.connect(lambda k=key: self._on_mode_selected(k))
423
+ btn.clicked.connect(lambda checked=False, k=key: self._on_mode_selected(k))
393
424
  self._prompt_buttons[key] = btn
394
425
  button_layout.addWidget(btn)
395
426
  self._cancel_btn = QPushButton("Cancel")
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes