supervoxtral 0.1.4__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/PKG-INFO +1 -1
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/README.md +3 -1
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/pyproject.toml +1 -1
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/core/pipeline.py +2 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/core/prompt.py +32 -8
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/ui/qt_app.py +34 -3
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/.gitignore +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/AGENTS.md +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/LICENSE +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/logs/.gitkeep +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/macos-shortcut.png +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/notes.md +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/prompt/.gitkeep +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/recordings/.gitkeep +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/supervoxtral.gif +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/__init__.py +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/cli.py +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/core/__init__.py +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/core/audio.py +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/core/clipboard.py +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/core/config.py +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/core/storage.py +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/providers/__init__.py +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/providers/base.py +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/providers/mistral.py +0 -0
- {supervoxtral-0.1.4 → supervoxtral-0.1.5}/transcripts/.gitkeep +0 -0
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
SuperVoxtral is a lightweight Python CLI/GUI utility for recording microphone audio and integrating with Mistral's Voxtral APIs for transcription or audio-enabled chat.
|
|
6
6
|
|
|
7
|
-
Voxtral models, such as `voxtral-mini-latest` and `voxtral-small-latest`, deliver fast inference times, high transcription accuracy across languages and accents, and minimal API costs.
|
|
7
|
+
Voxtral models, such as `voxtral-mini-latest` and `voxtral-small-latest`, deliver fast inference times, high transcription accuracy across languages and accents, and minimal API costs. Voxtral supports two modes: pure transcription via a dedicated endpoint (no prompts needed) or chat mode, where audio input combines with text prompts for refined outputs—like error correction or contextual summarization—without invoking a separate LLM.
|
|
8
8
|
|
|
9
9
|
For instance, use a prompt like: "_Transcribe this audio precisely and remove all minor speech hesitations: "um", "uh", "er", "euh", "ben", etc._"
|
|
10
10
|
|
|
@@ -26,6 +26,7 @@ The package is available on PyPI. We recommend using `uv` (a fast Python package
|
|
|
26
26
|
- For GUI support (includes PySide6):
|
|
27
27
|
```
|
|
28
28
|
uv tool install "supervoxtral[gui]"
|
|
29
|
+
# to update: uv tool upgrade supervoxtral
|
|
29
30
|
```
|
|
30
31
|
|
|
31
32
|
- For core CLI only functionality:
|
|
@@ -233,6 +234,7 @@ By default in CLI, uses the 'default' prompt from config.toml [prompt.default].
|
|
|
233
234
|
|
|
234
235
|
## Changelog
|
|
235
236
|
|
|
237
|
+
- 0.1.5: Fix bug in prompt selection
|
|
236
238
|
- 0.1.4: Support for multiple prompts in config.toml with dynamic GUI buttons for each prompt key
|
|
237
239
|
- 0.1.3: Minor style update
|
|
238
240
|
- 0.1.2: Interactive mode in GUI (choose transcribe / prompt / cancel while recording)
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "supervoxtral"
|
|
7
|
-
version = "0.1.4"
|
|
7
|
+
version = "0.1.5"
|
|
8
8
|
description = "CLI/GUI audio recorder and transcription client using Mistral Voxtral (chat with audio and transcription)."
|
|
9
9
|
requires-python = ">=3.11"
|
|
10
10
|
license = { text = "MIT" }
|
|
@@ -180,6 +180,8 @@ class RecordingPipeline:
|
|
|
180
180
|
else:
|
|
181
181
|
self._status("Transcribe mode activated: no prompt used.")
|
|
182
182
|
|
|
183
|
+
logging.debug(f"Applied prompt: {final_user_prompt or 'None (transcribe mode)'}")
|
|
184
|
+
|
|
183
185
|
paths: dict[str, Path | None] = {"wav": wav_path}
|
|
184
186
|
|
|
185
187
|
# Convert if needed
|
|
@@ -12,6 +12,7 @@ Intended to be small and dependency-light so it can be imported broadly.
|
|
|
12
12
|
from __future__ import annotations
|
|
13
13
|
|
|
14
14
|
import logging
|
|
15
|
+
from collections.abc import Callable
|
|
15
16
|
from pathlib import Path
|
|
16
17
|
|
|
17
18
|
from .config import USER_PROMPT_DIR, Config, PromptEntry
|
|
@@ -121,22 +122,45 @@ def resolve_user_prompt(
|
|
|
121
122
|
return ""
|
|
122
123
|
|
|
123
124
|
key = key or "default"
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
lambda:
|
|
128
|
-
|
|
125
|
+
|
|
126
|
+
# Suppliers annotated with a name for tracing which one returned the prompt.
|
|
127
|
+
named_suppliers: list[tuple[str, Callable[[], str]]] = [
|
|
128
|
+
("inline", lambda: _strip(inline)),
|
|
129
|
+
("file", lambda: _read(file)),
|
|
130
|
+
(f"prompt_config[{key}]", lambda: _from_user_cfg(key)),
|
|
131
|
+
("user_prompt_dir/user.md", _from_user_prompt_dir),
|
|
129
132
|
]
|
|
130
133
|
|
|
131
|
-
for supplier in
|
|
134
|
+
for name, supplier in named_suppliers:
|
|
132
135
|
try:
|
|
133
136
|
val = supplier()
|
|
134
137
|
if val:
|
|
138
|
+
# Log which supplier provided the prompt and a short snippet for debugging.
|
|
139
|
+
try:
|
|
140
|
+
if len(val) > 200:
|
|
141
|
+
snippet = val[:200] + "..."
|
|
142
|
+
else:
|
|
143
|
+
snippet = val
|
|
144
|
+
logging.info(
|
|
145
|
+
"resolve_user_prompt: supplier '%s' provided prompt snippet: %s",
|
|
146
|
+
name,
|
|
147
|
+
snippet,
|
|
148
|
+
)
|
|
149
|
+
except Exception:
|
|
150
|
+
# Ensure logging failures do not change behavior.
|
|
151
|
+
logging.info(
|
|
152
|
+
"resolve_user_prompt: supplier '%s' provided a prompt "
|
|
153
|
+
"(snippet unavailable)",
|
|
154
|
+
name,
|
|
155
|
+
)
|
|
135
156
|
return val
|
|
136
157
|
except Exception as e:
|
|
137
|
-
logging.debug("Prompt supplier failed: %s", e)
|
|
158
|
+
logging.debug("Prompt supplier '%s' failed: %s", name, e)
|
|
138
159
|
|
|
139
|
-
|
|
160
|
+
# Final fallback
|
|
161
|
+
fallback = "What's in this audio?"
|
|
162
|
+
logging.info("resolve_user_prompt: no supplier provided a prompt, using fallback: %s", fallback)
|
|
163
|
+
return fallback
|
|
140
164
|
|
|
141
165
|
|
|
142
166
|
def init_user_prompt_file(force: bool = False) -> Path:
|
|
@@ -273,10 +273,39 @@ class RecorderWorker(QObject):
|
|
|
273
273
|
self.canceled.emit()
|
|
274
274
|
return
|
|
275
275
|
self.status.emit("Processing in progress...")
|
|
276
|
+
# Wait for user to select mode in the GUI
|
|
276
277
|
while self.mode is None:
|
|
277
278
|
time.sleep(0.05)
|
|
279
|
+
|
|
280
|
+
# Log the selected mode/key for debugging prompt application
|
|
281
|
+
try:
|
|
282
|
+
logging.info("RecorderWorker: selected mode/key: %s", self.mode)
|
|
283
|
+
except Exception:
|
|
284
|
+
# ensure failures in logging don't break the worker
|
|
285
|
+
pass
|
|
286
|
+
|
|
278
287
|
transcribe_mode = self.mode == "transcribe"
|
|
279
|
-
|
|
288
|
+
if transcribe_mode:
|
|
289
|
+
user_prompt = None
|
|
290
|
+
else:
|
|
291
|
+
# Resolve the user prompt for the selected key and log a short snippet
|
|
292
|
+
user_prompt = self._resolve_user_prompt(self.mode)
|
|
293
|
+
try:
|
|
294
|
+
if user_prompt:
|
|
295
|
+
snippet = (
|
|
296
|
+
user_prompt[:200] + "..." if len(user_prompt) > 200 else user_prompt
|
|
297
|
+
)
|
|
298
|
+
else:
|
|
299
|
+
snippet = "<EMPTY>"
|
|
300
|
+
logging.info(
|
|
301
|
+
"RecorderWorker: resolved prompt snippet for key '%s': %s",
|
|
302
|
+
self.mode,
|
|
303
|
+
snippet,
|
|
304
|
+
)
|
|
305
|
+
except Exception:
|
|
306
|
+
# avoid breaking the flow on logging errors
|
|
307
|
+
pass
|
|
308
|
+
|
|
280
309
|
result = pipeline.process(wav_path, duration, transcribe_mode, user_prompt)
|
|
281
310
|
keep_audio = self.save_all or self.cfg.defaults.keep_audio_files
|
|
282
311
|
pipeline.clean(wav_path, result["paths"], keep_audio)
|
|
@@ -383,13 +412,15 @@ class RecorderWindow(QWidget):
|
|
|
383
412
|
button_layout.addStretch()
|
|
384
413
|
self._transcribe_btn = QPushButton("Transcribe")
|
|
385
414
|
self._transcribe_btn.setToolTip("Stop and transcribe without prompt")
|
|
386
|
-
self._transcribe_btn.clicked.connect(
|
|
415
|
+
self._transcribe_btn.clicked.connect(
|
|
416
|
+
lambda checked=False, m="transcribe": self._on_mode_selected(m)
|
|
417
|
+
)
|
|
387
418
|
button_layout.addWidget(self._transcribe_btn)
|
|
388
419
|
self._prompt_buttons: dict[str, QPushButton] = {}
|
|
389
420
|
for key in self.prompt_keys:
|
|
390
421
|
btn = QPushButton(key.capitalize())
|
|
391
422
|
btn.setToolTip(f"Stop and transcribe with '{key}' prompt")
|
|
392
|
-
btn.clicked.connect(lambda k=key: self._on_mode_selected(k))
|
|
423
|
+
btn.clicked.connect(lambda checked=False, k=key: self._on_mode_selected(k))
|
|
393
424
|
self._prompt_buttons[key] = btn
|
|
394
425
|
button_layout.addWidget(btn)
|
|
395
426
|
self._cancel_btn = QPushButton("Cancel")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|