PyPI - supervoxtral - Versions diffs - 0.1.4__tar.gz → 0.1.5__tar.gz - Mend

supervoxtral 0.1.4.tar.gz → 0.1.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

{supervoxtral-0.1.4 → supervoxtral-0.1.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: supervoxtral
-Version: 0.1.4
+Version: 0.1.5
 Summary: CLI/GUI audio recorder and transcription client using Mistral Voxtral (chat with audio and transcription).
 License: MIT
 License-File: LICENSE

{supervoxtral-0.1.4 → supervoxtral-0.1.5}/README.md RENAMED Viewed

@@ -4,7 +4,7 @@
 SuperVoxtral is a lightweight Python CLI/GUI utility for recording microphone audio and integrating with Mistral's Voxtral APIs for transcription or audio-enabled chat.
-Voxtral models, such as `voxtral-mini-latest` and `voxtral-small-latest`, deliver fast inference times, high transcription accuracy across languages and accents, and minimal API costs. In contrast to OpenAI's Whisper, which performs only standalone transcription, Voxtral supports two modes: pure transcription via a dedicated endpoint (no prompts needed) or chat mode, where audio input combines with text prompts for refined outputs—like error correction or contextual summarization—without invoking a separate LLM.
+Voxtral models, such as `voxtral-mini-latest` and `voxtral-small-latest`, deliver fast inference times, high transcription accuracy across languages and accents, and minimal API costs. Voxtral supports two modes: pure transcription via a dedicated endpoint (no prompts needed) or chat mode, where audio input combines with text prompts for refined outputs—like error correction or contextual summarization—without invoking a separate LLM.
 For instance, use a prompt like: "_Transcribe this audio precisely and remove all minor speech hesitations: "um", "uh", "er", "euh", "ben", etc._"
@@ -26,6 +26,7 @@ The package is available on PyPI. We recommend using `uv` (a fast Python package
 - For GUI support (includes PySide6):
   ```
   uv tool install "supervoxtral[gui]"
+  # to upgrade later: uv tool upgrade supervoxtral
   ```
 - For core CLI only functionality:
@@ -233,6 +234,7 @@ By default in CLI, uses the 'default' prompt from config.toml [prompt.default].
 ## Changelog
+- 0.1.5: Fix bug in prompt selection
 - 0.1.4: Support for multiple prompts in config.toml with dynamic GUI buttons for each prompt key
 - 0.1.3: Minor style update
 - 0.1.2: Interactive mode in GUI (choose transcribe / prompt / cancel while recording)

{supervoxtral-0.1.4 → supervoxtral-0.1.5}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "supervoxtral"
-version = "0.1.4"
+version = "0.1.5"
 description = "CLI/GUI audio recorder and transcription client using Mistral Voxtral (chat with audio and transcription)."
 requires-python = ">=3.11"
 license = { text = "MIT" }

{supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/core/pipeline.py RENAMED Viewed

@@ -180,6 +180,8 @@ class RecordingPipeline:
         else:
             self._status("Transcribe mode activated: no prompt used.")
+        logging.debug(f"Applied prompt: {final_user_prompt or 'None (transcribe mode)'}")
         paths: dict[str, Path | None] = {"wav": wav_path}
         # Convert if needed

{supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/core/prompt.py RENAMED Viewed

@@ -12,6 +12,7 @@ Intended to be small and dependency-light so it can be imported broadly.
 from __future__ import annotations
 import logging
+from collections.abc import Callable
 from pathlib import Path
 from .config import USER_PROMPT_DIR, Config, PromptEntry
@@ -121,22 +122,45 @@ def resolve_user_prompt(
         return ""
     key = key or "default"
-    suppliers = [
-        lambda: _strip(inline),
-        lambda: _read(file),
-        lambda: _from_user_cfg(key),
-        _from_user_prompt_dir,
+    # Suppliers annotated with a name for tracing which one returned the prompt.
+    named_suppliers: list[tuple[str, Callable[[], str]]] = [
+        ("inline", lambda: _strip(inline)),
+        ("file", lambda: _read(file)),
+        (f"prompt_config[{key}]", lambda: _from_user_cfg(key)),
+        ("user_prompt_dir/user.md", _from_user_prompt_dir),
     ]
-    for supplier in suppliers:
+    for name, supplier in named_suppliers:
         try:
             val = supplier()
             if val:
+                # Log which supplier provided the prompt and a short snippet for debugging.
+                try:
+                    if len(val) > 200:
+                        snippet = val[:200] + "..."
+                    else:
+                        snippet = val
+                    logging.info(
+                        "resolve_user_prompt: supplier '%s' provided prompt snippet: %s",
+                        name,
+                        snippet,
+                    )
+                except Exception:
+                    # Ensure logging failures do not change behavior.
+                    logging.info(
+                        "resolve_user_prompt: supplier '%s' provided a prompt "
+                        "(snippet unavailable)",
+                        name,
+                    )
                 return val
         except Exception as e:
-            logging.debug("Prompt supplier failed: %s", e)
+            logging.debug("Prompt supplier '%s' failed: %s", name, e)
-    return "What's in this audio?"
+    # Final fallback
+    fallback = "What's in this audio?"
+    logging.info("resolve_user_prompt: no supplier provided a prompt, using fallback: %s", fallback)
+    return fallback
 def init_user_prompt_file(force: bool = False) -> Path:

{supervoxtral-0.1.4 → supervoxtral-0.1.5}/svx/ui/qt_app.py RENAMED Viewed

@@ -273,10 +273,39 @@ class RecorderWorker(QObject):
                 self.canceled.emit()
                 return
             self.status.emit("Processing in progress...")
+            # Wait for user to select mode in the GUI
             while self.mode is None:
                 time.sleep(0.05)
+            # Log the selected mode/key for debugging prompt application
+            try:
+                logging.info("RecorderWorker: selected mode/key: %s", self.mode)
+            except Exception:
+                # ensure failures in logging don't break the worker
+                pass
             transcribe_mode = self.mode == "transcribe"
-            user_prompt = None if transcribe_mode else self._resolve_user_prompt(self.mode)
+            if transcribe_mode:
+                user_prompt = None
+            else:
+                # Resolve the user prompt for the selected key and log a short snippet
+                user_prompt = self._resolve_user_prompt(self.mode)
+                try:
+                    if user_prompt:
+                        snippet = (
+                            user_prompt[:200] + "..." if len(user_prompt) > 200 else user_prompt
+                        )
+                    else:
+                        snippet = "<EMPTY>"
+                    logging.info(
+                        "RecorderWorker: resolved prompt snippet for key '%s': %s",
+                        self.mode,
+                        snippet,
+                    )
+                except Exception:
+                    # avoid breaking the flow on logging errors
+                    pass
             result = pipeline.process(wav_path, duration, transcribe_mode, user_prompt)
             keep_audio = self.save_all or self.cfg.defaults.keep_audio_files
             pipeline.clean(wav_path, result["paths"], keep_audio)
@@ -383,13 +412,15 @@ class RecorderWindow(QWidget):
         button_layout.addStretch()
         self._transcribe_btn = QPushButton("Transcribe")
         self._transcribe_btn.setToolTip("Stop and transcribe without prompt")
-        self._transcribe_btn.clicked.connect(lambda: self._on_mode_selected("transcribe"))
+        self._transcribe_btn.clicked.connect(
+            lambda checked=False, m="transcribe": self._on_mode_selected(m)
+        )
         button_layout.addWidget(self._transcribe_btn)
         self._prompt_buttons: dict[str, QPushButton] = {}
         for key in self.prompt_keys:
             btn = QPushButton(key.capitalize())
             btn.setToolTip(f"Stop and transcribe with '{key}' prompt")
-            btn.clicked.connect(lambda k=key: self._on_mode_selected(k))
+            btn.clicked.connect(lambda checked=False, k=key: self._on_mode_selected(k))
             self._prompt_buttons[key] = btn
             button_layout.addWidget(btn)
         self._cancel_btn = QPushButton("Cancel")