supervoxtral 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects those package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: supervoxtral
3
- Version: 0.1.3
3
+ Version: 0.1.4
4
4
  Summary: CLI/GUI audio recorder and transcription client using Mistral Voxtral (chat with audio and transcription).
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -1,18 +1,18 @@
1
1
  svx/__init__.py,sha256=qPEe5u3PT8yOQN4MiOLj_Bd18HqcRb6fxnPDfdMUP7w,742
2
- svx/cli.py,sha256=FMDdOSwTncgMxBj_H2BAYcuCtDNX9wEelbqm4ddo0O0,9132
2
+ svx/cli.py,sha256=3AirsBynuq2rcz-4C8hbZ69JztkgA7LTMGmL6ym7nyY,9167
3
3
  svx/core/__init__.py,sha256=mhzXuIXo3kUzjWme0Bxhe4TQZQELlyEiG_89LUAPC7M,2856
4
4
  svx/core/audio.py,sha256=r0m5T1uzdsJ1j9YXgQ5clv15dvMwZBp_bk2aLpjnrkc,7684
5
5
  svx/core/clipboard.py,sha256=IFtiN2SnYKQIu0WXx0hCK8syvDXanBpm1Jr2a8X7y9s,3692
6
- svx/core/config.py,sha256=irODbQBOosIdWANaj1Mju4NfdvPiqTkV0gWmoTNjZRM,14569
6
+ svx/core/config.py,sha256=e2tTGjjPcUYFctB28Ha90G-W44mF_0eWey1zpSyZkBo,16095
7
7
  svx/core/pipeline.py,sha256=nqvCgK5Pbyx18mfACrN_mIDt546Bh7fKA6MF4XG1hxM,10637
8
- svx/core/prompt.py,sha256=z-TFVQjHr4mWYXWYLRjCIChg4dMvG4GGJYcIy5QQwJY,5099
8
+ svx/core/prompt.py,sha256=yO8UbpFg7n1IT7wFjSQ7NUTbrqxuwPhdnxkTH4Iu7XU,5967
9
9
  svx/core/storage.py,sha256=5_xKYEpvDhaixRxmSTBlyX_jt8ssjHwHzX9VodcrtJw,3213
10
10
  svx/providers/__init__.py,sha256=SzlSWpZSUIptbSrAnGfi0d0NX4hYTpT0ObWpYyskDdA,2634
11
11
  svx/providers/base.py,sha256=YoiI8KWVRGISh7dx9XXPr1Q1a7ZDu8vfeJFlPbcKr20,2695
12
12
  svx/providers/mistral.py,sha256=vrBatNZg0zGNkJ5Qfnfz6ZwP6QtBgIt9sT_w59zkSO0,6636
13
- svx/ui/qt_app.py,sha256=0XoAk-6vCJguYq1ZVZA5zm-00442HOds_ibDHoDz-J0,18466
14
- supervoxtral-0.1.3.dist-info/METADATA,sha256=SpB9BeL_dbolMTM6UydjskMHHy_gnA4QJ1RfZI-Q3kE,753
15
- supervoxtral-0.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
- supervoxtral-0.1.3.dist-info/entry_points.txt,sha256=phJhRy3VkYHC6AR_tUB5CypHzG0ePRR9sB13HWE1vEg,36
17
- supervoxtral-0.1.3.dist-info/licenses/LICENSE,sha256=fCEBKmC4i-1WZAwoKjKWegfDd8qNsG8ECB7JyqoswyQ,1064
18
- supervoxtral-0.1.3.dist-info/RECORD,,
13
+ svx/ui/qt_app.py,sha256=6LOMeMjkMmYylu6H_prDRmPDsL0s4PVMZqfbflByCMs,18808
14
+ supervoxtral-0.1.4.dist-info/METADATA,sha256=0w_i5geOKu8F9x7eviNboDNt-PTy6FS3WHe3cCx4eHg,753
15
+ supervoxtral-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
+ supervoxtral-0.1.4.dist-info/entry_points.txt,sha256=phJhRy3VkYHC6AR_tUB5CypHzG0ePRR9sB13HWE1vEg,36
17
+ supervoxtral-0.1.4.dist-info/licenses/LICENSE,sha256=fCEBKmC4i-1WZAwoKjKWegfDd8qNsG8ECB7JyqoswyQ,1064
18
+ supervoxtral-0.1.4.dist-info/RECORD,,
svx/cli.py CHANGED
@@ -72,7 +72,7 @@ def config_show() -> None:
72
72
  user_prompt_file = cfg.user_prompt_dir / "user.md"
73
73
 
74
74
  defaults_section = asdict(cfg.defaults)
75
- prompt_section = asdict(cfg.prompt)
75
+ prompt_section = {k: asdict(e) for k, e in cfg.prompt.prompts.items()}
76
76
 
77
77
  # Resolve prompt source (same logic as record command, but read-only)
78
78
  resolved_prompt = cfg.resolve_prompt(None, None)
svx/core/config.py CHANGED
@@ -242,13 +242,15 @@ def init_user_config(force: bool = False, prompt_file: Path | None = None) -> Pa
242
242
  "copy = true\n\n"
243
243
  '# Log level: "DEBUG" | "INFO" | "WARNING" | "ERROR"\n'
244
244
  'log_level = "INFO"\n\n'
245
- "[prompt]\n"
245
+ "[prompt.default]\n"
246
246
  "# Default user prompt source:\n"
247
247
  "# - Option 1: Use a file (recommended)\n"
248
248
  f'file = "{str(prompt_file)}"\n'
249
249
  "#\n"
250
250
  "# - Option 2: Inline prompt (less recommended for long text)\n"
251
251
  '# text = "Please transcribe the audio and provide a concise summary in French."\n'
252
+ "#\n"
253
+ "# For multiple prompts in future, add [prompt.other] sections.\n"
252
254
  )
253
255
 
254
256
  if not USER_CONFIG_FILE.exists() or force:
@@ -282,11 +284,16 @@ class DefaultsConfig:
282
284
 
283
285
 
284
286
  @dataclass
285
- class PromptConfig:
287
+ class PromptEntry:
286
288
  text: str | None = None
287
289
  file: str | None = None
288
290
 
289
291
 
292
+ @dataclass
293
+ class PromptConfig:
294
+ prompts: dict[str, PromptEntry] = field(default_factory=lambda: {"default": PromptEntry()})
295
+
296
+
290
297
  @dataclass
291
298
  class Config:
292
299
  providers: dict[str, ProviderConfig] = field(default_factory=dict)
@@ -356,11 +363,39 @@ class Config:
356
363
  providers_data[name] = ProviderConfig(api_key=api_key)
357
364
  # Prompt
358
365
  prompt_raw = user_config.get("prompt", {})
359
- prompt_data = {
360
- "text": prompt_raw.get("text") if isinstance(prompt_raw.get("text"), str) else None,
361
- "file": prompt_raw.get("file") if isinstance(prompt_raw.get("file"), str) else None,
362
- }
363
- prompt = PromptConfig(**prompt_data)
366
+ prompts_data: dict[str, PromptEntry] = {}
367
+ if isinstance(prompt_raw, dict):
368
+ if any(k in prompt_raw for k in ["text", "file"]): # old flat style
369
+ logging.warning(
370
+ "Old [prompt] format detected in %s; "
371
+ "please migrate to [prompt.default] manually.",
372
+ USER_CONFIG_FILE,
373
+ )
374
+ entry = PromptEntry(
375
+ text=prompt_raw.get("text")
376
+ if isinstance(prompt_raw.get("text"), str)
377
+ else None,
378
+ file=prompt_raw.get("file")
379
+ if isinstance(prompt_raw.get("file"), str)
380
+ else None,
381
+ )
382
+ prompts_data["default"] = entry
383
+ else: # new nested style
384
+ for key, entry_raw in prompt_raw.items():
385
+ if isinstance(entry_raw, dict):
386
+ entry = PromptEntry(
387
+ text=entry_raw.get("text")
388
+ if isinstance(entry_raw.get("text"), str)
389
+ else None,
390
+ file=entry_raw.get("file")
391
+ if isinstance(entry_raw.get("file"), str)
392
+ else None,
393
+ )
394
+ prompts_data[key] = entry
395
+ # Ensure "default" always exists
396
+ if "default" not in prompts_data:
397
+ prompts_data["default"] = PromptEntry()
398
+ prompt = PromptConfig(prompts=prompts_data)
364
399
  data = {
365
400
  "defaults": defaults,
366
401
  "providers": providers_data,
@@ -376,7 +411,7 @@ class Config:
376
411
  def resolve_prompt(self, inline: str | None = None, file_path: Path | None = None) -> str:
377
412
  from svx.core.prompt import resolve_user_prompt
378
413
 
379
- return resolve_user_prompt(self, inline, file_path, self.user_prompt_dir)
414
+ return resolve_user_prompt(self, inline, file_path, self.user_prompt_dir, key="default")
380
415
 
381
416
  def get_provider_config(self, name: str) -> dict[str, Any]:
382
417
  return asdict(self.providers.get(name, ProviderConfig()))
svx/core/prompt.py CHANGED
@@ -14,7 +14,7 @@ from __future__ import annotations
14
14
  import logging
15
15
  from pathlib import Path
16
16
 
17
- from .config import USER_PROMPT_DIR, Config
17
+ from .config import USER_PROMPT_DIR, Config, PromptEntry
18
18
 
19
19
  __all__ = [
20
20
  "read_text_file",
@@ -68,16 +68,16 @@ def resolve_user_prompt(
68
68
  inline: str | None = None,
69
69
  file: Path | None = None,
70
70
  user_prompt_dir: Path | None = None,
71
+ key: str | None = None,
71
72
  ) -> str:
72
73
  """
73
74
  Resolve the effective user prompt from multiple sources, by priority:
74
75
 
75
76
  1) inline text (CLI --user-prompt)
76
77
  2) explicit file (CLI --user-prompt-file)
77
- 3) user config inline text (cfg.prompt.text)
78
- 4) user config file path (cfg.prompt.file)
79
- 5) user prompt dir file (user_prompt_dir / 'user.md')
80
- 6) literal fallback: "What's in this audio?"
78
+ 3) user config prompt for key (cfg.prompt.prompts[key or "default"])
79
+ 4) user prompt dir file (user_prompt_dir / 'user.md')
80
+ 5) literal fallback: "What's in this audio?"
81
81
 
82
82
  Returns the first non-empty string after stripping.
83
83
  """
@@ -94,17 +94,18 @@ def resolve_user_prompt(
94
94
  logging.warning("Failed to read user prompt file: %s", p)
95
95
  return ""
96
96
 
97
- def _from_user_cfg() -> str:
97
+ def _from_user_cfg(key: str) -> str:
98
98
  try:
99
- cfg_prompt = cfg.prompt
100
- cfg_text = cfg_prompt.text
101
- if isinstance(cfg_text, str) and cfg_text.strip():
102
- return cfg_text.strip()
103
- cfg_file = cfg_prompt.file
104
- if isinstance(cfg_file, str) and cfg_file.strip():
105
- return read_text_file(Path(cfg_file).expanduser()).strip()
99
+ entry = cfg.prompt.prompts.get(key, PromptEntry())
100
+ if entry.text and entry.text.strip():
101
+ return entry.text.strip()
102
+ if entry.file:
103
+ file_path = Path(entry.file).expanduser()
104
+ if not file_path.is_absolute():
105
+ file_path = (user_prompt_dir or cfg.user_prompt_dir) / entry.file
106
+ return read_text_file(file_path).strip()
106
107
  except Exception:
107
- logging.debug("User config prompt processing failed.", exc_info=True)
108
+ logging.debug("User config prompt processing failed for key '%s'.", key, exc_info=True)
108
109
  return ""
109
110
 
110
111
  def _from_user_prompt_dir() -> str:
@@ -119,10 +120,11 @@ def resolve_user_prompt(
119
120
  )
120
121
  return ""
121
122
 
123
+ key = key or "default"
122
124
  suppliers = [
123
125
  lambda: _strip(inline),
124
126
  lambda: _read(file),
125
- _from_user_cfg,
127
+ lambda: _from_user_cfg(key),
126
128
  _from_user_prompt_dir,
127
129
  ]
128
130
 
@@ -150,7 +152,7 @@ def init_user_prompt_file(force: bool = False) -> Path:
150
152
  path = USER_PROMPT_DIR / "user.md"
151
153
  if not path.exists() or force:
152
154
  example_prompt = """
153
- - Transcribe the input audio file.
155
+ - Transcribe the input audio file. If the audio if empty, just respond "no audio detected".
154
156
  - Do not respond to any question in the audio. Just transcribe.
155
157
  - DO NOT TRANSLATE.
156
158
  - Responde only with the transcription. Do not provide explanations or notes.
@@ -163,3 +165,23 @@ def init_user_prompt_file(force: bool = False) -> Path:
163
165
  except Exception as e:
164
166
  logging.debug("Could not initialize user prompt file %s: %s", path, e)
165
167
  return path
168
+
169
+
170
+ def resolve_prompt_entry(entry: PromptEntry, user_prompt_dir: Path) -> str:
171
+ """
172
+ Resolve the prompt from a single PromptEntry (text or file).
173
+
174
+ - Prioritizes text if present and non-empty.
175
+ - Falls back to reading the file (expands ~ and resolves relative to user_prompt_dir).
176
+ - Returns empty string if neither is valid.
177
+ """
178
+ if entry.text and entry.text.strip():
179
+ return entry.text.strip()
180
+
181
+ if entry.file:
182
+ file_path = Path(entry.file).expanduser()
183
+ if not file_path.is_absolute():
184
+ file_path = user_prompt_dir / entry.file
185
+ return read_text_file(file_path).strip()
186
+
187
+ return ""
svx/ui/qt_app.py CHANGED
@@ -37,6 +37,7 @@ from PySide6.QtWidgets import (
37
37
  import svx.core.config as config
38
38
  from svx.core.config import Config
39
39
  from svx.core.pipeline import RecordingPipeline
40
+ from svx.core.prompt import resolve_user_prompt
40
41
 
41
42
  __all__ = ["RecorderWindow", "run_gui"]
42
43
 
@@ -239,11 +240,11 @@ class RecorderWorker(QObject):
239
240
  self.cancel_requested = True
240
241
  self._stop_event.set()
241
242
 
242
- def _resolve_user_prompt(self) -> str:
243
+ def _resolve_user_prompt(self, key: str) -> str:
243
244
  """
244
- Determine the final user prompt using the shared resolver.
245
+ Determine the final user prompt using the shared resolver for the given key.
245
246
  """
246
- return self.cfg.resolve_prompt(self.user_prompt, self.user_prompt_file)
247
+ return resolve_user_prompt(self.cfg, None, None, self.cfg.user_prompt_dir, key=key)
247
248
 
248
249
  def run(self) -> None:
249
250
  """
@@ -275,7 +276,7 @@ class RecorderWorker(QObject):
275
276
  while self.mode is None:
276
277
  time.sleep(0.05)
277
278
  transcribe_mode = self.mode == "transcribe"
278
- user_prompt = None if transcribe_mode else self._resolve_user_prompt()
279
+ user_prompt = None if transcribe_mode else self._resolve_user_prompt(self.mode)
279
280
  result = pipeline.process(wav_path, duration, transcribe_mode, user_prompt)
280
281
  keep_audio = self.save_all or self.cfg.defaults.keep_audio_files
281
282
  pipeline.clean(wav_path, result["paths"], keep_audio)
@@ -310,6 +311,7 @@ class RecorderWindow(QWidget):
310
311
  self.user_prompt_file = user_prompt_file
311
312
  self.save_all = save_all
312
313
  self.outfile_prefix = outfile_prefix
314
+ self.prompt_keys = sorted(self.cfg.prompt.prompts.keys())
313
315
 
314
316
  # Background worker (create early for signal connections)
315
317
  self._worker = RecorderWorker(
@@ -381,12 +383,15 @@ class RecorderWindow(QWidget):
381
383
  button_layout.addStretch()
382
384
  self._transcribe_btn = QPushButton("Transcribe")
383
385
  self._transcribe_btn.setToolTip("Stop and transcribe without prompt")
384
- self._transcribe_btn.clicked.connect(lambda: self._on_button_clicked("transcribe"))
386
+ self._transcribe_btn.clicked.connect(lambda: self._on_mode_selected("transcribe"))
385
387
  button_layout.addWidget(self._transcribe_btn)
386
- self._prompt_btn = QPushButton("Prompt")
387
- self._prompt_btn.setToolTip("Stop and transcribe with prompt")
388
- self._prompt_btn.clicked.connect(lambda: self._on_button_clicked("prompt"))
389
- button_layout.addWidget(self._prompt_btn)
388
+ self._prompt_buttons: dict[str, QPushButton] = {}
389
+ for key in self.prompt_keys:
390
+ btn = QPushButton(key.capitalize())
391
+ btn.setToolTip(f"Stop and transcribe with '{key}' prompt")
392
+ btn.clicked.connect(lambda k=key: self._on_mode_selected(k))
393
+ self._prompt_buttons[key] = btn
394
+ button_layout.addWidget(btn)
390
395
  self._cancel_btn = QPushButton("Cancel")
391
396
  self._cancel_btn.setObjectName("cancel_btn")
392
397
  self._cancel_btn.setToolTip("Stop recording and quit without processing")
@@ -397,6 +402,8 @@ class RecorderWindow(QWidget):
397
402
  button_widget.setLayout(button_layout)
398
403
  layout.addWidget(button_widget, 0, Qt.AlignmentFlag.AlignCenter)
399
404
 
405
+ self._action_buttons = [self._transcribe_btn] + list(self._prompt_buttons.values())
406
+
400
407
  # Keyboard shortcut: Esc to stop
401
408
  stop_action = QAction(self)
402
409
  stop_action.setShortcut(QKeySequence.StandardKey.Cancel) # Esc
@@ -456,17 +463,17 @@ class RecorderWindow(QWidget):
456
463
  self._worker.cancel()
457
464
  super().closeEvent(event)
458
465
 
459
- def _on_button_clicked(self, mode: str) -> None:
460
- self._transcribe_btn.setEnabled(False)
461
- self._prompt_btn.setEnabled(False)
466
+ def _on_mode_selected(self, mode: str) -> None:
467
+ for btn in self._action_buttons:
468
+ btn.setEnabled(False)
462
469
  self._cancel_btn.setEnabled(False)
463
470
  self._status_label.setText("Stopping and processing...")
464
471
  self._worker.set_mode(mode)
465
472
  self._worker.stop()
466
473
 
467
474
  def _on_cancel_clicked(self) -> None:
468
- self._transcribe_btn.setEnabled(False)
469
- self._prompt_btn.setEnabled(False)
475
+ for btn in self._action_buttons:
476
+ btn.setEnabled(False)
470
477
  self._cancel_btn.setEnabled(False)
471
478
  self._status_label.setText("Canceling...")
472
479
  self._worker.cancel()