supervoxtral 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
svx/ui/qt_app.py ADDED
@@ -0,0 +1,491 @@
1
+ """
2
+ Minimal PySide6 GUI for SuperVoxtral.
3
+
4
+ This module provides a tiny always-on-top frameless window with a single "Stop" button.
5
+ Behavior:
6
+ - Starts recording immediately on launch.
7
+ - When "Stop" is pressed (or Esc), stops recording, converts to desired format (default: opus),
8
+ sends to the transcription provider (default: mistral), copies the result to clipboard,
9
+ optionally deletes audio files, and then exits.
10
+
11
+ UI changes in this file:
12
+ - Frameless window (no native title bar).
13
+ - Draggable window via mouse press/move on the widget.
14
+ - Monospace dark stylesheet applied to the application.
15
+ - Esc shortcut bound to Stop.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ import threading
22
+ from pathlib import Path
23
+
24
+ from PySide6.QtCore import QObject, QPoint, Qt, QTimer, Signal
25
+ from PySide6.QtGui import QAction, QFont, QFontDatabase, QKeySequence
26
+ from PySide6.QtWidgets import (
27
+ QApplication,
28
+ QLabel,
29
+ QMessageBox,
30
+ QPushButton,
31
+ QVBoxLayout,
32
+ QWidget,
33
+ )
34
+
35
+ import svx.core.config as config
36
+ from svx.core.config import Config
37
+ from svx.core.pipeline import RecordingPipeline
38
+
39
+ __all__ = ["RecorderWindow", "run_gui"]
40
+
41
+
42
# Dark monospace stylesheet applied application-wide.
# The font family is intentionally not set here; callers install the system
# fixed-width font through QApplication.setFont.
DARK_MONO_STYLESHEET = """
/* Base window */
QWidget {
    background-color: #0f1113;
    color: #e6eef3;
    /* font-family set via QApplication.setFont */
    font-size: 11pt;
}

/* Labels */
QLabel {
    color: #cfe8ff;
    padding: 6px;
}
/* Info line (geek/minimal) */
QLabel#info_label {
    color: #9fb8e6;
    padding: 2px 6px;
    font-size: 10pt;
}

/* Stop button */
QPushButton {
    background-color: #1f6feb;
    color: #ffffff;
    border: none;
    border-radius: 6px;
    padding: 8px 14px;
    margin: 6px;
    min-width: 80px;
}
QPushButton:disabled {
    background-color: #274a7a;
    color: #9fb8e6;
}
/* Hover feedback only while enabled: a plain :hover rule placed after
   :disabled has the same specificity and would repaint a disabled button
   that is still under the cursor with the active colour. */
QPushButton:hover:enabled {
    background-color: #2a78ff;
}

/* Small window border effect (subtle) */
QWidget#recorder_window {
    border: 1px solid #203040;
    border-radius: 8px;
}
"""
88
+
89
+
90
def get_fixed_font(point_size: int = 11) -> QFont:
    """
    Build the platform's default fixed-width font at the requested size.

    The font is requested from QFontDatabase by role (FixedFont) instead of by
    family name, then resized to ``point_size`` points before being returned.
    """
    fixed_font = QFontDatabase.systemFont(QFontDatabase.SystemFont.FixedFont)
    fixed_font.setPointSize(point_size)
    return fixed_font
98
+
99
+
100
class WaveformWidget(QWidget):
    """
    Decorative, self-animated waveform used as an activity indicator.

    The widget never reads audio data. A timer advances a phase value and eases
    the amplitude towards a slowly oscillating target; ``paintEvent`` then draws
    a curve made of two sine components across the widget's width.
    """

    def __init__(self, parent=None, height: int = 64) -> None:
        """
        Create the widget with a fixed height and start the animation timer.

        Args:
            parent: Optional parent widget, forwarded to QWidget.
            height: Height in pixels, applied as both minimum and maximum.
        """
        super().__init__(parent)
        self.setMinimumHeight(height)
        self.setMaximumHeight(height)
        self.phase: float = 0.0
        self.amp: float = 0.18  # current amplitude, eased towards _target_amp
        self._target_amp: float = 0.12

        # Function-scope import, as in the rest of this class.
        import time as _time

        # Monotonic clock: frame deltas must not be affected by wall-clock
        # adjustments (NTP sync, manual changes, DST).
        self._last_time = _time.monotonic()

        self._tick_timer = QTimer(self)
        self._tick_timer.setInterval(16)  # ~60 FPS
        self._tick_timer.timeout.connect(self._on_tick)
        self._tick_timer.start()

    def _on_tick(self) -> None:
        """Advance the animation by the elapsed time and request a repaint."""
        import math as _math
        import time as _time

        now = _time.monotonic()
        dt = max(0.0, now - self._last_time)
        self._last_time = now
        self.phase += 10.0 * dt  # phase speed factor

        # "Breathing" target amplitude: oscillates between 0.12 and 0.24.
        self._target_amp = 0.12 + 0.12 * (0.5 + 0.5 * _math.sin(self.phase * 0.35))

        # Ease the current amplitude towards the target (simple lerp).
        lerp_alpha = 0.06
        self.amp = (1.0 - lerp_alpha) * self.amp + lerp_alpha * self._target_amp
        self.update()

    def paintEvent(self, event) -> None:  # type: ignore[override]
        """Draw the translucent backdrop and the current waveform curve."""
        import math as _math

        from PySide6.QtGui import QColor, QPainter, QPainterPath, QPen

        w = self.width()
        h = self.height()
        center_y = h / 2.0

        # At least 64 points; roughly one point every 3 px on wide widgets.
        samples = max(64, w // 3)
        amplitude = (h / 1.8) * self.amp

        # Two full periods across the width, shifted by the phase for motion;
        # a 2.3x harmonic is mixed in for a less regular shape.
        path = QPainterPath()
        for i in range(samples):
            x = (i / (samples - 1)) * w
            angle = (i / samples) * 4.0 * _math.pi + self.phase
            y = center_y + amplitude * (
                0.9 * (0.6 * _math.sin(angle) + 0.4 * _math.sin(2.3 * angle))
            )
            if i == 0:
                path.moveTo(x, y)
            else:
                path.lineTo(x, y)

        p = QPainter(self)
        try:
            p.setRenderHint(QPainter.RenderHint.Antialiasing)
            # The stylesheet paints the base background; add a subtle inner tint.
            p.fillRect(0, 0, w, h, QColor(20, 24, 28, 120))

            pen = QPen(QColor(90, 200, 255, 220))
            pen.setWidthF(2.0)
            p.setPen(pen)
            p.drawPath(path)
        finally:
            # Release the paint device explicitly instead of relying on
            # garbage-collection timing of the Python wrapper.
            p.end()
182
+
183
+
184
class RecorderWorker(QObject):
    """
    Runs a RecordingPipeline on behalf of the UI and reports back through signals.

    Signals:
        status (str): progress messages forwarded from the pipeline.
        done (str): the ``"text"`` entry of the pipeline result, on success.
        error (str): the string form of the exception, on failure.

    ``transcribe_mode`` is passed through to the pipeline unchanged.
    """

    status = Signal(str)
    done = Signal(str)
    error = Signal(str)

    def __init__(
        self,
        cfg: Config,
        user_prompt: str | None = None,
        user_prompt_file: Path | None = None,
        save_all: bool = False,
        outfile_prefix: str | None = None,
        transcribe_mode: bool = False,
    ) -> None:
        """Store the pipeline options and create the stop event."""
        super().__init__()
        self._stop_event = threading.Event()
        self.cfg = cfg
        self.user_prompt = user_prompt
        self.user_prompt_file = user_prompt_file
        self.save_all = save_all
        self.outfile_prefix = outfile_prefix
        self.transcribe_mode = transcribe_mode

    def stop(self) -> None:
        """Set the stop event that was handed to the running pipeline."""
        self._stop_event.set()

    def _resolve_user_prompt(self) -> str:
        """Delegate prompt resolution to ``cfg.resolve_prompt``."""
        prompt_text = self.user_prompt
        prompt_file = self.user_prompt_file
        return self.cfg.resolve_prompt(prompt_text, prompt_file)

    def run(self) -> None:
        """
        Build a RecordingPipeline from the stored options and run it.

        ``status.emit`` is supplied as the pipeline's progress callback. On
        success ``done`` is emitted with ``result["text"]``; any exception is
        logged with its traceback and reported through ``error``.
        """
        try:
            recording_pipeline = RecordingPipeline(
                cfg=self.cfg,
                user_prompt=self.user_prompt,
                user_prompt_file=self.user_prompt_file,
                save_all=self.save_all,
                outfile_prefix=self.outfile_prefix,
                transcribe_mode=self.transcribe_mode,
                progress_callback=self.status.emit,
            )
            outcome = recording_pipeline.run(stop_event=self._stop_event)
            transcript = outcome["text"]
            self.done.emit(transcript)
        except Exception as exc:
            logging.exception("Pipeline failed")
            self.error.emit(str(exc))
253
+
254
+
255
class RecorderWindow(QWidget):
    """
    Frameless always-on-top window with a single Stop button.

    Constructing the window starts a RecorderWorker on a daemon thread right
    away. The window can be dragged with the left mouse button, and pressing
    Esc has the same effect as clicking Stop. ``transcribe_mode`` is shown in
    the info line and forwarded to the worker.
    """

    def __init__(
        self,
        cfg: Config,
        user_prompt: str | None = None,
        user_prompt_file: Path | None = None,
        save_all: bool = False,
        outfile_prefix: str | None = None,
        transcribe_mode: bool = False,
    ) -> None:
        """
        Build the UI, wire the worker signals and start the worker thread.

        Args:
            cfg: Loaded configuration; ``cfg.defaults`` feeds the info line.
            user_prompt: Optional prompt text forwarded to the worker.
            user_prompt_file: Optional prompt file forwarded to the worker.
            save_all: Forwarded to the worker.
            outfile_prefix: Forwarded to the worker.
            transcribe_mode: Forwarded to the worker and reflected in the UI.
        """
        super().__init__()

        self.cfg = cfg
        self.user_prompt = user_prompt
        self.user_prompt_file = user_prompt_file
        self.save_all = save_all
        self.outfile_prefix = outfile_prefix
        self.transcribe_mode = transcribe_mode

        # Window basics: frameless and always on top.
        self.setObjectName("recorder_window")
        self.setWindowTitle("SuperVoxtral")
        self.setWindowFlag(Qt.WindowType.WindowStaysOnTopHint, True)
        self.setWindowFlag(Qt.WindowType.FramelessWindowHint, True)
        self.setAttribute(Qt.WidgetAttribute.WA_TranslucentBackground, False)
        self.setMinimumWidth(260)

        # Drag state for the frameless window.
        self._drag_active = False
        self._drag_pos = QPoint(0, 0)

        # Set before the worker starts so every slot can rely on it.
        self._closing = False

        # UI layout
        layout = QVBoxLayout(self)
        layout.setContentsMargins(10, 10, 10, 10)
        layout.setSpacing(6)

        # Animated waveform (autonomous, not linked to audio).
        self._waveform = WaveformWidget(self, height=64)
        layout.addWidget(self._waveform)

        # Compact coloured info line under the waveform.
        self._info_label = QLabel(self._build_info_line())
        self._info_label.setObjectName("info_label")
        self._info_label.setTextFormat(Qt.TextFormat.RichText)
        self._info_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        layout.addWidget(self._info_label)

        self._status_label = QLabel("Recording... Press Stop to finish")
        self._status_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        layout.addWidget(self._status_label)

        self._stop_btn = QPushButton("Stop")
        self._stop_btn.clicked.connect(self._on_stop_clicked)
        layout.addWidget(self._stop_btn, 0, Qt.AlignmentFlag.AlignCenter)

        # Keyboard shortcut: Esc to stop.
        stop_action = QAction(self)
        stop_action.setShortcut(QKeySequence.StandardKey.Cancel)  # Esc
        stop_action.triggered.connect(self._on_stop_clicked)
        self.addAction(stop_action)

        # Background worker. transcribe_mode must be forwarded; otherwise the
        # worker silently runs with its default (False) while the info line
        # advertises "Transcribe".
        self._worker = RecorderWorker(
            cfg=self.cfg,
            user_prompt=user_prompt,
            user_prompt_file=user_prompt_file,
            save_all=save_all,
            outfile_prefix=outfile_prefix,
            transcribe_mode=transcribe_mode,
        )
        self._thread = threading.Thread(target=self._worker.run, daemon=True)

        # Signals wiring
        self._worker.status.connect(self._on_status)
        self._worker.done.connect(self._on_done)
        self._worker.error.connect(self._on_error)

        # Apply font and stylesheet to the application for a consistent look.
        app = QApplication.instance()
        # Narrow the type to QApplication before using styleSheet/setStyleSheet.
        if isinstance(app, QApplication):
            app.setFont(get_fixed_font(11))
            existing = app.styleSheet() or ""
            # The launcher may already have installed the stylesheet; do not
            # append a second copy.
            if DARK_MONO_STYLESHEET not in existing:
                app.setStyleSheet(existing + DARK_MONO_STYLESHEET)

        # Start the worker immediately.
        self._thread.start()
        QApplication.beep()

        self._schedule_topmost_refresh()

    def _build_info_line(self) -> str:
        """Return the rich-text info line (provider/model, format, mode, language)."""
        defaults = self.cfg.defaults
        sep = "<span style='color:#8b949e'> • </span>"
        prov_model_html = (
            f"<span style='color:#7ee787'>"
            f"{defaults.provider}/{defaults.model}"
            "</span>"
        )
        format_html = f"<span style='color:#ffa657'>{defaults.format}</span>"
        if self.transcribe_mode:
            mode_html = "<span style='color:#ff7b72'>Transcribe</span>"
        else:
            mode_html = "<span style='color:#7ee787'>Completion</span>"
        parts = [prov_model_html, format_html, mode_html]
        if defaults.language:
            parts.append(f"<span style='color:#c9b4ff'>{defaults.language}</span>")
        info_core = sep.join(parts)
        return (
            "<span style='color:#8b949e'>[svx:</span> "
            f"{info_core} "
            "<span style='color:#8b949e'>]</span>"
        )

    def _schedule_topmost_refresh(self) -> None:
        """Re-assert the always-on-top hint shortly after construction."""
        # Some WMs may ignore the first set; nudge it again shortly after show.
        QTimer.singleShot(
            50, lambda: self.setWindowFlag(Qt.WindowType.WindowStaysOnTopHint, True)
        )

    def _on_status(self, msg: str) -> None:
        """Show a progress message from the worker."""
        self._status_label.setText(msg)

    def _on_done(self, text: str) -> None:
        """Handle successful completion: update status, beep, close shortly."""
        self._status_label.setText("Done.")
        QApplication.beep()
        self._close_soon()

    def _on_error(self, message: str) -> None:
        """Handle a worker failure: beep, show a modal error, then close."""
        QApplication.beep()
        QMessageBox.critical(self, "SuperVoxtral", f"Error: {message}")
        self._close_soon()

    def _close_soon(self) -> None:
        """Schedule a single deferred close of the window."""
        if not self._closing:
            self._closing = True
            QTimer.singleShot(200, self.close)

    def closeEvent(self, event) -> None:  # type: ignore[override]
        """Signal the worker to stop whenever the window is closed."""
        self._worker.stop()
        super().closeEvent(event)

    def _on_stop_clicked(self) -> None:
        """Request a stop once; later triggers (e.g. repeated Esc) are ignored."""
        if not self._stop_btn.isEnabled():
            # Already stopping: avoid overwriting later progress messages
            # with "Stopping...".
            return
        self._stop_btn.setEnabled(False)
        self._status_label.setText("Stopping...")
        self._worker.stop()

    # --- Drag handling for frameless window ---
    def mousePressEvent(self, event) -> None:  # type: ignore[override]
        """Begin a drag on left click, remembering the cursor offset."""
        if event.button() == Qt.MouseButton.LeftButton:
            self._drag_active = True
            # Global position minus top-left corner gives the grab offset.
            self._drag_pos = (
                event.globalPosition().toPoint() - self.frameGeometry().topLeft()
            )
            event.accept()
        else:
            super().mousePressEvent(event)

    def mouseMoveEvent(self, event) -> None:  # type: ignore[override]
        """Move the window while a left-button drag is active."""
        if self._drag_active and event.buttons() & Qt.MouseButton.LeftButton:
            new_pos = event.globalPosition().toPoint() - self._drag_pos
            self.move(new_pos)
            event.accept()
        else:
            super().mouseMoveEvent(event)

    def mouseReleaseEvent(self, event) -> None:  # type: ignore[override]
        """End the drag when the left button is released."""
        if event.button() == Qt.MouseButton.LeftButton:
            self._drag_active = False
            event.accept()
        else:
            super().mouseReleaseEvent(event)

    # Support pressing Esc as an alternative to clicking Stop
    def keyPressEvent(self, event) -> None:  # type: ignore[override]
        """Treat Esc as Stop; defer every other key to the base class."""
        if event.key() == Qt.Key.Key_Escape:
            self._on_stop_clicked()
        else:
            super().keyPressEvent(event)
453
+
454
+
455
def run_gui(
    cfg: Config | None = None,
    user_prompt: str | None = None,
    user_prompt_file: Path | None = None,
    save_all: bool = False,
    outfile_prefix: str | None = None,
    transcribe_mode: bool = False,
    log_level: str = "INFO",
) -> None:
    """
    Launch the PySide6 app with the minimal recorder window.
    Supports transcribe_mode for pure transcription without prompt.

    Args:
        cfg: Configuration to use; loaded via ``Config.load`` when omitted.
        user_prompt: Optional prompt text forwarded to the window.
        user_prompt_file: Optional prompt file forwarded to the window.
        save_all: Forwarded to the window.
        outfile_prefix: Forwarded to the window.
        transcribe_mode: Forwarded to the window.
        log_level: Passed to ``Config.load`` and ``config.setup_environment``.

    Returns after ``app.exec()`` (the Qt event loop) finishes.
    """
    if cfg is None:
        cfg = Config.load(log_level=log_level)
    config.setup_environment(log_level=log_level)

    # Reuse an existing application instance if there is one.
    app = QApplication.instance() or QApplication([])
    # Narrow the runtime type before calling QApplication-specific methods.
    if isinstance(app, QApplication):
        app.setFont(get_fixed_font(11))
        # Install the stylesheet as early as possible, but only once.
        existing = app.styleSheet() or ""
        if DARK_MONO_STYLESHEET not in existing:
            app.setStyleSheet(existing + DARK_MONO_STYLESHEET)

    window = RecorderWindow(
        cfg=cfg,
        user_prompt=user_prompt,
        user_prompt_file=user_prompt_file,
        save_all=save_all,
        outfile_prefix=outfile_prefix,
        transcribe_mode=transcribe_mode,
    )
    window.show()
    app.exec()