supervoxtral 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {supervoxtral-0.1.1.dist-info → supervoxtral-0.1.3.dist-info}/METADATA +1 -1
- {supervoxtral-0.1.1.dist-info → supervoxtral-0.1.3.dist-info}/RECORD +8 -8
- svx/cli.py +5 -1
- svx/core/pipeline.py +179 -153
- svx/ui/qt_app.py +102 -52
- {supervoxtral-0.1.1.dist-info → supervoxtral-0.1.3.dist-info}/WHEEL +0 -0
- {supervoxtral-0.1.1.dist-info → supervoxtral-0.1.3.dist-info}/entry_points.txt +0 -0
- {supervoxtral-0.1.1.dist-info → supervoxtral-0.1.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
svx/__init__.py,sha256=qPEe5u3PT8yOQN4MiOLj_Bd18HqcRb6fxnPDfdMUP7w,742
|
|
2
|
-
svx/cli.py,sha256=
|
|
2
|
+
svx/cli.py,sha256=FMDdOSwTncgMxBj_H2BAYcuCtDNX9wEelbqm4ddo0O0,9132
|
|
3
3
|
svx/core/__init__.py,sha256=mhzXuIXo3kUzjWme0Bxhe4TQZQELlyEiG_89LUAPC7M,2856
|
|
4
4
|
svx/core/audio.py,sha256=r0m5T1uzdsJ1j9YXgQ5clv15dvMwZBp_bk2aLpjnrkc,7684
|
|
5
5
|
svx/core/clipboard.py,sha256=IFtiN2SnYKQIu0WXx0hCK8syvDXanBpm1Jr2a8X7y9s,3692
|
|
6
6
|
svx/core/config.py,sha256=irODbQBOosIdWANaj1Mju4NfdvPiqTkV0gWmoTNjZRM,14569
|
|
7
|
-
svx/core/pipeline.py,sha256=
|
|
7
|
+
svx/core/pipeline.py,sha256=nqvCgK5Pbyx18mfACrN_mIDt546Bh7fKA6MF4XG1hxM,10637
|
|
8
8
|
svx/core/prompt.py,sha256=z-TFVQjHr4mWYXWYLRjCIChg4dMvG4GGJYcIy5QQwJY,5099
|
|
9
9
|
svx/core/storage.py,sha256=5_xKYEpvDhaixRxmSTBlyX_jt8ssjHwHzX9VodcrtJw,3213
|
|
10
10
|
svx/providers/__init__.py,sha256=SzlSWpZSUIptbSrAnGfi0d0NX4hYTpT0ObWpYyskDdA,2634
|
|
11
11
|
svx/providers/base.py,sha256=YoiI8KWVRGISh7dx9XXPr1Q1a7ZDu8vfeJFlPbcKr20,2695
|
|
12
12
|
svx/providers/mistral.py,sha256=vrBatNZg0zGNkJ5Qfnfz6ZwP6QtBgIt9sT_w59zkSO0,6636
|
|
13
|
-
svx/ui/qt_app.py,sha256=
|
|
14
|
-
supervoxtral-0.1.
|
|
15
|
-
supervoxtral-0.1.
|
|
16
|
-
supervoxtral-0.1.
|
|
17
|
-
supervoxtral-0.1.
|
|
18
|
-
supervoxtral-0.1.
|
|
13
|
+
svx/ui/qt_app.py,sha256=0XoAk-6vCJguYq1ZVZA5zm-00442HOds_ibDHoDz-J0,18466
|
|
14
|
+
supervoxtral-0.1.3.dist-info/METADATA,sha256=SpB9BeL_dbolMTM6UydjskMHHy_gnA4QJ1RfZI-Q3kE,753
|
|
15
|
+
supervoxtral-0.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
16
|
+
supervoxtral-0.1.3.dist-info/entry_points.txt,sha256=phJhRy3VkYHC6AR_tUB5CypHzG0ePRR9sB13HWE1vEg,36
|
|
17
|
+
supervoxtral-0.1.3.dist-info/licenses/LICENSE,sha256=fCEBKmC4i-1WZAwoKjKWegfDd8qNsG8ECB7JyqoswyQ,1064
|
|
18
|
+
supervoxtral-0.1.3.dist-info/RECORD,,
|
svx/cli.py
CHANGED
|
@@ -191,6 +191,11 @@ def record(
|
|
|
191
191
|
user_prompt = None
|
|
192
192
|
user_prompt_file = None
|
|
193
193
|
|
|
194
|
+
if gui and transcribe:
|
|
195
|
+
console.print("[yellow]Warning: --transcribe has no effect in GUI mode.[/yellow]")
|
|
196
|
+
console.print("[yellow]Use the 'Transcribe' or 'Prompt' buttons in the interface.[/yellow]")
|
|
197
|
+
transcribe = False
|
|
198
|
+
|
|
194
199
|
# If GUI requested, launch GUI with the resolved parameters and exit.
|
|
195
200
|
if gui:
|
|
196
201
|
from svx.ui.qt_app import run_gui
|
|
@@ -202,7 +207,6 @@ def record(
|
|
|
202
207
|
user_prompt_file=user_prompt_file,
|
|
203
208
|
save_all=save_all,
|
|
204
209
|
outfile_prefix=outfile_prefix,
|
|
205
|
-
transcribe_mode=transcribe,
|
|
206
210
|
)
|
|
207
211
|
return
|
|
208
212
|
|
svx/core/pipeline.py
CHANGED
|
@@ -50,6 +50,60 @@ class RecordingPipeline:
|
|
|
50
50
|
self.progress_callback(msg)
|
|
51
51
|
logging.info(msg)
|
|
52
52
|
|
|
53
|
+
def record(self, stop_event: threading.Event | None = None) -> tuple[Path, float]:
|
|
54
|
+
"""
|
|
55
|
+
Record audio and return wav_path, duration.
|
|
56
|
+
|
|
57
|
+
Returns:
|
|
58
|
+
tuple[Path, float]: wav_path, duration.
|
|
59
|
+
"""
|
|
60
|
+
# Resolve parameters
|
|
61
|
+
_provider = self.cfg.defaults.provider
|
|
62
|
+
audio_format = self.cfg.defaults.format
|
|
63
|
+
model = self.cfg.defaults.model
|
|
64
|
+
_original_model = model
|
|
65
|
+
_language = self.cfg.defaults.language
|
|
66
|
+
rate = self.cfg.defaults.rate
|
|
67
|
+
channels = self.cfg.defaults.channels
|
|
68
|
+
device = self.cfg.defaults.device
|
|
69
|
+
base = self.outfile_prefix or f"rec_{timestamp()}"
|
|
70
|
+
keep_audio = self.save_all or self.cfg.defaults.keep_audio_files
|
|
71
|
+
|
|
72
|
+
# Validation (fail fast)
|
|
73
|
+
if channels not in (1, 2):
|
|
74
|
+
raise ValueError("channels must be 1 or 2")
|
|
75
|
+
if rate <= 0:
|
|
76
|
+
raise ValueError("rate must be > 0")
|
|
77
|
+
if audio_format not in {"wav", "mp3", "opus"}:
|
|
78
|
+
raise ValueError("format must be one of wav|mp3|opus")
|
|
79
|
+
|
|
80
|
+
stop_for_recording = stop_event or threading.Event()
|
|
81
|
+
|
|
82
|
+
self._status("Recording...")
|
|
83
|
+
if keep_audio:
|
|
84
|
+
self.cfg.recordings_dir.mkdir(parents=True, exist_ok=True)
|
|
85
|
+
wav_path = self.cfg.recordings_dir / f"{base}.wav"
|
|
86
|
+
duration = record_wav(
|
|
87
|
+
wav_path,
|
|
88
|
+
samplerate=rate,
|
|
89
|
+
channels=channels,
|
|
90
|
+
device=device,
|
|
91
|
+
stop_event=stop_for_recording,
|
|
92
|
+
)
|
|
93
|
+
else:
|
|
94
|
+
# Use mktemp for temp wav_path
|
|
95
|
+
wav_path = Path(tempfile.mktemp(suffix=".wav"))
|
|
96
|
+
duration = record_wav(
|
|
97
|
+
wav_path,
|
|
98
|
+
samplerate=rate,
|
|
99
|
+
channels=channels,
|
|
100
|
+
device=device,
|
|
101
|
+
stop_event=stop_for_recording,
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
self._status("Recording completed.")
|
|
105
|
+
return wav_path, duration
|
|
106
|
+
|
|
53
107
|
def _setup_save_all(self) -> None:
|
|
54
108
|
"""Apply save_all overrides: set keeps to True, create dirs, add file logging."""
|
|
55
109
|
if not self.save_all:
|
|
@@ -78,183 +132,155 @@ class RecordingPipeline:
|
|
|
78
132
|
root_logger.addHandler(file_handler)
|
|
79
133
|
logging.info("File logging enabled for this run")
|
|
80
134
|
|
|
81
|
-
def
|
|
135
|
+
def process(
|
|
136
|
+
self, wav_path: Path, duration: float, transcribe_mode: bool, user_prompt: str | None = None
|
|
137
|
+
) -> dict[str, Any]:
|
|
82
138
|
"""
|
|
83
|
-
|
|
139
|
+
Process recorded audio: convert if needed, transcribe, save, copy.
|
|
84
140
|
|
|
85
141
|
Args:
|
|
86
|
-
|
|
142
|
+
wav_path: Path to the recorded WAV file.
|
|
143
|
+
duration: Recording duration in seconds.
|
|
144
|
+
transcribe_mode: Whether to use pure transcription mode.
|
|
145
|
+
user_prompt: User prompt to use (None for transcribe_mode).
|
|
87
146
|
|
|
88
147
|
Returns:
|
|
89
148
|
Dict with 'text' (str), 'raw' (dict), 'duration' (float),
|
|
90
149
|
'paths' (dict of Path or None).
|
|
91
|
-
|
|
92
|
-
Raises:
|
|
93
|
-
Exception: On recording, conversion, or transcription errors.
|
|
94
150
|
"""
|
|
95
|
-
self._setup_save_all()
|
|
96
|
-
|
|
97
151
|
# Resolve parameters
|
|
98
152
|
provider = self.cfg.defaults.provider
|
|
99
153
|
audio_format = self.cfg.defaults.format
|
|
100
154
|
model = self.cfg.defaults.model
|
|
101
155
|
original_model = model
|
|
102
|
-
if
|
|
156
|
+
if transcribe_mode:
|
|
103
157
|
model = "voxtral-mini-latest"
|
|
104
158
|
if original_model != "voxtral-mini-latest":
|
|
105
159
|
logging.warning(
|
|
106
|
-
"
|
|
107
|
-
"(
|
|
160
|
+
"Transcribe mode: model override from '%s' to 'voxtral-mini-latest'\n"
|
|
161
|
+
"(optimized for transcription).",
|
|
108
162
|
original_model,
|
|
109
163
|
)
|
|
110
164
|
language = self.cfg.defaults.language
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
165
|
+
if wav_path.stem.endswith(".wav"):
|
|
166
|
+
base = wav_path.stem.replace(".wav", "")
|
|
167
|
+
else:
|
|
168
|
+
base = wav_path.stem
|
|
169
|
+
keep_transcript = self.save_all or self.cfg.defaults.keep_transcript_files
|
|
170
|
+
copy_to_clip = self.cfg.defaults.copy
|
|
171
|
+
|
|
172
|
+
# Resolve user prompt if not provided
|
|
173
|
+
final_user_prompt = None
|
|
174
|
+
if not transcribe_mode:
|
|
175
|
+
if user_prompt is None:
|
|
176
|
+
final_user_prompt = self.cfg.resolve_prompt(self.user_prompt, self.user_prompt_file)
|
|
177
|
+
else:
|
|
178
|
+
final_user_prompt = user_prompt
|
|
179
|
+
self._status("Transcribe mode not activated: using prompt.")
|
|
180
|
+
else:
|
|
181
|
+
self._status("Transcribe mode activated: no prompt used.")
|
|
182
|
+
|
|
183
|
+
paths: dict[str, Path | None] = {"wav": wav_path}
|
|
184
|
+
|
|
185
|
+
# Convert if needed
|
|
186
|
+
to_send_path = wav_path
|
|
187
|
+
_converted = False
|
|
188
|
+
if audio_format in {"mp3", "opus"}:
|
|
189
|
+
self._status("Converting...")
|
|
190
|
+
to_send_path = convert_audio(wav_path, audio_format)
|
|
191
|
+
logging.info("Converted %s -> %s", wav_path, to_send_path)
|
|
192
|
+
paths["converted"] = to_send_path
|
|
193
|
+
_converted = True
|
|
194
|
+
|
|
195
|
+
# Transcribe
|
|
196
|
+
self._status("Transcribing...")
|
|
197
|
+
prov = get_provider(provider, cfg=self.cfg)
|
|
198
|
+
result = prov.transcribe(
|
|
199
|
+
to_send_path,
|
|
200
|
+
user_prompt=final_user_prompt,
|
|
201
|
+
model=model,
|
|
202
|
+
language=language,
|
|
203
|
+
transcribe_mode=transcribe_mode,
|
|
204
|
+
)
|
|
205
|
+
text = result["text"]
|
|
206
|
+
raw = result["raw"]
|
|
207
|
+
|
|
208
|
+
# Save if keeping transcripts
|
|
209
|
+
if keep_transcript:
|
|
210
|
+
self.cfg.transcripts_dir.mkdir(parents=True, exist_ok=True)
|
|
211
|
+
txt_path, json_path = save_transcript(
|
|
212
|
+
self.cfg.transcripts_dir, base, provider, text, raw
|
|
213
|
+
)
|
|
214
|
+
paths["txt"] = txt_path
|
|
215
|
+
paths["json"] = json_path
|
|
216
|
+
else:
|
|
217
|
+
paths["txt"] = None
|
|
218
|
+
paths["json"] = None
|
|
219
|
+
|
|
220
|
+
# Copy to clipboard
|
|
221
|
+
if copy_to_clip:
|
|
222
|
+
try:
|
|
223
|
+
copy_to_clipboard(text)
|
|
224
|
+
logging.info("Copied transcription to clipboard")
|
|
225
|
+
except Exception as e:
|
|
226
|
+
logging.warning("Failed to copy to clipboard: %s", e)
|
|
227
|
+
|
|
228
|
+
logging.info("Processing finished (%.2fs)", duration)
|
|
229
|
+
return {
|
|
230
|
+
"text": text,
|
|
231
|
+
"raw": raw,
|
|
232
|
+
"duration": duration,
|
|
233
|
+
"paths": paths,
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
def clean(self, wav_path: Path, paths: dict[str, Path | None], keep_audio: bool) -> None:
|
|
237
|
+
"""
|
|
238
|
+
Clean up temporary files.
|
|
239
|
+
|
|
240
|
+
Args:
|
|
241
|
+
wav_path: The original WAV path.
|
|
242
|
+
paths: The paths dict from process().
|
|
243
|
+
keep_audio: Whether to keep audio files (if True, no deletion).
|
|
244
|
+
"""
|
|
245
|
+
if not keep_audio and wav_path.exists():
|
|
246
|
+
wav_path.unlink()
|
|
247
|
+
logging.info("Deleted temp WAV: %s", wav_path)
|
|
248
|
+
|
|
249
|
+
if "converted" in paths and paths["converted"] and paths["converted"] != wav_path:
|
|
250
|
+
if paths["converted"].exists():
|
|
251
|
+
paths["converted"].unlink()
|
|
252
|
+
logging.info("Deleted temp converted: %s", paths["converted"])
|
|
253
|
+
|
|
254
|
+
self._status("Cleanup completed.")
|
|
255
|
+
|
|
256
|
+
def run(self, stop_event: threading.Event | None = None) -> dict[str, Any]:
|
|
257
|
+
"""
|
|
258
|
+
Execute the full pipeline.
|
|
259
|
+
|
|
260
|
+
Args:
|
|
261
|
+
stop_event: Optional event to signal recording stop (e.g., for GUI).
|
|
262
|
+
|
|
263
|
+
Returns:
|
|
264
|
+
Dict with 'text' (str), 'raw' (dict), 'duration' (float),
|
|
265
|
+
'paths' (dict of Path or None).
|
|
266
|
+
|
|
267
|
+
Raises:
|
|
268
|
+
Exception: On recording, conversion, or transcription errors.
|
|
269
|
+
"""
|
|
270
|
+
self._setup_save_all()
|
|
271
|
+
|
|
272
|
+
wav_path, duration = self.record(stop_event)
|
|
273
|
+
keep_audio = self.save_all or self.cfg.defaults.keep_audio_files
|
|
274
|
+
|
|
115
275
|
if self.transcribe_mode:
|
|
116
276
|
final_user_prompt = None
|
|
117
277
|
self._status("Mode Transcribe activated: no prompt used.")
|
|
118
278
|
else:
|
|
119
279
|
final_user_prompt = self.cfg.resolve_prompt(self.user_prompt, self.user_prompt_file)
|
|
120
|
-
keep_audio = self.cfg.defaults.keep_audio_files
|
|
121
|
-
keep_transcript = self.cfg.defaults.keep_transcript_files
|
|
122
|
-
copy_to_clip = self.cfg.defaults.copy
|
|
123
280
|
|
|
124
|
-
|
|
125
|
-
if channels not in (1, 2):
|
|
126
|
-
raise ValueError("channels must be 1 or 2")
|
|
127
|
-
if rate <= 0:
|
|
128
|
-
raise ValueError("rate must be > 0")
|
|
129
|
-
if audio_format not in {"wav", "mp3", "opus"}: # noqa: E501
|
|
130
|
-
raise ValueError("format must be one of wav|mp3|opus")
|
|
281
|
+
result = self.process(wav_path, duration, self.transcribe_mode, final_user_prompt)
|
|
131
282
|
|
|
132
|
-
|
|
133
|
-
stop_for_recording = stop_event or threading.Event()
|
|
283
|
+
self.clean(wav_path, result["paths"], keep_audio=keep_audio)
|
|
134
284
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
if keep_audio:
|
|
138
|
-
self.cfg.recordings_dir.mkdir(parents=True, exist_ok=True)
|
|
139
|
-
wav_path = self.cfg.recordings_dir / f"{base}.wav"
|
|
140
|
-
duration = record_wav(
|
|
141
|
-
wav_path,
|
|
142
|
-
samplerate=rate,
|
|
143
|
-
channels=channels,
|
|
144
|
-
device=device,
|
|
145
|
-
stop_event=stop_for_recording,
|
|
146
|
-
)
|
|
147
|
-
to_send_path = wav_path
|
|
148
|
-
paths["wav"] = wav_path
|
|
149
|
-
else:
|
|
150
|
-
with tempfile.TemporaryDirectory() as tmpdir:
|
|
151
|
-
tmp_path = Path(tmpdir)
|
|
152
|
-
wav_path = tmp_path / f"{base}.wav"
|
|
153
|
-
duration = record_wav(
|
|
154
|
-
wav_path,
|
|
155
|
-
samplerate=rate,
|
|
156
|
-
channels=channels,
|
|
157
|
-
device=device,
|
|
158
|
-
stop_event=stop_for_recording,
|
|
159
|
-
)
|
|
160
|
-
to_send_path = wav_path
|
|
161
|
-
|
|
162
|
-
# Convert if needed
|
|
163
|
-
if audio_format in {"mp3", "opus"}:
|
|
164
|
-
self._status("Converting...")
|
|
165
|
-
to_send_path = convert_audio(wav_path, audio_format)
|
|
166
|
-
logging.info("Converted %s -> %s", wav_path, to_send_path)
|
|
167
|
-
|
|
168
|
-
# Transcribe
|
|
169
|
-
self._status("Transcribing...")
|
|
170
|
-
prov = get_provider(provider, cfg=self.cfg)
|
|
171
|
-
result = prov.transcribe(
|
|
172
|
-
to_send_path,
|
|
173
|
-
user_prompt=final_user_prompt,
|
|
174
|
-
model=model,
|
|
175
|
-
language=language,
|
|
176
|
-
transcribe_mode=self.transcribe_mode,
|
|
177
|
-
)
|
|
178
|
-
text = result["text"]
|
|
179
|
-
raw = result["raw"]
|
|
180
|
-
|
|
181
|
-
# Save if keeping transcripts
|
|
182
|
-
if keep_transcript:
|
|
183
|
-
self.cfg.transcripts_dir.mkdir(parents=True, exist_ok=True)
|
|
184
|
-
txt_path, json_path = save_transcript(
|
|
185
|
-
self.cfg.transcripts_dir, base, provider, text, raw
|
|
186
|
-
)
|
|
187
|
-
paths["txt"] = txt_path
|
|
188
|
-
paths["json"] = json_path
|
|
189
|
-
else:
|
|
190
|
-
paths["txt"] = None
|
|
191
|
-
paths["json"] = None
|
|
192
|
-
|
|
193
|
-
# Copy to clipboard
|
|
194
|
-
if copy_to_clip:
|
|
195
|
-
try:
|
|
196
|
-
copy_to_clipboard(text)
|
|
197
|
-
logging.info("Copied transcription to clipboard")
|
|
198
|
-
except Exception as e:
|
|
199
|
-
logging.warning("Failed to copy to clipboard: %s", e)
|
|
200
|
-
|
|
201
|
-
logging.info("Pipeline finished (%.2fs)", duration)
|
|
202
|
-
return {
|
|
203
|
-
"text": text,
|
|
204
|
-
"raw": raw,
|
|
205
|
-
"duration": duration,
|
|
206
|
-
"paths": paths,
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
# For keep_audio=True: continue outside tempdir
|
|
210
|
-
# Convert if needed
|
|
211
|
-
if audio_format in {"mp3", "opus"}:
|
|
212
|
-
self._status("Converting...")
|
|
213
|
-
to_send_path = convert_audio(wav_path, audio_format)
|
|
214
|
-
logging.info("Converted %s -> %s", wav_path, to_send_path)
|
|
215
|
-
paths["converted"] = to_send_path
|
|
216
|
-
|
|
217
|
-
# Transcribe
|
|
218
|
-
self._status("Transcribing...")
|
|
219
|
-
prov = get_provider(provider, cfg=self.cfg)
|
|
220
|
-
result = prov.transcribe(
|
|
221
|
-
to_send_path,
|
|
222
|
-
user_prompt=final_user_prompt,
|
|
223
|
-
model=model,
|
|
224
|
-
language=language,
|
|
225
|
-
transcribe_mode=self.transcribe_mode,
|
|
226
|
-
)
|
|
227
|
-
text = result["text"]
|
|
228
|
-
raw = result["raw"]
|
|
229
|
-
|
|
230
|
-
# Save if keeping transcripts
|
|
231
|
-
if keep_transcript:
|
|
232
|
-
self.cfg.transcripts_dir.mkdir(parents=True, exist_ok=True)
|
|
233
|
-
txt_path, json_path = save_transcript(
|
|
234
|
-
self.cfg.transcripts_dir, base, provider, text, raw
|
|
235
|
-
)
|
|
236
|
-
paths["txt"] = txt_path
|
|
237
|
-
paths["json"] = json_path
|
|
238
|
-
else:
|
|
239
|
-
paths["txt"] = None
|
|
240
|
-
paths["json"] = None
|
|
241
|
-
|
|
242
|
-
# Copy to clipboard
|
|
243
|
-
if copy_to_clip:
|
|
244
|
-
try:
|
|
245
|
-
copy_to_clipboard(text)
|
|
246
|
-
logging.info("Copied transcription to clipboard")
|
|
247
|
-
except Exception as e:
|
|
248
|
-
logging.warning("Failed to copy to clipboard: %s", e)
|
|
249
|
-
|
|
250
|
-
logging.info("Pipeline finished (%.2fs)", duration)
|
|
251
|
-
return {
|
|
252
|
-
"text": text,
|
|
253
|
-
"raw": raw,
|
|
254
|
-
"duration": duration,
|
|
255
|
-
"paths": paths,
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
except Exception:
|
|
259
|
-
logging.exception("Pipeline failed")
|
|
260
|
-
raise
|
|
285
|
+
logging.info("Pipeline finished (%.2fs)", duration)
|
|
286
|
+
return result
|
svx/ui/qt_app.py
CHANGED
|
@@ -19,12 +19,14 @@ from __future__ import annotations
|
|
|
19
19
|
|
|
20
20
|
import logging
|
|
21
21
|
import threading
|
|
22
|
+
import time
|
|
22
23
|
from pathlib import Path
|
|
23
24
|
|
|
24
25
|
from PySide6.QtCore import QObject, QPoint, Qt, QTimer, Signal
|
|
25
26
|
from PySide6.QtGui import QAction, QFont, QFontDatabase, QKeySequence
|
|
26
27
|
from PySide6.QtWidgets import (
|
|
27
28
|
QApplication,
|
|
29
|
+
QHBoxLayout,
|
|
28
30
|
QLabel,
|
|
29
31
|
QMessageBox,
|
|
30
32
|
QPushButton,
|
|
@@ -63,20 +65,32 @@ QLabel#info_label {
|
|
|
63
65
|
|
|
64
66
|
/* Stop button */
|
|
65
67
|
QPushButton {
|
|
66
|
-
background-color: #
|
|
68
|
+
background-color: #1e40af;
|
|
67
69
|
color: #ffffff;
|
|
68
70
|
border: none;
|
|
69
|
-
border-radius:
|
|
70
|
-
padding: 8px
|
|
71
|
+
border-radius: 2px;
|
|
72
|
+
padding: 4px 8px;
|
|
71
73
|
margin: 6px;
|
|
72
|
-
min-width:
|
|
74
|
+
min-width: 60px;
|
|
73
75
|
}
|
|
74
76
|
QPushButton:disabled {
|
|
75
|
-
background-color: #
|
|
76
|
-
color: #
|
|
77
|
+
background-color: #374151;
|
|
78
|
+
color: #9ca3af;
|
|
77
79
|
}
|
|
78
80
|
QPushButton:hover {
|
|
79
|
-
background-color: #
|
|
81
|
+
background-color: #1d4ed8;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
/* Cancel button */
|
|
85
|
+
QPushButton#cancel_btn {
|
|
86
|
+
background-color: #b91c1c;
|
|
87
|
+
}
|
|
88
|
+
QPushButton#cancel_btn:hover {
|
|
89
|
+
background-color: #ef4444;
|
|
90
|
+
}
|
|
91
|
+
QPushButton#cancel_btn:disabled {
|
|
92
|
+
background-color: #4b5563;
|
|
93
|
+
color: #9ca3af;
|
|
80
94
|
}
|
|
81
95
|
|
|
82
96
|
/* Small window border effect (subtle) */
|
|
@@ -189,12 +203,12 @@ class RecorderWorker(QObject):
|
|
|
189
203
|
status (str): human-readable status updates for the UI.
|
|
190
204
|
done (str): emitted with the final transcription text on success.
|
|
191
205
|
error (str): emitted with an error message on failure.
|
|
192
|
-
Supports transcribe_mode for pure transcription without prompt.
|
|
193
206
|
"""
|
|
194
207
|
|
|
195
208
|
status = Signal(str)
|
|
196
209
|
done = Signal(str)
|
|
197
210
|
error = Signal(str)
|
|
211
|
+
canceled = Signal()
|
|
198
212
|
|
|
199
213
|
def __init__(
|
|
200
214
|
self,
|
|
@@ -203,7 +217,6 @@ class RecorderWorker(QObject):
|
|
|
203
217
|
user_prompt_file: Path | None = None,
|
|
204
218
|
save_all: bool = False,
|
|
205
219
|
outfile_prefix: str | None = None,
|
|
206
|
-
transcribe_mode: bool = False,
|
|
207
220
|
) -> None:
|
|
208
221
|
super().__init__()
|
|
209
222
|
self.cfg = cfg
|
|
@@ -211,13 +224,21 @@ class RecorderWorker(QObject):
|
|
|
211
224
|
self.user_prompt_file = user_prompt_file
|
|
212
225
|
self.save_all = save_all
|
|
213
226
|
self.outfile_prefix = outfile_prefix
|
|
214
|
-
self.
|
|
227
|
+
self.mode: str | None = None
|
|
228
|
+
self.cancel_requested: bool = False
|
|
215
229
|
self._stop_event = threading.Event()
|
|
216
230
|
|
|
231
|
+
def set_mode(self, mode: str) -> None:
|
|
232
|
+
self.mode = mode
|
|
233
|
+
|
|
217
234
|
def stop(self) -> None:
|
|
218
235
|
"""Request the recording to stop."""
|
|
219
236
|
self._stop_event.set()
|
|
220
237
|
|
|
238
|
+
def cancel(self) -> None:
|
|
239
|
+
self.cancel_requested = True
|
|
240
|
+
self._stop_event.set()
|
|
241
|
+
|
|
221
242
|
def _resolve_user_prompt(self) -> str:
|
|
222
243
|
"""
|
|
223
244
|
Determine the final user prompt using the shared resolver.
|
|
@@ -227,14 +248,12 @@ class RecorderWorker(QObject):
|
|
|
227
248
|
def run(self) -> None:
|
|
228
249
|
"""
|
|
229
250
|
Execute the pipeline:
|
|
230
|
-
-
|
|
231
|
-
-
|
|
232
|
-
-
|
|
233
|
-
-
|
|
234
|
-
- copy_to_clipboard
|
|
235
|
-
- optionally delete audio files
|
|
236
|
-
Supports transcribe_mode for pure transcription without prompt.
|
|
251
|
+
- record (until stop)
|
|
252
|
+
- wait for mode
|
|
253
|
+
- process
|
|
254
|
+
- clean
|
|
237
255
|
"""
|
|
256
|
+
|
|
238
257
|
try:
|
|
239
258
|
pipeline = RecordingPipeline(
|
|
240
259
|
cfg=self.cfg,
|
|
@@ -242,10 +261,24 @@ class RecorderWorker(QObject):
|
|
|
242
261
|
user_prompt_file=self.user_prompt_file,
|
|
243
262
|
save_all=self.save_all,
|
|
244
263
|
outfile_prefix=self.outfile_prefix,
|
|
245
|
-
transcribe_mode=self.transcribe_mode,
|
|
246
264
|
progress_callback=self.status.emit,
|
|
247
265
|
)
|
|
248
|
-
|
|
266
|
+
self.status.emit("Recording in progress...")
|
|
267
|
+
wav_path, duration = pipeline.record(self._stop_event)
|
|
268
|
+
self.status.emit("Recording finished.")
|
|
269
|
+
if self.cancel_requested:
|
|
270
|
+
keep_audio = self.save_all or self.cfg.defaults.keep_audio_files
|
|
271
|
+
pipeline.clean(wav_path, {"wav": wav_path}, keep_audio)
|
|
272
|
+
self.canceled.emit()
|
|
273
|
+
return
|
|
274
|
+
self.status.emit("Processing in progress...")
|
|
275
|
+
while self.mode is None:
|
|
276
|
+
time.sleep(0.05)
|
|
277
|
+
transcribe_mode = self.mode == "transcribe"
|
|
278
|
+
user_prompt = None if transcribe_mode else self._resolve_user_prompt()
|
|
279
|
+
result = pipeline.process(wav_path, duration, transcribe_mode, user_prompt)
|
|
280
|
+
keep_audio = self.save_all or self.cfg.defaults.keep_audio_files
|
|
281
|
+
pipeline.clean(wav_path, result["paths"], keep_audio)
|
|
249
282
|
self.done.emit(result["text"])
|
|
250
283
|
except Exception as e:
|
|
251
284
|
logging.exception("Pipeline failed")
|
|
@@ -254,13 +287,12 @@ class RecorderWorker(QObject):
|
|
|
254
287
|
|
|
255
288
|
class RecorderWindow(QWidget):
|
|
256
289
|
"""
|
|
257
|
-
Frameless always-on-top window with
|
|
290
|
+
Frameless always-on-top window with Transcribe and Prompt buttons.
|
|
258
291
|
|
|
259
292
|
Launching this window will immediately start the recording in a background thread.
|
|
260
293
|
|
|
261
294
|
Window can be dragged by clicking anywhere on the widget background.
|
|
262
|
-
Pressing Esc triggers
|
|
263
|
-
Supports transcribe_mode for pure transcription without prompt.
|
|
295
|
+
Pressing Esc triggers Prompt mode.
|
|
264
296
|
"""
|
|
265
297
|
|
|
266
298
|
def __init__(
|
|
@@ -270,7 +302,6 @@ class RecorderWindow(QWidget):
|
|
|
270
302
|
user_prompt_file: Path | None = None,
|
|
271
303
|
save_all: bool = False,
|
|
272
304
|
outfile_prefix: str | None = None,
|
|
273
|
-
transcribe_mode: bool = False,
|
|
274
305
|
) -> None:
|
|
275
306
|
super().__init__()
|
|
276
307
|
|
|
@@ -279,7 +310,16 @@ class RecorderWindow(QWidget):
|
|
|
279
310
|
self.user_prompt_file = user_prompt_file
|
|
280
311
|
self.save_all = save_all
|
|
281
312
|
self.outfile_prefix = outfile_prefix
|
|
282
|
-
|
|
313
|
+
|
|
314
|
+
# Background worker (create early for signal connections)
|
|
315
|
+
self._worker = RecorderWorker(
|
|
316
|
+
cfg=self.cfg,
|
|
317
|
+
user_prompt=user_prompt,
|
|
318
|
+
user_prompt_file=user_prompt_file,
|
|
319
|
+
save_all=save_all,
|
|
320
|
+
outfile_prefix=outfile_prefix,
|
|
321
|
+
)
|
|
322
|
+
self._thread = threading.Thread(target=self._worker.run, daemon=True)
|
|
283
323
|
|
|
284
324
|
# Environment and prompt files
|
|
285
325
|
|
|
@@ -313,14 +353,9 @@ class RecorderWindow(QWidget):
|
|
|
313
353
|
"</span>"
|
|
314
354
|
)
|
|
315
355
|
format_html = f"<span style='color:#ffa657'>{self.cfg.defaults.format}</span>"
|
|
316
|
-
if self.transcribe_mode:
|
|
317
|
-
mode_html = "<span style='color:#ff7b72'>Transcribe</span>"
|
|
318
|
-
else:
|
|
319
|
-
mode_html = "<span style='color:#7ee787'>Completion</span>"
|
|
320
356
|
parts = [
|
|
321
357
|
prov_model_html,
|
|
322
358
|
format_html,
|
|
323
|
-
mode_html,
|
|
324
359
|
]
|
|
325
360
|
if self.cfg.defaults.language:
|
|
326
361
|
lang_html = f"<span style='color:#c9b4ff'>{self.cfg.defaults.language}</span>"
|
|
@@ -337,34 +372,42 @@ class RecorderWindow(QWidget):
|
|
|
337
372
|
self._info_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
|
|
338
373
|
layout.addWidget(self._info_label)
|
|
339
374
|
|
|
340
|
-
self._status_label = QLabel("Recording
|
|
375
|
+
self._status_label = QLabel("Recording in progress...")
|
|
341
376
|
self._status_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
|
|
342
377
|
layout.addWidget(self._status_label)
|
|
343
378
|
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
379
|
+
# Buttons layout
|
|
380
|
+
button_layout = QHBoxLayout()
|
|
381
|
+
button_layout.addStretch()
|
|
382
|
+
self._transcribe_btn = QPushButton("Transcribe")
|
|
383
|
+
self._transcribe_btn.setToolTip("Stop and transcribe without prompt")
|
|
384
|
+
self._transcribe_btn.clicked.connect(lambda: self._on_button_clicked("transcribe"))
|
|
385
|
+
button_layout.addWidget(self._transcribe_btn)
|
|
386
|
+
self._prompt_btn = QPushButton("Prompt")
|
|
387
|
+
self._prompt_btn.setToolTip("Stop and transcribe with prompt")
|
|
388
|
+
self._prompt_btn.clicked.connect(lambda: self._on_button_clicked("prompt"))
|
|
389
|
+
button_layout.addWidget(self._prompt_btn)
|
|
390
|
+
self._cancel_btn = QPushButton("Cancel")
|
|
391
|
+
self._cancel_btn.setObjectName("cancel_btn")
|
|
392
|
+
self._cancel_btn.setToolTip("Stop recording and quit without processing")
|
|
393
|
+
self._cancel_btn.clicked.connect(self._on_cancel_clicked)
|
|
394
|
+
button_layout.addWidget(self._cancel_btn)
|
|
395
|
+
button_layout.addStretch()
|
|
396
|
+
button_widget = QWidget()
|
|
397
|
+
button_widget.setLayout(button_layout)
|
|
398
|
+
layout.addWidget(button_widget, 0, Qt.AlignmentFlag.AlignCenter)
|
|
347
399
|
|
|
348
400
|
# Keyboard shortcut: Esc to stop
|
|
349
401
|
stop_action = QAction(self)
|
|
350
402
|
stop_action.setShortcut(QKeySequence.StandardKey.Cancel) # Esc
|
|
351
|
-
stop_action.triggered.connect(self.
|
|
403
|
+
stop_action.triggered.connect(lambda: self._worker.cancel())
|
|
352
404
|
self.addAction(stop_action)
|
|
353
405
|
|
|
354
|
-
# Background worker
|
|
355
|
-
self._worker = RecorderWorker(
|
|
356
|
-
cfg=self.cfg,
|
|
357
|
-
user_prompt=user_prompt,
|
|
358
|
-
user_prompt_file=user_prompt_file,
|
|
359
|
-
save_all=save_all,
|
|
360
|
-
outfile_prefix=outfile_prefix,
|
|
361
|
-
)
|
|
362
|
-
self._thread = threading.Thread(target=self._worker.run, daemon=True)
|
|
363
|
-
|
|
364
406
|
# Signals wiring
|
|
365
407
|
self._worker.status.connect(self._on_status)
|
|
366
408
|
self._worker.done.connect(self._on_done)
|
|
367
409
|
self._worker.error.connect(self._on_error)
|
|
410
|
+
self._worker.canceled.connect(self._close_soon)
|
|
368
411
|
|
|
369
412
|
# Apply stylesheet to the application for consistent appearance
|
|
370
413
|
app = QApplication.instance()
|
|
@@ -410,14 +453,24 @@ class RecorderWindow(QWidget):
|
|
|
410
453
|
|
|
411
454
|
def closeEvent(self, event) -> None: # type: ignore[override]
|
|
412
455
|
# Attempt to stop recording if the user closes the window via window controls.
|
|
413
|
-
self._worker.
|
|
456
|
+
self._worker.cancel()
|
|
414
457
|
super().closeEvent(event)
|
|
415
458
|
|
|
416
|
-
def
|
|
417
|
-
self.
|
|
418
|
-
self.
|
|
459
|
+
def _on_button_clicked(self, mode: str) -> None:
|
|
460
|
+
self._transcribe_btn.setEnabled(False)
|
|
461
|
+
self._prompt_btn.setEnabled(False)
|
|
462
|
+
self._cancel_btn.setEnabled(False)
|
|
463
|
+
self._status_label.setText("Stopping and processing...")
|
|
464
|
+
self._worker.set_mode(mode)
|
|
419
465
|
self._worker.stop()
|
|
420
466
|
|
|
467
|
+
def _on_cancel_clicked(self) -> None:
|
|
468
|
+
self._transcribe_btn.setEnabled(False)
|
|
469
|
+
self._prompt_btn.setEnabled(False)
|
|
470
|
+
self._cancel_btn.setEnabled(False)
|
|
471
|
+
self._status_label.setText("Canceling...")
|
|
472
|
+
self._worker.cancel()
|
|
473
|
+
|
|
421
474
|
# --- Drag handling for frameless window ---
|
|
422
475
|
def mousePressEvent(self, event) -> None: # type: ignore[override]
|
|
423
476
|
if event.button() == Qt.MouseButton.LeftButton:
|
|
@@ -447,7 +500,7 @@ class RecorderWindow(QWidget):
|
|
|
447
500
|
def keyPressEvent(self, event) -> None: # type: ignore[override]
|
|
448
501
|
# Qt.Key_Escape is a safety stop
|
|
449
502
|
if event.key() == Qt.Key.Key_Escape:
|
|
450
|
-
self.
|
|
503
|
+
self._worker.cancel()
|
|
451
504
|
else:
|
|
452
505
|
super().keyPressEvent(event)
|
|
453
506
|
|
|
@@ -458,14 +511,12 @@ def run_gui(
|
|
|
458
511
|
user_prompt_file: Path | None = None,
|
|
459
512
|
save_all: bool = False,
|
|
460
513
|
outfile_prefix: str | None = None,
|
|
461
|
-
transcribe_mode: bool = False,
|
|
462
514
|
log_level: str = "INFO",
|
|
463
515
|
) -> None:
|
|
464
516
|
if cfg is None:
|
|
465
517
|
cfg = Config.load(log_level=log_level)
|
|
466
518
|
"""
|
|
467
519
|
Launch the PySide6 app with the minimal recorder window.
|
|
468
|
-
Supports transcribe_mode for pure transcription without prompt.
|
|
469
520
|
"""
|
|
470
521
|
config.setup_environment(log_level=log_level)
|
|
471
522
|
|
|
@@ -485,7 +536,6 @@ def run_gui(
|
|
|
485
536
|
user_prompt_file=user_prompt_file,
|
|
486
537
|
save_all=save_all,
|
|
487
538
|
outfile_prefix=outfile_prefix,
|
|
488
|
-
transcribe_mode=transcribe_mode,
|
|
489
539
|
)
|
|
490
540
|
window.show()
|
|
491
541
|
app.exec()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|