voxing 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
voxing-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,42 @@
1
+ Metadata-Version: 2.4
2
+ Name: voxing
3
+ Version: 0.1.0
4
+ Summary: Voice assistant TUI powered by local MLX models
5
+ Author: yeungadrian
6
+ License-Expression: MIT
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Environment :: Console
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3.13
11
+ Classifier: Operating System :: MacOS
12
+ Requires-Dist: langdetect>=1.0.9
13
+ Requires-Dist: mlx-audio[tts]==0.3.1
14
+ Requires-Dist: mlx-lm>=0.30.5
15
+ Requires-Dist: pip>=26.0.1
16
+ Requires-Dist: pydantic-settings>=2.12.0
17
+ Requires-Dist: rich>=14.3.1
18
+ Requires-Dist: sounddevice>=0.5.3
19
+ Requires-Dist: textual>=7.5.0
20
+ Requires-Dist: typer>=0.23.1
21
+ Requires-Python: ==3.13.*
22
+ Project-URL: Homepage, https://github.com/yeungadrian/Voxing
23
+ Project-URL: Repository, https://github.com/yeungadrian/Voxing
24
+ Description-Content-Type: text/markdown
25
+
26
+ # Voxing
27
+ TUI for local voice / text assistant on macOS
28
+
29
+ ## Testing
30
+
31
+ Run the test suite:
32
+ ```bash
33
+ uv run pytest
34
+ ```
35
+
36
+ Update visual snapshots after UI changes:
37
+ ```bash
38
+ uv run pytest --snapshot-update
39
+ ```
40
+
41
+ TODO:
42
+ - Ability to run via `uvx voxing`?
voxing-0.1.0/README.md ADDED
@@ -0,0 +1,17 @@
1
+ # Voxing
2
+ TUI for local voice / text assistant on macOS
3
+
4
+ ## Testing
5
+
6
+ Run the test suite:
7
+ ```bash
8
+ uv run pytest
9
+ ```
10
+
11
+ Update visual snapshots after UI changes:
12
+ ```bash
13
+ uv run pytest --snapshot-update
14
+ ```
15
+
16
+ TODO:
17
+ - Ability to run via `uvx voxing`?
@@ -0,0 +1,66 @@
1
+ [project]
2
+ name = "voxing"
3
+ version = "0.1.0"
4
+ description = "Voice assistant TUI powered by local MLX models"
5
+ readme = "README.md"
6
+ requires-python = "==3.13.*"
7
+ authors = [{ name = "yeungadrian" }]
8
+ license = "MIT"
9
+ classifiers = [
10
+ "Development Status :: 3 - Alpha",
11
+ "Environment :: Console",
12
+ "License :: OSI Approved :: MIT License",
13
+ "Programming Language :: Python :: 3.13",
14
+ "Operating System :: MacOS",
15
+ ]
16
+ dependencies = [
17
+ "langdetect>=1.0.9",
18
+ "mlx-audio[tts]==0.3.1",
19
+ "mlx-lm>=0.30.5",
20
+ "pip>=26.0.1",
21
+ "pydantic-settings>=2.12.0",
22
+ "rich>=14.3.1",
23
+ "sounddevice>=0.5.3",
24
+ "textual>=7.5.0",
25
+ "typer>=0.23.1",
26
+ ]
27
+
28
+ [project.urls]
29
+ Homepage = "https://github.com/yeungadrian/Voxing"
30
+ Repository = "https://github.com/yeungadrian/Voxing"
31
+
32
+ [project.scripts]
33
+ voxing = "voxing.__main__:main"
34
+
35
+ [build-system]
36
+ requires = ["uv_build>=0.10.2,<0.11.0"]
37
+ build-backend = "uv_build"
38
+
39
+ [dependency-groups]
40
+ dev = [
41
+ "pre-commit>=4.5.1",
42
+ "pytest>=9.0.2",
43
+ "pytest-asyncio>=1.3.0",
44
+ "pytest-textual-snapshot>=1.0.0",
45
+ ]
46
+
47
+ [tool.ruff]
48
+ target-version = "py313"
49
+
50
+ [tool.ruff.lint]
51
+ select = [
52
+ "F", # Pyflakes
53
+ "E", # pycodestyle errors
54
+ "W", # pycodestyle warnings
55
+ "I", # isort
56
+ "UP", # pyupgrade
57
+ "B", # flake8-bugbear
58
+ "SIM", # flake8-simplify
59
+ "C4", # flake8-comprehensions
60
+ "RET", # flake8-return
61
+ "PTH", # flake8-use-pathlib
62
+ ]
63
+
64
+ [tool.pytest.ini_options]
65
+ asyncio_mode = "auto"
66
+ testpaths = ["tests"]
@@ -0,0 +1 @@
1
+ """Voxing TUI - A Textual-based Terminal User Interface for Voxing Voice Assistant."""
@@ -0,0 +1,13 @@
1
+ """Entry point for the Voxing TUI application."""
2
+
3
+ from voxing.app import VoxingApp
4
+
5
+
6
def main() -> None:
    """Launch the Voxing TUI and block until the user exits."""
    VoxingApp().run()
10
+
11
+
12
# Support direct execution (`python -m voxing` / `python __main__.py`).
if __name__ == "__main__":
    main()
@@ -0,0 +1,492 @@
1
+ """Main Voxing TUI application."""
2
+
3
+ import asyncio
4
+ import contextlib
5
+ import time
6
+ from os.path import commonprefix
7
+
8
+ from rich.text import Text
9
+ from textual import on
10
+ from textual.app import App, ComposeResult
11
+ from textual.binding import Binding
12
+ from textual.containers import Container, Horizontal, Vertical
13
+ from textual.css.query import NoMatches
14
+ from textual.events import Key
15
+ from textual.reactive import reactive
16
+ from textual.timer import Timer
17
+ from textual.widgets import Footer, Label
18
+ from textual.worker import Worker
19
+
20
+ from voxing import audio as audio_mod
21
+ from voxing.config import settings
22
+ from voxing.models import Models, load_llm, load_stt, load_tts
23
+ from voxing.models import llm as llm_mod
24
+ from voxing.models import stt as stt_mod
25
+ from voxing.models import tts as tts_mod
26
+ from voxing.models.llm import ChatMessage
27
+ from voxing.state import AppState, InteractionStats
28
+ from voxing.themes import FOREGROUND, PALETTE_1, TOKYO_NIGHT
29
+ from voxing.widgets import (
30
+ ChatInput,
31
+ ConversationLog,
32
+ MetricsPanel,
33
+ ModelSelection,
34
+ ModelSelector,
35
+ StatusPanel,
36
+ )
37
+
38
# Slash commands accepted by the input box, mapped to the one-line help text
# shown in the command-hint label while the user is typing.
COMMAND_DESCRIPTIONS: dict[str, str] = {
    "/record": "Record and process voice",
    "/transcribe": "Transcribe audio to text",
    "/tts": "Toggle text-to-speech",
    "/model": "Switch models",
    "/clear": "Clear conversation",
    "/exit": "Exit application",
}
# Command names in declaration order; used for tab-completion prefix matching.
COMMANDS = list(COMMAND_DESCRIPTIONS)
47
+
48
+
49
class VoxingApp(App):
    """Voxing Voice Assistant TUI Application.

    Top-level Textual app that wires together the status bar, conversation
    log, and chat input, and orchestrates the STT -> LLM -> TTS pipeline in
    background workers so the UI stays responsive while models run.
    """

    CSS_PATH = "styles.tcss"
    TITLE = "Voxing Voice Assistant"

    BINDINGS = [
        Binding("q", "quit", "Quit", priority=True),
        Binding("ctrl+c", "clear_conversation", "Clear"),
    ]

    # Reactive so watch_state() mirrors every transition into the status panel.
    state: reactive[AppState] = reactive(AppState.LOADING)

    def __init__(self) -> None:
        """Initialize the Voxing TUI app."""
        super().__init__()
        # None until _load_models() finishes; pipelines run only after READY.
        self.models: Models | None = None
        # True while a worker is handling user input (blocks new submissions).
        self.is_processing: bool = False
        self.tts_enabled: bool = False
        self.chat_history: list[ChatMessage] = []
        # Worker for the in-flight pipeline, kept so double-ESC can cancel it.
        self._active_worker: Worker[None] | None = None
        # Double-ESC cancel: first press arms for 2s, second press cancels.
        self._esc_pending: bool = False
        self._esc_timer: Timer | None = None

    def compose(self) -> ComposeResult:
        """Compose the app layout."""
        with Horizontal(id="status-bar"):
            yield StatusPanel(id="status-panel")
            yield MetricsPanel(id="metrics-panel")

        yield ConversationLog(id="conversation-log", wrap=True)

        with Vertical(id="bottom-section"), Container(id="input-container"):
            # Hint label starts hidden; shown while typing a "/" command.
            yield Label(id="command-hint", classes="hidden")
            yield ChatInput(id="user-input")

        yield Footer()

    def on_mount(self) -> None:
        """Called when app is mounted."""
        self.design = TOKYO_NIGHT

        text_area = self.query_one("#user-input", ChatInput)
        text_area.focus()
        text_area.show_line_numbers = False
        # Input stays disabled until all models finish loading
        # (_load_models re-enables it).
        text_area.disabled = True

        status_panel = self.query_one("#status-panel", StatusPanel)
        status_panel.current_state = self.state
        status_panel.tts_enabled = self.tts_enabled

        self.run_worker(self._load_models())

    async def _load_models(self) -> None:
        """Load all models in the background.

        The loaders are blocking, so each runs in the default thread-pool
        executor to keep the UI rendering during downloads/initialization.
        """
        loop = asyncio.get_running_loop()
        status_panel = self.query_one("#status-panel", StatusPanel)

        # NOTE(review): this initial load calls the loaders with no arguments
        # while _reload_models passes settings.* explicitly — presumably the
        # loader defaults come from the same settings; confirm they match.
        status_panel.show_status_message("Loading STT...")
        stt = await loop.run_in_executor(None, load_stt)

        status_panel.show_status_message("Loading LLM...")
        llm, tokenizer = await loop.run_in_executor(None, load_llm)

        status_panel.show_status_message("Loading TTS...")
        tts = await loop.run_in_executor(None, load_tts)

        status_panel.clear_status_message()

        self.models = Models(stt=stt, llm=llm, tts=tts, tokenizer=tokenizer)

        # Models ready: unlock the input box and leave the LOADING state.
        text_area = self.query_one("#user-input", ChatInput)
        text_area.disabled = False
        text_area.focus()
        self.state = AppState.READY

    def watch_state(self, new_state: AppState) -> None:
        """Called when state changes; mirrors the state into the status panel."""
        # State can change before widgets mount, so swallow NoMatches.
        with contextlib.suppress(NoMatches):
            status_panel = self.query_one("#status-panel", StatusPanel)
            status_panel.current_state = new_state

    def on_key(self, event: Key) -> None:
        """Handle key events: double-ESC cancel and Tab command completion."""
        if event.key == "escape":
            # ESC is only meaningful while a pipeline is running.
            if not self.is_processing:
                return
            event.prevent_default()
            event.stop()
            if self._esc_pending:
                # Second ESC within the 2s window: cancel the worker.
                self._cancel_processing()
            else:
                self._esc_pending = True
                self._show_status("Press ESC again to cancel", timeout=2.0)
                self._esc_timer = self.set_timer(2.0, self._reset_esc_pending)
            return

        if event.key != "tab":
            return

        # Tab completion for slash commands.
        text_area = self.query_one("#user-input", ChatInput)
        text = text_area.text.strip()

        if not text.startswith("/"):
            return

        matches = [cmd for cmd in COMMANDS if cmd.startswith(text.lower())]
        if not matches:
            return

        event.prevent_default()
        event.stop()

        # Single match completes fully; multiple complete to the shared prefix.
        replacement = matches[0] if len(matches) == 1 else commonprefix(matches)

        text_area.clear()
        text_area.insert(replacement)

    @on(ChatInput.Changed)
    def on_chat_input_changed(self, event: ChatInput.Changed) -> None:
        """Update command hints when text changes."""
        self._update_command_hints(event.text_area.text)

    @on(ChatInput.Submitted)
    def on_chat_input_submitted(self, event: ChatInput.Submitted) -> None:
        """Handle input submission; ignores submissions while busy."""
        if self.is_processing:
            return
        user_input = event.value.strip()
        if user_input:
            event.chat_input.clear()
            self._hide_command_hints()
            # Keep the worker handle so ESC-ESC can cancel it.
            self._active_worker = self.run_worker(self._process_input(user_input))

    def _update_command_hints(self, text: str) -> None:
        """Show matching slash commands (with descriptions) under the input."""
        hint_label = self.query_one("#command-hint", Label)
        typed = text.strip().lower()

        if typed.startswith("/"):
            matches = [cmd for cmd in COMMANDS if cmd.startswith(typed)]

            if matches:
                # One "command  description" line per match.
                hint_text = Text()
                for i, cmd in enumerate(matches):
                    if i > 0:
                        hint_text.append("\n")
                    hint_text.append(cmd, style=f"bold {FOREGROUND}")
                    hint_text.append(f" {COMMAND_DESCRIPTIONS[cmd]}", style=FOREGROUND)
                hint_label.update(hint_text)
                hint_label.remove_class("hidden")
            else:
                self._hide_command_hints()
        else:
            self._hide_command_hints()

    def _hide_command_hints(self) -> None:
        """Hide command hints (no-op if the label is not mounted)."""
        with contextlib.suppress(NoMatches):
            self.query_one("#command-hint", Label).add_class("hidden")

    async def _process_input(self, text: str) -> None:
        """Route user input to the appropriate handler.

        Slash-prefixed input dispatches to a command; anything else goes to
        the LLM pipeline. Runs inside a worker; is_processing gates reentry.
        """
        if self.state == AppState.LOADING:
            return
        self.is_processing = True
        conv_log = self.query_one("#conversation-log", ConversationLog)

        try:
            if text.startswith("/"):
                command = text.lower().strip()
                if command == "/record":
                    await self._run_record_pipeline()
                elif command == "/transcribe":
                    await self._run_transcribe()
                elif command == "/tts":
                    self._toggle_tts()
                elif command == "/model":
                    self._open_model_selector()
                elif command == "/clear":
                    self.action_clear_conversation()
                elif command == "/exit":
                    self.exit()
                else:
                    conv_log.add_system_message(
                        f"Unknown command: {command}. Available: {', '.join(COMMANDS)}",
                        style=f"bold {PALETTE_1}",
                    )
            else:
                conv_log.add_user_message(text)
                await self._run_llm_pipeline(text)
        except Exception as e:
            # Top-level UI boundary: surface the error in the log and recover.
            conv_log.add_error(str(e))
            self.state = AppState.READY
        finally:
            self._active_worker = None
            self.is_processing = False

    async def _run_llm_pipeline(
        self, text: str, transcribe_time: float | None = None
    ) -> None:
        """Run LLM generation on text input.

        Streams tokens into the conversation log, optionally buffers them
        into newline-delimited phrases for TTS, updates chat history, and
        publishes timing/token stats to the metrics panel.

        Args:
            text: The user's message to send to the LLM.
            transcribe_time: Upstream STT duration to include in the metrics,
                if this call follows a voice recording.
        """
        conv_log = self.query_one("#conversation-log", ConversationLog)
        metrics_panel = self.query_one("#metrics-panel", MetricsPanel)

        self.state = AppState.THINKING

        llm_start = time.time()
        token_count = 0
        full_response = ""
        phrase_buffer = ""
        phrases: list[str] = []

        conv_log.start_streaming_response()

        async for token in llm_mod.generate_streaming(
            self.models.llm, self.models.tokenizer, text, history=self.chat_history
        ):
            full_response += token
            conv_log.update_streaming_response(token)
            token_count += 1

            if self.tts_enabled:
                # Split complete lines off for TTS; keep the unfinished tail.
                phrase_buffer += token
                if "\n" in phrase_buffer:
                    phrase, phrase_buffer = phrase_buffer.rsplit("\n", 1)
                    phrases.append(phrase)

        # Flush any trailing partial phrase so TTS speaks the whole response.
        if self.tts_enabled and phrase_buffer.strip():
            phrases.append(phrase_buffer)

        conv_log.finish_streaming_response()

        self.chat_history.append({"role": "user", "content": text})
        self.chat_history.append({"role": "assistant", "content": full_response})

        total_tokens = llm_mod.count_conversation_tokens(
            self.models.tokenizer, self.chat_history
        )

        llm_time = time.time() - llm_start
        tts_time: float | None = None

        if self.tts_enabled and phrases:
            self.state = AppState.SYNTHESIZING
            loop = asyncio.get_running_loop()

            def on_play() -> None:
                # Called from the TTS thread: marshal the state change back
                # onto the event loop thread.
                loop.call_soon_threadsafe(setattr, self, "state", AppState.SPEAKING)

            tts_time = await tts_mod.speak_phrases(
                self.models.tts, phrases, on_first_chunk=on_play
            )

        stats = InteractionStats(
            transcribe_time=transcribe_time,
            llm_time=llm_time,
            tts_time=tts_time,
            tokens=token_count,
            total_conversation_tokens=total_tokens,
            max_tokens=settings.llm_max_tokens,
        )
        metrics_panel.update_metrics(stats)
        self.state = AppState.READY

    def _show_status(self, message: str, timeout: float = 3.0) -> None:
        """Show an ephemeral message in the status bar."""
        status_panel = self.query_one("#status-panel", StatusPanel)
        status_panel.show_ephemeral_message(message, timeout)

    def _show_sticky_status(self, message: str) -> None:
        """Show a status message that persists until explicitly cleared."""
        status_panel = self.query_one("#status-panel", StatusPanel)
        status_panel.show_status_message(message)

    def _reset_esc_pending(self) -> None:
        """Reset the ESC pending state (timer callback after the 2s window)."""
        self._esc_pending = False
        self._esc_timer = None

    def _cancel_processing(self) -> None:
        """Cancel the active processing pipeline and restore the READY state."""
        if self._active_worker is not None:
            self._active_worker.cancel()
            self._active_worker = None
        # Close any half-rendered streaming response in the log.
        conv_log = self.query_one("#conversation-log", ConversationLog)
        conv_log.finish_streaming_response()
        self.is_processing = False
        self.state = AppState.READY
        self._esc_pending = False
        if self._esc_timer is not None:
            self._esc_timer.stop()
            self._esc_timer = None
        self._show_status("Cancelled")

    async def _run_record_pipeline(self) -> None:
        """Record audio, transcribe, then run LLM pipeline."""
        self.state = AppState.RECORDING

        audio_data = await audio_mod.record()

        if audio_data is None:
            self.state = AppState.READY
            self._show_status("No audio detected.")
            return

        self.state = AppState.TRANSCRIBING
        stt_start = time.time()
        transcribed = await stt_mod.transcribe(self.models.stt, audio_data)
        stt_time = time.time() - stt_start

        if not transcribed:
            self.state = AppState.READY
            self._show_status("Could not transcribe audio.")
            return

        conv_log = self.query_one("#conversation-log", ConversationLog)
        conv_log.add_user_message(transcribed)
        # Forward the STT duration so it appears in the metrics panel.
        await self._run_llm_pipeline(transcribed, transcribe_time=stt_time)

    async def _run_transcribe(self) -> None:
        """Record extended audio and stream transcription into the input box.

        Unlike /record, the result is NOT sent to the LLM: it is placed in
        the input field and copied to the clipboard for the user to edit.
        """
        self.state = AppState.RECORDING
        self._show_sticky_status("Transcribe mode: recording up to 3 min...")

        audio_data = await audio_mod.record_long()

        if audio_data is None:
            self.state = AppState.READY
            self._show_status("No audio detected.")
            return

        self.state = AppState.TRANSCRIBING
        full_text = ""

        async for chunk in stt_mod.transcribe_streaming(self.models.stt, audio_data):
            full_text += chunk

        self.state = AppState.READY
        if full_text.strip():
            text_area = self.query_one("#user-input", ChatInput)
            text_area.clear()
            text_area.insert(full_text.strip())
            self.copy_to_clipboard(full_text.strip())
            self._show_status("Transcribed to input and copied to clipboard.")
        else:
            self._show_status("Could not transcribe audio.")

    def _toggle_tts(self) -> None:
        """Toggle TTS on/off and reflect the new state in the status panel."""
        status_panel = self.query_one("#status-panel", StatusPanel)
        self.tts_enabled = not self.tts_enabled
        status_panel.tts_enabled = self.tts_enabled

    def _open_model_selector(self) -> None:
        """Open the model selector modal pre-filled with the current models."""
        self.push_screen(
            ModelSelector(
                current_stt=settings.stt_model,
                current_llm=settings.llm_model,
                current_tts=settings.tts_model,
            ),
        )

    def on_model_selector_changed(self, event: ModelSelector.Changed) -> None:
        """Handle model selection changes from the modal."""
        self._apply_model_selection(event.selection)

    def _apply_model_selection(self, selection: ModelSelection | None) -> None:
        """Apply model changes from the selector modal.

        Only the models whose names actually changed are reloaded; if
        nothing changed (or the modal was dismissed) this is a no-op.
        """
        if selection is None:
            return

        reload_stt = selection.stt_model != settings.stt_model
        reload_llm = selection.llm_model != settings.llm_model
        reload_tts = selection.tts_model != settings.tts_model

        if not (reload_stt or reload_llm or reload_tts):
            return

        settings.stt_model = selection.stt_model
        settings.llm_model = selection.llm_model
        settings.tts_model = selection.tts_model

        self.run_worker(
            self._reload_models(
                reload_stt=reload_stt,
                reload_llm=reload_llm,
                reload_tts=reload_tts,
            )
        )

    async def _reload_models(
        self,
        *,
        reload_stt: bool,
        reload_llm: bool,
        reload_tts: bool,
    ) -> None:
        """Reload changed models in the background.

        Disables the input and enters LOADING for the duration; each flagged
        loader runs in the thread-pool executor with the current settings.
        """
        loop = asyncio.get_running_loop()
        status_panel = self.query_one("#status-panel", StatusPanel)
        text_area = self.query_one("#user-input", ChatInput)
        text_area.disabled = True
        self.state = AppState.LOADING

        if reload_stt:
            status_panel.show_status_message("Reloading STT...")
            stt = await loop.run_in_executor(None, load_stt, settings.stt_model)
            self.models.stt = stt

        if reload_llm:
            status_panel.show_status_message("Reloading LLM...")
            llm, tokenizer = await loop.run_in_executor(
                None, load_llm, settings.llm_model
            )
            self.models.llm = llm
            self.models.tokenizer = tokenizer

        if reload_tts:
            status_panel.show_status_message("Reloading TTS...")
            tts = await loop.run_in_executor(None, load_tts, settings.tts_model)
            self.models.tts = tts

        status_panel.clear_status_message()
        text_area.disabled = False
        text_area.focus()
        self.state = AppState.READY

        # If the selector modal is still open, refresh its "loaded" markers.
        if isinstance(self.screen, ModelSelector):
            self.screen.update_loaded_models(
                stt=settings.stt_model,
                llm=settings.llm_model,
                tts=settings.tts_model,
            )

    def action_clear_conversation(self) -> None:
        """Clear the conversation history, log, and metrics."""
        self.chat_history.clear()
        conv_log = self.query_one("#conversation-log", ConversationLog)
        conv_log.clear()

        metrics_panel = self.query_one("#metrics-panel", MetricsPanel)
        metrics_panel.clear_metrics()