voxing 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
voxing-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,42 @@
1
+ Metadata-Version: 2.4
2
+ Name: voxing
3
+ Version: 0.1.0
4
+ Summary: Voice assistant TUI powered by local MLX models
5
+ Author: yeungadrian
6
+ License-Expression: MIT
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Environment :: Console
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3.13
11
+ Classifier: Operating System :: MacOS
12
+ Requires-Dist: langdetect>=1.0.9
13
+ Requires-Dist: mlx-audio[tts]==0.3.1
14
+ Requires-Dist: mlx-lm>=0.30.5
15
+ Requires-Dist: pip>=26.0.1
16
+ Requires-Dist: pydantic-settings>=2.12.0
17
+ Requires-Dist: rich>=14.3.1
18
+ Requires-Dist: sounddevice>=0.5.3
19
+ Requires-Dist: textual>=7.5.0
20
+ Requires-Dist: typer>=0.23.1
21
+ Requires-Python: ==3.13.*
22
+ Project-URL: Homepage, https://github.com/yeungadrian/Voxing
23
+ Project-URL: Repository, https://github.com/yeungadrian/Voxing
24
+ Description-Content-Type: text/markdown
25
+
26
+ # Voxing
27
+ TUI for local voice / text assistant on macOS
28
+
29
+ ## Testing
30
+
31
+ Run the test suite:
32
+ ```bash
33
+ uv run pytest
34
+ ```
35
+
36
+ Update visual snapshots after UI changes:
37
+ ```bash
38
+ uv run pytest --snapshot-update
39
+ ```
40
+
41
+ TODO:
42
+ - Ability to run via `uvx voxing`?
voxing-0.1.0/README.md ADDED
@@ -0,0 +1,17 @@
1
+ # Voxing
2
+ TUI for local voice / text assistant on macOS
3
+
4
+ ## Testing
5
+
6
+ Run the test suite:
7
+ ```bash
8
+ uv run pytest
9
+ ```
10
+
11
+ Update visual snapshots after UI changes:
12
+ ```bash
13
+ uv run pytest --snapshot-update
14
+ ```
15
+
16
+ TODO:
17
+ - Ability to run via `uvx voxing`?
@@ -0,0 +1,66 @@
1
+ [project]
2
+ name = "voxing"
3
+ version = "0.1.0"
4
+ description = "Voice assistant TUI powered by local MLX models"
5
+ readme = "README.md"
6
+ requires-python = "==3.13.*"
7
+ authors = [{ name = "yeungadrian" }]
8
+ license = "MIT"
9
+ classifiers = [
10
+ "Development Status :: 3 - Alpha",
11
+ "Environment :: Console",
12
+ "License :: OSI Approved :: MIT License",
13
+ "Programming Language :: Python :: 3.13",
14
+ "Operating System :: MacOS",
15
+ ]
16
+ dependencies = [
17
+ "langdetect>=1.0.9",
18
+ "mlx-audio[tts]==0.3.1",
19
+ "mlx-lm>=0.30.5",
20
+ "pip>=26.0.1",
21
+ "pydantic-settings>=2.12.0",
22
+ "rich>=14.3.1",
23
+ "sounddevice>=0.5.3",
24
+ "textual>=7.5.0",
25
+ "typer>=0.23.1",
26
+ ]
27
+
28
+ [project.urls]
29
+ Homepage = "https://github.com/yeungadrian/Voxing"
30
+ Repository = "https://github.com/yeungadrian/Voxing"
31
+
32
+ [project.scripts]
33
+ voxing = "voxing.__main__:main"
34
+
35
+ [build-system]
36
+ requires = ["uv_build>=0.10.2,<0.11.0"]
37
+ build-backend = "uv_build"
38
+
39
+ [dependency-groups]
40
+ dev = [
41
+ "pre-commit>=4.5.1",
42
+ "pytest>=9.0.2",
43
+ "pytest-asyncio>=1.3.0",
44
+ "pytest-textual-snapshot>=1.0.0",
45
+ ]
46
+
47
+ [tool.ruff]
48
+ target-version = "py313"
49
+
50
+ [tool.ruff.lint]
51
+ select = [
52
+ "F", # Pyflakes
53
+ "E", # pycodestyle errors
54
+ "W", # pycodestyle warnings
55
+ "I", # isort
56
+ "UP", # pyupgrade
57
+ "B", # flake8-bugbear
58
+ "SIM", # flake8-simplify
59
+ "C4", # flake8-comprehensions
60
+ "RET", # flake8-return
61
+ "PTH", # flake8-use-pathlib
62
+ ]
63
+
64
+ [tool.pytest.ini_options]
65
+ asyncio_mode = "auto"
66
+ testpaths = ["tests"]
@@ -0,0 +1 @@
1
+ """Voxing TUI - A Textual-based Terminal User Interface for Voxing Voice Assistant."""
@@ -0,0 +1,13 @@
1
+ """Entry point for the Voxing TUI application."""
2
+
3
+ from voxing.app import VoxingApp
4
+
5
+
6
def main() -> None:
    """Launch the Voxing TUI and block until the user exits."""
    VoxingApp().run()
10
+
11
+
12
# Support direct execution (`python -m voxing` / `python __main__.py`).
if __name__ == "__main__":
    main()
@@ -0,0 +1,492 @@
1
+ """Main Voxing TUI application."""
2
+
3
+ import asyncio
4
+ import contextlib
5
+ import time
6
+ from os.path import commonprefix
7
+
8
+ from rich.text import Text
9
+ from textual import on
10
+ from textual.app import App, ComposeResult
11
+ from textual.binding import Binding
12
+ from textual.containers import Container, Horizontal, Vertical
13
+ from textual.css.query import NoMatches
14
+ from textual.events import Key
15
+ from textual.reactive import reactive
16
+ from textual.timer import Timer
17
+ from textual.widgets import Footer, Label
18
+ from textual.worker import Worker
19
+
20
+ from voxing import audio as audio_mod
21
+ from voxing.config import settings
22
+ from voxing.models import Models, load_llm, load_stt, load_tts
23
+ from voxing.models import llm as llm_mod
24
+ from voxing.models import stt as stt_mod
25
+ from voxing.models import tts as tts_mod
26
+ from voxing.models.llm import ChatMessage
27
+ from voxing.state import AppState, InteractionStats
28
+ from voxing.themes import FOREGROUND, PALETTE_1, TOKYO_NIGHT
29
+ from voxing.widgets import (
30
+ ChatInput,
31
+ ConversationLog,
32
+ MetricsPanel,
33
+ ModelSelection,
34
+ ModelSelector,
35
+ StatusPanel,
36
+ )
37
+
38
# Slash commands accepted by the input box, mapped to the one-line help text
# shown in the command-hint label while the user is typing.
COMMAND_DESCRIPTIONS: dict[str, str] = {
    "/record": "Record and process voice",
    "/transcribe": "Transcribe audio to text",
    "/tts": "Toggle text-to-speech",
    "/model": "Switch models",
    "/clear": "Clear conversation",
    "/exit": "Exit application",
}
# Command names in declaration order; used for tab-completion prefix matching.
COMMANDS = list(COMMAND_DESCRIPTIONS)
47
+
48
+
49
class VoxingApp(App):
    """Voxing Voice Assistant TUI Application.

    Top-level Textual app that wires together the status bar, conversation
    log, and chat input, and orchestrates the STT -> LLM -> TTS pipeline in
    background workers so the UI stays responsive while models run.
    """

    CSS_PATH = "styles.tcss"
    TITLE = "Voxing Voice Assistant"

    BINDINGS = [
        Binding("q", "quit", "Quit", priority=True),
        Binding("ctrl+c", "clear_conversation", "Clear"),
    ]

    # Reactive so watch_state() mirrors every transition into the status panel.
    state: reactive[AppState] = reactive(AppState.LOADING)

    def __init__(self) -> None:
        """Initialize the Voxing TUI app."""
        super().__init__()
        # None until _load_models() finishes; pipelines run only after READY.
        self.models: Models | None = None
        # True while a worker is handling user input (blocks new submissions).
        self.is_processing: bool = False
        self.tts_enabled: bool = False
        self.chat_history: list[ChatMessage] = []
        # Worker for the in-flight pipeline, kept so double-ESC can cancel it.
        self._active_worker: Worker[None] | None = None
        # Double-ESC cancel: first press arms for 2s, second press cancels.
        self._esc_pending: bool = False
        self._esc_timer: Timer | None = None

    def compose(self) -> ComposeResult:
        """Compose the app layout."""
        with Horizontal(id="status-bar"):
            yield StatusPanel(id="status-panel")
            yield MetricsPanel(id="metrics-panel")

        yield ConversationLog(id="conversation-log", wrap=True)

        with Vertical(id="bottom-section"), Container(id="input-container"):
            # Hint label starts hidden; shown while typing a "/" command.
            yield Label(id="command-hint", classes="hidden")
            yield ChatInput(id="user-input")

        yield Footer()

    def on_mount(self) -> None:
        """Called when app is mounted."""
        self.design = TOKYO_NIGHT

        text_area = self.query_one("#user-input", ChatInput)
        text_area.focus()
        text_area.show_line_numbers = False
        # Input stays disabled until all models finish loading
        # (_load_models re-enables it).
        text_area.disabled = True

        status_panel = self.query_one("#status-panel", StatusPanel)
        status_panel.current_state = self.state
        status_panel.tts_enabled = self.tts_enabled

        self.run_worker(self._load_models())

    async def _load_models(self) -> None:
        """Load all models in the background.

        The loaders are blocking, so each runs in the default thread-pool
        executor to keep the UI rendering during downloads/initialization.
        """
        loop = asyncio.get_running_loop()
        status_panel = self.query_one("#status-panel", StatusPanel)

        # NOTE(review): this initial load calls the loaders with no arguments
        # while _reload_models passes settings.* explicitly — presumably the
        # loader defaults come from the same settings; confirm they match.
        status_panel.show_status_message("Loading STT...")
        stt = await loop.run_in_executor(None, load_stt)

        status_panel.show_status_message("Loading LLM...")
        llm, tokenizer = await loop.run_in_executor(None, load_llm)

        status_panel.show_status_message("Loading TTS...")
        tts = await loop.run_in_executor(None, load_tts)

        status_panel.clear_status_message()

        self.models = Models(stt=stt, llm=llm, tts=tts, tokenizer=tokenizer)

        # Models ready: unlock the input box and leave the LOADING state.
        text_area = self.query_one("#user-input", ChatInput)
        text_area.disabled = False
        text_area.focus()
        self.state = AppState.READY

    def watch_state(self, new_state: AppState) -> None:
        """Called when state changes; mirrors the state into the status panel."""
        # State can change before widgets mount, so swallow NoMatches.
        with contextlib.suppress(NoMatches):
            status_panel = self.query_one("#status-panel", StatusPanel)
            status_panel.current_state = new_state

    def on_key(self, event: Key) -> None:
        """Handle key events: double-ESC cancel and Tab command completion."""
        if event.key == "escape":
            # ESC is only meaningful while a pipeline is running.
            if not self.is_processing:
                return
            event.prevent_default()
            event.stop()
            if self._esc_pending:
                # Second ESC within the 2s window: cancel the worker.
                self._cancel_processing()
            else:
                self._esc_pending = True
                self._show_status("Press ESC again to cancel", timeout=2.0)
                self._esc_timer = self.set_timer(2.0, self._reset_esc_pending)
            return

        if event.key != "tab":
            return

        # Tab completion for slash commands.
        text_area = self.query_one("#user-input", ChatInput)
        text = text_area.text.strip()

        if not text.startswith("/"):
            return

        matches = [cmd for cmd in COMMANDS if cmd.startswith(text.lower())]
        if not matches:
            return

        event.prevent_default()
        event.stop()

        # Single match completes fully; multiple complete to the shared prefix.
        replacement = matches[0] if len(matches) == 1 else commonprefix(matches)

        text_area.clear()
        text_area.insert(replacement)

    @on(ChatInput.Changed)
    def on_chat_input_changed(self, event: ChatInput.Changed) -> None:
        """Update command hints when text changes."""
        self._update_command_hints(event.text_area.text)

    @on(ChatInput.Submitted)
    def on_chat_input_submitted(self, event: ChatInput.Submitted) -> None:
        """Handle input submission; ignores submissions while busy."""
        if self.is_processing:
            return
        user_input = event.value.strip()
        if user_input:
            event.chat_input.clear()
            self._hide_command_hints()
            # Keep the worker handle so ESC-ESC can cancel it.
            self._active_worker = self.run_worker(self._process_input(user_input))

    def _update_command_hints(self, text: str) -> None:
        """Show matching slash commands (with descriptions) under the input."""
        hint_label = self.query_one("#command-hint", Label)
        typed = text.strip().lower()

        if typed.startswith("/"):
            matches = [cmd for cmd in COMMANDS if cmd.startswith(typed)]

            if matches:
                # One "command  description" line per match.
                hint_text = Text()
                for i, cmd in enumerate(matches):
                    if i > 0:
                        hint_text.append("\n")
                    hint_text.append(cmd, style=f"bold {FOREGROUND}")
                    hint_text.append(f" {COMMAND_DESCRIPTIONS[cmd]}", style=FOREGROUND)
                hint_label.update(hint_text)
                hint_label.remove_class("hidden")
            else:
                self._hide_command_hints()
        else:
            self._hide_command_hints()

    def _hide_command_hints(self) -> None:
        """Hide command hints (no-op if the label is not mounted)."""
        with contextlib.suppress(NoMatches):
            self.query_one("#command-hint", Label).add_class("hidden")

    async def _process_input(self, text: str) -> None:
        """Route user input to the appropriate handler.

        Slash-prefixed input dispatches to a command; anything else goes to
        the LLM pipeline. Runs inside a worker; is_processing gates reentry.
        """
        if self.state == AppState.LOADING:
            return
        self.is_processing = True
        conv_log = self.query_one("#conversation-log", ConversationLog)

        try:
            if text.startswith("/"):
                command = text.lower().strip()
                if command == "/record":
                    await self._run_record_pipeline()
                elif command == "/transcribe":
                    await self._run_transcribe()
                elif command == "/tts":
                    self._toggle_tts()
                elif command == "/model":
                    self._open_model_selector()
                elif command == "/clear":
                    self.action_clear_conversation()
                elif command == "/exit":
                    self.exit()
                else:
                    conv_log.add_system_message(
                        f"Unknown command: {command}. Available: {', '.join(COMMANDS)}",
                        style=f"bold {PALETTE_1}",
                    )
            else:
                conv_log.add_user_message(text)
                await self._run_llm_pipeline(text)
        except Exception as e:
            # Top-level UI boundary: surface the error in the log and recover.
            conv_log.add_error(str(e))
            self.state = AppState.READY
        finally:
            self._active_worker = None
            self.is_processing = False

    async def _run_llm_pipeline(
        self, text: str, transcribe_time: float | None = None
    ) -> None:
        """Run LLM generation on text input.

        Streams tokens into the conversation log, optionally buffers them
        into newline-delimited phrases for TTS, updates chat history, and
        publishes timing/token stats to the metrics panel.

        Args:
            text: The user's message to send to the LLM.
            transcribe_time: Upstream STT duration to include in the metrics,
                if this call follows a voice recording.
        """
        conv_log = self.query_one("#conversation-log", ConversationLog)
        metrics_panel = self.query_one("#metrics-panel", MetricsPanel)

        self.state = AppState.THINKING

        llm_start = time.time()
        token_count = 0
        full_response = ""
        phrase_buffer = ""
        phrases: list[str] = []

        conv_log.start_streaming_response()

        async for token in llm_mod.generate_streaming(
            self.models.llm, self.models.tokenizer, text, history=self.chat_history
        ):
            full_response += token
            conv_log.update_streaming_response(token)
            token_count += 1

            if self.tts_enabled:
                # Split complete lines off for TTS; keep the unfinished tail.
                phrase_buffer += token
                if "\n" in phrase_buffer:
                    phrase, phrase_buffer = phrase_buffer.rsplit("\n", 1)
                    phrases.append(phrase)

        # Flush any trailing partial phrase so TTS speaks the whole response.
        if self.tts_enabled and phrase_buffer.strip():
            phrases.append(phrase_buffer)

        conv_log.finish_streaming_response()

        self.chat_history.append({"role": "user", "content": text})
        self.chat_history.append({"role": "assistant", "content": full_response})

        total_tokens = llm_mod.count_conversation_tokens(
            self.models.tokenizer, self.chat_history
        )

        llm_time = time.time() - llm_start
        tts_time: float | None = None

        if self.tts_enabled and phrases:
            self.state = AppState.SYNTHESIZING
            loop = asyncio.get_running_loop()

            def on_play() -> None:
                # Called from the TTS thread: marshal the state change back
                # onto the event loop thread.
                loop.call_soon_threadsafe(setattr, self, "state", AppState.SPEAKING)

            tts_time = await tts_mod.speak_phrases(
                self.models.tts, phrases, on_first_chunk=on_play
            )

        stats = InteractionStats(
            transcribe_time=transcribe_time,
            llm_time=llm_time,
            tts_time=tts_time,
            tokens=token_count,
            total_conversation_tokens=total_tokens,
            max_tokens=settings.llm_max_tokens,
        )
        metrics_panel.update_metrics(stats)
        self.state = AppState.READY

    def _show_status(self, message: str, timeout: float = 3.0) -> None:
        """Show an ephemeral message in the status bar."""
        status_panel = self.query_one("#status-panel", StatusPanel)
        status_panel.show_ephemeral_message(message, timeout)

    def _show_sticky_status(self, message: str) -> None:
        """Show a status message that persists until explicitly cleared."""
        status_panel = self.query_one("#status-panel", StatusPanel)
        status_panel.show_status_message(message)

    def _reset_esc_pending(self) -> None:
        """Reset the ESC pending state (timer callback after the 2s window)."""
        self._esc_pending = False
        self._esc_timer = None

    def _cancel_processing(self) -> None:
        """Cancel the active processing pipeline and restore the READY state."""
        if self._active_worker is not None:
            self._active_worker.cancel()
            self._active_worker = None
        # Close any half-rendered streaming response in the log.
        conv_log = self.query_one("#conversation-log", ConversationLog)
        conv_log.finish_streaming_response()
        self.is_processing = False
        self.state = AppState.READY
        self._esc_pending = False
        if self._esc_timer is not None:
            self._esc_timer.stop()
            self._esc_timer = None
        self._show_status("Cancelled")

    async def _run_record_pipeline(self) -> None:
        """Record audio, transcribe, then run LLM pipeline."""
        self.state = AppState.RECORDING

        audio_data = await audio_mod.record()

        if audio_data is None:
            self.state = AppState.READY
            self._show_status("No audio detected.")
            return

        self.state = AppState.TRANSCRIBING
        stt_start = time.time()
        transcribed = await stt_mod.transcribe(self.models.stt, audio_data)
        stt_time = time.time() - stt_start

        if not transcribed:
            self.state = AppState.READY
            self._show_status("Could not transcribe audio.")
            return

        conv_log = self.query_one("#conversation-log", ConversationLog)
        conv_log.add_user_message(transcribed)
        # Forward the STT duration so it appears in the metrics panel.
        await self._run_llm_pipeline(transcribed, transcribe_time=stt_time)

    async def _run_transcribe(self) -> None:
        """Record extended audio and stream transcription into the input box.

        Unlike /record, the result is NOT sent to the LLM: it is placed in
        the input field and copied to the clipboard for the user to edit.
        """
        self.state = AppState.RECORDING
        self._show_sticky_status("Transcribe mode: recording up to 3 min...")

        audio_data = await audio_mod.record_long()

        if audio_data is None:
            self.state = AppState.READY
            self._show_status("No audio detected.")
            return

        self.state = AppState.TRANSCRIBING
        full_text = ""

        async for chunk in stt_mod.transcribe_streaming(self.models.stt, audio_data):
            full_text += chunk

        self.state = AppState.READY
        if full_text.strip():
            text_area = self.query_one("#user-input", ChatInput)
            text_area.clear()
            text_area.insert(full_text.strip())
            self.copy_to_clipboard(full_text.strip())
            self._show_status("Transcribed to input and copied to clipboard.")
        else:
            self._show_status("Could not transcribe audio.")

    def _toggle_tts(self) -> None:
        """Toggle TTS on/off and reflect the new state in the status panel."""
        status_panel = self.query_one("#status-panel", StatusPanel)
        self.tts_enabled = not self.tts_enabled
        status_panel.tts_enabled = self.tts_enabled

    def _open_model_selector(self) -> None:
        """Open the model selector modal pre-filled with the current models."""
        self.push_screen(
            ModelSelector(
                current_stt=settings.stt_model,
                current_llm=settings.llm_model,
                current_tts=settings.tts_model,
            ),
        )

    def on_model_selector_changed(self, event: ModelSelector.Changed) -> None:
        """Handle model selection changes from the modal."""
        self._apply_model_selection(event.selection)

    def _apply_model_selection(self, selection: ModelSelection | None) -> None:
        """Apply model changes from the selector modal.

        Only the models whose names actually changed are reloaded; if
        nothing changed (or the modal was dismissed) this is a no-op.
        """
        if selection is None:
            return

        reload_stt = selection.stt_model != settings.stt_model
        reload_llm = selection.llm_model != settings.llm_model
        reload_tts = selection.tts_model != settings.tts_model

        if not (reload_stt or reload_llm or reload_tts):
            return

        settings.stt_model = selection.stt_model
        settings.llm_model = selection.llm_model
        settings.tts_model = selection.tts_model

        self.run_worker(
            self._reload_models(
                reload_stt=reload_stt,
                reload_llm=reload_llm,
                reload_tts=reload_tts,
            )
        )

    async def _reload_models(
        self,
        *,
        reload_stt: bool,
        reload_llm: bool,
        reload_tts: bool,
    ) -> None:
        """Reload changed models in the background.

        Disables the input and enters LOADING for the duration; each flagged
        loader runs in the thread-pool executor with the current settings.
        """
        loop = asyncio.get_running_loop()
        status_panel = self.query_one("#status-panel", StatusPanel)
        text_area = self.query_one("#user-input", ChatInput)
        text_area.disabled = True
        self.state = AppState.LOADING

        if reload_stt:
            status_panel.show_status_message("Reloading STT...")
            stt = await loop.run_in_executor(None, load_stt, settings.stt_model)
            self.models.stt = stt

        if reload_llm:
            status_panel.show_status_message("Reloading LLM...")
            llm, tokenizer = await loop.run_in_executor(
                None, load_llm, settings.llm_model
            )
            self.models.llm = llm
            self.models.tokenizer = tokenizer

        if reload_tts:
            status_panel.show_status_message("Reloading TTS...")
            tts = await loop.run_in_executor(None, load_tts, settings.tts_model)
            self.models.tts = tts

        status_panel.clear_status_message()
        text_area.disabled = False
        text_area.focus()
        self.state = AppState.READY

        # If the selector modal is still open, refresh its "loaded" markers.
        if isinstance(self.screen, ModelSelector):
            self.screen.update_loaded_models(
                stt=settings.stt_model,
                llm=settings.llm_model,
                tts=settings.tts_model,
            )

    def action_clear_conversation(self) -> None:
        """Clear the conversation history, log, and metrics."""
        self.chat_history.clear()
        conv_log = self.query_one("#conversation-log", ConversationLog)
        conv_log.clear()

        metrics_panel = self.query_one("#metrics-panel", MetricsPanel)
        metrics_panel.clear_metrics()