PyPI - talktype - Versions diffs - 0.1.0__tar.gz - Mend

talktype 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

talktype-0.1.0/.env.example +16 -0
talktype-0.1.0/.gitignore +4 -0
talktype-0.1.0/PKG-INFO +125 -0
talktype-0.1.0/PLAN.md +35 -0
talktype-0.1.0/README.md +97 -0
talktype-0.1.0/RESEARCH.md +15 -0
talktype-0.1.0/pyproject.toml +46 -0
talktype-0.1.0/scripts/doctor_macos.sh +7 -0
talktype-0.1.0/scripts/install_macos.sh +62 -0
talktype-0.1.0/scripts/run_macos.sh +7 -0
talktype-0.1.0/src/talk/__init__.py +4 -0
talktype-0.1.0/src/talk/__main__.py +85 -0
talktype-0.1.0/src/talk/app.py +137 -0
talktype-0.1.0/src/talk/audio.py +107 -0
talktype-0.1.0/src/talk/backends/__init__.py +1 -0
talktype-0.1.0/src/talk/backends/base.py +12 -0
talktype-0.1.0/src/talk/backends/factory.py +24 -0
talktype-0.1.0/src/talk/backends/openai_backend.py +31 -0
talktype-0.1.0/src/talk/backends/parakeet_backend.py +29 -0
talktype-0.1.0/src/talk/config.py +94 -0
talktype-0.1.0/src/talk/doctor.py +226 -0
talktype-0.1.0/src/talk/paste.py +28 -0
talktype-0.1.0/uv.lock +1557 -0

talktype-0.1.0/.env.example ADDED Viewed

@@ -0,0 +1,16 @@
+# Core app
+DICTATE_BACKEND=parakeet
+DICTATE_HOTKEY=<cmd>+<shift>+<space>
+DICTATE_QUIT_HOTKEY=<cmd>+<shift>+q
+DICTATE_AUTOPASTE=true
+DICTATE_SAMPLE_RATE=16000
+DICTATE_CHANNELS=1
+DICTATE_MIN_SECONDS=0.35
+# Local backend (Apple Silicon)
+PARAKEET_MODEL=mlx-community/parakeet-tdt-0.6b-v3
+# Cloud fallback
+OPENAI_API_KEY=
+OPENAI_TRANSCRIBE_MODEL=gpt-4o-transcribe
+OPENAI_TRANSCRIBE_LANGUAGE=

talktype-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,4 @@
+.venv/
+__pycache__/
+*.pyc
+.env

talktype-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,125 @@
+Metadata-Version: 2.4
+Name: talktype
+Version: 0.1.0
+Summary: Local-first dictation for macOS — press a hotkey, talk, text appears at your cursor
+Project-URL: Homepage, https://github.com/strangeloopcanon/talk
+Project-URL: Repository, https://github.com/strangeloopcanon/talk
+Project-URL: Issues, https://github.com/strangeloopcanon/talk/issues
+Author: Rohit Krishnan
+License: MIT
+Keywords: dictation,macos,mlx,parakeet,speech-to-text,transcription
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: MacOS X
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: MacOS
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
+Requires-Python: >=3.11
+Requires-Dist: numpy>=1.26
+Requires-Dist: openai>=1.108.0
+Requires-Dist: parakeet-mlx>=0.4.2
+Requires-Dist: pynput>=1.8.1
+Requires-Dist: python-dotenv>=1.1.1
+Requires-Dist: sounddevice>=0.5.2
+Description-Content-Type: text/markdown
+# talk
+> **macOS only** (Apple Silicon recommended). Requires Python 3.11+.
+Local-first dictation for macOS.
+Press a hotkey, talk, text appears at your cursor.
+- Parakeet local transcription by default (zero API cost on Apple Silicon).
+- OpenAI `gpt-4o-transcribe` fallback with one env switch.
+- Automatic paste at cursor (clipboard-safe restore).
+## Install
+```bash
+pip install talktype
+```
+That's it. On first run, Parakeet model weights (~1.2 GB) download automatically.
+### Alternative: from source
+```bash
+git clone https://github.com/strangeloopcanon/talk.git
+cd talk
+./scripts/install_macos.sh
+./scripts/doctor_macos.sh
+./scripts/run_macos.sh
+```
+## Usage
+```bash
+talk run
+```
+- Press `Cmd+Shift+Space` to start recording.
+- Press `Cmd+Shift+Space` again to stop, transcribe, and paste.
+- Press `Cmd+Shift+Q` to quit.
+### Preflight checks
+```bash
+talk doctor
+```
+### File transcription
+```bash
+talk transcribe-file /path/to/sample.wav
+```
+## macOS permissions
+You must grant your terminal app:
+- **Microphone** -- for audio recording
+- **Accessibility** -- for paste keystroke automation
+- **Input Monitoring** -- for global hotkeys
+If paste fails but transcription works, Accessibility permission is usually the issue.
+## Configuration
+Works out of the box with zero configuration. To customize, create `~/.config/talk/.env`:
+```bash
+mkdir -p ~/.config/talk
+```
+Key options (all have sensible defaults):
+| Variable | Default | Notes |
+|----------|---------|-------|
+| `DICTATE_BACKEND` | `parakeet` | `parakeet` (local) or `openai` (cloud) |
+| `DICTATE_HOTKEY` | `<cmd>+<shift>+<space>` | Toggle recording |
+| `DICTATE_QUIT_HOTKEY` | `<cmd>+<shift>+q` | Quit the app |
+| `DICTATE_AUTOPASTE` | `true` | Paste transcription at cursor |
+| `PARAKEET_MODEL` | `mlx-community/parakeet-tdt-0.6b-v3` | Local model |
+| `OPENAI_API_KEY` | *(empty)* | Required if backend=openai |
+See `.env.example` for the full list.
+## Switching backend
+```bash
+# In ~/.config/talk/.env or as env vars:
+DICTATE_BACKEND=openai
+OPENAI_API_KEY=sk-...
+```
+## Cleanup
+Parakeet model weights are cached in `~/.cache/huggingface/hub/`. To reclaim disk space:
+```bash
+rm -rf ~/.cache/huggingface/hub/models--mlx-community--parakeet-tdt-0.6b-v3
+```

talktype-0.1.0/PLAN.md ADDED Viewed

@@ -0,0 +1,35 @@
+# Dictation Build Plan (Executed)
+## Human-friendly plan
+Build a simple, reliable "press hotkey, talk, text appears" loop that feels close to WisprFlow for laptop use.
+- Prioritize **local transcription first** so per-use cost is effectively zero.
+- Keep a **cloud fallback** for quality and edge cases.
+- Make setup short enough to run in minutes.
+- Avoid heavy UI work until the core dictation loop is proven.
+## Task-focused plan
+1. Research model/API options and choose architecture.
+2. Scaffold Python app with environment-based config.
+3. Implement global hotkey + microphone recorder.
+4. Implement local Parakeet backend.
+5. Implement OpenAI backend fallback.
+6. Implement auto-paste insertion into active app on macOS.
+7. Add docs and `.env.example`.
+8. Smoke-test both backends with a sample WAV clip.
+9. Verify live app starts and reports permission requirements clearly.
+## Status
+All steps above are complete in this repo.
+## Onboarding hardening (completed)
+To make this easy for other laptop users, the repo now includes:
+1. `scripts/install_macos.sh` for one-command setup.
+2. `scripts/doctor_macos.sh` for preflight checks.
+3. `scripts/run_macos.sh` to launch dictation.
+4. README guidance that starts with the script-based flow.

talktype-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,97 @@
+# talk
+> **macOS only** (Apple Silicon recommended). Requires Python 3.11+.
+Local-first dictation for macOS.
+Press a hotkey, talk, text appears at your cursor.
+- Parakeet local transcription by default (zero API cost on Apple Silicon).
+- OpenAI `gpt-4o-transcribe` fallback with one env switch.
+- Automatic paste at cursor (clipboard-safe restore).
+## Install
+```bash
+pip install talktype
+```
+That's it. On first run, Parakeet model weights (~1.2 GB) download automatically.
+### Alternative: from source
+```bash
+git clone https://github.com/strangeloopcanon/talk.git
+cd talk
+./scripts/install_macos.sh
+./scripts/doctor_macos.sh
+./scripts/run_macos.sh
+```
+## Usage
+```bash
+talk run
+```
+- Press `Cmd+Shift+Space` to start recording.
+- Press `Cmd+Shift+Space` again to stop, transcribe, and paste.
+- Press `Cmd+Shift+Q` to quit.
+### Preflight checks
+```bash
+talk doctor
+```
+### File transcription
+```bash
+talk transcribe-file /path/to/sample.wav
+```
+## macOS permissions
+You must grant your terminal app:
+- **Microphone** -- for audio recording
+- **Accessibility** -- for paste keystroke automation
+- **Input Monitoring** -- for global hotkeys
+If paste fails but transcription works, Accessibility permission is usually the issue.
+## Configuration
+Works out of the box with zero configuration. To customize, create `~/.config/talk/.env`:
+```bash
+mkdir -p ~/.config/talk
+```
+Key options (all have sensible defaults):
+| Variable | Default | Notes |
+|----------|---------|-------|
+| `DICTATE_BACKEND` | `parakeet` | `parakeet` (local) or `openai` (cloud) |
+| `DICTATE_HOTKEY` | `<cmd>+<shift>+<space>` | Toggle recording |
+| `DICTATE_QUIT_HOTKEY` | `<cmd>+<shift>+q` | Quit the app |
+| `DICTATE_AUTOPASTE` | `true` | Paste transcription at cursor |
+| `PARAKEET_MODEL` | `mlx-community/parakeet-tdt-0.6b-v3` | Local model |
+| `OPENAI_API_KEY` | *(empty)* | Required if backend=openai |
+See `.env.example` for the full list.
+## Switching backend
+```bash
+# In ~/.config/talk/.env or as env vars:
+DICTATE_BACKEND=openai
+OPENAI_API_KEY=sk-...
+```
+## Cleanup
+Parakeet model weights are cached in `~/.cache/huggingface/hub/`. To reclaim disk space:
+```bash
+rm -rf ~/.cache/huggingface/hub/models--mlx-community--parakeet-tdt-0.6b-v3
+```

talktype-0.1.0/RESEARCH.md ADDED Viewed

@@ -0,0 +1,15 @@
+# Research Notes (February 25, 2026)
+## Sources used
+- OpenAI blog: [Introducing next-generation audio models](https://openai.com/index/introducing-our-next-generation-audio-models/)
+- OpenAI Python SDK README (audio transcription examples): [openai/openai-python](https://github.com/openai/openai-python)
+- NVIDIA model card (Parakeet CTC 1.1B): [nvidia/parakeet-ctc-1.1b](https://huggingface.co/nvidia/parakeet-ctc-1.1b)
+- NVIDIA model card (Parakeet TDT 0.6B v2): [nvidia/parakeet-tdt-0.6b-v2](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2)
+- Parakeet MLX implementation docs/repo: [Parakeet-MLX](https://github.com/senstella/parakeet-mlx)
+## Key decisions
+- Use **Parakeet locally by default** for near-zero marginal cost dictation on Apple Silicon.
+- Use **OpenAI `gpt-4o-transcribe`** as a switchable fallback backend.
+- Keep backend swapping to one env var (`DICTATE_BACKEND`) so the same UX works across engines.

talktype-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,46 @@
+[project]
+name = "talktype"
+version = "0.1.0"
+description = "Local-first dictation for macOS — press a hotkey, talk, text appears at your cursor"
+readme = "README.md"
+requires-python = ">=3.11"
+license = {text = "MIT"}
+authors = [{name = "Rohit Krishnan"}]
+keywords = ["dictation", "speech-to-text", "transcription", "macos", "parakeet", "mlx"]
+classifiers = [
+  "Development Status :: 4 - Beta",
+  "Environment :: MacOS X",
+  "Intended Audience :: Developers",
+  "Operating System :: MacOS",
+  "Programming Language :: Python :: 3",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
+  "Topic :: Multimedia :: Sound/Audio :: Speech",
+]
+dependencies = [
+  "numpy>=1.26",
+  "openai>=1.108.0",
+  "parakeet-mlx>=0.4.2",
+  "pynput>=1.8.1",
+  "python-dotenv>=1.1.1",
+  "sounddevice>=0.5.2",
+]
+[project.urls]
+Homepage = "https://github.com/strangeloopcanon/talk"
+Repository = "https://github.com/strangeloopcanon/talk"
+Issues = "https://github.com/strangeloopcanon/talk/issues"
+[project.scripts]
+talk = "talk.__main__:main"
+[build-system]
+requires = ["hatchling>=1.24.2"]
+build-backend = "hatchling.build"
+[tool.hatch.build.targets.wheel]
+packages = ["src/talk"]
+[tool.uv]
+package = true

talktype-0.1.0/scripts/doctor_macos.sh ADDED Viewed

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Run the `talk doctor` preflight checks from the repository root.
set -euo pipefail

# Resolve the repository root (the parent of this script's directory) so the
# script can be invoked from any working directory.
script_dir="$(dirname "${BASH_SOURCE[0]}")"
ROOT_DIR="$(cd "${script_dir}/.." && pwd)"
cd "$ROOT_DIR"

uv run talk doctor

talktype-0.1.0/scripts/install_macos.sh ADDED Viewed

@@ -0,0 +1,62 @@
#!/usr/bin/env bash
# One-command setup for a source checkout: verifies the platform and tooling,
# seeds .env with defaults, and installs dependencies with `uv sync`.
set -euo pipefail

# Work from the repository root (the parent of this script's directory).
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

if [[ "$(uname -s)" != "Darwin" ]]; then
  echo "[error] This installer currently targets macOS." >&2
  exit 1
fi

if ! command -v uv >/dev/null 2>&1; then
  echo "[error] 'uv' is required but not installed." >&2
  if command -v brew >/dev/null 2>&1; then
    echo "Install it with: brew install uv" >&2
  else
    echo "Install instructions: https://docs.astral.sh/uv/getting-started/installation/" >&2
  fi
  exit 1
fi

# Ask the interpreter for "major.minor"; an empty result means python3 is
# missing or failed to run.
python_version="$(python3 -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")' 2>/dev/null || true)"
if [[ -z "$python_version" ]]; then
  echo "[error] Python 3 not found. Install Python 3.11+ first." >&2
  exit 1
fi

# Compare both components: checking the minor number alone would misjudge any
# interpreter whose major version is not 3.
python_major="${python_version%%.*}"
python_minor="${python_version#*.}"
if (( python_major < 3 || (python_major == 3 && python_minor < 11) )); then
  echo "[error] Python >= 3.11 required (found $python_version)." >&2
  echo "Install a newer Python: brew install python@3.13" >&2
  exit 1
fi
echo "[setup] Python $python_version detected."

if [[ ! -f .env ]]; then
  cp .env.example .env
  echo "[setup] Created .env from .env.example"
fi

# Append KEY=VALUE to .env unless a line starting with "KEY=" already exists,
# so values the user has already set are never overwritten.
#   $1 - variable name
#   $2 - default value
ensure_key_default() {
  local key="$1"
  local value="$2"
  if ! grep -q "^${key}=" .env; then
    printf '%s=%s\n' "$key" "$value" >> .env
    echo "[setup] Added default ${key}=${value}"
  fi
}

# Keep Parakeet as the default for fresh installs.
ensure_key_default "DICTATE_BACKEND" "parakeet"
ensure_key_default "DICTATE_HOTKEY" "<cmd>+<shift>+<space>"
ensure_key_default "DICTATE_QUIT_HOTKEY" "<cmd>+<shift>+q"
ensure_key_default "DICTATE_AUTOPASTE" "true"
ensure_key_default "PARAKEET_MODEL" "mlx-community/parakeet-tdt-0.6b-v3"

uv sync

echo ""
echo "[done] Installation complete."
echo "Next steps:"
echo "  1) ./scripts/doctor_macos.sh"
echo "  2) ./scripts/run_macos.sh"

talktype-0.1.0/scripts/run_macos.sh ADDED Viewed

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Launch the live dictation app (`talk run`) from the repository root.
set -euo pipefail

# Resolve the repository root (the parent of this script's directory) so the
# script can be invoked from any working directory.
script_dir="$(dirname "${BASH_SOURCE[0]}")"
ROOT_DIR="$(cd "${script_dir}/.." && pwd)"
cd "$ROOT_DIR"

uv run talk run

talktype-0.1.0/src/talk/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+"""talk — local-first dictation for macOS."""
+__all__ = ["__version__"]
+__version__ = "0.1.0"

talktype-0.1.0/src/talk/__main__.py ADDED Viewed

@@ -0,0 +1,85 @@
+from __future__ import annotations
+import argparse
+import sys
+from talk import __version__
def build_parser() -> argparse.ArgumentParser:
    """Assemble the ``talk`` command-line parser.

    The parser offers a ``--version`` flag and requires exactly one
    subcommand: ``run``, ``doctor`` or ``transcribe-file``. The last one takes
    a positional ``path`` and an optional ``--paste`` switch.
    """
    cli = argparse.ArgumentParser(
        prog="talk",
        description="Local-first laptop dictation with a global hotkey.",
    )
    cli.add_argument("--version", action="version", version=f"%(prog)s {__version__}")

    commands = cli.add_subparsers(dest="command", required=True)
    commands.add_parser("run", help="Start the live dictation app")
    commands.add_parser("doctor", help="Run setup preflight checks")

    transcribe = commands.add_parser("transcribe-file", help="Transcribe a WAV file")
    transcribe.add_argument("path", help="Path to a 16-bit PCM WAV file")
    transcribe.add_argument(
        "--paste",
        action="store_true",
        help="Paste transcription at cursor after transcription",
    )
    return cli
def _cmd_run() -> None:
    """Load the settings and run the live dictation app until it returns."""
    # Function-local imports: only this subcommand needs these modules.
    from talk.app import DictationApp
    from talk.config import load_settings

    DictationApp(load_settings()).run()
def _cmd_doctor() -> None:
    """Run the preflight checks and exit with their status when it is truthy."""
    # Function-local imports: only this subcommand needs these modules.
    from talk.config import load_settings
    from talk.doctor import run_doctor

    exit_code = run_doctor(load_settings())
    if exit_code:
        sys.exit(exit_code)
def _cmd_transcribe_file(path: str, paste: bool) -> None:
    """Transcribe a WAV file and print the resulting text to stdout.

    Args:
        path: Path of the WAV file to load.
        paste: When true, also hand the text to ``paste_text`` after printing.

    Exits with status 1 when the backend produces no text.
    """
    # Function-local imports: only this subcommand needs these modules.
    from talk.audio import load_wav_mono
    from talk.backends.factory import build_backend
    from talk.config import load_settings
    from talk.paste import paste_text

    settings = load_settings()
    backend = build_backend(settings)
    chunk = load_wav_mono(path)
    text = backend.transcribe(chunk.samples, chunk.sample_rate).strip()
    if not text:
        # Diagnostics go to stderr so stdout only ever carries the
        # transcription (e.g. when the output is piped or captured).
        print("[warn] No transcription text produced.", file=sys.stderr)
        sys.exit(1)
    print(text)
    if paste:
        paste_text(text)
+def main() -> None:
+    parser = build_parser()
+    args = parser.parse_args()
+    if args.command == "run":
+        _cmd_run()
+    elif args.command == "doctor":
+        _cmd_doctor()
+    elif args.command == "transcribe-file":
+        _cmd_transcribe_file(args.path, args.paste)
+if __name__ == "__main__":
+    main()

talktype-0.1.0/src/talk/app.py ADDED Viewed

@@ -0,0 +1,137 @@
+from __future__ import annotations
+import signal
+import subprocess
+import threading
+import time
+from pathlib import Path
+from pynput import keyboard
+from talk.audio import AudioChunk, MicRecorder, RecorderError
+from talk.backends.factory import build_backend
+from talk.config import Settings
+from talk.paste import paste_text
def _play_sound(
    filename: str,
    *,
    sounds_dir: str | Path = "/System/Library/Sounds",
) -> None:
    """Play a sound file in the background on a best-effort basis.

    Args:
        filename: Name of the sound file inside ``sounds_dir``.
        sounds_dir: Directory that holds the sound files. Defaults to the
            macOS system sounds folder.

    Returns without doing anything when the file does not exist or when the
    ``afplay`` player cannot be launched. The player is started without
    waiting for it to finish.
    """
    sound_path = Path(sounds_dir) / filename
    if not sound_path.exists():
        return
    try:
        subprocess.Popen(
            ["afplay", str(sound_path)],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except OSError:
        # Audio feedback is optional: failing to launch the player must not
        # raise into the caller.
        return
class DictationApp:
    """Hotkey-driven dictation loop: record, transcribe, optionally paste.

    One hotkey toggles recording. Stopping a recording hands the captured
    audio to a background thread that transcribes it and, when
    ``settings.autopaste`` is set, pastes the text. A second hotkey, SIGTERM
    or Ctrl-C shuts the app down.
    """

    def __init__(self, settings: Settings) -> None:
        """Create the transcription backend and recorder from ``settings``."""
        self.settings = settings
        self.backend = build_backend(settings)
        self.recorder = MicRecorder(
            sample_rate=settings.sample_rate,
            channels=settings.channels,
        )
        # Guards the two state flags below; they are touched by the hotkey
        # callbacks, the transcription worker thread and the shutdown path.
        self._lock = threading.Lock()
        self._stop_event = threading.Event()
        self._is_recording = False
        self._is_transcribing = False

    def _toggle_recording(self) -> None:
        """Start recording, or stop and pass the clip to a worker thread.

        The request is ignored, with a message, while a previous clip is
        still being transcribed.
        """
        chunk: AudioChunk | None = None
        with self._lock:
            if self._is_transcribing:
                print("[busy] Still transcribing previous clip.")
                return
            if not self._is_recording:
                try:
                    self.recorder.start()
                except Exception as exc:  # noqa: BLE001
                    print(f"[error] Could not start recording: {exc}")
                    return
                self._is_recording = True
                _play_sound("Glass.aiff")
                print("[rec] Recording... press hotkey again to stop.")
                return
            try:
                chunk = self.recorder.stop()
            except Exception as exc:  # noqa: BLE001
                # Caught as broadly as the start path: any failure here must
                # still reset the flag, otherwise the app would stay stuck in
                # the "recording" state and the error would escape the
                # hotkey callback.
                print(f"[error] Could not stop recording cleanly: {exc}")
                self._is_recording = False
                return
            self._is_recording = False
            self._is_transcribing = True
        # Transcribe off the calling thread so the hotkey callback returns
        # promptly; _is_transcribing blocks new recordings meanwhile.
        _play_sound("Pop.aiff")
        worker = threading.Thread(
            target=self._transcribe_and_emit,
            args=(chunk,),
            daemon=True,
        )
        worker.start()

    def _transcribe_and_emit(self, chunk: AudioChunk) -> None:
        """Transcribe ``chunk``, print the text and paste it if configured.

        Clips shorter than ``settings.min_seconds`` and empty transcriptions
        are skipped with a message. Any failure is reported rather than
        raised, and the transcribing flag is always cleared at the end.
        """
        try:
            duration_seconds = 0.0
            if chunk.sample_rate > 0:
                duration_seconds = len(chunk.samples) / float(chunk.sample_rate)
            if duration_seconds < self.settings.min_seconds:
                print("[skip] Clip too short. Try speaking a bit longer.")
                return
            text = self.backend.transcribe(chunk.samples, chunk.sample_rate).strip()
            if not text:
                print("[skip] No speech detected.")
                return
            print(f"[text] {text}")
            if self.settings.autopaste:
                paste_text(text)
                print("[paste] Inserted at current cursor.")
        except Exception as exc:  # noqa: BLE001
            print(f"[error] Transcription failed: {exc}")
        finally:
            with self._lock:
                self._is_transcribing = False

    def _request_shutdown(self) -> None:
        """Stop any active recording and signal the main loop to exit."""
        with self._lock:
            if self._is_recording:
                try:
                    self.recorder.stop()
                except Exception as exc:  # noqa: BLE001
                    # Best effort: shutdown proceeds regardless, but the
                    # failure is reported instead of silently discarded.
                    print(f"[warn] Could not stop recording during shutdown: {exc}")
                self._is_recording = False
        print("[exit] Shutting down dictation app.")
        self._stop_event.set()

    def run(self) -> None:
        """Register the hotkeys and block until shutdown is requested."""
        print(f"[ready] Backend: {self.backend.name}")
        print(f"[ready] Toggle dictation: {self.settings.hotkey}")
        print(f"[ready] Quit app: {self.settings.quit_hotkey}")
        keymap = {
            self.settings.hotkey: self._toggle_recording,
            self.settings.quit_hotkey: self._request_shutdown,
        }
        signal.signal(signal.SIGTERM, lambda *_: self._request_shutdown())
        listener = keyboard.GlobalHotKeys(keymap)
        listener.start()
        try:
            # Event.wait returns as soon as the event is set, so shutdown is
            # not delayed by the remainder of a sleep interval.
            while not self._stop_event.wait(0.15):
                pass
        except KeyboardInterrupt:
            self._request_shutdown()
        finally:
            listener.stop()