yohoho 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yohoho-0.1.0/.gitignore +56 -0
- yohoho-0.1.0/LICENSE +21 -0
- yohoho-0.1.0/PKG-INFO +162 -0
- yohoho-0.1.0/README.md +124 -0
- yohoho-0.1.0/pyproject.toml +75 -0
- yohoho-0.1.0/src/yohoho/__init__.py +1 -0
- yohoho-0.1.0/src/yohoho/__main__.py +4 -0
- yohoho-0.1.0/src/yohoho/assets/fonts/Doto-VF.ttf +0 -0
- yohoho-0.1.0/src/yohoho/assets/fonts/Doto.ttf +0 -0
- yohoho-0.1.0/src/yohoho/core/__init__.py +0 -0
- yohoho-0.1.0/src/yohoho/core/audio.py +42 -0
- yohoho-0.1.0/src/yohoho/core/cli.py +756 -0
- yohoho-0.1.0/src/yohoho/core/config.py +276 -0
- yohoho-0.1.0/src/yohoho/core/config_access.py +156 -0
- yohoho-0.1.0/src/yohoho/core/config_tui.py +262 -0
- yohoho-0.1.0/src/yohoho/core/controller.py +318 -0
- yohoho-0.1.0/src/yohoho/core/engine.py +259 -0
- yohoho-0.1.0/src/yohoho/core/events.py +38 -0
- yohoho-0.1.0/src/yohoho/core/history.py +182 -0
- yohoho-0.1.0/src/yohoho/core/null_platform.py +84 -0
- yohoho-0.1.0/src/yohoho/core/observability.py +249 -0
- yohoho-0.1.0/src/yohoho/core/platform_api.py +145 -0
- yohoho-0.1.0/src/yohoho/core/platform_factory.py +17 -0
- yohoho-0.1.0/src/yohoho/core/recorder.py +160 -0
- yohoho-0.1.0/src/yohoho/core/run_loop.py +219 -0
- yohoho-0.1.0/src/yohoho/core/sounds.py +147 -0
- yohoho-0.1.0/src/yohoho/core/ui/__init__.py +5 -0
- yohoho-0.1.0/src/yohoho/core/ui/_dpi.py +20 -0
- yohoho-0.1.0/src/yohoho/core/ui/_tcl_env.py +26 -0
- yohoho-0.1.0/src/yohoho/core/ui/events.py +20 -0
- yohoho-0.1.0/src/yohoho/core/ui/fonts.py +53 -0
- yohoho-0.1.0/src/yohoho/core/ui/main_thread.py +161 -0
- yohoho-0.1.0/src/yohoho/core/ui/panel.py +492 -0
- yohoho-0.1.0/src/yohoho/core/ui/panel_model.py +307 -0
- yohoho-0.1.0/src/yohoho/core/ui/runner.py +348 -0
- yohoho-0.1.0/src/yohoho/core/ui/term.py +172 -0
- yohoho-0.1.0/src/yohoho/core/ui/theme.py +22 -0
- yohoho-0.1.0/src/yohoho/platform/__init__.py +0 -0
- yohoho-0.1.0/src/yohoho/platform/_shared/__init__.py +0 -0
- yohoho-0.1.0/src/yohoho/platform/_shared/chords.py +91 -0
- yohoho-0.1.0/src/yohoho/platform/_shared/hotkey_capture.py +86 -0
- yohoho-0.1.0/src/yohoho/platform/_shared/pynput_hotkey.py +55 -0
- yohoho-0.1.0/src/yohoho/platform/macos/__init__.py +39 -0
- yohoho-0.1.0/src/yohoho/platform/macos/_appkit.py +89 -0
- yohoho-0.1.0/src/yohoho/platform/macos/autostart.py +99 -0
- yohoho-0.1.0/src/yohoho/platform/macos/chrome.py +31 -0
- yohoho-0.1.0/src/yohoho/platform/macos/clipboard.py +19 -0
- yohoho-0.1.0/src/yohoho/platform/macos/focus.py +36 -0
- yohoho-0.1.0/src/yohoho/platform/macos/hotkey.py +9 -0
- yohoho-0.1.0/src/yohoho/platform/macos/inject.py +83 -0
- yohoho-0.1.0/src/yohoho/platform/macos/input_source.py +55 -0
- yohoho-0.1.0/src/yohoho/platform/macos/permissions.py +46 -0
- yohoho-0.1.0/src/yohoho/platform/macos_window.py +70 -0
- yohoho-0.1.0/src/yohoho/platform/windows/__init__.py +32 -0
- yohoho-0.1.0/src/yohoho/platform/windows/autostart.py +53 -0
- yohoho-0.1.0/src/yohoho/platform/windows/chrome.py +341 -0
- yohoho-0.1.0/src/yohoho/platform/windows/clipboard.py +53 -0
- yohoho-0.1.0/src/yohoho/platform/windows/focus.py +21 -0
- yohoho-0.1.0/src/yohoho/platform/windows/hotkey.py +7 -0
- yohoho-0.1.0/src/yohoho/platform/windows/inject.py +36 -0
- yohoho-0.1.0/src/yohoho/platform/windows/permissions.py +13 -0
yohoho-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
.venv/
|
|
5
|
+
venv/
|
|
6
|
+
*.egg-info/
|
|
7
|
+
build/
|
|
8
|
+
dist/
|
|
9
|
+
.pytest_cache/
|
|
10
|
+
.mypy_cache/
|
|
11
|
+
.ruff_cache/
|
|
12
|
+
|
|
13
|
+
# Node (install wrapper)
|
|
14
|
+
node_modules/
|
|
15
|
+
npm-debug.log*
|
|
16
|
+
yarn-error.log*
|
|
17
|
+
|
|
18
|
+
# Models & caches (downloaded at runtime, never committed)
|
|
19
|
+
*.onnx
|
|
20
|
+
models/
|
|
21
|
+
.cache/
|
|
22
|
+
.huggingface/
|
|
23
|
+
|
|
24
|
+
# Env / secrets
|
|
25
|
+
.env
|
|
26
|
+
.env.*
|
|
27
|
+
|
|
28
|
+
# Bundled fonts are tracked deliberately; ignore stray downloads
|
|
29
|
+
*.ttf.download
|
|
30
|
+
|
|
31
|
+
# OS cruft
|
|
32
|
+
.DS_Store
|
|
33
|
+
Thumbs.db
|
|
34
|
+
desktop.ini
|
|
35
|
+
|
|
36
|
+
# Claude local-only settings
|
|
37
|
+
.claude/settings.local.json
|
|
38
|
+
|
|
39
|
+
# yohoho runtime model cache (downloaded at runtime, never committed)
|
|
40
|
+
hf/
|
|
41
|
+
|
|
42
|
+
# brainstorming visual-companion artifacts
|
|
43
|
+
.superpowers/
|
|
44
|
+
|
|
45
|
+
# Private dev/process docs — excluded from the public repo (kept locally)
|
|
46
|
+
/CLAUDE.md
|
|
47
|
+
/.claude/
|
|
48
|
+
/docs/superpowers/
|
|
49
|
+
/docs/plans/
|
|
50
|
+
/docs/specs/
|
|
51
|
+
/docs/m4-followups.md
|
|
52
|
+
/docs/HANDOFF.md
|
|
53
|
+
/docs/README.md
|
|
54
|
+
|
|
55
|
+
# Skills marketplace lockfile (harness artifact, not project content)
|
|
56
|
+
skills-lock.json
|
yohoho-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 dev-CPC
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
yohoho-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: yohoho
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Free, fully-local voice dictation
|
|
5
|
+
Project-URL: Homepage, https://github.com/by-k4n/yohoho
|
|
6
|
+
Project-URL: Repository, https://github.com/by-k4n/yohoho
|
|
7
|
+
Project-URL: Issues, https://github.com/by-k4n/yohoho/issues
|
|
8
|
+
Author: by-k4n
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: dictation,local,macos,parakeet,privacy,speech-to-text,transcription,voice
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Environment :: MacOS X
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
17
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
|
20
|
+
Classifier: Topic :: Utilities
|
|
21
|
+
Requires-Python: <3.12,>=3.11
|
|
22
|
+
Requires-Dist: numpy>=1.26
|
|
23
|
+
Requires-Dist: onnx-asr[cpu,hub]>=0.6
|
|
24
|
+
Requires-Dist: onnxruntime>=1.27
|
|
25
|
+
Requires-Dist: pynput>=1.7
|
|
26
|
+
Requires-Dist: pyobjc-framework-applicationservices>=10; sys_platform == 'darwin'
|
|
27
|
+
Requires-Dist: pyobjc-framework-cocoa>=10; sys_platform == 'darwin'
|
|
28
|
+
Requires-Dist: pyobjc-framework-quartz>=10; sys_platform == 'darwin'
|
|
29
|
+
Requires-Dist: pywin32>=306; sys_platform == 'win32'
|
|
30
|
+
Requires-Dist: pyyaml>=6
|
|
31
|
+
Requires-Dist: sounddevice>=0.4.7
|
|
32
|
+
Requires-Dist: soundfile>=0.12
|
|
33
|
+
Requires-Dist: soxr>=0.5
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
36
|
+
Requires-Dist: ruff>=0.6; extra == 'dev'
|
|
37
|
+
Description-Content-Type: text/markdown
|
|
38
|
+
|
|
39
|
+
# yohoho
|
|
40
|
+
|
|
41
|
+
> **speak. it types.** — free, fully-local voice dictation for developers.
|
|
42
|
+
|
|
43
|
+
`yohoho` turns speech into text entirely on your machine. Hit a hotkey, talk, and an on-device model
|
|
44
|
+
(NVIDIA Parakeet) transcribes your speech and pastes the text into whatever app is focused. No cloud,
|
|
45
|
+
no API key, no subscription — your voice never leaves your laptop.
|
|
46
|
+
|
|
47
|
+
It's a free, open-source alternative to Wispr Flow and VoiceInk, for people who'd rather own their
|
|
48
|
+
tools than rent them. (The name is Brook's laugh from *One Piece* crossed with the "yo ho ho" shanty
|
|
49
|
+
— a laugh is a voice, after all.)
|
|
50
|
+
|
|
51
|
+
## Status
|
|
52
|
+
|
|
53
|
+
**Working on macOS today.** Press the hotkey, speak, press again — your words transcribe on-device and
|
|
54
|
+
paste at the cursor, with a live dot-matrix panel and on/off chimes. Windows support and a background daemon
|
|
55
|
+
are next.
|
|
56
|
+
|
|
57
|
+
| | |
|
|
58
|
+
|---|---|
|
|
59
|
+
| ✅ Working (macOS / Apple Silicon) | global hotkey, on-device transcription (Parakeet int8), live dot-matrix status panel, auto-paste, on/off chimes, run-on-login |
|
|
60
|
+
| 🚧 Next | smoother permission setup, background-daemon supervisor, Windows adapter |
|
|
61
|
+
|
|
62
|
+
## Install & set up (macOS)
|
|
63
|
+
|
|
64
|
+
Install with whichever you have — each puts a `yohoho` command on your PATH:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
npm i -g @by-k4n/yohoho # Node users — bootstraps Python via uv under the hood
|
|
68
|
+
uv tool install yohoho # uv users
|
|
69
|
+
pipx install yohoho # pipx users
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
The **npm** install adds `yohoho` to your PATH automatically (it lands in npm's global bin) — open a
|
|
73
|
+
new shell and you're set, no Python needed. With **uv** or **pipx**, if `yohoho` isn't found afterward,
|
|
74
|
+
run `uv tool ensurepath` (or `pipx ensurepath`) once to add their bin directory, then restart your shell.
|
|
75
|
+
|
|
76
|
+
Bleeding edge / no PyPI: `uv tool install 'git+https://github.com/by-k4n/yohoho.git@vX.Y.Z'`.
|
|
77
|
+
|
|
78
|
+
Then:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
yohoho setup # pick a hotkey, grant permissions, download the model (~660 MB, first run)
|
|
82
|
+
yohoho start # press your hotkey anywhere to dictate
|
|
83
|
+
yohoho config # interactive settings menu — record a new hotkey, tweak chimes, and more
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
`setup` walks you through it, opens the right System Settings panes, and installs a launch-on-login
|
|
87
|
+
agent so yohoho is ready whenever you are; the default hotkey is **⌃⌥Space** (Control-Option-Space).
|
|
88
|
+
`start` runs the dictation loop in the foreground now (Ctrl-C to quit).
|
|
89
|
+
|
|
90
|
+
**To dictate:** press **⌃⌥Space** (you'll hear the "on" chime), speak, then press **⌃⌥Space** again —
|
|
91
|
+
the text transcribes on-device and pastes at your cursor (the "off" chime confirms it). Run
|
|
92
|
+
`yohoho doctor` any time to check permissions and your hotkey.
|
|
93
|
+
|
|
94
|
+
## Permissions (macOS) — please read
|
|
95
|
+
|
|
96
|
+
macOS gates the hotkey and the paste behind three privacy permissions. **Grant them to the terminal
|
|
97
|
+
app you launch yohoho from** — Terminal, iTerm, Warp, Ghostty, … — *not* to "python":
|
|
98
|
+
|
|
99
|
+
| Permission | Why it's needed | System Settings ▸ Privacy & Security ▸ |
|
|
100
|
+
|---|---|---|
|
|
101
|
+
| **Microphone** | record your voice | Microphone |
|
|
102
|
+
| **Input Monitoring** | detect the global hotkey | Input Monitoring |
|
|
103
|
+
| **Accessibility** | paste into the focused app | Accessibility |
|
|
104
|
+
|
|
105
|
+
> **Why your terminal, not python?** macOS attributes these grants to the *responsible process* — the
|
|
106
|
+
> app that launched yohoho — which is your terminal, not the Python interpreter. `yohoho setup` opens
|
|
107
|
+
> the right panes; add your terminal app under each one and toggle it on. If you later launch from a
|
|
108
|
+
> *different* terminal, grant it there too.
|
|
109
|
+
|
|
110
|
+
**Known rough edge:** if dictation transcribes but doesn't paste (you have to press ⌘V yourself), your
|
|
111
|
+
terminal is missing **Accessibility** — add it there and restart the terminal. This terminal-by-terminal
|
|
112
|
+
grant is the price of shipping as a dev script today; a future version will ship a small signed app so
|
|
113
|
+
you grant once and forget it. For now, that's a known trade-off we've chosen on purpose.
|
|
114
|
+
|
|
115
|
+
## Why
|
|
116
|
+
|
|
117
|
+
- **Private** — audio is transcribed locally and never touches a server. Transcripts are never written
|
|
118
|
+
to logs, and history stays on your machine.
|
|
119
|
+
- **Fast** — Parakeet runs several times faster than realtime on CPU; on Apple Silicon it offloads to
|
|
120
|
+
the Neural Engine via CoreML. Text lands in ~1–2 s for a short clip.
|
|
121
|
+
- **Free** — MIT licensed. No subscription, ever.
|
|
122
|
+
|
|
123
|
+
## Architecture
|
|
124
|
+
|
|
125
|
+
A portable **core** (identical on every OS) sits behind six small platform-adapter contracts —
|
|
126
|
+
hotkey · clipboard · inject · focus · autostart · permissions — the only OS-specific code, selected at
|
|
127
|
+
runtime by `platform_factory`. Engine: NVIDIA Parakeet TDT 0.6b v2 (int8 ONNX) via `onnx-asr`. UI: a
|
|
128
|
+
Tkinter dot-matrix panel. Output: clipboard paste (lossless, unlike per-key typing).
|
|
129
|
+
|
|
130
|
+
The full design is in [`docs/DESIGN.md`](docs/DESIGN.md); the 149-case failure-mode matrix is in
|
|
131
|
+
[`docs/edge-cases.md`](docs/edge-cases.md).
|
|
132
|
+
|
|
133
|
+
## Roadmap
|
|
134
|
+
|
|
135
|
+
- [x] **M1** — portable core (engine, recorder, controller, config, observability, history) + `yohoho dictate`
|
|
136
|
+
- [x] **M2** — dot-matrix status panel (Tkinter)
|
|
137
|
+
- [x] **M3** — macOS adapter: global hotkey, TCC permissions, auto-paste, on/off chimes, run-on-login
|
|
138
|
+
- [x] **M4 (install)** — PyPI + npm wrapper install (this ship); daemon/signed-app/tray are later M4 pieces
|
|
139
|
+
- [ ] **M4** — background-daemon supervisor, smoother permission flow (signed app), full `status`/`history`/`logs`
|
|
140
|
+
- [ ] **M5** — Windows adapter
|
|
141
|
+
- [ ] **M6** — standalone per-OS binaries
|
|
142
|
+
|
|
143
|
+
Linux is on the map but deferred from v1; the adapter layer is kept Linux-ready.
|
|
144
|
+
|
|
145
|
+
## Development
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
uv sync --extra dev
|
|
149
|
+
uv run pytest # unit suite
|
|
150
|
+
uv run pytest -m "not integration" # unit suite plus the Tk panel (gui) tests
|
|
151
|
+
uv run pytest -m integration # real-model test (needs the model cached + tests/fixtures/hello.wav)
|
|
152
|
+
uv run ruff check .
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## Design
|
|
156
|
+
|
|
157
|
+
Terminal / dot-matrix aesthetic — brand color `#39BFC6` on near-black,
|
|
158
|
+
[Doto](https://fonts.google.com/specimen/Doto) wordmark, everything rendered in dots.
|
|
159
|
+
|
|
160
|
+
## License
|
|
161
|
+
|
|
162
|
+
MIT — see [LICENSE](LICENSE). If it saves you a subscription, buy yourself a coffee.
|
yohoho-0.1.0/README.md
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# yohoho
|
|
2
|
+
|
|
3
|
+
> **speak. it types.** — free, fully-local voice dictation for developers.
|
|
4
|
+
|
|
5
|
+
`yohoho` turns speech into text entirely on your machine. Hit a hotkey, talk, and an on-device model
|
|
6
|
+
(NVIDIA Parakeet) transcribes your speech and pastes the text into whatever app is focused. No cloud,
|
|
7
|
+
no API key, no subscription — your voice never leaves your laptop.
|
|
8
|
+
|
|
9
|
+
It's a free, open-source alternative to Wispr Flow and VoiceInk, for people who'd rather own their
|
|
10
|
+
tools than rent them. (The name is Brook's laugh from *One Piece* crossed with the "yo ho ho" shanty
|
|
11
|
+
— a laugh is a voice, after all.)
|
|
12
|
+
|
|
13
|
+
## Status
|
|
14
|
+
|
|
15
|
+
**Working on macOS today.** Press the hotkey, speak, press again — your words transcribe on-device and
|
|
16
|
+
paste at the cursor, with a live dot-matrix panel and on/off chimes. Windows support and a background daemon
|
|
17
|
+
are next.
|
|
18
|
+
|
|
19
|
+
| | |
|
|
20
|
+
|---|---|
|
|
21
|
+
| ✅ Working (macOS / Apple Silicon) | global hotkey, on-device transcription (Parakeet int8), live dot-matrix status panel, auto-paste, on/off chimes, run-on-login |
|
|
22
|
+
| 🚧 Next | smoother permission setup, background-daemon supervisor, Windows adapter |
|
|
23
|
+
|
|
24
|
+
## Install & set up (macOS)
|
|
25
|
+
|
|
26
|
+
Install with whichever you have — each puts a `yohoho` command on your PATH:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
npm i -g @by-k4n/yohoho # Node users — bootstraps Python via uv under the hood
|
|
30
|
+
uv tool install yohoho # uv users
|
|
31
|
+
pipx install yohoho # pipx users
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
The **npm** install adds `yohoho` to your PATH automatically (it lands in npm's global bin) — open a
|
|
35
|
+
new shell and you're set, no Python needed. With **uv** or **pipx**, if `yohoho` isn't found afterward,
|
|
36
|
+
run `uv tool ensurepath` (or `pipx ensurepath`) once to add their bin directory, then restart your shell.
|
|
37
|
+
|
|
38
|
+
Bleeding edge / no PyPI: `uv tool install 'git+https://github.com/by-k4n/yohoho.git@vX.Y.Z'`.
|
|
39
|
+
|
|
40
|
+
Then:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
yohoho setup # pick a hotkey, grant permissions, download the model (~660 MB, first run)
|
|
44
|
+
yohoho start # press your hotkey anywhere to dictate
|
|
45
|
+
yohoho config # interactive settings menu — record a new hotkey, tweak chimes, and more
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
`setup` walks you through it, opens the right System Settings panes, and installs a launch-on-login
|
|
49
|
+
agent so yohoho is ready whenever you are; the default hotkey is **⌃⌥Space** (Control-Option-Space).
|
|
50
|
+
`start` runs the dictation loop in the foreground now (Ctrl-C to quit).
|
|
51
|
+
|
|
52
|
+
**To dictate:** press **⌃⌥Space** (you'll hear the "on" chime), speak, then press **⌃⌥Space** again —
|
|
53
|
+
the text transcribes on-device and pastes at your cursor (the "off" chime confirms it). Run
|
|
54
|
+
`yohoho doctor` any time to check permissions and your hotkey.
|
|
55
|
+
|
|
56
|
+
## Permissions (macOS) — please read
|
|
57
|
+
|
|
58
|
+
macOS gates the hotkey and the paste behind three privacy permissions. **Grant them to the terminal
|
|
59
|
+
app you launch yohoho from** — Terminal, iTerm, Warp, Ghostty, … — *not* to "python":
|
|
60
|
+
|
|
61
|
+
| Permission | Why it's needed | System Settings ▸ Privacy & Security ▸ |
|
|
62
|
+
|---|---|---|
|
|
63
|
+
| **Microphone** | record your voice | Microphone |
|
|
64
|
+
| **Input Monitoring** | detect the global hotkey | Input Monitoring |
|
|
65
|
+
| **Accessibility** | paste into the focused app | Accessibility |
|
|
66
|
+
|
|
67
|
+
> **Why your terminal, not python?** macOS attributes these grants to the *responsible process* — the
|
|
68
|
+
> app that launched yohoho — which is your terminal, not the Python interpreter. `yohoho setup` opens
|
|
69
|
+
> the right panes; add your terminal app under each one and toggle it on. If you later launch from a
|
|
70
|
+
> *different* terminal, grant it there too.
|
|
71
|
+
|
|
72
|
+
**Known rough edge:** if dictation transcribes but doesn't paste (you have to press ⌘V yourself), your
|
|
73
|
+
terminal is missing **Accessibility** — add it there and restart the terminal. This terminal-by-terminal
|
|
74
|
+
grant is the price of shipping as a dev script today; a future version will ship a small signed app so
|
|
75
|
+
you grant once and forget it. For now, that's a known trade-off we've chosen on purpose.
|
|
76
|
+
|
|
77
|
+
## Why
|
|
78
|
+
|
|
79
|
+
- **Private** — audio is transcribed locally and never touches a server. Transcripts are never written
|
|
80
|
+
to logs, and history stays on your machine.
|
|
81
|
+
- **Fast** — Parakeet runs several times faster than realtime on CPU; on Apple Silicon it offloads to
|
|
82
|
+
the Neural Engine via CoreML. Text lands in ~1–2 s for a short clip.
|
|
83
|
+
- **Free** — MIT licensed. No subscription, ever.
|
|
84
|
+
|
|
85
|
+
## Architecture
|
|
86
|
+
|
|
87
|
+
A portable **core** (identical on every OS) sits behind six small platform-adapter contracts —
|
|
88
|
+
hotkey · clipboard · inject · focus · autostart · permissions — the only OS-specific code, selected at
|
|
89
|
+
runtime by `platform_factory`. Engine: NVIDIA Parakeet TDT 0.6b v2 (int8 ONNX) via `onnx-asr`. UI: a
|
|
90
|
+
Tkinter dot-matrix panel. Output: clipboard paste (lossless, unlike per-key typing).
|
|
91
|
+
|
|
92
|
+
The full design is in [`docs/DESIGN.md`](docs/DESIGN.md); the 149-case failure-mode matrix is in
|
|
93
|
+
[`docs/edge-cases.md`](docs/edge-cases.md).
|
|
94
|
+
|
|
95
|
+
## Roadmap
|
|
96
|
+
|
|
97
|
+
- [x] **M1** — portable core (engine, recorder, controller, config, observability, history) + `yohoho dictate`
|
|
98
|
+
- [x] **M2** — dot-matrix status panel (Tkinter)
|
|
99
|
+
- [x] **M3** — macOS adapter: global hotkey, TCC permissions, auto-paste, on/off chimes, run-on-login
|
|
100
|
+
- [x] **M4 (install)** — PyPI + npm wrapper install (this ship); daemon/signed-app/tray are later M4 pieces
|
|
101
|
+
- [ ] **M4** — background-daemon supervisor, smoother permission flow (signed app), full `status`/`history`/`logs`
|
|
102
|
+
- [ ] **M5** — Windows adapter
|
|
103
|
+
- [ ] **M6** — standalone per-OS binaries
|
|
104
|
+
|
|
105
|
+
Linux is on the map but deferred from v1; the adapter layer is kept Linux-ready.
|
|
106
|
+
|
|
107
|
+
## Development
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
uv sync --extra dev
|
|
111
|
+
uv run pytest # unit suite
|
|
112
|
+
uv run pytest -m "not integration" # unit suite plus the Tk panel (gui) tests
|
|
113
|
+
uv run pytest -m integration # real-model test (needs the model cached + tests/fixtures/hello.wav)
|
|
114
|
+
uv run ruff check .
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Design
|
|
118
|
+
|
|
119
|
+
Terminal / dot-matrix aesthetic — brand color `#39BFC6` on near-black,
|
|
120
|
+
[Doto](https://fonts.google.com/specimen/Doto) wordmark, everything rendered in dots.
|
|
121
|
+
|
|
122
|
+
## License
|
|
123
|
+
|
|
124
|
+
MIT — see [LICENSE](LICENSE). If it saves you a subscription, buy yourself a coffee.
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "yohoho"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Free, fully-local voice dictation"
|
|
5
|
+
requires-python = ">=3.11,<3.12"
|
|
6
|
+
dependencies = [
|
|
7
|
+
"onnx-asr[cpu,hub]>=0.6",
|
|
8
|
+
"onnxruntime>=1.27",
|
|
9
|
+
"sounddevice>=0.4.7",
|
|
10
|
+
"soundfile>=0.12",
|
|
11
|
+
"soxr>=0.5",
|
|
12
|
+
"numpy>=1.26",
|
|
13
|
+
"pyyaml>=6",
|
|
14
|
+
"pyobjc-framework-Cocoa>=10 ; sys_platform == 'darwin'",
|
|
15
|
+
"pyobjc-framework-Quartz>=10 ; sys_platform == 'darwin'",
|
|
16
|
+
"pyobjc-framework-ApplicationServices>=10 ; sys_platform == 'darwin'",
|
|
17
|
+
"pynput>=1.7",
|
|
18
|
+
"pywin32>=306 ; sys_platform == 'win32'",
|
|
19
|
+
]
|
|
20
|
+
readme = "README.md"
|
|
21
|
+
license = { text = "MIT" }
|
|
22
|
+
authors = [{ name = "by-k4n" }]
|
|
23
|
+
keywords = ["dictation", "speech-to-text", "voice", "transcription", "parakeet", "local", "privacy", "macos"]
|
|
24
|
+
classifiers = [
|
|
25
|
+
"Development Status :: 3 - Alpha",
|
|
26
|
+
"Environment :: MacOS X",
|
|
27
|
+
"Intended Audience :: Developers",
|
|
28
|
+
"License :: OSI Approved :: MIT License",
|
|
29
|
+
"Operating System :: MacOS :: MacOS X",
|
|
30
|
+
"Operating System :: Microsoft :: Windows",
|
|
31
|
+
"Programming Language :: Python :: 3.11",
|
|
32
|
+
"Topic :: Multimedia :: Sound/Audio :: Speech",
|
|
33
|
+
"Topic :: Utilities",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[project.urls]
|
|
37
|
+
Homepage = "https://github.com/by-k4n/yohoho"
|
|
38
|
+
Repository = "https://github.com/by-k4n/yohoho"
|
|
39
|
+
Issues = "https://github.com/by-k4n/yohoho/issues"
|
|
40
|
+
|
|
41
|
+
[project.scripts]
|
|
42
|
+
yohoho = "yohoho.core.cli:main"
|
|
43
|
+
|
|
44
|
+
[project.optional-dependencies]
|
|
45
|
+
dev = ["pytest>=8", "ruff>=0.6"]
|
|
46
|
+
|
|
47
|
+
[build-system]
|
|
48
|
+
requires = ["hatchling"]
|
|
49
|
+
build-backend = "hatchling.build"
|
|
50
|
+
|
|
51
|
+
[tool.hatch.build.targets.wheel]
|
|
52
|
+
packages = ["src/yohoho"]
|
|
53
|
+
|
|
54
|
+
[tool.hatch.build.targets.sdist]
|
|
55
|
+
# Lean source distribution: only the importable code + user-facing README/LICENSE/
|
|
56
|
+
# pyproject. Excludes docs/ (design + planning notes), CLAUDE.md, .claude/, tests/
|
|
57
|
+
# (incl. fixtures), packaging/, RELEASING.md — none belong in the installable package.
|
|
58
|
+
include = [
|
|
59
|
+
"/src/yohoho",
|
|
60
|
+
"/README.md",
|
|
61
|
+
"/LICENSE",
|
|
62
|
+
"/pyproject.toml",
|
|
63
|
+
]
|
|
64
|
+
|
|
65
|
+
[tool.pytest.ini_options]
|
|
66
|
+
pythonpath = ["src", "."]
|
|
67
|
+
markers = [
|
|
68
|
+
"integration: loads the real model / touches real audio devices (slow)",
|
|
69
|
+
"gui: real-Tk smoke tests; need a windowing server (skipped headless)",
|
|
70
|
+
]
|
|
71
|
+
addopts = "-m 'not integration and not gui'"
|
|
72
|
+
|
|
73
|
+
[tool.ruff]
|
|
74
|
+
line-length = 100
|
|
75
|
+
src = ["src", "tests"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Package version string; keep in sync with [project].version in pyproject.toml.
__version__ = "0.1.0"
|
|
Binary file
|
|
Binary file
|
|
File without changes
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Pure DSP helpers — no device I/O, fully testable in isolation.
|
|
2
|
+
|
|
3
|
+
Functions:
|
|
4
|
+
resample_to_16k — resample arbitrary-rate mono audio to 16 kHz (soxr)
|
|
5
|
+
rms — root-mean-square amplitude of a float32 array
|
|
6
|
+
is_silent — resilience P2 silence guard: rms(x) < floor
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import soxr
|
|
11
|
+
|
|
12
|
+
# Sample rate in Hz that resample_to_16k() converts audio to.
_TARGET_SR = 16000
|
|
13
|
+
# Default RMS threshold for is_silent(): clips strictly below it count as silence.
_SILENCE_FLOOR = 0.003
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def resample_to_16k(x: np.ndarray, sr: int) -> np.ndarray:
    """Return mono audio resampled to 16 kHz as float32.

    Args:
        x: Input samples captured at sample rate *sr*.
        sr: Sample rate of *x* in Hz.

    Returns:
        The audio at 16 kHz with dtype float32. When *sr* is already 16000 and
        *x* is already float32, the very same array object is returned
        (zero-copy passthrough); other dtypes are converted so the float32
        contract holds on every path.

    Built-in / Bluetooth mics are typically 44.1 or 48 kHz; skipping this step feeds
    the model confident garbage that passes the silence guard.
    """
    if sr == _TARGET_SR:
        # No rate conversion needed, but still honour the float32 contract.
        # copy=False hands back *x* itself when it is already float32.
        return x.astype(np.float32, copy=False)
    out = soxr.resample(x, sr, _TARGET_SR, quality="HQ")
    return out.astype(np.float32, copy=False)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def rms(x: np.ndarray) -> float:
    """Compute the root-mean-square amplitude of *x*.

    Samples are widened to float64 before squaring, and the result is returned
    as a plain Python float. An empty array yields 0.0.
    """
    if not x.size:
        return 0.0
    widened = x.astype(np.float64)
    mean_square = np.mean(np.square(widened))
    return float(np.sqrt(mean_square))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def is_silent(x: np.ndarray, floor: float = _SILENCE_FLOOR) -> bool:
    """Report whether the RMS level of *x* falls below *floor*.

    The comparison is strict, so a clip whose RMS equals *floor* is not
    treated as silent. Meant as the silence guard (resilience primitive P2)
    the controller applies before calling recognize(), so an empty or
    background-noise clip never reaches the model.
    """
    level = rms(x)
    return level < floor
|