yohoho 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. yohoho-0.1.0/.gitignore +56 -0
  2. yohoho-0.1.0/LICENSE +21 -0
  3. yohoho-0.1.0/PKG-INFO +162 -0
  4. yohoho-0.1.0/README.md +124 -0
  5. yohoho-0.1.0/pyproject.toml +75 -0
  6. yohoho-0.1.0/src/yohoho/__init__.py +1 -0
  7. yohoho-0.1.0/src/yohoho/__main__.py +4 -0
  8. yohoho-0.1.0/src/yohoho/assets/fonts/Doto-VF.ttf +0 -0
  9. yohoho-0.1.0/src/yohoho/assets/fonts/Doto.ttf +0 -0
  10. yohoho-0.1.0/src/yohoho/core/__init__.py +0 -0
  11. yohoho-0.1.0/src/yohoho/core/audio.py +42 -0
  12. yohoho-0.1.0/src/yohoho/core/cli.py +756 -0
  13. yohoho-0.1.0/src/yohoho/core/config.py +276 -0
  14. yohoho-0.1.0/src/yohoho/core/config_access.py +156 -0
  15. yohoho-0.1.0/src/yohoho/core/config_tui.py +262 -0
  16. yohoho-0.1.0/src/yohoho/core/controller.py +318 -0
  17. yohoho-0.1.0/src/yohoho/core/engine.py +259 -0
  18. yohoho-0.1.0/src/yohoho/core/events.py +38 -0
  19. yohoho-0.1.0/src/yohoho/core/history.py +182 -0
  20. yohoho-0.1.0/src/yohoho/core/null_platform.py +84 -0
  21. yohoho-0.1.0/src/yohoho/core/observability.py +249 -0
  22. yohoho-0.1.0/src/yohoho/core/platform_api.py +145 -0
  23. yohoho-0.1.0/src/yohoho/core/platform_factory.py +17 -0
  24. yohoho-0.1.0/src/yohoho/core/recorder.py +160 -0
  25. yohoho-0.1.0/src/yohoho/core/run_loop.py +219 -0
  26. yohoho-0.1.0/src/yohoho/core/sounds.py +147 -0
  27. yohoho-0.1.0/src/yohoho/core/ui/__init__.py +5 -0
  28. yohoho-0.1.0/src/yohoho/core/ui/_dpi.py +20 -0
  29. yohoho-0.1.0/src/yohoho/core/ui/_tcl_env.py +26 -0
  30. yohoho-0.1.0/src/yohoho/core/ui/events.py +20 -0
  31. yohoho-0.1.0/src/yohoho/core/ui/fonts.py +53 -0
  32. yohoho-0.1.0/src/yohoho/core/ui/main_thread.py +161 -0
  33. yohoho-0.1.0/src/yohoho/core/ui/panel.py +492 -0
  34. yohoho-0.1.0/src/yohoho/core/ui/panel_model.py +307 -0
  35. yohoho-0.1.0/src/yohoho/core/ui/runner.py +348 -0
  36. yohoho-0.1.0/src/yohoho/core/ui/term.py +172 -0
  37. yohoho-0.1.0/src/yohoho/core/ui/theme.py +22 -0
  38. yohoho-0.1.0/src/yohoho/platform/__init__.py +0 -0
  39. yohoho-0.1.0/src/yohoho/platform/_shared/__init__.py +0 -0
  40. yohoho-0.1.0/src/yohoho/platform/_shared/chords.py +91 -0
  41. yohoho-0.1.0/src/yohoho/platform/_shared/hotkey_capture.py +86 -0
  42. yohoho-0.1.0/src/yohoho/platform/_shared/pynput_hotkey.py +55 -0
  43. yohoho-0.1.0/src/yohoho/platform/macos/__init__.py +39 -0
  44. yohoho-0.1.0/src/yohoho/platform/macos/_appkit.py +89 -0
  45. yohoho-0.1.0/src/yohoho/platform/macos/autostart.py +99 -0
  46. yohoho-0.1.0/src/yohoho/platform/macos/chrome.py +31 -0
  47. yohoho-0.1.0/src/yohoho/platform/macos/clipboard.py +19 -0
  48. yohoho-0.1.0/src/yohoho/platform/macos/focus.py +36 -0
  49. yohoho-0.1.0/src/yohoho/platform/macos/hotkey.py +9 -0
  50. yohoho-0.1.0/src/yohoho/platform/macos/inject.py +83 -0
  51. yohoho-0.1.0/src/yohoho/platform/macos/input_source.py +55 -0
  52. yohoho-0.1.0/src/yohoho/platform/macos/permissions.py +46 -0
  53. yohoho-0.1.0/src/yohoho/platform/macos_window.py +70 -0
  54. yohoho-0.1.0/src/yohoho/platform/windows/__init__.py +32 -0
  55. yohoho-0.1.0/src/yohoho/platform/windows/autostart.py +53 -0
  56. yohoho-0.1.0/src/yohoho/platform/windows/chrome.py +341 -0
  57. yohoho-0.1.0/src/yohoho/platform/windows/clipboard.py +53 -0
  58. yohoho-0.1.0/src/yohoho/platform/windows/focus.py +21 -0
  59. yohoho-0.1.0/src/yohoho/platform/windows/hotkey.py +7 -0
  60. yohoho-0.1.0/src/yohoho/platform/windows/inject.py +36 -0
  61. yohoho-0.1.0/src/yohoho/platform/windows/permissions.py +13 -0
@@ -0,0 +1,56 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ .venv/
5
+ venv/
6
+ *.egg-info/
7
+ build/
8
+ dist/
9
+ .pytest_cache/
10
+ .mypy_cache/
11
+ .ruff_cache/
12
+
13
+ # Node (install wrapper)
14
+ node_modules/
15
+ npm-debug.log*
16
+ yarn-error.log*
17
+
18
+ # Models & caches (downloaded at runtime, never committed)
19
+ *.onnx
20
+ models/
21
+ .cache/
22
+ .huggingface/
23
+
24
+ # Env / secrets
25
+ .env
26
+ .env.*
27
+
28
+ # Bundled fonts are tracked deliberately; ignore stray downloads
29
+ *.ttf.download
30
+
31
+ # OS cruft
32
+ .DS_Store
33
+ Thumbs.db
34
+ desktop.ini
35
+
36
+ # Claude local-only settings
37
+ .claude/settings.local.json
38
+
39
+ # yohoho runtime model cache (downloaded at runtime, never committed)
40
+ hf/
41
+
42
+ # brainstorming visual-companion artifacts
43
+ .superpowers/
44
+
45
+ # Private dev/process docs — excluded from the public repo (kept locally)
46
+ /CLAUDE.md
47
+ /.claude/
48
+ /docs/superpowers/
49
+ /docs/plans/
50
+ /docs/specs/
51
+ /docs/m4-followups.md
52
+ /docs/HANDOFF.md
53
+ /docs/README.md
54
+
55
+ # Skills marketplace lockfile (harness artifact, not project content)
56
+ skills-lock.json
yohoho-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 dev-CPC
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
yohoho-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,162 @@
1
+ Metadata-Version: 2.4
2
+ Name: yohoho
3
+ Version: 0.1.0
4
+ Summary: Free, fully-local voice dictation
5
+ Project-URL: Homepage, https://github.com/by-k4n/yohoho
6
+ Project-URL: Repository, https://github.com/by-k4n/yohoho
7
+ Project-URL: Issues, https://github.com/by-k4n/yohoho/issues
8
+ Author: by-k4n
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: dictation,local,macos,parakeet,privacy,speech-to-text,transcription,voice
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Environment :: MacOS X
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: MacOS :: MacOS X
17
+ Classifier: Operating System :: Microsoft :: Windows
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
20
+ Classifier: Topic :: Utilities
21
+ Requires-Python: <3.12,>=3.11
22
+ Requires-Dist: numpy>=1.26
23
+ Requires-Dist: onnx-asr[cpu,hub]>=0.6
24
+ Requires-Dist: onnxruntime>=1.27
25
+ Requires-Dist: pynput>=1.7
26
+ Requires-Dist: pyobjc-framework-applicationservices>=10; sys_platform == 'darwin'
27
+ Requires-Dist: pyobjc-framework-cocoa>=10; sys_platform == 'darwin'
28
+ Requires-Dist: pyobjc-framework-quartz>=10; sys_platform == 'darwin'
29
+ Requires-Dist: pywin32>=306; sys_platform == 'win32'
30
+ Requires-Dist: pyyaml>=6
31
+ Requires-Dist: sounddevice>=0.4.7
32
+ Requires-Dist: soundfile>=0.12
33
+ Requires-Dist: soxr>=0.5
34
+ Provides-Extra: dev
35
+ Requires-Dist: pytest>=8; extra == 'dev'
36
+ Requires-Dist: ruff>=0.6; extra == 'dev'
37
+ Description-Content-Type: text/markdown
38
+
39
+ # yohoho
40
+
41
+ > **speak. it types.** — free, fully-local voice dictation for developers.
42
+
43
+ `yohoho` turns speech into text entirely on your machine. Hit a hotkey, talk, and an on-device model
44
+ (NVIDIA Parakeet) transcribes your speech and pastes the text into whatever app is focused. No cloud,
45
+ no API key, no subscription — your voice never leaves your laptop.
46
+
47
+ It's a free, open-source alternative to Wispr Flow and VoiceInk, for people who'd rather own their
48
+ tools than rent them. (The name is Brook's laugh from *One Piece* crossed with the "yo ho ho" shanty
49
+ — a laugh is a voice, after all.)
50
+
51
+ ## Status
52
+
53
+ **Working on macOS today.** Press the hotkey, speak, press again — your words transcribe on-device and
54
+ paste at the cursor, with a live dot-matrix panel and on/off chimes. Windows and a one-line installer
55
+ are next.
56
+
57
+ | | |
58
+ |---|---|
59
+ | ✅ Working (macOS / Apple Silicon) | global hotkey, on-device transcription (Parakeet int8), live dot-matrix status panel, auto-paste, on/off chimes, run-on-login |
60
+ | 🚧 Next | smoother permission setup, background-daemon supervisor, Windows adapter |
61
+
62
+ ## Install & set up (macOS)
63
+
64
+ Install with whichever you have — each puts a `yohoho` command on your PATH:
65
+
66
+ ```bash
67
+ npm i -g @by-k4n/yohoho # Node users — bootstraps Python via uv under the hood
68
+ uv tool install yohoho # uv users
69
+ pipx install yohoho # pipx users
70
+ ```
71
+
72
+ The **npm** install adds `yohoho` to your PATH automatically (it lands in npm's global bin) — open a
73
+ new shell and you're set, no Python needed. With **uv** or **pipx**, if `yohoho` isn't found afterward,
74
+ run `uv tool ensurepath` (or `pipx ensurepath`) once to add their bin directory, then restart your shell.
75
+
76
+ Bleeding edge / no PyPI: `uv tool install 'git+https://github.com/by-k4n/yohoho.git@vX.Y.Z'`.
77
+
78
+ Then:
79
+
80
+ ```bash
81
+ yohoho setup # pick a hotkey, grant permissions, download the model (~660 MB, first run)
82
+ yohoho start # press your hotkey anywhere to dictate
83
+ yohoho config # interactive settings menu — record a new hotkey, tweak chimes, and more
84
+ ```
85
+
86
+ `setup` walks you through it, opens the right System Settings panes, and installs a launch-on-login
87
+ agent so yohoho is ready whenever you are; the default hotkey is **⌃⌥Space** (Control-Option-Space).
88
+ `start` runs the dictation loop in the foreground now (Ctrl-C to quit).
89
+
90
+ **To dictate:** press **⌃⌥Space** (you'll hear the "on" chime), speak, then press **⌃⌥Space** again —
91
+ the text transcribes on-device and pastes at your cursor (the "off" chime confirms it). Run
92
+ `yohoho doctor` any time to check permissions and your hotkey.
93
+
94
+ ## Permissions (macOS) — please read
95
+
96
+ macOS gates the hotkey and the paste behind three privacy permissions. **Grant them to the terminal
97
+ app you launch yohoho from** — Terminal, iTerm, Warp, Ghostty, … — *not* to "python":
98
+
99
+ | Permission | Why it's needed | System Settings ▸ Privacy & Security ▸ |
100
+ |---|---|---|
101
+ | **Microphone** | record your voice | Microphone |
102
+ | **Input Monitoring** | detect the global hotkey | Input Monitoring |
103
+ | **Accessibility** | paste into the focused app | Accessibility |
104
+
105
+ > **Why your terminal, not python?** macOS attributes these grants to the *responsible process* — the
106
+ > app that launched yohoho — which is your terminal, not the Python interpreter. `yohoho setup` opens
107
+ > the right panes; add your terminal app under each one and toggle it on. If you later launch from a
108
+ > *different* terminal, grant it there too.
109
+
110
+ **Known rough edge:** if dictation transcribes but doesn't paste (you have to press ⌘V yourself), your
111
+ terminal is missing **Accessibility** — add it there and restart the terminal. This terminal-by-terminal
112
+ grant is the price of shipping as a dev script today; a future version will ship a small signed app so
113
+ you grant once and forget it. For now, that's a known trade-off we've chosen on purpose.
114
+
115
+ ## Why
116
+
117
+ - **Private** — audio is transcribed locally and never touches a server. Transcripts are never written
118
+ to logs, and history stays on your machine.
119
+ - **Fast** — Parakeet runs several times faster than realtime on CPU; on Apple Silicon it offloads to
120
+ the Neural Engine via CoreML. Text lands in ~1–2 s for a short clip.
121
+ - **Free** — MIT licensed. No subscription, ever.
122
+
123
+ ## Architecture
124
+
125
+ A portable **core** (identical on every OS) sits behind six small platform-adapter contracts —
126
+ hotkey · clipboard · inject · focus · autostart · permissions — the only OS-specific code, selected at
127
+ runtime by `platform_factory`. Engine: NVIDIA Parakeet TDT 0.6b v2 (int8 ONNX) via `onnx-asr`. UI: a
128
+ Tkinter dot-matrix panel. Output: clipboard paste (lossless, unlike per-key typing).
129
+
130
+ The full design is in [`docs/DESIGN.md`](docs/DESIGN.md); the 149-case failure-mode matrix is in
131
+ [`docs/edge-cases.md`](docs/edge-cases.md).
132
+
133
+ ## Roadmap
134
+
135
+ - [x] **M1** — portable core (engine, recorder, controller, config, observability, history) + `yohoho dictate`
136
+ - [x] **M2** — dot-matrix status panel (Tkinter)
137
+ - [x] **M3** — macOS adapter: global hotkey, TCC permissions, auto-paste, on/off chimes, run-on-login
138
+ - [x] **M4 (install)** — PyPI + npm wrapper install (this ship); daemon/signed-app/tray are later M4 pieces
139
+ - [ ] **M4** — background-daemon supervisor, smoother permission flow (signed app), full `status`/`history`/`logs`
140
+ - [ ] **M5** — Windows adapter
141
+ - [ ] **M6** — standalone per-OS binaries
142
+
143
+ Linux is on the map but deferred from v1; the adapter layer is kept Linux-ready.
144
+
145
+ ## Development
146
+
147
+ ```bash
148
+ uv sync --extra dev
149
+ uv run pytest # unit suite
150
+ uv run pytest -m "not integration" # include the Tk panel tests (real-model tests stay excluded)
151
+ uv run pytest -m integration # real-model test (needs the model cached + tests/fixtures/hello.wav)
152
+ uv run ruff check .
153
+ ```
154
+
155
+ ## Design
156
+
157
+ Terminal / dot-matrix aesthetic — brand color `#39BFC6` on near-black,
158
+ [Doto](https://fonts.google.com/specimen/Doto) wordmark, everything rendered in dots.
159
+
160
+ ## License
161
+
162
+ MIT — see [LICENSE](LICENSE). If it saves you a subscription, buy yourself a coffee.
yohoho-0.1.0/README.md ADDED
@@ -0,0 +1,124 @@
1
+ # yohoho
2
+
3
+ > **speak. it types.** — free, fully-local voice dictation for developers.
4
+
5
+ `yohoho` turns speech into text entirely on your machine. Hit a hotkey, talk, and an on-device model
6
+ (NVIDIA Parakeet) transcribes your speech and pastes the text into whatever app is focused. No cloud,
7
+ no API key, no subscription — your voice never leaves your laptop.
8
+
9
+ It's a free, open-source alternative to Wispr Flow and VoiceInk, for people who'd rather own their
10
+ tools than rent them. (The name is Brook's laugh from *One Piece* crossed with the "yo ho ho" shanty
11
+ — a laugh is a voice, after all.)
12
+
13
+ ## Status
14
+
15
+ **Working on macOS today.** Press the hotkey, speak, press again — your words transcribe on-device and
16
+ paste at the cursor, with a live dot-matrix panel and on/off chimes. Windows and a one-line installer
17
+ are next.
18
+
19
+ | | |
20
+ |---|---|
21
+ | ✅ Working (macOS / Apple Silicon) | global hotkey, on-device transcription (Parakeet int8), live dot-matrix status panel, auto-paste, on/off chimes, run-on-login |
22
+ | 🚧 Next | smoother permission setup, background-daemon supervisor, Windows adapter |
23
+
24
+ ## Install & set up (macOS)
25
+
26
+ Install with whichever you have — each puts a `yohoho` command on your PATH:
27
+
28
+ ```bash
29
+ npm i -g @by-k4n/yohoho # Node users — bootstraps Python via uv under the hood
30
+ uv tool install yohoho # uv users
31
+ pipx install yohoho # pipx users
32
+ ```
33
+
34
+ The **npm** install adds `yohoho` to your PATH automatically (it lands in npm's global bin) — open a
35
+ new shell and you're set, no Python needed. With **uv** or **pipx**, if `yohoho` isn't found afterward,
36
+ run `uv tool ensurepath` (or `pipx ensurepath`) once to add their bin directory, then restart your shell.
37
+
38
+ Bleeding edge / no PyPI: `uv tool install 'git+https://github.com/by-k4n/yohoho.git@vX.Y.Z'`.
39
+
40
+ Then:
41
+
42
+ ```bash
43
+ yohoho setup # pick a hotkey, grant permissions, download the model (~660 MB, first run)
44
+ yohoho start # press your hotkey anywhere to dictate
45
+ yohoho config # interactive settings menu — record a new hotkey, tweak chimes, and more
46
+ ```
47
+
48
+ `setup` walks you through it, opens the right System Settings panes, and installs a launch-on-login
49
+ agent so yohoho is ready whenever you are; the default hotkey is **⌃⌥Space** (Control-Option-Space).
50
+ `start` runs the dictation loop in the foreground now (Ctrl-C to quit).
51
+
52
+ **To dictate:** press **⌃⌥Space** (you'll hear the "on" chime), speak, then press **⌃⌥Space** again —
53
+ the text transcribes on-device and pastes at your cursor (the "off" chime confirms it). Run
54
+ `yohoho doctor` any time to check permissions and your hotkey.
55
+
56
+ ## Permissions (macOS) — please read
57
+
58
+ macOS gates the hotkey and the paste behind three privacy permissions. **Grant them to the terminal
59
+ app you launch yohoho from** — Terminal, iTerm, Warp, Ghostty, … — *not* to "python":
60
+
61
+ | Permission | Why it's needed | System Settings ▸ Privacy & Security ▸ |
62
+ |---|---|---|
63
+ | **Microphone** | record your voice | Microphone |
64
+ | **Input Monitoring** | detect the global hotkey | Input Monitoring |
65
+ | **Accessibility** | paste into the focused app | Accessibility |
66
+
67
+ > **Why your terminal, not python?** macOS attributes these grants to the *responsible process* — the
68
+ > app that launched yohoho — which is your terminal, not the Python interpreter. `yohoho setup` opens
69
+ > the right panes; add your terminal app under each one and toggle it on. If you later launch from a
70
+ > *different* terminal, grant it there too.
71
+
72
+ **Known rough edge:** if dictation transcribes but doesn't paste (you have to press ⌘V yourself), your
73
+ terminal is missing **Accessibility** — add it there and restart the terminal. This terminal-by-terminal
74
+ grant is the price of shipping as a dev script today; a future version will ship a small signed app so
75
+ you grant once and forget it. For now, that's a known trade-off we've chosen on purpose.
76
+
77
+ ## Why
78
+
79
+ - **Private** — audio is transcribed locally and never touches a server. Transcripts are never written
80
+ to logs, and history stays on your machine.
81
+ - **Fast** — Parakeet runs several times faster than realtime on CPU; on Apple Silicon it offloads to
82
+ the Neural Engine via CoreML. Text lands in ~1–2 s for a short clip.
83
+ - **Free** — MIT licensed. No subscription, ever.
84
+
85
+ ## Architecture
86
+
87
+ A portable **core** (identical on every OS) sits behind six small platform-adapter contracts —
88
+ hotkey · clipboard · inject · focus · autostart · permissions — the only OS-specific code, selected at
89
+ runtime by `platform_factory`. Engine: NVIDIA Parakeet TDT 0.6b v2 (int8 ONNX) via `onnx-asr`. UI: a
90
+ Tkinter dot-matrix panel. Output: clipboard paste (lossless, unlike per-key typing).
91
+
92
+ The full design is in [`docs/DESIGN.md`](docs/DESIGN.md); the 149-case failure-mode matrix is in
93
+ [`docs/edge-cases.md`](docs/edge-cases.md).
94
+
95
+ ## Roadmap
96
+
97
+ - [x] **M1** — portable core (engine, recorder, controller, config, observability, history) + `yohoho dictate`
98
+ - [x] **M2** — dot-matrix status panel (Tkinter)
99
+ - [x] **M3** — macOS adapter: global hotkey, TCC permissions, auto-paste, on/off chimes, run-on-login
100
+ - [x] **M4 (install)** — PyPI + npm wrapper install (this ship); daemon/signed-app/tray are later M4 pieces
101
+ - [ ] **M4** — background-daemon supervisor, smoother permission flow (signed app), full `status`/`history`/`logs`
102
+ - [ ] **M5** — Windows adapter
103
+ - [ ] **M6** — standalone per-OS binaries
104
+
105
+ Linux is on the map but deferred from v1; the adapter layer is kept Linux-ready.
106
+
107
+ ## Development
108
+
109
+ ```bash
110
+ uv sync --extra dev
111
+ uv run pytest # unit suite
112
+ uv run pytest -m "not integration" # include the Tk panel tests (real-model tests stay excluded)
113
+ uv run pytest -m integration # real-model test (needs the model cached + tests/fixtures/hello.wav)
114
+ uv run ruff check .
115
+ ```
116
+
117
+ ## Design
118
+
119
+ Terminal / dot-matrix aesthetic — brand color `#39BFC6` on near-black,
120
+ [Doto](https://fonts.google.com/specimen/Doto) wordmark, everything rendered in dots.
121
+
122
+ ## License
123
+
124
+ MIT — see [LICENSE](LICENSE). If it saves you a subscription, buy yourself a coffee.
@@ -0,0 +1,75 @@
1
+ [project]
2
+ name = "yohoho"
3
+ version = "0.1.0"
4
+ description = "Free, fully-local voice dictation"
5
+ requires-python = ">=3.11,<3.12"
6
+ dependencies = [
7
+ "onnx-asr[cpu,hub]>=0.6",
8
+ "onnxruntime>=1.27",
9
+ "sounddevice>=0.4.7",
10
+ "soundfile>=0.12",
11
+ "soxr>=0.5",
12
+ "numpy>=1.26",
13
+ "pyyaml>=6",
14
+ "pyobjc-framework-Cocoa>=10 ; sys_platform == 'darwin'",
15
+ "pyobjc-framework-Quartz>=10 ; sys_platform == 'darwin'",
16
+ "pyobjc-framework-ApplicationServices>=10 ; sys_platform == 'darwin'",
17
+ "pynput>=1.7",
18
+ "pywin32>=306 ; sys_platform == 'win32'",
19
+ ]
20
+ readme = "README.md"
21
+ license = { text = "MIT" }
22
+ authors = [{ name = "by-k4n" }]
23
+ keywords = ["dictation", "speech-to-text", "voice", "transcription", "parakeet", "local", "privacy", "macos"]
24
+ classifiers = [
25
+ "Development Status :: 3 - Alpha",
26
+ "Environment :: MacOS X",
27
+ "Intended Audience :: Developers",
28
+ "License :: OSI Approved :: MIT License",
29
+ "Operating System :: MacOS :: MacOS X",
30
+ "Operating System :: Microsoft :: Windows",
31
+ "Programming Language :: Python :: 3.11",
32
+ "Topic :: Multimedia :: Sound/Audio :: Speech",
33
+ "Topic :: Utilities",
34
+ ]
35
+
36
+ [project.urls]
37
+ Homepage = "https://github.com/by-k4n/yohoho"
38
+ Repository = "https://github.com/by-k4n/yohoho"
39
+ Issues = "https://github.com/by-k4n/yohoho/issues"
40
+
41
+ [project.scripts]
42
+ yohoho = "yohoho.core.cli:main"
43
+
44
+ [project.optional-dependencies]
45
+ dev = ["pytest>=8", "ruff>=0.6"]
46
+
47
+ [build-system]
48
+ requires = ["hatchling"]
49
+ build-backend = "hatchling.build"
50
+
51
+ [tool.hatch.build.targets.wheel]
52
+ packages = ["src/yohoho"]
53
+
54
+ [tool.hatch.build.targets.sdist]
55
+ # Lean source distribution: only the importable code + user-facing README/LICENSE/
56
+ # pyproject. Excludes docs/ (design + planning notes), CLAUDE.md, .claude/, tests/
57
+ # (incl. fixtures), packaging/, RELEASING.md — none belong in the installable package.
58
+ include = [
59
+ "/src/yohoho",
60
+ "/README.md",
61
+ "/LICENSE",
62
+ "/pyproject.toml",
63
+ ]
64
+
65
+ [tool.pytest.ini_options]
66
+ pythonpath = ["src", "."]
67
+ markers = [
68
+ "integration: loads the real model / touches real audio devices (slow)",
69
+ "gui: real-Tk smoke tests; need a windowing server (skipped headless)",
70
+ ]
71
+ addopts = "-m 'not integration and not gui'"
72
+
73
+ [tool.ruff]
74
+ line-length = 100
75
+ src = ["src", "tests"]
@@ -0,0 +1 @@
1
# Runtime-visible package version. Must match `version` in pyproject.toml
# (the published distribution is 0.1.0); the two previously disagreed.
__version__ = "0.1.0"
@@ -0,0 +1,4 @@
1
+ """Enables `python -m yohoho ...` (used by the LaunchAgent's ProgramArguments)."""
2
+ import sys
3
+ from yohoho.core.cli import main
4
# Run the CLI entry point and pass its return value to the interpreter
# as the process exit status.
exit_status = main()
sys.exit(exit_status)
File without changes
@@ -0,0 +1,42 @@
1
+ """Pure DSP helpers — no device I/O, fully testable in isolation.
2
+
3
+ Functions:
4
+ resample_to_16k — resample arbitrary-rate mono audio to 16 kHz (soxr)
5
+ rms — root-mean-square amplitude of a float32 array
6
+ is_silent — resilience P2 silence guard: rms(x) < floor
7
+ """
8
+
9
+ import numpy as np
10
+ import soxr
11
+
12
+ _TARGET_SR = 16000
13
+ _SILENCE_FLOOR = 0.003
14
+
15
+
16
def resample_to_16k(x: np.ndarray, sr: int) -> np.ndarray:
    """Return mono audio resampled to 16 kHz, always as float32.

    Args:
        x: Audio samples captured at *sr* Hz.
        sr: Sample rate of *x* in Hz.

    Returns:
        A float32 array at 16 kHz. If *sr* is already 16000 and *x* is already
        float32, the very same array object is returned (zero-copy passthrough).
        A 16 kHz input of any other dtype is converted to float32 so that both
        code paths honour the same return dtype.

    Built-in / Bluetooth mics are typically 44.1 or 48 kHz; skipping this step feeds
    the model confident garbage that passes the silence guard.
    """
    if sr != _TARGET_SR:
        x = soxr.resample(x, sr, _TARGET_SR, quality="HQ")
    # copy=False makes the cast a no-op (same object back) when the data is
    # already float32, so the passthrough stays zero-copy for float32 input.
    return x.astype(np.float32, copy=False)
27
+
28
+
29
def rms(x: np.ndarray) -> float:
    """Compute the root-mean-square amplitude of *x*.

    The samples are promoted to float64 before squaring. An empty array
    yields 0.0 instead of propagating NaN from the mean of nothing.
    """
    if not x.size:
        return 0.0
    samples = x.astype(np.float64)
    mean_square = np.mean(samples * samples)
    return float(np.sqrt(mean_square))
34
+
35
+
36
def is_silent(x: np.ndarray, floor: float = _SILENCE_FLOOR) -> bool:
    """Report whether the clip's RMS level is strictly below *floor*.

    This is the silence guard (resilience primitive P2): the controller is
    meant to call it before recognize() so that an empty or background-noise
    clip never reaches the model.
    """
    level = rms(x)
    return level < floor