speaksy 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- speaksy-0.1.0/.claude/settings.local.json +19 -0
- speaksy-0.1.0/.gitignore +30 -0
- speaksy-0.1.0/LICENSE +21 -0
- speaksy-0.1.0/PKG-INFO +246 -0
- speaksy-0.1.0/README.md +210 -0
- speaksy-0.1.0/docs/plans/2026-02-05-speaksy-design.md +173 -0
- speaksy-0.1.0/pyproject.toml +52 -0
- speaksy-0.1.0/src/speaksy/__init__.py +3 -0
- speaksy-0.1.0/src/speaksy/__main__.py +6 -0
- speaksy-0.1.0/src/speaksy/cli.py +307 -0
- speaksy-0.1.0/src/speaksy/config.py +157 -0
- speaksy-0.1.0/src/speaksy/core.py +540 -0
- speaksy-0.1.0/src/speaksy/runner.py +31 -0
- speaksy-0.1.0/src/speaksy/service.py +205 -0
- speaksy-0.1.0/src/speaksy/setup_wizard.py +216 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"Bash(source:*)",
|
|
5
|
+
"Bash(systemctl:*)",
|
|
6
|
+
"Bash(systemctl --user enable:*)",
|
|
7
|
+
"Bash(for name in scribe mumble hark dictly speaksy flowkey)",
|
|
8
|
+
"Bash(do echo -n \"$name: \")",
|
|
9
|
+
"Bash(pip index:*)",
|
|
10
|
+
"Bash(done)",
|
|
11
|
+
"Bash(pip install:*)",
|
|
12
|
+
"Bash(python -c:*)",
|
|
13
|
+
"Bash(git init:*)",
|
|
14
|
+
"Bash(git branch:*)",
|
|
15
|
+
"Bash(git rm:*)",
|
|
16
|
+
"Bash(python -m build:*)"
|
|
17
|
+
]
|
|
18
|
+
}
|
|
19
|
+
}
|
speaksy-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Environment
|
|
2
|
+
.env
|
|
3
|
+
.venv/
|
|
4
|
+
venv/
|
|
5
|
+
|
|
6
|
+
# Python
|
|
7
|
+
__pycache__/
|
|
8
|
+
*.pyc
|
|
9
|
+
*.pyo
|
|
10
|
+
*.egg-info/
|
|
11
|
+
dist/
|
|
12
|
+
build/
|
|
13
|
+
*.egg
|
|
14
|
+
|
|
15
|
+
# Audio
|
|
16
|
+
*.wav
|
|
17
|
+
|
|
18
|
+
# IDE
|
|
19
|
+
.idea/
|
|
20
|
+
.vscode/
|
|
21
|
+
*.swp
|
|
22
|
+
|
|
23
|
+
# OS
|
|
24
|
+
.DS_Store
|
|
25
|
+
Thumbs.db
|
|
26
|
+
|
|
27
|
+
# Old files (replaced by src/speaksy/)
|
|
28
|
+
voicetype.py
|
|
29
|
+
config.yaml
|
|
30
|
+
requirements.txt
|
speaksy-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 oneknight
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
speaksy-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: speaksy
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Voice typing for Linux. Talk it. Type it. Ship it.
|
|
5
|
+
Project-URL: Homepage, https://github.com/oneKn8/speaksy
|
|
6
|
+
Project-URL: Repository, https://github.com/oneKn8/speaksy
|
|
7
|
+
Project-URL: Issues, https://github.com/oneKn8/speaksy/issues
|
|
8
|
+
Author: oneknight
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: dictation,linux,speech-to-text,typing,voice,whisper
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
|
23
|
+
Classifier: Topic :: Text Processing
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Requires-Dist: faster-whisper>=1.1.0
|
|
26
|
+
Requires-Dist: httpx>=0.27.0
|
|
27
|
+
Requires-Dist: numpy>=1.24.0
|
|
28
|
+
Requires-Dist: pillow>=10.0.0
|
|
29
|
+
Requires-Dist: pynput>=1.7.0
|
|
30
|
+
Requires-Dist: pystray>=0.19.0
|
|
31
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
32
|
+
Requires-Dist: pyyaml>=6.0
|
|
33
|
+
Requires-Dist: rich>=13.0.0
|
|
34
|
+
Requires-Dist: sounddevice>=0.5.0
|
|
35
|
+
Description-Content-Type: text/markdown
|
|
36
|
+
|
|
37
|
+
<p align="center">
|
|
38
|
+
<img src="https://img.shields.io/badge/speaksy-voice%20typing-blueviolet?style=for-the-badge&logo=microphone" alt="speaksy">
|
|
39
|
+
</p>
|
|
40
|
+
|
|
41
|
+
<h1 align="center">speaksy</h1>
|
|
42
|
+
|
|
43
|
+
<p align="center">
|
|
44
|
+
<strong>talk it. type it. ship it.</strong>
|
|
45
|
+
</p>
|
|
46
|
+
|
|
47
|
+
<p align="center">
|
|
48
|
+
<a href="https://github.com/oneKn8/speaksy/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-green.svg" alt="License"></a>
|
|
49
|
+
<a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.10+-blue.svg" alt="Python"></a>
|
|
50
|
+
<a href="https://github.com/oneKn8/speaksy"><img src="https://img.shields.io/badge/platform-Linux-orange.svg" alt="Platform"></a>
|
|
51
|
+
<a href="https://console.groq.com"><img src="https://img.shields.io/badge/powered%20by-Groq-ff6600.svg" alt="Groq"></a>
|
|
52
|
+
</p>
|
|
53
|
+
|
|
54
|
+
<p align="center">
|
|
55
|
+
<em>Voice typing for Linux that actually works.<br>Hold a key, speak, release — your words appear wherever you're typing.</em>
|
|
56
|
+
</p>
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## Demo
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
$ speaksy
|
|
64
|
+
|
|
65
|
+
╭────────────────────────────────────────╮
|
|
66
|
+
│ SPEAKSY │
|
|
67
|
+
│ talk it. type it. ship it. │
|
|
68
|
+
╰────────────────────────────────────────╯
|
|
69
|
+
|
|
70
|
+
Status: vibing
|
|
71
|
+
Hotkeys: Right Ctrl (hold) | F8 (toggle)
|
|
72
|
+
|
|
73
|
+
speaksy> _
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
<!-- TODO: Add demo GIF here -->
|
|
77
|
+
<!--  -->
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Quick Start
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
# Install
|
|
85
|
+
pipx install speaksy
|
|
86
|
+
|
|
87
|
+
# Run (interactive setup on first launch)
|
|
88
|
+
speaksy
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
That's it. 30 seconds to voice typing.
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## Features
|
|
96
|
+
|
|
97
|
+
| | Feature | Description |
|
|
98
|
+
|---|---------|-------------|
|
|
99
|
+
| **Speed** | < 1 second latency | Groq's Whisper API is blazing fast |
|
|
100
|
+
| **Smart** | AI text cleanup | Fixes grammar, removes "um", "uh", "like" |
|
|
101
|
+
| **Free** | No credit card | Groq's free tier is generous |
|
|
102
|
+
| **Offline** | Local fallback | Works without internet via faster-whisper |
|
|
103
|
+
| **Private** | Privacy mode | Keep voice 100% on your machine |
|
|
104
|
+
| **Auto** | Runs on login | Always ready when you are |
|
|
105
|
+
|
|
106
|
+
---
|
|
107
|
+
|
|
108
|
+
## How It Works
|
|
109
|
+
|
|
110
|
+
```
|
|
111
|
+
┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
|
112
|
+
│ Hold Key │ -> │ Speak │ -> │ Release │ -> │ Text Appears│
|
|
113
|
+
│ (Right Ctrl)│    │  naturally  │    │     key     │    │  at cursor  │
|
|
114
|
+
└─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘
|
|
115
|
+
|
|
|
116
|
+
v
|
|
117
|
+
┌─────────────────┐
|
|
118
|
+
│ Groq Whisper │
|
|
119
|
+
│ + LLM cleanup │
|
|
120
|
+
└─────────────────┘
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
1. Press hotkey (Right Ctrl = hold, F8 = toggle)
|
|
124
|
+
2. Speak naturally
|
|
125
|
+
3. Release the key (or press F8 again in toggle mode) — text appears in < 1 second
|
|
126
|
+
|
|
127
|
+
Works everywhere: browser, terminal, IDE, Slack, Discord, anywhere you type.
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## Commands
|
|
132
|
+
|
|
133
|
+
Run `speaksy` to open the interactive CLI:
|
|
134
|
+
|
|
135
|
+
| Command | Description |
|
|
136
|
+
|---------|-------------|
|
|
137
|
+
| `/setup` | Configure API key & hotkeys |
|
|
138
|
+
| `/start` | Start voice typing |
|
|
139
|
+
| `/stop` | Take a break |
|
|
140
|
+
| `/status` | Check the vibe |
|
|
141
|
+
| `/logs` | View receipts |
|
|
142
|
+
| `/config` | Tweak settings |
|
|
143
|
+
| `/help` | Get backup |
|
|
144
|
+
| `/quit` | Peace out |
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
## Requirements
|
|
149
|
+
|
|
150
|
+
- **OS:** Linux (X11 or XWayland)
|
|
151
|
+
- **Python:** 3.10+
|
|
152
|
+
- **API Key:** Free from [console.groq.com](https://console.groq.com)
|
|
153
|
+
|
|
154
|
+
System dependencies (auto-installed during setup):
|
|
155
|
+
```bash
|
|
156
|
+
sudo apt install xclip xdotool
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## Privacy Mode
|
|
162
|
+
|
|
163
|
+
By default, audio goes to Groq for fast transcription. Want to keep it local?
|
|
164
|
+
|
|
165
|
+
```
|
|
166
|
+
speaksy> /config
|
|
167
|
+
# Select "Privacy mode" -> "local"
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
Local mode uses [faster-whisper](https://github.com/SYSTRAN/faster-whisper) on your CPU. Slower (~3-5s) but your voice never leaves your machine.
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## Troubleshooting
|
|
175
|
+
|
|
176
|
+
<details>
|
|
177
|
+
<summary><strong>No audio input detected</strong></summary>
|
|
178
|
+
|
|
179
|
+
- Check that your mic is connected
|
|
180
|
+
- Run `arecord -l` to list audio devices
|
|
181
|
+
</details>
|
|
182
|
+
|
|
183
|
+
<details>
|
|
184
|
+
<summary><strong>Text not appearing</strong></summary>
|
|
185
|
+
|
|
186
|
+
- Install dependencies: `sudo apt install xclip xdotool`
|
|
187
|
+
- Native (pure) Wayland apps may not receive text, because xdotool only works with X11/XWayland windows
|
|
188
|
+
</details>
|
|
189
|
+
|
|
190
|
+
<details>
|
|
191
|
+
<summary><strong>Service won't start</strong></summary>
|
|
192
|
+
|
|
193
|
+
- Check logs: run `speaksy` then `/logs`
|
|
194
|
+
- Verify API key at console.groq.com
|
|
195
|
+
</details>
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## Uninstall
|
|
200
|
+
|
|
201
|
+
```bash
|
|
202
|
+
# Stop service
|
|
203
|
+
speaksy
|
|
204
|
+
# > /stop
|
|
205
|
+
# > /quit
|
|
206
|
+
|
|
207
|
+
# Remove package
|
|
208
|
+
pipx uninstall speaksy
|
|
209
|
+
|
|
210
|
+
# Remove config and the systemd user service (optional)
|
|
211
|
+
rm -rf ~/.config/speaksy
|
|
212
|
+
rm ~/.config/systemd/user/speaksy.service
|
|
213
|
+
systemctl --user daemon-reload
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
---
|
|
217
|
+
|
|
218
|
+
## Tech Stack
|
|
219
|
+
|
|
220
|
+
- **STT:** [Groq Whisper API](https://groq.com) / [faster-whisper](https://github.com/SYSTRAN/faster-whisper)
|
|
221
|
+
- **LLM:** Llama 3.1 8B (via Groq) for text cleanup
|
|
222
|
+
- **Audio:** [sounddevice](https://python-sounddevice.readthedocs.io/)
|
|
223
|
+
- **Hotkeys:** [pynput](https://pynput.readthedocs.io/)
|
|
224
|
+
- **CLI:** [Rich](https://rich.readthedocs.io/)
|
|
225
|
+
|
|
226
|
+
---
|
|
227
|
+
|
|
228
|
+
## Contributing
|
|
229
|
+
|
|
230
|
+
PRs and issues welcome!
|
|
231
|
+
|
|
232
|
+
<a href="https://github.com/oneKn8/speaksy/issues">Report Bug</a>
|
|
233
|
+
·
|
|
234
|
+
<a href="https://github.com/oneKn8/speaksy/issues">Request Feature</a>
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
## License
|
|
239
|
+
|
|
240
|
+
MIT - do whatever you want with it.
|
|
241
|
+
|
|
242
|
+
---
|
|
243
|
+
|
|
244
|
+
<p align="center">
|
|
245
|
+
<sub>Built with caffeine and voice commands</sub>
|
|
246
|
+
</p>
|
speaksy-0.1.0/README.md
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="https://img.shields.io/badge/speaksy-voice%20typing-blueviolet?style=for-the-badge&logo=microphone" alt="speaksy">
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
<h1 align="center">speaksy</h1>
|
|
6
|
+
|
|
7
|
+
<p align="center">
|
|
8
|
+
<strong>talk it. type it. ship it.</strong>
|
|
9
|
+
</p>
|
|
10
|
+
|
|
11
|
+
<p align="center">
|
|
12
|
+
<a href="https://github.com/oneKn8/speaksy/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-green.svg" alt="License"></a>
|
|
13
|
+
<a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.10+-blue.svg" alt="Python"></a>
|
|
14
|
+
<a href="https://github.com/oneKn8/speaksy"><img src="https://img.shields.io/badge/platform-Linux-orange.svg" alt="Platform"></a>
|
|
15
|
+
<a href="https://console.groq.com"><img src="https://img.shields.io/badge/powered%20by-Groq-ff6600.svg" alt="Groq"></a>
|
|
16
|
+
</p>
|
|
17
|
+
|
|
18
|
+
<p align="center">
|
|
19
|
+
<em>Voice typing for Linux that actually works.<br>Hold a key, speak, release — your words appear wherever you're typing.</em>
|
|
20
|
+
</p>
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Demo
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
$ speaksy
|
|
28
|
+
|
|
29
|
+
╭────────────────────────────────────────╮
|
|
30
|
+
│ SPEAKSY │
|
|
31
|
+
│ talk it. type it. ship it. │
|
|
32
|
+
╰────────────────────────────────────────╯
|
|
33
|
+
|
|
34
|
+
Status: vibing
|
|
35
|
+
Hotkeys: Right Ctrl (hold) | F8 (toggle)
|
|
36
|
+
|
|
37
|
+
speaksy> _
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
<!-- TODO: Add demo GIF here -->
|
|
41
|
+
<!--  -->
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Quick Start
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
# Install
|
|
49
|
+
pipx install speaksy
|
|
50
|
+
|
|
51
|
+
# Run (interactive setup on first launch)
|
|
52
|
+
speaksy
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
That's it. 30 seconds to voice typing.
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Features
|
|
60
|
+
|
|
61
|
+
| | Feature | Description |
|
|
62
|
+
|---|---------|-------------|
|
|
63
|
+
| **Speed** | < 1 second latency | Groq's Whisper API is blazing fast |
|
|
64
|
+
| **Smart** | AI text cleanup | Fixes grammar, removes "um", "uh", "like" |
|
|
65
|
+
| **Free** | No credit card | Groq's free tier is generous |
|
|
66
|
+
| **Offline** | Local fallback | Works without internet via faster-whisper |
|
|
67
|
+
| **Private** | Privacy mode | Keep voice 100% on your machine |
|
|
68
|
+
| **Auto** | Runs on login | Always ready when you are |
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## How It Works
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
|
76
|
+
│ Hold Key │ -> │ Speak │ -> │ Release │ -> │ Text Appears│
|
|
77
|
+
│ (Right Ctrl)│    │  naturally  │    │     key     │    │  at cursor  │
|
|
78
|
+
└─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘
|
|
79
|
+
|
|
|
80
|
+
v
|
|
81
|
+
┌─────────────────┐
|
|
82
|
+
│ Groq Whisper │
|
|
83
|
+
│ + LLM cleanup │
|
|
84
|
+
└─────────────────┘
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
1. Press hotkey (Right Ctrl = hold, F8 = toggle)
|
|
88
|
+
2. Speak naturally
|
|
89
|
+
3. Release the key (or press F8 again in toggle mode) — text appears in < 1 second
|
|
90
|
+
|
|
91
|
+
Works everywhere: browser, terminal, IDE, Slack, Discord, anywhere you type.
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## Commands
|
|
96
|
+
|
|
97
|
+
Run `speaksy` to open the interactive CLI:
|
|
98
|
+
|
|
99
|
+
| Command | Description |
|
|
100
|
+
|---------|-------------|
|
|
101
|
+
| `/setup` | Configure API key & hotkeys |
|
|
102
|
+
| `/start` | Start voice typing |
|
|
103
|
+
| `/stop` | Take a break |
|
|
104
|
+
| `/status` | Check the vibe |
|
|
105
|
+
| `/logs` | View receipts |
|
|
106
|
+
| `/config` | Tweak settings |
|
|
107
|
+
| `/help` | Get backup |
|
|
108
|
+
| `/quit` | Peace out |
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## Requirements
|
|
113
|
+
|
|
114
|
+
- **OS:** Linux (X11 or XWayland)
|
|
115
|
+
- **Python:** 3.10+
|
|
116
|
+
- **API Key:** Free from [console.groq.com](https://console.groq.com)
|
|
117
|
+
|
|
118
|
+
System dependencies (auto-installed during setup):
|
|
119
|
+
```bash
|
|
120
|
+
sudo apt install xclip xdotool
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## Privacy Mode
|
|
126
|
+
|
|
127
|
+
By default, audio goes to Groq for fast transcription. Want to keep it local?
|
|
128
|
+
|
|
129
|
+
```
|
|
130
|
+
speaksy> /config
|
|
131
|
+
# Select "Privacy mode" -> "local"
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Local mode uses [faster-whisper](https://github.com/SYSTRAN/faster-whisper) on your CPU. Slower (~3-5s) but your voice never leaves your machine.
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Troubleshooting
|
|
139
|
+
|
|
140
|
+
<details>
|
|
141
|
+
<summary><strong>No audio input detected</strong></summary>
|
|
142
|
+
|
|
143
|
+
- Check that your mic is connected
|
|
144
|
+
- Run `arecord -l` to list audio devices
|
|
145
|
+
</details>
|
|
146
|
+
|
|
147
|
+
<details>
|
|
148
|
+
<summary><strong>Text not appearing</strong></summary>
|
|
149
|
+
|
|
150
|
+
- Install dependencies: `sudo apt install xclip xdotool`
|
|
151
|
+
- Native (pure) Wayland apps may not receive text, because xdotool only works with X11/XWayland windows
|
|
152
|
+
</details>
|
|
153
|
+
|
|
154
|
+
<details>
|
|
155
|
+
<summary><strong>Service won't start</strong></summary>
|
|
156
|
+
|
|
157
|
+
- Check logs: run `speaksy` then `/logs`
|
|
158
|
+
- Verify API key at console.groq.com
|
|
159
|
+
</details>
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## Uninstall
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
# Stop service
|
|
167
|
+
speaksy
|
|
168
|
+
# > /stop
|
|
169
|
+
# > /quit
|
|
170
|
+
|
|
171
|
+
# Remove package
|
|
172
|
+
pipx uninstall speaksy
|
|
173
|
+
|
|
174
|
+
# Remove config and the systemd user service (optional)
|
|
175
|
+
rm -rf ~/.config/speaksy
|
|
176
|
+
rm ~/.config/systemd/user/speaksy.service
|
|
177
|
+
systemctl --user daemon-reload
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
## Tech Stack
|
|
183
|
+
|
|
184
|
+
- **STT:** [Groq Whisper API](https://groq.com) / [faster-whisper](https://github.com/SYSTRAN/faster-whisper)
|
|
185
|
+
- **LLM:** Llama 3.1 8B (via Groq) for text cleanup
|
|
186
|
+
- **Audio:** [sounddevice](https://python-sounddevice.readthedocs.io/)
|
|
187
|
+
- **Hotkeys:** [pynput](https://pynput.readthedocs.io/)
|
|
188
|
+
- **CLI:** [Rich](https://rich.readthedocs.io/)
|
|
189
|
+
|
|
190
|
+
---
|
|
191
|
+
|
|
192
|
+
## Contributing
|
|
193
|
+
|
|
194
|
+
PRs and issues welcome!
|
|
195
|
+
|
|
196
|
+
<a href="https://github.com/oneKn8/speaksy/issues">Report Bug</a>
|
|
197
|
+
·
|
|
198
|
+
<a href="https://github.com/oneKn8/speaksy/issues">Request Feature</a>
|
|
199
|
+
|
|
200
|
+
---
|
|
201
|
+
|
|
202
|
+
## License
|
|
203
|
+
|
|
204
|
+
MIT - do whatever you want with it.
|
|
205
|
+
|
|
206
|
+
---
|
|
207
|
+
|
|
208
|
+
<p align="center">
|
|
209
|
+
<sub>Built with caffeine and voice commands</sub>
|
|
210
|
+
</p>
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
# Speaksy Design Document
|
|
2
|
+
|
|
3
|
+
**Date:** 2026-02-05
|
|
4
|
+
**Status:** Approved
|
|
5
|
+
|
|
6
|
+
## Overview
|
|
7
|
+
|
|
8
|
+
Speaksy is a voice typing tool for Linux. Users speak, and their words appear wherever they're typing.
|
|
9
|
+
|
|
10
|
+
**Tagline:** talk it. type it. ship it.
|
|
11
|
+
|
|
12
|
+
## Installation
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
pipx install speaksy # recommended
|
|
16
|
+
pip3 install speaksy # alternative
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## User Experience
|
|
20
|
+
|
|
21
|
+
Single command `speaksy` opens an interactive CLI with slash commands:
|
|
22
|
+
|
|
23
|
+
- `/setup` - Configure API key, hotkeys, install service
|
|
24
|
+
- `/start` - Start voice typing service
|
|
25
|
+
- `/stop` - Stop voice typing service
|
|
26
|
+
- `/status` - Show current status
|
|
27
|
+
- `/logs` - View recent activity
|
|
28
|
+
- `/config` - Edit settings
|
|
29
|
+
- `/help` - Show all commands
|
|
30
|
+
- `/quit` - Exit CLI (service keeps running)
|
|
31
|
+
|
|
32
|
+
### First Run
|
|
33
|
+
|
|
34
|
+
On first run (no config), auto-triggers `/setup` wizard:
|
|
35
|
+
|
|
36
|
+
1. Check system deps (xclip, xdotool)
|
|
37
|
+
2. Prompt for Groq API key
|
|
38
|
+
3. Validate key works
|
|
39
|
+
4. Optional: customize hotkeys
|
|
40
|
+
5. Install systemd user service
|
|
41
|
+
6. Start service
|
|
42
|
+
|
|
43
|
+
### UI Style
|
|
44
|
+
|
|
45
|
+
Gen-Z friendly with colors and personality:
|
|
46
|
+
- Status messages: "vibing", "sleeping", "dead"
|
|
47
|
+
- Helpful with links and next steps
|
|
48
|
+
- Error messages are friendly, not scary
|
|
49
|
+
|
|
50
|
+
## Architecture
|
|
51
|
+
|
|
52
|
+
### Project Structure
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
speaksy/
|
|
56
|
+
├── pyproject.toml
|
|
57
|
+
├── README.md
|
|
58
|
+
├── LICENSE
|
|
59
|
+
└── src/
|
|
60
|
+
└── speaksy/
|
|
61
|
+
├── __init__.py
|
|
62
|
+
├── __main__.py
|
|
63
|
+
├── cli.py
|
|
64
|
+
├── core.py
|
|
65
|
+
├── config.py
|
|
66
|
+
├── service.py
|
|
67
|
+
└── setup_wizard.py
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Config Location
|
|
71
|
+
|
|
72
|
+
- Config: `~/.config/speaksy/config.yaml`
|
|
73
|
+
- API key: `~/.config/speaksy/.env`
|
|
74
|
+
- Service: `~/.config/systemd/user/speaksy.service`
|
|
75
|
+
|
|
76
|
+
### Runtime
|
|
77
|
+
|
|
78
|
+
Always runs as a systemd user service. The CLI is for management only.
|
|
79
|
+
|
|
80
|
+
## Features
|
|
81
|
+
|
|
82
|
+
### Transcription
|
|
83
|
+
|
|
84
|
+
- **Primary:** Groq Whisper API (fast, <1s)
|
|
85
|
+
- **Fallback:** Local faster-whisper (offline, ~3-5s)
|
|
86
|
+
- **Cleanup:** LLM post-processing for grammar/filler words
|
|
87
|
+
|
|
88
|
+
### Hotkeys
|
|
89
|
+
|
|
90
|
+
- Push-to-talk: Hold Right Ctrl (default)
|
|
91
|
+
- Toggle mode: Press F8 (default)
|
|
92
|
+
- Customizable via `/config`
|
|
93
|
+
|
|
94
|
+
### Privacy Mode
|
|
95
|
+
|
|
96
|
+
Local-only option for users who don't want cloud:
|
|
97
|
+
- Uses faster-whisper on CPU
|
|
98
|
+
- Voice never leaves the machine
|
|
99
|
+
- Slower but private
|
|
100
|
+
|
|
101
|
+
### Error Handling
|
|
102
|
+
|
|
103
|
+
- Invalid API key: Warn user, fall back to local
|
|
104
|
+
- Expired key: Same behavior
|
|
105
|
+
- Network down: Silent fallback to local
|
|
106
|
+
- Rate limited: Wait, retry, fall back
|
|
107
|
+
|
|
108
|
+
### Tray Icon
|
|
109
|
+
|
|
110
|
+
- Green: Groq API working
|
|
111
|
+
- Yellow: Using local fallback
|
|
112
|
+
- Red: Both failed
|
|
113
|
+
|
|
114
|
+
## CLI Commands Detail
|
|
115
|
+
|
|
116
|
+
### /setup
|
|
117
|
+
Interactive wizard for first-time configuration.
|
|
118
|
+
|
|
119
|
+
### /start
|
|
120
|
+
```
|
|
121
|
+
speaksy> /start
|
|
122
|
+
🚀 lesss gooo! speaksy is now listening...
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### /stop
|
|
126
|
+
```
|
|
127
|
+
speaksy> /stop
|
|
128
|
+
😴 aight speaksy is taking a nap
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### /status
|
|
132
|
+
```
|
|
133
|
+
speaksy> /status
|
|
134
|
+
📊 the vibe check:
|
|
135
|
+
├─ service: running for 2h 34m
|
|
136
|
+
├─ api key: configured ✓
|
|
137
|
+
└─ mode: cloud (groq)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### /config
|
|
141
|
+
Interactive menu for:
|
|
142
|
+
1. API key
|
|
143
|
+
2. Hotkeys
|
|
144
|
+
3. Privacy mode (cloud/local)
|
|
145
|
+
4. Text cleanup on/off
|
|
146
|
+
|
|
147
|
+
### /logs
|
|
148
|
+
Shows recent transcription activity and errors.
|
|
149
|
+
|
|
150
|
+
## Dependencies
|
|
151
|
+
|
|
152
|
+
### Python
|
|
153
|
+
- faster-whisper
|
|
154
|
+
- sounddevice
|
|
155
|
+
- numpy
|
|
156
|
+
- pynput
|
|
157
|
+
- pystray
|
|
158
|
+
- Pillow
|
|
159
|
+
- PyYAML
|
|
160
|
+
- httpx
|
|
161
|
+
- python-dotenv
|
|
162
|
+
- rich (for CLI styling)
|
|
163
|
+
|
|
164
|
+
### System
|
|
165
|
+
- xclip
|
|
166
|
+
- xdotool
|
|
167
|
+
- Python 3.10+
|
|
168
|
+
|
|
169
|
+
## Success Metrics
|
|
170
|
+
|
|
171
|
+
- Simple install: 2 commands max
|
|
172
|
+
- Setup time: <60 seconds
|
|
173
|
+
- First transcription: <30 seconds after setup
|