voxa-code 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- voxa_code-0.1.0/LICENSE +21 -0
- voxa_code-0.1.0/MANIFEST.in +14 -0
- voxa_code-0.1.0/PKG-INFO +227 -0
- voxa_code-0.1.0/README.md +185 -0
- voxa_code-0.1.0/pyproject.toml +72 -0
- voxa_code-0.1.0/requirements.txt +12 -0
- voxa_code-0.1.0/server/__init__.py +0 -0
- voxa_code-0.1.0/server/apns.py +89 -0
- voxa_code-0.1.0/server/app.py +589 -0
- voxa_code-0.1.0/server/appattest.py +310 -0
- voxa_code-0.1.0/server/appstore.py +141 -0
- voxa_code-0.1.0/server/attested_store.py +60 -0
- voxa_code-0.1.0/server/auth.py +70 -0
- voxa_code-0.1.0/server/ax_controller.py +202 -0
- voxa_code-0.1.0/server/billing.py +177 -0
- voxa_code-0.1.0/server/call_manager.py +91 -0
- voxa_code-0.1.0/server/certs/AppleRootCA-G3.pem +15 -0
- voxa_code-0.1.0/server/certs/Apple_App_Attestation_Root_CA.pem +14 -0
- voxa_code-0.1.0/server/claude_controller.py +156 -0
- voxa_code-0.1.0/server/cli.py +365 -0
- voxa_code-0.1.0/server/cloud_app.py +345 -0
- voxa_code-0.1.0/server/config.py +56 -0
- voxa_code-0.1.0/server/device_registry.py +52 -0
- voxa_code-0.1.0/server/gemini_operator.py +677 -0
- voxa_code-0.1.0/server/hooks.py +202 -0
- voxa_code-0.1.0/server/orchestrator.py +315 -0
- voxa_code-0.1.0/server/push_routes.py +50 -0
- voxa_code-0.1.0/server/ratelimit.py +41 -0
- voxa_code-0.1.0/server/relay.py +157 -0
- voxa_code-0.1.0/server/relay_client.py +89 -0
- voxa_code-0.1.0/server/remote_operator.py +128 -0
- voxa_code-0.1.0/server/session_hub.py +33 -0
- voxa_code-0.1.0/server/terminal_watcher.py +241 -0
- voxa_code-0.1.0/server/terminals.py +510 -0
- voxa_code-0.1.0/server/tmux_controller.py +580 -0
- voxa_code-0.1.0/server/transcript_monitor.py +134 -0
- voxa_code-0.1.0/server/transcripts.py +143 -0
- voxa_code-0.1.0/server/users.py +90 -0
- voxa_code-0.1.0/server/voxa_cloud.py +132 -0
- voxa_code-0.1.0/server/waitlist.py +130 -0
- voxa_code-0.1.0/setup.cfg +4 -0
- voxa_code-0.1.0/static/app.js +388 -0
- voxa_code-0.1.0/static/favicon.svg +1 -0
- voxa_code-0.1.0/static/index.html +253 -0
- voxa_code-0.1.0/static/pcm-worklet.js +69 -0
- voxa_code-0.1.0/static/pro.html +29 -0
- voxa_code-0.1.0/static/pro2.html +33 -0
- voxa_code-0.1.0/static/voxa-mark-white.svg +1 -0
- voxa_code-0.1.0/voxa_code.egg-info/PKG-INFO +227 -0
- voxa_code-0.1.0/voxa_code.egg-info/SOURCES.txt +52 -0
- voxa_code-0.1.0/voxa_code.egg-info/dependency_links.txt +1 -0
- voxa_code-0.1.0/voxa_code.egg-info/entry_points.txt +2 -0
- voxa_code-0.1.0/voxa_code.egg-info/requires.txt +19 -0
- voxa_code-0.1.0/voxa_code.egg-info/top_level.txt +2 -0
voxa_code-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Voxa (Ti)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
graft static
|
|
2
|
+
graft server
|
|
3
|
+
include README.md
|
|
4
|
+
include LICENSE
|
|
5
|
+
include requirements.txt
|
|
6
|
+
prune tests
|
|
7
|
+
prune ios
|
|
8
|
+
prune web
|
|
9
|
+
prune docs
|
|
10
|
+
prune deploy
|
|
11
|
+
global-exclude __pycache__
|
|
12
|
+
global-exclude *.py[cod]
|
|
13
|
+
global-exclude *.so
|
|
14
|
+
global-exclude .DS_Store
|
voxa_code-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: voxa-code
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Hands-free voice operator for Claude Code
|
|
5
|
+
Author-email: Ti <voxa@voxa.space>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://voxa.space
|
|
8
|
+
Project-URL: Repository, https://github.com/Ti-03/voxa
|
|
9
|
+
Project-URL: Issues, https://github.com/Ti-03/voxa/issues
|
|
10
|
+
Keywords: claude,claude-code,voice,cli,agent,gemini
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Software Development
|
|
21
|
+
Classifier: Topic :: Utilities
|
|
22
|
+
Requires-Python: >=3.11
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: fastapi>=0.110
|
|
26
|
+
Requires-Dist: uvicorn[standard]>=0.29
|
|
27
|
+
Requires-Dist: websockets>=12
|
|
28
|
+
Requires-Dist: google-genai>=0.3
|
|
29
|
+
Requires-Dist: claude-agent-sdk>=0.1
|
|
30
|
+
Requires-Dist: python-dotenv>=1.0
|
|
31
|
+
Requires-Dist: pyjwt[crypto]>=2.8
|
|
32
|
+
Requires-Dist: httpx[http2]>=0.27
|
|
33
|
+
Requires-Dist: qrcode>=7.4
|
|
34
|
+
Requires-Dist: cbor2>=5.6
|
|
35
|
+
Requires-Dist: pyobjc-framework-Quartz>=10; sys_platform == "darwin"
|
|
36
|
+
Requires-Dist: pyobjc-framework-ApplicationServices>=10; sys_platform == "darwin"
|
|
37
|
+
Provides-Extra: dev
|
|
38
|
+
Requires-Dist: pytest>=8; extra == "dev"
|
|
39
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
40
|
+
Requires-Dist: httpx>=0.27; extra == "dev"
|
|
41
|
+
Dynamic: license-file
|
|
42
|
+
|
|
43
|
+
# Voxa
|
|
44
|
+
|
|
45
|
+
Voxa lets you call into your laptop from a phone browser, talk to a Gemini Live
|
|
46
|
+
"operator," and have it drive Claude Code by voice.
|
|
47
|
+
|
|
48
|
+
**MVP scope (drive mode only):** pick a working directory by voice, send spoken
|
|
49
|
+
instructions, hear Claude's final result read back. Attach mode, voice
|
|
50
|
+
folder-browsing, and barge-in interruption are V2 backlog items (see
|
|
51
|
+
`docs/superpowers/specs/2026-06-27-loop-design.md` and
|
|
52
|
+
`docs/superpowers/plans/2026-06-27-loop-mvp.md`).
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Prerequisites
|
|
57
|
+
|
|
58
|
+
- **Python 3.11+** on the laptop.
|
|
59
|
+
- **Tailscale** installed and logged in on both the laptop and the phone (free
|
|
60
|
+
personal plan is fine). The phone must be on the same tailnet as the laptop,
|
|
61
|
+
or MagicDNS must be enabled.
|
|
62
|
+
- **A Gemini API key** from [Google AI Studio](https://aistudio.google.com/live)
|
|
63
|
+
with Gemini Live access.
|
|
64
|
+
- **Claude Code logged in** on the laptop (`claude` CLI authenticated). The
|
|
65
|
+
agent SDK reuses your existing Claude Code credentials; no separate
|
|
66
|
+
`ANTHROPIC_API_KEY` is needed unless you prefer to supply one.
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Quickstart
|
|
71
|
+
|
|
72
|
+
Install Voxa on the laptop you want to control with one command.
|
|
73
|
+
|
|
74
|
+
**macOS / Linux:**
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
curl -fsSL https://voxa.space/install.sh | sh
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
**Windows (PowerShell):**
|
|
81
|
+
|
|
82
|
+
```powershell
|
|
83
|
+
irm https://voxa.space/install.ps1 | iex
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Prefer a package runner? These work on any OS:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
npx voxa-code # Node users
|
|
90
|
+
uvx voxa-code # Python users (or: pipx install voxa-code)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Then start it:
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
voxa
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Voxa is zero-config by default: it uses the hosted relay, so there are no API
|
|
100
|
+
keys to set up. `voxa` starts the server and prints a pairing QR code. Scan it
|
|
101
|
+
with the Voxa phone app (or open the printed URL in your phone browser) to
|
|
102
|
+
connect.
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## Develop from source
|
|
107
|
+
|
|
108
|
+
Contributors who want to hack on Voxa can run it from a checkout instead of the
|
|
109
|
+
published package.
|
|
110
|
+
|
|
111
|
+
### 1. Create the virtual environment and install Voxa
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
python3 -m venv .venv
|
|
115
|
+
.venv/bin/pip install -e ".[dev]"
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### 2. Configure secrets
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
cp .env.example .env
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Open `.env` and fill in:
|
|
125
|
+
|
|
126
|
+
| Key | Value |
|
|
127
|
+
|---|---|
|
|
128
|
+
| `GEMINI_API_KEY` | Your Google AI Studio key |
|
|
129
|
+
| `VOXA_AUTH_TOKEN` | Any random secret string (protects the WebSocket endpoint on your tailnet) |
|
|
130
|
+
|
|
131
|
+
`GEMINI_LIVE_MODEL`, `VOXA_HOST`, and `VOXA_PORT` have sensible defaults and
|
|
132
|
+
can be left as-is.
|
|
133
|
+
|
|
134
|
+
### 3. Start the server
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
bash scripts/serve.sh
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
The script:
|
|
141
|
+
1. Starts the Voxa FastAPI server on `127.0.0.1:8787` (or `$VOXA_PORT`).
|
|
142
|
+
2. Calls `tailscale serve` to expose it over HTTPS on your tailnet (required
|
|
143
|
+
because the phone browser needs a secure context for microphone access).
|
|
144
|
+
3. Prints the full HTTPS URL including your auth token.
|
|
145
|
+
|
|
146
|
+
### 4. Connect from the phone
|
|
147
|
+
|
|
148
|
+
Open the printed URL on your phone browser. Tap **Connect**, then speak.
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## Architecture (brief)
|
|
153
|
+
|
|
154
|
+
```
|
|
155
|
+
Phone browser (static/)
|
|
156
|
+
| HTTPS WebSocket (auth token required)
|
|
157
|
+
v
|
|
158
|
+
FastAPI server (server/app.py)
|
|
159
|
+
| audio bytes (16 kHz PCM)
|
|
160
|
+
v
|
|
161
|
+
GeminiOperator (server/gemini_operator.py) <--> Gemini Live API
|
|
162
|
+
| tool calls (start_claude_session, send_to_claude, …)
|
|
163
|
+
v
|
|
164
|
+
Orchestrator (server/orchestrator.py)
|
|
165
|
+
|
|
|
166
|
+
v
|
|
167
|
+
ClaudeController (server/claude_controller.py) --> Claude Code (agent SDK, bypassPermissions)
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
Config is loaded from `.env` via `server/config.py`.
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## Running the test suite
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
.venv/bin/python -m pytest -v
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
Expected: 22 tests pass, no warnings.
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## Manual end-to-end smoke test
|
|
185
|
+
|
|
186
|
+
The smoke test requires a real phone, real Tailscale connectivity, and real API
|
|
187
|
+
keys. Run it against a scratch directory, not a real project.
|
|
188
|
+
|
|
189
|
+
**Before you start:**
|
|
190
|
+
- `.env` is fully filled in (real `GEMINI_API_KEY` and `VOXA_AUTH_TOKEN`).
|
|
191
|
+
- Tailscale is running on both the laptop and the phone.
|
|
192
|
+
- Claude Code is logged in on the laptop.
|
|
193
|
+
|
|
194
|
+
**Procedure:**
|
|
195
|
+
|
|
196
|
+
1. Open a terminal on the laptop and run:
|
|
197
|
+
```bash
|
|
198
|
+
bash scripts/serve.sh
|
|
199
|
+
```
|
|
200
|
+
Wait for the line `Voxa is live. On your phone open: https://...`
|
|
201
|
+
|
|
202
|
+
2. Copy the printed HTTPS URL (it already includes `?token=...`).
|
|
203
|
+
|
|
204
|
+
3. On the phone, open the URL in Safari or Chrome. You should see the Voxa
|
|
205
|
+
interface. Grant microphone permission when prompted.
|
|
206
|
+
|
|
207
|
+
4. Tap **Connect**. The button should change state to indicate an active
|
|
208
|
+
session.
|
|
209
|
+
|
|
210
|
+
5. Speak: "Start a session in `/tmp/loop-smoke` and create a file called
|
|
211
|
+
`hello.txt` that says hi."
|
|
212
|
+
|
|
213
|
+
6. **Verify:**
|
|
214
|
+
- Gemini acknowledges the instruction verbally (you hear a response through
|
|
215
|
+
the phone speaker).
|
|
216
|
+
- On the laptop terminal you see Claude Code start with `bypassPermissions`
|
|
217
|
+
active (no permission prompts appear).
|
|
218
|
+
- After Claude finishes, `/tmp/loop-smoke/hello.txt` exists on the laptop
|
|
219
|
+
and contains `hi`.
|
|
220
|
+
- Gemini speaks the final result back to you.
|
|
221
|
+
|
|
222
|
+
7. To stop: press Ctrl-C in the laptop terminal. The `trap` in `serve.sh` will
|
|
223
|
+
kill the server and tear down `tailscale serve`.
|
|
224
|
+
|
|
225
|
+
**Warning:** Use a throwaway scratch directory (like `/tmp/loop-smoke`) for
|
|
226
|
+
your first smoke test. Claude Code runs with `bypassPermissions`, so it will
|
|
227
|
+
write files without asking.
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# Voxa
|
|
2
|
+
|
|
3
|
+
Voxa lets you call into your laptop from a phone browser, talk to a Gemini Live
|
|
4
|
+
"operator," and have it drive Claude Code by voice.
|
|
5
|
+
|
|
6
|
+
**MVP scope (drive mode only):** pick a working directory by voice, send spoken
|
|
7
|
+
instructions, hear Claude's final result read back. Attach mode, voice
|
|
8
|
+
folder-browsing, and barge-in interruption are V2 backlog items (see
|
|
9
|
+
`docs/superpowers/specs/2026-06-27-loop-design.md` and
|
|
10
|
+
`docs/superpowers/plans/2026-06-27-loop-mvp.md`).
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## Prerequisites
|
|
15
|
+
|
|
16
|
+
- **Python 3.11+** on the laptop.
|
|
17
|
+
- **Tailscale** installed and logged in on both the laptop and the phone (free
|
|
18
|
+
personal plan is fine). The phone must be on the same tailnet as the laptop,
|
|
19
|
+
or MagicDNS must be enabled.
|
|
20
|
+
- **A Gemini API key** from [Google AI Studio](https://aistudio.google.com/live)
|
|
21
|
+
with Gemini Live access.
|
|
22
|
+
- **Claude Code logged in** on the laptop (`claude` CLI authenticated). The
|
|
23
|
+
agent SDK reuses your existing Claude Code credentials; no separate
|
|
24
|
+
`ANTHROPIC_API_KEY` is needed unless you prefer to supply one.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Quickstart
|
|
29
|
+
|
|
30
|
+
Install Voxa on the laptop you want to control with one command.
|
|
31
|
+
|
|
32
|
+
**macOS / Linux:**
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
curl -fsSL https://voxa.space/install.sh | sh
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
**Windows (PowerShell):**
|
|
39
|
+
|
|
40
|
+
```powershell
|
|
41
|
+
irm https://voxa.space/install.ps1 | iex
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Prefer a package runner? These work on any OS:
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
npx voxa-code # Node users
|
|
48
|
+
uvx voxa-code # Python users (or: pipx install voxa-code)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Then start it:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
voxa
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Voxa is zero-config by default: it uses the hosted relay, so there are no API
|
|
58
|
+
keys to set up. `voxa` starts the server and prints a pairing QR code. Scan it
|
|
59
|
+
with the Voxa phone app (or open the printed URL in your phone browser) to
|
|
60
|
+
connect.
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
## Develop from source
|
|
65
|
+
|
|
66
|
+
Contributors who want to hack on Voxa can run it from a checkout instead of the
|
|
67
|
+
published package.
|
|
68
|
+
|
|
69
|
+
### 1. Create the virtual environment and install Voxa
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
python3 -m venv .venv
|
|
73
|
+
.venv/bin/pip install -e ".[dev]"
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### 2. Configure secrets
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
cp .env.example .env
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Open `.env` and fill in:
|
|
83
|
+
|
|
84
|
+
| Key | Value |
|
|
85
|
+
|---|---|
|
|
86
|
+
| `GEMINI_API_KEY` | Your Google AI Studio key |
|
|
87
|
+
| `VOXA_AUTH_TOKEN` | Any random secret string (protects the WebSocket endpoint on your tailnet) |
|
|
88
|
+
|
|
89
|
+
`GEMINI_LIVE_MODEL`, `VOXA_HOST`, and `VOXA_PORT` have sensible defaults and
|
|
90
|
+
can be left as-is.
|
|
91
|
+
|
|
92
|
+
### 3. Start the server
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
bash scripts/serve.sh
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
The script:
|
|
99
|
+
1. Starts the Voxa FastAPI server on `127.0.0.1:8787` (or `$VOXA_PORT`).
|
|
100
|
+
2. Calls `tailscale serve` to expose it over HTTPS on your tailnet (required
|
|
101
|
+
because the phone browser needs a secure context for microphone access).
|
|
102
|
+
3. Prints the full HTTPS URL including your auth token.
|
|
103
|
+
|
|
104
|
+
### 4. Connect from the phone
|
|
105
|
+
|
|
106
|
+
Open the printed URL on your phone browser. Tap **Connect**, then speak.
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
## Architecture (brief)
|
|
111
|
+
|
|
112
|
+
```
|
|
113
|
+
Phone browser (static/)
|
|
114
|
+
| HTTPS WebSocket (auth token required)
|
|
115
|
+
v
|
|
116
|
+
FastAPI server (server/app.py)
|
|
117
|
+
| audio bytes (16 kHz PCM)
|
|
118
|
+
v
|
|
119
|
+
GeminiOperator (server/gemini_operator.py) <--> Gemini Live API
|
|
120
|
+
| tool calls (start_claude_session, send_to_claude, …)
|
|
121
|
+
v
|
|
122
|
+
Orchestrator (server/orchestrator.py)
|
|
123
|
+
|
|
|
124
|
+
v
|
|
125
|
+
ClaudeController (server/claude_controller.py) --> Claude Code (agent SDK, bypassPermissions)
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
Config is loaded from `.env` via `server/config.py`.
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## Running the test suite
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
.venv/bin/python -m pytest -v
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Expected: 22 tests pass, no warnings.
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## Manual end-to-end smoke test
|
|
143
|
+
|
|
144
|
+
The smoke test requires a real phone, real Tailscale connectivity, and real API
|
|
145
|
+
keys. Run it against a scratch directory, not a real project.
|
|
146
|
+
|
|
147
|
+
**Before you start:**
|
|
148
|
+
- `.env` is fully filled in (real `GEMINI_API_KEY` and `VOXA_AUTH_TOKEN`).
|
|
149
|
+
- Tailscale is running on both the laptop and the phone.
|
|
150
|
+
- Claude Code is logged in on the laptop.
|
|
151
|
+
|
|
152
|
+
**Procedure:**
|
|
153
|
+
|
|
154
|
+
1. Open a terminal on the laptop and run:
|
|
155
|
+
```bash
|
|
156
|
+
bash scripts/serve.sh
|
|
157
|
+
```
|
|
158
|
+
Wait for the line `Voxa is live. On your phone open: https://...`
|
|
159
|
+
|
|
160
|
+
2. Copy the printed HTTPS URL (it already includes `?token=...`).
|
|
161
|
+
|
|
162
|
+
3. On the phone, open the URL in Safari or Chrome. You should see the Voxa
|
|
163
|
+
interface. Grant microphone permission when prompted.
|
|
164
|
+
|
|
165
|
+
4. Tap **Connect**. The button should change state to indicate an active
|
|
166
|
+
session.
|
|
167
|
+
|
|
168
|
+
5. Speak: "Start a session in `/tmp/loop-smoke` and create a file called
|
|
169
|
+
`hello.txt` that says hi."
|
|
170
|
+
|
|
171
|
+
6. **Verify:**
|
|
172
|
+
- Gemini acknowledges the instruction verbally (you hear a response through
|
|
173
|
+
the phone speaker).
|
|
174
|
+
- On the laptop terminal you see Claude Code start with `bypassPermissions`
|
|
175
|
+
active (no permission prompts appear).
|
|
176
|
+
- After Claude finishes, `/tmp/loop-smoke/hello.txt` exists on the laptop
|
|
177
|
+
and contains `hi`.
|
|
178
|
+
- Gemini speaks the final result back to you.
|
|
179
|
+
|
|
180
|
+
7. To stop: press Ctrl-C in the laptop terminal. The `trap` in `serve.sh` will
|
|
181
|
+
kill the server and tear down `tailscale serve`.
|
|
182
|
+
|
|
183
|
+
**Warning:** Use a throwaway scratch directory (like `/tmp/loop-smoke`) for
|
|
184
|
+
your first smoke test. Claude Code runs with `bypassPermissions`, so it will
|
|
185
|
+
write files without asking.
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "voxa-code"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Hands-free voice operator for Claude Code"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
license = { text = "MIT" }
|
|
8
|
+
authors = [{ name = "Ti", email = "voxa@voxa.space" }]
|
|
9
|
+
keywords = ["claude", "claude-code", "voice", "cli", "agent", "gemini"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 4 - Beta",
|
|
12
|
+
"Environment :: Console",
|
|
13
|
+
"Intended Audience :: Developers",
|
|
14
|
+
"License :: OSI Approved :: MIT License",
|
|
15
|
+
"Operating System :: OS Independent",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.11",
|
|
18
|
+
"Programming Language :: Python :: 3.12",
|
|
19
|
+
"Programming Language :: Python :: 3.13",
|
|
20
|
+
"Topic :: Software Development",
|
|
21
|
+
"Topic :: Utilities",
|
|
22
|
+
]
|
|
23
|
+
dependencies = [
|
|
24
|
+
"fastapi>=0.110",
|
|
25
|
+
"uvicorn[standard]>=0.29",
|
|
26
|
+
"websockets>=12",
|
|
27
|
+
"google-genai>=0.3",
|
|
28
|
+
"claude-agent-sdk>=0.1",
|
|
29
|
+
"python-dotenv>=1.0",
|
|
30
|
+
"pyjwt[crypto]>=2.8",
|
|
31
|
+
"httpx[http2]>=0.27",
|
|
32
|
+
"qrcode>=7.4",
|
|
33
|
+
"cbor2>=5.6",
|
|
34
|
+
"pyobjc-framework-Quartz>=10; sys_platform == 'darwin'",
|
|
35
|
+
"pyobjc-framework-ApplicationServices>=10; sys_platform == 'darwin'",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[project.optional-dependencies]
|
|
39
|
+
dev = ["pytest>=8", "pytest-asyncio>=0.23", "httpx>=0.27"]
|
|
40
|
+
|
|
41
|
+
[project.urls]
|
|
42
|
+
Homepage = "https://voxa.space"
|
|
43
|
+
Repository = "https://github.com/Ti-03/voxa"
|
|
44
|
+
Issues = "https://github.com/Ti-03/voxa/issues"
|
|
45
|
+
|
|
46
|
+
[project.scripts]
|
|
47
|
+
voxa = "server.cli:main"
|
|
48
|
+
|
|
49
|
+
[build-system]
|
|
50
|
+
requires = ["setuptools>=68"]
|
|
51
|
+
build-backend = "setuptools.build_meta"
|
|
52
|
+
|
|
53
|
+
[tool.setuptools]
|
|
54
|
+
# `static/` ships as a top-level data tree so that, once installed, it sits as a
|
|
55
|
+
# sibling of the `server` package (site-packages/static/). server/app.py resolves
|
|
56
|
+
# it via `Path(__file__).resolve().parent.parent / "static"`, i.e. relative to the
|
|
57
|
+
# installed package, so it must live next to `server`, not inside it.
|
|
58
|
+
packages = ["server", "static"]
|
|
59
|
+
include-package-data = true
|
|
60
|
+
|
|
61
|
+
[tool.setuptools.package-data]
|
|
62
|
+
static = ["**/*"]
|
|
63
|
+
# Ship Apple's root certs (server/certs/*.pem: StoreKit and App Attestation) so verification works out of the box.
|
|
64
|
+
server = ["certs/*.pem"]
|
|
65
|
+
|
|
66
|
+
[tool.pytest.ini_options]
|
|
67
|
+
asyncio_mode = "auto"
|
|
68
|
+
testpaths = ["tests"]
|
|
69
|
+
filterwarnings = [
|
|
70
|
+
# Third-party deprecation from Starlette's TestClient (recommends httpx2); not our code.
|
|
71
|
+
"ignore:Using `httpx` with `starlette.testclient` is deprecated",
|
|
72
|
+
]
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
fastapi>=0.110
|
|
2
|
+
uvicorn[standard]>=0.29
|
|
3
|
+
websockets>=12
|
|
4
|
+
google-genai>=0.3
|
|
5
|
+
claude-agent-sdk>=0.1
|
|
6
|
+
python-dotenv>=1.0
|
|
7
|
+
pyjwt[crypto]>=2.8
|
|
8
|
+
httpx[http2]>=0.27
|
|
9
|
+
qrcode>=7.4
|
|
10
|
+
cbor2>=5.6
|
|
11
|
+
pyobjc-framework-Quartz>=10; sys_platform == "darwin"
|
|
12
|
+
pyobjc-framework-ApplicationServices>=10; sys_platform == "darwin"
|
|
File without changes
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
import jwt
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def build_apns_jwt(key_pem: str, key_id: str, team_id: str, issued_at: int) -> str:
    """Sign and return an ES256 token for authenticating to APNs.

    The claims carry ``team_id`` as ``iss`` and ``issued_at`` as ``iat``; the
    token header carries ``key_id`` as ``kid``. ``key_pem`` is the signing key
    passed straight to ``jwt.encode``.
    """
    claims = {"iss": team_id, "iat": issued_at}
    token_header = {"alg": "ES256", "kid": key_id}
    return jwt.encode(claims, key_pem, algorithm="ES256", headers=token_header)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def build_voip_payload(call_id: str, summary: str) -> dict:
    """Return the push body for a VoIP ring: call id, summary, and ``aps``."""
    body = {"call_id": call_id, "summary": summary}
    # Added last so the key order matches the serialized form callers see.
    body["aps"] = {"content-available": 1}
    return body
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def build_cancel_payload(call_id: str) -> dict:
    """Return the push body that cancels a ring: call id, ``type`` and ``aps``."""
    body = {"call_id": call_id, "type": "cancel"}
    # Added last so the key order matches the serialized form callers see.
    body["aps"] = {"content-available": 1}
    return body
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ApnsClient:
    """Sends VoIP pushes via APNs HTTP/2. One per server process.

    ``config`` must expose ``apns_key_id``, ``apns_team_id``,
    ``apns_bundle_id`` and either ``apns_key`` (PEM contents) or
    ``apns_key_path`` (path to a PEM file). An optional truthy
    ``apns_sandbox`` attribute selects the sandbox host.
    """

    PROD_HOST = "https://api.push.apple.com"
    SANDBOX_HOST = "https://api.sandbox.push.apple.com"

    # Re-sign the cached JWT once it is older than this many seconds.
    # NOTE(review): presumably chosen to stay inside APNs' token lifetime --
    # confirm against Apple's token-based connection documentation.
    _JWT_MAX_AGE_S = 50 * 60

    def __init__(self, config, now_fn=None):
        """Store ``config`` and pick the APNs host.

        ``now_fn`` is an optional zero-argument callable returning the current
        Unix time in seconds; it defaults to ``int(time.time())``.
        """
        self._cfg = config
        # Xcode/dev-signed builds get sandbox push tokens, which only work
        # against the sandbox host; TestFlight/App Store builds use production.
        self._host = self.SANDBOX_HOST if getattr(config, "apns_sandbox", False) else self.PROD_HOST
        import time

        self._now = now_fn or (lambda: int(time.time()))
        self._jwt = ""
        self._jwt_at = 0

    def _load_key_pem(self) -> str:
        """Return the signing key PEM from config, or from the key file."""
        # Prefer the key contents (set as a secret on container hosts); fall
        # back to a file path for local/dev use.
        key_pem = getattr(self._cfg, "apns_key", "")
        if key_pem:
            return key_pem
        # Context manager so the file handle is closed deterministically.
        with open(self._cfg.apns_key_path) as key_file:
            return key_file.read()

    def _token(self) -> str:
        """Return the cached JWT, re-signing it when missing or too old."""
        now = self._now()
        if not self._jwt or now - self._jwt_at > self._JWT_MAX_AGE_S:
            self._jwt = build_apns_jwt(
                self._load_key_pem(), self._cfg.apns_key_id, self._cfg.apns_team_id, now
            )
            self._jwt_at = now
        return self._jwt

    def _headers(self) -> dict:
        """Return the request headers shared by ring and cancel pushes."""
        return {
            "apns-topic": f"{self._cfg.apns_bundle_id}.voip",
            "apns-push-type": "voip",
            "apns-priority": "10",
            "authorization": f"bearer {self._token()}",
        }

    async def _post(self, device_token: str, payload: dict):
        """POST ``payload`` as JSON to the device endpoint; return the response."""
        url = f"{self._host}/3/device/{device_token}"
        headers = self._headers()
        async with httpx.AsyncClient(http2=True, timeout=10) as client:
            return await client.post(url, headers=headers, content=json.dumps(payload))

    async def send_voip(self, device_token: str, call_id: str, summary: str) -> bool | int:
        """Send a VoIP ring. Returns True on success, or the HTTP status code on
        failure (so the caller can prune a 410 Gone / dead token)."""
        resp = await self._post(device_token, build_voip_payload(call_id, summary))
        if resp.status_code != 200:
            # 410 = the token is dead (app deleted/reinstalled); other codes are
            # transient/config. Log the reason so silent no-rings are diagnosable.
            logger.warning("APNs voip push failed: status=%s body=%s token=%s",
                           resp.status_code, resp.text[:200], device_token[:8])
            return resp.status_code
        return True

    async def send_voip_cancel(self, device_token: str, call_id: str) -> bool:
        """Send a cancel push for ``call_id``. Returns True only on HTTP 200."""
        resp = await self._post(device_token, build_cancel_payload(call_id))
        if resp.status_code != 200:
            # Logged like send_voip so a failed cancel is diagnosable too.
            logger.warning("APNs voip cancel failed: status=%s body=%s token=%s",
                           resp.status_code, resp.text[:200], device_token[:8])
            return False
        return True
|