voxa-code 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- server/__init__.py +0 -0
- server/apns.py +89 -0
- server/app.py +589 -0
- server/appattest.py +310 -0
- server/appstore.py +141 -0
- server/attested_store.py +60 -0
- server/auth.py +70 -0
- server/ax_controller.py +202 -0
- server/billing.py +177 -0
- server/call_manager.py +91 -0
- server/certs/AppleRootCA-G3.pem +15 -0
- server/certs/Apple_App_Attestation_Root_CA.pem +14 -0
- server/claude_controller.py +156 -0
- server/cli.py +365 -0
- server/cloud_app.py +345 -0
- server/config.py +56 -0
- server/device_registry.py +52 -0
- server/gemini_operator.py +677 -0
- server/hooks.py +202 -0
- server/orchestrator.py +315 -0
- server/push_routes.py +50 -0
- server/ratelimit.py +41 -0
- server/relay.py +157 -0
- server/relay_client.py +89 -0
- server/remote_operator.py +128 -0
- server/session_hub.py +33 -0
- server/terminal_watcher.py +241 -0
- server/terminals.py +510 -0
- server/tmux_controller.py +580 -0
- server/transcript_monitor.py +134 -0
- server/transcripts.py +143 -0
- server/users.py +90 -0
- server/voxa_cloud.py +132 -0
- server/waitlist.py +130 -0
- static/app.js +388 -0
- static/favicon.svg +1 -0
- static/index.html +253 -0
- static/pcm-worklet.js +69 -0
- static/pro.html +29 -0
- static/pro2.html +33 -0
- static/voxa-mark-white.svg +1 -0
- voxa_code-0.1.0.dist-info/METADATA +227 -0
- voxa_code-0.1.0.dist-info/RECORD +47 -0
- voxa_code-0.1.0.dist-info/WHEEL +5 -0
- voxa_code-0.1.0.dist-info/entry_points.txt +2 -0
- voxa_code-0.1.0.dist-info/licenses/LICENSE +21 -0
- voxa_code-0.1.0.dist-info/top_level.txt +2 -0
server/relay.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""Self-hosted relay: pairs a phone and a laptop by a short code and pipes their
|
|
2
|
+
audio + control frames, so you don't need Tailscale or a per-laptop tunnel.
|
|
3
|
+
|
|
4
|
+
Topology (all on YOUR server):
|
|
5
|
+
- Laptop connects OUTBOUND to wss://you/agent?code=<code>&token=<relay-token>
|
|
6
|
+
- Phone connects to wss://you/ws?code=<code>&token=<pair-token>
|
|
7
|
+
- The relay matches them by `code` and copies frames both ways (bytes + text).
|
|
8
|
+
|
|
9
|
+
Because the laptop dials out, there's no NAT/inbound problem on the laptop side.
|
|
10
|
+
The relay is a dumb, encrypted pipe; the V2V brain + Claude stay where they are.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import asyncio
|
|
16
|
+
import contextlib
|
|
17
|
+
import json
|
|
18
|
+
import os
|
|
19
|
+
|
|
20
|
+
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
async def _notify_down(agent: WebSocket) -> None:
|
|
24
|
+
with contextlib.suppress(Exception):
|
|
25
|
+
await agent.send_text('{"type":"__peer","state":"down"}')
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
async def _notify_phone_offline(phone: WebSocket) -> None:
|
|
29
|
+
"""Tell the phone the laptop isn't bridged for its code, so it can stop showing
|
|
30
|
+
a live session and prompt the user to re-scan (the laptop stopped or the saved
|
|
31
|
+
code is stale)."""
|
|
32
|
+
with contextlib.suppress(Exception):
|
|
33
|
+
await phone.send_text('{"type":"status","status":"laptop offline"}')
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
async def _notify_up(agent: WebSocket, account: str, voice: str = "") -> None:
|
|
37
|
+
"""Tell the laptop a phone is here, carrying the phone's account id (for metering)
|
|
38
|
+
and chosen voice. Both come from the phone's own connection, never from the QR."""
|
|
39
|
+
msg = {"type": "__peer", "state": "up"}
|
|
40
|
+
if account:
|
|
41
|
+
msg["account"] = account
|
|
42
|
+
if voice:
|
|
43
|
+
msg["voice"] = voice
|
|
44
|
+
with contextlib.suppress(Exception):
|
|
45
|
+
await agent.send_text(json.dumps(msg))
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class _Pair:
|
|
49
|
+
def __init__(self) -> None:
|
|
50
|
+
self.agent: WebSocket | None = None
|
|
51
|
+
self.phone: WebSocket | None = None
|
|
52
|
+
self.account: str = ""
|
|
53
|
+
self.voice: str = ""
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def add_relay_routes(app: FastAPI, relay_token: str | None = None) -> None:
    """Register the ``/agent`` and ``/ws`` relay endpoints on an existing app.

    Used by the combined voxa_cloud app and by ``create_relay_app``. The two
    sockets are matched by their ``code`` query parameter and every frame
    (bytes or text) received on one is copied to the other.

    Args:
        app: Application to register the routes on. The live pairing table
            is also published as ``app.state.pairs``.
        relay_token: Shared secret gating the endpoints. ``None`` means
            "read ``VOXA_RELAY_TOKEN`` from the environment"; an empty value
            disables the check.
    """
    import hmac  # function-scope import: only the token check needs it

    pairs: dict[str, _Pair] = {}
    if relay_token is None:
        relay_token = os.environ.get("VOXA_RELAY_TOKEN", "").strip()
    app.state.pairs = pairs

    # The event loop only holds weak references to tasks, so a fire-and-forget
    # task with no other reference can be garbage-collected before it runs.
    # Keep each one here until it completes.
    background: set[asyncio.Task] = set()

    def _spawn(coro) -> None:
        """Run ``coro`` as a background task that cannot be collected early."""
        task = asyncio.create_task(coro)
        background.add(task)
        task.add_done_callback(background.discard)

    def _token_ok(supplied: str | None) -> bool:
        """Compare a client-supplied token with ``relay_token`` in constant time."""
        if supplied is None:
            return False
        return hmac.compare_digest(supplied.encode("utf-8"),
                                   relay_token.encode("utf-8"))

    async def _pipe(src: WebSocket, get_dst, on_close) -> None:
        """Copy frames from ``src`` to whatever ``get_dst()`` currently returns.

        Stops on disconnect or on the first failed send, and always calls
        ``on_close`` on the way out.
        """
        try:
            while True:
                msg = await src.receive()
                if msg["type"] == "websocket.disconnect":
                    break
                dst = get_dst()
                if dst is None:
                    continue  # peer not connected yet; drop (audio) / nothing to do
                try:
                    if msg.get("bytes") is not None:
                        await dst.send_bytes(msg["bytes"])
                    elif msg.get("text") is not None:
                        await dst.send_text(msg["text"])
                except Exception:
                    break
        except (WebSocketDisconnect, RuntimeError):
            pass
        finally:
            on_close()

    @app.websocket("/agent")
    async def agent(ws: WebSocket):
        code = ws.query_params.get("code", "")
        if (relay_token and not _token_ok(ws.query_params.get("token"))) or not code:
            await ws.close(code=4401)
            return
        await ws.accept()
        pair = pairs.setdefault(code, _Pair())
        pair.agent = ws
        # If a phone is already waiting, tell the agent right away (account + voice).
        if pair.phone is not None:
            await _notify_up(ws, pair.account, pair.voice)

        def on_close():
            # Only clear the slot if it still belongs to this socket; a newer
            # agent connection for the same code may have replaced it.
            if pair.agent is ws:
                pair.agent = None
                # Laptop went away while a phone is connected -> let the phone
                # know so it disconnects and offers to re-scan.
                if pair.phone is not None:
                    _spawn(_notify_phone_offline(pair.phone))
            if pair.agent is None and pair.phone is None:
                pairs.pop(code, None)

        await _pipe(ws, lambda: pairs.get(code, pair).phone, on_close)

    @app.websocket("/ws")
    async def phone(ws: WebSocket):
        code = ws.query_params.get("code", "")
        # The pair token is validated by the laptop end (it owns the auth token);
        # the relay only needs a code to match. A relay_token (optional) gates who
        # may open agent/phone sockets at all.
        # NOTE(review): a phone that omits ``relay`` entirely passes this check,
        # so the gate only rejects phones that send a WRONG value. Confirm that
        # is intended before relying on relay_token to keep phones out.
        supplied = ws.query_params.get("relay")
        if (relay_token and supplied is not None and not _token_ok(supplied)) or not code:
            await ws.close(code=4401)
            return
        await ws.accept()
        pair = pairs.setdefault(code, _Pair())
        pair.phone = ws
        pair.account = ws.query_params.get("account", "")
        pair.voice = ws.query_params.get("voice", "")
        # Tell the laptop a phone is here so it only spins up the (paid) V2V session
        # when someone is actually connected.
        if pair.agent is not None:
            await _notify_up(pair.agent, pair.account, pair.voice)
        else:
            # No laptop bridged this code. Give it a moment to dial in (the laptop
            # reconnects continuously); if still absent, tell the phone it's offline.
            async def _grace_check():
                await asyncio.sleep(4)
                cur = pairs.get(code)
                if cur is not None and cur.agent is None and cur.phone is ws:
                    await _notify_phone_offline(ws)

            _spawn(_grace_check())

        def on_close():
            # Only clear the slot if it still belongs to this socket.
            if pair.phone is ws:
                pair.phone = None
                if pair.agent is not None:
                    _spawn(_notify_down(pair.agent))
            if pair.agent is None and pair.phone is None:
                pairs.pop(code, None)

        await _pipe(ws, lambda: pairs.get(code, pair).agent, on_close)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def create_relay_app() -> FastAPI:
    """Build a standalone relay app: a ``/healthz`` probe plus the relay routes.

    The relay token is not passed explicitly, so ``add_relay_routes`` falls
    back to the ``VOXA_RELAY_TOKEN`` environment variable.
    """
    app = FastAPI()

    @app.get("/healthz")
    async def healthz():
        return {"ok": True}

    add_relay_routes(app)
    return app
|
server/relay_client.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""Laptop-side relay bridge.
|
|
2
|
+
|
|
3
|
+
Dials OUT to your cloud relay's /agent socket and pipes it to the laptop's own
|
|
4
|
+
local /ws, so the phone reaches the laptop through your relay with no inbound
|
|
5
|
+
port, tunnel, or Tailscale. The local server (server.app) runs unchanged.
|
|
6
|
+
|
|
7
|
+
The laptop only opens the local /ws (and thus a paid V2V session) when the relay
|
|
8
|
+
signals a phone is actually connected (`__peer up`), and tears it down on `down`.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import asyncio
|
|
14
|
+
import contextlib
|
|
15
|
+
import json
|
|
16
|
+
import logging
|
|
17
|
+
|
|
18
|
+
import websockets
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
async def run_bridge(relay_ws_url: str, code: str, local_ws_url: str,
                     relay_token: str = "") -> None:
    """Keep a bridge to the relay's ``/agent`` socket open, reconnecting forever.

    Args:
        relay_ws_url: Base WebSocket URL of the relay (a trailing slash is
            tolerated).
        code: Pairing code the relay uses to match this laptop with a phone.
        local_ws_url: URL of the laptop's own local ``/ws`` endpoint, handed
            to ``_serve``.
        relay_token: Optional relay token; sent as ``token`` when non-empty.

    This coroutine only ends by cancellation: any error (or a clean close)
    is followed by a 2 second pause and a new connection attempt.
    """
    from urllib.parse import quote  # function-scope import: only used here

    # Percent-encode the values so characters such as '&', '#' or spaces in a
    # code/token cannot corrupt the query string.
    agent_url = f"{relay_ws_url.rstrip('/')}/agent?code={quote(code, safe='')}"
    if relay_token:
        agent_url += f"&token={quote(relay_token, safe='')}"
    while True:
        try:
            async with websockets.connect(agent_url, max_size=None, ping_interval=20) as agent:
                logger.info("relay bridge connected (code=%s)", code)
                await _serve(agent, local_ws_url)
        except Exception as e:
            logger.warning("relay bridge dropped: %s", e)
        await asyncio.sleep(2)  # reconnect
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
async def _serve(agent, local_ws_url: str) -> None:
|
|
39
|
+
local = None
|
|
40
|
+
pump: asyncio.Task | None = None
|
|
41
|
+
|
|
42
|
+
async def open_local(account: str = "", voice: str = ""):
|
|
43
|
+
nonlocal local, pump
|
|
44
|
+
if local is not None:
|
|
45
|
+
return
|
|
46
|
+
url = local_ws_url
|
|
47
|
+
# Pass the phone's account (for metering) and chosen voice through to the
|
|
48
|
+
# laptop's local /ws so they reach the metered /live session.
|
|
49
|
+
for key, val in (("account", account), ("voice", voice)):
|
|
50
|
+
if val:
|
|
51
|
+
sep = "&" if "?" in url else "?"
|
|
52
|
+
url = f"{url}{sep}{key}={val}"
|
|
53
|
+
local = await websockets.connect(url, max_size=None, ping_interval=20)
|
|
54
|
+
pump = asyncio.create_task(_pump(local, agent))
|
|
55
|
+
|
|
56
|
+
async def close_local():
|
|
57
|
+
nonlocal local, pump
|
|
58
|
+
if pump:
|
|
59
|
+
pump.cancel()
|
|
60
|
+
pump = None
|
|
61
|
+
if local:
|
|
62
|
+
with contextlib.suppress(Exception):
|
|
63
|
+
await local.close()
|
|
64
|
+
local = None
|
|
65
|
+
|
|
66
|
+
try:
|
|
67
|
+
async for msg in agent: # phone -> laptop
|
|
68
|
+
if isinstance(msg, str) and '"__peer"' in msg:
|
|
69
|
+
try:
|
|
70
|
+
data = json.loads(msg)
|
|
71
|
+
except ValueError:
|
|
72
|
+
data = {}
|
|
73
|
+
if data.get("type") == "__peer":
|
|
74
|
+
if data.get("state") == "up":
|
|
75
|
+
await open_local(data.get("account", ""), data.get("voice", ""))
|
|
76
|
+
elif data.get("state") == "down":
|
|
77
|
+
await close_local()
|
|
78
|
+
continue
|
|
79
|
+
if local is not None:
|
|
80
|
+
await local.send(msg)
|
|
81
|
+
finally:
|
|
82
|
+
await close_local()
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
async def _pump(local, agent) -> None:
|
|
86
|
+
"""Laptop -> phone: forward everything from the local /ws to the relay agent."""
|
|
87
|
+
with contextlib.suppress(Exception):
|
|
88
|
+
async for msg in local:
|
|
89
|
+
await agent.send(msg)
|
server/remote_operator.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""RemoteOperator: drop-in replacement for GeminiOperator that routes V2V through
|
|
2
|
+
the cloud `/live` proxy instead of talking to Gemini directly.
|
|
3
|
+
|
|
4
|
+
Why: the metered/paid model runs Gemini with YOUR key on the cloud (so the key
|
|
5
|
+
isn't on customer laptops) and counts minutes there. The laptop streams the
|
|
6
|
+
phone's mic to `/live`, plays back the audio it returns, and EXECUTES the tool
|
|
7
|
+
calls the cloud's Gemini decides (start_claude_session, send_to_claude, ...) via
|
|
8
|
+
the same `handle_tool_call` the local operator would use.
|
|
9
|
+
|
|
10
|
+
Enabled on the laptop by setting VOXA_LIVE_PROXY (+ VOXA_PROXY_TOKEN, VOXA_ACCOUNT).
|
|
11
|
+
Same interface as GeminiOperator: async context manager + set_audio_out /
|
|
12
|
+
set_text_out / send_audio / speak / run.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import asyncio
|
|
18
|
+
import contextlib
|
|
19
|
+
import json
|
|
20
|
+
import logging
|
|
21
|
+
from typing import Awaitable, Callable, Optional
|
|
22
|
+
|
|
23
|
+
import websockets
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class RemoteOperator:
    """WebSocket client for the cloud ``/live`` proxy.

    Use as an async context manager: entering opens the connection, ``run()``
    dispatches incoming frames (audio to the audio callback, ``tool`` frames
    to ``handle_tool_call``, everything else to the text callback), and the
    ``send_*`` / ``speak`` methods write to the same socket.
    """

    # Connection attempts made by __aenter__ and the pause between them.
    _CONNECT_ATTEMPTS = 4
    _RETRY_DELAY = 1.5

    def __init__(self, config, handle_tool_call: Callable[[str, dict], Awaitable[dict]],
                 *, proxy_url: str, account: str, token: str = "", voice: str = ""):
        """Store the callbacks and build the proxy URL.

        Args:
            config: Not used by this class; kept in the signature.
            handle_tool_call: Awaitable ``(name, args) -> result`` used to
                execute tool calls received from the cloud.
            proxy_url: URL of the cloud ``/live`` endpoint.
            account: Account id, sent as the ``account`` query parameter.
            token: Optional value for the ``token`` query parameter.
            voice: Optional value for the ``voice`` query parameter.
        """
        from urllib.parse import quote  # function-scope import: only used here

        self._handle = handle_tool_call
        params = [("account", account)]
        if token:
            params.append(("token", token))
        if voice:
            params.append(("voice", voice))
        # Percent-encode every value so '&', '#', spaces etc. cannot break or
        # extend the query string, and append correctly if proxy_url already
        # carries a query.
        sep = "&" if "?" in proxy_url else "?"
        query = "&".join(f"{key}={quote(str(val), safe='')}" for key, val in params)
        self._url = f"{proxy_url}{sep}{query}"
        self._audio_out: Optional[Callable[[bytes], Awaitable[None]]] = None
        self._text_out: Optional[Callable[[dict], Awaitable[None]]] = None
        self._ws = None
        self._suppress_pending = False
        self._stack = contextlib.AsyncExitStack()

    async def __aenter__(self) -> "RemoteOperator":
        # The cloud /live can briefly refuse mid-deploy/restart; retry a few times with
        # a longer handshake timeout so a momentary blip doesn't drop the session.
        last_err: Exception | None = None
        for attempt in range(self._CONNECT_ATTEMPTS):
            try:
                self._ws = await self._stack.enter_async_context(
                    websockets.connect(self._url, max_size=None,
                                       ping_interval=20, open_timeout=20))
                return self
            except Exception as e:
                last_err = e
                logger.warning("/live connect attempt %d failed: %s", attempt + 1, e)
                if attempt < self._CONNECT_ATTEMPTS - 1:
                    await asyncio.sleep(self._RETRY_DELAY)
        # Every attempt failed: surface the most recent error to the caller.
        raise last_err

    async def __aexit__(self, *exc) -> bool:
        await self._stack.aclose()
        self._ws = None
        return False  # never swallow the caller's exception

    def set_audio_out(self, cb):
        """Register the awaitable callback that receives audio bytes."""
        self._audio_out = cb

    def set_text_out(self, cb):
        """Register the awaitable callback that receives non-tool JSON frames."""
        self._text_out = cb

    async def send_audio(self, pcm: bytes) -> None:
        """Send one chunk of audio bytes; a no-op if not connected."""
        if self._ws is None:
            return
        try:
            await self._ws.send(pcm)
        except Exception:
            return  # cloud link closed (e.g. out of minutes); run()'s loop handles the end

    async def speak(self, text: str, immediate: bool = False) -> None:
        """Ask the cloud side to speak ``text``; a no-op if not connected."""
        if self._ws is None:
            return
        try:
            # Flush a pending greeting suppression FIRST, in-order on this single writer
            # (a separate task could interleave a concurrent send on the same socket).
            if self._suppress_pending:
                self._suppress_pending = False
                await self._ws.send(json.dumps({"type": "suppress_greeting"}))
            await self._ws.send(json.dumps(
                {"type": "speak", "text": text, "immediate": immediate}))
        except Exception:
            return  # cloud link closed; don't crash the answer flow

    def suppress_greeting(self) -> None:
        # Ask the cloud brain not to speak its generic opening (the laptop supplies a
        # contextual one on answer-attach). Sent in-order before the next speak().
        self._suppress_pending = True

    async def send_text(self, text: str) -> None:
        # A typed user turn from the phone. Forward it to the cloud brain the same way
        # audio is; without this, a `say` during a metered call raised AttributeError
        # and tore the whole call down.
        if self._ws is None:
            return
        try:
            await self._ws.send(json.dumps({"type": "user_text", "text": text}))
        except Exception:
            return  # cloud link closed; run()'s loop handles the end

    async def run(self) -> None:
        """Dispatch incoming frames until the connection ends.

        Raises:
            RuntimeError: If called before the operator has been entered.
        """
        if self._ws is None:
            raise RuntimeError("RemoteOperator is not open; use 'async with'.")
        async for msg in self._ws:
            if isinstance(msg, (bytes, bytearray)):
                if self._audio_out is not None:
                    await self._audio_out(bytes(msg))
                continue
            try:
                data = json.loads(msg)
            except ValueError:
                continue
            # Valid JSON that is not an object has no .get(); skip it rather
            # than letting an AttributeError end the whole session.
            if not isinstance(data, dict):
                continue
            if data.get("type") == "tool":  # cloud Gemini -> run a tool here
                try:
                    result = await self._handle(data.get("name", ""), data.get("args") or {})
                except Exception as e:
                    result = {"error": str(e)}
                with contextlib.suppress(Exception):
                    await self._ws.send(json.dumps(
                        {"type": "tool_result", "id": data.get("id"), "result": result}))
            elif self._text_out is not None:  # transcripts / status -> phone
                await self._text_out(data)
|
server/session_hub.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import inspect
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class SessionHub:
    """Routes a controller's final text to the attached line or the call manager.

    While a speak function is attached, final text is passed to it. With no
    line attached, the text goes to ``call_manager.on_update`` as long as
    offline ringing is enabled.
    """

    def __init__(self, controller, call_manager):
        self._c = controller
        self._cm = call_manager
        self._speak = None
        # Ring on finish when no line is attached. Disabled once Claude Code hooks are
        # live, so the hook becomes the single offline-ring source (no double-report).
        self._offline_ring = True
        # Subscribe so the controller delivers its final text to on_final.
        controller.on_final(self.on_final)

    def set_offline_ring(self, on: bool) -> None:
        """Enable or disable the no-line-attached fallback to the call manager."""
        self._offline_ring = on

    def attach(self, speak_fn) -> list[str]:
        """Attach a line: route final text to ``speak_fn`` from now on.

        Returns whatever ``call_manager.attach()`` returns.
        """
        self._speak = speak_fn
        return self._cm.attach()

    def detach(self) -> None:
        """Detach the line and notify the call manager."""
        self._speak = None
        self._cm.detach()

    async def on_final(self, text: str) -> None:
        """Deliver ``text`` to the attached speak function, else the call manager."""
        if self._speak is not None:
            # speak_fn may be sync or async; only await when it returns an awaitable.
            result = self._speak(text)
            if inspect.isawaitable(result):
                await result
        elif self._offline_ring:
            await self._cm.on_update(text)
|
server/terminal_watcher.py
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
"""Background watcher: ring the phone when ANY open Claude terminal finishes.
|
|
2
|
+
|
|
3
|
+
Voxa's main loop only follows the one terminal you're attached to. This watcher
|
|
4
|
+
runs alongside it and watches EVERY open Claude session it can discover (tmux
|
|
5
|
+
sessions and iTerm2 windows, including ones you started yourself). When any of
|
|
6
|
+
them goes working -> done, it reports a one-line summary.
|
|
7
|
+
|
|
8
|
+
It reuses the same completion detector as the controllers (``monitor_loop``):
|
|
9
|
+
a session is "done" when its screen stops changing after having been active.
|
|
10
|
+
|
|
11
|
+
Routing of the report (speak on the line if a phone is connected, else fire a
|
|
12
|
+
CallKit ring) is the caller's job, passed in as ``on_done``.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import asyncio
|
|
18
|
+
import inspect
|
|
19
|
+
import logging
|
|
20
|
+
from typing import Awaitable, Callable, Optional
|
|
21
|
+
|
|
22
|
+
from .terminals import _osa, _shell, discover_claude_sessions, _iterm_capture_script
|
|
23
|
+
from .tmux_controller import monitor_loop, looks_actionable, _ACTIVE_MARKERS
|
|
24
|
+
from .transcript_monitor import TranscriptMonitor
|
|
25
|
+
from .transcripts import PROJECTS_DIR
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
# on_done(label, cwd, summary) -> awaitable | None
|
|
30
|
+
DoneCallback = Callable[[str, str, str], object]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class _PassiveWatch:
    """A read-only stand-in for a session that exposes exactly what monitor_loop needs.

    Unlike the real controllers it never sends input, it only captures the pane
    and emits when the screen stabilises after activity."""

    # Claude shows these only while actively working; their ABSENCE (plus a stable
    # screen) is what tells us a task truly finished, not just paused mid-step.
    _WORKING_MARKERS = ("esc to interrupt", "esc to cancel", "tokens", "thinking…",
                        "thinking...", "running…", "running...")

    def __init__(self, session: dict, on_emit, run=_shell, osa=_osa,
                 poll_interval: float = 2.0, idle_polls: int = 5):
        """Store the session description and the capture/emit callables.

        Args:
            session: Session dict; ``backend`` and ``raw_id`` are read here.
            on_emit: Called as ``on_emit(session, text)``; may be sync or async.
            run: Callable given a tmux argv list, returning the captured text.
            osa: Callable given an AppleScript source string, returning its output.
            poll_interval: Stored as ``_poll``.
            idle_polls: Stored as ``_idle_polls``.
        """
        self._session = session
        self._on_emit = on_emit
        self._run = run
        self._osa = osa
        self._poll = poll_interval
        self._idle_polls = idle_polls
        # Not read inside this class; presumably consumed by monitor_loop
        # (TODO confirm against tmux_controller).
        self.status = "idle"
        self._started = True
        # True once we've actually seen this session WORKING since the last report. A
        # fresh session that merely booted to its idle prompt never shows the working
        # markers, so it won't ring the phone on startup.
        self._saw_work = False

    def _capture(self) -> str:
        """Return the session's current screen text ("" for unknown backends).

        Side effect: sets ``_saw_work`` when the screen contains any of
        ``_ACTIVE_MARKERS``.
        """
        backend = self._session.get("backend")
        raw = self._session.get("raw_id", "")
        if backend == "tmux":
            screen = self._run(["tmux", "capture-pane", "-p", "-t", raw])
        elif backend == "iterm":
            screen = self._osa(_iterm_capture_script(raw))
        elif backend == "terminal_app":
            wid, _, tab = raw.partition(":")
            screen = self._osa(
                f'tell application "Terminal" to return history of '
                f'tab {tab or "1"} of window id {wid}'
            )
        else:
            screen = ""
        # Guard against a helper returning None so .lower() below (and in
        # callers) cannot raise; TerminalWatcher._watch_resume applies the
        # same ``or ""`` guard to its capture.
        screen = screen or ""
        lowered = screen.lower()
        if any(m in lowered for m in _ACTIVE_MARKERS):
            self._saw_work = True
        return screen

    async def _emit(self, text: str) -> None:
        # Only report once this session actually worked (then went quiet), or when it
        # is showing a real prompt waiting on the user. A fresh session that just booted
        # to its idle prompt must not ring (that would call the user on session start).
        if not (self._saw_work or looks_actionable(text)):
            return
        # Don't report "done" if Claude is still working (stable screen but a
        # spinner/"esc to interrupt" is showing) -> avoids calling before the task
        # actually finishes.
        screen = self._capture().lower()
        if any(m in screen for m in self._WORKING_MARKERS):
            return
        # Reset so the next report requires fresh activity.
        self._saw_work = False
        result = self._on_emit(self._session, text)
        if inspect.isawaitable(result):
            await result
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class TerminalWatcher:
    """Polls for open Claude sessions and watches each for completion."""

    def __init__(self, on_done: DoneCallback, *,
                 on_resumed=None,
                 discover=discover_claude_sessions, run=_shell, osa=_osa,
                 should_skip: Optional[Callable[[dict], bool]] = None,
                 rediscover_interval: float = 5.0,
                 poll_interval: float = 2.0, idle_polls: int = 5,
                 dedupe_window: float = 90.0, clock=None,
                 resume_window: float = 30.0, resume_poll: float = 2.0,
                 monitor=monitor_loop, projects_dir: str | None = None):
        """Configure the watcher; nothing is started until ``run()``.

        Args:
            on_done: Called as ``on_done(label, cwd, summary)`` for each
                report; may be sync or async.
            on_resumed: Optional ``on_resumed(label, cwd)`` called when
                working markers reappear shortly after a report.
            discover: Called as ``discover(run, osa)`` to list sessions.
            run: Callable given a tmux argv list, returning captured text.
            osa: Callable given an AppleScript source string.
            should_skip: Predicate; sessions for which it is true are not
                reported.
            rediscover_interval: Seconds between discovery passes.
            poll_interval: Poll interval handed to the per-session watchers.
            idle_polls: Handed to ``_PassiveWatch``.
            dedupe_window: Minimum seconds between two reports for one key.
            clock: Time source; defaults to ``time.monotonic``.
            resume_window: How long to poll for resumed work after a report.
            resume_poll: Seconds between those polls.
            monitor: Coroutine function run on each ``_PassiveWatch``.
            projects_dir: Override for the transcript projects directory.
        """
        import time  # function-scope import, only needed for the default clock

        self._on_done = on_done
        self._on_resumed = on_resumed
        self._resume_window = resume_window
        self._resume_poll = resume_poll
        self._discover = discover
        self._run = run
        self._osa = osa
        self._should_skip = should_skip or (lambda s: False)
        self._rediscover = rediscover_interval
        self._poll = poll_interval
        self._idle_polls = idle_polls
        self._monitor = monitor
        self._dedupe_window = dedupe_window
        self._projects = projects_dir
        self._quiet = 5.0
        self._clock = clock or time.monotonic
        self._last_fired: dict[str, float] = {}  # cwd -> last report time
        self._watchers: dict[str, asyncio.Task] = {}
        # Strong references to in-flight _watch_resume tasks: the event loop
        # only keeps weak references, so an otherwise unreferenced task could
        # be garbage-collected before it finishes.
        self._resume_tasks: set[asyncio.Future] = set()

    async def _handle_emit(self, session: dict, text: str) -> None:
        # A session we're actively driving is reported by the main loop already.
        if self._should_skip(session):
            return
        # Dedupe: don't report the same terminal again within the window (the
        # monitor can re-fire on minor screen changes -> avoids ringing twice).
        # NOTE(review): the key is the cwd, so two sessions sharing a directory
        # also suppress each other inside the window - confirm that is intended.
        cwd = session.get("cwd", "") or session.get("id", "")
        now = self._clock()
        if now - self._last_fired.get(cwd, -1e9) < self._dedupe_window:
            return
        self._last_fired[cwd] = now
        kind, summary = classify_screen(text)
        if kind == "needs_input" and summary:
            summary = f"needs input: {summary}"
        result = self._on_done(session.get("label", ""), cwd, summary)
        if inspect.isawaitable(result):
            await result
        if self._on_resumed is not None:
            task = asyncio.ensure_future(self._watch_resume(session))
            self._resume_tasks.add(task)
            task.add_done_callback(self._resume_tasks.discard)

    def _spawn(self, session: dict) -> None:
        """Start the watcher task for ``session`` and record it by session id."""
        if session.get("backend") == "ax":
            # "ax" sessions are followed through a TranscriptMonitor instead
            # of screen captures.
            mon = TranscriptMonitor(
                session.get("cwd", ""),
                lambda text, s=session: self._handle_emit(s, text),
                poll_interval=self._poll, quiet_secs=self._quiet,
                projects_dir=self._projects or PROJECTS_DIR)
            mon._started = True
            self._watchers[session["id"]] = asyncio.ensure_future(mon.run())
            return
        watch = _PassiveWatch(session, self._handle_emit, run=self._run, osa=self._osa,
                              poll_interval=self._poll, idle_polls=self._idle_polls)
        self._watchers[session["id"]] = asyncio.ensure_future(self._monitor(watch))

    def _capture_session(self, session: dict) -> str:
        """Return the current screen text of ``session`` ("" for unknown backends)."""
        backend = session.get("backend")
        raw = session.get("raw_id", "")
        if backend == "tmux":
            return self._run(["tmux", "capture-pane", "-p", "-t", raw])
        if backend == "iterm":
            return self._osa(_iterm_capture_script(raw))
        if backend == "terminal_app":
            wid, _, tab = raw.partition(":")
            return self._osa(
                f'tell application "Terminal" to return history of '
                f'tab {tab or "1"} of window id {wid}'
            )
        return ""

    async def _watch_resume(self, session: dict) -> None:
        """After a ring fires, poll this session briefly. If working markers reappear
        (the user acted on the laptop), tell the caller to cancel the ring."""
        if self._on_resumed is None:
            return
        elapsed = 0.0
        while elapsed < self._resume_window:
            # Capture off the event loop (as reconcile_once does for
            # discovery) so a slow capture call cannot stall other tasks.
            captured = await asyncio.to_thread(self._capture_session, session)
            screen = (captured or "").lower()
            if any(m in screen for m in _PassiveWatch._WORKING_MARKERS):
                result = self._on_resumed(session.get("label", ""), session.get("cwd", ""))
                if inspect.isawaitable(result):
                    await result
                return
            await asyncio.sleep(self._resume_poll)
            elapsed += self._resume_poll

    @staticmethod
    def _log_watcher_failure(sid: str, task: asyncio.Future) -> None:
        """Log the exception of a finished watcher task, if it has one.

        Retrieving it also stops asyncio from reporting "Task exception was
        never retrieved" when the task object is dropped.
        """
        if task.cancelled():
            return
        exc = task.exception()
        if exc is not None:
            logger.error("watcher for %s crashed; restarting", sid, exc_info=exc)

    async def reconcile_once(self) -> None:
        """One discovery pass: start watchers for new sessions, drop gone ones."""
        try:
            sessions = await asyncio.to_thread(self._discover, self._run, self._osa)
        except Exception:
            logger.exception("terminal discovery failed")
            return
        live_ids = set()
        for s in sessions:
            sid = s.get("id")
            if not sid:
                continue
            live_ids.add(sid)
            task = self._watchers.get(sid)
            if task is None:
                self._spawn(s)
            elif task.done():
                # The previous watcher ended; surface why, then start a new one.
                self._log_watcher_failure(sid, task)
                self._spawn(s)
        for sid in [s for s in self._watchers if s not in live_ids]:
            self._watchers.pop(sid).cancel()

    async def run(self) -> None:
        """Reconcile forever; on exit cancel every task this watcher started."""
        try:
            while True:
                await self.reconcile_once()
                await asyncio.sleep(self._rediscover)
        finally:
            for t in self._watchers.values():
                t.cancel()
            self._watchers.clear()
            # Copy first: done-callbacks discard from the set as tasks finish.
            for t in list(self._resume_tasks):
                t.cancel()
            self._resume_tasks.clear()
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _first_meaningful_line(text: str) -> str:
|
|
224
|
+
"""Pull a short human summary from the freshly-stable screen delta."""
|
|
225
|
+
for ln in (text or "").splitlines():
|
|
226
|
+
s = ln.strip()
|
|
227
|
+
if len(s) > 2:
|
|
228
|
+
return s[:200]
|
|
229
|
+
return ""
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def classify_screen(text: str) -> tuple[str, str]:
    """Decide whether a freshly-stable screen is Claude WAITING FOR INPUT (a menu, a
    y/n, a question) or a FINISHED result, and return (kind, summary).

    ``kind`` is ``"needs_input"`` when ``looks_actionable`` accepts the text
    or the last meaningful line ends with ``?``; otherwise ``"finished"``.
    """
    body = text or ""
    # A trailing question mark on the last meaningful line is a question to the user.
    # Reversing the lines lets the "first meaningful line" helper find the last one.
    last = _first_meaningful_line("\n".join(reversed(body.splitlines())))
    if looks_actionable(body) or last.endswith("?"):
        # NOTE(review): the summary is the FIRST meaningful line of the screen,
        # not the question itself; ``last`` is only a fallback. Confirm this is
        # the intended text to read out.
        q = _first_meaningful_line(body) or last
        return "needs_input", q
    return "finished", _first_meaningful_line(body)
|