voxcaster 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
speakd/__init__.py ADDED
@@ -0,0 +1,47 @@
1
+ """speakd — fire-and-forget local TTS narration over a Unix socket.
2
+
3
+ A small daemon that turns text lines into speech with `Kokoro
4
+ <https://github.com/hexgrad/kokoro>`_, plus a zero-dependency client.
5
+ Designed for narrating long-running work (training runs, builds, pipelines)
6
+ without ever blocking or crashing the thing doing the work.
7
+
8
+ Quickstart::
9
+
10
+ from speakd import speak
11
+ speak("experiment finished") # fire-and-forget
12
+ speak("loss is NaN — stopping", interrupt=True)
13
+ """
14
+ from typing import TYPE_CHECKING
15
+
16
+ __version__ = "0.1.0"
17
+
18
+ if TYPE_CHECKING: # real imports for type checkers / IDEs
19
+ from .client import ensure_daemon, ping, set_volume, speak
20
+ from .config import Config, load_config
21
+ from .markdown import extract_tts_summary, preprocess_for_speech, strip_markdown
22
+
23
+ __all__ = [
24
+ "speak", "ping", "set_volume", "ensure_daemon",
25
+ "Config", "load_config",
26
+ "strip_markdown", "extract_tts_summary", "preprocess_for_speech",
27
+ "__version__",
28
+ ]
29
+
30
_CLIENT_ATTRS = ("speak", "ping", "set_volume", "ensure_daemon")
_CONFIG_ATTRS = ("Config", "load_config")
_MARKDOWN_ATTRS = ("strip_markdown", "extract_tts_summary", "preprocess_for_speech")


def __getattr__(name: str):
    """Resolve a public name on first access (PEP 562 lazy re-export).

    The submodule that owns *name* is imported only when the attribute is
    actually requested, so ``import speakd`` stays cheap and submodules that
    ``python -m speakd.<mod>`` re-executes are not imported eagerly.

    Raises:
        AttributeError: if *name* is not one of the lazily exported names.
    """
    # Pick the owning submodule; the import happens inside the branch so
    # unrelated submodules are never loaded as a side effect.
    if name in _CLIENT_ATTRS:
        from . import client as provider
    elif name in _CONFIG_ATTRS:
        from . import config as provider
    elif name in _MARKDOWN_ATTRS:
        from . import markdown as provider
    else:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return getattr(provider, name)
speakd/client.py ADDED
@@ -0,0 +1,289 @@
1
+ """speakd client: send text to the daemon, with auto-spawn and clean fallback.
2
+
3
+ This module is intentionally dependency-light (stdlib only) so that
4
+ ``import speakd`` and the ``speak`` CLI stay instant even on machines where
5
+ the TTS stack is heavy. The daemon's dependencies are only imported inside
6
+ the daemon process.
7
+
8
+ Python API
9
+ ----------
10
+ from speakd import speak, set_volume, ensure_daemon
11
+
12
+ speak("checkpoint saved") # fire-and-forget
13
+ speak("eval finished", blocking=True) # wait until spoken
14
+ speak("loss is NaN — stopping", interrupt=True) # jump the queue
15
+ set_volume(85) # live, 0-130
16
+
17
+ Every call is safe when the daemon is down: the client auto-spawns it once,
18
+ and if that fails it degrades to the configured fallback engine (espeak by
19
+ default) and logs the event — narration never silently disappears.
20
+
21
+ CLI
22
+ ---
23
+ speak "build finished"
24
+ speak --interrupt "disk is full"
25
+ speak --blocking "done"
26
+ speak --volume 85
27
+ long_running_job | speak # reads stdin when no text args are given
28
+ """
29
+ from __future__ import annotations
30
+
31
+ import argparse
32
+ import datetime
33
+ import os
34
+ import shlex
35
+ import socket
36
+ import subprocess
37
+ import sys
38
+ import time
39
+
40
+ from . import protocol
41
+ from .config import Config, load_config
42
+
43
+ # Process-wide default config, loaded lazily on first use.
44
+ _default_config: Config | None = None
45
+
46
+
47
+ def _get_config(config: Config | None = None) -> Config:
48
+ global _default_config
49
+ if config is not None:
50
+ return config
51
+ if _default_config is None:
52
+ _default_config = load_config()
53
+ return _default_config
54
+
55
+
56
+ # ── low-level helpers ───────────────────────────────────────────────────────
57
+
58
+ def _socket_alive(cfg: Config) -> bool:
59
+ """True if the daemon accepts connections (~1 ms; safe in hot loops)."""
60
+ try:
61
+ s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
62
+ s.settimeout(cfg.connect_timeout)
63
+ s.connect(cfg.socket_path)
64
+ s.close()
65
+ return True
66
+ except OSError:
67
+ return False
68
+
69
+
70
+ def _send(payload: bytes, cfg: Config, wait_ack: bool = False) -> bool:
71
+ """Deliver one wire-protocol line. Returns False on any socket error."""
72
+ try:
73
+ s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
74
+ s.settimeout(cfg.connect_timeout)
75
+ s.connect(cfg.socket_path)
76
+ s.sendall(payload)
77
+ if wait_ack:
78
+ # Speech can take a while — switch to the generous ack timeout.
79
+ s.settimeout(cfg.ack_timeout)
80
+ s.recv(len(protocol.ACK) + 62)
81
+ s.close()
82
+ return True
83
+ except OSError:
84
+ return False
85
+
86
+
87
+ def _log_fallback(cfg: Config, reason: str) -> None:
88
+ """Record a fallback event (file + stderr) so degraded audio is diagnosable."""
89
+ try:
90
+ os.makedirs(os.path.dirname(cfg.fallback_log), exist_ok=True)
91
+ timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
92
+ with open(cfg.fallback_log, "a") as f:
93
+ f.write(f"{timestamp} FALLBACK reason={reason}\n")
94
+ except OSError:
95
+ pass
96
+ print(f"[speakd] WARNING: fallback engine used — {reason} (see {cfg.fallback_log})",
97
+ file=sys.stderr, flush=True)
98
+
99
+
100
+ def _fallback_speak(text: str, interrupt: bool, cfg: Config) -> None:
101
+ """Last resort: speak through the configured fallback engine."""
102
+ if not cfg.fallback:
103
+ return # fallback disabled by config
104
+ argv = [a.format(text=text) for a in cfg.fallback]
105
+ if not any("{text}" in a for a in cfg.fallback):
106
+ argv.append(text)
107
+ try:
108
+ if interrupt:
109
+ # Best-effort: cut off any in-flight fallback speech first.
110
+ subprocess.run(["pkill", "-x", os.path.basename(argv[0])],
111
+ stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
112
+ subprocess.Popen(argv, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
113
+ except (OSError, FileNotFoundError):
114
+ pass # fallback engine not installed either — nothing left to try
115
+
116
+
117
+ # ── public API ──────────────────────────────────────────────────────────────
118
+
119
def ensure_daemon(config: Config | None = None) -> bool:
    """Idempotent: make sure a daemon is listening on the configured socket.

    Returns immediately when the socket already answers. Otherwise a detached
    process is started — ``python -m speakd.daemon`` with this interpreter,
    or the shell-split value of ``$SPEAKD_DAEMON_CMD`` when set — with its
    stdout/stderr appended to ``cfg.log_file`` and ``SPEAKD_SOCKET`` exported
    to it. The socket is then polled every 0.2 s for up to ``cfg.spawn_wait``
    seconds.

    NOTE(review): the original docstring states that the daemon's flock
    singleton makes concurrent spawn attempts harmless; the daemon is not
    visible here, so that is not verified.

    Args:
        config: Optional explicit config; the process default is used if None.

    Returns:
        True if the socket answers (before or after spawning), False if the
        command could not be parsed or started, or the wait timed out.
    """
    cfg = _get_config(config)
    if _socket_alive(cfg):
        return True

    default_cmd = [sys.executable, "-m", "speakd.daemon"]
    custom = os.environ.get("SPEAKD_DAEMON_CMD", "")
    try:
        # A whitespace-only override splits to [], which Popen cannot run;
        # treat it like an unset variable.
        cmd = shlex.split(custom) or default_cmd
    except ValueError as exc:
        # Unbalanced quotes etc. — report and fail softly instead of raising
        # into code that only wanted to narrate something.
        print(f"[speakd] WARNING: cannot parse SPEAKD_DAEMON_CMD: {exc}",
              file=sys.stderr, flush=True)
        return False

    env = dict(os.environ, SPEAKD_SOCKET=cfg.socket_path)
    try:
        log_dir = os.path.dirname(cfg.log_file)
        if log_dir:  # bare filename: nothing to create, makedirs("") raises
            os.makedirs(log_dir, exist_ok=True)
        with open(cfg.log_file, "a") as log_fh:
            subprocess.Popen(
                cmd,
                stdout=log_fh,
                stderr=log_fh,
                env=env,
                close_fds=True,
                start_new_session=True,
            )
    except OSError:
        return False

    deadline = time.monotonic() + cfg.spawn_wait
    while time.monotonic() < deadline:
        if _socket_alive(cfg):
            return True
        time.sleep(0.2)
    return False
155
+
156
+
157
def set_volume(level: int, config: Config | None = None) -> bool:
    """Send a live playback-volume change to the daemon.

    The documented range is 0-130 with 100 as nominal; the value is passed to
    ``protocol.encode_volume`` unchanged. If the message cannot be delivered,
    a hint is printed to stderr.

    Args:
        level: Requested volume.
        config: Optional explicit config; the process default is used if None.

    Returns:
        True if the daemon received the message, False otherwise.
    """
    cfg = _get_config(config)
    delivered = _send(protocol.encode_volume(level), cfg)
    if not delivered:
        print(f"[speakd] daemon not running — start it, or export SPEAKD_VOLUME={level}",
              file=sys.stderr)
    return delivered
169
+
170
+
171
def ping(config: Config | None = None) -> str:
    """Health-check the daemon.

    Sends the protocol's ping message and reads one short reply.

    Args:
        config: Optional explicit config; the process default is used if None.

    Returns:
        ``"ready"`` when the reply equals ``protocol.PING_READY`` (ignoring
        surrounding whitespace), ``"down"`` when connecting, sending or
        receiving raised ``OSError``, and ``"starting"`` for any other reply.
    """
    cfg = _get_config(config)
    try:
        # Context manager so the socket is released on error paths too; the
        # explicit close() used to be skipped when connect/send/recv failed.
        with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as conn:
            conn.settimeout(cfg.connect_timeout)
            conn.connect(cfg.socket_path)
            conn.sendall(protocol.encode_ping())
            reply = conn.recv(len(protocol.PING_STARTING) + 8)
    except OSError:
        return "down"
    if reply.strip() == protocol.PING_READY.strip():
        return "ready"
    return "starting"
191
+
192
+
193
def speak(
    text: str,
    blocking: bool = False,
    interrupt: bool = False,
    voice: str | None = None,
    config: Config | None = None,
) -> bool:
    """Hand a line of text to the voice daemon, degrading gracefully.

    Delivery is attempted in three stages: send to a running daemon; if that
    fails, call ``ensure_daemon`` and send once more; if that fails too, log
    the event and speak through the fallback engine.

    Args:
        text: The text to speak. Empty/whitespace-only text is a no-op.
        blocking: Wait for the daemon's acknowledgement after sending.
        interrupt: Encoded into the request; also passed to the fallback
            engine so it cuts off its own in-flight speech.
        voice: Per-request voice id override; None leaves the choice to the
            daemon.
        config: Optional explicit :class:`speakd.config.Config`.

    Returns:
        True if the line was delivered to the daemon (or there was nothing to
        say); False if the fallback path had to be taken.
    """
    text = text.strip()
    if not text:
        return True

    cfg = _get_config(config)
    wire = protocol.encode_speak(text, interrupt=interrupt, voice=voice)

    def deliver() -> bool:
        return _send(wire, cfg, wait_ack=blocking)

    # Stage 1: daemon already up. Stage 2: bring it up and retry once.
    if deliver() or (ensure_daemon(cfg) and deliver()):
        return True

    # Stage 3: fallback engine.
    _log_fallback(cfg, "daemon down after spawn attempt")
    _fallback_speak(text, interrupt, cfg)
    return False
234
+
235
+
236
+ # ── CLI ─────────────────────────────────────────────────────────────────────
237
+
238
def main(argv: list[str] | None = None) -> int:
    """Entry point of the ``speak`` command-line client.

    Args:
        argv: Argument list to parse; None lets argparse use ``sys.argv``.

    Returns:
        Process exit status:
        ``--ping``: 0 if the daemon reports ready, else 1.
        Volume-only invocation: 0 if the daemon accepted the volume, else 1.
        No text available: 2, after printing usage.
        Otherwise: 0 if the line reached the daemon, 1 if the fallback path
        was used.

    Raises:
        SystemExit: from argparse on bad arguments or an unreadable/invalid
            config file (status 2), and for ``--help`` / ``--version``.
    """
    from . import __version__

    parser = argparse.ArgumentParser(
        prog="speak",
        description="Send text to the speakd narration daemon.",
        epilog="With no TEXT arguments, text is read from stdin (pipe-friendly).",
    )
    parser.add_argument("text", nargs="*", help="text to speak")
    parser.add_argument("-i", "--interrupt", action="store_true",
                        help="cut off current speech and drain the queue first")
    parser.add_argument("-b", "--blocking", action="store_true",
                        help="wait until the line has been spoken")
    parser.add_argument("--volume", type=int, metavar="N",
                        help="set live playback volume (0-130) before speaking")
    parser.add_argument("--voice", metavar="ID",
                        help="per-request Kokoro voice id (e.g. af_heart, bf_emma)")
    parser.add_argument("--ping", action="store_true",
                        help="health-check the daemon and exit (prints ready/starting/down)")
    parser.add_argument("--socket", metavar="PATH", help="Unix socket path override")
    parser.add_argument("--config", metavar="PATH", help="TOML config file")
    parser.add_argument("--version", action="version", version=f"speakd {__version__}")
    args = parser.parse_args(argv)

    try:
        cfg = load_config(args.config)
    except (OSError, ValueError) as exc:
        # Missing/unreadable file or a bad value: report it as a normal CLI
        # usage error rather than a traceback. parser.error() exits.
        parser.error(str(exc))
    if args.socket:
        cfg.socket_path = args.socket

    if args.ping:
        status = ping(config=cfg)
        print(status)
        return 0 if status == "ready" else 1

    volume_ok = True
    if args.volume is not None:
        volume_ok = set_volume(args.volume, config=cfg)

    text = " ".join(args.text)
    if not text and not sys.stdin.isatty():
        text = sys.stdin.read().strip()
    if not text:
        if args.volume is not None:
            # Volume-only invocation: surface a failed delivery in the exit
            # status instead of always reporting success.
            return 0 if volume_ok else 1
        parser.print_usage(sys.stderr)
        return 2

    delivered = speak(text, blocking=args.blocking, interrupt=args.interrupt,
                      voice=args.voice, config=cfg)
    return 0 if delivered else 1
286
+
287
+
288
+ if __name__ == "__main__":
289
+ sys.exit(main())
speakd/config.py ADDED
@@ -0,0 +1,205 @@
1
+ """Configuration for speakd.
2
+
3
+ Precedence (lowest to highest):
4
+
5
+ 1. Built-in defaults (work out of the box on CPU)
6
+ 2. TOML config file
7
+ 3. ``SPEAKD_*`` environment variables
8
+ 4. CLI flags (applied by the entry points)
9
+
10
+ The config file is looked up in this order:
11
+
12
+ 1. ``$SPEAKD_CONFIG``
13
+ 2. ``$XDG_CONFIG_HOME/speakd/config.toml``
14
+ (default: ``~/.config/speakd/config.toml``)
15
+
16
+ Missing files are fine — every key has a sane default.
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import os
21
+ import tempfile
22
+ from dataclasses import dataclass, field, fields
23
+
24
+ try:
25
+ import tomllib # Python 3.11+
26
+ except ModuleNotFoundError: # pragma: no cover - Python 3.10
27
+ import tomli as tomllib # type: ignore[no-redef]
28
+
29
+ VALID_DEVICE_POLICIES = ("auto", "cpu", "gpu")
30
+
31
+
32
+ def _as_bool(value: object) -> bool:
33
+ """Coerce a TOML/env value to bool. Accepts native bools and the usual
34
+ truthy/falsy strings (1/0, true/false, yes/no, on/off)."""
35
+ if isinstance(value, bool):
36
+ return value
37
+ return str(value).strip().lower() in ("1", "true", "yes", "on")
38
+
39
+
40
def default_socket_path() -> str:
    """Pick the per-user default socket location.

    Uses ``speakd.sock`` inside ``$XDG_RUNTIME_DIR`` when that variable names
    an existing directory; otherwise ``speakd-<uid>.sock`` in the system
    temp directory.
    """
    runtime_dir = os.environ.get("XDG_RUNTIME_DIR")
    if not runtime_dir or not os.path.isdir(runtime_dir):
        return os.path.join(tempfile.gettempdir(), f"speakd-{os.getuid()}.sock")
    return os.path.join(runtime_dir, "speakd.sock")
47
+
48
+
49
def default_state_dir() -> str:
    """Return ``$XDG_STATE_HOME/speakd`` (default: ``~/.local/state/speakd``).

    An unset *or empty* ``XDG_STATE_HOME`` selects the default, as the XDG
    Base Directory specification requires; an empty value previously produced
    the relative path ``speakd``.
    """
    state_home = os.environ.get("XDG_STATE_HOME") or os.path.join(
        os.path.expanduser("~"), ".local", "state"
    )
    return os.path.join(state_home, "speakd")
55
+
56
+
57
def default_config_file() -> str:
    """Return ``$XDG_CONFIG_HOME/speakd/config.toml``.

    Falls back to ``~/.config/speakd/config.toml`` when ``XDG_CONFIG_HOME``
    is unset or empty (the XDG Base Directory specification treats both the
    same; an empty value previously produced a relative path).
    """
    config_home = os.environ.get("XDG_CONFIG_HOME") or os.path.join(
        os.path.expanduser("~"), ".config"
    )
    return os.path.join(config_home, "speakd", "config.toml")
62
+
63
+
64
@dataclass
class Config:
    """The effective speakd settings, grouped by TOML section.

    ``config.example.toml`` is referenced by the project as the documentation
    for these keys.
    """

    # ── [tts] ──
    voice: str = "af_heart"            # Kokoro voice id (af_heart, bf_emma, am_adam, ...)
    speed: float = 1.0                 # multiplier applied to the speech rate
    lang_code: str = "a"               # Kokoro language code; "a" is American English
    markdown_preprocess: bool = True   # strip markdown / extract TTS_SUMMARY before synthesis

    # ── [device] ──
    device: str = "auto"               # one of "cpu", "gpu", "auto" (dynamic offload)
    keepalive_seconds: int = 180       # idle time before GPU -> CPU offload

    # ── [daemon] ──
    socket_path: str = field(default_factory=default_socket_path)
    socket_mode: int = 0o600           # permission bits for the socket file
    always_interrupt: bool = False     # every request behaves as an interrupt (latest-only)
    log_file: str = field(
        default_factory=lambda: os.path.join(default_state_dir(), "daemon.log")
    )

    # ── [audio] ──
    volume: int = 100                  # 0-130 on mpv's scale
    max_playback_seconds: int = 120    # the player is killed after this long
    player: list[str] = field(
        default_factory=lambda: ["mpv", "--no-terminal", "--volume={volume}", "{file}"]
    )

    # ── [fallback] ── argv template used when TTS fails; [] turns it off
    fallback: list[str] = field(
        default_factory=lambda: ["espeak", "-s", "160", "-v", "en-us", "{text}"]
    )

    # ── [client] ──
    connect_timeout: float = 0.5       # seconds allowed for connect/send
    ack_timeout: float = 300.0         # seconds to wait for the ack when blocking
    spawn_wait: float = 4.0            # seconds to wait for an auto-spawned daemon

    # TOML file the values came from; "" means built-in defaults only.
    source_file: str = ""

    @property
    def lock_path(self) -> str:
        """Path of the singleton lock file: the socket path plus ``.lock``."""
        return self.socket_path + ".lock"

    @property
    def fallback_log(self) -> str:
        """Path of ``fallback.log`` in the same directory as ``log_file``."""
        return os.path.join(os.path.dirname(self.log_file), "fallback.log")

    def describe(self) -> str:
        """Render every setting as ``name = repr(value)``, one per line.

        The first line names the source file (or ``defaults``); ``socket_mode``
        is shown in octal; the derived ``lock_path`` and ``fallback_log`` are
        appended at the end.
        """
        out = [f"# effective speakd config (source: {self.source_file or 'defaults'})"]
        for spec in fields(self):
            if spec.name == "source_file":
                continue  # already shown in the header line
            current = getattr(self, spec.name)
            shown = oct(current) if spec.name == "socket_mode" else current
            out.append(f"{spec.name} = {shown!r}")
        out.extend(
            (
                f"lock_path = {self.lock_path!r}",
                f"fallback_log = {self.fallback_log!r}",
            )
        )
        return "\n".join(out)
131
+
132
+
133
+ # (section, key, attribute, caster) - the full TOML surface.
134
+ _FILE_KEYS = [
135
+ ("tts", "voice", "voice", str),
136
+ ("tts", "speed", "speed", float),
137
+ ("tts", "lang_code", "lang_code", str),
138
+ ("tts", "markdown_preprocess", "markdown_preprocess", _as_bool),
139
+ ("device", "policy", "device", str),
140
+ ("device", "keepalive_seconds", "keepalive_seconds", int),
141
+ ("daemon", "socket_path", "socket_path", str),
142
+ ("daemon", "socket_mode", "socket_mode", lambda v: int(str(v), 8)),
143
+ ("daemon", "always_interrupt", "always_interrupt", _as_bool),
144
+ ("daemon", "log_file", "log_file", str),
145
+ ("audio", "volume", "volume", int),
146
+ ("audio", "max_playback_seconds", "max_playback_seconds", int),
147
+ ("audio", "player", "player", lambda v: [str(a) for a in v]),
148
+ ("fallback", "command", "fallback", lambda v: [str(a) for a in v]),
149
+ ("client", "connect_timeout", "connect_timeout", float),
150
+ ("client", "ack_timeout", "ack_timeout", float),
151
+ ("client", "spawn_wait", "spawn_wait", float),
152
+ ]
153
+
154
+ # Environment overrides for the headline knobs.
155
+ _ENV_KEYS = [
156
+ ("SPEAKD_VOICE", "voice", str),
157
+ ("SPEAKD_SPEED", "speed", float),
158
+ ("SPEAKD_LANG", "lang_code", str),
159
+ ("SPEAKD_DEVICE", "device", str),
160
+ ("SPEAKD_KEEPALIVE", "keepalive_seconds", int),
161
+ ("SPEAKD_SOCKET", "socket_path", str),
162
+ ("SPEAKD_VOLUME", "volume", int),
163
+ ("SPEAKD_LOG_FILE", "log_file", str),
164
+ ("SPEAKD_MARKDOWN_PREPROCESS", "markdown_preprocess", _as_bool),
165
+ ("SPEAKD_ALWAYS_INTERRUPT", "always_interrupt", _as_bool),
166
+ ]
167
+
168
+
169
def load_config(path: str | None = None) -> Config:
    """Build the effective config: defaults -> TOML file -> environment.

    Args:
        path: Explicit TOML file. When None, ``$SPEAKD_CONFIG`` is consulted,
            then the default XDG location (which may be absent).

    Returns:
        A ``Config`` with file values applied over the defaults and
        ``SPEAKD_*`` environment variables applied over both. Empty
        environment values are ignored.

    Raises:
        FileNotFoundError: an explicit path (argument or ``$SPEAKD_CONFIG``)
            does not exist.
        ValueError: a file or environment value cannot be converted, a
            section is not a TOML table, or the resulting device policy is
            not one of ``VALID_DEVICE_POLICIES``.
    """
    cfg = Config()

    explicit = path or os.environ.get("SPEAKD_CONFIG")
    file = explicit or default_config_file()
    if explicit and not os.path.exists(explicit):
        raise FileNotFoundError(f"config file not found: {explicit}")
    if os.path.exists(file):
        with open(file, "rb") as fh:
            data = tomllib.load(fh)
        for section, key, attr, cast in _FILE_KEYS:
            if section not in data:
                continue
            table = data[section]
            # ``tts = 3`` at top level parses fine but is not a table; the
            # membership test below would otherwise fail with a raw TypeError.
            if not isinstance(table, dict):
                raise ValueError(
                    f"[{section}] in {file} must be a table, "
                    f"got {type(table).__name__}"
                )
            if key not in table:
                continue
            try:
                setattr(cfg, attr, cast(table[key]))
            except (TypeError, ValueError) as e:
                raise ValueError(f"bad value for [{section}] {key} in {file}: {e}") from e
        cfg.source_file = file

    for env, attr, cast in _ENV_KEYS:
        raw = os.environ.get(env)
        if raw is not None and raw != "":
            try:
                setattr(cfg, attr, cast(raw))
            except ValueError as e:
                raise ValueError(f"bad value for ${env}={raw!r}: {e}") from e

    if cfg.device not in VALID_DEVICE_POLICIES:
        raise ValueError(
            f"device policy must be one of {VALID_DEVICE_POLICIES}, got {cfg.device!r}"
        )
    return cfg