speakd 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
speakd/__init__.py ADDED
@@ -0,0 +1,37 @@
1
+ """speakd — fire-and-forget local TTS narration over a Unix socket.
2
+
3
+ A small daemon that turns text lines into speech with `Kokoro
4
+ <https://github.com/hexgrad/kokoro>`_, plus a zero-dependency client.
5
+ Designed for narrating long-running work (training runs, builds, pipelines)
6
+ without ever blocking or crashing the thing doing the work.
7
+
8
+ Quickstart::
9
+
10
+ from speakd import speak
11
+ speak("experiment finished") # fire-and-forget
12
+ speak("loss is NaN — stopping", interrupt=True)
13
+ """
14
+ from typing import TYPE_CHECKING
15
+
16
+ __version__ = "0.1.0"
17
+
18
+ if TYPE_CHECKING: # real imports for type checkers / IDEs
19
+ from .client import ensure_daemon, set_volume, speak
20
+ from .config import Config, load_config
21
+
22
+ __all__ = ["speak", "set_volume", "ensure_daemon", "Config", "load_config", "__version__"]
23
+
24
# Public names, grouped by the submodule that actually defines them.
_CLIENT_ATTRS = ("speak", "set_volume", "ensure_daemon")
_CONFIG_ATTRS = ("Config", "load_config")


def __getattr__(name: str):
    """Resolve the package's re-exported names on first access (PEP 562).

    Deferring the submodule imports keeps ``import speakd`` instant and avoids
    eagerly importing submodules that ``python -m speakd.<mod>`` re-executes.

    Raises:
        AttributeError: ``name`` is not one of the lazily re-exported names.
    """
    if name in _CLIENT_ATTRS:
        from . import client as owner
    elif name in _CONFIG_ATTRS:
        from . import config as owner
    else:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return getattr(owner, name)
speakd/client.py ADDED
@@ -0,0 +1,254 @@
1
+ """speakd client: send text to the daemon, with auto-spawn and clean fallback.
2
+
3
+ This module is intentionally dependency-light (stdlib only) so that
4
+ ``import speakd`` and the ``speak`` CLI stay instant even on machines where
5
+ the TTS stack is heavy. The daemon's dependencies are only imported inside
6
+ the daemon process.
7
+
8
+ Python API
9
+ ----------
10
+ from speakd import speak, set_volume, ensure_daemon
11
+
12
+ speak("checkpoint saved") # fire-and-forget
13
+ speak("eval finished", blocking=True) # wait until spoken
14
+ speak("loss is NaN — stopping", interrupt=True) # jump the queue
15
+ set_volume(85) # live, 0-130
16
+
17
+ Every call is safe when the daemon is down: the client auto-spawns it once,
18
+ and if that fails it degrades to the configured fallback engine (espeak by
19
+ default) and logs the event — narration never silently disappears.
20
+
21
+ CLI
22
+ ---
23
+ speak "build finished"
24
+ speak --interrupt "disk is full"
25
+ speak --blocking "done"
26
+ speak --volume 85
27
+ long_running_job | speak # reads stdin when no text args are given
28
+ """
29
+ from __future__ import annotations
30
+
31
+ import argparse
32
+ import datetime
33
+ import os
34
+ import shlex
35
+ import socket
36
+ import subprocess
37
+ import sys
38
+ import time
39
+
40
+ from . import protocol
41
+ from .config import Config, load_config
42
+
43
# Cached default configuration for this process; populated on first demand.
_default_config: Config | None = None


def _get_config(config: Config | None = None) -> Config:
    """Return ``config`` when one is supplied, else the shared default.

    The shared default is built by ``load_config()`` the first time it is
    needed and then reused for the rest of the process.
    """
    global _default_config
    if config is None:
        if _default_config is None:
            _default_config = load_config()
        config = _default_config
    return config
54
+
55
+
56
+ # ── low-level helpers ───────────────────────────────────────────────────────
57
+
58
def _socket_alive(cfg: Config) -> bool:
    """Return True if the daemon accepts connections (~1 ms; safe in hot loops).

    Args:
        cfg: Configuration supplying ``socket_path`` and ``connect_timeout``.

    Returns:
        True when a Unix-socket connection to ``cfg.socket_path`` succeeds
        within ``cfg.connect_timeout`` seconds, False on any ``OSError``.
    """
    try:
        # The context manager closes the descriptor on the failure path too;
        # a refused/timed-out connect would otherwise leak it until GC.
        with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as probe:
            probe.settimeout(cfg.connect_timeout)
            probe.connect(cfg.socket_path)
        return True
    except OSError:
        return False
68
+
69
+
70
def _send(payload: bytes, cfg: Config, wait_ack: bool = False) -> bool:
    """Deliver one wire-protocol line to the daemon.

    Args:
        payload: Already-encoded bytes to write to the socket.
        cfg: Configuration supplying ``socket_path``, ``connect_timeout`` and
            ``ack_timeout``.
        wait_ack: When True, block (up to ``cfg.ack_timeout`` seconds) for the
            daemon's acknowledgement after the payload has been written.

    Returns:
        True once the payload has been handed to the daemon; False when the
        connection or the write failed with an ``OSError``.
    """
    try:
        # ``with`` guarantees the socket is closed on every path, including
        # connect/send failures that previously leaked the descriptor.
        with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as conn:
            conn.settimeout(cfg.connect_timeout)
            conn.connect(cfg.socket_path)
            conn.sendall(payload)
            if wait_ack:
                # Speech can take a while — switch to the generous ack timeout.
                conn.settimeout(cfg.ack_timeout)
                try:
                    # Read buffer: ack length plus 62 bytes of slack
                    # (rationale for the exact slack is not visible here).
                    conn.recv(len(protocol.ACK) + 62)
                except socket.timeout:
                    # The line was already delivered; reporting failure here
                    # would make the caller re-send it and speak it twice.
                    return True
        return True
    except OSError:
        return False
85
+
86
+
87
def _log_fallback(cfg: Config, reason: str) -> None:
    """Record a fallback event (file + stderr) so degraded audio is diagnosable.

    Args:
        cfg: Configuration supplying ``fallback_log``, the file appended to.
        reason: Short explanation written to the log line and the warning.

    File-system errors are swallowed on purpose: logging is best-effort and
    must never break the caller. The stderr warning is always printed.
    """
    try:
        log_dir = os.path.dirname(cfg.fallback_log)
        # A bare filename has no directory part; ``os.makedirs("")`` raises,
        # which used to skip the log write entirely.
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # Explicit encoding so a non-ASCII reason cannot fail on the locale codec.
        with open(cfg.fallback_log, "a", encoding="utf-8") as f:
            f.write(f"{timestamp} FALLBACK reason={reason}\n")
    except OSError:
        pass
    print(f"[speakd] WARNING: fallback engine used — {reason} (see {cfg.fallback_log})",
          file=sys.stderr, flush=True)
98
+
99
+
100
def _fallback_speak(text: str, interrupt: bool, cfg: Config) -> None:
    """Last resort: speak through the configured fallback engine.

    Args:
        text: The line to speak.
        interrupt: When True, first try to stop any running fallback process
            (matched by the engine's executable name via ``pkill -x``).
        cfg: Configuration supplying ``fallback``, an argv template in which
            ``{text}`` is replaced by ``text``. If no element contains
            ``{text}``, the text is appended as the final argument. An empty
            template disables the fallback.

    Never raises for a missing engine or a malformed template: the fallback
    is best-effort and must not crash the caller.
    """
    template = cfg.fallback
    if not template:
        return  # fallback disabled by config
    try:
        argv = [part.format(text=text) for part in template]
    except (AttributeError, IndexError, KeyError, ValueError) as exc:
        # e.g. a stray "{}" or "{txt}" in the user's template.
        print(f"[speakd] WARNING: invalid fallback command template — {exc!r}",
              file=sys.stderr, flush=True)
        return
    if not any("{text}" in part for part in template):
        argv.append(text)

    if interrupt:
        # Best-effort: cut off any in-flight fallback speech first. Guarded
        # separately so a missing ``pkill`` cannot prevent the line below
        # from being spoken.
        try:
            subprocess.run(["pkill", "-x", os.path.basename(argv[0])],
                           stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        except OSError:
            pass
    try:
        subprocess.Popen(argv, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except OSError:
        pass  # fallback engine not installed either — nothing left to try
115
+
116
+
117
+ # ── public API ──────────────────────────────────────────────────────────────
118
+
119
def ensure_daemon(config: Config | None = None) -> bool:
    """Idempotent: make sure a daemon is listening on the configured socket.

    Fast path returns immediately when the socket answers. Otherwise a
    detached daemon is spawned (``python -m speakd.daemon`` with this
    interpreter, overridable via ``$SPEAKD_DAEMON_CMD``) and we wait up to
    ``client.spawn_wait`` seconds for it to come up. Per the daemon's design
    (not shown here), its flock singleton makes concurrent spawn attempts
    harmless.

    Args:
        config: Optional explicit configuration; defaults to the process-wide
            one.

    Returns:
        True if the socket accepts connections before the deadline; False if
        the daemon could not be started or did not come up in time.
    """
    cfg = _get_config(config)
    if _socket_alive(cfg):
        return True

    custom = os.environ.get("SPEAKD_DAEMON_CMD", "")
    try:
        cmd = shlex.split(custom)
    except ValueError as exc:
        # Unbalanced quotes in the override: report and let the caller degrade.
        print(f"[speakd] WARNING: cannot parse $SPEAKD_DAEMON_CMD — {exc}",
              file=sys.stderr, flush=True)
        return False
    if not cmd:
        # Unset, empty, or whitespace-only override: use the built-in command.
        cmd = [sys.executable, "-m", "speakd.daemon"]

    # The child learns which socket to serve through its environment.
    env = dict(os.environ, SPEAKD_SOCKET=cfg.socket_path)
    try:
        log_dir = os.path.dirname(cfg.log_file)
        # A bare filename has no directory part; ``os.makedirs("")`` raises
        # and would abort the spawn before it is even attempted.
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)
        with open(cfg.log_file, "a") as log_fh:
            subprocess.Popen(
                cmd,
                stdout=log_fh,
                stderr=log_fh,
                env=env,
                close_fds=True,
                start_new_session=True,  # detach from our session/process group
            )
    except OSError:
        return False

    # Poll the socket until the daemon answers or the deadline passes.
    deadline = time.monotonic() + cfg.spawn_wait
    while time.monotonic() < deadline:
        if _socket_alive(cfg):
            return True
        time.sleep(0.2)
    return False
155
+
156
+
157
def set_volume(level: int, config: Config | None = None) -> bool:
    """Change the daemon's live playback volume (0-130; 100 = nominal).

    The new level takes effect from the next spoken line, without a restart.
    When the daemon cannot be reached a hint is printed to stderr.

    Returns:
        True if the daemon received the volume command, False otherwise.
    """
    cfg = _get_config(config)
    delivered = _send(protocol.encode_volume(level), cfg)
    if not delivered:
        print(f"[speakd] daemon not running — start it, or export SPEAKD_VOLUME={level}",
              file=sys.stderr)
    return delivered
169
+
170
+
171
def speak(
    text: str,
    blocking: bool = False,
    interrupt: bool = False,
    config: Config | None = None,
) -> bool:
    """Hand a line of text to the voice daemon.

    Delivery is attempted in three stages: send to a running daemon; if that
    fails, start the daemon and send once more; if that fails too, log the
    event and speak through the fallback engine.

    Args:
        text: The text to speak. Empty/whitespace-only text is a no-op.
        blocking: Wait until the daemon has finished speaking the line.
        interrupt: Drain the pending queue and cut off in-flight playback
            before speaking this line.
        config: Optional explicit :class:`speakd.config.Config`.

    Returns:
        True if the line was delivered to the daemon (or there was nothing to
        say); False if the fallback engine had to be used (or nothing could
        speak at all).
    """
    text = text.strip()
    if not text:
        return True

    cfg = _get_config(config)
    wire = protocol.encode_speak(text, interrupt=interrupt)

    def deliver() -> bool:
        return _send(wire, cfg, wait_ack=blocking)

    # First try the running daemon; on failure bring it up and retry once.
    if deliver() or (ensure_daemon(cfg) and deliver()):
        return True

    # Neither attempt worked — degrade to the fallback engine.
    _log_fallback(cfg, "daemon down after spawn attempt")
    _fallback_speak(text, interrupt, cfg)
    return False
209
+
210
+
211
+ # ── CLI ─────────────────────────────────────────────────────────────────────
212
+
213
def main(argv: list[str] | None = None) -> int:
    """Entry point of the ``speak`` command-line tool.

    Args:
        argv: Arguments to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        0 when the text (or a volume-only request) reached the daemon,
        1 when it did not, 2 when there was nothing to speak.

    Raises:
        SystemExit: From argparse on ``--help``/``--version``, bad arguments,
            or an unreadable/invalid config file (status 2).
    """
    from . import __version__

    parser = argparse.ArgumentParser(
        prog="speak",
        description="Send text to the speakd narration daemon.",
        epilog="With no TEXT arguments, text is read from stdin (pipe-friendly).",
    )
    parser.add_argument("text", nargs="*", help="text to speak")
    parser.add_argument("-i", "--interrupt", action="store_true",
                        help="cut off current speech and drain the queue first")
    parser.add_argument("-b", "--blocking", action="store_true",
                        help="wait until the line has been spoken")
    parser.add_argument("--volume", type=int, metavar="N",
                        help="set live playback volume (0-130) before speaking")
    parser.add_argument("--socket", metavar="PATH", help="Unix socket path override")
    parser.add_argument("--config", metavar="PATH", help="TOML config file")
    parser.add_argument("--version", action="version", version=f"speakd {__version__}")
    args = parser.parse_args(argv)

    try:
        cfg = load_config(args.config)
    except (OSError, ValueError) as exc:
        # Missing file or bad value: report it like any other usage error
        # instead of dumping a traceback.
        parser.error(str(exc))
    if args.socket:
        cfg.socket_path = args.socket

    volume_ok = True
    if args.volume is not None:
        volume_ok = set_volume(args.volume, config=cfg)

    text = " ".join(args.text)
    if not text and not sys.stdin.isatty():
        text = sys.stdin.read().strip()
    if not text:
        if args.volume is not None:
            # Volume-only invocation: the exit status reflects whether the
            # daemon actually received the new level.
            return 0 if volume_ok else 1
        parser.print_usage(sys.stderr)
        return 2

    delivered = speak(text, blocking=args.blocking, interrupt=args.interrupt, config=cfg)
    return 0 if delivered else 1
251
+
252
+
253
# Script entry: hand main()'s return value to the interpreter as exit status.
if __name__ == "__main__":
    raise SystemExit(main())
speakd/config.py ADDED
@@ -0,0 +1,189 @@
1
+ """Configuration for speakd.
2
+
3
+ Precedence (lowest to highest):
4
+
5
+ 1. Built-in defaults (work out of the box on CPU)
6
+ 2. TOML config file
7
+ 3. ``SPEAKD_*`` environment variables
8
+ 4. CLI flags (applied by the entry points)
9
+
10
+ The config file is looked up in this order:
11
+
12
+ 1. ``$SPEAKD_CONFIG``
13
+ 2. ``$XDG_CONFIG_HOME/speakd/config.toml``
14
+ (default: ``~/.config/speakd/config.toml``)
15
+
16
+ Missing files are fine — every key has a sane default.
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import os
21
+ import tempfile
22
+ from dataclasses import dataclass, field, fields
23
+
24
+ try:
25
+ import tomllib # Python 3.11+
26
+ except ModuleNotFoundError: # pragma: no cover - Python 3.10
27
+ import tomli as tomllib # type: ignore[no-redef]
28
+
29
+ VALID_DEVICE_POLICIES = ("auto", "cpu", "gpu")
30
+
31
+
32
def default_socket_path() -> str:
    """Return the default per-user socket path.

    ``$XDG_RUNTIME_DIR/speakd.sock`` is used when that variable names an
    existing directory; otherwise the path is ``speakd-<uid>.sock`` under the
    system temp dir.
    """
    runtime_dir = os.environ.get("XDG_RUNTIME_DIR")
    if not runtime_dir or not os.path.isdir(runtime_dir):
        return os.path.join(tempfile.gettempdir(), f"speakd-{os.getuid()}.sock")
    return os.path.join(runtime_dir, "speakd.sock")
39
+
40
+
41
def default_state_dir() -> str:
    """Return ``$XDG_STATE_HOME/speakd`` (default: ``~/.local/state/speakd``).

    Following the XDG Base Directory rules, an unset, empty, or relative
    ``$XDG_STATE_HOME`` is ignored in favour of ``~/.local/state`` so the
    result is never a path relative to the current directory.
    """
    state_home = os.environ.get("XDG_STATE_HOME", "")
    if not os.path.isabs(state_home):
        state_home = os.path.join(os.path.expanduser("~"), ".local", "state")
    return os.path.join(state_home, "speakd")
47
+
48
+
49
def default_config_file() -> str:
    """Return ``$XDG_CONFIG_HOME/speakd/config.toml``.

    Following the XDG Base Directory rules, an unset, empty, or relative
    ``$XDG_CONFIG_HOME`` is ignored in favour of ``~/.config`` so the result
    is never a path relative to the current directory.
    """
    config_home = os.environ.get("XDG_CONFIG_HOME", "")
    if not os.path.isabs(config_home):
        config_home = os.path.join(os.path.expanduser("~"), ".config")
    return os.path.join(config_home, "speakd", "config.toml")
54
+
55
+
56
@dataclass
class Config:
    """Effective speakd configuration. See ``config.example.toml`` for docs.

    Fields are grouped by the TOML section they are loaded from. Two derived
    paths, :attr:`lock_path` and :attr:`fallback_log`, are computed rather
    than stored.
    """

    # [tts]
    voice: str = "af_heart"  # Kokoro voice id (af_heart, bf_emma, am_adam, ...)
    speed: float = 1.0  # speech-rate multiplier
    lang_code: str = "a"  # Kokoro language code ("a" = American English)

    # [device]
    device: str = "auto"  # "cpu" | "gpu" | "auto" (dynamic offload)
    keepalive_seconds: int = 180  # idle seconds before GPU -> CPU offload

    # [daemon]
    socket_path: str = field(default_factory=default_socket_path)
    socket_mode: int = 0o600  # permissions applied to the socket file
    log_file: str = field(
        default_factory=lambda: os.path.join(default_state_dir(), "daemon.log")
    )

    # [audio]
    volume: int = 100  # playback volume, 0-130 (mpv scale)
    max_playback_seconds: int = 120  # kill the player after this long
    player: list[str] = field(
        default_factory=lambda: ["mpv", "--no-terminal", "--volume={volume}", "{file}"]
    )

    # [fallback] - argv template used when TTS fails; [] disables the fallback
    fallback: list[str] = field(
        default_factory=lambda: ["espeak", "-s", "160", "-v", "en-us", "{text}"]
    )

    # [client]
    connect_timeout: float = 0.5  # seconds to connect/send on the socket
    ack_timeout: float = 300.0  # seconds to wait for the ack in blocking mode
    spawn_wait: float = 4.0  # seconds to wait for an auto-spawned daemon

    # Path of the TOML file this config was loaded from ("" if defaults only).
    source_file: str = ""

    @property
    def lock_path(self) -> str:
        """Path of the singleton flock file: the socket path plus ``.lock``."""
        return self.socket_path + ".lock"

    @property
    def fallback_log(self) -> str:
        """Path of the client-side fallback log, in the daemon log's directory."""
        log_dir = os.path.dirname(self.log_file)
        return os.path.join(log_dir, "fallback.log")

    def describe(self) -> str:
        """Render the effective configuration as ``name = value`` lines.

        The first line names the source file (or ``defaults``);
        ``socket_mode`` is shown in octal and the two derived paths are
        appended at the end.
        """
        rows = [f"# effective speakd config (source: {self.source_file or 'defaults'})"]
        for spec in fields(self):
            if spec.name == "source_file":
                continue
            raw = getattr(self, spec.name)
            shown = oct(raw) if spec.name == "socket_mode" else raw
            rows.append(f"{spec.name} = {shown!r}")
        rows += [
            f"lock_path = {self.lock_path!r}",
            f"fallback_log = {self.fallback_log!r}",
        ]
        return "\n".join(rows)
119
+
120
+
121
def _octal_mode(value) -> int:
    """Read ``value`` as a string of octal digits (``600`` or ``"600"`` -> 0o600).

    NOTE(review): a native TOML octal literal such as ``0o644`` arrives as the
    int 420 and is re-read as octal "420"; confirm the documented spelling.
    """
    return int(str(value), 8)


def _argv_list(value) -> list[str]:
    """Coerce a TOML array into an argv list of strings.

    Raises:
        TypeError: ``value`` is not an array. A plain string is rejected
            because iterating it would split the command into characters.
    """
    if not isinstance(value, (list, tuple)):
        raise TypeError(f"expected an array of strings, got {type(value).__name__}")
    return [str(arg) for arg in value]


# (section, key, attribute, caster) - the full TOML surface.
_FILE_KEYS = [
    ("tts", "voice", "voice", str),
    ("tts", "speed", "speed", float),
    ("tts", "lang_code", "lang_code", str),
    ("device", "policy", "device", str),
    ("device", "keepalive_seconds", "keepalive_seconds", int),
    ("daemon", "socket_path", "socket_path", str),
    ("daemon", "socket_mode", "socket_mode", _octal_mode),
    ("daemon", "log_file", "log_file", str),
    ("audio", "volume", "volume", int),
    ("audio", "max_playback_seconds", "max_playback_seconds", int),
    ("audio", "player", "player", _argv_list),
    ("fallback", "command", "fallback", _argv_list),
    ("client", "connect_timeout", "connect_timeout", float),
    ("client", "ack_timeout", "ack_timeout", float),
    ("client", "spawn_wait", "spawn_wait", float),
]

# (environment variable, attribute, caster) - overrides for the headline knobs.
_ENV_KEYS = [
    ("SPEAKD_VOICE", "voice", str),
    ("SPEAKD_SPEED", "speed", float),
    ("SPEAKD_LANG", "lang_code", str),
    ("SPEAKD_DEVICE", "device", str),
    ("SPEAKD_KEEPALIVE", "keepalive_seconds", int),
    ("SPEAKD_SOCKET", "socket_path", str),
    ("SPEAKD_VOLUME", "volume", int),
    ("SPEAKD_LOG_FILE", "log_file", str),
]
151
+
152
+
153
def load_config(path: str | None = None) -> Config:
    """Build the effective config: defaults -> TOML file -> environment.

    ``path`` (or ``$SPEAKD_CONFIG``) names an explicit TOML file; an explicit
    path that does not exist raises ``FileNotFoundError``. The default
    XDG-location file is optional and silently skipped when absent.

    Args:
        path: Explicit TOML file; takes precedence over ``$SPEAKD_CONFIG``.

    Returns:
        A :class:`Config` with file values applied over the defaults and
        non-empty ``SPEAKD_*`` environment variables applied over both.

    Raises:
        FileNotFoundError: An explicitly named config file does not exist.
        ValueError: A section is not a table, a value cannot be converted,
            or the resulting device policy is not a valid one.
    """
    cfg = Config()

    explicit = path or os.environ.get("SPEAKD_CONFIG")
    file = explicit or default_config_file()
    if explicit and not os.path.exists(explicit):
        raise FileNotFoundError(f"config file not found: {explicit}")
    if os.path.exists(file):
        with open(file, "rb") as fh:
            data = tomllib.load(fh)
        for section, key, attr, cast in _FILE_KEYS:
            table = data.get(section)
            if table is None:
                continue
            if not isinstance(table, dict):
                # e.g. ``tts = 1`` at top level: fail with a clear message
                # rather than a raw TypeError from the membership test.
                raise ValueError(
                    f"[{section}] in {file} must be a table, got {type(table).__name__}"
                )
            if key not in table:
                continue
            try:
                setattr(cfg, attr, cast(table[key]))
            except (TypeError, ValueError) as e:
                raise ValueError(f"bad value for [{section}] {key} in {file}: {e}") from e
        cfg.source_file = file

    # Environment beats the file; empty variables are treated as unset.
    for env, attr, cast in _ENV_KEYS:
        raw = os.environ.get(env)
        if raw is not None and raw != "":
            try:
                setattr(cfg, attr, cast(raw))
            except ValueError as e:
                raise ValueError(f"bad value for ${env}={raw!r}: {e}") from e

    if cfg.device not in VALID_DEVICE_POLICIES:
        raise ValueError(
            f"device policy must be one of {VALID_DEVICE_POLICIES}, got {cfg.device!r}"
        )
    return cfg