PyPI - voxcaster - Versions diffs - 0.2.0__py3-none-any.whl - Mend

voxcaster 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

speakd/__init__.py +47 -0
speakd/client.py +289 -0
speakd/config.py +205 -0
speakd/daemon.py +412 -0
speakd/engine.py +211 -0
speakd/markdown.py +87 -0
speakd/protocol.py +63 -0
voxcaster/__init__.py +47 -0
voxcaster/client.py +289 -0
voxcaster/config.py +205 -0
voxcaster/daemon.py +412 -0
voxcaster/engine.py +211 -0
voxcaster/markdown.py +87 -0
voxcaster/protocol.py +63 -0
voxcaster-0.2.0.dist-info/METADATA +261 -0
voxcaster-0.2.0.dist-info/RECORD +20 -0
voxcaster-0.2.0.dist-info/WHEEL +5 -0
voxcaster-0.2.0.dist-info/entry_points.txt +3 -0
voxcaster-0.2.0.dist-info/licenses/LICENSE +21 -0
voxcaster-0.2.0.dist-info/top_level.txt +2 -0

speakd/__init__.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""speakd — fire-and-forget local TTS narration over a Unix socket.
+A small daemon that turns text lines into speech with `Kokoro
+<https://github.com/hexgrad/kokoro>`_, plus a zero-dependency client.
+Designed for narrating long-running work (training runs, builds, pipelines)
+without ever blocking or crashing the thing doing the work.
+Quickstart::
+    from speakd import speak
+    speak("experiment finished")                 # fire-and-forget
+    speak("loss is NaN — stopping", interrupt=True)
+"""
+from typing import TYPE_CHECKING
# NOTE(review): the page header names the enclosing wheel voxcaster 0.2.0,
# while this module reports 0.1.0 — confirm which value is intended.
__version__ = "0.1.0"

if TYPE_CHECKING:
    # Seen only by type checkers / IDEs. At runtime these names are resolved
    # on first access by the module-level ``__getattr__``.
    from .client import ensure_daemon, ping, set_volume, speak
    from .config import Config, load_config
    from .markdown import extract_tts_summary, preprocess_for_speech, strip_markdown

# Lazily re-exported names, grouped by the submodule that provides them.
_CLIENT_ATTRS = (
    "speak",
    "ping",
    "set_volume",
    "ensure_daemon",
)
_CONFIG_ATTRS = (
    "Config",
    "load_config",
)
_MARKDOWN_ATTRS = (
    "strip_markdown",
    "extract_tts_summary",
    "preprocess_for_speech",
)

# Public API of the package.
__all__ = [
    "speak",
    "ping",
    "set_volume",
    "ensure_daemon",
    "Config",
    "load_config",
    "strip_markdown",
    "extract_tts_summary",
    "preprocess_for_speech",
    "__version__",
]
def __getattr__(name: str):
    """Resolve a lazily re-exported name on first access (PEP 562).

    The submodule that owns ``name`` is imported only when the attribute is
    requested, so ``import speakd`` itself does not import ``client``,
    ``config`` or ``markdown``.

    Raises:
        AttributeError: ``name`` is not one of the lazily exported names.
    """
    if name in _CLIENT_ATTRS:
        from . import client as provider
    elif name in _CONFIG_ATTRS:
        from . import config as provider
    elif name in _MARKDOWN_ATTRS:
        from . import markdown as provider
    else:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return getattr(provider, name)

speakd/client.py ADDED Viewed

@@ -0,0 +1,289 @@
+"""speakd client: send text to the daemon, with auto-spawn and clean fallback.
+This module is intentionally dependency-light (stdlib only) so that
+``import speakd`` and the ``speak`` CLI stay instant even on machines where
+the TTS stack is heavy. The daemon's dependencies are only imported inside
+the daemon process.
+Python API
+----------
+    from speakd import speak, set_volume, ensure_daemon
+    speak("checkpoint saved")                      # fire-and-forget
+    speak("eval finished", blocking=True)          # wait until spoken
+    speak("loss is NaN — stopping", interrupt=True)  # jump the queue
+    set_volume(85)                                 # live, 0-130
+Every call is safe when the daemon is down: the client auto-spawns it once,
+and if that fails it degrades to the configured fallback engine (espeak by
+default) and logs the event — narration never silently disappears.
+CLI
+---
+    speak "build finished"
+    speak --interrupt "disk is full"
+    speak --blocking "done"
+    speak --volume 85
+    long_running_job | speak        # reads stdin when no text args are given
+"""
+from __future__ import annotations
+import argparse
+import datetime
+import os
+import shlex
+import socket
+import subprocess
+import sys
+import time
+from . import protocol
+from .config import Config, load_config
# Cached process-wide configuration; filled in by the first call that needs it.
_default_config: Config | None = None


def _get_config(config: Config | None = None) -> Config:
    """Return ``config`` if one is given, otherwise the cached default.

    The default is built with ``load_config()`` the first time it is needed
    and stored in the module-level ``_default_config`` for later calls.
    """
    global _default_config
    if config is None:
        if _default_config is None:
            _default_config = load_config()
        return _default_config
    return config
+# ── low-level helpers ───────────────────────────────────────────────────────
def _socket_alive(cfg: Config) -> bool:
    """Return True if a connection to ``cfg.socket_path`` can be opened.

    The probe connects with a timeout of ``cfg.connect_timeout`` seconds and
    sends nothing. Any ``OSError`` (missing socket file, refused connection,
    timeout) is reported as False.
    """
    try:
        # The context manager closes the socket on every path, including a
        # failed connect, so repeated probes do not leak file descriptors.
        with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s:
            s.settimeout(cfg.connect_timeout)
            s.connect(cfg.socket_path)
        return True
    except OSError:
        return False
def _send(payload: bytes, cfg: Config, wait_ack: bool = False) -> bool:
    """Deliver one wire-protocol line to the daemon socket.

    Args:
        payload:  Encoded bytes to write to the socket.
        cfg:      Supplies ``socket_path``, ``connect_timeout`` and
                  ``ack_timeout``.
        wait_ack: When True, block for a reply after sending (up to
                  ``cfg.ack_timeout`` seconds). The reply content is not
                  inspected.

    Returns:
        True if the payload was sent (and, with ``wait_ack``, the read
        returned); False on any socket error, including timeouts.
    """
    try:
        # ``with`` guarantees the socket is closed even when connect/send/recv
        # raises; previously the descriptor was left open on the error path.
        with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s:
            s.settimeout(cfg.connect_timeout)
            s.connect(cfg.socket_path)
            s.sendall(payload)
            if wait_ack:
                # Speech can take a while — switch to the generous ack timeout.
                s.settimeout(cfg.ack_timeout)
                s.recv(len(protocol.ACK) + 62)
        return True
    except OSError:
        return False
def _log_fallback(cfg: Config, reason: str) -> None:
    """Record a fallback event in ``cfg.fallback_log`` and warn on stderr.

    The file write is best-effort: any ``OSError`` while creating the
    directory or appending the line is ignored. The stderr warning is always
    printed.
    """
    log_path = cfg.fallback_log
    try:
        # dirname() is "" for a bare filename and makedirs("") raises, which
        # used to skip the log write entirely — only create a real directory.
        log_dir = os.path.dirname(log_path)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # Explicit encoding keeps the log independent of the process locale.
        with open(log_path, "a", encoding="utf-8") as f:
            f.write(f"{timestamp}  FALLBACK  reason={reason}\n")
    except OSError:
        pass
    print(f"[speakd] WARNING: fallback engine used — {reason}  (see {cfg.fallback_log})",
          file=sys.stderr, flush=True)
def _fallback_speak(text: str, interrupt: bool, cfg: Config) -> None:
    """Last resort: speak ``text`` through the configured fallback command.

    ``cfg.fallback`` is an argv template; ``{text}`` in any element is
    replaced by the text, and the text is appended as a final argument when
    no element contains the placeholder. An empty template disables the
    fallback. The command is started and not waited for.

    Args:
        text:      The text to speak.
        interrupt: When True, first try to stop running instances of the
                   fallback program (``pkill -x <basename of argv[0]>``).
        cfg:       Supplies the ``fallback`` argv template.

    This function never raises for a missing or unusable fallback engine.
    """
    if not cfg.fallback:
        return  # fallback disabled by config
    try:
        argv = [a.format(text=text) for a in cfg.fallback]
    except (KeyError, IndexError, ValueError):
        # The template contains braces other than the ``{text}`` placeholder
        # (e.g. "{volume}" or a stray "{"); substitute literally instead of
        # letting a formatting error escape into the caller.
        argv = [a.replace("{text}", text) for a in cfg.fallback]
    if not any("{text}" in a for a in cfg.fallback):
        argv.append(text)
    if interrupt:
        # Best-effort: cut off any in-flight fallback speech first. Handled
        # separately so that a missing ``pkill`` does not prevent speaking.
        try:
            subprocess.run(["pkill", "-x", os.path.basename(argv[0])],
                           stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        except (OSError, ValueError):
            pass
    try:
        subprocess.Popen(argv, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except (OSError, ValueError):
        # OSError: fallback engine not installed / not executable.
        # ValueError: argv cannot be passed to exec (e.g. embedded NUL byte).
        pass  # nothing left to try
+# ── public API ──────────────────────────────────────────────────────────────
def ensure_daemon(config: Config | None = None) -> bool:
    """Idempotent: make sure a daemon is listening on the configured socket.

    Fast path returns immediately when the socket answers. Otherwise a
    detached daemon is spawned (``python -m speakd.daemon`` with this
    interpreter, overridable via ``$SPEAKD_DAEMON_CMD``) with its output
    appended to ``cfg.log_file``, and the socket is polled for up to
    ``cfg.spawn_wait`` seconds.

    Concurrent spawn attempts are presumably resolved by the daemon's own
    singleton lock — that logic is not part of this module.

    Args:
        config: Optional explicit config; the process-wide default otherwise.

    Returns:
        True if the socket accepts connections; False if the daemon could
        not be started or did not come up in time.
    """
    cfg = _get_config(config)
    if _socket_alive(cfg):
        return True
    custom = os.environ.get("SPEAKD_DAEMON_CMD", "")
    try:
        cmd = shlex.split(custom) if custom else [sys.executable, "-m", "speakd.daemon"]
    except ValueError:
        # Unbalanced quotes in $SPEAKD_DAEMON_CMD: report "could not start"
        # rather than raising into code that only wanted to narrate.
        return False
    # The child is told which socket to serve through its environment.
    env = dict(os.environ, SPEAKD_SOCKET=cfg.socket_path)
    try:
        # dirname() is "" for a bare filename and makedirs("") raises, which
        # used to abort the spawn — only create a real directory.
        log_dir = os.path.dirname(cfg.log_file)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)
        with open(cfg.log_file, "a") as log_fh:
            subprocess.Popen(
                cmd,
                stdout=log_fh,
                stderr=log_fh,
                env=env,
                close_fds=True,
                start_new_session=True,  # detach from our session
            )
    except OSError:
        return False
    deadline = time.monotonic() + cfg.spawn_wait
    while time.monotonic() < deadline:
        if _socket_alive(cfg):
            return True
        time.sleep(0.2)
    return False
def set_volume(level: int, config: Config | None = None) -> bool:
    """Send a volume command to the daemon (documented scale: 0-130).

    Args:
        level:  Requested volume; passed to ``protocol.encode_volume``.
        config: Optional explicit config; the process-wide default otherwise.

    Returns:
        True if the command was delivered to the daemon socket. On failure a
        hint is printed to stderr and False is returned.
    """
    cfg = _get_config(config)
    delivered = _send(protocol.encode_volume(level), cfg)
    if not delivered:
        print(f"[speakd] daemon not running — start it, or export SPEAKD_VOLUME={level}",
              file=sys.stderr)
        return False
    return True
def ping(config: Config | None = None) -> str:
    """Health-check the daemon.

    Sends ``protocol.encode_ping()`` and reads one reply.

    Args:
        config: Optional explicit config; the process-wide default otherwise.

    Returns:
        ``"ready"`` if the reply equals ``protocol.PING_READY`` (ignoring
        surrounding whitespace), ``"down"`` if the socket exchange fails with
        an ``OSError``, and ``"starting"`` for any other reply.
    """
    cfg = _get_config(config)
    try:
        # ``with`` closes the socket on the error path too; previously a
        # failed connect/send/recv left the descriptor open.
        with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s:
            s.settimeout(cfg.connect_timeout)
            s.connect(cfg.socket_path)
            s.sendall(protocol.encode_ping())
            reply = s.recv(len(protocol.PING_STARTING) + 8)
    except OSError:
        return "down"
    if reply.strip() == protocol.PING_READY.strip():
        return "ready"
    return "starting"
def speak(
    text: str,
    blocking: bool = False,
    interrupt: bool = False,
    voice: str | None = None,
    config: Config | None = None,
) -> bool:
    """Send text to the voice daemon, falling back if it cannot be reached.

    Delivery is attempted in three stages: send to the running daemon; if
    that fails, call ``ensure_daemon`` and retry once; if that fails too, log
    the event and hand the text to the fallback engine.

    Args:
        text:      The text to speak. Empty/whitespace-only text is a no-op.
        blocking:  Wait for the daemon's reply before returning.
        interrupt: Forwarded to ``protocol.encode_speak`` and to the fallback
                   engine; asks for this line to pre-empt other speech.
        voice:     Per-request voice id override, or None.
        config:    Optional explicit :class:`speakd.config.Config`.

    Returns:
        True if the line was delivered to the daemon (or there was nothing to
        say); False if the fallback path was taken.
    """
    stripped = text.strip()
    if not stripped:
        return True

    cfg = _get_config(config)
    wire = protocol.encode_speak(stripped, interrupt=interrupt, voice=voice)

    # First attempt goes straight to the socket; only on failure do we try to
    # bring the daemon up and resend once.
    delivered = _send(wire, cfg, wait_ack=blocking)
    if not delivered and ensure_daemon(cfg):
        delivered = _send(wire, cfg, wait_ack=blocking)
    if delivered:
        return True

    # Nothing reachable on the socket — use the fallback engine.
    _log_fallback(cfg, "daemon down after spawn attempt")
    _fallback_speak(stripped, interrupt, cfg)
    return False
+# ── CLI ─────────────────────────────────────────────────────────────────────
def main(argv: list[str] | None = None) -> int:
    """Entry point of the ``speak`` command-line client.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use ``sys.argv``.

    Returns:
        Process exit status: 0 on success; 1 when the daemon is not ready
        (``--ping``), the text was not delivered to the daemon, or a
        volume-only request did not reach the daemon; 2 when there is nothing
        to do. Usage and configuration errors exit through argparse
        (``SystemExit`` with status 2).
    """
    from . import __version__
    parser = argparse.ArgumentParser(
        prog="speak",
        description="Send text to the speakd narration daemon.",
        epilog="With no TEXT arguments, text is read from stdin (pipe-friendly).",
    )
    parser.add_argument("text", nargs="*", help="text to speak")
    parser.add_argument("-i", "--interrupt", action="store_true",
                        help="cut off current speech and drain the queue first")
    parser.add_argument("-b", "--blocking", action="store_true",
                        help="wait until the line has been spoken")
    parser.add_argument("--volume", type=int, metavar="N",
                        help="set live playback volume (0-130) before speaking")
    parser.add_argument("--voice", metavar="ID",
                        help="per-request Kokoro voice id (e.g. af_heart, bf_emma)")
    parser.add_argument("--ping", action="store_true",
                        help="health-check the daemon and exit (prints ready/starting/down)")
    parser.add_argument("--socket", metavar="PATH", help="Unix socket path override")
    parser.add_argument("--config", metavar="PATH", help="TOML config file")
    parser.add_argument("--version", action="version", version=f"speakd {__version__}")
    args = parser.parse_args(argv)

    try:
        cfg = load_config(args.config)
    except (OSError, ValueError) as exc:
        # Missing/unreadable file or a bad value: report it as a normal CLI
        # error instead of a traceback. parser.error() exits with status 2.
        parser.error(str(exc))
    if args.socket:
        cfg.socket_path = args.socket

    if args.ping:
        status = ping(config=cfg)
        print(status)
        return 0 if status == "ready" else 1

    volume_ok = True
    if args.volume is not None:
        volume_ok = set_volume(args.volume, config=cfg)

    text = " ".join(args.text)
    if not text and not sys.stdin.isatty():
        # No TEXT arguments and stdin is a pipe/file: narrate its content.
        text = sys.stdin.read().strip()
    if not text:
        if args.volume is not None:
            # Volume-only invocation: succeed only if the daemon received it.
            return 0 if volume_ok else 1
        parser.print_usage(sys.stderr)
        return 2

    delivered = speak(text, blocking=args.blocking, interrupt=args.interrupt,
                      voice=args.voice, config=cfg)
    return 0 if delivered else 1


if __name__ == "__main__":
    sys.exit(main())

speakd/config.py ADDED Viewed

@@ -0,0 +1,205 @@
+"""Configuration for speakd.
+Precedence (lowest to highest):
+    1. Built-in defaults (work out of the box on CPU)
+    2. TOML config file
+    3. ``SPEAKD_*`` environment variables
+    4. CLI flags (applied by the entry points)
+The config file is looked up in this order:
+    1. ``$SPEAKD_CONFIG``
+    2. ``$XDG_CONFIG_HOME/speakd/config.toml``
+       (default: ``~/.config/speakd/config.toml``)
+Missing files are fine — every key has a sane default.
+"""
+from __future__ import annotations
+import os
+import tempfile
+from dataclasses import dataclass, field, fields
+try:
+    import tomllib  # Python 3.11+
+except ModuleNotFoundError:  # pragma: no cover - Python 3.10
+    import tomli as tomllib  # type: ignore[no-redef]
# Values accepted for the device policy setting.
VALID_DEVICE_POLICIES = ("auto", "cpu", "gpu")


def _as_bool(value: object) -> bool:
    """Coerce a TOML/env value to bool.

    Native bools are returned unchanged. Anything else is converted to a
    string, trimmed and lower-cased; ``1``, ``true``, ``yes`` and ``on`` map
    to True and every other string maps to False.
    """
    if not isinstance(value, bool):
        normalized = str(value).strip().lower()
        return normalized in ("1", "true", "yes", "on")
    return value
def default_socket_path() -> str:
    """Return the default per-user socket path.

    ``$XDG_RUNTIME_DIR/speakd.sock`` when that variable names an existing
    directory; otherwise ``speakd-<uid>.sock`` under the system temp dir.
    """
    xdg_runtime = os.environ.get("XDG_RUNTIME_DIR")
    if not xdg_runtime or not os.path.isdir(xdg_runtime):
        return os.path.join(tempfile.gettempdir(), f"speakd-{os.getuid()}.sock")
    return os.path.join(xdg_runtime, "speakd.sock")
def default_state_dir() -> str:
    """Return ``$XDG_STATE_HOME/speakd`` (default: ``~/.local/state/speakd``).

    An unset *or empty* ``XDG_STATE_HOME`` selects the default, as the XDG
    Base Directory specification requires; an empty value previously produced
    the relative path ``speakd``.
    """
    state_home = os.environ.get("XDG_STATE_HOME") or os.path.join(
        os.path.expanduser("~"), ".local", "state"
    )
    return os.path.join(state_home, "speakd")
def default_config_file() -> str:
    """Return ``$XDG_CONFIG_HOME/speakd/config.toml``.

    The default base is ``~/.config``. An unset *or empty*
    ``XDG_CONFIG_HOME`` selects the default, as the XDG Base Directory
    specification requires; an empty value previously produced the relative
    path ``speakd/config.toml``.
    """
    config_home = os.environ.get("XDG_CONFIG_HOME") or os.path.join(
        os.path.expanduser("~"), ".config"
    )
    return os.path.join(config_home, "speakd", "config.toml")
@dataclass
class Config:
    """Effective speakd configuration. See ``config.example.toml`` for docs.

    Field comments name the TOML section each value belongs to.
    ``lock_path`` and ``fallback_log`` are derived properties, not fields.
    """

    # ── [tts] ──
    voice: str = "af_heart"           # Kokoro voice id (af_heart, bf_emma, am_adam, ...)
    speed: float = 1.0                # speech-rate multiplier
    lang_code: str = "a"              # Kokoro language code ("a" = American English)
    markdown_preprocess: bool = True  # strip markdown / extract TTS_SUMMARY before synthesis

    # ── [device] ──
    device: str = "auto"              # "cpu" | "gpu" | "auto" (dynamic offload)
    keepalive_seconds: int = 180      # idle seconds before GPU -> CPU offload

    # ── [daemon] ──
    socket_path: str = field(default_factory=default_socket_path)
    socket_mode: int = 0o600          # permissions applied to the socket file
    always_interrupt: bool = False    # treat every request as an interrupt (latest-only)
    log_file: str = field(
        default_factory=lambda: os.path.join(default_state_dir(), "daemon.log"),
    )

    # ── [audio] ──
    volume: int = 100                 # playback volume, 0-130 (mpv scale)
    max_playback_seconds: int = 120   # kill the player after this long
    player: list[str] = field(
        default_factory=lambda: [
            "mpv",
            "--no-terminal",
            "--volume={volume}",
            "{file}",
        ],
    )

    # ── [fallback] ── argv template used when TTS fails; [] disables the fallback
    fallback: list[str] = field(
        default_factory=lambda: [
            "espeak",
            "-s",
            "160",
            "-v",
            "en-us",
            "{text}",
        ],
    )

    # ── [client] ──
    connect_timeout: float = 0.5      # seconds to connect/send on the socket
    ack_timeout: float = 300.0        # seconds to wait for the ack in blocking mode
    spawn_wait: float = 4.0           # seconds to wait for an auto-spawned daemon

    # Path of the TOML file this config was loaded from ("" if defaults only).
    source_file: str = ""

    @property
    def lock_path(self) -> str:
        """Lock-file path: the socket path with ``.lock`` appended."""
        return self.socket_path + ".lock"

    @property
    def fallback_log(self) -> str:
        """Path of ``fallback.log`` in the directory that holds ``log_file``."""
        log_dir = os.path.dirname(self.log_file)
        return os.path.join(log_dir, "fallback.log")

    def describe(self) -> str:
        """Render the effective configuration as ``name = value`` lines.

        The first line is a comment naming the source file (or ``defaults``).
        ``source_file`` itself is omitted, ``socket_mode`` is shown in octal
        notation, and the two derived paths are appended at the end.
        """
        origin = self.source_file or "defaults"
        rendered = [f"# effective speakd config (source: {origin})"]
        for spec in fields(self):
            if spec.name == "source_file":
                continue
            raw = getattr(self, spec.name)
            shown = oct(raw) if spec.name == "socket_mode" else raw
            rendered.append(f"{spec.name} = {shown!r}")
        rendered += [
            f"lock_path = {self.lock_path!r}",
            f"fallback_log = {self.fallback_log!r}",
        ]
        return "\n".join(rendered)
def _as_argv(value: object) -> list[str]:
    """Coerce a TOML array into an argv list of strings.

    Only arrays are accepted. A bare string such as ``player = "mpv"`` is
    rejected: iterating it would silently split the command into single
    characters.

    Raises:
        TypeError: ``value`` is not a list or tuple.
    """
    if not isinstance(value, (list, tuple)):
        raise TypeError(
            f"expected an array of strings, got {type(value).__name__}"
        )
    return [str(a) for a in value]


# (section, key, attribute, caster) - the full TOML surface.
# NOTE(review): socket_mode reads the value's decimal digits as octal, so
# 600 and "0o600" both give 0o600, but a native TOML octal literal such as
# 0o644 is re-read from "420" — confirm the intended config syntax.
_FILE_KEYS = [
    ("tts", "voice", "voice", str),
    ("tts", "speed", "speed", float),
    ("tts", "lang_code", "lang_code", str),
    ("tts", "markdown_preprocess", "markdown_preprocess", _as_bool),
    ("device", "policy", "device", str),
    ("device", "keepalive_seconds", "keepalive_seconds", int),
    ("daemon", "socket_path", "socket_path", str),
    ("daemon", "socket_mode", "socket_mode", lambda v: int(str(v), 8)),
    ("daemon", "always_interrupt", "always_interrupt", _as_bool),
    ("daemon", "log_file", "log_file", str),
    ("audio", "volume", "volume", int),
    ("audio", "max_playback_seconds", "max_playback_seconds", int),
    ("audio", "player", "player", _as_argv),
    ("fallback", "command", "fallback", _as_argv),
    ("client", "connect_timeout", "connect_timeout", float),
    ("client", "ack_timeout", "ack_timeout", float),
    ("client", "spawn_wait", "spawn_wait", float),
]
# (environment variable, attribute, caster) - overrides for the headline knobs.
_ENV_KEYS = [
    ("SPEAKD_VOICE", "voice", str),
    ("SPEAKD_SPEED", "speed", float),
    ("SPEAKD_LANG", "lang_code", str),
    ("SPEAKD_DEVICE", "device", str),
    ("SPEAKD_KEEPALIVE", "keepalive_seconds", int),
    ("SPEAKD_SOCKET", "socket_path", str),
    ("SPEAKD_VOLUME", "volume", int),
    ("SPEAKD_LOG_FILE", "log_file", str),
    ("SPEAKD_MARKDOWN_PREPROCESS", "markdown_preprocess", _as_bool),
    ("SPEAKD_ALWAYS_INTERRUPT", "always_interrupt", _as_bool),
]
def load_config(path: str | None = None) -> Config:
    """Build the effective config: defaults -> TOML file -> environment.

    Args:
        path: Explicit TOML file. When omitted, ``$SPEAKD_CONFIG`` is used,
              and failing that the default XDG-location file, which is
              optional and silently skipped when absent.

    Returns:
        The populated :class:`Config`; ``source_file`` records the TOML file
        that was read, if any.

    Raises:
        FileNotFoundError: an explicit path (argument or ``$SPEAKD_CONFIG``)
            does not exist.
        ValueError: a file or environment value cannot be converted, a
            section is not a table, or the device policy is not one of
            ``VALID_DEVICE_POLICIES``.
    """
    cfg = Config()
    explicit = path or os.environ.get("SPEAKD_CONFIG")
    file = explicit or default_config_file()
    if explicit and not os.path.exists(explicit):
        raise FileNotFoundError(f"config file not found: {explicit}")
    if os.path.exists(file):
        with open(file, "rb") as fh:
            data = tomllib.load(fh)
        for section, key, attr, cast in _FILE_KEYS:
            if section not in data:
                continue
            table = data[section]
            if not isinstance(table, dict):
                # e.g. ``tts = 3`` at top level: report it clearly instead of
                # failing with an opaque TypeError on the membership test.
                raise ValueError(
                    f"[{section}] in {file} must be a table, "
                    f"got {type(table).__name__}"
                )
            if key in table:
                try:
                    setattr(cfg, attr, cast(table[key]))
                except (TypeError, ValueError) as e:
                    raise ValueError(f"bad value for [{section}] {key} in {file}: {e}") from e
        cfg.source_file = file
    for env, attr, cast in _ENV_KEYS:
        raw = os.environ.get(env)
        # Unset and empty variables are both treated as "no override".
        if raw is not None and raw != "":
            try:
                setattr(cfg, attr, cast(raw))
            except ValueError as e:
                raise ValueError(f"bad value for ${env}={raw!r}: {e}") from e
    if cfg.device not in VALID_DEVICE_POLICIES:
        raise ValueError(
            f"device policy must be one of {VALID_DEVICE_POLICIES}, got {cfg.device!r}"
        )
    return cfg