PyPI - ttsforge - Versions diffs - 0.1.0__py3-none-any.whl - Mend

ttsforge 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

ttsforge/__init__.py +114 -0
ttsforge/_version.py +34 -0
ttsforge/audio_merge.py +180 -0
ttsforge/audio_player.py +473 -0
ttsforge/chapter_selection.py +75 -0
ttsforge/cli/__init__.py +73 -0
ttsforge/cli/commands_conversion.py +1927 -0
ttsforge/cli/commands_phonemes.py +1033 -0
ttsforge/cli/commands_utility.py +1389 -0
ttsforge/cli/helpers.py +76 -0
ttsforge/constants.py +164 -0
ttsforge/conversion.py +1090 -0
ttsforge/input_reader.py +408 -0
ttsforge/kokoro_lang.py +12 -0
ttsforge/kokoro_runner.py +125 -0
ttsforge/name_extractor.py +305 -0
ttsforge/phoneme_conversion.py +978 -0
ttsforge/phonemes.py +486 -0
ttsforge/ssmd_generator.py +422 -0
ttsforge/utils.py +785 -0
ttsforge/vocab/__init__.py +139 -0
ttsforge-0.1.0.dist-info/METADATA +659 -0
ttsforge-0.1.0.dist-info/RECORD +27 -0
ttsforge-0.1.0.dist-info/WHEEL +5 -0
ttsforge-0.1.0.dist-info/entry_points.txt +2 -0
ttsforge-0.1.0.dist-info/licenses/LICENSE +21 -0
ttsforge-0.1.0.dist-info/top_level.txt +1 -0

ttsforge/audio_player.py ADDED Viewed

@@ -0,0 +1,473 @@
+"""Audio streaming player for ttsforge using sounddevice.
+This module provides a continuous audio streaming player that can accept
+audio chunks and play them seamlessly without gaps.
+"""
+from __future__ import annotations
+import logging
+import queue
+import threading
+import time
+from collections.abc import Callable
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+import numpy as np
+from .utils import atomic_write_json
+_LOGGER = logging.getLogger(__name__)
+if TYPE_CHECKING:
+    pass
+# Default sample rate for Kokoro models
+DEFAULT_SAMPLE_RATE = 24000
+def _import_sounddevice() -> Any:
+    """Import the optional ``sounddevice`` module and return it.
+    The import happens at call time rather than at module import time.
+    Returns:
+        The imported ``sounddevice`` module object.
+    Raises:
+        RuntimeError: If ``sounddevice`` cannot be imported. The original
+            ImportError is attached as the cause.
+    """
+    try:
+        import sounddevice
+    except ImportError as import_error:
+        raise RuntimeError(
+            "Audio playback requires the optional dependency 'sounddevice'. "
+            "Install with: pip install ttsforge[audio] or pip install sounddevice."
+        ) from import_error
+    else:
+        return sounddevice
+@dataclass
+class PlaybackPosition:
+    """Snapshot of a playback location, kept so playback can be resumed.
+    Holds the source file path, the chapter and segment indices, and a
+    timestamp that defaults to ``time.time()`` when the object is created.
+    """
+    file_path: str
+    chapter_index: int
+    segment_index: int
+    timestamp: float = field(default_factory=time.time)
+    def to_dict(self) -> dict[str, Any]:
+        """Return the fields as a plain dict for JSON serialization."""
+        field_names = ("file_path", "chapter_index", "segment_index", "timestamp")
+        return {name: getattr(self, name) for name in field_names}
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> PlaybackPosition:
+        """Build a position from a dict shaped like the output of to_dict().
+        The three location keys are required (a missing one raises KeyError);
+        ``timestamp`` is optional and falls back to the current time.
+        """
+        # Read the required keys first so malformed input fails on them
+        # before the optional timestamp lookup is attempted.
+        required = {
+            name: data[name]
+            for name in ("file_path", "chapter_index", "segment_index")
+        }
+        return cls(timestamp=data.get("timestamp", time.time()), **required)
+class StreamingAudioPlayer:
+    """
+    A continuous audio streaming player using sounddevice.
+    This player accepts audio chunks and plays them back-to-back using a
+    callback-based OutputStream. Chunks are held in a bounded queue to
+    prevent gaps between them, and playback supports pause/resume/stop.
+    Each chunk is treated as mono and copied to every output channel.
+    Example:
+        player = StreamingAudioPlayer(sample_rate=24000)
+        player.start()
+        for audio_chunk in audio_generator:
+            player.add_audio(audio_chunk)
+            if player.should_stop:
+                break
+        player.finish_adding()  # without this wait_until_done() never returns
+        player.wait_until_done()
+        player.stop()
+    """
+    def __init__(
+        self,
+        sample_rate: int = DEFAULT_SAMPLE_RATE,
+        channels: int = 1,
+        buffer_size: int = 2048,
+        on_chunk_played: Callable[[int], None] | None = None,
+        max_buffer_seconds: float = 10.0,
+    ):
+        """
+        Initialize the streaming audio player.
+        Args:
+            sample_rate: Audio sample rate (default: 24000 for Kokoro)
+            channels: Number of output channels (default: 1 for mono)
+            buffer_size: Stream block size in frames (default: 2048)
+            on_chunk_played: Optional callback invoked from inside the stream
+                callback each time a queued chunk is dequeued for playback;
+                it receives the running count of dequeued chunks
+            max_buffer_seconds: Max queued audio in seconds before
+                add_audio() blocks
+        """
+        self.sample_rate = sample_rate
+        self.channels = channels
+        self.buffer_size = buffer_size
+        self.on_chunk_played = on_chunk_played
+        self.max_buffer_seconds = max_buffer_seconds
+        max_samples = int(max_buffer_seconds * sample_rate)
+        # Always allow at least two stream blocks worth of queued audio.
+        self._max_buffer_samples = max(max_samples, buffer_size * 2)
+        self._max_queue_chunks = max(1, int(self._max_buffer_samples / buffer_size))
+        # Audio queue for buffering chunks (None is the end-of-audio sentinel)
+        self._audio_queue: queue.Queue[np.ndarray | None] = queue.Queue(
+            maxsize=self._max_queue_chunks
+        )
+        # Guards _queued_samples; producers wait on the condition for room.
+        self._queue_lock = threading.Lock()
+        self._queue_not_full = threading.Condition(self._queue_lock)
+        self._queued_samples = 0
+        # Current audio buffer being played
+        self._current_buffer: np.ndarray | None = None
+        self._buffer_position: int = 0
+        # Control flags
+        self._stream: Any | None = None
+        self._is_playing: bool = False
+        self._is_paused: bool = False
+        self._should_stop = threading.Event()
+        self._finished = threading.Event()
+        self._all_audio_added = threading.Event()
+        # Statistics
+        self._chunks_played = 0
+        self._total_samples_played = 0
+    @property
+    def is_playing(self) -> bool:
+        """Whether audio is currently playing."""
+        return self._is_playing and not self._is_paused
+    @property
+    def is_paused(self) -> bool:
+        """Whether playback is paused."""
+        return self._is_paused
+    @property
+    def should_stop(self) -> bool:
+        """Whether playback should stop (e.g., user pressed Ctrl+C)."""
+        return self._should_stop.is_set()
+    @property
+    def chunks_played(self) -> int:
+        """Number of audio chunks that have been dequeued for playback."""
+        return self._chunks_played
+    @property
+    def duration_played(self) -> float:
+        """Total duration of audio played in seconds."""
+        return self._total_samples_played / self.sample_rate
+    def _audio_callback(
+        self,
+        outdata: np.ndarray,
+        frames: int,
+        time_info: Any,
+        status: Any,
+    ) -> None:
+        """
+        Callback function called by sounddevice to fill the output buffer.
+        This runs in a separate thread and must be fast to avoid audio glitches.
+        Args:
+            outdata: Output array to fill, indexed as [frame, channel]
+            frames: Number of frames requested for this block
+            time_info: Timing information from the stream (unused)
+            status: Stream status flags such as underflow (currently ignored)
+        """
+        if self._should_stop.is_set() or self._is_paused:
+            # Fill with silence when stopped or paused
+            outdata.fill(0)
+            return
+        output_pos = 0
+        while output_pos < frames:
+            # If we have no current buffer, try to get one from the queue
+            if self._current_buffer is None or self._buffer_position >= len(
+                self._current_buffer
+            ):
+                try:
+                    next_buffer = self._audio_queue.get_nowait()
+                except queue.Empty:
+                    # No audio available, fill the rest with silence
+                    outdata[output_pos:].fill(0)
+                    # Done only if the producer said nothing more is coming
+                    if self._all_audio_added.is_set():
+                        self._finished.set()
+                    return
+                self._current_buffer = next_buffer
+                self._buffer_position = 0
+                if next_buffer is None:
+                    # None signals end of audio
+                    outdata[output_pos:].fill(0)
+                    self._finished.set()
+                    return
+                # The chunk left the queue: release its share of the sample
+                # budget and wake any producer blocked in add_audio().
+                with self._queue_not_full:
+                    self._queued_samples = max(
+                        0, self._queued_samples - len(next_buffer)
+                    )
+                    self._queue_not_full.notify_all()
+                self._chunks_played += 1
+                if self.on_chunk_played:
+                    self.on_chunk_played(self._chunks_played)
+            # Copy audio from buffer to output
+            available = len(self._current_buffer) - self._buffer_position
+            needed = frames - output_pos
+            to_copy = min(available, needed)
+            audio_slice = self._current_buffer[
+                self._buffer_position : self._buffer_position + to_copy
+            ]
+            if self.channels == 1:
+                outdata[output_pos : output_pos + to_copy, 0] = audio_slice
+            else:
+                # Duplicate mono to all channels
+                for ch in range(self.channels):
+                    outdata[output_pos : output_pos + to_copy, ch] = audio_slice
+            self._buffer_position += to_copy
+            output_pos += to_copy
+            self._total_samples_played += to_copy
+    def start(self) -> None:
+        """Start the audio output stream.
+        Does nothing if a stream already exists.
+        Raises:
+            RuntimeError: If the optional ``sounddevice`` dependency is missing
+        """
+        sd = _import_sounddevice()
+        if self._stream is not None:
+            return
+        self._should_stop.clear()
+        self._finished.clear()
+        self._all_audio_added.clear()
+        self._is_playing = True
+        stream = sd.OutputStream(
+            samplerate=self.sample_rate,
+            channels=self.channels,
+            dtype=np.float32,
+            blocksize=self.buffer_size,
+            callback=self._audio_callback,
+        )
+        self._stream = stream
+        stream.start()
+    def stop(self) -> None:
+        """Stop playback, close the stream and discard any queued audio."""
+        self._should_stop.set()
+        self._is_playing = False
+        if self._stream is not None:
+            self._stream.stop()
+            self._stream.close()
+            self._stream = None
+        # Reset the playback buffer only after the stream is closed: the
+        # callback checks the stop flag once on entry, so clearing the buffer
+        # earlier could pull it out from under a callback still copying.
+        self._current_buffer = None
+        self._buffer_position = 0
+        # Clear the queue
+        while not self._audio_queue.empty():
+            try:
+                self._audio_queue.get_nowait()
+            except queue.Empty:
+                break
+        with self._queue_not_full:
+            self._queued_samples = 0
+            self._queue_not_full.notify_all()
+        self._all_audio_added.set()
+        self._finished.set()
+    def pause(self) -> None:
+        """Pause playback."""
+        self._is_paused = True
+    def resume(self) -> None:
+        """Resume playback."""
+        self._is_paused = False
+    def toggle_pause(self) -> bool:
+        """Toggle pause state. Returns new pause state."""
+        self._is_paused = not self._is_paused
+        return self._is_paused
+    def add_audio(self, audio: np.ndarray) -> None:
+        """
+        Add an audio chunk to the playback queue.
+        Blocks while the queued audio would exceed the configured buffer
+        limit, and returns without queueing if a stop has been requested.
+        A chunk that is larger than the whole buffer limit is accepted once
+        nothing else is queued, so it cannot block forever.
+        Args:
+            audio: Audio samples as numpy array (converted to float32 and
+                flattened to 1-D if necessary)
+        """
+        # Ensure float32 format
+        if audio.dtype != np.float32:
+            audio = audio.astype(np.float32)
+        # Flatten if needed (handle potential 2D arrays)
+        if audio.ndim > 1:
+            audio = audio.flatten()
+        audio_len = len(audio)
+        with self._queue_not_full:
+            # Wait for room. The "_queued_samples > 0" guard admits a chunk
+            # longer than the entire budget when nothing else is queued;
+            # without it the condition could never become false.
+            while (
+                self._queued_samples > 0
+                and self._queued_samples + audio_len > self._max_buffer_samples
+            ):
+                if self._should_stop.is_set():
+                    return
+                self._queue_not_full.wait(timeout=0.1)
+            if self._should_stop.is_set():
+                return
+            self._queued_samples += audio_len
+        # Retry with a short timeout so a stop request is noticed promptly.
+        while True:
+            try:
+                self._audio_queue.put(audio, timeout=0.1)
+                break
+            except queue.Full:
+                if self._should_stop.is_set():
+                    break
+        if self._should_stop.is_set():
+            # Give back the samples reserved above.
+            with self._queue_not_full:
+                self._queued_samples = max(0, self._queued_samples - audio_len)
+                self._queue_not_full.notify_all()
+            return
+    def finish_adding(self) -> None:
+        """Signal that no more audio will be added.
+        Queues the None end-of-audio sentinel, retrying while the queue is
+        full unless a stop has been requested.
+        """
+        self._all_audio_added.set()
+        while True:
+            try:
+                self._audio_queue.put(None, timeout=0.1)
+                break
+            except queue.Full:
+                if self._should_stop.is_set():
+                    break
+    def wait_until_done(self, timeout: float | None = None) -> bool:
+        """
+        Wait until all audio has been played.
+        Completion is only signalled after finish_adding(), stop() or
+        request_stop() has been called.
+        Args:
+            timeout: Maximum time to wait in seconds (None = wait forever)
+        Returns:
+            True if finished, False if timeout occurred
+        """
+        return self._finished.wait(timeout=timeout)
+    def request_stop(self) -> None:
+        """Request playback to stop (used for Ctrl+C handling).
+        Sets the stop and completion flags and wakes producers blocked in
+        add_audio(); it does not close the stream (stop() does that).
+        """
+        self._should_stop.set()
+        self._all_audio_added.set()
+        self._finished.set()
+        with self._queue_not_full:
+            self._queue_not_full.notify_all()
+def save_playback_position(
+    position: PlaybackPosition, cache_dir: Path | None = None
+) -> None:
+    """
+    Save the current playback position for resume functionality.
+    Saving is best-effort: a failure to create the directory or to write
+    the file is logged at debug level and otherwise ignored.
+    Args:
+        position: PlaybackPosition to save
+        cache_dir: Directory to save to (default: the path returned by
+            get_user_cache_path())
+    """
+    if cache_dir is None:
+        from .utils import get_user_cache_path
+        cache_dir = get_user_cache_path()
+    position_file = cache_dir / "reading_position.json"
+    try:
+        # Directory creation sits inside the try so an unwritable cache
+        # location is handled like any other failed save instead of raising.
+        position_file.parent.mkdir(parents=True, exist_ok=True)
+        atomic_write_json(
+            position_file, position.to_dict(), indent=2, ensure_ascii=True
+        )
+    except (OSError, TypeError, ValueError) as exc:
+        _LOGGER.debug("Failed to save playback position: %s", exc)
+def load_playback_position(
+    cache_dir: Path | None = None,
+) -> PlaybackPosition | None:
+    """
+    Load the saved playback position.
+    Loading is best-effort: a missing, unreadable, undecodable or malformed
+    position file yields None rather than an exception.
+    Args:
+        cache_dir: Directory to load from (default: the path returned by
+            get_user_cache_path())
+    Returns:
+        PlaybackPosition if found and valid, None otherwise
+    """
+    import json
+    if cache_dir is None:
+        from .utils import get_user_cache_path
+        cache_dir = get_user_cache_path()
+    position_file = cache_dir / "reading_position.json"
+    try:
+        # Open directly instead of checking exists() first: a missing file
+        # surfaces as FileNotFoundError (an OSError) with no race window.
+        with open(position_file, encoding="utf-8") as f:
+            data = json.load(f)
+        return PlaybackPosition.from_dict(data)
+    except (OSError, ValueError, KeyError, TypeError):
+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
+        return None
+def clear_playback_position(cache_dir: Path | None = None) -> None:
+    """
+    Clear the saved playback position.
+    Does nothing if no position file exists.
+    Args:
+        cache_dir: Directory containing the position file (default: the
+            path returned by get_user_cache_path())
+    """
+    if cache_dir is None:
+        from .utils import get_user_cache_path
+        cache_dir = get_user_cache_path()
+    position_file = cache_dir / "reading_position.json"
+    # missing_ok avoids the check-then-delete race of exists() + unlink():
+    # the file may disappear between the two calls.
+    position_file.unlink(missing_ok=True)
+def play_audio_blocking(
+    audio: np.ndarray, sample_rate: int = DEFAULT_SAMPLE_RATE
+) -> None:
+    """
+    Play a single audio array and return only once playback has ended.
+    Convenience helper for one-shot playback.
+    Args:
+        audio: Audio samples as numpy array
+        sample_rate: Sample rate (default: 24000)
+    Raises:
+        RuntimeError: If the optional ``sounddevice`` dependency is missing
+    """
+    playback_backend = _import_sounddevice()
+    playback_backend.play(audio, sample_rate)
+    # wait() blocks until the playback started above has finished.
+    playback_backend.wait()

ttsforge/chapter_selection.py ADDED Viewed

@@ -0,0 +1,75 @@
+from __future__ import annotations
+def parse_chapter_selection(selection: str, total_chapters: int) -> list[int]:
+    """Turn a 1-based chapter selection string into sorted 0-based indices.
+    Accepted forms (comma-separated, whitespace ignored, duplicates merged):
+    - "all" -> every chapter
+    - "3" -> [2] (single chapter)
+    - "1-5" -> [0, 1, 2, 3, 4] (inclusive range)
+    - "8-" -> from chapter 8 through the last chapter
+    - "1-3,7,9-10" -> [0, 1, 2, 6, 8, 9] (mixed)
+    Empty comma-separated parts are skipped.
+    Args:
+        selection: Chapter selection string (1-based indexing)
+        total_chapters: Total number of chapters available
+    Returns:
+        Sorted list of unique 0-based chapter indices
+    Raises:
+        ValueError: If a part is not parseable or refers to a chapter
+            outside 1..total_chapters
+    """
+    if selection.strip().lower() == "all":
+        return list(range(total_chapters))
+    chosen: set[int] = set()
+    for raw_token in selection.split(","):
+        token = raw_token.strip()
+        if token == "":
+            continue
+        if "-" not in token:
+            # Single chapter, e.g. "3"
+            try:
+                number = int(token)
+            except ValueError as err:
+                raise ValueError(f"Invalid chapter number: {token}") from err
+            if number < 1:
+                raise ValueError(f"Chapter number must be >= 1: {number}")
+            if number > total_chapters:
+                raise ValueError(
+                    f"Chapter {number} exceeds total chapters ({total_chapters})"
+                )
+            chosen.add(number - 1)
+            continue
+        # Range, e.g. "1-5"; an empty upper bound means "through the end".
+        try:
+            first_text, _, last_text = token.partition("-")
+            first_text = first_text.strip()
+            last_text = last_text.strip()
+            if first_text == "":
+                raise ValueError(f"Invalid range format: {token}")
+            first = int(first_text)
+            last = total_chapters if last_text == "" else int(last_text)
+        except ValueError as err:
+            raise ValueError(f"Invalid range format: {token}") from err
+        if min(first, last) < 1:
+            raise ValueError(f"Chapter numbers must be >= 1: {token}")
+        if first > last:
+            raise ValueError(f"Invalid range (start > end): {token}")
+        if last > total_chapters:
+            raise ValueError(
+                f"Chapter {last} exceeds total chapters ({total_chapters})"
+            )
+        # Shift the inclusive 1-based range to 0-based indices.
+        chosen.update(range(first - 1, last))
+    return sorted(chosen)

ttsforge/cli/__init__.py ADDED Viewed

@@ -0,0 +1,73 @@
+"""CLI interface for ttsforge - convert EPUB to audiobooks.
+This module serves as the main entry point for the ttsforge CLI, organizing
+commands into logical groups:
+- Conversion commands: convert, read, sample, list, info
+- Phoneme commands: phonemes export/convert/preview/info
+- Utility commands: voices, demo, download, config, extract-names, list-names
+"""
+from pathlib import Path
+from typing import Optional
+import click
+from ..constants import PROGRAM_NAME
+from .helpers import console, get_version
+# Import all command modules
+from .commands_conversion import convert, info, list_chapters, read, sample
+from .commands_phonemes import phonemes
+from .commands_utility import config, demo, download, extract_names, list_names, voices
+# Top-level click group. The callback stores the optional custom model and
+# voices paths on ctx.obj for subcommands, handles --version, and prints the
+# help text when no subcommand is given.
+@click.group(invoke_without_command=True)
+@click.option("--version", is_flag=True, help="Show version and exit.")
+@click.option(
+    "--model",
+    type=click.Path(exists=True, path_type=Path),
+    default=None,
+    help="Path to custom kokoro.onnx model file.",
+)
+@click.option(
+    "--voices",
+    type=click.Path(exists=True, path_type=Path),
+    default=None,
+    help="Path to custom voices.bin file.",
+)
+@click.pass_context
+def main(
+    ctx: click.Context, version: bool, model: Path | None, voices: Path | None
+) -> None:
+    """ttsforge - Generate audiobooks from EPUB files with TTS."""
+    # NOTE: the docstring above is the help text click displays; keep it short.
+    ctx.ensure_object(dict)
+    ctx.obj["model_path"] = model
+    ctx.obj["voices_path"] = voices
+    if version:
+        console.print(f"[bold]{PROGRAM_NAME}[/bold] version {get_version()}")
+        # Exit explicitly: a plain return would still let click dispatch a
+        # subcommand given after --version, contradicting "Show version and
+        # exit". ctx.exit() ends the run with exit code 0.
+        ctx.exit()
+    if ctx.invoked_subcommand is None:
+        click.echo(ctx.get_help())
+# Register all commands with the main group. Each entry pairs a command with
+# the name it is exposed under; None keeps the command's own name.
+for _command, _exposed_name in (
+    (convert, None),
+    (list_chapters, "list"),
+    (info, None),
+    (sample, None),
+    (read, None),
+    (voices, None),
+    (demo, None),
+    (download, None),
+    (config, None),
+    (phonemes, None),
+    (extract_names, None),
+    (list_names, None),
+):
+    main.add_command(_command, name=_exposed_name)
+# Drop the loop variables so the module namespace is unchanged.
+del _command, _exposed_name
+# Export main for backward compatibility
+__all__ = ["main"]
+if __name__ == "__main__":
+    main()