ttsforge 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,473 @@
1
+ """Audio streaming player for ttsforge using sounddevice.
2
+
3
+ This module provides a continuous audio streaming player that can accept
4
+ audio chunks and play them seamlessly without gaps.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ import queue
11
+ import threading
12
+ import time
13
+ from collections.abc import Callable
14
+ from dataclasses import dataclass, field
15
+ from pathlib import Path
16
+ from typing import TYPE_CHECKING, Any
17
+
18
+ import numpy as np
19
+
20
+ from .utils import atomic_write_json
21
+
22
# Logger for this module; handlers and levels are left to the application.
_LOGGER = logging.getLogger(__name__)

# Placeholder for typing-only imports; nothing is needed here at present.
if TYPE_CHECKING:
    pass

# Sample rate used whenever a caller does not pass one (Kokoro model default).
DEFAULT_SAMPLE_RATE = 24_000
29
+
30
+
31
+ def _import_sounddevice() -> Any:
32
+ try:
33
+ import sounddevice as sd
34
+ except ImportError as exc:
35
+ message = (
36
+ "Audio playback requires the optional dependency 'sounddevice'. "
37
+ "Install with: pip install ttsforge[audio] or pip install sounddevice."
38
+ )
39
+ raise RuntimeError(message) from exc
40
+ return sd
41
+
42
+
43
@dataclass
class PlaybackPosition:
    """Snapshot of a playback location, stored so playback can be resumed."""

    file_path: str
    chapter_index: int
    segment_index: int
    # Defaults to the creation time when not given explicitly.
    timestamp: float = field(default_factory=time.time)

    def to_dict(self) -> dict[str, Any]:
        """Return the position as a plain dict suitable for JSON output."""
        return dict(
            file_path=self.file_path,
            chapter_index=self.chapter_index,
            segment_index=self.segment_index,
            timestamp=self.timestamp,
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> PlaybackPosition:
        """Build a position from a dict produced by :meth:`to_dict`.

        The three location keys are required; a missing ``timestamp`` falls
        back to the current time.
        """
        # Required keys are read first so a malformed payload fails on them.
        path = data["file_path"]
        chapter = data["chapter_index"]
        segment = data["segment_index"]
        saved_at = data.get("timestamp", time.time())
        return cls(path, chapter, segment, saved_at)
70
+
71
+
72
class StreamingAudioPlayer:
    """
    A continuous audio streaming player using sounddevice.

    This player accepts audio chunks and plays them seamlessly using a
    callback-based OutputStream. It handles buffering to prevent gaps
    between chunks and supports pause/resume/stop functionality.

    Example:
        player = StreamingAudioPlayer(sample_rate=24000)
        player.start()

        for audio_chunk in audio_generator:
            player.add_audio(audio_chunk)
            if player.should_stop:
                break

        player.finish_adding()  # required, or wait_until_done() never returns
        player.wait_until_done()
        player.stop()
    """

    def __init__(
        self,
        sample_rate: int = DEFAULT_SAMPLE_RATE,
        channels: int = 1,
        buffer_size: int = 2048,
        on_chunk_played: Callable[[int], None] | None = None,
        max_buffer_seconds: float = 10.0,
    ) -> None:
        """
        Initialize the streaming audio player.

        Args:
            sample_rate: Audio sample rate (default: 24000 for Kokoro)
            channels: Number of audio channels (default: 1 for mono)
            buffer_size: Size of audio buffer frames (default: 2048)
            on_chunk_played: Optional callback invoked from the audio
                callback each time a queued chunk is taken up for playback;
                it receives the running count of chunks started so far
            max_buffer_seconds: Max queued audio in seconds before blocking
        """
        self.sample_rate = sample_rate
        self.channels = channels
        self.buffer_size = buffer_size
        self.on_chunk_played = on_chunk_played
        self.max_buffer_seconds = max_buffer_seconds

        # Sample budget for queued audio: never less than two output blocks.
        max_samples = int(max_buffer_seconds * sample_rate)
        self._max_buffer_samples = max(max_samples, buffer_size * 2)
        self._max_queue_chunks = max(1, self._max_buffer_samples // buffer_size)

        # Audio queue for buffering chunks; ``None`` is the end-of-audio marker.
        self._audio_queue: queue.Queue[np.ndarray | None] = queue.Queue(
            maxsize=self._max_queue_chunks
        )
        self._queue_lock = threading.Lock()
        self._queue_not_full = threading.Condition(self._queue_lock)
        self._queued_samples = 0

        # Current audio buffer being played
        self._current_buffer: np.ndarray | None = None
        self._buffer_position: int = 0

        # Control flags
        self._stream: Any | None = None
        self._is_playing: bool = False
        self._is_paused: bool = False
        self._should_stop = threading.Event()
        self._finished = threading.Event()
        self._all_audio_added = threading.Event()

        # Statistics
        self._chunks_played = 0
        self._total_samples_played = 0

    @property
    def is_playing(self) -> bool:
        """Whether audio is currently playing."""
        return self._is_playing and not self._is_paused

    @property
    def is_paused(self) -> bool:
        """Whether playback is paused."""
        return self._is_paused

    @property
    def should_stop(self) -> bool:
        """Whether playback should stop (e.g., user pressed Ctrl+C)."""
        return self._should_stop.is_set()

    @property
    def chunks_played(self) -> int:
        """Number of audio chunks that have been taken up for playback."""
        return self._chunks_played

    @property
    def duration_played(self) -> float:
        """Total duration of audio played in seconds."""
        return self._total_samples_played / self.sample_rate

    def _audio_callback(
        self,
        outdata: np.ndarray,
        frames: int,
        time_info: Any,
        status: Any,
    ) -> None:
        """
        Callback function called by sounddevice to fill the output buffer.

        Fills ``outdata`` with up to ``frames`` frames taken from the queued
        chunks, padding with silence when stopped, paused, or out of audio.
        """
        if status:
            # Surface stream problems (e.g. underflow) instead of dropping them.
            _LOGGER.debug("Audio stream status: %s", status)

        if self._should_stop.is_set() or self._is_paused:
            # Fill with silence when stopped or paused
            outdata.fill(0)
            return

        output_pos = 0
        while output_pos < frames:
            # Work on a local reference so a concurrent reset of
            # ``_current_buffer`` cannot turn it into None mid-copy.
            buffer = self._current_buffer

            if buffer is None or self._buffer_position >= len(buffer):
                try:
                    buffer = self._audio_queue.get_nowait()
                except queue.Empty:
                    # No audio available, fill the remainder with silence
                    outdata[output_pos:].fill(0)
                    if self._all_audio_added.is_set():
                        self._finished.set()
                    return

                self._current_buffer = buffer
                self._buffer_position = 0

                if buffer is None:
                    # None signals end of audio
                    outdata[output_pos:].fill(0)
                    self._finished.set()
                    return

                # The chunk left the queue: release its share of the budget
                # and wake any producer blocked in add_audio().
                with self._queue_not_full:
                    self._queued_samples = max(
                        0, self._queued_samples - len(buffer)
                    )
                    self._queue_not_full.notify_all()

                self._chunks_played += 1
                if self.on_chunk_played:
                    self.on_chunk_played(self._chunks_played)

            # Copy as much of the current chunk as this output block can take.
            start = self._buffer_position
            to_copy = min(len(buffer) - start, frames - output_pos)
            # Broadcasting the (n, 1) column writes the mono samples to
            # every output channel.
            outdata[output_pos : output_pos + to_copy] = buffer[
                start : start + to_copy, np.newaxis
            ]

            self._buffer_position = start + to_copy
            output_pos += to_copy
            self._total_samples_played += to_copy

    def start(self) -> None:
        """Start the audio output stream. Does nothing if already started.

        Raises:
            RuntimeError: If the optional ``sounddevice`` dependency cannot
                be imported.
        """
        if self._stream is not None:
            return

        sd = _import_sounddevice()

        self._should_stop.clear()
        self._finished.clear()
        self._all_audio_added.clear()

        stream = sd.OutputStream(
            samplerate=self.sample_rate,
            channels=self.channels,
            dtype=np.float32,
            blocksize=self.buffer_size,
            callback=self._audio_callback,
        )
        self._stream = stream
        self._is_playing = True
        try:
            stream.start()
        except Exception:
            # Do not leave a half-started stream or stale flags behind.
            self._stream = None
            self._is_playing = False
            stream.close()
            raise

    def stop(self) -> None:
        """Stop playback, close the stream and discard any queued audio."""
        self._should_stop.set()
        self._is_playing = False

        stream, self._stream = self._stream, None
        try:
            if stream is not None:
                try:
                    stream.stop()
                finally:
                    stream.close()
        finally:
            # Reset only after the stream is shut down, so the audio callback
            # is not expected to be reading the buffer any more.
            self._current_buffer = None
            self._buffer_position = 0

            # Clear the queue
            while True:
                try:
                    self._audio_queue.get_nowait()
                except queue.Empty:
                    break

            with self._queue_not_full:
                self._queued_samples = 0
                self._queue_not_full.notify_all()

            # Release anything blocked in wait_until_done().
            self._all_audio_added.set()
            self._finished.set()

    def pause(self) -> None:
        """Pause playback."""
        self._is_paused = True

    def resume(self) -> None:
        """Resume playback."""
        self._is_paused = False

    def toggle_pause(self) -> bool:
        """Toggle pause state. Returns new pause state."""
        self._is_paused = not self._is_paused
        return self._is_paused

    def add_audio(self, audio: np.ndarray) -> None:
        """
        Add an audio chunk to the playback queue.

        Blocks while the queued audio exceeds the configured budget, and
        returns without queueing if a stop has been requested.

        Args:
            audio: Audio samples as numpy array (converted to float32 and
                flattened to 1-D if necessary)
        """
        # Ensure float32 format
        if audio.dtype != np.float32:
            audio = audio.astype(np.float32)

        # Flatten if needed (handle potential 2D arrays)
        if audio.ndim > 1:
            audio = audio.flatten()

        audio_len = len(audio)
        with self._queue_not_full:
            # Wait for room. A chunk larger than the whole budget can never
            # "fit", so it is admitted once nothing else is queued instead of
            # blocking forever.
            while (
                self._queued_samples > 0
                and self._queued_samples + audio_len > self._max_buffer_samples
            ):
                if self._should_stop.is_set():
                    return
                self._queue_not_full.wait(timeout=0.1)

            if self._should_stop.is_set():
                return

            self._queued_samples += audio_len

        enqueued = False
        while not enqueued:
            try:
                self._audio_queue.put(audio, timeout=0.1)
                enqueued = True
            except queue.Full:
                if self._should_stop.is_set():
                    break

        if not enqueued:
            # The chunk never reached the queue: give back its reservation.
            with self._queue_not_full:
                self._queued_samples = max(0, self._queued_samples - audio_len)
                self._queue_not_full.notify_all()

    def finish_adding(self) -> None:
        """Signal that no more audio will be added."""
        self._all_audio_added.set()
        # Queue the end-of-audio marker, giving up only if a stop is requested.
        while True:
            try:
                self._audio_queue.put(None, timeout=0.1)
                break
            except queue.Full:
                if self._should_stop.is_set():
                    break

    def wait_until_done(self, timeout: float | None = None) -> bool:
        """
        Wait until all audio has been played.

        Args:
            timeout: Maximum time to wait in seconds (None = wait forever)

        Returns:
            True if finished, False if timeout occurred
        """
        return self._finished.wait(timeout=timeout)

    def request_stop(self) -> None:
        """Request playback to stop (used for Ctrl+C handling)."""
        self._should_stop.set()
        self._all_audio_added.set()
        self._finished.set()
        # Wake producers blocked in add_audio() so they notice the stop flag.
        with self._queue_not_full:
            self._queue_not_full.notify_all()
380
+
381
+
382
def save_playback_position(
    position: PlaybackPosition, cache_dir: Path | None = None
) -> None:
    """
    Save the current playback position for resume functionality.

    Saving is best-effort: failures to create the directory or write the
    file are logged at debug level and otherwise ignored.

    Args:
        position: PlaybackPosition to save
        cache_dir: Directory to save to (default: the path returned by
            ``get_user_cache_path()``)
    """
    if cache_dir is None:
        # Only needed when the caller did not choose a directory.
        from .utils import get_user_cache_path

        cache_dir = get_user_cache_path()

    position_file = cache_dir / "reading_position.json"

    try:
        # Directory creation is part of the best-effort save, so an
        # unwritable cache location does not raise out of this function.
        position_file.parent.mkdir(parents=True, exist_ok=True)
        atomic_write_json(
            position_file, position.to_dict(), indent=2, ensure_ascii=True
        )
    except (OSError, TypeError, ValueError) as exc:
        _LOGGER.debug("Failed to save playback position: %s", exc)
406
+
407
+
408
def load_playback_position(
    cache_dir: Path | None = None,
) -> PlaybackPosition | None:
    """
    Load the saved playback position.

    Args:
        cache_dir: Directory to load from (default: the path returned by
            ``get_user_cache_path()``)

    Returns:
        PlaybackPosition if found and readable, None otherwise (missing,
        unreadable, not valid UTF-8/JSON, or lacking required keys)
    """
    import json

    if cache_dir is None:
        # Only needed when the caller did not choose a directory.
        from .utils import get_user_cache_path

        cache_dir = get_user_cache_path()

    position_file = cache_dir / "reading_position.json"

    try:
        # Open directly rather than checking exists() first: the file may
        # vanish or be unreadable between a check and the open.
        with open(position_file, encoding="utf-8") as f:
            data = json.load(f)
        return PlaybackPosition.from_dict(data)
    except (OSError, ValueError, KeyError, TypeError):
        # OSError: missing/unreadable file. ValueError: invalid JSON or
        # invalid UTF-8. KeyError/TypeError: payload of the wrong shape.
        return None
438
+
439
+
440
def clear_playback_position(cache_dir: Path | None = None) -> None:
    """
    Clear the saved playback position.

    Does nothing if no position file exists.

    Args:
        cache_dir: Directory containing the position file (default: the
            path returned by ``get_user_cache_path()``)
    """
    if cache_dir is None:
        # Only needed when the caller did not choose a directory.
        from .utils import get_user_cache_path

        cache_dir = get_user_cache_path()

    position_file = cache_dir / "reading_position.json"

    # missing_ok avoids the race between an exists() check and the unlink.
    position_file.unlink(missing_ok=True)
456
+
457
+
458
def play_audio_blocking(
    audio: np.ndarray, sample_rate: int = DEFAULT_SAMPLE_RATE
) -> None:
    """
    Play a complete audio array and return once playback has ended.

    Convenience helper for one-shot playback.

    Args:
        audio: Audio samples as numpy array
        sample_rate: Sample rate (default: 24000)
    """
    backend = _import_sounddevice()

    # Start playback, then block until sounddevice reports it is done.
    backend.play(audio, sample_rate)
    backend.wait()
@@ -0,0 +1,75 @@
1
+ from __future__ import annotations
2
+
3
+
4
def parse_chapter_selection(selection: str, total_chapters: int) -> list[int]:
    """Turn a 1-based chapter selection string into sorted 0-based indices.

    Accepted forms (comma-separated, freely mixed; duplicates collapse):
    - "3"          -> [2]                   (single chapter)
    - "1-5"        -> [0, 1, 2, 3, 4]       (inclusive range)
    - "3,5,7"      -> [2, 4, 6]             (list)
    - "1-3,7,9-10" -> [0, 1, 2, 6, 8, 9]    (mixed)
    - "8-"         -> chapter 8 through the last chapter (open-ended range)
    - "all"        -> every chapter (case-insensitive)

    Empty items between commas are ignored.

    Args:
        selection: Chapter selection string (1-based indexing)
        total_chapters: Total number of chapters available

    Returns:
        Sorted list of unique 0-based chapter indices

    Raises:
        ValueError: If selection format is invalid or chapters out of range
    """
    if selection.strip().lower() == "all":
        return list(range(total_chapters))

    chosen: set[int] = set()

    for raw in selection.split(","):
        token = raw.strip()
        if not token:
            continue

        if "-" not in token:
            # Single chapter: "3"
            try:
                number = int(token)
            except ValueError as err:
                raise ValueError(f"Invalid chapter number: {token}") from err

            if number < 1:
                raise ValueError(f"Chapter number must be >= 1: {number}")
            if number > total_chapters:
                raise ValueError(
                    f"Chapter {number} exceeds total chapters ({total_chapters})"
                )

            chosen.add(number - 1)
            continue

        # Range: "1-5" (a missing upper bound means "through the last chapter")
        low_text, _, high_text = token.partition("-")
        low_text = low_text.strip()
        high_text = high_text.strip()
        try:
            if not low_text:
                raise ValueError(f"Invalid range format: {token}")
            first = int(low_text)
            last = int(high_text) if high_text else total_chapters
        except ValueError as err:
            raise ValueError(f"Invalid range format: {token}") from err

        if first < 1 or last < 1:
            raise ValueError(f"Chapter numbers must be >= 1: {token}")
        if first > last:
            raise ValueError(f"Invalid range (start > end): {token}")
        if last > total_chapters:
            raise ValueError(
                f"Chapter {last} exceeds total chapters ({total_chapters})"
            )

        # Shift the inclusive 1-based range to 0-based indices.
        chosen.update(range(first - 1, last))

    return sorted(chosen)
@@ -0,0 +1,73 @@
1
+ """CLI interface for ttsforge - convert EPUB to audiobooks.
2
+
3
+ This module serves as the main entry point for the ttsforge CLI, organizing
4
+ commands into logical groups:
5
+
6
+ - Conversion commands: convert, read, sample, list, info
7
+ - Phoneme commands: phonemes export/convert/preview/info
8
+ - Utility commands: voices, demo, download, config, extract-names, list-names
9
+ """
10
+
11
+ from pathlib import Path
12
+ from typing import Optional
13
+
14
+ import click
15
+
16
+ from ..constants import PROGRAM_NAME
17
+ from .helpers import console, get_version
18
+
19
+ # Import all command modules
20
+ from .commands_conversion import convert, info, list_chapters, read, sample
21
+ from .commands_phonemes import phonemes
22
+ from .commands_utility import config, demo, download, extract_names, list_names, voices
23
+
24
+
25
# Root command group. The docstring below doubles as the CLI help text, so
# implementation notes are kept in comments rather than in the docstring.
@click.group(invoke_without_command=True)
@click.option("--version", is_flag=True, help="Show version and exit.")
@click.option(
    "--model",
    type=click.Path(exists=True, path_type=Path),
    default=None,
    help="Path to custom kokoro.onnx model file.",
)
@click.option(
    "--voices",
    type=click.Path(exists=True, path_type=Path),
    default=None,
    help="Path to custom voices.bin file.",
)
@click.pass_context
def main(
    ctx: click.Context, version: bool, model: Path | None, voices: Path | None
) -> None:
    """ttsforge - Generate audiobooks from EPUB files with TTS."""
    # Share the global options with subcommands through the context object.
    ctx.ensure_object(dict)
    ctx.obj["model_path"] = model
    ctx.obj["voices_path"] = voices
    if version:
        console.print(f"[bold]{PROGRAM_NAME}[/bold] version {get_version()}")
        # Exit explicitly: returning from a group callback would still run
        # any subcommand given on the same command line.
        ctx.exit(0)
    if ctx.invoked_subcommand is None:
        # Invoked with no subcommand: show the help text.
        click.echo(ctx.get_help())
52
+
53
+
54
# Attach every subcommand to the main group. A non-None second element
# overrides the name under which the command is exposed.
for _command, _exposed_name in (
    (convert, None),
    (list_chapters, "list"),
    (info, None),
    (sample, None),
    (read, None),
    (voices, None),
    (demo, None),
    (download, None),
    (config, None),
    (phonemes, None),
    (extract_names, None),
    (list_names, None),
):
    main.add_command(_command, name=_exposed_name)
del _command, _exposed_name

# Keep `main` importable from this module for backward compatibility.
__all__ = ["main"]


if __name__ == "__main__":
    main()