ttsforge 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1927 @@
1
+ """Conversion commands for ttsforge CLI.
2
+
3
+ Commands for converting EPUB/text files to audiobooks:
4
+ - convert: Main EPUB to audiobook conversion
5
+ - list: List chapters in a file
6
+ - info: Show file metadata
7
+ - sample: Generate TTS samples
8
+ - read: Interactive read command
9
+ """
10
+
11
+ import re
12
+ import sys
13
+ import tempfile
14
+ from pathlib import Path
15
+ from types import FrameType
16
+ from typing import Literal, TypedDict, cast
17
+
18
+ import click
19
+ import numpy as np
20
+ from rich.panel import Panel
21
+ from rich.progress import (
22
+ BarColumn,
23
+ Progress,
24
+ SpinnerColumn,
25
+ TaskID,
26
+ TaskProgressColumn,
27
+ TextColumn,
28
+ TimeElapsedColumn,
29
+ TimeRemainingColumn,
30
+ )
31
+ from rich.prompt import Confirm
32
+ from rich.table import Table
33
+ from typing_extensions import NotRequired
34
+
35
+ from ..chapter_selection import parse_chapter_selection
36
+ from ..constants import (
37
+ LANGUAGE_DESCRIPTIONS,
38
+ SUPPORTED_OUTPUT_FORMATS,
39
+ VOICE_PREFIX_TO_LANG,
40
+ VOICES,
41
+ )
42
+ from ..conversion import (
43
+ Chapter,
44
+ ConversionOptions,
45
+ ConversionProgress,
46
+ TTSConverter,
47
+ detect_language_from_iso,
48
+ get_default_voice_for_language,
49
+ )
50
+ from ..utils import (
51
+ format_chapters_range,
52
+ format_filename_template,
53
+ format_size,
54
+ load_config,
55
+ resolve_conversion_defaults,
56
+ )
57
+ from .helpers import DEFAULT_SAMPLE_TEXT, console, parse_voice_parameter
58
+
59
+
60
+ class ContentItem(TypedDict):
61
+ title: str
62
+ text: str
63
+ index: int
64
+ page_number: NotRequired[int]
65
+
66
+
67
@click.command()
@click.argument("epub_file", type=click.Path(exists=True, path_type=Path))
@click.option(
    "-o",
    "--output",
    type=click.Path(path_type=Path),
    help="Output file path. Defaults to input filename with new extension.",
)
@click.option(
    "-f",
    "--format",
    "output_format",
    type=click.Choice(SUPPORTED_OUTPUT_FORMATS),
    help="Output audio format.",
)
@click.option(
    "-v",
    "--voice",
    type=click.Choice(VOICES),
    help="Voice to use for TTS.",
)
@click.option(
    "-l",
    "--language",
    type=click.Choice(list(LANGUAGE_DESCRIPTIONS.keys())),
    help="Language code (a=American English, b=British English, etc.).",
)
@click.option(
    "--lang",
    type=str,
    default=None,
    help="Override language for phonemization (e.g., 'de', 'fr', 'en-us'). "
    "By default, language is determined from the voice.",
)
@click.option(
    "-s",
    "--speed",
    type=float,
    help="Speech speed (0.5 to 2.0).",
)
@click.option(
    "--gpu/--no-gpu",
    "use_gpu",
    default=None,
    help="Enable/disable GPU acceleration.",
)
@click.option(
    "--chapters",
    type=str,
    help="Chapters to convert (e.g., '1-5', '1,3,5', 'all').",
)
@click.option(
    "--silence",
    type=float,
    help="Silence duration between chapters in seconds.",
)
@click.option(
    "--pause-clause",
    type=float,
    default=None,
    help="Pause after clauses in seconds (default: 0.25).",
)
@click.option(
    "--pause-sentence",
    type=float,
    default=None,
    help="Pause after sentences in seconds (default: 0.2).",
)
@click.option(
    "--pause-paragraph",
    type=float,
    default=None,
    help="Pause after paragraphs in seconds (default: 0.75).",
)
@click.option(
    "--pause-variance",
    type=float,
    default=None,
    help="Random variance added to pauses in seconds (default: 0.05).",
)
@click.option(
    "--pause-mode",
    type=str,
    default=None,
    help="Pause mode: 'tts', 'manual', or 'auto' (default: auto).",
)
@click.option(
    "--announce-chapters/--no-announce-chapters",
    "announce_chapters",
    default=None,
    help="Read chapter titles aloud before chapter content (default: enabled).",
)
@click.option(
    "--chapter-pause",
    type=float,
    default=None,
    help="Pause duration after chapter title announcement in seconds (default: 2.0).",
)
@click.option(
    "--title",
    type=str,
    help="Title metadata for the audiobook.",
)
@click.option(
    "--author",
    type=str,
    help="Author metadata for the audiobook.",
)
@click.option(
    "--cover",
    type=click.Path(exists=True, path_type=Path),
    help="Cover image for m4b format.",
)
@click.option(
    "-y",
    "--yes",
    is_flag=True,
    help="Skip confirmation prompts.",
)
@click.option(
    "--verbose",
    is_flag=True,
    help="Show detailed output.",
)
@click.option(
    "--split-mode",
    "split_mode",
    type=click.Choice(["auto", "line", "paragraph", "sentence", "clause"]),
    default=None,
    help="Text splitting mode: auto, line, paragraph, sentence, clause.",
)
@click.option(
    "--resume/--no-resume",
    "resume",
    default=True,
    help="Enable/disable resume capability (default: enabled).",
)
@click.option(
    "--generate-ssmd",
    "generate_ssmd_only",
    is_flag=True,
    help="Generate only SSMD files without creating audio (for manual editing).",
)
@click.option(
    "--detect-emphasis/--no-detect-emphasis",
    "detect_emphasis",
    default=False,
    help=(
        "Detect emphasis (italic/bold) from HTML tags in EPUB files "
        "(default: disabled)."
    ),
)
@click.option(
    "--fresh",
    is_flag=True,
    help="Discard any previous progress and start conversion from scratch.",
)
@click.option(
    "--keep-chapters",
    "keep_chapter_files",
    is_flag=True,
    help="Keep individual chapter audio files after conversion.",
)
@click.option(
    "--voice-blend",
    "voice_blend",
    type=str,
    help="Blend multiple voices (e.g., 'af_nicole:50,am_michael:50').",
)
@click.option(
    "--voice-db",
    "voice_database",
    type=click.Path(exists=True, path_type=Path),
    help="Path to custom voice database (SQLite).",
)
@click.option(
    "--use-mixed-language",
    "use_mixed_language",
    is_flag=True,
    help="Enable mixed-language support (auto-detect multiple languages in text).",
)
@click.option(
    "--mixed-language-primary",
    "mixed_language_primary",
    type=str,
    help="Primary language for mixed-language mode (e.g., 'de', 'en-us').",
)
@click.option(
    "--mixed-language-allowed",
    "mixed_language_allowed",
    type=str,
    help="Comma-separated list of allowed languages (e.g., 'de,en-us').",
)
@click.option(
    "--mixed-language-confidence",
    "mixed_language_confidence",
    type=float,
    help=(
        "Detection confidence threshold for mixed-language mode "
        "(0.0-1.0, default: 0.7)."
    ),
)
@click.option(
    "--phoneme-dict",
    "phoneme_dictionary_path",
    type=click.Path(exists=True),
    help="Path to custom phoneme dictionary JSON file for pronunciation overrides.",
)
@click.option(
    "--phoneme-dict-case-sensitive",
    "phoneme_dict_case_sensitive",
    is_flag=True,
    help="Make phoneme dictionary matching case-sensitive (default: case-insensitive).",
)
@click.pass_context
def convert(  # noqa: C901
    ctx: click.Context,
    epub_file: Path,
    output: Path | None,
    output_format: str | None,
    voice: str | None,
    language: str | None,
    lang: str | None,
    speed: float | None,
    use_gpu: bool | None,
    chapters: str | None,
    silence: float | None,
    pause_clause: float | None,
    pause_sentence: float | None,
    pause_paragraph: float | None,
    pause_variance: float | None,
    pause_mode: str | None,
    announce_chapters: bool | None,
    chapter_pause: float | None,
    title: str | None,
    author: str | None,
    cover: Path | None,
    yes: bool,
    verbose: bool,
    split_mode: str | None,
    resume: bool,
    generate_ssmd_only: bool,
    detect_emphasis: bool,
    fresh: bool,
    keep_chapter_files: bool,
    voice_blend: str | None,
    voice_database: Path | None,
    use_mixed_language: bool,
    mixed_language_primary: str | None,
    mixed_language_allowed: str | None,
    mixed_language_confidence: float | None,
    phoneme_dictionary_path: str | None,
    phoneme_dict_case_sensitive: bool,
) -> None:
    """Convert an EPUB file to an audiobook.

    EPUB_FILE is the path to the EPUB file to convert.
    """
    # NOTE: click renders the docstring above as this command's --help text,
    # so maintainer-facing documentation lives in comments instead.
    #
    # Flow:
    #   1. load the user config and read the input file (metadata + chapters)
    #   2. resolve language and voice (CLI > file metadata > config > fallback)
    #   3. choose chapters (--chapters, or interactively unless --yes)
    #   4. derive the output path and the effective output format
    #   5. print a summary and ask for confirmation (unless --yes)
    #   6. build ConversionOptions and run the resumable conversion behind a
    #      rich progress bar
    #   7. report the result; the process exits with status 1 on failure
    #
    # Every option left unset on the command line falls back to the config
    # value and then to the hard-coded default given in the lookups below.
    config = load_config()
    model_path = ctx.obj.get("model_path") if ctx.obj else None
    voices_path = ctx.obj.get("voices_path") if ctx.obj else None

    # Extension used only when a default output filename has to be built; the
    # effective format is re-derived from the final output path further down.
    fmt = output_format or config.get("default_format", "m4b")

    console.print(f"[bold]Loading:[/bold] {epub_file}")

    from ..input_reader import InputReader

    try:
        reader = InputReader(epub_file)
    except Exception as e:
        console.print(f"[red]Error loading file:[/red] {e}")
        sys.exit(1)

    # Metadata from the file; CLI --title/--author take precedence over it.
    metadata = reader.get_metadata()
    default_title = config.get("default_title", "Untitled")
    epub_title = metadata.title or default_title
    epub_author = metadata.authors[0] if metadata.authors else "Unknown"
    epub_language = metadata.language

    effective_title = title or epub_title
    effective_author = author or epub_author

    with console.status("Extracting chapters..."):
        if detect_emphasis and reader.file_type == "epub":
            # Emphasis detection needs the raw HTML alongside each chapter.
            chapters_with_html = reader.get_chapters_with_html()
            epub_chapters = [ch for ch, _ in chapters_with_html]
            html_contents = [html for _, html in chapters_with_html]
        else:
            epub_chapters = reader.get_chapters()
            html_contents = None

    if not epub_chapters:
        console.print("[red]Error:[/red] No chapters found in file.")
        sys.exit(1)

    console.print(f"[green]Found {len(epub_chapters)} chapters[/green]")

    # Language: explicit CLI value, else the file's declared language, else
    # the configured default.
    if language is None:
        if epub_language:
            language = detect_language_from_iso(epub_language)
            lang_desc = LANGUAGE_DESCRIPTIONS.get(language, language)
            console.print(f"[dim]Auto-detected language: {lang_desc}[/dim]")
        else:
            language = config.get("default_language", "a")

    # Voice: an explicit CLI voice is used as-is. A configured default voice
    # is only kept when its two-character prefix maps to the chosen language.
    if voice is None:
        voice = config.get("default_voice")
        if voice and language:
            voice_lang = VOICE_PREFIX_TO_LANG.get(voice[:2], "a")
            if voice_lang != language:
                voice = get_default_voice_for_language(language)
        elif language:
            voice = get_default_voice_for_language(language)
        else:
            voice = "af_heart"

    if language is None:
        language = "a"

    # Chapter selection: --chapters wins; otherwise prompt unless --yes.
    # ``None`` means "all chapters".
    selected_indices: list[int] | None = None
    if chapters:
        try:
            selected_indices = parse_chapter_selection(chapters, len(epub_chapters))
        except ValueError as exc:
            console.print(f"[yellow]{exc}[/yellow]")
            sys.exit(1)
    elif not yes:
        selected_indices = _interactive_chapter_selection(epub_chapters)

    if selected_indices is not None and len(selected_indices) == 0:
        console.print("[yellow]No chapters selected. Exiting.[/yellow]")
        return

    def default_output_name() -> str:
        # Builds "<templated name>[_<chapters range>].<fmt>" for this run.
        # The range suffix is appended only when format_chapters_range returns
        # a non-empty string (presumably only for partial selections).
        chapters_range = format_chapters_range(
            selected_indices or list(range(len(epub_chapters))), len(epub_chapters)
        )
        name = format_filename_template(
            config.get("output_filename_template", "{book_title}"),
            book_title=effective_title,
            author=effective_author,
            input_stem=epub_file.stem,
            chapters_range=chapters_range,
            default_title=default_title,
        )
        if chapters_range:
            name = f"{name}_{chapters_range}"
        return f"{name}.{fmt}"

    # No --output: write next to the input file. --output pointing at an
    # existing directory: write the generated filename inside it. Any other
    # --output value is used verbatim as the target file.
    if output is None:
        output = epub_file.parent / default_output_name()
    elif output.is_dir():
        output = output / default_output_name()

    # Without --format, the output file's extension decides the format.
    if output_format is None:
        output_format = output.suffix.lstrip(".") or config.get("default_format", "m4b")

    parsed_mixed_language_allowed = None
    if mixed_language_allowed:
        parsed_mixed_language_allowed = [
            item.strip() for item in mixed_language_allowed.split(",")
        ]

    # Resolve every value shown in the summary exactly once, so the summary
    # and the ConversionOptions handed to the converter can never disagree.
    effective_voice = voice or config.get("default_voice", "af_heart")
    effective_language = language or config.get("default_language", "a")
    effective_format = output_format or config.get("default_format", "m4b")
    effective_speed = speed or config.get("default_speed", 1.0)
    effective_use_gpu = (
        use_gpu if use_gpu is not None else config.get("use_gpu", False)
    )
    effective_mixed_language = use_mixed_language or config.get(
        "use_mixed_language", False
    )
    effective_mixed_primary = mixed_language_primary or config.get(
        "mixed_language_primary"
    )
    effective_mixed_allowed = parsed_mixed_language_allowed or config.get(
        "mixed_language_allowed"
    )
    effective_mixed_confidence = (
        mixed_language_confidence
        if mixed_language_confidence is not None
        else config.get("mixed_language_confidence", 0.7)
    )

    _show_conversion_summary(
        epub_file=epub_file,
        output=output,
        output_format=effective_format,
        voice=effective_voice,
        language=effective_language,
        speed=effective_speed,
        use_gpu=effective_use_gpu,
        num_chapters=len(selected_indices) if selected_indices else len(epub_chapters),
        title=effective_title,
        author=effective_author,
        lang=lang,
        use_mixed_language=effective_mixed_language,
        mixed_language_primary=effective_mixed_primary,
        mixed_language_allowed=effective_mixed_allowed,
        mixed_language_confidence=effective_mixed_confidence,
    )

    if not yes:
        if not Confirm.ask("Proceed with conversion?"):
            console.print("[yellow]Cancelled.[/yellow]")
            return

    # --fresh: remove the hidden per-book work directory and disable resuming.
    if fresh:
        import shutil

        from ..utils import sanitize_filename

        safe_book_title = sanitize_filename(effective_title)[:50]
        work_dir = output.parent / f".{safe_book_title}_chapters"
        if work_dir.exists():
            console.print(f"[yellow]Removing previous progress:[/yellow] {work_dir}")
            shutil.rmtree(work_dir)
        resume = False

    options = ConversionOptions(
        voice=effective_voice,
        language=effective_language,
        speed=effective_speed,
        output_format=effective_format,
        output_dir=output.parent,
        use_gpu=effective_use_gpu,
        # ``is not None`` (not ``or``) so an explicit ``--silence 0`` is
        # honoured instead of being replaced by the configured default.
        silence_between_chapters=(
            silence
            if silence is not None
            else config.get("silence_between_chapters", 2.0)
        ),
        lang=lang or config.get("phonemization_lang"),
        use_mixed_language=effective_mixed_language,
        mixed_language_primary=effective_mixed_primary,
        mixed_language_allowed=effective_mixed_allowed,
        mixed_language_confidence=effective_mixed_confidence,
        phoneme_dictionary_path=(
            phoneme_dictionary_path or config.get("phoneme_dictionary_path")
        ),
        phoneme_dict_case_sensitive=(
            phoneme_dict_case_sensitive
            or config.get("phoneme_dict_case_sensitive", False)
        ),
        pause_clause=(
            pause_clause
            if pause_clause is not None
            else config.get("pause_clause", 0.25)
        ),
        pause_sentence=(
            pause_sentence
            if pause_sentence is not None
            else config.get("pause_sentence", 0.2)
        ),
        pause_paragraph=(
            pause_paragraph
            if pause_paragraph is not None
            else config.get("pause_paragraph", 0.75)
        ),
        pause_variance=(
            pause_variance
            if pause_variance is not None
            else config.get("pause_variance", 0.05)
        ),
        pause_mode=(
            pause_mode if pause_mode is not None else config.get("pause_mode", "auto")
        ),
        announce_chapters=(
            announce_chapters
            if announce_chapters is not None
            else config.get("announce_chapters", True)
        ),
        chapter_pause_after_title=(
            chapter_pause
            if chapter_pause is not None
            else config.get("chapter_pause_after_title", 2.0)
        ),
        split_mode=split_mode or config.get("default_split_mode", "auto"),
        resume=resume,
        keep_chapter_files=keep_chapter_files,
        title=effective_title,
        author=effective_author,
        cover_image=cover,
        voice_blend=voice_blend,
        voice_database=voice_database,
        chapter_filename_template=config.get(
            "chapter_filename_template",
            "{chapter_num:03d}_{book_title}_{chapter_title}",
        ),
        model_path=model_path,
        voices_path=voices_path,
        generate_ssmd_only=generate_ssmd_only,
        detect_emphasis=detect_emphasis,
    )

    progress = Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TaskProgressColumn(),
        TimeElapsedColumn(),
        TimeRemainingColumn(),
        console=console,
    )

    # Assigned once the progress task exists; the callback ignores any update
    # that arrives before then.
    task_id: TaskID | None = None

    def progress_callback(prog: ConversionProgress) -> None:
        if task_id is None:
            return
        description = (
            f"Chapter {prog.current_chapter}/{prog.total_chapters}: "
            f"{prog.chapter_name}"
        )
        # Description is capped at 50 characters to keep the bar on one line.
        progress.update(
            task_id,
            completed=prog.chars_processed,
            description=description[:50],
        )

    def log_callback(message: str, level: str) -> None:
        # Converter log messages are only shown with --verbose.
        if not verbose:
            return
        if level == "error":
            console.print(f"[red]{message}[/red]")
        elif level == "warning":
            console.print(f"[yellow]{message}[/yellow]")
        else:
            console.print(f"[dim]{message}[/dim]")

    converter = TTSConverter(
        options=options,
        progress_callback=progress_callback,
        log_callback=log_callback,
    )

    # Set for O(1) membership tests; an empty selection already returned
    # above, so a falsy value here means "all chapters".
    selected_set = set(selected_indices) if selected_indices else None

    if selected_set is None:
        filtered_chapters = epub_chapters
        filtered_html = html_contents
    else:
        # Chapters keep their order from the file, not the selection order.
        filtered_chapters = [
            ch for i, ch in enumerate(epub_chapters) if i in selected_set
        ]
        filtered_html = (
            [html for i, html in enumerate(html_contents) if i in selected_set]
            if html_contents
            else None
        )

    # Progress is measured in characters of the chapters being converted.
    total_chars = sum(ch.char_count for ch in filtered_chapters)

    # Convert input_reader.Chapter objects to conversion.Chapter objects.
    chapters_to_convert: list[Chapter] = [
        Chapter(
            title=ch.title,
            content=ch.text,
            index=ch.index,
            html_content=filtered_html[i] if filtered_html else None,
            is_ssmd=ch.is_ssmd,
        )
        for i, ch in enumerate(filtered_chapters)
    ]

    with progress:
        task_id = progress.add_task("Converting...", total=total_chars)

        result = converter.convert_chapters_resumable(
            chapters=chapters_to_convert,
            output_path=output,
            source_file=epub_file,
            resume=resume,
        )

        # Only snap the bar to 100% when the conversion actually succeeded.
        if result.success:
            progress.update(task_id, completed=total_chars)

    console.print()
    if not result.success:
        console.print(
            Panel(
                f"[red]{result.error_message}[/red]",
                title="[bold red]Conversion Failed[/bold red]",
            )
        )
        sys.exit(1)

    if generate_ssmd_only:
        console.print(
            Panel(
                f"[green]SSMD files generated in:[/green]\n{result.chapters_dir}",
                title="[bold green]SSMD Generation Complete[/bold green]",
            )
        )
    else:
        console.print(
            Panel(
                f"[green]Audiobook saved to:[/green]\n{result.output_path}",
                title="[bold green]Conversion Complete[/bold green]",
            )
        )
700
+
701
+
702
@click.command("list")
@click.argument("epub_file", type=click.Path(exists=True, path_type=Path))
def list_chapters(epub_file: Path) -> None:
    """List chapters in a file.

    EPUB_FILE is the path to the file (EPUB, TXT, or SSMD).
    """
    # NOTE: click renders the docstring above as the command's --help text.
    #
    # Prints a table with one row per chapter (1-based number, title cut to
    # 60 characters, character count) followed by a totals line. A failure to
    # load the file is reported and ends the process with status 1.
    from ..input_reader import InputReader

    with console.status("Loading file..."):
        try:
            reader = InputReader(epub_file)
            chapters = reader.get_chapters()
        except Exception as exc:
            console.print(f"[red]Error:[/red] {exc}")
            sys.exit(1)

    if not chapters:
        console.print("[yellow]No chapters found in file.[/yellow]")
        return

    overview = Table(title=f"Chapters in {epub_file.name}")
    column_specs = (
        ("#", {"style": "dim", "width": 4}),
        ("Title", {"style": "bold"}),
        ("Characters", {"justify": "right"}),
    )
    for header, column_kwargs in column_specs:
        overview.add_column(header, **column_kwargs)

    # Read each chapter's size once; the same numbers feed rows and the total.
    sizes = [chapter.char_count for chapter in chapters]
    for number, (chapter, size) in enumerate(zip(chapters, sizes), start=1):
        overview.add_row(str(number), chapter.title[:60], f"{size:,}")

    chapter_total = len(chapters)
    char_total = sum(sizes)

    console.print(overview)
    console.print(
        f"\n[bold]Total:[/bold] {chapter_total} chapters, {char_total:,} characters"
    )
738
+
739
+
740
@click.command()
@click.argument("epub_file", type=click.Path(exists=True, path_type=Path))
def info(epub_file: Path) -> None:
    """Show metadata and information about a file.

    EPUB_FILE is the path to the file (EPUB, TXT, or SSMD).
    """
    # NOTE: click renders the docstring above as the command's --help text.
    #
    # Prints a header panel with the file name, then a two-column table: the
    # metadata fields that are present, followed by chapter count, total
    # character count and file size. A failure to load the file is reported
    # and ends the process with status 1.
    from ..input_reader import InputReader

    with console.status("Loading file..."):
        try:
            reader = InputReader(epub_file)
            metadata = reader.get_metadata()
            chapters = reader.get_chapters()
        except Exception as exc:
            console.print(f"[red]Error:[/red] {exc}")
            sys.exit(1)

    # A missing or empty chapter list counts as zero chapters / characters.
    if chapters:
        chapter_count = len(chapters)
        total_chars = sum(chapter.char_count for chapter in chapters)
    else:
        chapter_count = 0
        total_chars = 0

    console.print(Panel(f"[bold]{epub_file.name}[/bold]", title="File Information"))

    # Collect (field, value) pairs first; only populated metadata is listed.
    rows: list[tuple[str, str]] = []
    if metadata:
        if metadata.title:
            rows.append(("Title", metadata.title))
        if metadata.authors:
            rows.append(("Author", ", ".join(metadata.authors)))
        if metadata.language:
            iso_code = metadata.language
            language_key = detect_language_from_iso(iso_code)
            # Falls back to the raw code when no description is known.
            description = LANGUAGE_DESCRIPTIONS.get(language_key, iso_code)
            rows.append(("Language", f"{iso_code} ({description})"))
        if metadata.publisher:
            rows.append(("Publisher", metadata.publisher))
        if metadata.publication_year:
            rows.append(("Year", str(metadata.publication_year)))

    rows.append(("Chapters", str(chapter_count)))
    rows.append(("Characters", f"{total_chars:,}"))
    rows.append(("File Size", format_size(epub_file.stat().st_size)))

    details = Table(show_header=False, box=None)
    details.add_column("Field", style="bold")
    details.add_column("Value")
    for field_name, field_value in rows:
        details.add_row(field_name, field_value)

    console.print(details)
787
+
788
+
789
+ @click.command()
790
+ @click.argument("text", required=False, default=None)
791
+ @click.option(
792
+ "-o",
793
+ "--output",
794
+ type=click.Path(path_type=Path),
795
+ help="Output file path (default: ./sample.wav).",
796
+ )
797
+ @click.option(
798
+ "-f",
799
+ "--format",
800
+ "output_format",
801
+ type=click.Choice(SUPPORTED_OUTPUT_FORMATS),
802
+ default="wav",
803
+ help="Output audio format.",
804
+ )
805
+ @click.option(
806
+ "-v",
807
+ "--voice",
808
+ type=str,
809
+ help=(
810
+ "TTS voice to use or voice blend "
811
+ "(e.g., 'af_sky' or 'af_nicole:50,am_michael:50')."
812
+ ),
813
+ )
814
+ @click.option(
815
+ "-l",
816
+ "--language",
817
+ type=click.Choice(list(LANGUAGE_DESCRIPTIONS.keys())),
818
+ help="Language for TTS.",
819
+ )
820
+ @click.option(
821
+ "--lang",
822
+ type=str,
823
+ default=None,
824
+ help="Override language for phonemization (e.g., 'de', 'fr', 'en-us').",
825
+ )
826
+ @click.option("-s", "--speed", type=float, help="Speech speed (default: 1.0).")
827
+ @click.option(
828
+ "--gpu/--no-gpu",
829
+ "use_gpu",
830
+ default=None,
831
+ help="Use GPU acceleration if available.",
832
+ )
833
+ @click.option(
834
+ "--split-mode",
835
+ "split_mode",
836
+ type=click.Choice(["auto", "line", "paragraph", "sentence", "clause"]),
837
+ help="Text splitting mode for processing.",
838
+ )
839
+ @click.option("--verbose", is_flag=True, help="Show detailed output.")
840
+ @click.option(
841
+ "-p",
842
+ "--play",
843
+ "play_audio",
844
+ is_flag=True,
845
+ help="Play audio directly (also saves to file if -o specified).",
846
+ )
847
+ @click.option(
848
+ "--use-mixed-language",
849
+ "use_mixed_language",
850
+ is_flag=True,
851
+ help="Enable mixed-language support (auto-detect multiple languages in text).",
852
+ )
853
+ @click.option(
854
+ "--mixed-language-primary",
855
+ "mixed_language_primary",
856
+ type=str,
857
+ help="Primary language for mixed-language mode (e.g., 'de', 'en-us').",
858
+ )
859
+ @click.option(
860
+ "--mixed-language-allowed",
861
+ "mixed_language_allowed",
862
+ type=str,
863
+ help="Comma-separated list of allowed languages (e.g., 'de,en-us').",
864
+ )
865
+ @click.option(
866
+ "--mixed-language-confidence",
867
+ "mixed_language_confidence",
868
+ type=float,
869
+ help=(
870
+ "Detection confidence threshold for mixed-language mode "
871
+ "(0.0-1.0, default: 0.7)."
872
+ ),
873
+ )
874
+ @click.option(
875
+ "--phoneme-dict",
876
+ "phoneme_dictionary_path",
877
+ type=click.Path(exists=True),
878
+ help="Path to custom phoneme dictionary JSON file for pronunciation overrides.",
879
+ )
880
+ @click.option(
881
+ "--phoneme-dict-case-sensitive",
882
+ "phoneme_dict_case_sensitive",
883
+ is_flag=True,
884
+ help="Make phoneme dictionary matching case-sensitive (default: case-insensitive).",
885
+ )
886
+ @click.pass_context
887
+ def sample(
888
+ ctx: click.Context,
889
+ text: str | None,
890
+ output: Path | None,
891
+ output_format: str,
892
+ voice: str | None,
893
+ language: str | None,
894
+ lang: str | None,
895
+ speed: float | None,
896
+ use_gpu: bool | None,
897
+ split_mode: str | None,
898
+ play_audio: bool,
899
+ verbose: bool,
900
+ use_mixed_language: bool,
901
+ mixed_language_primary: str | None,
902
+ mixed_language_allowed: str | None,
903
+ mixed_language_confidence: float | None,
904
+ phoneme_dictionary_path: str | None,
905
+ phoneme_dict_case_sensitive: bool,
906
+ ) -> None:
907
+ """Generate a sample audio file to test TTS settings.
908
+
909
+ If no TEXT is provided, uses a default sample text.
910
+
911
+ Examples:
912
+
913
+ ttsforge sample
914
+
915
+ ttsforge sample "Hello, this is a test."
916
+
917
+ ttsforge sample --voice am_adam --speed 1.2 -o test.wav
918
+
919
+ ttsforge sample --play # Play directly without saving
920
+
921
+ ttsforge sample --play -o test.wav # Play and save to file
922
+ """
923
+
924
+ from ..conversion import ConversionOptions, TTSConverter
925
+
926
+ # Get model path from global context
927
+ model_path = ctx.obj.get("model_path") if ctx.obj else None
928
+ voices_path = ctx.obj.get("voices_path") if ctx.obj else None
929
+
930
+ # Use default text if none provided
931
+ sample_text = text or DEFAULT_SAMPLE_TEXT
932
+
933
+ # Handle output path for playback mode
934
+ temp_dir: str | None = None
935
+ save_output = output is not None or not play_audio
936
+
937
+ if play_audio and output is None:
938
+ # Create temp file for playback only
939
+ temp_dir = tempfile.mkdtemp()
940
+ output = Path(temp_dir) / "sample.wav"
941
+ output_format = "wav" # Force WAV for playback
942
+ elif output is None:
943
+ output = Path(f"./sample.{output_format}")
944
+ elif output.suffix == "":
945
+ # If no extension provided, add the format
946
+ output = output.with_suffix(f".{output_format}")
947
+
948
+ # Load config for defaults
949
+ user_config = load_config()
950
+ resolved_defaults = resolve_conversion_defaults(
951
+ user_config,
952
+ {
953
+ "voice": voice,
954
+ "language": language,
955
+ "speed": speed,
956
+ "split_mode": split_mode,
957
+ "use_gpu": use_gpu,
958
+ "lang": lang,
959
+ },
960
+ )
961
+
962
+ # Parse mixed_language_allowed from comma-separated string
963
+ parsed_mixed_language_allowed = None
964
+ if mixed_language_allowed:
965
+ parsed_mixed_language_allowed = [
966
+ lang_item.strip() for lang_item in mixed_language_allowed.split(",")
967
+ ]
968
+
969
+ # Auto-detect if voice is a blend
970
+ voice_value = resolved_defaults["voice"]
971
+ parsed_voice, parsed_voice_blend = parse_voice_parameter(voice_value)
972
+
973
+ # Build conversion options (use ConversionOptions defaults if not specified)
974
+ options = ConversionOptions(
975
+ voice=parsed_voice or "af_bella",
976
+ voice_blend=parsed_voice_blend,
977
+ language=resolved_defaults["language"],
978
+ speed=resolved_defaults["speed"],
979
+ output_format=output_format,
980
+ use_gpu=resolved_defaults["use_gpu"],
981
+ split_mode=resolved_defaults["split_mode"],
982
+ lang=resolved_defaults["lang"],
983
+ use_mixed_language=(
984
+ use_mixed_language or user_config.get("use_mixed_language", False)
985
+ ),
986
+ mixed_language_primary=(
987
+ mixed_language_primary or user_config.get("mixed_language_primary")
988
+ ),
989
+ mixed_language_allowed=(
990
+ parsed_mixed_language_allowed or user_config.get("mixed_language_allowed")
991
+ ),
992
+ mixed_language_confidence=(
993
+ mixed_language_confidence
994
+ if mixed_language_confidence is not None
995
+ else user_config.get("mixed_language_confidence", 0.7)
996
+ ),
997
+ phoneme_dictionary_path=(
998
+ phoneme_dictionary_path or user_config.get("phoneme_dictionary_path")
999
+ ),
1000
+ phoneme_dict_case_sensitive=(
1001
+ phoneme_dict_case_sensitive
1002
+ or user_config.get("phoneme_dict_case_sensitive", False)
1003
+ ),
1004
+ model_path=model_path,
1005
+ voices_path=voices_path,
1006
+ )
1007
+
1008
+ # Always show settings
1009
+ if options.voice_blend:
1010
+ console.print(f"[dim]Voice Blend:[/dim] {options.voice_blend}")
1011
+ else:
1012
+ console.print(f"[dim]Voice:[/dim] {options.voice}")
1013
+ lang_desc = LANGUAGE_DESCRIPTIONS.get(options.language, "Unknown")
1014
+ console.print(f"[dim]Language:[/dim] {options.language} ({lang_desc})")
1015
+ if options.lang:
1016
+ console.print(f"[dim]Phonemization Lang:[/dim] {options.lang} (override)")
1017
+ console.print(f"[dim]Speed:[/dim] {options.speed}")
1018
+ console.print(f"[dim]Format:[/dim] {options.output_format}")
1019
+ console.print(f"[dim]Split mode:[/dim] {options.split_mode}")
1020
+ console.print(f"[dim]GPU:[/dim] {'enabled' if options.use_gpu else 'disabled'}")
1021
+
1022
+ if verbose:
1023
+ text_preview = sample_text[:100]
1024
+ ellipsis = "..." if len(sample_text) > 100 else ""
1025
+ console.print(f"[dim]Text:[/dim] {text_preview}{ellipsis}")
1026
+ if save_output:
1027
+ console.print(f"[dim]Output:[/dim] {output}")
1028
+
1029
+ try:
1030
+ converter = TTSConverter(options)
1031
+
1032
+ with Progress(
1033
+ SpinnerColumn(),
1034
+ TextColumn("[progress.description]{task.description}"),
1035
+ console=console,
1036
+ transient=True,
1037
+ ) as progress:
1038
+ progress.add_task("Generating audio...", total=None)
1039
+ result = converter.convert_text(sample_text, output)
1040
+
1041
+ if result.success:
1042
+ # Handle playback if requested
1043
+ if play_audio:
1044
+ import sounddevice as sd
1045
+ import soundfile as sf
1046
+
1047
+ audio_data, sample_rate = sf.read(str(output))
1048
+ console.print("[dim]Playing audio...[/dim]")
1049
+ sd.play(audio_data, sample_rate)
1050
+ sd.wait()
1051
+ console.print("[green]Playback complete.[/green]")
1052
+
1053
+ # Report save location (if not temp file)
1054
+ if save_output:
1055
+ console.print(f"[green]Sample saved to:[/green] {output}")
1056
+
1057
+ # Cleanup temp file if needed
1058
+ if temp_dir is not None:
1059
+ import shutil
1060
+
1061
+ shutil.rmtree(temp_dir)
1062
+ else:
1063
+ console.print(f"[red]Error:[/red] {result.error_message}")
1064
+ raise SystemExit(1)
1065
+
1066
+ except Exception as e:
1067
+ console.print(f"[red]Error generating sample:[/red] {e}")
1068
+ if verbose:
1069
+ import traceback
1070
+
1071
+ console.print(traceback.format_exc())
1072
+ # Cleanup temp dir on error
1073
+ if temp_dir is not None:
1074
+ import shutil
1075
+
1076
+ shutil.rmtree(temp_dir, ignore_errors=True)
1077
+ raise SystemExit(1) from None
1078
+
1079
+
1080
def _interactive_chapter_selection(chapters: list) -> list[int] | None:
    """Show the chapter table and ask the user which chapters to use.

    Args:
        chapters: Chapter objects; ``title`` and ``char_count`` are read from
            each one to build the overview table.

    Returns:
        ``None`` when the user just presses Enter (meaning all chapters),
        otherwise the result of ``parse_chapter_selection``. An empty list is
        returned when the typed selection is rejected with ``ValueError``.
    """
    console.print("\n[bold]Available Chapters:[/bold]")

    overview = Table(show_header=True)
    overview.add_column("#", style="dim", width=4)
    overview.add_column("Title")
    overview.add_column("Chars", justify="right")
    for number, chapter in enumerate(chapters, start=1):
        # Titles are truncated so the table stays narrow.
        overview.add_row(str(number), chapter.title[:50], f"{chapter.char_count:,}")
    console.print(overview)

    instructions = (
        "\n[dim]Enter chapter selection:[/dim]",
        "[dim] - 'all' for all chapters[/dim]",
        "[dim] - '1-5' for range[/dim]",
        "[dim] - '1,3,5' for specific chapters[/dim]",
        "[dim] - Press Enter for all chapters[/dim]",
    )
    for line in instructions:
        console.print(line)

    answer = console.input("\n[bold]Selection:[/bold] ").strip()
    if answer == "":
        # Empty input is the "everything" shortcut.
        return None

    try:
        chosen = parse_chapter_selection(answer, len(chapters))
    except ValueError as exc:
        console.print(f"[yellow]{exc}[/yellow]")
        return []
    return chosen
1110
+
1111
+
1112
def _show_conversion_summary(
    epub_file: Path,
    output: Path,
    output_format: str,
    voice: str,
    language: str,
    speed: float,
    use_gpu: bool,
    num_chapters: int,
    title: str,
    author: str,
    lang: str | None = None,
    use_mixed_language: bool = False,
    mixed_language_primary: str | None = None,
    mixed_language_allowed: list[str] | None = None,
    mixed_language_confidence: float = 0.7,
) -> None:
    """Print a two-column table describing the conversion about to run.

    The phonemization override row appears only when ``lang`` is set, and the
    mixed-language rows appear only when ``use_mixed_language`` is true.
    Nothing is returned; the table is written to the shared console.
    """
    console.print()

    # Collect (field, value) pairs first, then feed them to the table in order.
    rows: list[tuple[str, str]] = [
        ("Input", str(epub_file)),
        ("Output", str(output)),
        ("Format", output_format.upper()),
        ("Chapters", str(num_chapters)),
        ("Voice", voice),
        # Fall back to the raw code when no description is known.
        ("Language", LANGUAGE_DESCRIPTIONS.get(language, language)),
    ]

    if lang:
        rows.append(("Phonemization Lang", f"{lang} (override)"))

    if use_mixed_language:
        rows.append(("Mixed-Language", "Enabled"))
        if mixed_language_primary:
            rows.append((" Primary Lang", mixed_language_primary))
        if mixed_language_allowed:
            rows.append((" Allowed Langs", ", ".join(mixed_language_allowed)))
        rows.append((" Confidence", f"{mixed_language_confidence:.2f}"))

    gpu_state = "Enabled" if use_gpu else "Disabled"
    rows.extend(
        [
            ("Speed", f"{speed}x"),
            ("GPU", gpu_state),
            ("Title", title),
            ("Author", author),
        ]
    )

    summary = Table(title="Conversion Summary", show_header=False)
    summary.add_column("Field", style="bold")
    summary.add_column("Value")
    for field_name, field_value in rows:
        summary.add_row(field_name, field_value)

    console.print(summary)
    console.print()
1158
+
1159
+
1160
@click.command()
@click.argument(
    "input_file",
    type=click.Path(path_type=Path),
    required=False,
    default=None,
)
@click.option(
    "-v",
    "--voice",
    type=click.Choice(VOICES),
    help="TTS voice to use.",
)
@click.option(
    "-l",
    "--language",
    type=click.Choice(list(LANGUAGE_DESCRIPTIONS.keys())),
    help="Language for TTS.",
)
@click.option(
    "-s",
    "--speed",
    type=float,
    help="Speech speed (default: 1.0).",
)
@click.option(
    "--gpu/--no-gpu",
    "use_gpu",
    default=None,
    help="Use GPU acceleration if available.",
)
@click.option(
    "--mode",
    "content_mode",
    type=click.Choice(["chapters", "pages"]),
    default=None,
    help="Split content by chapters or pages (default: chapters).",
)
@click.option(
    "-c",
    "--chapters",
    type=str,
    help="Chapter selection (e.g., '1-5', '1,3,5', '3-'). Use with --mode chapters.",
)
@click.option(
    "-p",
    "--pages",
    type=str,
    help="Page selection (e.g., '1-50', '10,20,30'). Use with --mode pages.",
)
@click.option(
    "--start-chapter",
    type=int,
    help="Start from specific chapter number (1-indexed).",
)
@click.option(
    "--start-page",
    type=int,
    help="Start from specific page number (1-indexed).",
)
@click.option(
    "--page-size",
    type=int,
    default=None,
    help="Synthetic page size in characters (default: 2000). Only for --mode pages.",
)
@click.option(
    "--resume",
    is_flag=True,
    help="Resume from last saved position.",
)
@click.option(
    "--list",
    "list_content",
    is_flag=True,
    help="List chapters/pages and exit without reading.",
)
@click.option(
    "--split",
    "split_mode",
    type=click.Choice(["sentence", "paragraph"]),
    default=None,
    help="Text splitting mode: sentence (shorter) or paragraph (grouped).",
)
@click.option(
    "--pause-clause",
    type=float,
    default=None,
    help="Pause after clauses in seconds.",
)
@click.option(
    "--pause-sentence",
    type=float,
    default=None,
    help="Pause after sentences in seconds.",
)
@click.option(
    "--pause-paragraph",
    type=float,
    default=None,
    help="Pause after paragraphs in seconds.",
)
@click.option(
    "--pause-variance",
    type=float,
    default=None,
    help="Random variance added to pauses in seconds.",
)
@click.option(
    "--pause-mode",
    type=str,
    default=None,
    # The value is forwarded to GenerationConfig.pause_mode below.
    help="Pause handling mode: 'tts', 'manual', or 'auto'.",
)
@click.pass_context
def read(  # noqa: C901
    ctx: click.Context,
    input_file: Path | None,
    voice: str | None,
    language: str | None,
    speed: float | None,
    use_gpu: bool | None,
    content_mode: str | None,
    chapters: str | None,
    pages: str | None,
    start_chapter: int | None,
    start_page: int | None,
    page_size: int | None,
    resume: bool,
    list_content: bool,
    split_mode: str | None,
    pause_clause: float | None,
    pause_sentence: float | None,
    pause_paragraph: float | None,
    pause_variance: float | None,
    pause_mode: str | None,
) -> None:
    """Read an EPUB or text file aloud with streaming playback.

    Streams audio in real-time without creating output files.
    Supports chapter/page selection, position saving, and resume.

    \b
    Examples:
        ttsforge read book.epub
        ttsforge read book.epub --chapters "1-5"
        ttsforge read book.epub --mode pages --pages "1-50"
        ttsforge read book.epub --mode pages --start-page 10
        ttsforge read book.epub --start-chapter 3
        ttsforge read book.epub --resume
        ttsforge read book.epub --split sentence
        ttsforge read book.epub --list
        ttsforge read story.txt
        cat story.txt | ttsforge read -

    \b
    Controls:
        Ctrl+C - Stop reading (position is saved for resume)
    """
    # NOTE: the docstring above is rendered by click as the --help text, so
    # implementation notes live in comments instead.
    #
    # Flow: resolve settings (CLI > config > built-in default) -> load content
    # from stdin/EPUB/text -> apply selection and resume -> build the TTS
    # pipeline -> play segment by segment while the next one is synthesized in
    # a background worker. Exits via sys.exit(1) on input/initialization errors.
    import concurrent.futures
    import random
    import signal
    import time

    from pykokoro import GenerationConfig, KokoroPipeline, PipelineConfig
    from pykokoro.onnx_backend import LANG_CODE_TO_ONNX, Kokoro
    from pykokoro.stages.audio_generation.onnx import OnnxAudioGenerationAdapter
    from pykokoro.stages.audio_postprocessing.onnx import OnnxAudioPostprocessingAdapter
    from pykokoro.stages.phoneme_processing.onnx import OnnxPhonemeProcessorAdapter

    from ..audio_player import (
        PlaybackPosition,
        clear_playback_position,
        load_playback_position,
        save_playback_position,
    )

    # Get model path from global context
    model_path = ctx.obj.get("model_path") if ctx.obj else None
    voices_path = ctx.obj.get("voices_path") if ctx.obj else None

    # Load config for defaults
    config = load_config()
    resolved_defaults = resolve_conversion_defaults(
        config,
        {
            "voice": voice,
            "language": language,
            "speed": speed,
            "split_mode": split_mode,
            "use_gpu": use_gpu,
            "lang": None,
        },
    )
    effective_voice = resolved_defaults["voice"]
    effective_language = resolved_defaults["language"]
    effective_speed = resolved_defaults["speed"]
    effective_use_gpu = resolved_defaults["use_gpu"]
    # Content mode: chapters or pages
    effective_content_mode = content_mode or config.get(
        "default_content_mode", "chapters"
    )
    effective_page_size = page_size or config.get("default_page_size", 2000)
    # Use default_split_mode from config, map "auto" to "sentence" for streaming
    config_split_mode = resolved_defaults["split_mode"]
    # Map auto/clause/line to sentence for the read command
    if config_split_mode in ("auto", "clause", "line"):
        effective_split_mode = "sentence"
    else:
        effective_split_mode = config_split_mode
    # Pause settings (an explicit 0 on the command line must win over config,
    # hence the "is not None" checks instead of "or")
    effective_pause_clause = (
        pause_clause if pause_clause is not None else config.get("pause_clause", 0.25)
    )
    effective_pause_sentence = (
        pause_sentence
        if pause_sentence is not None
        else config.get("pause_sentence", 0.2)
    )
    effective_pause_paragraph = (
        pause_paragraph
        if pause_paragraph is not None
        else config.get("pause_paragraph", 0.75)
    )
    effective_pause_variance = (
        pause_variance
        if pause_variance is not None
        else config.get("pause_variance", 0.05)
    )
    effective_pause_mode = (
        pause_mode if pause_mode is not None else config.get("pause_mode", "auto")
    )

    # Get language code for TTS
    espeak_lang = LANG_CODE_TO_ONNX.get(effective_language, "en-us")

    # Validate conflicting options
    if effective_content_mode == "chapters" and (pages or start_page):
        console.print(
            "[yellow]Warning:[/yellow] --pages/--start-page ignored in chapters mode. "
            "Use --mode pages to read by pages."
        )
    if effective_content_mode == "pages" and (chapters or start_chapter):
        console.print(
            "[yellow]Warning:[/yellow] --chapters/--start-chapter ignored in "
            "pages mode. Use --mode chapters to read by chapters."
        )

    # Handle stdin input
    content_data: list[ContentItem]
    if input_file is None or str(input_file) == "-":
        if sys.stdin.isatty():
            console.print(
                "[red]Error:[/red] No input provided. Provide a file or pipe text."
            )
            console.print("[dim]Usage: ttsforge read book.epub[/dim]")
            console.print("[dim] cat story.txt | ttsforge read -[/dim]")
            sys.exit(1)

        # Read from stdin
        text_content = sys.stdin.read().strip()
        if not text_content:
            console.print("[red]Error:[/red] No text received from stdin.")
            sys.exit(1)

        # Create a simple structure for stdin text
        content_data = [
            cast(ContentItem, {"title": "Text", "text": text_content, "index": 0})
        ]
        file_identifier = "stdin"
        content_label = "section"  # Generic label for stdin
    else:
        # Validate file exists (removed exists=True from click.Path for stdin)
        if not input_file.exists():
            console.print(f"[red]Error:[/red] File not found: {input_file}")
            sys.exit(1)

        file_identifier = str(input_file.resolve())

        # Handle different file types using InputReader
        try:
            from ..input_reader import InputReader

            reader = InputReader(input_file)
            metadata = reader.get_metadata()
        except Exception as e:
            console.print(f"[red]Error loading file:[/red] {e}")
            sys.exit(1)

        # Show book info
        title = metadata.title or input_file.stem
        author = metadata.authors[0] if metadata.authors else "Unknown"
        console.print(f"[bold]{title}[/bold] by {author}")

        # For EPUB files, check if we can use pages mode
        if input_file.suffix.lower() == ".epub":
            # Load content based on mode (chapters or pages)
            if effective_content_mode == "pages":
                try:
                    from epub2text import EPUBParser

                    parser = EPUBParser(str(input_file))
                    epub_pages = parser.get_pages(
                        synthetic_page_size=effective_page_size
                    )
                except Exception as e:
                    console.print(f"[red]Error loading pages:[/red] {e}")
                    sys.exit(1)

                if not epub_pages:
                    console.print("[red]Error:[/red] No pages found in EPUB file.")
                    sys.exit(1)

                # Check if using native or synthetic pages
                has_native = parser.has_page_list()
                page_type = "native" if has_native else "synthetic"
                console.print(f"[dim]{len(epub_pages)} pages ({page_type})[/dim]")

                # Convert to our format
                content_data = [
                    cast(
                        ContentItem,
                        {
                            "title": f"Page {p.page_number}",
                            "text": p.text,
                            "index": i,
                            "page_number": p.page_number,
                        },
                    )
                    for i, p in enumerate(epub_pages)
                ]
                content_label = "page"
            else:
                # Default: chapters mode
                epub_chapters = reader.get_chapters()

                if not epub_chapters:
                    console.print("[red]Error:[/red] No chapters found in file.")
                    sys.exit(1)

                console.print(f"[dim]{len(epub_chapters)} chapters[/dim]")

                # Convert to our format - remove chapter markers
                content_data = [
                    cast(
                        ContentItem,
                        {
                            "title": ch.title or f"Chapter {i + 1}",
                            "text": re.sub(
                                r"^\s*<<CHAPTER:[^>]*>>\s*\n*",
                                "",
                                ch.text,
                                count=1,
                                flags=re.MULTILINE,
                            ),
                            "index": i,
                        },
                    )
                    for i, ch in enumerate(epub_chapters)
                ]
                content_label = "chapter"

        elif input_file.suffix.lower() in (".txt", ".text", ".ssmd"):
            # Plain text file - use InputReader's chapters
            text_chapters = reader.get_chapters()

            if not text_chapters:
                console.print("[red]Error:[/red] No content found in file.")
                sys.exit(1)

            # If it's a single chapter, use it as-is
            # If multiple chapters detected, use them
            content_data = [
                cast(
                    ContentItem,
                    {
                        "title": ch.title or input_file.stem,
                        "text": ch.text,
                        "index": i,
                    },
                )
                for i, ch in enumerate(text_chapters)
            ]
            content_label = "chapter" if len(text_chapters) > 1 else "section"
        else:
            console.print(
                f"[red]Error:[/red] Unsupported file type: {input_file.suffix}"
            )
            # List every suffix the branches above actually accept.
            console.print(
                "[dim]Supported formats: .epub, .txt, .text, .ssmd[/dim]"
            )
            sys.exit(1)

    # List content and exit if requested
    if list_content:
        console.print()
        for item in content_data:
            idx = item["index"] + 1
            item_title = item["title"]
            text_preview = item["text"][:80].replace("\n", " ").strip()
            if len(item["text"]) > 80:
                text_preview += "..."
            console.print(f"[bold]{idx:3}.[/bold] {item_title}")
            console.print(f" [dim]{text_preview}[/dim]")
        return

    # Content selection (chapters or pages)
    selected_indices: list[int] | None = None

    if effective_content_mode == "pages":
        # Page selection
        if pages:
            try:
                selected_indices = parse_chapter_selection(pages, len(content_data))
            except ValueError as exc:
                console.print(f"[yellow]{exc}[/yellow]")
                sys.exit(1)
        elif start_page is not None:
            # "is not None" so that an explicit 0 is rejected by the range
            # check instead of being silently ignored.
            if start_page < 1 or start_page > len(content_data):
                console.print(
                    f"[red]Error:[/red] Invalid page number {start_page}. "
                    f"Valid range: 1-{len(content_data)}"
                )
                sys.exit(1)
            selected_indices = list(range(start_page - 1, len(content_data)))
    else:
        # Chapter selection
        if chapters:
            try:
                selected_indices = parse_chapter_selection(chapters, len(content_data))
            except ValueError as exc:
                console.print(f"[yellow]{exc}[/yellow]")
                sys.exit(1)
        elif start_chapter is not None:
            if start_chapter < 1 or start_chapter > len(content_data):
                console.print(
                    f"[red]Error:[/red] Invalid chapter number {start_chapter}. "
                    f"Valid range: 1-{len(content_data)}"
                )
                sys.exit(1)
            selected_indices = list(range(start_chapter - 1, len(content_data)))

    # Handle resume
    start_segment_index = 0
    # Content index the saved segment offset belongs to (None = not resuming).
    # The offset must only be applied to that item, never to whatever item
    # happens to be first in the selection.
    resume_content_idx: int | None = None
    if resume:
        saved_position = load_playback_position()
        if saved_position and saved_position.file_path == file_identifier:
            # Resume from saved position
            resume_index = saved_position.chapter_index
            start_segment_index = saved_position.segment_index
            resume_content_idx = resume_index

            if selected_indices is None:
                selected_indices = list(range(resume_index, len(content_data)))
            else:
                # Filter to only include items from resume point
                selected_indices = [i for i in selected_indices if i >= resume_index]

            console.print(
                f"[yellow]Resuming from {content_label} {resume_index + 1}, "
                f"segment {start_segment_index + 1}[/yellow]"
            )
        else:
            console.print(
                "[dim]No saved position found for this file, "
                "starting from beginning.[/dim]"
            )

    # Final selection
    if selected_indices is None:
        selected_indices = list(range(len(content_data)))

    if not selected_indices:
        console.print(f"[yellow]No {content_label}s to read.[/yellow]")
        return

    console.print()
    lang_desc = LANGUAGE_DESCRIPTIONS.get(effective_language, effective_language)
    console.print(
        f"[dim]Voice: {effective_voice} | Language: {lang_desc} | "
        f"Speed: {effective_speed}x[/dim]"
    )
    console.print()

    # Initialize TTS pipeline
    console.print("[dim]Loading TTS model...[/dim]")
    try:
        kokoro = Kokoro(
            model_path=model_path,
            voices_path=voices_path,
            use_gpu=effective_use_gpu,
        )
        generation = GenerationConfig(
            speed=effective_speed,
            lang=espeak_lang,
            pause_mode=cast(Literal["tts", "manual", "auto"], effective_pause_mode),
            pause_clause=effective_pause_clause,
            pause_sentence=effective_pause_sentence,
            pause_paragraph=effective_pause_paragraph,
            pause_variance=effective_pause_variance,
        )
        pipeline_config = PipelineConfig(
            voice=effective_voice,
            generation=generation,
            model_path=model_path,
            voices_path=voices_path,
        )
        pipeline = KokoroPipeline(
            pipeline_config,
            phoneme_processing=OnnxPhonemeProcessorAdapter(kokoro),
            audio_generation=OnnxAudioGenerationAdapter(kokoro),
            audio_postprocessing=OnnxAudioPostprocessingAdapter(kokoro),
        )
    except Exception as e:
        console.print(f"[red]Error initializing TTS:[/red] {e}")
        sys.exit(1)

    # Track current position for saving
    current_content_idx = selected_indices[0]
    current_segment_idx = 0
    stop_requested = False

    def signal_handler(signum: int, frame: FrameType | None) -> None:
        """Handle Ctrl+C gracefully."""
        nonlocal stop_requested
        console.print("\n[yellow]Stopping... (position saved)[/yellow]")
        stop_requested = True

    # Set up signal handler
    original_handler = signal.signal(signal.SIGINT, signal_handler)

    # Created inside the try block; tracked here so "finally" can always
    # release it, including on the early return and on errors.
    executor = None

    try:
        import sounddevice as sd

        # Create a thread pool for TTS generation (1 worker for lookahead)
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)

        def generate_audio(text_segment: str) -> tuple[np.ndarray, int]:
            """Generate audio for a text segment."""
            result = pipeline.run(text_segment)
            return result.audio, result.sample_rate

        # Collect all segments across content items with their metadata
        all_segments: list[
            tuple[int, int, str, str]
        ] = []  # (content_idx, seg_idx, text, display)

        for content_idx in selected_indices:
            content_item = content_data[content_idx]
            text = content_item["text"].strip()
            if not text:
                continue

            segments = _split_text_into_segments(text, split_mode=effective_split_mode)

            # Skip already-played segments, but only inside the item the saved
            # position refers to.
            seg_offset = 0
            if content_idx == resume_content_idx and start_segment_index > 0:
                segments = segments[start_segment_index:]
                seg_offset = start_segment_index

            for seg_idx, segment in enumerate(segments):
                actual_seg_idx = seg_idx + seg_offset
                # Clean up text for display (normalize whitespace)
                display_text = " ".join(segment.split())
                all_segments.append(
                    (content_idx, actual_seg_idx, segment, display_text)
                )

        if not all_segments:
            console.print("[yellow]No text to read.[/yellow]")
            return

        # Pre-generate first segment
        current_future = executor.submit(generate_audio, all_segments[0][2])
        next_future = None

        last_content_idx = -1

        for i, (content_idx, seg_idx, _segment_text, display_text) in enumerate(
            all_segments
        ):
            if stop_requested:
                break

            current_content_idx = content_idx
            current_segment_idx = seg_idx

            # Detect content change for paragraph pause
            content_changed = content_idx != last_content_idx

            # Show header when content item changes
            if content_changed:
                content_item = content_data[content_idx]
                console.print()
                label = content_label.capitalize()
                console.print(
                    f"[bold cyan]{label} {content_idx + 1}:[/bold cyan] "
                    f"{content_item['title']}"
                )
                console.print("-" * 60)
                if content_idx == resume_content_idx and start_segment_index > 0:
                    console.print(
                        f"[dim](resuming from segment {start_segment_index + 1})[/dim]"
                    )
                last_content_idx = content_idx

            # Display current segment
            console.print(f"[dim]{display_text}[/dim]")

            # Start generating next segment while we wait for current
            if i + 1 < len(all_segments):
                next_future = executor.submit(generate_audio, all_segments[i + 1][2])

            # Wait for current audio to be ready
            try:
                audio, sample_rate = current_future.result(timeout=60)
            except Exception as e:
                console.print(f"[red]TTS error:[/red] {e}")
                # Move to next segment's future
                if next_future:
                    current_future = next_future
                    next_future = None
                continue

            # Play audio
            if not stop_requested:
                sd.play(audio, sample_rate)
                sd.wait()

            # Add pause after segment (if not the last segment)
            if i + 1 < len(all_segments) and not stop_requested:
                next_content_idx = all_segments[i + 1][0]
                if next_content_idx != content_idx:
                    # Paragraph pause (between content items)
                    pause = effective_pause_paragraph + random.uniform(
                        -effective_pause_variance, effective_pause_variance
                    )
                else:
                    # Segment pause (within content item)
                    pause = effective_pause_sentence + random.uniform(
                        -effective_pause_variance, effective_pause_variance
                    )
                time.sleep(max(0, pause))  # Ensure non-negative

            # Swap futures: next becomes current
            if next_future:
                current_future = next_future
                next_future = None

        # Finished
        if not stop_requested:
            # Clear saved position on successful completion
            clear_playback_position()
            console.print("\n[green]Finished reading.[/green]")
        else:
            # Save position for resume
            position = PlaybackPosition(
                file_path=file_identifier,
                chapter_index=current_content_idx,
                segment_index=current_segment_idx,
            )
            save_playback_position(position)
            label = content_label.capitalize()
            console.print(
                f"[dim]Position saved: {label} {current_content_idx + 1}, "
                f"Segment {current_segment_idx + 1}[/dim]"
            )
            console.print("[dim]Use --resume to continue from this position.[/dim]")

    except Exception as e:
        console.print(f"[red]Error during playback:[/red] {e}")
        # Save position on error too
        position = PlaybackPosition(
            file_path=file_identifier,
            chapter_index=current_content_idx,
            segment_index=current_segment_idx,
        )
        save_playback_position(position)
        raise
    finally:
        # Restore original signal handler
        signal.signal(signal.SIGINT, original_handler)
        # Drop queued look-ahead work without blocking on the in-flight task.
        if executor is not None:
            executor.shutdown(wait=False, cancel_futures=True)
        kokoro.close()
1846
+
1847
+
1848
+ def _split_text_into_segments(
1849
+ text: str, split_mode: str = "paragraph", max_length: int = 500
1850
+ ) -> list[str]:
1851
+ """Split text into readable segments for streaming.
1852
+
1853
+ Args:
1854
+ text: Text to split
1855
+ split_mode: "sentence" for individual sentences, "paragraph" for grouped
1856
+ max_length: Maximum segment length (used for paragraph mode)
1857
+
1858
+ Returns:
1859
+ List of text segments
1860
+ """
1861
+ import re
1862
+
1863
+ # First split on sentence-ending punctuation
1864
+ sentence_pattern = r"(?<=[.!?])\s+"
1865
+ sentences = re.split(sentence_pattern, text)
1866
+ sentences = [s.strip() for s in sentences if s.strip()]
1867
+
1868
+ if split_mode == "sentence":
1869
+ # Return individual sentences, but split very long ones
1870
+ result = []
1871
+ for sentence in sentences:
1872
+ if len(sentence) > max_length:
1873
+ # Split long sentences on clause boundaries
1874
+ clause_parts = re.split(r"(?<=[,;:])\s+", sentence)
1875
+ for part in clause_parts:
1876
+ part = part.strip()
1877
+ if part:
1878
+ result.append(part)
1879
+ else:
1880
+ result.append(sentence)
1881
+ return result
1882
+
1883
+ # Paragraph mode: group sentences up to max_length
1884
+ segments = []
1885
+ current_segment = ""
1886
+
1887
+ for sentence in sentences:
1888
+ # If adding this sentence would exceed max_length
1889
+ if len(current_segment) + len(sentence) + 1 > max_length:
1890
+ if current_segment:
1891
+ segments.append(current_segment.strip())
1892
+
1893
+ # If single sentence is too long, split it further
1894
+ if len(sentence) > max_length:
1895
+ # Split on clause boundaries
1896
+ clause_parts = re.split(r"(?<=[,;:])\s+", sentence)
1897
+ for part in clause_parts:
1898
+ part = part.strip()
1899
+ if len(part) > max_length:
1900
+ # Last resort: split at word boundaries
1901
+ words = part.split()
1902
+ sub_segment = ""
1903
+ for word in words:
1904
+ if len(sub_segment) + len(word) + 1 > max_length:
1905
+ if sub_segment:
1906
+ segments.append(sub_segment.strip())
1907
+ sub_segment = word
1908
+ else:
1909
+ sub_segment = (
1910
+ f"{sub_segment} {word}" if sub_segment else word
1911
+ )
1912
+ if sub_segment:
1913
+ current_segment = sub_segment
1914
+ else:
1915
+ segments.append(part)
1916
+ current_segment = ""
1917
+ else:
1918
+ current_segment = sentence
1919
+ else:
1920
+ current_segment = (
1921
+ f"{current_segment} {sentence}" if current_segment else sentence
1922
+ )
1923
+
1924
+ if current_segment.strip():
1925
+ segments.append(current_segment.strip())
1926
+
1927
+ return [s for s in segments if s.strip()]