ttsforge 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ttsforge/conversion.py ADDED
@@ -0,0 +1,1090 @@
1
+ """TTS conversion module for ttsforge - converts text/EPUB to audiobooks."""
2
+
3
+ import hashlib
4
+ import json
5
+ import re
6
+ import threading
7
+ import time
8
+ from collections.abc import Callable
9
+ from dataclasses import dataclass, field
10
+ from pathlib import Path
11
+ from typing import Any, Literal, Optional, cast
12
+
13
+ import soundfile as sf
14
+
15
+ from .audio_merge import AudioMerger, MergeMeta
16
+ from .constants import (
17
+ DEFAULT_VOICE_FOR_LANG,
18
+ ISO_TO_LANG_CODE,
19
+ SAMPLE_RATE,
20
+ SUPPORTED_OUTPUT_FORMATS,
21
+ VOICE_PREFIX_TO_LANG,
22
+ )
23
+ from .kokoro_lang import get_onnx_lang_code
24
+ from .kokoro_runner import KokoroRunner, KokoroRunOptions
25
+ from .ssmd_generator import (
26
+ SSMDGenerationError,
27
+ chapter_to_ssmd,
28
+ load_ssmd_file,
29
+ save_ssmd_file,
30
+ )
31
+ from .utils import (
32
+ atomic_write_json,
33
+ format_duration,
34
+ format_filename_template,
35
+ load_phoneme_dictionary,
36
+ prevent_sleep_end,
37
+ prevent_sleep_start,
38
+ sanitize_filename,
39
+ )
40
+
41
+
42
+ @dataclass
43
+ class Chapter:
44
+ """Represents a chapter from an EPUB or text file."""
45
+
46
+ title: str
47
+ content: str
48
+ index: int = 0
49
+ html_content: str | None = None # Optional HTML for emphasis detection
50
+ is_ssmd: bool = False
51
+
52
+ @property
53
+ def char_count(self) -> int:
54
+ return len(self.content)
55
+
56
+ @property
57
+ def text(self) -> str:
58
+ """Alias for content to maintain compatibility with input_reader.Chapter."""
59
+ return self.content
60
+
61
+
62
@dataclass
class ConversionProgress:
    """Snapshot of how far a running conversion has got."""

    current_chapter: int = 0
    total_chapters: int = 0
    chapter_name: str = ""
    chars_processed: int = 0
    total_chars: int = 0
    current_text: str = ""
    elapsed_time: float = 0.0
    estimated_remaining: float = 0.0

    @property
    def percent(self) -> int:
        """Return completion as a whole percentage, never more than 99."""
        total = self.total_chars
        if total == 0:
            # Nothing to process yet; avoid dividing by zero.
            return 0
        ratio = self.chars_processed / total
        return min(int(ratio * 100), 99)

    @property
    def etr_formatted(self) -> str:
        """Return ``estimated_remaining`` rendered by ``format_duration``."""
        remaining = self.estimated_remaining
        return format_duration(remaining)
84
+
85
+
86
+ @dataclass
87
+ class ConversionResult:
88
+ """Result of a conversion operation."""
89
+
90
+ success: bool
91
+ output_path: Path | None = None
92
+ subtitle_path: Path | None = None
93
+ error_message: str | None = None
94
+ chapters_dir: Path | None = None
95
+
96
+
97
+ @dataclass
98
+ class ChapterState:
99
+ """State of a single chapter conversion."""
100
+
101
+ index: int
102
+ title: str
103
+ content_hash: str # Hash of chapter content for integrity check
104
+ completed: bool = False
105
+ audio_file: str | None = None # Relative path to chapter audio
106
+ duration: float = 0.0 # Duration in seconds
107
+ char_count: int = 0
108
+ ssmd_file: str | None = None # Relative path to SSMD file
109
+ ssmd_hash: str | None = None # Hash of SSMD content for change detection
110
+
111
+
112
@dataclass
class ConversionState:
    """Persistent state for resumable conversions.

    Instances are written to and restored from a JSON file by ``save`` and
    ``load``; the conversion settings are stored alongside the per-chapter
    progress so a resumed run can reuse them.
    """

    version: int = 1
    source_file: str = ""
    source_hash: str = ""  # Hash of source file for change detection
    output_file: str = ""
    work_dir: str = ""
    voice: str = ""
    language: str = ""
    speed: float = 1.0
    split_mode: str = "auto"
    output_format: str = "m4b"
    silence_between_chapters: float = 2.0
    pause_clause: float = 0.25
    pause_sentence: float = 0.2
    pause_paragraph: float = 0.75
    pause_variance: float = 0.05
    pause_mode: str = "auto"  # "tts", "manual", or "auto"
    lang: str | None = None  # Language override for phonemization
    chapters: list[ChapterState] = field(default_factory=list)
    started_at: str = ""
    last_updated: str = ""

    @classmethod
    def load(cls, state_file: Path) -> Optional["ConversionState"]:
        """Load state from a JSON file.

        Older state files are migrated: the legacy ``segment_pause_*`` and
        ``paragraph_pause_*`` ranges are converted to the current pause
        settings, and missing settings receive their defaults.

        Args:
            state_file: Path of the JSON state file.

        Returns:
            The restored state, or ``None`` when the file does not exist, is
            not valid UTF-8 JSON, does not contain a JSON object, or does not
            match the expected fields.
        """
        if not state_file.exists():
            return None
        try:
            with open(state_file, encoding="utf-8") as f:
                data = json.load(f)

            # json.load may return any JSON value (list, string, number...);
            # only an object can describe a conversion state.
            if not isinstance(data, dict):
                return None

            # Reconstruct ChapterState objects
            data["chapters"] = [ChapterState(**ch) for ch in data.get("chapters", [])]

            # Handle missing fields for backward compatibility
            data.setdefault("silence_between_chapters", 2.0)

            # Migrate old pause parameters to new system: a (min, max) range
            # becomes its midpoint, and a quarter of its width the variance.
            if "segment_pause_min" in data or "segment_pause_max" in data:
                seg_min = data.get("segment_pause_min", 0.1)
                seg_max = data.get("segment_pause_max", 0.3)
                data["pause_sentence"] = (seg_min + seg_max) / 2.0
                if "pause_variance" not in data:
                    data["pause_variance"] = max(0.01, (seg_max - seg_min) / 4.0)

            if "paragraph_pause_min" in data or "paragraph_pause_max" in data:
                para_min = data.get("paragraph_pause_min", 0.5)
                para_max = data.get("paragraph_pause_max", 1.0)
                data["pause_paragraph"] = (para_min + para_max) / 2.0

            # The legacy keys are not constructor arguments; drop them.
            for legacy_key in (
                "segment_pause_min",
                "segment_pause_max",
                "paragraph_pause_min",
                "paragraph_pause_max",
            ):
                data.pop(legacy_key, None)

            # Set defaults for new parameters
            for key, default in (
                ("pause_clause", 0.25),
                ("pause_sentence", 0.2),
                ("pause_paragraph", 0.75),
                ("pause_variance", 0.05),
                ("pause_mode", "auto"),
                ("lang", None),
            ):
                data.setdefault(key, default)

            return cls(**data)
        except (ValueError, TypeError, KeyError):
            # ValueError covers both json.JSONDecodeError and
            # UnicodeDecodeError; TypeError covers unexpected or malformed
            # fields. In every case the file is treated as unusable.
            return None

    def save(self, state_file: Path) -> None:
        """Save state to a JSON file.

        Updates ``last_updated`` to the current local time before writing.

        Args:
            state_file: Destination path of the JSON state file.
        """
        self.last_updated = time.strftime("%Y-%m-%d %H:%M:%S")
        data = {
            "version": self.version,
            "source_file": self.source_file,
            "source_hash": self.source_hash,
            "output_file": self.output_file,
            "work_dir": self.work_dir,
            "voice": self.voice,
            "language": self.language,
            "speed": self.speed,
            "split_mode": self.split_mode,
            "output_format": self.output_format,
            "silence_between_chapters": self.silence_between_chapters,
            "pause_clause": self.pause_clause,
            "pause_sentence": self.pause_sentence,
            "pause_paragraph": self.pause_paragraph,
            "pause_variance": self.pause_variance,
            "pause_mode": self.pause_mode,
            "lang": self.lang,
            "chapters": [
                {
                    "index": ch.index,
                    "title": ch.title,
                    "content_hash": ch.content_hash,
                    "completed": ch.completed,
                    "audio_file": ch.audio_file,
                    "duration": ch.duration,
                    "char_count": ch.char_count,
                    "ssmd_file": ch.ssmd_file,
                    "ssmd_hash": ch.ssmd_hash,
                }
                for ch in self.chapters
            ],
            "started_at": self.started_at,
            "last_updated": self.last_updated,
        }
        atomic_write_json(state_file, data, indent=2, ensure_ascii=True)

    def get_completed_count(self) -> int:
        """Get the number of completed chapters."""
        return sum(1 for ch in self.chapters if ch.completed)

    def get_next_incomplete_index(self) -> int | None:
        """Get the index of the next incomplete chapter, or None if all are done."""
        return next((ch.index for ch in self.chapters if not ch.completed), None)

    def is_complete(self) -> bool:
        """Check if all chapters are completed."""
        return all(ch.completed for ch in self.chapters)
247
+
248
+
249
+ def _hash_content(content: str) -> str:
250
+ """Generate a hash of content for integrity checking."""
251
+ return hashlib.md5(content.encode("utf-8")).hexdigest()[:12]
252
+
253
+
254
+ def _hash_file(file_path: Path) -> str:
255
+ """Generate a hash of a file for change detection."""
256
+ if not file_path.exists():
257
+ return ""
258
+ hasher = hashlib.md5()
259
+ with open(file_path, "rb") as f:
260
+ for chunk in iter(lambda: f.read(8192), b""):
261
+ hasher.update(chunk)
262
+ return hasher.hexdigest()[:12]
263
+
264
+
265
# Split mode options: the names a `split_mode` setting can take
# ("auto" is the default used by the option and state dataclasses).
SPLIT_MODES = ["auto", "line", "paragraph", "sentence", "clause"]
267
+
268
+
269
+ @dataclass
270
+ class ConversionOptions:
271
+ """Options for TTS conversion."""
272
+
273
+ voice: str = "af_bella"
274
+ language: str = "a"
275
+ speed: float = 1.0
276
+ output_format: str = "m4b"
277
+ output_dir: Path | None = None
278
+ use_gpu: bool = False # GPU requires onnxruntime-gpu
279
+ silence_between_chapters: float = 2.0
280
+ # Language override for phonemization (e.g., 'de', 'en-us', 'fr')
281
+ # If None, language is determined from voice prefix
282
+ lang: str | None = None
283
+ # Mixed-language support (auto-detect and handle multiple languages)
284
+ use_mixed_language: bool = False
285
+ mixed_language_primary: str | None = None
286
+ mixed_language_allowed: list[str] | None = None
287
+ mixed_language_confidence: float = 0.7
288
+ # Custom phoneme dictionary for pronunciation overrides
289
+ phoneme_dictionary_path: str | None = None
290
+ phoneme_dict_case_sensitive: bool = False
291
+ # Pause settings (pykokoro built-in pause handling)
292
+ pause_clause: float = 0.25 # For clause boundaries (commas)
293
+ pause_sentence: float = 0.2 # For sentence boundaries
294
+ pause_paragraph: float = 0.75 # For paragraph boundaries
295
+ pause_variance: float = 0.05 # Standard deviation for natural variation
296
+ pause_mode: str = "auto" # "tts", "manual", or "auto
297
+ # Chapter announcement settings
298
+ announce_chapters: bool = True # Read chapter titles aloud before content
299
+ chapter_pause_after_title: float = 2.0 # Pause after chapter title (seconds)
300
+ save_chapters_separately: bool = False
301
+ merge_at_end: bool = True
302
+ # Split mode: auto, line, paragraph, sentence, clause
303
+ split_mode: str = "auto"
304
+ # Resume capability
305
+ resume: bool = True # Enable resume by default for long conversions
306
+ keep_chapter_files: bool = False # Keep individual chapter files after merge
307
+ # Metadata for m4b
308
+ title: str | None = None
309
+ author: str | None = None
310
+ cover_image: Path | None = None
311
+ # Voice blending (e.g., "af_nicole:50,am_michael:50")
312
+ voice_blend: str | None = None
313
+ # Voice database for custom/synthetic voices
314
+ voice_database: Path | None = None
315
+ # Filename template for chapter files
316
+ chapter_filename_template: str = "{chapter_num:03d}_{book_title}_{chapter_title}"
317
+ # Custom ONNX model path (None = use default downloaded model)
318
+ model_path: Path | None = None
319
+ # Custom voices.bin path (None = use default downloaded voices)
320
+ voices_path: Path | None = None
321
+ # SSMD generation control
322
+ generate_ssmd_only: bool = False # If True, only generate SSMD files, no audio
323
+ detect_emphasis: bool = False # If True, detect emphasis from HTML tags in EPUB
324
+
325
+
326
# Pattern to detect chapter markers in text.
#
# A marker is a line of its own that is either a chapter keyword followed by
# a roman or arabic number (optionally followed by a separator and a title),
# or one of Prologue/Epilogue/Introduction (optionally followed by a title).
# Matching is case-insensitive.
#
# The roman-numeral branch carries a negative lookbehind so the numeral may
# not be glued to a preceding letter. Without it the short keyword "Ch"
# combined with case-insensitive [IVXLCDM]+ matches ordinary words such as
# "Child" or "Chill" as if they were "Ch" + numeral. Digits may still follow
# the keyword directly ("Chapter1").
CHAPTER_PATTERN = re.compile(
    r"(?:^|\n)\s*(?:"
    r"(?:Chapter|CHAPTER|Ch\.?|Kapitel|Chapitre|Capitulo|Capitolo)\s*"
    r"(?:(?<![A-Za-z])[IVXLCDM]+|\d+)"
    r"(?:\s*[:\-\.\s]\s*.*)?"
    r"|"
    r"(?:Prologue|PROLOGUE|Epilogue|EPILOGUE|Introduction|INTRODUCTION)"
    r"(?:\s*[:\-\.\s]\s*.*)?"
    r")\s*(?:\n|$)",
    re.MULTILINE | re.IGNORECASE,
)
338
+
339
+
340
def detect_language_from_iso(iso_code: str | None) -> str:
    """Map an ISO language code to the ttsforge language code.

    The lower-cased, stripped code is looked up as-is first; failing that,
    its first two characters are tried. ``"a"`` (American English) is
    returned when nothing matches or no code is given.
    """
    fallback = "a"  # Default to American English
    if not iso_code:
        return fallback

    normalized = iso_code.lower().strip()
    if normalized in ISO_TO_LANG_CODE:
        return ISO_TO_LANG_CODE[normalized]
    return ISO_TO_LANG_CODE.get(normalized[:2], fallback)
346
+
347
+
348
def get_voice_language(voice: str) -> str:
    """Return the language code implied by a voice name's two-character prefix.

    Falls back to ``"a"`` when the name is shorter than two characters or
    the prefix is unknown.
    """
    if len(voice) < 2:
        voice_prefix = ""
    else:
        voice_prefix = voice[:2]
    return VOICE_PREFIX_TO_LANG.get(voice_prefix, "a")
352
+
353
+
354
def get_default_voice_for_language(lang_code: str) -> str:
    """Return the default voice for ``lang_code``, or "af_bella" if unknown."""
    fallback_voice = "af_bella"
    return DEFAULT_VOICE_FOR_LANG.get(lang_code, fallback_voice)
357
+
358
+
359
+ class TTSConverter:
360
+ """Converts text to speech using Kokoro ONNX TTS."""
361
+
362
+ def __init__(
363
+ self,
364
+ options: ConversionOptions,
365
+ progress_callback: Callable[[ConversionProgress], None] | None = None,
366
+ log_callback: Callable[[str, str], None] | None = None,
367
+ ) -> None:
368
+ """
369
+ Initialize the TTS converter.
370
+
371
+ Args:
372
+ options: Conversion options
373
+ progress_callback: Called with progress updates
374
+ log_callback: Called with log messages (message, level)
375
+ """
376
+ self.options = options
377
+ self.progress_callback = progress_callback
378
+ self.log_callback = log_callback
379
+ self._cancel_event = threading.Event()
380
+ self._runner: KokoroRunner | None = None
381
+ self._merger = AudioMerger(log=self.log)
382
+
383
+ @property
384
+ def _cancelled(self) -> bool:
385
+ return self._cancel_event.is_set()
386
+
387
+ def log(self, message: str, level: str = "info") -> None:
388
+ """Log a message."""
389
+ if self.log_callback:
390
+ self.log_callback(message, level)
391
+
392
+ def cancel(self) -> None:
393
+ """Request cancellation of the conversion."""
394
+ self._cancel_event.set()
395
+
396
+ def _init_runner(self) -> None:
397
+ """Initialize the Kokoro runner."""
398
+ if self._runner is not None:
399
+ return
400
+
401
+ self.log("Initializing ONNX TTS pipeline...")
402
+
403
+ # Create TokenizerConfig from ConversionOptions (for mixed-language support)
404
+ from pykokoro.tokenizer import TokenizerConfig
405
+
406
+ tokenizer_config = TokenizerConfig(
407
+ use_mixed_language=self.options.use_mixed_language,
408
+ mixed_language_primary=self.options.mixed_language_primary,
409
+ mixed_language_allowed=self.options.mixed_language_allowed,
410
+ mixed_language_confidence=self.options.mixed_language_confidence,
411
+ phoneme_dictionary_path=self.options.phoneme_dictionary_path,
412
+ phoneme_dict_case_sensitive=self.options.phoneme_dict_case_sensitive,
413
+ )
414
+
415
+ opts = KokoroRunOptions(
416
+ voice=self.options.voice,
417
+ speed=self.options.speed,
418
+ use_gpu=self.options.use_gpu,
419
+ pause_clause=self.options.pause_clause,
420
+ pause_sentence=self.options.pause_sentence,
421
+ pause_paragraph=self.options.pause_paragraph,
422
+ pause_variance=self.options.pause_variance,
423
+ model_path=self.options.model_path,
424
+ voices_path=self.options.voices_path,
425
+ voice_blend=self.options.voice_blend,
426
+ voice_database=self.options.voice_database,
427
+ tokenizer_config=tokenizer_config,
428
+ )
429
+ self._runner = KokoroRunner(opts, log=self.log)
430
+ self._runner.ensure_ready()
431
+
432
+ def _build_ssmd_content(
433
+ self,
434
+ chapter: Chapter,
435
+ phoneme_dict: dict[str, str] | None,
436
+ mixed_language_config: dict[str, Any] | None,
437
+ html_content: str | None,
438
+ ) -> str:
439
+ """Generate SSMD content for a chapter, falling back to plain text."""
440
+ try:
441
+ return chapter_to_ssmd(
442
+ chapter_title=chapter.title,
443
+ chapter_text=chapter.text,
444
+ phoneme_dict=phoneme_dict,
445
+ phoneme_dict_case_sensitive=self.options.phoneme_dict_case_sensitive,
446
+ mixed_language_config=mixed_language_config,
447
+ html_content=html_content,
448
+ include_title=self.options.announce_chapters,
449
+ )
450
+ except SSMDGenerationError as e:
451
+ self.log(f"SSMD generation failed: {e}, using plain text", "error")
452
+ return chapter.text
453
+
454
+ def _load_or_generate_ssmd(
455
+ self,
456
+ chapter: Chapter,
457
+ ssmd_file: Path,
458
+ phoneme_dict: dict[str, str] | None,
459
+ mixed_language_config: dict[str, Any] | None,
460
+ html_content: str | None,
461
+ ) -> tuple[str, str]:
462
+ """Load SSMD from disk or generate and save it."""
463
+ ssmd_content: str | None = None
464
+ ssmd_hash = ""
465
+
466
+ if chapter.is_ssmd:
467
+ if ssmd_file.exists():
468
+ try:
469
+ ssmd_content, ssmd_hash = load_ssmd_file(ssmd_file)
470
+ self.log(f"Loaded SSMD from {ssmd_file.name}")
471
+ except SSMDGenerationError as e:
472
+ self.log(f"Failed to load SSMD: {e}, using input", "warning")
473
+ ssmd_content = None
474
+
475
+ if ssmd_content is None:
476
+ ssmd_content = chapter.text
477
+ ssmd_hash = save_ssmd_file(ssmd_content, ssmd_file)
478
+ self.log(f"Saved SSMD to {ssmd_file.name}")
479
+
480
+ return ssmd_content, ssmd_hash
481
+
482
+ if ssmd_file.exists():
483
+ try:
484
+ ssmd_content, ssmd_hash = load_ssmd_file(ssmd_file)
485
+ self.log(f"Loaded SSMD from {ssmd_file.name}")
486
+ except SSMDGenerationError as e:
487
+ self.log(f"Failed to load SSMD: {e}, regenerating...", "warning")
488
+ ssmd_content = None
489
+
490
+ if ssmd_content is None:
491
+ self.log(f"Generating SSMD for chapter: {chapter.title}")
492
+ ssmd_content = self._build_ssmd_content(
493
+ chapter,
494
+ phoneme_dict=phoneme_dict,
495
+ mixed_language_config=mixed_language_config,
496
+ html_content=html_content,
497
+ )
498
+ ssmd_hash = save_ssmd_file(ssmd_content, ssmd_file)
499
+ self.log(f"Saved SSMD to {ssmd_file.name}")
500
+
501
+ return ssmd_content, ssmd_hash
502
+
503
+ def _render_chapter_wav(
504
+ self,
505
+ chapter: Chapter,
506
+ output_file: Path,
507
+ ssmd_content: str,
508
+ ) -> float:
509
+ """Render SSMD content to a chapter WAV file."""
510
+ effective_lang = (
511
+ self.options.lang if self.options.lang else self.options.language
512
+ )
513
+ lang_code = get_onnx_lang_code(effective_lang)
514
+
515
+ with sf.SoundFile(
516
+ str(output_file),
517
+ "w",
518
+ samplerate=SAMPLE_RATE,
519
+ channels=1,
520
+ format="wav",
521
+ ) as out_file:
522
+ assert self._runner is not None
523
+ samples = self._runner.synthesize(
524
+ ssmd_content,
525
+ lang_code=lang_code,
526
+ pause_mode=cast(
527
+ Literal["tts", "manual", "auto"], self.options.pause_mode
528
+ ),
529
+ is_phonemes=False,
530
+ )
531
+ out_file.write(samples)
532
+
533
+ return len(samples) / SAMPLE_RATE
534
+
535
+ def convert_chapters_resumable( # noqa: C901 - Complex but necessary for resume logic
536
+ self,
537
+ chapters: list[Chapter],
538
+ output_path: Path,
539
+ source_file: Path | None = None,
540
+ resume: bool = True,
541
+ ) -> ConversionResult:
542
+ """
543
+ Convert chapters to audio with resume capability.
544
+
545
+ Each chapter is saved as a separate WAV file, allowing conversion
546
+ to be resumed if interrupted. A state file tracks progress.
547
+
548
+ Args:
549
+ chapters: List of Chapter objects
550
+ output_path: Output file path
551
+ source_file: Original source file (for state tracking)
552
+ resume: Whether to resume from previous state
553
+
554
+ Returns:
555
+ ConversionResult with success status and paths
556
+ """
557
+ if not chapters:
558
+ return ConversionResult(
559
+ success=False, error_message="No chapters to convert"
560
+ )
561
+
562
+ if self.options.output_format not in SUPPORTED_OUTPUT_FORMATS:
563
+ return ConversionResult(
564
+ success=False,
565
+ error_message=f"Unsupported format: {self.options.output_format}",
566
+ )
567
+
568
+ self._cancel_event.clear()
569
+ prevent_sleep_start()
570
+
571
+ try:
572
+ # Set up work directory for chapter files (use book title)
573
+ safe_book_title = sanitize_filename(self.options.title or output_path.stem)[
574
+ :50
575
+ ]
576
+ work_dir = output_path.parent / f".{safe_book_title}_chapters"
577
+ work_dir.mkdir(parents=True, exist_ok=True)
578
+ state_file = work_dir / f"{safe_book_title}_state.json"
579
+
580
+ # Load or create state
581
+ state: ConversionState | None = None
582
+ if resume and state_file.exists():
583
+ state = ConversionState.load(state_file)
584
+ if state:
585
+ # Verify source file hasn't changed
586
+ source_hash = _hash_file(source_file) if source_file else ""
587
+ if source_file and state.source_hash != source_hash:
588
+ self.log(
589
+ "Source file changed, starting fresh conversion",
590
+ "warning",
591
+ )
592
+ state = None
593
+ # Verify chapter count matches
594
+ elif len(state.chapters) != len(chapters):
595
+ self.log(
596
+ f"Chapter count changed "
597
+ f"({len(state.chapters)} -> {len(chapters)}), "
598
+ "starting fresh conversion",
599
+ "warning",
600
+ )
601
+ state = None
602
+ else:
603
+ # Check if settings differ from saved state
604
+ settings_changed = (
605
+ state.voice != self.options.voice
606
+ or state.language != self.options.language
607
+ or state.speed != self.options.speed
608
+ or state.split_mode != self.options.split_mode
609
+ or state.silence_between_chapters
610
+ != self.options.silence_between_chapters
611
+ or state.pause_clause != self.options.pause_clause
612
+ or state.pause_sentence != self.options.pause_sentence
613
+ or state.pause_paragraph != self.options.pause_paragraph
614
+ or state.pause_variance != self.options.pause_variance
615
+ or state.pause_mode != self.options.pause_mode
616
+ or state.lang != self.options.lang
617
+ )
618
+
619
+ if settings_changed:
620
+ self.log(
621
+ f"Restoring settings from previous session: "
622
+ f"voice={state.voice}, language={state.language}, "
623
+ f"lang_override={state.lang}, "
624
+ f"speed={state.speed}, "
625
+ f"split_mode={state.split_mode}, "
626
+ f"silence={state.silence_between_chapters}s, "
627
+ f"pauses: clause={state.pause_clause}s "
628
+ f"sent={state.pause_sentence}s "
629
+ f"para={state.pause_paragraph}s "
630
+ f"var={state.pause_variance}s "
631
+ f"pause_mode={state.pause_mode}",
632
+ "info",
633
+ )
634
+
635
+ # Apply saved settings to options for consistency
636
+ self.options.voice = state.voice
637
+ self.options.language = state.language
638
+ self.options.speed = state.speed
639
+ self.options.split_mode = state.split_mode
640
+ self.options.output_format = state.output_format
641
+ self.options.silence_between_chapters = (
642
+ state.silence_between_chapters
643
+ )
644
+ self.options.pause_clause = state.pause_clause
645
+ self.options.pause_sentence = state.pause_sentence
646
+ self.options.pause_paragraph = state.pause_paragraph
647
+ self.options.pause_variance = state.pause_variance
648
+ self.options.pause_mode = state.pause_mode
649
+ self.options.lang = state.lang
650
+
651
+ if state is None:
652
+ # Create new state
653
+ source_hash = _hash_file(source_file) if source_file else ""
654
+ state = ConversionState(
655
+ source_file=str(source_file) if source_file else "",
656
+ source_hash=source_hash,
657
+ output_file=str(output_path),
658
+ work_dir=str(work_dir),
659
+ voice=self.options.voice,
660
+ language=self.options.language,
661
+ speed=self.options.speed,
662
+ split_mode=self.options.split_mode,
663
+ output_format=self.options.output_format,
664
+ silence_between_chapters=self.options.silence_between_chapters,
665
+ pause_clause=self.options.pause_clause,
666
+ pause_sentence=self.options.pause_sentence,
667
+ pause_paragraph=self.options.pause_paragraph,
668
+ pause_variance=self.options.pause_variance,
669
+ pause_mode=self.options.pause_mode,
670
+ lang=self.options.lang,
671
+ chapters=[
672
+ ChapterState(
673
+ index=i,
674
+ title=ch.title,
675
+ content_hash=_hash_content(ch.content),
676
+ char_count=ch.char_count,
677
+ )
678
+ for i, ch in enumerate(chapters)
679
+ ],
680
+ started_at=time.strftime("%Y-%m-%d %H:%M:%S"),
681
+ )
682
+ state.save(state_file)
683
+ else:
684
+ completed = state.get_completed_count()
685
+ total = len(chapters)
686
+ self.log(f"Resuming conversion: {completed}/{total} chapters completed")
687
+
688
+ # Initialize runner
689
+ self._init_runner()
690
+
691
+ phoneme_dict = None
692
+ if self.options.phoneme_dictionary_path:
693
+ phoneme_dict = load_phoneme_dictionary(
694
+ self.options.phoneme_dictionary_path,
695
+ case_sensitive=self.options.phoneme_dict_case_sensitive,
696
+ log_callback=lambda message: self.log(message, "warning"),
697
+ )
698
+
699
+ mixed_language_config = None
700
+ if self.options.use_mixed_language:
701
+ mixed_language_config = {
702
+ "use_mixed_language": True,
703
+ "primary": self.options.mixed_language_primary,
704
+ "allowed": self.options.mixed_language_allowed,
705
+ "confidence": self.options.mixed_language_confidence,
706
+ }
707
+
708
+ total_chars = sum(ch.char_count for ch in chapters)
709
+ # Account for already completed chapters
710
+ chars_already_done = sum(
711
+ state.chapters[i].char_count
712
+ for i in range(len(state.chapters))
713
+ if state.chapters[i].completed
714
+ )
715
+ chars_processed = chars_already_done
716
+ start_time = time.time()
717
+
718
+ progress = ConversionProgress(
719
+ total_chapters=len(chapters),
720
+ total_chars=total_chars,
721
+ chars_processed=chars_processed,
722
+ )
723
+
724
+ # Convert each chapter
725
+ for chapter_idx, chapter in enumerate(chapters):
726
+ if self._cancel_event.is_set():
727
+ state.save(state_file)
728
+ return ConversionResult(
729
+ success=False,
730
+ error_message="Cancelled",
731
+ chapters_dir=work_dir,
732
+ )
733
+
734
+ # Validate chapter index to prevent index errors
735
+ if chapter_idx >= len(state.chapters):
736
+ error_msg = (
737
+ f"Chapter index {chapter_idx} out of range. "
738
+ f"State has {len(state.chapters)} chapters "
739
+ f"but trying to access "
740
+ f"chapter {chapter_idx + 1}/{len(chapters)}. "
741
+ "This usually means the state file is corrupted. "
742
+ "Try using --fresh to start a new conversion."
743
+ )
744
+ return ConversionResult(
745
+ success=False,
746
+ error_message=error_msg,
747
+ )
748
+
749
+ chapter_state = state.chapters[chapter_idx]
750
+
751
+ # Check if SSMD file was manually edited
752
+ ssmd_edited = False
753
+ if chapter_state.ssmd_file and chapter_state.ssmd_hash:
754
+ ssmd_path = work_dir / chapter_state.ssmd_file
755
+ if ssmd_path.exists():
756
+ try:
757
+ _, current_hash = load_ssmd_file(ssmd_path)
758
+ if current_hash != chapter_state.ssmd_hash:
759
+ self.log(
760
+ f"Chapter {chapter_idx + 1} SSMD file was edited, "
761
+ "will regenerate audio",
762
+ "info",
763
+ )
764
+ ssmd_edited = True
765
+ chapter_state.completed = False
766
+ except SSMDGenerationError:
767
+ # SSMD file corrupted, will regenerate
768
+ ssmd_edited = True
769
+ chapter_state.completed = False
770
+
771
+ # Skip already completed chapters (unless SSMD was edited)
772
+ if (
773
+ chapter_state.completed
774
+ and chapter_state.audio_file
775
+ and not ssmd_edited
776
+ ):
777
+ chapter_file = work_dir / chapter_state.audio_file
778
+ if chapter_file.exists():
779
+ ch_num = chapter_idx + 1
780
+ self.log(
781
+ f"Skipping completed chapter {ch_num}: {chapter.title}"
782
+ )
783
+ continue
784
+ else:
785
+ # File missing, need to reconvert
786
+ chapter_state.completed = False
787
+
788
+ progress.current_chapter = chapter_idx + 1
789
+ progress.chapter_name = chapter.title
790
+
791
+ ch_num = chapter_idx + 1
792
+ self.log(
793
+ f"Converting chapter {ch_num}/{len(chapters)}: {chapter.title}"
794
+ )
795
+
796
+ # Generate chapter filename using template
797
+ chapter_filename = (
798
+ format_filename_template(
799
+ self.options.chapter_filename_template,
800
+ book_title=self.options.title or "Untitled",
801
+ chapter_title=chapter.title,
802
+ chapter_num=chapter_idx + 1,
803
+ )
804
+ + ".wav"
805
+ )
806
+ chapter_file = work_dir / chapter_filename
807
+
808
+ # Generate SSMD filename (same as WAV but with .ssmd extension)
809
+ ssmd_filename = chapter_filename.replace(".wav", ".ssmd")
810
+ ssmd_file = work_dir / ssmd_filename
811
+ html_content = (
812
+ chapter.html_content if self.options.detect_emphasis else None
813
+ )
814
+ ssmd_content, ssmd_hash = self._load_or_generate_ssmd(
815
+ chapter,
816
+ ssmd_file,
817
+ phoneme_dict=phoneme_dict,
818
+ mixed_language_config=mixed_language_config,
819
+ html_content=html_content,
820
+ )
821
+
822
+ # If generate_ssmd_only mode, just generate SSMD and skip audio
823
+ if self.options.generate_ssmd_only:
824
+ chapter_state.completed = True
825
+ chapter_state.ssmd_file = ssmd_filename
826
+ chapter_state.ssmd_hash = ssmd_hash
827
+ state.save(state_file)
828
+
829
+ chars_processed += chapter.char_count
830
+ progress.chars_processed = chars_processed
831
+ if self.progress_callback:
832
+ self.progress_callback(progress)
833
+ continue
834
+
835
+ duration = self._render_chapter_wav(
836
+ chapter,
837
+ chapter_file,
838
+ ssmd_content,
839
+ )
840
+
841
+ if self._cancel_event.is_set():
842
+ # Remove incomplete files
843
+ chapter_file.unlink(missing_ok=True)
844
+ ssmd_file.unlink(missing_ok=True)
845
+ state.save(state_file)
846
+ return ConversionResult(
847
+ success=False,
848
+ error_message="Cancelled",
849
+ chapters_dir=work_dir,
850
+ )
851
+
852
+ # Update state
853
+ chapter_state.completed = True
854
+ chapter_state.audio_file = chapter_filename
855
+ chapter_state.ssmd_file = ssmd_filename
856
+ chapter_state.ssmd_hash = ssmd_hash
857
+ chapter_state.duration = duration
858
+ state.save(state_file)
859
+
860
+ # Update progress
861
+ chars_processed += chapter.char_count
862
+ progress.chars_processed = chars_processed
863
+ progress.current_text = (
864
+ f"Completed chapter: {chapter.title or 'Untitled'}"
865
+ )
866
+ elapsed = time.time() - start_time
867
+ if chars_processed > chars_already_done and elapsed > 0.5:
868
+ chars_in_session = chars_processed - chars_already_done
869
+ avg_time = elapsed / chars_in_session
870
+ remaining = total_chars - chars_processed
871
+ progress.estimated_remaining = avg_time * remaining
872
+ progress.elapsed_time = elapsed
873
+
874
+ if self.progress_callback:
875
+ self.progress_callback(progress)
876
+
877
+ # If generate_ssmd_only mode, exit here without merging
878
+ if self.options.generate_ssmd_only:
879
+ self.log("SSMD generation complete!")
880
+ self.log(f"SSMD files saved in: {work_dir}")
881
+ return ConversionResult(
882
+ success=True,
883
+ chapters_dir=work_dir,
884
+ output_path=None, # No audio output in SSMD-only mode
885
+ )
886
+
887
+ # All chapters completed, merge into final output
888
+ self.log("Merging chapters into final audiobook...")
889
+
890
+ chapter_files = [
891
+ work_dir / ch.audio_file for ch in state.chapters if ch.audio_file
892
+ ]
893
+ chapter_durations = [ch.duration for ch in state.chapters]
894
+ chapter_titles = [ch.title for ch in state.chapters]
895
+
896
+ meta = MergeMeta(
897
+ fmt=self.options.output_format,
898
+ silence_between_chapters=self.options.silence_between_chapters,
899
+ title=self.options.title,
900
+ author=self.options.author,
901
+ cover_image=self.options.cover_image,
902
+ )
903
+ self._merger.merge_chapter_wavs(
904
+ chapter_files,
905
+ chapter_durations,
906
+ chapter_titles,
907
+ output_path,
908
+ meta,
909
+ )
910
+
911
+ self.log("Conversion complete!")
912
+
913
+ return ConversionResult(
914
+ success=True,
915
+ output_path=output_path,
916
+ chapters_dir=work_dir,
917
+ )
918
+
919
+ except Exception as e:
920
+ import traceback
921
+
922
+ error_msg = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
923
+ return ConversionResult(success=False, error_message=error_msg)
924
+ finally:
925
+ prevent_sleep_end()
926
+
927
def convert_chapters(
    self,
    chapters: list[Chapter],
    output_path: Path,
) -> ConversionResult:
    """
    Convert a list of chapters to audio using the SSMD pipeline.

    Delegates to ``convert_chapters_resumable`` (taking the resume flag from
    ``self.options.resume``) and then hands the result to
    ``_cleanup_chapter_dir``.

    Args:
        chapters: Chapters to convert
        output_path: Output file path

    Returns:
        ConversionResult produced by ``convert_chapters_resumable``
    """
    outcome = self.convert_chapters_resumable(
        chapters=chapters, output_path=output_path, resume=self.options.resume
    )
    # Cleanup runs for every outcome; the helper decides whether to delete.
    self._cleanup_chapter_dir(outcome)
    return outcome
940
+
941
def _cleanup_chapter_dir(self, result: ConversionResult) -> None:
    """
    Remove the chapter directory recorded on *result*, when appropriate.

    Nothing is deleted in SSMD-only mode, when the conversion did not
    succeed, when the result has no ``chapters_dir``, or when
    ``keep_chapter_files`` is set. A failed removal is logged as a warning
    instead of being raised.

    Args:
        result: Outcome of a conversion run
    """
    if self.options.generate_ssmd_only:
        return

    # Guard clause: bail out unless every precondition for deletion holds.
    if (
        not result.success
        or not result.chapters_dir
        or self.options.keep_chapter_files
    ):
        return

    import shutil

    try:
        shutil.rmtree(result.chapters_dir)
    except OSError as exc:
        message = f"Failed to clean up chapter dir {result.chapters_dir}: {exc}"
        self.log(message, "warning")
958
+
959
def convert_text(self, text: str, output_path: Path) -> ConversionResult:
    """
    Convert plain text to audio as a single chapter titled "Text".

    Args:
        text: Text to convert
        output_path: Output file path

    Returns:
        ConversionResult from ``convert_chapters``
    """
    single_chapter = Chapter(title="Text", content=text, index=0)
    return self.convert_chapters([single_chapter], output_path)
972
+
973
def convert_epub(
    self,
    epub_path: Path,
    output_path: Path,
    selected_chapters: list[int] | None = None,
) -> ConversionResult:
    """
    Convert an EPUB file to audio.

    The EPUB is parsed with ``epub2text``, the ``<<CHAPTER: ...>>`` marker
    is stripped from each chapter's text, and the chapters are passed to
    ``convert_chapters_resumable``. For ``m4b`` output, a title/author that
    is not already set in the options is filled in from the EPUB metadata
    when it can be read.

    Args:
        epub_path: Path to EPUB file
        output_path: Output file path
        selected_chapters: Optional list of zero-based chapter positions
            (in the order returned by the parser) to convert. ``None`` or
            an empty list converts every chapter.

    Returns:
        ConversionResult. ``success`` is False when the EPUB cannot be
        parsed, contains no chapters, or the selection matches no chapter.
    """
    from epub2text import EPUBParser

    self.log(f"Parsing EPUB: {epub_path}")

    # Parse EPUB using epub2text
    try:
        parser = EPUBParser(str(epub_path))
        epub_chapters = parser.get_chapters()
    except Exception as e:
        return ConversionResult(
            success=False,
            error_message=f"Failed to parse EPUB: {e}",
        )

    if not epub_chapters:
        return ConversionResult(
            success=False,
            error_message="No chapters found in EPUB",
        )

    # Filter chapters if selection provided
    if selected_chapters:
        # Build the lookup set once instead of scanning the list per chapter.
        wanted = set(selected_chapters)
        epub_chapters = [
            ch for i, ch in enumerate(epub_chapters) if i in wanted
        ]
        if not epub_chapters:
            # Fail early with a clear message rather than starting a
            # conversion (and merge) with zero chapters.
            return ConversionResult(
                success=False,
                error_message=(
                    "No chapters in EPUB match the selected chapter "
                    f"indices: {selected_chapters}"
                ),
            )

    # Convert to our Chapter format - epub2text Chapter has .text attribute
    # Remove <<CHAPTER: ...>> markers that epub2text adds at the start of content
    # since we now announce chapter titles separately
    chapters = []
    for i, ch in enumerate(epub_chapters):
        # Pattern matches: <<CHAPTER: anything>> followed by whitespace/newlines
        # NOTE(review): with re.MULTILINE, ``^`` also matches at later line
        # starts, so the first marker that begins *any* line is removed, not
        # only one at the very beginning of the text - confirm this is intended.
        content = re.sub(
            r"^\s*<<CHAPTER:[^>]*>>\s*\n*",
            "",
            ch.text,
            count=1,
            flags=re.MULTILINE,
        )
        # ``index`` is the position within the (possibly filtered) list.
        chapters.append(Chapter(title=ch.title, content=content, index=i))

    self.log(f"Found {len(chapters)} chapters")

    # Try to get metadata from EPUB for m4b
    if self.options.output_format == "m4b":
        try:
            metadata = parser.get_metadata()
            if metadata:
                # Explicitly configured title/author take precedence.
                if not self.options.title and metadata.title:
                    self.options.title = metadata.title
                if not self.options.author and metadata.authors:
                    self.options.author = metadata.authors[0]
        except (AttributeError, OSError, ValueError) as exc:
            # Metadata is optional; conversion continues without it.
            self.log(f"Failed to read EPUB metadata: {exc}", "warning")

    result = self.convert_chapters_resumable(
        chapters,
        output_path,
        source_file=epub_path,
        resume=self.options.resume,
    )
    self._cleanup_chapter_dir(result)
    return result
1051
+
1052
+
1053
def parse_text_chapters(text: str) -> list[Chapter]:
    """
    Split text into chapters at every ``CHAPTER_PATTERN`` match.

    When no marker is found, the whole (stripped) text becomes a single
    chapter titled "Text". Otherwise non-empty text before the first marker
    becomes an "Introduction" chapter, and each marker whose following text
    is non-empty becomes a chapter titled with the stripped marker text.

    Args:
        text: Text content

    Returns:
        List of Chapter objects, indexed in the order they were appended
    """
    markers = list(CHAPTER_PATTERN.finditer(text))
    if not markers:
        return [Chapter(title="Text", content=text.strip(), index=0)]

    parsed: list[Chapter] = []

    # Anything ahead of the first marker is treated as an introduction.
    preamble = text[: markers[0].start()].strip()
    if preamble:
        parsed.append(Chapter(title="Introduction", content=preamble, index=0))

    # Each chapter body runs from the end of its marker to the start of the
    # next marker (or to the end of the text for the last one).
    body_ends = [m.start() for m in markers[1:]] + [len(text)]
    for marker, body_end in zip(markers, body_ends):
        body = text[marker.end() : body_end].strip()
        if not body:
            # Markers with no text after them produce no chapter.
            continue
        heading = marker.group().strip()
        parsed.append(Chapter(title=heading, content=body, index=len(parsed)))

    return parsed