ttsforge 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ttsforge/__init__.py +114 -0
- ttsforge/_version.py +34 -0
- ttsforge/audio_merge.py +180 -0
- ttsforge/audio_player.py +473 -0
- ttsforge/chapter_selection.py +75 -0
- ttsforge/cli/__init__.py +73 -0
- ttsforge/cli/commands_conversion.py +1927 -0
- ttsforge/cli/commands_phonemes.py +1033 -0
- ttsforge/cli/commands_utility.py +1389 -0
- ttsforge/cli/helpers.py +76 -0
- ttsforge/constants.py +164 -0
- ttsforge/conversion.py +1090 -0
- ttsforge/input_reader.py +408 -0
- ttsforge/kokoro_lang.py +12 -0
- ttsforge/kokoro_runner.py +125 -0
- ttsforge/name_extractor.py +305 -0
- ttsforge/phoneme_conversion.py +978 -0
- ttsforge/phonemes.py +486 -0
- ttsforge/ssmd_generator.py +422 -0
- ttsforge/utils.py +785 -0
- ttsforge/vocab/__init__.py +139 -0
- ttsforge-0.1.0.dist-info/METADATA +659 -0
- ttsforge-0.1.0.dist-info/RECORD +27 -0
- ttsforge-0.1.0.dist-info/WHEEL +5 -0
- ttsforge-0.1.0.dist-info/entry_points.txt +2 -0
- ttsforge-0.1.0.dist-info/licenses/LICENSE +21 -0
- ttsforge-0.1.0.dist-info/top_level.txt +1 -0
ttsforge/conversion.py
ADDED
|
@@ -0,0 +1,1090 @@
|
|
|
1
|
+
"""TTS conversion module for ttsforge - converts text/EPUB to audiobooks."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import re
|
|
6
|
+
import threading
|
|
7
|
+
import time
|
|
8
|
+
from collections.abc import Callable
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any, Literal, Optional, cast
|
|
12
|
+
|
|
13
|
+
import soundfile as sf
|
|
14
|
+
|
|
15
|
+
from .audio_merge import AudioMerger, MergeMeta
|
|
16
|
+
from .constants import (
|
|
17
|
+
DEFAULT_VOICE_FOR_LANG,
|
|
18
|
+
ISO_TO_LANG_CODE,
|
|
19
|
+
SAMPLE_RATE,
|
|
20
|
+
SUPPORTED_OUTPUT_FORMATS,
|
|
21
|
+
VOICE_PREFIX_TO_LANG,
|
|
22
|
+
)
|
|
23
|
+
from .kokoro_lang import get_onnx_lang_code
|
|
24
|
+
from .kokoro_runner import KokoroRunner, KokoroRunOptions
|
|
25
|
+
from .ssmd_generator import (
|
|
26
|
+
SSMDGenerationError,
|
|
27
|
+
chapter_to_ssmd,
|
|
28
|
+
load_ssmd_file,
|
|
29
|
+
save_ssmd_file,
|
|
30
|
+
)
|
|
31
|
+
from .utils import (
|
|
32
|
+
atomic_write_json,
|
|
33
|
+
format_duration,
|
|
34
|
+
format_filename_template,
|
|
35
|
+
load_phoneme_dictionary,
|
|
36
|
+
prevent_sleep_end,
|
|
37
|
+
prevent_sleep_start,
|
|
38
|
+
sanitize_filename,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
|
|
43
|
+
class Chapter:
|
|
44
|
+
"""Represents a chapter from an EPUB or text file."""
|
|
45
|
+
|
|
46
|
+
title: str
|
|
47
|
+
content: str
|
|
48
|
+
index: int = 0
|
|
49
|
+
html_content: str | None = None # Optional HTML for emphasis detection
|
|
50
|
+
is_ssmd: bool = False
|
|
51
|
+
|
|
52
|
+
@property
|
|
53
|
+
def char_count(self) -> int:
|
|
54
|
+
return len(self.content)
|
|
55
|
+
|
|
56
|
+
@property
|
|
57
|
+
def text(self) -> str:
|
|
58
|
+
"""Alias for content to maintain compatibility with input_reader.Chapter."""
|
|
59
|
+
return self.content
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass
class ConversionProgress:
    """Snapshot of how far a running conversion has advanced."""

    # Chapter counters and the name of the chapter being processed.
    current_chapter: int = 0
    total_chapters: int = 0
    chapter_name: str = ""
    # Character counters that drive the percentage.
    chars_processed: int = 0
    total_chars: int = 0
    # Text currently being processed.
    current_text: str = ""
    # Timing values, rendered via format_duration for display.
    elapsed_time: float = 0.0
    estimated_remaining: float = 0.0

    @property
    def percent(self) -> int:
        """Whole-number completion percentage.

        Returns 0 when ``total_chars`` is 0 and never more than 99.
        """
        total = self.total_chars
        if total == 0:
            return 0
        fraction = self.chars_processed / total
        return min(int(fraction * 100), 99)

    @property
    def etr_formatted(self) -> str:
        """Estimated remaining time rendered by ``format_duration``."""
        remaining = self.estimated_remaining
        return format_duration(remaining)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclass
|
|
87
|
+
class ConversionResult:
|
|
88
|
+
"""Result of a conversion operation."""
|
|
89
|
+
|
|
90
|
+
success: bool
|
|
91
|
+
output_path: Path | None = None
|
|
92
|
+
subtitle_path: Path | None = None
|
|
93
|
+
error_message: str | None = None
|
|
94
|
+
chapters_dir: Path | None = None
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@dataclass
|
|
98
|
+
class ChapterState:
|
|
99
|
+
"""State of a single chapter conversion."""
|
|
100
|
+
|
|
101
|
+
index: int
|
|
102
|
+
title: str
|
|
103
|
+
content_hash: str # Hash of chapter content for integrity check
|
|
104
|
+
completed: bool = False
|
|
105
|
+
audio_file: str | None = None # Relative path to chapter audio
|
|
106
|
+
duration: float = 0.0 # Duration in seconds
|
|
107
|
+
char_count: int = 0
|
|
108
|
+
ssmd_file: str | None = None # Relative path to SSMD file
|
|
109
|
+
ssmd_hash: str | None = None # Hash of SSMD content for change detection
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@dataclass
class ConversionState:
    """Persistent state for resumable conversions.

    The state is written to and read from a JSON file so an interrupted
    conversion can continue with the same settings.
    """

    version: int = 1
    source_file: str = ""
    source_hash: str = ""  # Hash of source file for change detection
    output_file: str = ""
    work_dir: str = ""
    voice: str = ""
    language: str = ""
    speed: float = 1.0
    split_mode: str = "auto"
    output_format: str = "m4b"
    silence_between_chapters: float = 2.0
    pause_clause: float = 0.25
    pause_sentence: float = 0.2
    pause_paragraph: float = 0.75
    pause_variance: float = 0.05
    pause_mode: str = "auto"  # "tts", "manual", or "auto"
    lang: str | None = None  # Language override for phonemization
    chapters: list[ChapterState] = field(default_factory=list)
    started_at: str = ""
    last_updated: str = ""

    @classmethod
    def load(cls, state_file: Path) -> Optional["ConversionState"]:
        """Load state from a JSON file.

        Args:
            state_file: Path of the JSON state file.

        Returns:
            The reconstructed state, or None when the file does not exist
            or its content cannot be turned into a state (invalid JSON,
            undecodable bytes, a JSON root that is not an object, or
            fields that do not fit the dataclasses).
        """
        if not state_file.exists():
            return None
        try:
            with open(state_file, encoding="utf-8") as f:
                data = json.load(f)
        except ValueError:
            # json.JSONDecodeError and UnicodeDecodeError are both
            # ValueError subclasses: the file is corrupt either way.
            return None

        if not isinstance(data, dict):
            # Valid JSON but not an object (e.g. a list or null); calling
            # dict methods on it would raise AttributeError.
            return None

        try:
            # Reconstruct ChapterState objects
            data["chapters"] = [
                ChapterState(**ch) for ch in data.get("chapters", [])
            ]

            # Migrate old min/max pause parameters to the newer
            # single-value-plus-variance fields.
            if "segment_pause_min" in data or "segment_pause_max" in data:
                seg_min = data.get("segment_pause_min", 0.1)
                seg_max = data.get("segment_pause_max", 0.3)
                data["pause_sentence"] = (seg_min + seg_max) / 2.0
                if "pause_variance" not in data:
                    data["pause_variance"] = max(0.01, (seg_max - seg_min) / 4.0)

            if "paragraph_pause_min" in data or "paragraph_pause_max" in data:
                para_min = data.get("paragraph_pause_min", 0.5)
                para_max = data.get("paragraph_pause_max", 1.0)
                data["pause_paragraph"] = (para_min + para_max) / 2.0

            # The legacy keys are not dataclass fields and must not reach
            # the constructor.
            for legacy_key in (
                "segment_pause_min",
                "segment_pause_max",
                "paragraph_pause_min",
                "paragraph_pause_max",
            ):
                data.pop(legacy_key, None)

            # Fill in fields that older state files do not contain.
            data.setdefault("silence_between_chapters", 2.0)
            data.setdefault("pause_clause", 0.25)
            data.setdefault("pause_sentence", 0.2)
            data.setdefault("pause_paragraph", 0.75)
            data.setdefault("pause_variance", 0.05)
            data.setdefault("pause_mode", "auto")
            data.setdefault("lang", None)

            return cls(**data)
        except (TypeError, KeyError):
            # Wrong value types or unexpected keys: treat as unusable state.
            return None

    def save(self, state_file: Path) -> None:
        """Save state to a JSON file.

        Updates ``last_updated`` to the current local time first, then
        writes all fields through ``atomic_write_json``.
        """
        self.last_updated = time.strftime("%Y-%m-%d %H:%M:%S")
        data = {
            "version": self.version,
            "source_file": self.source_file,
            "source_hash": self.source_hash,
            "output_file": self.output_file,
            "work_dir": self.work_dir,
            "voice": self.voice,
            "language": self.language,
            "speed": self.speed,
            "split_mode": self.split_mode,
            "output_format": self.output_format,
            "silence_between_chapters": self.silence_between_chapters,
            "pause_clause": self.pause_clause,
            "pause_sentence": self.pause_sentence,
            "pause_paragraph": self.pause_paragraph,
            "pause_variance": self.pause_variance,
            "pause_mode": self.pause_mode,
            "lang": self.lang,
            "chapters": [
                {
                    "index": ch.index,
                    "title": ch.title,
                    "content_hash": ch.content_hash,
                    "completed": ch.completed,
                    "audio_file": ch.audio_file,
                    "duration": ch.duration,
                    "char_count": ch.char_count,
                    "ssmd_file": ch.ssmd_file,
                    "ssmd_hash": ch.ssmd_hash,
                }
                for ch in self.chapters
            ],
            "started_at": self.started_at,
            "last_updated": self.last_updated,
        }
        atomic_write_json(state_file, data, indent=2, ensure_ascii=True)

    def get_completed_count(self) -> int:
        """Get the number of completed chapters."""
        return sum(1 for ch in self.chapters if ch.completed)

    def get_next_incomplete_index(self) -> int | None:
        """Get the index of the first chapter not yet completed, or None."""
        return next((ch.index for ch in self.chapters if not ch.completed), None)

    def is_complete(self) -> bool:
        """Check if all chapters are completed (True for an empty list)."""
        return all(ch.completed for ch in self.chapters)
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _hash_content(content: str) -> str:
|
|
250
|
+
"""Generate a hash of content for integrity checking."""
|
|
251
|
+
return hashlib.md5(content.encode("utf-8")).hexdigest()[:12]
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def _hash_file(file_path: Path) -> str:
|
|
255
|
+
"""Generate a hash of a file for change detection."""
|
|
256
|
+
if not file_path.exists():
|
|
257
|
+
return ""
|
|
258
|
+
hasher = hashlib.md5()
|
|
259
|
+
with open(file_path, "rb") as f:
|
|
260
|
+
for chunk in iter(lambda: f.read(8192), b""):
|
|
261
|
+
hasher.update(chunk)
|
|
262
|
+
return hasher.hexdigest()[:12]
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
# Names of the available split modes
SPLIT_MODES = [
    "auto",
    "line",
    "paragraph",
    "sentence",
    "clause",
]
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
@dataclass
|
|
270
|
+
class ConversionOptions:
|
|
271
|
+
"""Options for TTS conversion."""
|
|
272
|
+
|
|
273
|
+
voice: str = "af_bella"
|
|
274
|
+
language: str = "a"
|
|
275
|
+
speed: float = 1.0
|
|
276
|
+
output_format: str = "m4b"
|
|
277
|
+
output_dir: Path | None = None
|
|
278
|
+
use_gpu: bool = False # GPU requires onnxruntime-gpu
|
|
279
|
+
silence_between_chapters: float = 2.0
|
|
280
|
+
# Language override for phonemization (e.g., 'de', 'en-us', 'fr')
|
|
281
|
+
# If None, language is determined from voice prefix
|
|
282
|
+
lang: str | None = None
|
|
283
|
+
# Mixed-language support (auto-detect and handle multiple languages)
|
|
284
|
+
use_mixed_language: bool = False
|
|
285
|
+
mixed_language_primary: str | None = None
|
|
286
|
+
mixed_language_allowed: list[str] | None = None
|
|
287
|
+
mixed_language_confidence: float = 0.7
|
|
288
|
+
# Custom phoneme dictionary for pronunciation overrides
|
|
289
|
+
phoneme_dictionary_path: str | None = None
|
|
290
|
+
phoneme_dict_case_sensitive: bool = False
|
|
291
|
+
# Pause settings (pykokoro built-in pause handling)
|
|
292
|
+
pause_clause: float = 0.25 # For clause boundaries (commas)
|
|
293
|
+
pause_sentence: float = 0.2 # For sentence boundaries
|
|
294
|
+
pause_paragraph: float = 0.75 # For paragraph boundaries
|
|
295
|
+
pause_variance: float = 0.05 # Standard deviation for natural variation
|
|
296
|
+
pause_mode: str = "auto" # "tts", "manual", or "auto
|
|
297
|
+
# Chapter announcement settings
|
|
298
|
+
announce_chapters: bool = True # Read chapter titles aloud before content
|
|
299
|
+
chapter_pause_after_title: float = 2.0 # Pause after chapter title (seconds)
|
|
300
|
+
save_chapters_separately: bool = False
|
|
301
|
+
merge_at_end: bool = True
|
|
302
|
+
# Split mode: auto, line, paragraph, sentence, clause
|
|
303
|
+
split_mode: str = "auto"
|
|
304
|
+
# Resume capability
|
|
305
|
+
resume: bool = True # Enable resume by default for long conversions
|
|
306
|
+
keep_chapter_files: bool = False # Keep individual chapter files after merge
|
|
307
|
+
# Metadata for m4b
|
|
308
|
+
title: str | None = None
|
|
309
|
+
author: str | None = None
|
|
310
|
+
cover_image: Path | None = None
|
|
311
|
+
# Voice blending (e.g., "af_nicole:50,am_michael:50")
|
|
312
|
+
voice_blend: str | None = None
|
|
313
|
+
# Voice database for custom/synthetic voices
|
|
314
|
+
voice_database: Path | None = None
|
|
315
|
+
# Filename template for chapter files
|
|
316
|
+
chapter_filename_template: str = "{chapter_num:03d}_{book_title}_{chapter_title}"
|
|
317
|
+
# Custom ONNX model path (None = use default downloaded model)
|
|
318
|
+
model_path: Path | None = None
|
|
319
|
+
# Custom voices.bin path (None = use default downloaded voices)
|
|
320
|
+
voices_path: Path | None = None
|
|
321
|
+
# SSMD generation control
|
|
322
|
+
generate_ssmd_only: bool = False # If True, only generate SSMD files, no audio
|
|
323
|
+
detect_emphasis: bool = False # If True, detect emphasis from HTML tags in EPUB
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
# Pattern to detect chapter markers in text.
#
# A marker is a line that starts (after optional whitespace) with either a
# chapter keyword followed by a roman or arabic number, or one of
# Prologue/Epilogue/Introduction, each optionally followed by a separator
# and a title. Matching is case-insensitive.
#
# The abbreviation "Ch" must be followed by a dot, whitespace or a digit.
# Without that, case-insensitive roman numerals make ordinary words such as
# "Child" or "Chill" look like "Ch" + numeral and produce false chapter
# markers.
#
# NOTE(review): the separator class contains \s, so the optional title part
# can start at the line break and take in the following line - confirm
# whether callers rely on that before tightening it.
CHAPTER_PATTERN = re.compile(
    r"(?:^|\n)\s*(?:"
    r"(?:"
    r"(?:Chapter|Kapitel|Chapitre|Capitulo|Capitolo)\s*"
    r"|Ch(?:\.\s*|\s+|(?=\d))"
    r")"
    r"(?:[IVXLCDM]+|\d+)"
    r"(?:\s*[:\-\.\s]\s*.*)?"
    r"|"
    r"(?:Prologue|Epilogue|Introduction)"
    r"(?:\s*[:\-\.\s]\s*.*)?"
    r")\s*(?:\n|$)",
    re.MULTILINE | re.IGNORECASE,
)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def detect_language_from_iso(iso_code: str | None) -> str:
|
|
341
|
+
"""Convert ISO language code to ttsforge language code."""
|
|
342
|
+
if not iso_code:
|
|
343
|
+
return "a" # Default to American English
|
|
344
|
+
iso_lower = iso_code.lower().strip()
|
|
345
|
+
return ISO_TO_LANG_CODE.get(iso_lower, ISO_TO_LANG_CODE.get(iso_lower[:2], "a"))
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def get_voice_language(voice: str) -> str:
    """Derive the language code from the first two characters of a voice name.

    Voice names shorter than two characters are looked up with an empty
    prefix; unknown prefixes yield "a".
    """
    if len(voice) < 2:
        voice_prefix = ""
    else:
        voice_prefix = voice[:2]
    return VOICE_PREFIX_TO_LANG.get(voice_prefix, "a")
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def get_default_voice_for_language(lang_code: str) -> str:
    """Look up the default voice for *lang_code*, falling back to "af_bella"."""
    fallback_voice = "af_bella"
    return DEFAULT_VOICE_FOR_LANG.get(lang_code, fallback_voice)
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
class TTSConverter:
|
|
360
|
+
"""Converts text to speech using Kokoro ONNX TTS."""
|
|
361
|
+
|
|
362
|
+
def __init__(
    self,
    options: ConversionOptions,
    progress_callback: Callable[[ConversionProgress], None] | None = None,
    log_callback: Callable[[str, str], None] | None = None,
) -> None:
    """
    Set up a converter; the Kokoro runner itself is created later.

    Args:
        options: Conversion options
        progress_callback: Receives progress updates
        log_callback: Receives log messages as (message, level)
    """
    self.log_callback = log_callback
    self.progress_callback = progress_callback
    self.options = options
    # No runner yet; _init_runner() fills this in on first use.
    self._runner: KokoroRunner | None = None
    # Set by cancel() and exposed through the _cancelled property.
    self._cancel_event = threading.Event()
    self._merger = AudioMerger(log=self.log)
|
|
382
|
+
|
|
383
|
+
@property
|
|
384
|
+
def _cancelled(self) -> bool:
|
|
385
|
+
return self._cancel_event.is_set()
|
|
386
|
+
|
|
387
|
+
def log(self, message: str, level: str = "info") -> None:
|
|
388
|
+
"""Log a message."""
|
|
389
|
+
if self.log_callback:
|
|
390
|
+
self.log_callback(message, level)
|
|
391
|
+
|
|
392
|
+
def cancel(self) -> None:
|
|
393
|
+
"""Request cancellation of the conversion."""
|
|
394
|
+
self._cancel_event.set()
|
|
395
|
+
|
|
396
|
+
def _init_runner(self) -> None:
|
|
397
|
+
"""Initialize the Kokoro runner."""
|
|
398
|
+
if self._runner is not None:
|
|
399
|
+
return
|
|
400
|
+
|
|
401
|
+
self.log("Initializing ONNX TTS pipeline...")
|
|
402
|
+
|
|
403
|
+
# Create TokenizerConfig from ConversionOptions (for mixed-language support)
|
|
404
|
+
from pykokoro.tokenizer import TokenizerConfig
|
|
405
|
+
|
|
406
|
+
tokenizer_config = TokenizerConfig(
|
|
407
|
+
use_mixed_language=self.options.use_mixed_language,
|
|
408
|
+
mixed_language_primary=self.options.mixed_language_primary,
|
|
409
|
+
mixed_language_allowed=self.options.mixed_language_allowed,
|
|
410
|
+
mixed_language_confidence=self.options.mixed_language_confidence,
|
|
411
|
+
phoneme_dictionary_path=self.options.phoneme_dictionary_path,
|
|
412
|
+
phoneme_dict_case_sensitive=self.options.phoneme_dict_case_sensitive,
|
|
413
|
+
)
|
|
414
|
+
|
|
415
|
+
opts = KokoroRunOptions(
|
|
416
|
+
voice=self.options.voice,
|
|
417
|
+
speed=self.options.speed,
|
|
418
|
+
use_gpu=self.options.use_gpu,
|
|
419
|
+
pause_clause=self.options.pause_clause,
|
|
420
|
+
pause_sentence=self.options.pause_sentence,
|
|
421
|
+
pause_paragraph=self.options.pause_paragraph,
|
|
422
|
+
pause_variance=self.options.pause_variance,
|
|
423
|
+
model_path=self.options.model_path,
|
|
424
|
+
voices_path=self.options.voices_path,
|
|
425
|
+
voice_blend=self.options.voice_blend,
|
|
426
|
+
voice_database=self.options.voice_database,
|
|
427
|
+
tokenizer_config=tokenizer_config,
|
|
428
|
+
)
|
|
429
|
+
self._runner = KokoroRunner(opts, log=self.log)
|
|
430
|
+
self._runner.ensure_ready()
|
|
431
|
+
|
|
432
|
+
def _build_ssmd_content(
    self,
    chapter: Chapter,
    phoneme_dict: dict[str, str] | None,
    mixed_language_config: dict[str, Any] | None,
    html_content: str | None,
) -> str:
    """Build SSMD for a chapter via ``chapter_to_ssmd``.

    If generation raises SSMDGenerationError the failure is logged at
    "error" level and the chapter's plain text is returned instead.
    """
    try:
        generated = chapter_to_ssmd(
            chapter_title=chapter.title,
            chapter_text=chapter.text,
            phoneme_dict=phoneme_dict,
            phoneme_dict_case_sensitive=self.options.phoneme_dict_case_sensitive,
            mixed_language_config=mixed_language_config,
            html_content=html_content,
            include_title=self.options.announce_chapters,
        )
    except SSMDGenerationError as e:
        self.log(f"SSMD generation failed: {e}, using plain text", "error")
        return chapter.text
    return generated
|
|
453
|
+
|
|
454
|
+
def _load_or_generate_ssmd(
    self,
    chapter: Chapter,
    ssmd_file: Path,
    phoneme_dict: dict[str, str] | None,
    mixed_language_config: dict[str, Any] | None,
    html_content: str | None,
) -> tuple[str, str]:
    """Return ``(ssmd_content, ssmd_hash)`` for a chapter.

    An existing SSMD file is loaded when possible. Otherwise the content
    comes from the chapter text (when the chapter is already SSMD) or from
    ``_build_ssmd_content``, and is saved to *ssmd_file*.
    """
    loaded: str | None = None
    digest = ""

    # Step 1: try the file on disk, whatever kind of chapter this is.
    if ssmd_file.exists():
        try:
            loaded, digest = load_ssmd_file(ssmd_file)
            self.log(f"Loaded SSMD from {ssmd_file.name}")
        except SSMDGenerationError as e:
            if chapter.is_ssmd:
                self.log(f"Failed to load SSMD: {e}, using input", "warning")
            else:
                self.log(f"Failed to load SSMD: {e}, regenerating...", "warning")
            loaded = None

    if loaded is not None:
        return loaded, digest

    # Step 2: nothing usable on disk - obtain the content and save it.
    if chapter.is_ssmd:
        loaded = chapter.text
    else:
        self.log(f"Generating SSMD for chapter: {chapter.title}")
        loaded = self._build_ssmd_content(
            chapter,
            phoneme_dict=phoneme_dict,
            mixed_language_config=mixed_language_config,
            html_content=html_content,
        )
    digest = save_ssmd_file(loaded, ssmd_file)
    self.log(f"Saved SSMD to {ssmd_file.name}")

    return loaded, digest
|
|
502
|
+
|
|
503
|
+
def _render_chapter_wav(
|
|
504
|
+
self,
|
|
505
|
+
chapter: Chapter,
|
|
506
|
+
output_file: Path,
|
|
507
|
+
ssmd_content: str,
|
|
508
|
+
) -> float:
|
|
509
|
+
"""Render SSMD content to a chapter WAV file."""
|
|
510
|
+
effective_lang = (
|
|
511
|
+
self.options.lang if self.options.lang else self.options.language
|
|
512
|
+
)
|
|
513
|
+
lang_code = get_onnx_lang_code(effective_lang)
|
|
514
|
+
|
|
515
|
+
with sf.SoundFile(
|
|
516
|
+
str(output_file),
|
|
517
|
+
"w",
|
|
518
|
+
samplerate=SAMPLE_RATE,
|
|
519
|
+
channels=1,
|
|
520
|
+
format="wav",
|
|
521
|
+
) as out_file:
|
|
522
|
+
assert self._runner is not None
|
|
523
|
+
samples = self._runner.synthesize(
|
|
524
|
+
ssmd_content,
|
|
525
|
+
lang_code=lang_code,
|
|
526
|
+
pause_mode=cast(
|
|
527
|
+
Literal["tts", "manual", "auto"], self.options.pause_mode
|
|
528
|
+
),
|
|
529
|
+
is_phonemes=False,
|
|
530
|
+
)
|
|
531
|
+
out_file.write(samples)
|
|
532
|
+
|
|
533
|
+
return len(samples) / SAMPLE_RATE
|
|
534
|
+
|
|
535
|
+
def convert_chapters_resumable( # noqa: C901 - Complex but necessary for resume logic
|
|
536
|
+
self,
|
|
537
|
+
chapters: list[Chapter],
|
|
538
|
+
output_path: Path,
|
|
539
|
+
source_file: Path | None = None,
|
|
540
|
+
resume: bool = True,
|
|
541
|
+
) -> ConversionResult:
|
|
542
|
+
"""
|
|
543
|
+
Convert chapters to audio with resume capability.
|
|
544
|
+
|
|
545
|
+
Each chapter is saved as a separate WAV file, allowing conversion
|
|
546
|
+
to be resumed if interrupted. A state file tracks progress.
|
|
547
|
+
|
|
548
|
+
Args:
|
|
549
|
+
chapters: List of Chapter objects
|
|
550
|
+
output_path: Output file path
|
|
551
|
+
source_file: Original source file (for state tracking)
|
|
552
|
+
resume: Whether to resume from previous state
|
|
553
|
+
|
|
554
|
+
Returns:
|
|
555
|
+
ConversionResult with success status and paths
|
|
556
|
+
"""
|
|
557
|
+
if not chapters:
|
|
558
|
+
return ConversionResult(
|
|
559
|
+
success=False, error_message="No chapters to convert"
|
|
560
|
+
)
|
|
561
|
+
|
|
562
|
+
if self.options.output_format not in SUPPORTED_OUTPUT_FORMATS:
|
|
563
|
+
return ConversionResult(
|
|
564
|
+
success=False,
|
|
565
|
+
error_message=f"Unsupported format: {self.options.output_format}",
|
|
566
|
+
)
|
|
567
|
+
|
|
568
|
+
self._cancel_event.clear()
|
|
569
|
+
prevent_sleep_start()
|
|
570
|
+
|
|
571
|
+
try:
|
|
572
|
+
# Set up work directory for chapter files (use book title)
|
|
573
|
+
safe_book_title = sanitize_filename(self.options.title or output_path.stem)[
|
|
574
|
+
:50
|
|
575
|
+
]
|
|
576
|
+
work_dir = output_path.parent / f".{safe_book_title}_chapters"
|
|
577
|
+
work_dir.mkdir(parents=True, exist_ok=True)
|
|
578
|
+
state_file = work_dir / f"{safe_book_title}_state.json"
|
|
579
|
+
|
|
580
|
+
# Load or create state
|
|
581
|
+
state: ConversionState | None = None
|
|
582
|
+
if resume and state_file.exists():
|
|
583
|
+
state = ConversionState.load(state_file)
|
|
584
|
+
if state:
|
|
585
|
+
# Verify source file hasn't changed
|
|
586
|
+
source_hash = _hash_file(source_file) if source_file else ""
|
|
587
|
+
if source_file and state.source_hash != source_hash:
|
|
588
|
+
self.log(
|
|
589
|
+
"Source file changed, starting fresh conversion",
|
|
590
|
+
"warning",
|
|
591
|
+
)
|
|
592
|
+
state = None
|
|
593
|
+
# Verify chapter count matches
|
|
594
|
+
elif len(state.chapters) != len(chapters):
|
|
595
|
+
self.log(
|
|
596
|
+
f"Chapter count changed "
|
|
597
|
+
f"({len(state.chapters)} -> {len(chapters)}), "
|
|
598
|
+
"starting fresh conversion",
|
|
599
|
+
"warning",
|
|
600
|
+
)
|
|
601
|
+
state = None
|
|
602
|
+
else:
|
|
603
|
+
# Check if settings differ from saved state
|
|
604
|
+
settings_changed = (
|
|
605
|
+
state.voice != self.options.voice
|
|
606
|
+
or state.language != self.options.language
|
|
607
|
+
or state.speed != self.options.speed
|
|
608
|
+
or state.split_mode != self.options.split_mode
|
|
609
|
+
or state.silence_between_chapters
|
|
610
|
+
!= self.options.silence_between_chapters
|
|
611
|
+
or state.pause_clause != self.options.pause_clause
|
|
612
|
+
or state.pause_sentence != self.options.pause_sentence
|
|
613
|
+
or state.pause_paragraph != self.options.pause_paragraph
|
|
614
|
+
or state.pause_variance != self.options.pause_variance
|
|
615
|
+
or state.pause_mode != self.options.pause_mode
|
|
616
|
+
or state.lang != self.options.lang
|
|
617
|
+
)
|
|
618
|
+
|
|
619
|
+
if settings_changed:
|
|
620
|
+
self.log(
|
|
621
|
+
f"Restoring settings from previous session: "
|
|
622
|
+
f"voice={state.voice}, language={state.language}, "
|
|
623
|
+
f"lang_override={state.lang}, "
|
|
624
|
+
f"speed={state.speed}, "
|
|
625
|
+
f"split_mode={state.split_mode}, "
|
|
626
|
+
f"silence={state.silence_between_chapters}s, "
|
|
627
|
+
f"pauses: clause={state.pause_clause}s "
|
|
628
|
+
f"sent={state.pause_sentence}s "
|
|
629
|
+
f"para={state.pause_paragraph}s "
|
|
630
|
+
f"var={state.pause_variance}s "
|
|
631
|
+
f"pause_mode={state.pause_mode}",
|
|
632
|
+
"info",
|
|
633
|
+
)
|
|
634
|
+
|
|
635
|
+
# Apply saved settings to options for consistency
|
|
636
|
+
self.options.voice = state.voice
|
|
637
|
+
self.options.language = state.language
|
|
638
|
+
self.options.speed = state.speed
|
|
639
|
+
self.options.split_mode = state.split_mode
|
|
640
|
+
self.options.output_format = state.output_format
|
|
641
|
+
self.options.silence_between_chapters = (
|
|
642
|
+
state.silence_between_chapters
|
|
643
|
+
)
|
|
644
|
+
self.options.pause_clause = state.pause_clause
|
|
645
|
+
self.options.pause_sentence = state.pause_sentence
|
|
646
|
+
self.options.pause_paragraph = state.pause_paragraph
|
|
647
|
+
self.options.pause_variance = state.pause_variance
|
|
648
|
+
self.options.pause_mode = state.pause_mode
|
|
649
|
+
self.options.lang = state.lang
|
|
650
|
+
|
|
651
|
+
if state is None:
|
|
652
|
+
# Create new state
|
|
653
|
+
source_hash = _hash_file(source_file) if source_file else ""
|
|
654
|
+
state = ConversionState(
|
|
655
|
+
source_file=str(source_file) if source_file else "",
|
|
656
|
+
source_hash=source_hash,
|
|
657
|
+
output_file=str(output_path),
|
|
658
|
+
work_dir=str(work_dir),
|
|
659
|
+
voice=self.options.voice,
|
|
660
|
+
language=self.options.language,
|
|
661
|
+
speed=self.options.speed,
|
|
662
|
+
split_mode=self.options.split_mode,
|
|
663
|
+
output_format=self.options.output_format,
|
|
664
|
+
silence_between_chapters=self.options.silence_between_chapters,
|
|
665
|
+
pause_clause=self.options.pause_clause,
|
|
666
|
+
pause_sentence=self.options.pause_sentence,
|
|
667
|
+
pause_paragraph=self.options.pause_paragraph,
|
|
668
|
+
pause_variance=self.options.pause_variance,
|
|
669
|
+
pause_mode=self.options.pause_mode,
|
|
670
|
+
lang=self.options.lang,
|
|
671
|
+
chapters=[
|
|
672
|
+
ChapterState(
|
|
673
|
+
index=i,
|
|
674
|
+
title=ch.title,
|
|
675
|
+
content_hash=_hash_content(ch.content),
|
|
676
|
+
char_count=ch.char_count,
|
|
677
|
+
)
|
|
678
|
+
for i, ch in enumerate(chapters)
|
|
679
|
+
],
|
|
680
|
+
started_at=time.strftime("%Y-%m-%d %H:%M:%S"),
|
|
681
|
+
)
|
|
682
|
+
state.save(state_file)
|
|
683
|
+
else:
|
|
684
|
+
completed = state.get_completed_count()
|
|
685
|
+
total = len(chapters)
|
|
686
|
+
self.log(f"Resuming conversion: {completed}/{total} chapters completed")
|
|
687
|
+
|
|
688
|
+
# Initialize runner
|
|
689
|
+
self._init_runner()
|
|
690
|
+
|
|
691
|
+
phoneme_dict = None
|
|
692
|
+
if self.options.phoneme_dictionary_path:
|
|
693
|
+
phoneme_dict = load_phoneme_dictionary(
|
|
694
|
+
self.options.phoneme_dictionary_path,
|
|
695
|
+
case_sensitive=self.options.phoneme_dict_case_sensitive,
|
|
696
|
+
log_callback=lambda message: self.log(message, "warning"),
|
|
697
|
+
)
|
|
698
|
+
|
|
699
|
+
mixed_language_config = None
|
|
700
|
+
if self.options.use_mixed_language:
|
|
701
|
+
mixed_language_config = {
|
|
702
|
+
"use_mixed_language": True,
|
|
703
|
+
"primary": self.options.mixed_language_primary,
|
|
704
|
+
"allowed": self.options.mixed_language_allowed,
|
|
705
|
+
"confidence": self.options.mixed_language_confidence,
|
|
706
|
+
}
|
|
707
|
+
|
|
708
|
+
total_chars = sum(ch.char_count for ch in chapters)
|
|
709
|
+
# Account for already completed chapters
|
|
710
|
+
chars_already_done = sum(
|
|
711
|
+
state.chapters[i].char_count
|
|
712
|
+
for i in range(len(state.chapters))
|
|
713
|
+
if state.chapters[i].completed
|
|
714
|
+
)
|
|
715
|
+
chars_processed = chars_already_done
|
|
716
|
+
start_time = time.time()
|
|
717
|
+
|
|
718
|
+
progress = ConversionProgress(
|
|
719
|
+
total_chapters=len(chapters),
|
|
720
|
+
total_chars=total_chars,
|
|
721
|
+
chars_processed=chars_processed,
|
|
722
|
+
)
|
|
723
|
+
|
|
724
|
+
# Convert each chapter
|
|
725
|
+
for chapter_idx, chapter in enumerate(chapters):
|
|
726
|
+
if self._cancel_event.is_set():
|
|
727
|
+
state.save(state_file)
|
|
728
|
+
return ConversionResult(
|
|
729
|
+
success=False,
|
|
730
|
+
error_message="Cancelled",
|
|
731
|
+
chapters_dir=work_dir,
|
|
732
|
+
)
|
|
733
|
+
|
|
734
|
+
# Validate chapter index to prevent index errors
|
|
735
|
+
if chapter_idx >= len(state.chapters):
|
|
736
|
+
error_msg = (
|
|
737
|
+
f"Chapter index {chapter_idx} out of range. "
|
|
738
|
+
f"State has {len(state.chapters)} chapters "
|
|
739
|
+
f"but trying to access "
|
|
740
|
+
f"chapter {chapter_idx + 1}/{len(chapters)}. "
|
|
741
|
+
"This usually means the state file is corrupted. "
|
|
742
|
+
"Try using --fresh to start a new conversion."
|
|
743
|
+
)
|
|
744
|
+
return ConversionResult(
|
|
745
|
+
success=False,
|
|
746
|
+
error_message=error_msg,
|
|
747
|
+
)
|
|
748
|
+
|
|
749
|
+
chapter_state = state.chapters[chapter_idx]
|
|
750
|
+
|
|
751
|
+
# Check if SSMD file was manually edited
|
|
752
|
+
ssmd_edited = False
|
|
753
|
+
if chapter_state.ssmd_file and chapter_state.ssmd_hash:
|
|
754
|
+
ssmd_path = work_dir / chapter_state.ssmd_file
|
|
755
|
+
if ssmd_path.exists():
|
|
756
|
+
try:
|
|
757
|
+
_, current_hash = load_ssmd_file(ssmd_path)
|
|
758
|
+
if current_hash != chapter_state.ssmd_hash:
|
|
759
|
+
self.log(
|
|
760
|
+
f"Chapter {chapter_idx + 1} SSMD file was edited, "
|
|
761
|
+
"will regenerate audio",
|
|
762
|
+
"info",
|
|
763
|
+
)
|
|
764
|
+
ssmd_edited = True
|
|
765
|
+
chapter_state.completed = False
|
|
766
|
+
except SSMDGenerationError:
|
|
767
|
+
# SSMD file corrupted, will regenerate
|
|
768
|
+
ssmd_edited = True
|
|
769
|
+
chapter_state.completed = False
|
|
770
|
+
|
|
771
|
+
# Skip already completed chapters (unless SSMD was edited)
|
|
772
|
+
if (
|
|
773
|
+
chapter_state.completed
|
|
774
|
+
and chapter_state.audio_file
|
|
775
|
+
and not ssmd_edited
|
|
776
|
+
):
|
|
777
|
+
chapter_file = work_dir / chapter_state.audio_file
|
|
778
|
+
if chapter_file.exists():
|
|
779
|
+
ch_num = chapter_idx + 1
|
|
780
|
+
self.log(
|
|
781
|
+
f"Skipping completed chapter {ch_num}: {chapter.title}"
|
|
782
|
+
)
|
|
783
|
+
continue
|
|
784
|
+
else:
|
|
785
|
+
# File missing, need to reconvert
|
|
786
|
+
chapter_state.completed = False
|
|
787
|
+
|
|
788
|
+
progress.current_chapter = chapter_idx + 1
|
|
789
|
+
progress.chapter_name = chapter.title
|
|
790
|
+
|
|
791
|
+
ch_num = chapter_idx + 1
|
|
792
|
+
self.log(
|
|
793
|
+
f"Converting chapter {ch_num}/{len(chapters)}: {chapter.title}"
|
|
794
|
+
)
|
|
795
|
+
|
|
796
|
+
# Generate chapter filename using template
|
|
797
|
+
chapter_filename = (
|
|
798
|
+
format_filename_template(
|
|
799
|
+
self.options.chapter_filename_template,
|
|
800
|
+
book_title=self.options.title or "Untitled",
|
|
801
|
+
chapter_title=chapter.title,
|
|
802
|
+
chapter_num=chapter_idx + 1,
|
|
803
|
+
)
|
|
804
|
+
+ ".wav"
|
|
805
|
+
)
|
|
806
|
+
chapter_file = work_dir / chapter_filename
|
|
807
|
+
|
|
808
|
+
# Generate SSMD filename (same as WAV but with .ssmd extension)
|
|
809
|
+
ssmd_filename = chapter_filename.replace(".wav", ".ssmd")
|
|
810
|
+
ssmd_file = work_dir / ssmd_filename
|
|
811
|
+
html_content = (
|
|
812
|
+
chapter.html_content if self.options.detect_emphasis else None
|
|
813
|
+
)
|
|
814
|
+
ssmd_content, ssmd_hash = self._load_or_generate_ssmd(
|
|
815
|
+
chapter,
|
|
816
|
+
ssmd_file,
|
|
817
|
+
phoneme_dict=phoneme_dict,
|
|
818
|
+
mixed_language_config=mixed_language_config,
|
|
819
|
+
html_content=html_content,
|
|
820
|
+
)
|
|
821
|
+
|
|
822
|
+
# If generate_ssmd_only mode, just generate SSMD and skip audio
|
|
823
|
+
if self.options.generate_ssmd_only:
|
|
824
|
+
chapter_state.completed = True
|
|
825
|
+
chapter_state.ssmd_file = ssmd_filename
|
|
826
|
+
chapter_state.ssmd_hash = ssmd_hash
|
|
827
|
+
state.save(state_file)
|
|
828
|
+
|
|
829
|
+
chars_processed += chapter.char_count
|
|
830
|
+
progress.chars_processed = chars_processed
|
|
831
|
+
if self.progress_callback:
|
|
832
|
+
self.progress_callback(progress)
|
|
833
|
+
continue
|
|
834
|
+
|
|
835
|
+
duration = self._render_chapter_wav(
|
|
836
|
+
chapter,
|
|
837
|
+
chapter_file,
|
|
838
|
+
ssmd_content,
|
|
839
|
+
)
|
|
840
|
+
|
|
841
|
+
if self._cancel_event.is_set():
|
|
842
|
+
# Remove incomplete files
|
|
843
|
+
chapter_file.unlink(missing_ok=True)
|
|
844
|
+
ssmd_file.unlink(missing_ok=True)
|
|
845
|
+
state.save(state_file)
|
|
846
|
+
return ConversionResult(
|
|
847
|
+
success=False,
|
|
848
|
+
error_message="Cancelled",
|
|
849
|
+
chapters_dir=work_dir,
|
|
850
|
+
)
|
|
851
|
+
|
|
852
|
+
# Update state
|
|
853
|
+
chapter_state.completed = True
|
|
854
|
+
chapter_state.audio_file = chapter_filename
|
|
855
|
+
chapter_state.ssmd_file = ssmd_filename
|
|
856
|
+
chapter_state.ssmd_hash = ssmd_hash
|
|
857
|
+
chapter_state.duration = duration
|
|
858
|
+
state.save(state_file)
|
|
859
|
+
|
|
860
|
+
# Update progress
|
|
861
|
+
chars_processed += chapter.char_count
|
|
862
|
+
progress.chars_processed = chars_processed
|
|
863
|
+
progress.current_text = (
|
|
864
|
+
f"Completed chapter: {chapter.title or 'Untitled'}"
|
|
865
|
+
)
|
|
866
|
+
elapsed = time.time() - start_time
|
|
867
|
+
if chars_processed > chars_already_done and elapsed > 0.5:
|
|
868
|
+
chars_in_session = chars_processed - chars_already_done
|
|
869
|
+
avg_time = elapsed / chars_in_session
|
|
870
|
+
remaining = total_chars - chars_processed
|
|
871
|
+
progress.estimated_remaining = avg_time * remaining
|
|
872
|
+
progress.elapsed_time = elapsed
|
|
873
|
+
|
|
874
|
+
if self.progress_callback:
|
|
875
|
+
self.progress_callback(progress)
|
|
876
|
+
|
|
877
|
+
# If generate_ssmd_only mode, exit here without merging
|
|
878
|
+
if self.options.generate_ssmd_only:
|
|
879
|
+
self.log("SSMD generation complete!")
|
|
880
|
+
self.log(f"SSMD files saved in: {work_dir}")
|
|
881
|
+
return ConversionResult(
|
|
882
|
+
success=True,
|
|
883
|
+
chapters_dir=work_dir,
|
|
884
|
+
output_path=None, # No audio output in SSMD-only mode
|
|
885
|
+
)
|
|
886
|
+
|
|
887
|
+
# All chapters completed, merge into final output
|
|
888
|
+
self.log("Merging chapters into final audiobook...")
|
|
889
|
+
|
|
890
|
+
chapter_files = [
|
|
891
|
+
work_dir / ch.audio_file for ch in state.chapters if ch.audio_file
|
|
892
|
+
]
|
|
893
|
+
chapter_durations = [ch.duration for ch in state.chapters]
|
|
894
|
+
chapter_titles = [ch.title for ch in state.chapters]
|
|
895
|
+
|
|
896
|
+
meta = MergeMeta(
|
|
897
|
+
fmt=self.options.output_format,
|
|
898
|
+
silence_between_chapters=self.options.silence_between_chapters,
|
|
899
|
+
title=self.options.title,
|
|
900
|
+
author=self.options.author,
|
|
901
|
+
cover_image=self.options.cover_image,
|
|
902
|
+
)
|
|
903
|
+
self._merger.merge_chapter_wavs(
|
|
904
|
+
chapter_files,
|
|
905
|
+
chapter_durations,
|
|
906
|
+
chapter_titles,
|
|
907
|
+
output_path,
|
|
908
|
+
meta,
|
|
909
|
+
)
|
|
910
|
+
|
|
911
|
+
self.log("Conversion complete!")
|
|
912
|
+
|
|
913
|
+
return ConversionResult(
|
|
914
|
+
success=True,
|
|
915
|
+
output_path=output_path,
|
|
916
|
+
chapters_dir=work_dir,
|
|
917
|
+
)
|
|
918
|
+
|
|
919
|
+
except Exception as e:
|
|
920
|
+
import traceback
|
|
921
|
+
|
|
922
|
+
error_msg = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
|
|
923
|
+
return ConversionResult(success=False, error_message=error_msg)
|
|
924
|
+
finally:
|
|
925
|
+
prevent_sleep_end()
|
|
926
|
+
|
|
927
|
+
def convert_chapters(
    self,
    chapters: list[Chapter],
    output_path: Path,
) -> ConversionResult:
    """Convert a list of chapters to audio using the SSMD pipeline.

    Delegates to ``convert_chapters_resumable`` (passing along
    ``self.options.resume``), hands the outcome to ``_cleanup_chapter_dir``
    and then returns that same outcome.

    Args:
        chapters: Chapters to convert, in order.
        output_path: Output file path.

    Returns:
        The ConversionResult produced by ``convert_chapters_resumable``.
    """
    should_resume = self.options.resume
    outcome = self.convert_chapters_resumable(
        chapters=chapters,
        output_path=output_path,
        resume=should_resume,
    )
    # Cleanup decides for itself whether the working directory may be removed.
    self._cleanup_chapter_dir(outcome)
    return outcome
|
|
940
|
+
|
|
941
|
+
def _cleanup_chapter_dir(self, result: ConversionResult) -> None:
    """Delete the per-chapter working directory when it is no longer needed.

    Nothing is removed in SSMD-only mode, after an unsuccessful conversion,
    when the result carries no ``chapters_dir``, or when
    ``options.keep_chapter_files`` is set. A removal that fails with
    ``OSError`` is logged as a warning instead of being raised.

    Args:
        result: Outcome of a conversion run.
    """
    if self.options.generate_ssmd_only:
        return
    if not result.success:
        return
    if not result.chapters_dir:
        return
    if self.options.keep_chapter_files:
        return

    import shutil

    target_dir = result.chapters_dir
    try:
        shutil.rmtree(target_dir)
    except OSError as err:
        self.log(
            f"Failed to clean up chapter dir {target_dir}: {err}",
            "warning",
        )
|
|
958
|
+
|
|
959
|
+
def convert_text(self, text: str, output_path: Path) -> ConversionResult:
    """
    Convert plain text to audio.

    The whole text is wrapped in a single chapter titled "Text" (index 0)
    and passed to ``convert_chapters``.

    Args:
        text: Text to convert
        output_path: Output file path

    Returns:
        ConversionResult
    """
    sole_chapter = Chapter(title="Text", content=text, index=0)
    return self.convert_chapters([sole_chapter], output_path)
|
|
972
|
+
|
|
973
|
+
def convert_epub(
    self,
    epub_path: Path,
    output_path: Path,
    selected_chapters: list[int] | None = None,
) -> ConversionResult:
    """
    Convert an EPUB file to audio.

    Args:
        epub_path: Path to EPUB file
        output_path: Output file path
        selected_chapters: Optional list of 0-based positions (in the order
            returned by the EPUB parser) of the chapters to convert. ``None``
            or an empty list converts every chapter.

    Returns:
        ConversionResult. ``success`` is False (with ``error_message`` set)
        when the EPUB cannot be parsed, contains no chapters, or when
        ``selected_chapters`` matches none of them; otherwise the result of
        ``convert_chapters_resumable`` is returned.
    """
    from epub2text import EPUBParser

    self.log(f"Parsing EPUB: {epub_path}")

    # Parse EPUB using epub2text
    try:
        parser = EPUBParser(str(epub_path))
        epub_chapters = parser.get_chapters()
    except Exception as e:
        return ConversionResult(
            success=False,
            error_message=f"Failed to parse EPUB: {e}",
        )

    if not epub_chapters:
        return ConversionResult(
            success=False,
            error_message="No chapters found in EPUB",
        )

    # Filter chapters if selection provided
    if selected_chapters:
        # Build the lookup once: set membership is O(1) per chapter instead of
        # a linear scan of the selection list for every chapter.
        wanted = set(selected_chapters)
        epub_chapters = [ch for i, ch in enumerate(epub_chapters) if i in wanted]
        if not epub_chapters:
            # Mirror the "no chapters" guard above rather than handing an
            # empty chapter list to the converter.
            return ConversionResult(
                success=False,
                error_message="No chapters matched the selection",
            )

    # Convert to our Chapter format - epub2text Chapter has .text attribute
    # Remove <<CHAPTER: ...>> markers that epub2text adds at the start of content
    # since we now announce chapter titles separately
    # Pattern matches: <<CHAPTER: anything>> followed by whitespace/newlines.
    # NOTE: with re.MULTILINE, ``^`` anchors at every line start, so the first
    # marker that begins a line is removed even if it is not the very first
    # thing in the text. Only one marker is removed per chapter (count=1).
    marker_re = re.compile(r"^\s*<<CHAPTER:[^>]*>>\s*\n*", flags=re.MULTILINE)
    chapters = [
        # ``index`` is the position within the (possibly filtered) list.
        Chapter(
            title=ch.title,
            content=marker_re.sub("", ch.text, count=1),
            index=i,
        )
        for i, ch in enumerate(epub_chapters)
    ]

    self.log(f"Found {len(chapters)} chapters")

    # Try to get metadata from EPUB for m4b
    if self.options.output_format == "m4b":
        try:
            metadata = parser.get_metadata()
            if metadata:
                # Only fill in values the user did not supply explicitly.
                if not self.options.title and metadata.title:
                    self.options.title = metadata.title
                if not self.options.author and metadata.authors:
                    self.options.author = metadata.authors[0]
        except (AttributeError, OSError, ValueError) as exc:
            # Metadata is optional; log and carry on with the conversion.
            self.log(f"Failed to read EPUB metadata: {exc}", "warning")

    result = self.convert_chapters_resumable(
        chapters,
        output_path,
        source_file=epub_path,
        resume=self.options.resume,
    )
    self._cleanup_chapter_dir(result)
    return result
|
|
1051
|
+
|
|
1052
|
+
|
|
1053
|
+
def parse_text_chapters(text: str) -> list[Chapter]:
    """
    Split text into chapters at every ``CHAPTER_PATTERN`` match.

    Text before the first marker becomes an "Introduction" chapter when it
    is non-blank. Each marker's stripped match text is used as the chapter
    title, and the stripped text up to the next marker (or the end of the
    input) is its content. Markers followed by no content are dropped.
    Indices are assigned consecutively to the chapters actually kept.

    Args:
        text: Text content

    Returns:
        List of Chapter objects; a single chapter titled "Text" holding the
        stripped input when no marker is found.
    """
    markers = list(CHAPTER_PATTERN.finditer(text))
    if not markers:
        return [Chapter(title="Text", content=text.strip(), index=0)]

    parsed: list[Chapter] = []

    # Anything ahead of the first marker is treated as an introduction.
    preamble = text[: markers[0].start()].strip()
    if preamble:
        parsed.append(Chapter(title="Introduction", content=preamble, index=0))

    # A chapter body runs from the end of its marker to the start of the
    # following marker, or to the end of the text for the last one.
    body_ends = [nxt.start() for nxt in markers[1:]] + [len(text)]
    for marker, body_end in zip(markers, body_ends):
        body = text[marker.end() : body_end].strip()
        if not body:
            continue
        parsed.append(
            Chapter(title=marker.group().strip(), content=body, index=len(parsed))
        )

    return parsed
|