ttsforge 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ttsforge/__init__.py +114 -0
- ttsforge/_version.py +34 -0
- ttsforge/audio_merge.py +180 -0
- ttsforge/audio_player.py +473 -0
- ttsforge/chapter_selection.py +75 -0
- ttsforge/cli/__init__.py +73 -0
- ttsforge/cli/commands_conversion.py +1927 -0
- ttsforge/cli/commands_phonemes.py +1033 -0
- ttsforge/cli/commands_utility.py +1389 -0
- ttsforge/cli/helpers.py +76 -0
- ttsforge/constants.py +164 -0
- ttsforge/conversion.py +1090 -0
- ttsforge/input_reader.py +408 -0
- ttsforge/kokoro_lang.py +12 -0
- ttsforge/kokoro_runner.py +125 -0
- ttsforge/name_extractor.py +305 -0
- ttsforge/phoneme_conversion.py +978 -0
- ttsforge/phonemes.py +486 -0
- ttsforge/ssmd_generator.py +422 -0
- ttsforge/utils.py +785 -0
- ttsforge/vocab/__init__.py +139 -0
- ttsforge-0.1.0.dist-info/METADATA +659 -0
- ttsforge-0.1.0.dist-info/RECORD +27 -0
- ttsforge-0.1.0.dist-info/WHEEL +5 -0
- ttsforge-0.1.0.dist-info/entry_points.txt +2 -0
- ttsforge-0.1.0.dist-info/licenses/LICENSE +21 -0
- ttsforge-0.1.0.dist-info/top_level.txt +1 -0
ttsforge/input_reader.py
ADDED
|
@@ -0,0 +1,408 @@
|
|
|
1
|
+
"""Unified input file reader for EPUB, TXT, and SSMD files.
|
|
2
|
+
|
|
3
|
+
This module provides a common interface for reading different input formats,
|
|
4
|
+
extracting metadata, chapters, and content for TTS conversion.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from .utils import detect_encoding
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class Metadata:
    """Book metadata."""

    # Book title, or None when the source file does not provide one.
    title: str | None = None
    # Author names; empty list when unknown.
    authors: list[str] = field(default_factory=list)
    # Language as stated by the source (code or name, verbatim from the file).
    language: str | None = None
    # Publisher name, when available (EPUB only in this module).
    publisher: str | None = None
    # Year of publication when it could be normalized to an int.
    publication_year: int | None = None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class Chapter:
    """Represents a chapter with title and content."""

    # Chapter heading as read from the source file.
    title: str
    # Body of the chapter (plain text, or SSMD markup when is_ssmd is True).
    text: str
    # Zero-based position of the chapter within the book.
    index: int = 0
    # True when `text` contains SSMD markup rather than plain text.
    is_ssmd: bool = False

    @property
    def char_count(self) -> int:
        """Return the character count of the chapter."""
        return len(self.text)

    @property
    def content(self) -> str:
        """Alias for text to maintain compatibility with conversion.Chapter."""
        return self.text
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class InputReader:
    """Unified reader for EPUB, TXT (Gutenberg), and SSMD files.

    The input format is detected from the file extension, and a common
    interface (``get_metadata``, ``get_chapters``,
    ``get_chapters_with_html``) is exposed over all formats. Parsing is
    lazy and results are cached on the instance.
    """

    # epub2text prefixes each chapter's text with a "<<CHAPTER: ...>>"
    # marker; this pattern strips the first such marker from a chapter body.
    _CHAPTER_MARKER_RE = re.compile(r"^\s*<<CHAPTER:[^>]*>>\s*\n*", re.MULTILINE)

    def __init__(self, file_path: Path | str):
        """Initialize the reader with a file path.

        Args:
            file_path: Path to the input file (EPUB, TXT, or SSMD)

        Raises:
            FileNotFoundError: If the file does not exist.
            ValueError: If the file extension is unsupported (including PDF).
        """
        self.file_path = Path(file_path)
        self._metadata: Metadata | None = None
        self._chapters: list[Chapter] | None = None

        if not self.file_path.exists():
            raise FileNotFoundError(f"File not found: {self.file_path}")

        # Determine file type (raises for unsupported extensions)
        self.file_type = self._detect_file_type()

    def _detect_file_type(self) -> str:
        """Detect the file type based on extension.

        Returns:
            File type: 'epub', 'txt', or 'ssmd'

        Raises:
            ValueError: For .pdf (not yet supported) and any other
                unrecognized extension.
        """
        suffix = self.file_path.suffix.lower()
        if suffix == ".epub":
            return "epub"
        elif suffix == ".ssmd":
            return "ssmd"
        elif suffix in [".txt", ".text"]:
            return "txt"
        elif suffix == ".pdf":
            raise ValueError(
                "PDF input is not supported yet. Convert the PDF to EPUB or TXT "
                "and try again."
            )
        else:
            raise ValueError(
                f"Unsupported file type: {suffix}. Supported types: .epub, .txt, .ssmd"
            )

    @staticmethod
    def _load_epub_parser():
        """Import and return the epub2text ``EPUBParser`` class.

        Centralizes the optional-dependency import shared by every EPUB
        method (previously duplicated three times).

        Raises:
            ImportError: If epub2text is not installed.
        """
        try:
            from epub2text import EPUBParser
        except ImportError as e:
            raise ImportError(
                "epub2text is required for EPUB support. "
                "Install with: pip install epub2text"
            ) from e
        return EPUBParser

    def get_metadata(self) -> Metadata:
        """Extract metadata from the file.

        Returns:
            Metadata object with title, author, language, etc.
        """
        if self._metadata is not None:
            return self._metadata

        # _detect_file_type() only ever returns one of these three values
        # (it raises for anything else), so no further branch is needed.
        if self.file_type == "epub":
            self._metadata = self._get_epub_metadata()
        elif self.file_type == "txt":
            self._metadata = self._get_gutenberg_metadata()
        elif self.file_type == "ssmd":
            self._metadata = self._get_ssmd_metadata()

        if self._metadata is None:
            raise ValueError("Metadata could not be loaded")
        return self._metadata

    def get_chapters(self) -> list[Chapter]:
        """Extract chapters from the file.

        Returns:
            List of Chapter objects
        """
        if self._chapters is not None:
            return self._chapters

        # Same exhaustiveness argument as in get_metadata().
        if self.file_type == "epub":
            self._chapters = self._get_epub_chapters()
        elif self.file_type == "txt":
            self._chapters = self._get_gutenberg_chapters()
        elif self.file_type == "ssmd":
            self._chapters = self._get_ssmd_chapters()

        if self._chapters is None:
            raise ValueError("Chapters could not be loaded")
        return self._chapters

    def get_chapters_with_html(self) -> list[tuple[Chapter, str | None]]:
        """Extract chapters with their original HTML content for markup detection.

        Returns:
            List of tuples containing (Chapter, html_content or None)
        """
        if self.file_type == "epub":
            return self._get_epub_chapters_with_html()
        else:
            # For non-EPUB files, HTML content is not available
            chapters = self.get_chapters()
            return [(ch, None) for ch in chapters]

    # EPUB methods
    def _get_epub_metadata(self) -> Metadata:
        """Extract metadata from EPUB file."""
        EPUBParser = self._load_epub_parser()

        parser = EPUBParser(str(self.file_path))
        epub_metadata = parser.get_metadata()

        # publication_year may arrive as an int or a string; normalize to
        # int, dropping unparseable values.
        raw_year: object = epub_metadata.publication_year
        publication_year: int | None = None
        if isinstance(raw_year, int):
            publication_year = raw_year
        elif isinstance(raw_year, str):
            try:
                publication_year = int(raw_year)
            except ValueError:
                publication_year = None

        return Metadata(
            title=epub_metadata.title,
            authors=list(epub_metadata.authors) if epub_metadata.authors else [],
            language=epub_metadata.language,
            publisher=epub_metadata.publisher,
            publication_year=publication_year,
        )

    def _get_epub_chapters(self) -> list[Chapter]:
        """Extract chapters from EPUB file."""
        EPUBParser = self._load_epub_parser()

        parser = EPUBParser(str(self.file_path))
        epub_chapters = parser.get_chapters()

        # Convert to our Chapter format
        chapters = []
        for i, ch in enumerate(epub_chapters):
            # Remove the <<CHAPTER: ...>> marker that epub2text adds
            content = self._CHAPTER_MARKER_RE.sub("", ch.text, count=1)
            chapters.append(Chapter(title=ch.title, text=content, index=i))

        return chapters

    def _get_epub_chapters_with_html(self) -> list[tuple[Chapter, str | None]]:
        """Extract chapters from EPUB with HTML content preserved."""
        EPUBParser = self._load_epub_parser()

        parser = EPUBParser(str(self.file_path))
        epub_chapters = parser.get_chapters()

        # Convert to our Chapter format with HTML
        chapters_with_html = []
        for i, ch in enumerate(epub_chapters):
            # Remove <<CHAPTER: ...>> markers from plain text
            content = self._CHAPTER_MARKER_RE.sub("", ch.text, count=1)
            chapter = Chapter(title=ch.title, text=content, index=i)

            # Try to get HTML content.
            # epub2text may expose it as an `html` attribute or as
            # `content` — presumably depending on version; TODO confirm.
            html_content = getattr(ch, "html", None)
            if html_content is None:
                html_content = getattr(ch, "content", None)

            chapters_with_html.append((chapter, html_content))

        return chapters_with_html

    # Gutenberg TXT methods
    def _get_gutenberg_metadata(self) -> Metadata:
        """Extract metadata from Project Gutenberg TXT file.

        Parses the header of a Gutenberg text file to extract metadata.
        """
        encoding = detect_encoding(self.file_path)
        with open(self.file_path, encoding=encoding, errors="replace") as f:
            # Read first 1000 lines for metadata (Gutenberg header is typically short)
            header_lines = []
            for i, line in enumerate(f):
                if i >= 1000:
                    break
                header_lines.append(line)
                # Stop at start of content
                if "*** START OF" in line.upper():
                    break

        header_text = "".join(header_lines)

        # Extract metadata using regex
        title = None
        authors = []
        language = None

        # Title pattern: "Title: <title>"
        title_match = re.search(
            r"^Title:\s*(.+)$", header_text, re.MULTILINE | re.IGNORECASE
        )
        if title_match:
            title = title_match.group(1).strip()

        # Author pattern: "Author: <author>"
        author_match = re.search(
            r"^Author:\s*(.+)$", header_text, re.MULTILINE | re.IGNORECASE
        )
        if author_match:
            authors = [author_match.group(1).strip()]

        # Language pattern: "Language: <language>"
        lang_match = re.search(
            r"^Language:\s*(.+)$", header_text, re.MULTILINE | re.IGNORECASE
        )
        if lang_match:
            language = lang_match.group(1).strip()

        return Metadata(title=title, authors=authors, language=language)

    def _get_gutenberg_chapters(self) -> list[Chapter]:
        """Extract chapters from Project Gutenberg TXT file.

        Splits the text into chapters based on common patterns like:
        - "CHAPTER I", "CHAPTER 1", "Chapter One"
        - "ONE", "TWO", etc. (capitalized chapter titles)
        - "PART I", etc.
        """
        encoding = detect_encoding(self.file_path)
        with open(self.file_path, encoding=encoding, errors="replace") as f:
            full_text = f.read()

        # Find the Gutenberg boilerplate start and end markers
        start_match = re.search(
            r"\*\*\* START OF (?:THE|THIS) (?:PROJECT )?GUTENBERG (?:EBOOK|E-BOOK)",
            full_text,
            re.IGNORECASE,
        )
        end_match = re.search(
            r"\*\*\* END OF (?:THE|THIS) (?:PROJECT )?GUTENBERG (?:EBOOK|E-BOOK)",
            full_text,
            re.IGNORECASE,
        )

        # Extract content between markers; fall back to the whole file when
        # a marker is absent.
        if start_match:
            start_pos = start_match.end()
        else:
            start_pos = 0

        if end_match:
            end_pos = end_match.start()
        else:
            end_pos = len(full_text)

        content = full_text[start_pos:end_pos].strip()

        # Try to split by chapters
        # Pattern 1: "CHAPTER X" or "Chapter X" at start of line
        chapter_pattern = re.compile(
            r"^(?:CHAPTER|Chapter|PART|Part)\s+(?:[IVXLCDM]+|\d+|[A-Z][A-Z\s-]+)$",
            re.MULTILINE,
        )

        # Find all chapter markers
        chapter_matches = list(chapter_pattern.finditer(content))

        if len(chapter_matches) > 1:
            # We found chapters, split by them
            chapters = []
            for i, match in enumerate(chapter_matches):
                title = match.group(0).strip()
                start = match.end()
                end = (
                    chapter_matches[i + 1].start()
                    if i + 1 < len(chapter_matches)
                    else len(content)
                )
                text = content[start:end].strip()

                if text:  # Only add non-empty chapters
                    chapters.append(Chapter(title=title, text=text, index=i))

            return chapters
        else:
            # No clear chapter structure, check for numbered sections
            # Pattern 2: Single words in all caps on own line
            # (like "ONE", "TWO", etc.)
            section_pattern = re.compile(r"^([A-Z][A-Z\s-]{2,})$", re.MULTILINE)
            section_matches = list(section_pattern.finditer(content))

            # Filter to likely chapter titles (not too long, appear multiple times)
            if len(section_matches) >= 3:
                chapters = []
                for i, match in enumerate(section_matches):
                    title = match.group(0).strip()
                    start = match.end()
                    end = (
                        section_matches[i + 1].start()
                        if i + 1 < len(section_matches)
                        else len(content)
                    )
                    text = content[start:end].strip()

                    if text and len(text) > 100:  # Only add substantial sections
                        chapters.append(Chapter(title=title, text=text, index=i))

                if chapters:
                    return chapters

        # No chapter structure found, return entire content as one chapter
        metadata = self.get_metadata()
        title = metadata.title or self.file_path.stem
        return [Chapter(title=title, text=content, index=0)]

    def _get_ssmd_metadata(self) -> Metadata:
        """Extract metadata from an SSMD file.

        SSMD files carry no embedded metadata; the filename stem is used
        as the title.
        """
        return Metadata(title=self.file_path.stem, authors=[], language=None)

    def _get_ssmd_chapters(self) -> list[Chapter]:
        """Read an SSMD file as a single chapter."""
        encoding = detect_encoding(self.file_path)
        with open(self.file_path, encoding=encoding, errors="replace") as f:
            content = f.read()
        return [
            Chapter(
                title=self.file_path.stem,
                text=content,
                index=0,
                is_ssmd=True,
            )
        ]

    # PDF methods (unused placeholders for future implementation;
    # _detect_file_type currently rejects .pdf before these can be reached)
    def _get_pdf_metadata(self) -> Metadata:
        """Extract metadata from PDF file.

        TODO: Implement PDF metadata extraction.
        """
        raise NotImplementedError("PDF support is not yet implemented")

    def _get_pdf_chapters(self) -> list[Chapter]:
        """Extract chapters from PDF file.

        TODO: Implement PDF chapter extraction.
        """
        raise NotImplementedError("PDF support is not yet implemented")
|
ttsforge/kokoro_lang.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# ttsforge/kokoro_lang.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def get_onnx_lang_code(ttsforge_lang: str) -> str:
    """Map a ttsforge language code to the kokoro ONNX language code.

    Falls back to the input code itself when no mapping exists, and to
    "en-us" when the input is empty.
    """
    from pykokoro.onnx_backend import LANG_CODE_TO_ONNX

    mapped = LANG_CODE_TO_ONNX.get(ttsforge_lang)
    return mapped if isinstance(mapped, str) else (ttsforge_lang or "en-us")
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# ttsforge/kokoro_runner.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Any, Literal, Protocol, cast
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
from pykokoro import GenerationConfig, KokoroPipeline, PipelineConfig
|
|
9
|
+
from pykokoro.onnx_backend import (
|
|
10
|
+
Kokoro,
|
|
11
|
+
VoiceBlend,
|
|
12
|
+
are_models_downloaded,
|
|
13
|
+
download_all_models,
|
|
14
|
+
)
|
|
15
|
+
from pykokoro.stages.audio_generation.onnx import OnnxAudioGenerationAdapter
|
|
16
|
+
from pykokoro.stages.audio_postprocessing.onnx import OnnxAudioPostprocessingAdapter
|
|
17
|
+
from pykokoro.stages.phoneme_processing.onnx import OnnxPhonemeProcessorAdapter
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(slots=True)
class KokoroRunOptions:
    """Settings bundle consumed by KokoroRunner.

    The pause_* values are forwarded verbatim to pykokoro's
    GenerationConfig on every synthesize() call.
    """

    # Voice name; may be resolved through voice_database at setup time.
    voice: str
    # Speech speed multiplier passed to GenerationConfig.
    speed: float
    # Whether the ONNX backend should use the GPU.
    use_gpu: bool
    # Pause lengths per break kind (presumably seconds — TODO confirm
    # against pykokoro's GenerationConfig docs).
    pause_clause: float
    pause_sentence: float
    pause_paragraph: float
    # Variance applied to pause lengths.
    pause_variance: float
    # Optional override paths for the ONNX model and voices files.
    model_path: Any | None = None
    voices_path: Any | None = None
    # Voice blend spec; parsed with VoiceBlend.parse() when set, and takes
    # precedence over `voice`.
    voice_blend: str | None = None
    # Optional voice database passed to Kokoro.load_voice_database().
    voice_database: Any | None = None
    tokenizer_config: Any | None = None  # pykokoro.tokenizer.TokenizerConfig
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class KokoroRunner:
    """Lazy wrapper around a pykokoro ONNX synthesis pipeline.

    Construction is cheap; models are downloaded and the pipeline is built
    on first use via ensure_ready().
    """

    class LogCallback(Protocol):
        # Structural type for the logging hook: message plus a level string
        # (defaults to "info"; "warning" is also used below).
        def __call__(self, message: str, level: str = "info") -> None: ...

    def __init__(self, opts: KokoroRunOptions, log: LogCallback):
        """Store options and the log callback; defer all heavy setup."""
        self.opts = opts
        self.log = log
        # Populated lazily by ensure_ready().
        self._kokoro: Kokoro | None = None
        self._pipeline: KokoroPipeline | None = None
        self._voice_style: str | VoiceBlend | None = None

    def ensure_ready(self) -> None:
        """Build the Kokoro backend and pipeline if not already built.

        Idempotent: returns immediately once self._pipeline exists.
        Downloads the ONNX models on first run, resolves the voice (blend
        string > voice-database entry > plain voice name), then wires the
        ONNX adapters into a KokoroPipeline.
        """
        if self._pipeline is not None:
            return

        if not are_models_downloaded():
            self.log("Downloading ONNX model files...")
            download_all_models()

        self._kokoro = Kokoro(
            model_path=self.opts.model_path,
            voices_path=self.opts.voices_path,
            use_gpu=self.opts.use_gpu,
            tokenizer_config=self.opts.tokenizer_config,
        )

        assert self._kokoro is not None

        # Voice database loading is best-effort: failure is logged as a
        # warning and synthesis proceeds with built-in voices.
        if self.opts.voice_database:
            try:
                self._kokoro.load_voice_database(self.opts.voice_database)
                self.log(f"Loaded voice database: {self.opts.voice_database}")
            except Exception as e:
                self.log(f"Failed to load voice database: {e}", "warning")

        # An explicit blend spec wins over everything else.
        if self.opts.voice_blend:
            self._voice_style = VoiceBlend.parse(self.opts.voice_blend)
        else:
            # if voice_database provides overrides, let Kokoro resolve it
            if self.opts.voice_database:
                db_voice = cast(
                    str | VoiceBlend | None,
                    self._kokoro.get_voice_from_database(self.opts.voice),
                )
                self._voice_style = (
                    db_voice if db_voice is not None else self.opts.voice
                )
            else:
                self._voice_style = self.opts.voice

        # GenerationConfig will be supplied per call
        # because lang / is_phonemes can vary; the lang="en-us" here is
        # only a placeholder for the pipeline's default config.
        pipeline_cfg = PipelineConfig(
            voice=self._voice_style,
            generation=GenerationConfig(speed=self.opts.speed, lang="en-us"),
            model_path=self.opts.model_path,
            voices_path=self.opts.voices_path,
            tokenizer_config=self.opts.tokenizer_config,
        )

        # Use the same adapters everywhere (text + phonemes)
        self._pipeline = KokoroPipeline(
            pipeline_cfg,
            phoneme_processing=OnnxPhonemeProcessorAdapter(self._kokoro),
            audio_generation=OnnxAudioGenerationAdapter(self._kokoro),
            audio_postprocessing=OnnxAudioPostprocessingAdapter(self._kokoro),
        )

    def synthesize(
        self,
        text_or_ssmd: str,
        *,
        lang_code: str,
        pause_mode: Literal["tts", "manual", "auto"],
        is_phonemes: bool = False,
    ) -> np.ndarray:
        """Synthesize audio for text, SSMD, or phoneme input.

        Args:
            text_or_ssmd: Input text (or SSMD markup / phoneme string,
                depending on flags).
            lang_code: kokoro ONNX language code for this call.
            pause_mode: Pause handling strategy passed to GenerationConfig.
            is_phonemes: Treat the input as phonemes rather than text.

        Returns:
            The generated audio as a numpy array (shape/dtype determined by
            pykokoro — not visible here).
        """
        self.ensure_ready()
        assert self._pipeline is not None
        # Fresh GenerationConfig per call: lang / is_phonemes / pause_mode
        # vary between calls, while pause settings come from the options.
        gen = GenerationConfig(
            speed=self.opts.speed,
            lang=lang_code,
            is_phonemes=is_phonemes,
            pause_mode=pause_mode,
            pause_clause=self.opts.pause_clause,
            pause_sentence=self.opts.pause_sentence,
            pause_paragraph=self.opts.pause_paragraph,
            pause_variance=self.opts.pause_variance,
        )
        audio = self._pipeline.run(text_or_ssmd, generation=gen).audio
        return cast(np.ndarray, audio)
|