ttsforge 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ttsforge/__init__.py +114 -0
- ttsforge/_version.py +34 -0
- ttsforge/audio_merge.py +180 -0
- ttsforge/audio_player.py +473 -0
- ttsforge/chapter_selection.py +75 -0
- ttsforge/cli/__init__.py +73 -0
- ttsforge/cli/commands_conversion.py +1927 -0
- ttsforge/cli/commands_phonemes.py +1033 -0
- ttsforge/cli/commands_utility.py +1389 -0
- ttsforge/cli/helpers.py +76 -0
- ttsforge/constants.py +164 -0
- ttsforge/conversion.py +1090 -0
- ttsforge/input_reader.py +408 -0
- ttsforge/kokoro_lang.py +12 -0
- ttsforge/kokoro_runner.py +125 -0
- ttsforge/name_extractor.py +305 -0
- ttsforge/phoneme_conversion.py +978 -0
- ttsforge/phonemes.py +486 -0
- ttsforge/ssmd_generator.py +422 -0
- ttsforge/utils.py +785 -0
- ttsforge/vocab/__init__.py +139 -0
- ttsforge-0.1.0.dist-info/METADATA +659 -0
- ttsforge-0.1.0.dist-info/RECORD +27 -0
- ttsforge-0.1.0.dist-info/WHEEL +5 -0
- ttsforge-0.1.0.dist-info/entry_points.txt +2 -0
- ttsforge-0.1.0.dist-info/licenses/LICENSE +21 -0
- ttsforge-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,422 @@
|
|
|
1
|
+
"""SSMD (Speech Synthesis Markdown) generator for ttsforge.
|
|
2
|
+
|
|
3
|
+
This module converts chapter text to SSMD format with markup for:
|
|
4
|
+
- Emphasis (*text* for moderate, **text** for strong)
|
|
5
|
+
- Language switches ([text](lang_code))
|
|
6
|
+
- Phoneme substitutions ([word](ph: /phoneme/))
|
|
7
|
+
|
|
8
|
+
Note: Structural breaks (paragraphs, sentences, clauses) are NOT automatically
|
|
9
|
+
added. The SSMD parser in pykokoro handles sentence detection automatically.
|
|
10
|
+
Users can manually add breaks in the SSMD file if desired:
|
|
11
|
+
- Paragraph breaks (...p)
|
|
12
|
+
- Sentence breaks (...s)
|
|
13
|
+
- Clause breaks (...c)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import hashlib
|
|
19
|
+
import re
|
|
20
|
+
from html.parser import HTMLParser
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class SSMDGenerationError(Exception):
    """Raised when converting chapter text to SSMD fails."""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _hash_content(content: str) -> str:
|
|
31
|
+
"""Generate a hash of content for change detection.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
content: Text content to hash
|
|
35
|
+
|
|
36
|
+
Returns:
|
|
37
|
+
12-character hex hash
|
|
38
|
+
"""
|
|
39
|
+
return hashlib.md5(content.encode("utf-8")).hexdigest()[:12]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class _EmphasisHTMLParser(HTMLParser):
|
|
43
|
+
def __init__(self) -> None:
|
|
44
|
+
super().__init__(convert_charrefs=True)
|
|
45
|
+
self._stack: list[str] = []
|
|
46
|
+
self.segments: list[tuple[str, str]] = []
|
|
47
|
+
self._last_was_emphasis = False
|
|
48
|
+
|
|
49
|
+
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
|
|
50
|
+
tag_lower = tag.lower()
|
|
51
|
+
if tag_lower in {"em", "i"}:
|
|
52
|
+
self._stack.append("*")
|
|
53
|
+
elif tag_lower in {"strong", "b"}:
|
|
54
|
+
self._stack.append("**")
|
|
55
|
+
|
|
56
|
+
def handle_endtag(self, tag: str) -> None:
|
|
57
|
+
tag_lower = tag.lower()
|
|
58
|
+
if tag_lower in {"em", "i", "strong", "b"}:
|
|
59
|
+
for idx in range(len(self._stack) - 1, -1, -1):
|
|
60
|
+
if self._stack[idx] in {"*", "**"}:
|
|
61
|
+
self._stack.pop(idx)
|
|
62
|
+
break
|
|
63
|
+
|
|
64
|
+
def handle_data(self, data: str) -> None:
|
|
65
|
+
marker = "**" if "**" in self._stack else ("*" if "*" in self._stack else None)
|
|
66
|
+
if not marker:
|
|
67
|
+
self._last_was_emphasis = False
|
|
68
|
+
return
|
|
69
|
+
if self._last_was_emphasis and self.segments and self.segments[-1][1] == marker:
|
|
70
|
+
prev_text, _ = self.segments[-1]
|
|
71
|
+
self.segments[-1] = (prev_text + data, marker)
|
|
72
|
+
else:
|
|
73
|
+
self.segments.append((data, marker))
|
|
74
|
+
self._last_was_emphasis = True
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _detect_emphasis_from_html(html_content: str) -> list[tuple[str, str]]:
    """Extract emphasized text runs from HTML, in document order.

    Args:
        html_content: HTML content with formatting tags

    Returns:
        List of (text, marker) segments in document order
    """
    collector = _EmphasisHTMLParser()
    collector.feed(html_content)
    return collector.segments
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _apply_emphasis_markers(text: str, emphasis_segments: list[tuple[str, str]]) -> str:
|
|
92
|
+
"""Apply emphasis markers to text based on ordered emphasis segments.
|
|
93
|
+
|
|
94
|
+
Args:
|
|
95
|
+
text: Plain text
|
|
96
|
+
emphasis_segments: List of (text, marker) in document order
|
|
97
|
+
|
|
98
|
+
Returns:
|
|
99
|
+
Text with emphasis markers applied
|
|
100
|
+
"""
|
|
101
|
+
if not emphasis_segments:
|
|
102
|
+
return text
|
|
103
|
+
|
|
104
|
+
matches: list[tuple[int, int, str]] = []
|
|
105
|
+
cursor = 0
|
|
106
|
+
base_text = text
|
|
107
|
+
|
|
108
|
+
for emphasized_text, marker in emphasis_segments:
|
|
109
|
+
if not emphasized_text.strip():
|
|
110
|
+
continue
|
|
111
|
+
pattern = re.escape(emphasized_text)
|
|
112
|
+
pattern = re.sub(r"\s+", r"\\s+", pattern)
|
|
113
|
+
match = re.search(pattern, base_text[cursor:], flags=re.MULTILINE)
|
|
114
|
+
if not match:
|
|
115
|
+
continue
|
|
116
|
+
start = cursor + match.start()
|
|
117
|
+
end = cursor + match.end()
|
|
118
|
+
matches.append((start, end, marker))
|
|
119
|
+
cursor = end
|
|
120
|
+
|
|
121
|
+
for start, end, marker in reversed(matches):
|
|
122
|
+
base_text = (
|
|
123
|
+
base_text[:start] + marker + base_text[start:end] + marker + base_text[end:]
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
return base_text
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _inject_phoneme_substitutions(
|
|
130
|
+
text: str, phoneme_dict: dict[str, str], case_sensitive: bool = False
|
|
131
|
+
) -> str:
|
|
132
|
+
"""Inject phoneme substitutions into text using SSMD [word](ph: /phoneme/) syntax.
|
|
133
|
+
|
|
134
|
+
Args:
|
|
135
|
+
text: Text to process
|
|
136
|
+
phoneme_dict: Dictionary mapping words to IPA phonemes
|
|
137
|
+
case_sensitive: Whether to match case-sensitively
|
|
138
|
+
|
|
139
|
+
Returns:
|
|
140
|
+
Text with phoneme substitutions injected
|
|
141
|
+
"""
|
|
142
|
+
if not phoneme_dict:
|
|
143
|
+
return text
|
|
144
|
+
|
|
145
|
+
link_pattern = re.compile(r"\[[^\]]+\]\([^\)]+\)")
|
|
146
|
+
|
|
147
|
+
words = [word for word in phoneme_dict.keys() if word]
|
|
148
|
+
if not words:
|
|
149
|
+
return text
|
|
150
|
+
|
|
151
|
+
words = sorted(words, key=len, reverse=True)
|
|
152
|
+
alternation = "|".join(re.escape(word) for word in words)
|
|
153
|
+
boundary_pattern = rf"(?<!\w)({alternation})(?!\w)"
|
|
154
|
+
flags = 0 if case_sensitive else re.IGNORECASE
|
|
155
|
+
compiled = re.compile(boundary_pattern, flags=flags)
|
|
156
|
+
|
|
157
|
+
if case_sensitive:
|
|
158
|
+
lookup = phoneme_dict
|
|
159
|
+
else:
|
|
160
|
+
lookup = {}
|
|
161
|
+
for word, phoneme in phoneme_dict.items():
|
|
162
|
+
key = word.lower()
|
|
163
|
+
if key not in lookup:
|
|
164
|
+
lookup[key] = phoneme
|
|
165
|
+
|
|
166
|
+
def replace(match: re.Match[str]) -> str:
|
|
167
|
+
matched_word = match.group(1)
|
|
168
|
+
key = matched_word if case_sensitive else matched_word.lower()
|
|
169
|
+
phoneme = lookup.get(key)
|
|
170
|
+
if not phoneme:
|
|
171
|
+
return matched_word
|
|
172
|
+
clean_phoneme = phoneme.strip("/")
|
|
173
|
+
return f"[{matched_word}](ph: /{clean_phoneme}/)"
|
|
174
|
+
|
|
175
|
+
segments: list[str] = []
|
|
176
|
+
last_index = 0
|
|
177
|
+
for match in link_pattern.finditer(text):
|
|
178
|
+
if match.start() > last_index:
|
|
179
|
+
segment = text[last_index : match.start()]
|
|
180
|
+
segments.append(compiled.sub(replace, segment))
|
|
181
|
+
segments.append(match.group(0))
|
|
182
|
+
last_index = match.end()
|
|
183
|
+
|
|
184
|
+
if last_index < len(text):
|
|
185
|
+
segments.append(compiled.sub(replace, text[last_index:]))
|
|
186
|
+
|
|
187
|
+
return "".join(segments)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def _add_language_markers(text: str, mixed_language_config: dict | None = None) -> str:
|
|
191
|
+
"""Add language markers for mixed-language segments.
|
|
192
|
+
|
|
193
|
+
Note: This is a placeholder for now. Full implementation would require
|
|
194
|
+
language detection library (lingua-language-detector).
|
|
195
|
+
|
|
196
|
+
Args:
|
|
197
|
+
text: Text to process
|
|
198
|
+
mixed_language_config: Configuration for mixed-language mode
|
|
199
|
+
|
|
200
|
+
Returns:
|
|
201
|
+
Text with language markers (currently returns text unchanged)
|
|
202
|
+
"""
|
|
203
|
+
# TODO: Implement language detection and wrapping
|
|
204
|
+
# For now, return text unchanged
|
|
205
|
+
# Future: Use lingua-language-detector to identify foreign segments
|
|
206
|
+
# and wrap them with [segment](lang_code)
|
|
207
|
+
return text
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _add_structural_breaks(text: str) -> str:
|
|
211
|
+
"""Preserve paragraph structure without adding automatic SSMD breaks.
|
|
212
|
+
|
|
213
|
+
The SSMD parser in pykokoro will handle sentence detection automatically.
|
|
214
|
+
This function only preserves existing paragraph breaks as double newlines.
|
|
215
|
+
|
|
216
|
+
Args:
|
|
217
|
+
text: Plain text to process
|
|
218
|
+
|
|
219
|
+
Returns:
|
|
220
|
+
Text with normalized paragraph spacing (no SSMD break markers)
|
|
221
|
+
"""
|
|
222
|
+
# Split into paragraphs and normalize spacing
|
|
223
|
+
paragraphs = re.split(r"\n\s*\n+", text)
|
|
224
|
+
result_paragraphs = []
|
|
225
|
+
|
|
226
|
+
for para in paragraphs:
|
|
227
|
+
para = para.strip()
|
|
228
|
+
if para:
|
|
229
|
+
result_paragraphs.append(para)
|
|
230
|
+
|
|
231
|
+
# Join paragraphs with double newlines (standard paragraph separation)
|
|
232
|
+
# No SSMD markers - let pykokoro's parser handle sentence detection
|
|
233
|
+
result = "\n\n".join(result_paragraphs)
|
|
234
|
+
|
|
235
|
+
return result
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _strip_redundant_title(chapter_title: str, chapter_text: str) -> str:
|
|
239
|
+
"""Remove a duplicated chapter title from the start of the text."""
|
|
240
|
+
title = chapter_title.strip()
|
|
241
|
+
if not title:
|
|
242
|
+
return chapter_text
|
|
243
|
+
|
|
244
|
+
lines = chapter_text.splitlines()
|
|
245
|
+
first_idx = None
|
|
246
|
+
for idx, line in enumerate(lines):
|
|
247
|
+
if line.strip():
|
|
248
|
+
first_idx = idx
|
|
249
|
+
break
|
|
250
|
+
|
|
251
|
+
if first_idx is None:
|
|
252
|
+
return chapter_text
|
|
253
|
+
|
|
254
|
+
first_line = lines[first_idx]
|
|
255
|
+
title_pattern = re.compile(
|
|
256
|
+
rf"^\s*{re.escape(title)}(?:\b|[\s:;\-\u2013\u2014])",
|
|
257
|
+
re.IGNORECASE,
|
|
258
|
+
)
|
|
259
|
+
if not title_pattern.search(first_line):
|
|
260
|
+
return chapter_text
|
|
261
|
+
|
|
262
|
+
trimmed_line = title_pattern.sub("", first_line, count=1).lstrip(
|
|
263
|
+
" \t:;\-\u2013\u2014"
|
|
264
|
+
)
|
|
265
|
+
if trimmed_line:
|
|
266
|
+
lines[first_idx] = trimmed_line
|
|
267
|
+
return "\n".join(lines[first_idx:]).lstrip()
|
|
268
|
+
|
|
269
|
+
remaining = lines[first_idx + 1 :]
|
|
270
|
+
while remaining and not remaining[0].strip():
|
|
271
|
+
remaining = remaining[1:]
|
|
272
|
+
return "\n".join(remaining).lstrip()
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def chapter_to_ssmd(
    chapter_title: str,
    chapter_text: str,
    phoneme_dict: dict[str, str] | None = None,
    phoneme_dict_case_sensitive: bool = False,
    mixed_language_config: dict | None = None,
    html_content: str | None = None,
    include_title: bool = True,
) -> str:
    """Convert a chapter to SSMD format.

    Pipeline: strip a duplicated leading title, apply emphasis detected
    from the HTML source, inject phoneme links, add language markers
    (mixed-language mode only), normalize paragraph spacing, then prepend
    the title as a heading.

    Args:
        chapter_title: Title of the chapter
        chapter_text: Plain text content of the chapter
        phoneme_dict: Optional dictionary mapping words to IPA phonemes
        phoneme_dict_case_sensitive: Whether phoneme matching is case-sensitive
        mixed_language_config: Optional config for mixed-language mode
        html_content: Optional HTML content for emphasis detection
        include_title: Whether to include chapter title in SSMD

    Returns:
        SSMD formatted text

    Raises:
        SSMDGenerationError: If generation fails
    """
    try:
        ssmd = chapter_text

        # Avoid speaking the title twice: drop it from the body when it
        # will be emitted as a heading below.
        if include_title and chapter_title:
            ssmd = _strip_redundant_title(chapter_title, ssmd)

        # Emphasis markers come from the HTML formatting, when available.
        if html_content:
            segments = _detect_emphasis_from_html(html_content)
            if segments:
                ssmd = _apply_emphasis_markers(ssmd, segments)

        if phoneme_dict:
            ssmd = _inject_phoneme_substitutions(
                ssmd, phoneme_dict, phoneme_dict_case_sensitive
            )

        if mixed_language_config and mixed_language_config.get("use_mixed_language"):
            ssmd = _add_language_markers(ssmd, mixed_language_config)

        # Paragraph normalization only; pykokoro handles sentence breaks.
        ssmd = _add_structural_breaks(ssmd)

        if include_title and chapter_title:
            ssmd = f"# {chapter_title.strip()}\n\n{ssmd}"

        return ssmd

    except Exception as e:
        raise SSMDGenerationError(
            f"Failed to generate SSMD for chapter '{chapter_title}': {str(e)}"
        ) from e
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def save_ssmd_file(ssmd_content: str, output_path: Path) -> str:
    """Save SSMD content to a file and return its hash.

    Parent directories are created as needed.

    Args:
        ssmd_content: SSMD formatted text
        output_path: Path to save the SSMD file

    Returns:
        Hash of the saved content

    Raises:
        SSMDGenerationError: If file save fails
    """
    try:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(ssmd_content, encoding="utf-8")
        return _hash_content(ssmd_content)
    except Exception as e:
        raise SSMDGenerationError(
            f"Failed to save SSMD file to {output_path}: {str(e)}"
        ) from e
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def load_ssmd_file(ssmd_path: Path) -> tuple[str, str]:
    """Load SSMD file and return content with hash.

    Args:
        ssmd_path: Path to the SSMD file

    Returns:
        Tuple of (content, hash)

    Raises:
        SSMDGenerationError: If file load fails or doesn't exist
    """
    try:
        if not ssmd_path.exists():
            raise SSMDGenerationError(f"SSMD file not found: {ssmd_path}")
        content = ssmd_path.read_text(encoding="utf-8")
        return content, _hash_content(content)
    except SSMDGenerationError:
        # Already our domain error (e.g. missing file): pass through as-is.
        raise
    except Exception as e:
        raise SSMDGenerationError(
            f"Failed to load SSMD file from {ssmd_path}: {str(e)}"
        ) from e
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def validate_ssmd(ssmd_content: str) -> list[str]:
    """Validate SSMD content and return warnings.

    Lightweight sanity checks only: obviously unbalanced brackets and
    parentheses, and unmatched emphasis markers. No full SSMD parse is
    attempted.

    Args:
        ssmd_content: SSMD formatted text

    Returns:
        List of warning strings. Empty list means no issues found.
    """
    count = ssmd_content.count
    issues: list[str] = []

    if count("[") != count("]"):
        issues.append("Unbalanced '[' and ']' brackets")
    if count("(") != count(")"):
        issues.append("Unbalanced '(' and ')' parentheses")
    # Non-overlapping "**" pairs must come in twos to open and close.
    if count("**") % 2:
        issues.append("Unbalanced strong emphasis markers '**'")
    if count("*") % 2:
        issues.append("Unbalanced emphasis markers '*'")

    return issues
|