ttsforge 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ttsforge/__init__.py +3 -18
- ttsforge/_version.py +2 -2
- ttsforge/cli/commands_conversion.py +75 -10
- ttsforge/cli/commands_phonemes.py +22 -4
- ttsforge/cli/commands_utility.py +18 -1
- ttsforge/cli/helpers.py +1 -0
- ttsforge/constants.py +13 -4
- ttsforge/conversion.py +112 -51
- ttsforge/kokoro_runner.py +38 -5
- ttsforge/name_extractor.py +3 -3
- ttsforge/phoneme_conversion.py +61 -10
- ttsforge/ssmd_generator.py +4 -4
- {ttsforge-0.1.0.dist-info → ttsforge-0.1.2.dist-info}/METADATA +13 -12
- ttsforge-0.1.2.dist-info/RECORD +27 -0
- ttsforge-0.1.0.dist-info/RECORD +0 -27
- {ttsforge-0.1.0.dist-info → ttsforge-0.1.2.dist-info}/WHEEL +0 -0
- {ttsforge-0.1.0.dist-info → ttsforge-0.1.2.dist-info}/entry_points.txt +0 -0
- {ttsforge-0.1.0.dist-info → ttsforge-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {ttsforge-0.1.0.dist-info → ttsforge-0.1.2.dist-info}/top_level.txt +0 -0
ttsforge/__init__.py
CHANGED
|
@@ -18,7 +18,7 @@ from pykokoro.tokenizer import (
|
|
|
18
18
|
Tokenizer,
|
|
19
19
|
)
|
|
20
20
|
from pykokoro.constants import SUPPORTED_LANGUAGES
|
|
21
|
-
|
|
21
|
+
from pykokoro.onnx_backend import VOICE_NAMES_BY_VARIANT
|
|
22
22
|
from .constants import (
|
|
23
23
|
DEFAULT_CONFIG,
|
|
24
24
|
LANGUAGE_DESCRIPTIONS,
|
|
@@ -27,23 +27,7 @@ from .constants import (
|
|
|
27
27
|
)
|
|
28
28
|
|
|
29
29
|
# Import from pykokoro
|
|
30
|
-
|
|
31
|
-
from pykokoro.constants import SAMPLE_RATE
|
|
32
|
-
from pykokoro.onnx_backend import LANG_CODE_TO_ONNX
|
|
33
|
-
except ImportError:
|
|
34
|
-
# Fallback values if pykokoro not installed
|
|
35
|
-
SAMPLE_RATE = 24000
|
|
36
|
-
LANG_CODE_TO_ONNX = {
|
|
37
|
-
"a": "en-us",
|
|
38
|
-
"b": "en-gb",
|
|
39
|
-
"e": "es",
|
|
40
|
-
"f": "fr-fr",
|
|
41
|
-
"h": "hi",
|
|
42
|
-
"i": "it",
|
|
43
|
-
"j": "ja",
|
|
44
|
-
"p": "pt",
|
|
45
|
-
"z": "zh",
|
|
46
|
-
}
|
|
30
|
+
from pykokoro.constants import SAMPLE_RATE
|
|
47
31
|
|
|
48
32
|
from .conversion import (
|
|
49
33
|
Chapter,
|
|
@@ -73,6 +57,7 @@ __all__ = [
|
|
|
73
57
|
"LANGUAGE_DESCRIPTIONS",
|
|
74
58
|
"SUPPORTED_OUTPUT_FORMATS",
|
|
75
59
|
"VOICES",
|
|
60
|
+
"VOICE_NAMES_BY_VARIANT",
|
|
76
61
|
# Conversion
|
|
77
62
|
"Chapter",
|
|
78
63
|
"ConversionOptions",
|
ttsforge/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.1.0'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 1, 0)
|
|
31
|
+
__version__ = version = '0.1.2'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 2)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
ttsforge/cli/commands_conversion.py
CHANGED
|
@@ -17,6 +17,7 @@ from typing import Literal, TypedDict, cast
|
|
|
17
17
|
|
|
18
18
|
import click
|
|
19
19
|
import numpy as np
|
|
20
|
+
from pykokoro.onnx_backend import DEFAULT_MODEL_QUALITY, ModelQuality
|
|
20
21
|
from rich.panel import Panel
|
|
21
22
|
from rich.progress import (
|
|
22
23
|
BarColumn,
|
|
@@ -37,7 +38,6 @@ from ..constants import (
|
|
|
37
38
|
LANGUAGE_DESCRIPTIONS,
|
|
38
39
|
SUPPORTED_OUTPUT_FORMATS,
|
|
39
40
|
VOICE_PREFIX_TO_LANG,
|
|
40
|
-
VOICES,
|
|
41
41
|
)
|
|
42
42
|
from ..conversion import (
|
|
43
43
|
Chapter,
|
|
@@ -54,6 +54,7 @@ from ..utils import (
|
|
|
54
54
|
load_config,
|
|
55
55
|
resolve_conversion_defaults,
|
|
56
56
|
)
|
|
57
|
+
from .commands_utility import _resolve_model_source_and_variant, _resolve_voice_names
|
|
57
58
|
from .helpers import DEFAULT_SAMPLE_TEXT, console, parse_voice_parameter
|
|
58
59
|
|
|
59
60
|
|
|
@@ -64,6 +65,14 @@ class ContentItem(TypedDict):
|
|
|
64
65
|
page_number: NotRequired[int]
|
|
65
66
|
|
|
66
67
|
|
|
68
|
+
def get_voices() -> list[str]:
|
|
69
|
+
"""Get the list of available voices."""
|
|
70
|
+
cfg = load_config()
|
|
71
|
+
|
|
72
|
+
model_source, model_variant = _resolve_model_source_and_variant(cfg)
|
|
73
|
+
return _resolve_voice_names(model_source, model_variant)
|
|
74
|
+
|
|
75
|
+
|
|
67
76
|
@click.command()
|
|
68
77
|
@click.argument("epub_file", type=click.Path(exists=True, path_type=Path))
|
|
69
78
|
@click.option(
|
|
@@ -82,7 +91,7 @@ class ContentItem(TypedDict):
|
|
|
82
91
|
@click.option(
|
|
83
92
|
"-v",
|
|
84
93
|
"--voice",
|
|
85
|
-
type=click.Choice(
|
|
94
|
+
type=click.Choice(get_voices()),
|
|
86
95
|
help="Voice to use for TTS.",
|
|
87
96
|
)
|
|
88
97
|
@click.option(
|
|
@@ -150,6 +159,12 @@ class ContentItem(TypedDict):
|
|
|
150
159
|
default=None,
|
|
151
160
|
help="Pause mode: 'tts', 'manual', or 'auto' (default: auto).",
|
|
152
161
|
)
|
|
162
|
+
@click.option(
|
|
163
|
+
"--enable-short-sentence/--disable-short-sentence",
|
|
164
|
+
"enable_short_sentence",
|
|
165
|
+
default=None,
|
|
166
|
+
help="Enable/disable special handling for short sentences.",
|
|
167
|
+
)
|
|
153
168
|
@click.option(
|
|
154
169
|
"--announce-chapters/--no-announce-chapters",
|
|
155
170
|
"announce_chapters",
|
|
@@ -296,6 +311,7 @@ def convert( # noqa: C901
|
|
|
296
311
|
pause_paragraph: float | None,
|
|
297
312
|
pause_variance: float | None,
|
|
298
313
|
pause_mode: str | None,
|
|
314
|
+
enable_short_sentence: bool | None,
|
|
299
315
|
announce_chapters: bool | None,
|
|
300
316
|
chapter_pause: float | None,
|
|
301
317
|
title: str | None,
|
|
@@ -325,6 +341,10 @@ def convert( # noqa: C901
|
|
|
325
341
|
config = load_config()
|
|
326
342
|
model_path = ctx.obj.get("model_path") if ctx.obj else None
|
|
327
343
|
voices_path = ctx.obj.get("voices_path") if ctx.obj else None
|
|
344
|
+
model_source, model_variant = _resolve_model_source_and_variant(config)
|
|
345
|
+
model_quality = cast(
|
|
346
|
+
ModelQuality, config.get("model_quality", DEFAULT_MODEL_QUALITY)
|
|
347
|
+
)
|
|
328
348
|
|
|
329
349
|
# Get format first (needed for output path construction)
|
|
330
350
|
fmt = output_format or config.get("default_format", "m4b")
|
|
@@ -467,6 +487,9 @@ def convert( # noqa: C901
|
|
|
467
487
|
language=language or "a",
|
|
468
488
|
speed=speed or config.get("default_speed", 1.0),
|
|
469
489
|
use_gpu=use_gpu if use_gpu is not None else config.get("use_gpu", False),
|
|
490
|
+
model_source=model_source,
|
|
491
|
+
model_variant=model_variant,
|
|
492
|
+
model_quality=model_quality,
|
|
470
493
|
num_chapters=len(selected_indices) if selected_indices else len(epub_chapters),
|
|
471
494
|
title=effective_title,
|
|
472
495
|
author=effective_author,
|
|
@@ -510,6 +533,9 @@ def convert( # noqa: C901
|
|
|
510
533
|
output_format=output_format or config.get("default_format", "m4b"),
|
|
511
534
|
output_dir=output.parent,
|
|
512
535
|
use_gpu=use_gpu if use_gpu is not None else config.get("use_gpu", False),
|
|
536
|
+
model_quality=model_quality,
|
|
537
|
+
model_source=model_source,
|
|
538
|
+
model_variant=model_variant,
|
|
513
539
|
silence_between_chapters=silence or config.get("silence_between_chapters", 2.0),
|
|
514
540
|
lang=lang or config.get("phonemization_lang"),
|
|
515
541
|
use_mixed_language=(
|
|
@@ -536,17 +562,17 @@ def convert( # noqa: C901
|
|
|
536
562
|
pause_clause=(
|
|
537
563
|
pause_clause
|
|
538
564
|
if pause_clause is not None
|
|
539
|
-
else config.get("pause_clause", 0.
|
|
565
|
+
else config.get("pause_clause", 0.3)
|
|
540
566
|
),
|
|
541
567
|
pause_sentence=(
|
|
542
568
|
pause_sentence
|
|
543
569
|
if pause_sentence is not None
|
|
544
|
-
else config.get("pause_sentence", 0.
|
|
570
|
+
else config.get("pause_sentence", 0.5)
|
|
545
571
|
),
|
|
546
572
|
pause_paragraph=(
|
|
547
573
|
pause_paragraph
|
|
548
574
|
if pause_paragraph is not None
|
|
549
|
-
else config.get("pause_paragraph", 0.
|
|
575
|
+
else config.get("pause_paragraph", 0.9)
|
|
550
576
|
),
|
|
551
577
|
pause_variance=(
|
|
552
578
|
pause_variance
|
|
@@ -556,6 +582,11 @@ def convert( # noqa: C901
|
|
|
556
582
|
pause_mode=(
|
|
557
583
|
pause_mode if pause_mode is not None else config.get("pause_mode", "auto")
|
|
558
584
|
),
|
|
585
|
+
enable_short_sentence=(
|
|
586
|
+
enable_short_sentence
|
|
587
|
+
if enable_short_sentence is not None
|
|
588
|
+
else config.get("enable_short_sentence", None)
|
|
589
|
+
),
|
|
559
590
|
announce_chapters=(
|
|
560
591
|
announce_chapters
|
|
561
592
|
if announce_chapters is not None
|
|
@@ -947,6 +978,10 @@ def sample(
|
|
|
947
978
|
|
|
948
979
|
# Load config for defaults
|
|
949
980
|
user_config = load_config()
|
|
981
|
+
model_source, model_variant = _resolve_model_source_and_variant(user_config)
|
|
982
|
+
model_quality = cast(
|
|
983
|
+
ModelQuality, user_config.get("model_quality", DEFAULT_MODEL_QUALITY)
|
|
984
|
+
)
|
|
950
985
|
resolved_defaults = resolve_conversion_defaults(
|
|
951
986
|
user_config,
|
|
952
987
|
{
|
|
@@ -980,6 +1015,9 @@ def sample(
|
|
|
980
1015
|
use_gpu=resolved_defaults["use_gpu"],
|
|
981
1016
|
split_mode=resolved_defaults["split_mode"],
|
|
982
1017
|
lang=resolved_defaults["lang"],
|
|
1018
|
+
model_quality=model_quality,
|
|
1019
|
+
model_source=model_source,
|
|
1020
|
+
model_variant=model_variant,
|
|
983
1021
|
use_mixed_language=(
|
|
984
1022
|
use_mixed_language or user_config.get("use_mixed_language", False)
|
|
985
1023
|
),
|
|
@@ -1117,6 +1155,9 @@ def _show_conversion_summary(
|
|
|
1117
1155
|
language: str,
|
|
1118
1156
|
speed: float,
|
|
1119
1157
|
use_gpu: bool,
|
|
1158
|
+
model_source: str,
|
|
1159
|
+
model_variant: str,
|
|
1160
|
+
model_quality: str | None,
|
|
1120
1161
|
num_chapters: int,
|
|
1121
1162
|
title: str,
|
|
1122
1163
|
author: str,
|
|
@@ -1139,6 +1180,9 @@ def _show_conversion_summary(
|
|
|
1139
1180
|
table.add_row("Chapters", str(num_chapters))
|
|
1140
1181
|
table.add_row("Voice", voice)
|
|
1141
1182
|
table.add_row("Language", LANGUAGE_DESCRIPTIONS.get(language, language))
|
|
1183
|
+
table.add_row("Model Source", model_source)
|
|
1184
|
+
table.add_row("Model Variant", model_variant)
|
|
1185
|
+
table.add_row("Model Quality", str(model_quality))
|
|
1142
1186
|
if lang:
|
|
1143
1187
|
table.add_row("Phonemization Lang", f"{lang} (override)")
|
|
1144
1188
|
if use_mixed_language:
|
|
@@ -1167,7 +1211,7 @@ def _show_conversion_summary(
|
|
|
1167
1211
|
@click.option(
|
|
1168
1212
|
"-v",
|
|
1169
1213
|
"--voice",
|
|
1170
|
-
type=click.Choice(
|
|
1214
|
+
type=click.Choice(get_voices()),
|
|
1171
1215
|
help="TTS voice to use.",
|
|
1172
1216
|
)
|
|
1173
1217
|
@click.option(
|
|
@@ -1271,6 +1315,11 @@ def _show_conversion_summary(
|
|
|
1271
1315
|
default=None,
|
|
1272
1316
|
help="Trim leading/trailing silence from audio.",
|
|
1273
1317
|
)
|
|
1318
|
+
@click.option(
|
|
1319
|
+
"--enable-short-sentence/--disable-short-sentence",
|
|
1320
|
+
default=None,
|
|
1321
|
+
help="Enable special handling for short sentences.",
|
|
1322
|
+
)
|
|
1274
1323
|
@click.pass_context
|
|
1275
1324
|
def read( # noqa: C901
|
|
1276
1325
|
ctx: click.Context,
|
|
@@ -1293,6 +1342,7 @@ def read( # noqa: C901
|
|
|
1293
1342
|
pause_paragraph: float | None,
|
|
1294
1343
|
pause_variance: float | None,
|
|
1295
1344
|
pause_mode: str | None,
|
|
1345
|
+
enable_short_sentence: bool | None,
|
|
1296
1346
|
) -> None:
|
|
1297
1347
|
"""Read an EPUB or text file aloud with streaming playback.
|
|
1298
1348
|
|
|
@@ -1340,6 +1390,10 @@ def read( # noqa: C901
|
|
|
1340
1390
|
|
|
1341
1391
|
# Load config for defaults
|
|
1342
1392
|
config = load_config()
|
|
1393
|
+
model_source, model_variant = _resolve_model_source_and_variant(config)
|
|
1394
|
+
model_quality = cast(
|
|
1395
|
+
ModelQuality, config.get("model_quality", DEFAULT_MODEL_QUALITY)
|
|
1396
|
+
)
|
|
1343
1397
|
resolved_defaults = resolve_conversion_defaults(
|
|
1344
1398
|
config,
|
|
1345
1399
|
{
|
|
@@ -1369,17 +1423,17 @@ def read( # noqa: C901
|
|
|
1369
1423
|
effective_split_mode = config_split_mode
|
|
1370
1424
|
# Pause settings
|
|
1371
1425
|
effective_pause_clause = (
|
|
1372
|
-
pause_clause if pause_clause is not None else config.get("pause_clause", 0.
|
|
1426
|
+
pause_clause if pause_clause is not None else config.get("pause_clause", 0.3)
|
|
1373
1427
|
)
|
|
1374
1428
|
effective_pause_sentence = (
|
|
1375
1429
|
pause_sentence
|
|
1376
1430
|
if pause_sentence is not None
|
|
1377
|
-
else config.get("pause_sentence", 0.
|
|
1431
|
+
else config.get("pause_sentence", 0.5)
|
|
1378
1432
|
)
|
|
1379
1433
|
effective_pause_paragraph = (
|
|
1380
1434
|
pause_paragraph
|
|
1381
1435
|
if pause_paragraph is not None
|
|
1382
|
-
else config.get("pause_paragraph", 0.
|
|
1436
|
+
else config.get("pause_paragraph", 0.9)
|
|
1383
1437
|
)
|
|
1384
1438
|
effective_pause_variance = (
|
|
1385
1439
|
pause_variance
|
|
@@ -1389,6 +1443,11 @@ def read( # noqa: C901
|
|
|
1389
1443
|
effective_pause_mode = (
|
|
1390
1444
|
pause_mode if pause_mode is not None else config.get("pause_mode", "auto")
|
|
1391
1445
|
)
|
|
1446
|
+
effective_enable_short_sentence = (
|
|
1447
|
+
enable_short_sentence
|
|
1448
|
+
if enable_short_sentence is not None
|
|
1449
|
+
else config.get("enable_short_sentence", None)
|
|
1450
|
+
)
|
|
1392
1451
|
|
|
1393
1452
|
# Get language code for TTS
|
|
1394
1453
|
espeak_lang = LANG_CODE_TO_ONNX.get(effective_language, "en-us")
|
|
@@ -1645,11 +1704,15 @@ def read( # noqa: C901
|
|
|
1645
1704
|
model_path=model_path,
|
|
1646
1705
|
voices_path=voices_path,
|
|
1647
1706
|
use_gpu=effective_use_gpu,
|
|
1707
|
+
model_quality=model_quality,
|
|
1708
|
+
model_source=model_source,
|
|
1709
|
+
model_variant=model_variant,
|
|
1648
1710
|
)
|
|
1649
1711
|
generation = GenerationConfig(
|
|
1650
1712
|
speed=effective_speed,
|
|
1651
1713
|
lang=espeak_lang,
|
|
1652
1714
|
pause_mode=cast(Literal["tts", "manual", "auto"], effective_pause_mode),
|
|
1715
|
+
enable_short_sentence=effective_enable_short_sentence,
|
|
1653
1716
|
pause_clause=effective_pause_clause,
|
|
1654
1717
|
pause_sentence=effective_pause_sentence,
|
|
1655
1718
|
pause_paragraph=effective_pause_paragraph,
|
|
@@ -1658,6 +1721,9 @@ def read( # noqa: C901
|
|
|
1658
1721
|
pipeline_config = PipelineConfig(
|
|
1659
1722
|
voice=effective_voice,
|
|
1660
1723
|
generation=generation,
|
|
1724
|
+
model_quality=model_quality,
|
|
1725
|
+
model_source=model_source,
|
|
1726
|
+
model_variant=model_variant,
|
|
1661
1727
|
model_path=model_path,
|
|
1662
1728
|
voices_path=voices_path,
|
|
1663
1729
|
)
|
|
@@ -1695,7 +1761,6 @@ def read( # noqa: C901
|
|
|
1695
1761
|
|
|
1696
1762
|
def generate_audio(text_segment: str) -> tuple[np.ndarray, int]:
|
|
1697
1763
|
"""Generate audio for a text segment."""
|
|
1698
|
-
print(text_segment)
|
|
1699
1764
|
result = pipeline.run(text_segment)
|
|
1700
1765
|
return result.audio, result.sample_rate
|
|
1701
1766
|
|
ttsforge/cli/commands_phonemes.py
CHANGED
|
@@ -10,9 +10,10 @@ This module contains commands for working with phonemes and pre-tokenized conten
|
|
|
10
10
|
import re
|
|
11
11
|
import sys
|
|
12
12
|
from pathlib import Path
|
|
13
|
-
from typing import Any
|
|
13
|
+
from typing import Any, cast
|
|
14
14
|
|
|
15
15
|
import click
|
|
16
|
+
from pykokoro.onnx_backend import DEFAULT_MODEL_QUALITY, ModelQuality
|
|
16
17
|
from rich.progress import (
|
|
17
18
|
BarColumn,
|
|
18
19
|
Progress,
|
|
@@ -37,6 +38,7 @@ from ..utils import (
|
|
|
37
38
|
format_filename_template,
|
|
38
39
|
load_config,
|
|
39
40
|
)
|
|
41
|
+
from .commands_utility import _resolve_model_source_and_variant
|
|
40
42
|
from .helpers import console, parse_voice_parameter
|
|
41
43
|
|
|
42
44
|
|
|
@@ -500,6 +502,10 @@ def phonemes_convert(
|
|
|
500
502
|
config = load_config()
|
|
501
503
|
model_path = ctx.obj.get("model_path") if ctx.obj else None
|
|
502
504
|
voices_path = ctx.obj.get("voices_path") if ctx.obj else None
|
|
505
|
+
model_source, model_variant = _resolve_model_source_and_variant(config)
|
|
506
|
+
model_quality = cast(
|
|
507
|
+
ModelQuality, config.get("model_quality", DEFAULT_MODEL_QUALITY)
|
|
508
|
+
)
|
|
503
509
|
|
|
504
510
|
# Get book info and metadata
|
|
505
511
|
book_info = book.get_info()
|
|
@@ -599,21 +605,24 @@ def phonemes_convert(
|
|
|
599
605
|
speed=speed,
|
|
600
606
|
output_format=fmt,
|
|
601
607
|
use_gpu=gpu,
|
|
608
|
+
model_quality=model_quality,
|
|
609
|
+
model_source=model_source,
|
|
610
|
+
model_variant=model_variant,
|
|
602
611
|
silence_between_chapters=silence,
|
|
603
612
|
pause_clause=(
|
|
604
613
|
pause_clause
|
|
605
614
|
if pause_clause is not None
|
|
606
|
-
else config.get("pause_clause", 0.
|
|
615
|
+
else config.get("pause_clause", 0.3)
|
|
607
616
|
),
|
|
608
617
|
pause_sentence=(
|
|
609
618
|
pause_sentence
|
|
610
619
|
if pause_sentence is not None
|
|
611
|
-
else config.get("pause_sentence", 0.
|
|
620
|
+
else config.get("pause_sentence", 0.5)
|
|
612
621
|
),
|
|
613
622
|
pause_paragraph=(
|
|
614
623
|
pause_paragraph
|
|
615
624
|
if pause_paragraph is not None
|
|
616
|
-
else config.get("pause_paragraph", 0.
|
|
625
|
+
else config.get("pause_paragraph", 0.9)
|
|
617
626
|
),
|
|
618
627
|
pause_variance=(
|
|
619
628
|
pause_variance
|
|
@@ -834,6 +843,12 @@ def phonemes_preview(
|
|
|
834
843
|
# Auto-detect if voice is a blend
|
|
835
844
|
parsed_voice, parsed_voice_blend = parse_voice_parameter(voice)
|
|
836
845
|
|
|
846
|
+
config = load_config()
|
|
847
|
+
model_source, model_variant = _resolve_model_source_and_variant(config)
|
|
848
|
+
model_quality = cast(
|
|
849
|
+
ModelQuality, config.get("model_quality", DEFAULT_MODEL_QUALITY)
|
|
850
|
+
)
|
|
851
|
+
|
|
837
852
|
# Initialize converter
|
|
838
853
|
options = ConversionOptions(
|
|
839
854
|
phoneme_dictionary_path=str(phoneme_dict) if phoneme_dict else None,
|
|
@@ -841,6 +856,9 @@ def phonemes_preview(
|
|
|
841
856
|
voice_blend=parsed_voice_blend,
|
|
842
857
|
language=language,
|
|
843
858
|
output_format="wav", # Explicitly set WAV format
|
|
859
|
+
model_quality=model_quality,
|
|
860
|
+
model_source=model_source,
|
|
861
|
+
model_variant=model_variant,
|
|
844
862
|
)
|
|
845
863
|
converter = TTSConverter(options)
|
|
846
864
|
|
ttsforge/cli/commands_utility.py
CHANGED
|
@@ -555,6 +555,14 @@ def _resolve_model_source_and_variant(cfg: dict) -> tuple[ModelSource, ModelVari
|
|
|
555
555
|
return cast(ModelSource, source), cast(ModelVariant, variant)
|
|
556
556
|
|
|
557
557
|
|
|
558
|
+
def _resolve_voice_names(
|
|
559
|
+
model_source: ModelSource = "huggingface",
|
|
560
|
+
model_variant: ModelVariant = "v1.0",
|
|
561
|
+
) -> list[str]:
|
|
562
|
+
"""Return the list of voice names for the given model variant."""
|
|
563
|
+
return VOICE_NAMES_BY_VARIANT.get(model_variant, VOICE_NAMES)
|
|
564
|
+
|
|
565
|
+
|
|
558
566
|
def _get_cache_voices_path(
|
|
559
567
|
model_source: ModelSource,
|
|
560
568
|
model_variant: ModelVariant,
|
|
@@ -708,7 +716,7 @@ def download(ctx: click.Context, force: bool, quality: str | None) -> None:
|
|
|
708
716
|
|
|
709
717
|
# ---- voices
|
|
710
718
|
if model_source == "huggingface":
|
|
711
|
-
voice_names =
|
|
719
|
+
voice_names = _resolve_voice_names(model_source, model_variant)
|
|
712
720
|
total_voices = len(voice_names)
|
|
713
721
|
voices_task = progress.add_task(
|
|
714
722
|
f"Downloading voices (0/{total_voices})...", total=total_voices
|
|
@@ -1269,6 +1277,12 @@ def list_names( # noqa: C901
|
|
|
1269
1277
|
)
|
|
1270
1278
|
console.print("[dim]Type 'q' to quit, 's' to skip, 'r' to replay.[/dim]\n")
|
|
1271
1279
|
|
|
1280
|
+
cfg = load_config()
|
|
1281
|
+
model_source, model_variant = _resolve_model_source_and_variant(cfg)
|
|
1282
|
+
model_quality = cast(
|
|
1283
|
+
ModelQuality, cfg.get("model_quality", DEFAULT_MODEL_QUALITY)
|
|
1284
|
+
)
|
|
1285
|
+
|
|
1272
1286
|
# Initialize converter with phoneme dictionary
|
|
1273
1287
|
try:
|
|
1274
1288
|
# Auto-detect if voice is a blend
|
|
@@ -1279,6 +1293,9 @@ def list_names( # noqa: C901
|
|
|
1279
1293
|
voice=parsed_voice or "af_sky",
|
|
1280
1294
|
voice_blend=parsed_voice_blend,
|
|
1281
1295
|
language=language,
|
|
1296
|
+
model_quality=model_quality,
|
|
1297
|
+
model_source=model_source,
|
|
1298
|
+
model_variant=model_variant,
|
|
1282
1299
|
)
|
|
1283
1300
|
converter = TTSConverter(options)
|
|
1284
1301
|
|
ttsforge/cli/helpers.py
CHANGED
|
@@ -50,6 +50,7 @@ DEFAULT_SAMPLE_TEXT = (
|
|
|
50
50
|
DEMO_TEXT = {
|
|
51
51
|
"a": "Hello! This audio was generated by {voice}. How do you like it?",
|
|
52
52
|
"b": "Hello! This audio was generated by {voice}. How do you like it?",
|
|
53
|
+
"d": "Hallo! Dieses Audio wurde von {voice} erzeugt. Wie gefallt es Ihnen?",
|
|
53
54
|
"e": "Hola! Este audio fue generado por {voice}. Que te parece?",
|
|
54
55
|
"f": "Bonjour! Cet audio a ete genere par {voice}. Comment le trouvez-vous?",
|
|
55
56
|
"h": "Namaste! Yah audio {voice} dwara banaya gaya hai. Aapko kaisa laga?",
|
ttsforge/constants.py
CHANGED
|
@@ -3,10 +3,10 @@
|
|
|
3
3
|
# from pykokoro.onnx_backend import VOICE_NAMES_V1_0
|
|
4
4
|
# from pykokoro.onnx_backend import VOICE_NAMES_V1_1_ZH, VOICE_NAMES_V1_1_DE
|
|
5
5
|
|
|
6
|
-
from pykokoro.onnx_backend import VOICE_NAMES_V1_0
|
|
6
|
+
from pykokoro.onnx_backend import DEFAULT_MODEL_SOURCE, VOICE_NAMES_V1_0
|
|
7
7
|
|
|
8
8
|
# Re-export from pykokoro for convenience
|
|
9
|
-
VOICES =
|
|
9
|
+
VOICES = VOICE_NAMES_V1_0
|
|
10
10
|
|
|
11
11
|
# Audio constants from pykokoro
|
|
12
12
|
try:
|
|
@@ -24,6 +24,7 @@ PROGRAM_DESCRIPTION = "Generate audiobooks from EPUB files using Kokoro ONNX TTS
|
|
|
24
24
|
LANGUAGE_DESCRIPTIONS = {
|
|
25
25
|
"a": "American English",
|
|
26
26
|
"b": "British English",
|
|
27
|
+
"d": "German",
|
|
27
28
|
"e": "Spanish",
|
|
28
29
|
"f": "French",
|
|
29
30
|
"h": "Hindi",
|
|
@@ -35,6 +36,8 @@ LANGUAGE_DESCRIPTIONS = {
|
|
|
35
36
|
|
|
36
37
|
# ISO language code to ttsforge language code mapping
|
|
37
38
|
ISO_TO_LANG_CODE = {
|
|
39
|
+
"de": "d",
|
|
40
|
+
"de-de": "d",
|
|
38
41
|
"en": "a", # Default to American English
|
|
39
42
|
"en-us": "a",
|
|
40
43
|
"en-gb": "b",
|
|
@@ -62,6 +65,8 @@ VOICE_PREFIX_TO_LANG = {
|
|
|
62
65
|
"am": "a", # American Male
|
|
63
66
|
"bf": "b", # British Female
|
|
64
67
|
"bm": "b", # British Male
|
|
68
|
+
"df": "d", # German Female
|
|
69
|
+
"dm": "d", # German Male
|
|
65
70
|
"ef": "e", # Spanish Female
|
|
66
71
|
"em": "e", # Spanish Male
|
|
67
72
|
"ff": "f", # French Female
|
|
@@ -82,6 +87,7 @@ VOICE_PREFIX_TO_LANG = {
|
|
|
82
87
|
DEFAULT_VOICE_FOR_LANG = {
|
|
83
88
|
"a": "af_heart",
|
|
84
89
|
"b": "bf_emma",
|
|
90
|
+
"d": "df_eva",
|
|
85
91
|
"e": "ef_dora",
|
|
86
92
|
"f": "ff_siwis",
|
|
87
93
|
"h": "hf_alpha",
|
|
@@ -115,6 +121,7 @@ DEFAULT_CONFIG = {
|
|
|
115
121
|
"use_gpu": False, # GPU requires onnxruntime-gpu
|
|
116
122
|
# Model quality: fp32, fp16, q8, q8f16, q4, q4f16, uint8, uint8f16
|
|
117
123
|
"model_quality": "fp32",
|
|
124
|
+
"model_source": DEFAULT_MODEL_SOURCE,
|
|
118
125
|
"model_variant": "v1.0",
|
|
119
126
|
"silence_between_chapters": 2.0,
|
|
120
127
|
"save_chapters_separately": False,
|
|
@@ -123,11 +130,12 @@ DEFAULT_CONFIG = {
|
|
|
123
130
|
"default_split_mode": "auto",
|
|
124
131
|
"default_content_mode": "chapters", # Content mode for read: chapters or pages
|
|
125
132
|
"default_page_size": 2000, # Synthetic page size in characters for pages mode
|
|
126
|
-
"pause_clause": 0.
|
|
127
|
-
"pause_sentence": 0.
|
|
133
|
+
"pause_clause": 0.3,
|
|
134
|
+
"pause_sentence": 0.5,
|
|
128
135
|
"pause_paragraph": 0.9,
|
|
129
136
|
"pause_variance": 0.05,
|
|
130
137
|
"pause_mode": "auto", # "tts", "manual", or "auto
|
|
138
|
+
"enable_short_sentence": None,
|
|
131
139
|
# Language override for phonemization (e.g., 'de', 'fr', 'en-us')
|
|
132
140
|
# If None, language is determined from voice prefix
|
|
133
141
|
"phonemization_lang": None,
|
|
@@ -154,6 +162,7 @@ AUDIO_CHANNELS = 1
|
|
|
154
162
|
SAMPLE_TEXTS = {
|
|
155
163
|
"a": "This is a sample of the selected voice.",
|
|
156
164
|
"b": "This is a sample of the selected voice.",
|
|
165
|
+
"d": "Dies ist ein Beispiel für die ausgewählte Stimme.",
|
|
157
166
|
"e": "Este es una muestra de la voz seleccionada.",
|
|
158
167
|
"f": "Ceci est un exemple de la voix sélectionnée.",
|
|
159
168
|
"h": "यह चयनित आवाज़ का एक नमूना है।", # noqa: E501
|
ttsforge/conversion.py
CHANGED
|
@@ -11,6 +11,14 @@ from pathlib import Path
|
|
|
11
11
|
from typing import Any, Literal, Optional, cast
|
|
12
12
|
|
|
13
13
|
import soundfile as sf
|
|
14
|
+
from pykokoro.onnx_backend import (
|
|
15
|
+
DEFAULT_MODEL_QUALITY,
|
|
16
|
+
DEFAULT_MODEL_SOURCE,
|
|
17
|
+
DEFAULT_MODEL_VARIANT,
|
|
18
|
+
ModelQuality,
|
|
19
|
+
ModelSource,
|
|
20
|
+
ModelVariant,
|
|
21
|
+
)
|
|
14
22
|
|
|
15
23
|
from .audio_merge import AudioMerger, MergeMeta
|
|
16
24
|
from .constants import (
|
|
@@ -123,12 +131,16 @@ class ConversionState:
|
|
|
123
131
|
speed: float = 1.0
|
|
124
132
|
split_mode: str = "auto"
|
|
125
133
|
output_format: str = "m4b"
|
|
134
|
+
model_quality: ModelQuality | None = DEFAULT_MODEL_QUALITY
|
|
135
|
+
model_source: ModelSource = DEFAULT_MODEL_SOURCE
|
|
136
|
+
model_variant: ModelVariant = DEFAULT_MODEL_VARIANT
|
|
126
137
|
silence_between_chapters: float = 2.0
|
|
127
|
-
pause_clause: float = 0.
|
|
128
|
-
pause_sentence: float = 0.
|
|
129
|
-
pause_paragraph: float = 0.
|
|
138
|
+
pause_clause: float = 0.3
|
|
139
|
+
pause_sentence: float = 0.5
|
|
140
|
+
pause_paragraph: float = 0.9
|
|
130
141
|
pause_variance: float = 0.05
|
|
131
142
|
pause_mode: str = "auto" # "tts", "manual", or "auto
|
|
143
|
+
enable_short_sentence: bool | None = None
|
|
132
144
|
lang: str | None = None # Language override for phonemization
|
|
133
145
|
chapters: list[ChapterState] = field(default_factory=list)
|
|
134
146
|
started_at: str = ""
|
|
@@ -174,17 +186,25 @@ class ConversionState:
|
|
|
174
186
|
|
|
175
187
|
# Set defaults for new parameters
|
|
176
188
|
if "pause_clause" not in data:
|
|
177
|
-
data["pause_clause"] = 0.
|
|
189
|
+
data["pause_clause"] = 0.3
|
|
178
190
|
if "pause_sentence" not in data:
|
|
179
|
-
data["pause_sentence"] = 0.
|
|
191
|
+
data["pause_sentence"] = 0.5
|
|
180
192
|
if "pause_paragraph" not in data:
|
|
181
|
-
data["pause_paragraph"] = 0.
|
|
193
|
+
data["pause_paragraph"] = 0.9
|
|
182
194
|
if "pause_variance" not in data:
|
|
183
195
|
data["pause_variance"] = 0.05
|
|
184
196
|
if "pause_mode" not in data:
|
|
185
197
|
data["pause_mode"] = "auto"
|
|
198
|
+
if "enable_short_sentence" not in data:
|
|
199
|
+
data["enable_short_sentence"] = None
|
|
186
200
|
if "lang" not in data:
|
|
187
201
|
data["lang"] = None
|
|
202
|
+
if "model_quality" not in data:
|
|
203
|
+
data["model_quality"] = DEFAULT_MODEL_QUALITY
|
|
204
|
+
if "model_source" not in data:
|
|
205
|
+
data["model_source"] = DEFAULT_MODEL_SOURCE
|
|
206
|
+
if "model_variant" not in data:
|
|
207
|
+
data["model_variant"] = DEFAULT_MODEL_VARIANT
|
|
188
208
|
|
|
189
209
|
return cls(**data)
|
|
190
210
|
except (json.JSONDecodeError, TypeError, KeyError):
|
|
@@ -204,12 +224,16 @@ class ConversionState:
|
|
|
204
224
|
"speed": self.speed,
|
|
205
225
|
"split_mode": self.split_mode,
|
|
206
226
|
"output_format": self.output_format,
|
|
227
|
+
"model_quality": self.model_quality,
|
|
228
|
+
"model_source": self.model_source,
|
|
229
|
+
"model_variant": self.model_variant,
|
|
207
230
|
"silence_between_chapters": self.silence_between_chapters,
|
|
208
231
|
"pause_clause": self.pause_clause,
|
|
209
232
|
"pause_sentence": self.pause_sentence,
|
|
210
233
|
"pause_paragraph": self.pause_paragraph,
|
|
211
234
|
"pause_variance": self.pause_variance,
|
|
212
235
|
"pause_mode": self.pause_mode,
|
|
236
|
+
"enable_short_sentence": self.enable_short_sentence,
|
|
213
237
|
"lang": self.lang,
|
|
214
238
|
"chapters": [
|
|
215
239
|
{
|
|
@@ -289,11 +313,12 @@ class ConversionOptions:
|
|
|
289
313
|
phoneme_dictionary_path: str | None = None
|
|
290
314
|
phoneme_dict_case_sensitive: bool = False
|
|
291
315
|
# Pause settings (pykokoro built-in pause handling)
|
|
292
|
-
pause_clause: float = 0.
|
|
293
|
-
pause_sentence: float = 0.
|
|
294
|
-
pause_paragraph: float = 0.
|
|
316
|
+
pause_clause: float = 0.3 # For clause boundaries (commas)
|
|
317
|
+
pause_sentence: float = 0.5 # For sentence boundaries
|
|
318
|
+
pause_paragraph: float = 0.9 # For paragraph boundaries
|
|
295
319
|
pause_variance: float = 0.05 # Standard deviation for natural variation
|
|
296
320
|
pause_mode: str = "auto" # "tts", "manual", or "auto
|
|
321
|
+
enable_short_sentence: bool | None = None # Enable short sentence handling
|
|
297
322
|
# Chapter announcement settings
|
|
298
323
|
announce_chapters: bool = True # Read chapter titles aloud before content
|
|
299
324
|
chapter_pause_after_title: float = 2.0 # Pause after chapter title (seconds)
|
|
@@ -315,6 +340,9 @@ class ConversionOptions:
|
|
|
315
340
|
# Filename template for chapter files
|
|
316
341
|
chapter_filename_template: str = "{chapter_num:03d}_{book_title}_{chapter_title}"
|
|
317
342
|
# Custom ONNX model path (None = use default downloaded model)
|
|
343
|
+
model_quality: ModelQuality | None = DEFAULT_MODEL_QUALITY
|
|
344
|
+
model_source: ModelSource = DEFAULT_MODEL_SOURCE
|
|
345
|
+
model_variant: ModelVariant = DEFAULT_MODEL_VARIANT
|
|
318
346
|
model_path: Path | None = None
|
|
319
347
|
# Custom voices.bin path (None = use default downloaded voices)
|
|
320
348
|
voices_path: Path | None = None
|
|
@@ -420,6 +448,9 @@ class TTSConverter:
|
|
|
420
448
|
pause_sentence=self.options.pause_sentence,
|
|
421
449
|
pause_paragraph=self.options.pause_paragraph,
|
|
422
450
|
pause_variance=self.options.pause_variance,
|
|
451
|
+
model_quality=self.options.model_quality,
|
|
452
|
+
model_source=self.options.model_source,
|
|
453
|
+
model_variant=self.options.model_variant,
|
|
423
454
|
model_path=self.options.model_path,
|
|
424
455
|
voices_path=self.options.voices_path,
|
|
425
456
|
voice_blend=self.options.voice_blend,
|
|
@@ -600,53 +631,79 @@ class TTSConverter:
|
|
|
600
631
|
)
|
|
601
632
|
state = None
|
|
602
633
|
else:
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
state.
|
|
606
|
-
or state.
|
|
607
|
-
or state.speed != self.options.speed
|
|
608
|
-
or state.split_mode != self.options.split_mode
|
|
609
|
-
or state.silence_between_chapters
|
|
610
|
-
!= self.options.silence_between_chapters
|
|
611
|
-
or state.pause_clause != self.options.pause_clause
|
|
612
|
-
or state.pause_sentence != self.options.pause_sentence
|
|
613
|
-
or state.pause_paragraph != self.options.pause_paragraph
|
|
614
|
-
or state.pause_variance != self.options.pause_variance
|
|
615
|
-
or state.pause_mode != self.options.pause_mode
|
|
616
|
-
or state.lang != self.options.lang
|
|
634
|
+
model_settings_changed = (
|
|
635
|
+
state.model_quality != self.options.model_quality
|
|
636
|
+
or state.model_source != self.options.model_source
|
|
637
|
+
or state.model_variant != self.options.model_variant
|
|
617
638
|
)
|
|
618
639
|
|
|
619
|
-
if
|
|
640
|
+
if model_settings_changed:
|
|
620
641
|
self.log(
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
642
|
+
"Model settings changed, starting fresh conversion",
|
|
643
|
+
"warning",
|
|
644
|
+
)
|
|
645
|
+
state = None
|
|
646
|
+
else:
|
|
647
|
+
# Check if settings differ from saved state
|
|
648
|
+
settings_changed = (
|
|
649
|
+
state.voice != self.options.voice
|
|
650
|
+
or state.language != self.options.language
|
|
651
|
+
or state.speed != self.options.speed
|
|
652
|
+
or state.split_mode != self.options.split_mode
|
|
653
|
+
or state.silence_between_chapters
|
|
654
|
+
!= self.options.silence_between_chapters
|
|
655
|
+
or state.pause_clause != self.options.pause_clause
|
|
656
|
+
or state.pause_sentence != self.options.pause_sentence
|
|
657
|
+
or state.pause_paragraph != self.options.pause_paragraph
|
|
658
|
+
or state.pause_variance != self.options.pause_variance
|
|
659
|
+
or state.pause_mode != self.options.pause_mode
|
|
660
|
+
or state.enable_short_sentence
|
|
661
|
+
!= self.options.enable_short_sentence
|
|
662
|
+
or state.lang != self.options.lang
|
|
633
663
|
)
|
|
634
664
|
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
665
|
+
if settings_changed:
|
|
666
|
+
self.log(
|
|
667
|
+
f"Restoring settings from previous session: "
|
|
668
|
+
f"voice={state.voice}, language={state.language}, "
|
|
669
|
+
f"lang_override={state.lang}, "
|
|
670
|
+
f"speed={state.speed}, "
|
|
671
|
+
f"split_mode={state.split_mode}, "
|
|
672
|
+
f"silence={state.silence_between_chapters}s, "
|
|
673
|
+
f"pauses: clause={state.pause_clause}s "
|
|
674
|
+
f"sent={state.pause_sentence}s "
|
|
675
|
+
f"para={state.pause_paragraph}s "
|
|
676
|
+
f"var={state.pause_variance}s "
|
|
677
|
+
f"pause_mode={state.pause_mode}, "
|
|
678
|
+
f"enable_short_sentence="
|
|
679
|
+
f"{state.enable_short_sentence}, "
|
|
680
|
+
f"model_source={state.model_source}, "
|
|
681
|
+
f"model_variant={state.model_variant}, "
|
|
682
|
+
f"model_quality={state.model_quality}",
|
|
683
|
+
"info",
|
|
684
|
+
)
|
|
685
|
+
|
|
686
|
+
# Apply saved settings to options for consistency
|
|
687
|
+
self.options.voice = state.voice
|
|
688
|
+
self.options.language = state.language
|
|
689
|
+
self.options.speed = state.speed
|
|
690
|
+
self.options.split_mode = state.split_mode
|
|
691
|
+
self.options.output_format = state.output_format
|
|
692
|
+
self.options.silence_between_chapters = (
|
|
693
|
+
state.silence_between_chapters
|
|
694
|
+
)
|
|
695
|
+
self.options.pause_clause = state.pause_clause
|
|
696
|
+
self.options.pause_sentence = state.pause_sentence
|
|
697
|
+
self.options.pause_paragraph = state.pause_paragraph
|
|
698
|
+
self.options.pause_variance = state.pause_variance
|
|
699
|
+
self.options.pause_mode = state.pause_mode
|
|
700
|
+
self.options.enable_short_sentence = (
|
|
701
|
+
state.enable_short_sentence
|
|
702
|
+
)
|
|
703
|
+
self.options.lang = state.lang
|
|
704
|
+
self.options.model_quality = state.model_quality
|
|
705
|
+
self.options.model_source = state.model_source
|
|
706
|
+
self.options.model_variant = state.model_variant
|
|
650
707
|
|
|
651
708
|
if state is None:
|
|
652
709
|
# Create new state
|
|
@@ -661,12 +718,16 @@ class TTSConverter:
|
|
|
661
718
|
speed=self.options.speed,
|
|
662
719
|
split_mode=self.options.split_mode,
|
|
663
720
|
output_format=self.options.output_format,
|
|
721
|
+
model_quality=self.options.model_quality,
|
|
722
|
+
model_source=self.options.model_source,
|
|
723
|
+
model_variant=self.options.model_variant,
|
|
664
724
|
silence_between_chapters=self.options.silence_between_chapters,
|
|
665
725
|
pause_clause=self.options.pause_clause,
|
|
666
726
|
pause_sentence=self.options.pause_sentence,
|
|
667
727
|
pause_paragraph=self.options.pause_paragraph,
|
|
668
728
|
pause_variance=self.options.pause_variance,
|
|
669
729
|
pause_mode=self.options.pause_mode,
|
|
730
|
+
enable_short_sentence=self.options.enable_short_sentence,
|
|
670
731
|
lang=self.options.lang,
|
|
671
732
|
chapters=[
|
|
672
733
|
ChapterState(
|
ttsforge/kokoro_runner.py
CHANGED
|
@@ -7,11 +7,19 @@ from typing import Any, Literal, Protocol, cast
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
from pykokoro import GenerationConfig, KokoroPipeline, PipelineConfig
|
|
9
9
|
from pykokoro.onnx_backend import (
|
|
10
|
+
DEFAULT_MODEL_QUALITY,
|
|
11
|
+
DEFAULT_MODEL_SOURCE,
|
|
12
|
+
DEFAULT_MODEL_VARIANT,
|
|
10
13
|
Kokoro,
|
|
14
|
+
ModelQuality,
|
|
15
|
+
ModelSource,
|
|
16
|
+
ModelVariant,
|
|
11
17
|
VoiceBlend,
|
|
12
18
|
are_models_downloaded,
|
|
13
19
|
download_all_models,
|
|
20
|
+
download_all_models_github,
|
|
14
21
|
)
|
|
22
|
+
from pykokoro.pipeline import build_pipeline
|
|
15
23
|
from pykokoro.stages.audio_generation.onnx import OnnxAudioGenerationAdapter
|
|
16
24
|
from pykokoro.stages.audio_postprocessing.onnx import OnnxAudioPostprocessingAdapter
|
|
17
25
|
from pykokoro.stages.phoneme_processing.onnx import OnnxPhonemeProcessorAdapter
|
|
@@ -26,6 +34,9 @@ class KokoroRunOptions:
|
|
|
26
34
|
pause_sentence: float
|
|
27
35
|
pause_paragraph: float
|
|
28
36
|
pause_variance: float
|
|
37
|
+
model_quality: ModelQuality | None = DEFAULT_MODEL_QUALITY
|
|
38
|
+
model_source: ModelSource = DEFAULT_MODEL_SOURCE
|
|
39
|
+
model_variant: ModelVariant = DEFAULT_MODEL_VARIANT
|
|
29
40
|
model_path: Any | None = None
|
|
30
41
|
voices_path: Any | None = None
|
|
31
42
|
voice_blend: str | None = None
|
|
@@ -48,15 +59,32 @@ class KokoroRunner:
|
|
|
48
59
|
if self._pipeline is not None:
|
|
49
60
|
return
|
|
50
61
|
|
|
51
|
-
if
|
|
52
|
-
self.
|
|
53
|
-
|
|
62
|
+
if self.opts.model_path is None or self.opts.voices_path is None:
|
|
63
|
+
model_quality = self.opts.model_quality or DEFAULT_MODEL_QUALITY
|
|
64
|
+
model_source = self.opts.model_source or DEFAULT_MODEL_SOURCE
|
|
65
|
+
if model_source == "github":
|
|
66
|
+
if not are_models_downloaded(quality=model_quality):
|
|
67
|
+
self.log("Downloading ONNX model files from GitHub...")
|
|
68
|
+
download_all_models_github(
|
|
69
|
+
variant=self.opts.model_variant,
|
|
70
|
+
quality=model_quality,
|
|
71
|
+
)
|
|
72
|
+
else:
|
|
73
|
+
if not are_models_downloaded(quality=model_quality):
|
|
74
|
+
self.log("Downloading ONNX model files...")
|
|
75
|
+
download_all_models(
|
|
76
|
+
variant=self.opts.model_variant,
|
|
77
|
+
quality=model_quality,
|
|
78
|
+
)
|
|
54
79
|
|
|
55
80
|
self._kokoro = Kokoro(
|
|
56
81
|
model_path=self.opts.model_path,
|
|
57
82
|
voices_path=self.opts.voices_path,
|
|
58
83
|
use_gpu=self.opts.use_gpu,
|
|
59
84
|
tokenizer_config=self.opts.tokenizer_config,
|
|
85
|
+
model_quality=self.opts.model_quality,
|
|
86
|
+
model_source=self.opts.model_source,
|
|
87
|
+
model_variant=self.opts.model_variant,
|
|
60
88
|
)
|
|
61
89
|
|
|
62
90
|
assert self._kokoro is not None
|
|
@@ -88,14 +116,18 @@ class KokoroRunner:
|
|
|
88
116
|
pipeline_cfg = PipelineConfig(
|
|
89
117
|
voice=self._voice_style,
|
|
90
118
|
generation=GenerationConfig(speed=self.opts.speed, lang="en-us"),
|
|
119
|
+
model_quality=self.opts.model_quality,
|
|
120
|
+
model_source=self.opts.model_source,
|
|
121
|
+
model_variant=self.opts.model_variant,
|
|
91
122
|
model_path=self.opts.model_path,
|
|
92
123
|
voices_path=self.opts.voices_path,
|
|
93
124
|
tokenizer_config=self.opts.tokenizer_config,
|
|
94
125
|
)
|
|
95
126
|
|
|
96
127
|
# Use the same adapters everywhere (text + phonemes)
|
|
97
|
-
self._pipeline =
|
|
98
|
-
pipeline_cfg,
|
|
128
|
+
self._pipeline = build_pipeline(
|
|
129
|
+
config=pipeline_cfg,
|
|
130
|
+
backend=self._kokoro,
|
|
99
131
|
phoneme_processing=OnnxPhonemeProcessorAdapter(self._kokoro),
|
|
100
132
|
audio_generation=OnnxAudioGenerationAdapter(self._kokoro),
|
|
101
133
|
audio_postprocessing=OnnxAudioPostprocessingAdapter(self._kokoro),
|
|
@@ -116,6 +148,7 @@ class KokoroRunner:
|
|
|
116
148
|
lang=lang_code,
|
|
117
149
|
is_phonemes=is_phonemes,
|
|
118
150
|
pause_mode=pause_mode,
|
|
151
|
+
enable_short_sentence=self.opts.enable_short_sentence,
|
|
119
152
|
pause_clause=self.opts.pause_clause,
|
|
120
153
|
pause_sentence=self.opts.pause_sentence,
|
|
121
154
|
pause_paragraph=self.opts.pause_paragraph,
|
ttsforge/name_extractor.py
CHANGED
|
@@ -174,7 +174,7 @@ def generate_phoneme_suggestions(
|
|
|
174
174
|
Dictionary with phoneme suggestions and metadata:
|
|
175
175
|
{
|
|
176
176
|
"name": {
|
|
177
|
-
"phoneme": "
|
|
177
|
+
"phoneme": "phoneme",
|
|
178
178
|
"occurrences": count,
|
|
179
179
|
"suggestion_quality": "auto"
|
|
180
180
|
}
|
|
@@ -190,7 +190,7 @@ def generate_phoneme_suggestions(
|
|
|
190
190
|
phoneme = phonemize(name, language=language).phonemes
|
|
191
191
|
|
|
192
192
|
# Wrap in / / format for dictionary
|
|
193
|
-
phoneme_formatted = f"
|
|
193
|
+
phoneme_formatted = f"{phoneme}"
|
|
194
194
|
|
|
195
195
|
suggestions[name] = {
|
|
196
196
|
"phoneme": phoneme_formatted,
|
|
@@ -201,7 +201,7 @@ def generate_phoneme_suggestions(
|
|
|
201
201
|
logger.warning(f"Failed to generate phoneme for '{name}': {e}")
|
|
202
202
|
# Add placeholder
|
|
203
203
|
suggestions[name] = {
|
|
204
|
-
"phoneme": "
|
|
204
|
+
"phoneme": "FIXME",
|
|
205
205
|
"occurrences": count,
|
|
206
206
|
"suggestion_quality": "error",
|
|
207
207
|
"error": str(e),
|
ttsforge/phoneme_conversion.py
CHANGED
|
@@ -15,6 +15,14 @@ from typing import Any, Literal, Optional, cast
|
|
|
15
15
|
|
|
16
16
|
import numpy as np
|
|
17
17
|
import soundfile as sf
|
|
18
|
+
from pykokoro.onnx_backend import (
|
|
19
|
+
DEFAULT_MODEL_QUALITY,
|
|
20
|
+
DEFAULT_MODEL_SOURCE,
|
|
21
|
+
DEFAULT_MODEL_VARIANT,
|
|
22
|
+
ModelQuality,
|
|
23
|
+
ModelSource,
|
|
24
|
+
ModelVariant,
|
|
25
|
+
)
|
|
18
26
|
|
|
19
27
|
from .audio_merge import AudioMerger, MergeMeta
|
|
20
28
|
from .chapter_selection import parse_chapter_selection
|
|
@@ -94,12 +102,16 @@ class PhonemeConversionState:
|
|
|
94
102
|
voice: str = ""
|
|
95
103
|
speed: float = 1.0
|
|
96
104
|
output_format: str = "m4b"
|
|
105
|
+
model_quality: ModelQuality | None = DEFAULT_MODEL_QUALITY
|
|
106
|
+
model_source: ModelSource = DEFAULT_MODEL_SOURCE
|
|
107
|
+
model_variant: ModelVariant = DEFAULT_MODEL_VARIANT
|
|
97
108
|
silence_between_chapters: float = 2.0
|
|
98
|
-
pause_clause: float = 0.
|
|
99
|
-
pause_sentence: float = 0.
|
|
100
|
-
pause_paragraph: float = 0.
|
|
109
|
+
pause_clause: float = 0.3
|
|
110
|
+
pause_sentence: float = 0.5
|
|
111
|
+
pause_paragraph: float = 0.9
|
|
101
112
|
pause_variance: float = 0.05
|
|
102
113
|
pause_mode: str = "auto"
|
|
114
|
+
enable_short_sentence: bool | None = None
|
|
103
115
|
lang: str | None = None # Language override for phonemization
|
|
104
116
|
chapters: list[PhonemeChapterState] = field(default_factory=list)
|
|
105
117
|
started_at: str = ""
|
|
@@ -145,17 +157,25 @@ class PhonemeConversionState:
|
|
|
145
157
|
|
|
146
158
|
# Set defaults for new parameters
|
|
147
159
|
if "pause_clause" not in data:
|
|
148
|
-
data["pause_clause"] = 0.
|
|
160
|
+
data["pause_clause"] = 0.3
|
|
149
161
|
if "pause_sentence" not in data:
|
|
150
|
-
data["pause_sentence"] = 0.
|
|
162
|
+
data["pause_sentence"] = 0.5
|
|
151
163
|
if "pause_paragraph" not in data:
|
|
152
|
-
data["pause_paragraph"] = 0.
|
|
164
|
+
data["pause_paragraph"] = 0.9
|
|
153
165
|
if "pause_variance" not in data:
|
|
154
166
|
data["pause_variance"] = 0.05
|
|
155
167
|
if "pause_mode" not in data:
|
|
156
168
|
data["pause_mode"] = "auto"
|
|
169
|
+
if "enable_short_sentence" not in data:
|
|
170
|
+
data["enable_short_sentence"] = None
|
|
157
171
|
if "lang" not in data:
|
|
158
172
|
data["lang"] = None
|
|
173
|
+
if "model_quality" not in data:
|
|
174
|
+
data["model_quality"] = DEFAULT_MODEL_QUALITY
|
|
175
|
+
if "model_source" not in data:
|
|
176
|
+
data["model_source"] = DEFAULT_MODEL_SOURCE
|
|
177
|
+
if "model_variant" not in data:
|
|
178
|
+
data["model_variant"] = DEFAULT_MODEL_VARIANT
|
|
159
179
|
|
|
160
180
|
return cls(**data)
|
|
161
181
|
except (json.JSONDecodeError, TypeError, KeyError):
|
|
@@ -172,12 +192,16 @@ class PhonemeConversionState:
|
|
|
172
192
|
"voice": self.voice,
|
|
173
193
|
"speed": self.speed,
|
|
174
194
|
"output_format": self.output_format,
|
|
195
|
+
"model_quality": self.model_quality,
|
|
196
|
+
"model_source": self.model_source,
|
|
197
|
+
"model_variant": self.model_variant,
|
|
175
198
|
"silence_between_chapters": self.silence_between_chapters,
|
|
176
199
|
"pause_clause": self.pause_clause,
|
|
177
200
|
"pause_sentence": self.pause_sentence,
|
|
178
201
|
"pause_paragraph": self.pause_paragraph,
|
|
179
202
|
"pause_variance": self.pause_variance,
|
|
180
203
|
"pause_mode": self.pause_mode,
|
|
204
|
+
"enable_short_sentence": self.enable_short_sentence,
|
|
181
205
|
"lang": self.lang,
|
|
182
206
|
"chapters": [
|
|
183
207
|
{
|
|
@@ -210,11 +234,12 @@ class PhonemeConversionOptions:
|
|
|
210
234
|
# If None, language from PhonemeSegments is used
|
|
211
235
|
lang: str | None = None
|
|
212
236
|
# Pause settings (pykokoro built-in pause handling)
|
|
213
|
-
pause_clause: float = 0.
|
|
214
|
-
pause_sentence: float = 0.
|
|
215
|
-
pause_paragraph: float = 0.
|
|
237
|
+
pause_clause: float = 0.3 # For clause boundaries (commas)
|
|
238
|
+
pause_sentence: float = 0.5 # For sentence boundaries
|
|
239
|
+
pause_paragraph: float = 0.9 # For paragraph boundaries
|
|
216
240
|
pause_variance: float = 0.05 # Standard deviation for natural variation
|
|
217
241
|
pause_mode: str = "auto" # "tts", "manual", or "auto"
|
|
242
|
+
enable_short_sentence: bool | None = None # Enable short sentence handling
|
|
218
243
|
# Chapter announcement settings
|
|
219
244
|
announce_chapters: bool = True # Read chapter titles aloud before content
|
|
220
245
|
chapter_pause_after_title: float = 2.0 # Pause after chapter title (seconds)
|
|
@@ -235,6 +260,9 @@ class PhonemeConversionOptions:
|
|
|
235
260
|
# Filename template for chapter files
|
|
236
261
|
chapter_filename_template: str = "{chapter_num:03d}_{book_title}_{chapter_title}"
|
|
237
262
|
# Custom ONNX model path (None = use default downloaded model)
|
|
263
|
+
model_quality: ModelQuality | None = DEFAULT_MODEL_QUALITY
|
|
264
|
+
model_source: ModelSource = DEFAULT_MODEL_SOURCE
|
|
265
|
+
model_variant: ModelVariant = DEFAULT_MODEL_VARIANT
|
|
238
266
|
model_path: Path | None = None
|
|
239
267
|
# Custom voices.bin path (None = use default downloaded voices)
|
|
240
268
|
voices_path: Path | None = None
|
|
@@ -583,6 +611,11 @@ class PhonemeConverter:
|
|
|
583
611
|
or state.pause_paragraph != self.options.pause_paragraph
|
|
584
612
|
or state.pause_variance != self.options.pause_variance
|
|
585
613
|
or state.pause_mode != self.options.pause_mode
|
|
614
|
+
or state.enable_short_sentence
|
|
615
|
+
!= self.options.enable_short_sentence
|
|
616
|
+
or state.model_quality != self.options.model_quality
|
|
617
|
+
or state.model_source != self.options.model_source
|
|
618
|
+
or state.model_variant != self.options.model_variant
|
|
586
619
|
):
|
|
587
620
|
self.log(
|
|
588
621
|
f"Restoring settings from previous session: "
|
|
@@ -592,7 +625,11 @@ class PhonemeConverter:
|
|
|
592
625
|
f"pause_sentence={state.pause_sentence}s, "
|
|
593
626
|
f"pause_paragraph={state.pause_paragraph}s, "
|
|
594
627
|
f"pause_variance={state.pause_variance}s, "
|
|
595
|
-
f"pause_mode={state.pause_mode}"
|
|
628
|
+
f"pause_mode={state.pause_mode}, "
|
|
629
|
+
f"enable_short_sentence={state.enable_short_sentence}, "
|
|
630
|
+
f"model_source={state.model_source}, "
|
|
631
|
+
f"model_variant={state.model_variant}, "
|
|
632
|
+
f"model_quality={state.model_quality}",
|
|
596
633
|
"info",
|
|
597
634
|
)
|
|
598
635
|
# Apply saved settings for consistency
|
|
@@ -607,6 +644,10 @@ class PhonemeConverter:
|
|
|
607
644
|
self.options.pause_paragraph = state.pause_paragraph
|
|
608
645
|
self.options.pause_variance = state.pause_variance
|
|
609
646
|
self.options.pause_mode = state.pause_mode
|
|
647
|
+
self.options.enable_short_sentence = state.enable_short_sentence
|
|
648
|
+
self.options.model_quality = state.model_quality
|
|
649
|
+
self.options.model_source = state.model_source
|
|
650
|
+
self.options.model_variant = state.model_variant
|
|
610
651
|
|
|
611
652
|
if state is None:
|
|
612
653
|
# Create new state
|
|
@@ -617,12 +658,16 @@ class PhonemeConverter:
|
|
|
617
658
|
voice=self.options.voice,
|
|
618
659
|
speed=self.options.speed,
|
|
619
660
|
output_format=self.options.output_format,
|
|
661
|
+
model_quality=self.options.model_quality,
|
|
662
|
+
model_source=self.options.model_source,
|
|
663
|
+
model_variant=self.options.model_variant,
|
|
620
664
|
silence_between_chapters=self.options.silence_between_chapters,
|
|
621
665
|
pause_clause=self.options.pause_clause,
|
|
622
666
|
pause_sentence=self.options.pause_sentence,
|
|
623
667
|
pause_paragraph=self.options.pause_paragraph,
|
|
624
668
|
pause_variance=self.options.pause_variance,
|
|
625
669
|
pause_mode=self.options.pause_mode,
|
|
670
|
+
enable_short_sentence=self.options.enable_short_sentence,
|
|
626
671
|
chapters=[
|
|
627
672
|
PhonemeChapterState(
|
|
628
673
|
index=idx,
|
|
@@ -648,6 +693,9 @@ class PhonemeConverter:
|
|
|
648
693
|
pause_sentence=self.options.pause_sentence,
|
|
649
694
|
pause_paragraph=self.options.pause_paragraph,
|
|
650
695
|
pause_variance=self.options.pause_variance,
|
|
696
|
+
model_quality=self.options.model_quality,
|
|
697
|
+
model_source=self.options.model_source,
|
|
698
|
+
model_variant=self.options.model_variant,
|
|
651
699
|
model_path=self.options.model_path,
|
|
652
700
|
voices_path=self.options.voices_path,
|
|
653
701
|
voice_blend=self.options.voice_blend,
|
|
@@ -848,6 +896,9 @@ class PhonemeConverter:
|
|
|
848
896
|
pause_sentence=self.options.pause_sentence,
|
|
849
897
|
pause_paragraph=self.options.pause_paragraph,
|
|
850
898
|
pause_variance=self.options.pause_variance,
|
|
899
|
+
model_quality=self.options.model_quality,
|
|
900
|
+
model_source=self.options.model_source,
|
|
901
|
+
model_variant=self.options.model_variant,
|
|
851
902
|
model_path=self.options.model_path,
|
|
852
903
|
voices_path=self.options.voices_path,
|
|
853
904
|
voice_blend=self.options.voice_blend,
|
ttsforge/ssmd_generator.py
CHANGED
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
This module converts chapter text to SSMD format with markup for:
|
|
4
4
|
- Emphasis (*text* for moderate, **text** for strong)
|
|
5
|
-
- Language switches ([text]
|
|
6
|
-
- Phoneme substitutions ([word]
|
|
5
|
+
- Language switches ([text]{lang="lang_code"})
|
|
6
|
+
- Phoneme substitutions ([word]{ph="phoneme"})
|
|
7
7
|
|
|
8
8
|
Note: Structural breaks (paragraphs, sentences, clauses) are NOT automatically
|
|
9
9
|
added. The SSMD parser in pykokoro handles sentence detection automatically.
|
|
@@ -170,7 +170,7 @@ def _inject_phoneme_substitutions(
|
|
|
170
170
|
if not phoneme:
|
|
171
171
|
return matched_word
|
|
172
172
|
clean_phoneme = phoneme.strip("/")
|
|
173
|
-
return f"[{matched_word}]
|
|
173
|
+
return f"[{matched_word}]" + "{" + f'ph="{clean_phoneme}"' + "}"
|
|
174
174
|
|
|
175
175
|
segments: list[str] = []
|
|
176
176
|
last_index = 0
|
|
@@ -260,7 +260,7 @@ def _strip_redundant_title(chapter_title: str, chapter_text: str) -> str:
|
|
|
260
260
|
return chapter_text
|
|
261
261
|
|
|
262
262
|
trimmed_line = title_pattern.sub("", first_line, count=1).lstrip(
|
|
263
|
-
" \t
|
|
263
|
+
" \t:;-\u2013\u2014"
|
|
264
264
|
)
|
|
265
265
|
if trimmed_line:
|
|
266
266
|
lines[first_idx] = trimmed_line
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ttsforge
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: Generate audiobooks from EPUB files using Kokoro ONNX TTS.
|
|
5
5
|
Author-email: Holger Nahrstaedt <nahrstaedt@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -396,14 +396,14 @@ SSMD files use a simple markdown-like syntax:
|
|
|
396
396
|
**Custom Phonemes**:
|
|
397
397
|
|
|
398
398
|
```
|
|
399
|
-
[Hermione]
|
|
400
|
-
[API]
|
|
399
|
+
[Hermione]{ph="hɝmˈIni"} # Override pronunciation
|
|
400
|
+
[API]{ph="ˌeɪpiˈaɪ"} # Technical terms
|
|
401
401
|
```
|
|
402
402
|
|
|
403
403
|
**Language Switching** (planned):
|
|
404
404
|
|
|
405
405
|
```
|
|
406
|
-
[Bonjour]
|
|
406
|
+
[Bonjour]{lang="fr"} # Mark text as French
|
|
407
407
|
```
|
|
408
408
|
|
|
409
409
|
#### Example SSMD File
|
|
@@ -411,7 +411,7 @@ SSMD files use a simple markdown-like syntax:
|
|
|
411
411
|
```ssmd
|
|
412
412
|
Chapter One ...p
|
|
413
413
|
|
|
414
|
-
[Harry]
|
|
414
|
+
[Harry]{ph="hæɹi"} Potter was a *highly unusual* boy in many ways. ...s
|
|
415
415
|
For one thing, he **hated** the summer holidays more than any other
|
|
416
416
|
time of year. ...s For another, he really wanted to do his homework,
|
|
417
417
|
but was forced to do it in secret, in the dead of the night. ...p
|
|
@@ -498,12 +498,12 @@ Edit `custom_phonemes.json` to fix any incorrect phonemes. The file format is:
|
|
|
498
498
|
},
|
|
499
499
|
"entries": {
|
|
500
500
|
"Hermione": {
|
|
501
|
-
"phoneme": "
|
|
501
|
+
"phoneme": "hɝmˈIni",
|
|
502
502
|
"occurrences": 847,
|
|
503
503
|
"verified": false
|
|
504
504
|
},
|
|
505
505
|
"Kubernetes": {
|
|
506
|
-
"phoneme": "
|
|
506
|
+
"phoneme": "kubɚnˈɛtɪs",
|
|
507
507
|
"occurrences": 12,
|
|
508
508
|
"verified": false
|
|
509
509
|
}
|
|
@@ -515,8 +515,8 @@ Or use the simple format:
|
|
|
515
515
|
|
|
516
516
|
```json
|
|
517
517
|
{
|
|
518
|
-
"Hermione": "
|
|
519
|
-
"Kubernetes": "
|
|
518
|
+
"Hermione": "hɝmˈIni",
|
|
519
|
+
"Kubernetes": "kubɚnˈɛtɪs"
|
|
520
520
|
}
|
|
521
521
|
```
|
|
522
522
|
|
|
@@ -548,9 +548,9 @@ You can create a dictionary manually without extraction:
|
|
|
548
548
|
|
|
549
549
|
```json
|
|
550
550
|
{
|
|
551
|
-
"Katniss": "
|
|
552
|
-
"Peeta": "
|
|
553
|
-
"Panem": "
|
|
551
|
+
"Katniss": "kætnɪs",
|
|
552
|
+
"Peeta": "pitə",
|
|
553
|
+
"Panem": "pænəm"
|
|
554
554
|
}
|
|
555
555
|
```
|
|
556
556
|
|
|
@@ -617,6 +617,7 @@ ttsforge convert book.epub --gpu
|
|
|
617
617
|
| `pause_paragraph` | `0.9` | Paragraph pause (seconds) |
|
|
618
618
|
| `pause_variance` | `0.05` | Pause variance (seconds) |
|
|
619
619
|
| `pause_mode` | `auto` | Pause mode (`tts`, `manual`, `auto`) |
|
|
620
|
+
| `enable_short_sentence` | `None` | Handle short sentences |
|
|
620
621
|
| `announce_chapters` | `true` | Speak chapter titles |
|
|
621
622
|
| `chapter_pause_after_title` | `2.0` | Pause after chapter title |
|
|
622
623
|
| `phonemization_lang` | `None` | Override phonemization language |
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
ttsforge/__init__.py,sha256=5mT7eXwuq0Z-Qn_WjYWVjA8VUOUy0lGTyaqQwNdcEOE,2149
|
|
2
|
+
ttsforge/_version.py,sha256=Ok5oAXdWgR9aghaFXTafTeDW6sYO3uVe6d2Nket57R4,704
|
|
3
|
+
ttsforge/audio_merge.py,sha256=Tt7o8GBNrkcfiSKycUpWvblj-y4zwlULoX-eCblqYpo,5666
|
|
4
|
+
ttsforge/audio_player.py,sha256=HYc4vv46yDXjVXaWRlj1tUtWLiwNTwbzT6oDfOUB5vA,14351
|
|
5
|
+
ttsforge/chapter_selection.py,sha256=a-XlEO4HMzeUBfhvnh6gQOQmDuM0wMpVCi0pw6oM2hQ,2579
|
|
6
|
+
ttsforge/constants.py,sha256=c3b9s41mNIb8NHK6C7XKB-xzX4wJhFZpo0auUqCwT2s,5394
|
|
7
|
+
ttsforge/conversion.py,sha256=2ZzeBfg-Kwf7kR9Fwht30Vx9ZcoAEQOVAiFsmfF2IdA,45011
|
|
8
|
+
ttsforge/input_reader.py,sha256=b49SBT-mL4SnR74D8xwyWHC_smPhsJ5jpPAj4QQ5WKo,14068
|
|
9
|
+
ttsforge/kokoro_lang.py,sha256=8603b5whfk0KzGrNK7pqRjzoH1Ge9TKoX7AMzKsX0sk,376
|
|
10
|
+
ttsforge/kokoro_runner.py,sha256=AKvEMaBfCTCLR3KcHoE04nQnUkHDwv62BvQNb1vGC8U,5923
|
|
11
|
+
ttsforge/name_extractor.py,sha256=CxxBadCO0Pcoepcj7gZwkfWPMud2oa9477h_lDYWrIA,9578
|
|
12
|
+
ttsforge/phoneme_conversion.py,sha256=nbDV0adWi--XyRt2RblJbav_ImzDeXrC0xPvoPx8_9c,41093
|
|
13
|
+
ttsforge/phonemes.py,sha256=EUZ1Qr-0rPThRpSeuJQe5Z3J3nz7rX1Xs3Rjjw19qIQ,15517
|
|
14
|
+
ttsforge/ssmd_generator.py,sha256=LknVBSETKH9cY4CoAUBjd1vEfr5VXi0xqEKcft2CR8I,13346
|
|
15
|
+
ttsforge/utils.py,sha256=3BiNFyScV3Dy_xhVm2EigpxUb4Z6YwIQPzzxwDzfCzI,24942
|
|
16
|
+
ttsforge/cli/__init__.py,sha256=CTqYeUAJaKV7YTYqcmr7-VxjwJfjLcnPYM2OKyws0Oc,2103
|
|
17
|
+
ttsforge/cli/commands_conversion.py,sha256=T4hPiU4EXDQ2Wkbd5I5TuHvuAT8rlfEjhfC4mlSMrzg,66007
|
|
18
|
+
ttsforge/cli/commands_phonemes.py,sha256=k3CtXKnUTpbGHj1oPucjIB6syA3LWjxFpg3OIC-tzJ8,33183
|
|
19
|
+
ttsforge/cli/commands_utility.py,sha256=65NSHUFYjRPOWVLnpeBKbR-TbsaDLYAa5xDGcWHS-fk,48630
|
|
20
|
+
ttsforge/cli/helpers.py,sha256=IJt0VpIMPOC-lnBeR3-1keh31MuAdSemDsLh6FpiHLk,2778
|
|
21
|
+
ttsforge/vocab/__init__.py,sha256=lMgS0dY9VbOYI20LnPjjqrWcjLIQ1FKkR4-xcXsvrqc,3641
|
|
22
|
+
ttsforge-0.1.2.dist-info/licenses/LICENSE,sha256=9csb1sDNn0HdUPKgOTUwtb4CkvYPcFXHnkxKCS99EWQ,1074
|
|
23
|
+
ttsforge-0.1.2.dist-info/METADATA,sha256=T0xJ8RtNsBidb4sa_JYR-QnugMK-fLLyVg71R-zi0QU,19655
|
|
24
|
+
ttsforge-0.1.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
25
|
+
ttsforge-0.1.2.dist-info/entry_points.txt,sha256=SrcNdlhQpoUCzPzhVbOmMzATQeV7j7XYl0DPrVjZ-ks,47
|
|
26
|
+
ttsforge-0.1.2.dist-info/top_level.txt,sha256=rNLi-3muicHF8UvZu_FuA2ML_Dz9sVPCjik2E8XnCVk,9
|
|
27
|
+
ttsforge-0.1.2.dist-info/RECORD,,
|
ttsforge-0.1.0.dist-info/RECORD
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
ttsforge/__init__.py,sha256=Jg8_0vPttTVWrnt4HBqrTOKYfmcgpVpfddSVcU4HKXo,2432
|
|
2
|
-
ttsforge/_version.py,sha256=5jwwVncvCiTnhOedfkzzxmxsggwmTBORdFL_4wq0ZeY,704
|
|
3
|
-
ttsforge/audio_merge.py,sha256=Tt7o8GBNrkcfiSKycUpWvblj-y4zwlULoX-eCblqYpo,5666
|
|
4
|
-
ttsforge/audio_player.py,sha256=HYc4vv46yDXjVXaWRlj1tUtWLiwNTwbzT6oDfOUB5vA,14351
|
|
5
|
-
ttsforge/chapter_selection.py,sha256=a-XlEO4HMzeUBfhvnh6gQOQmDuM0wMpVCi0pw6oM2hQ,2579
|
|
6
|
-
ttsforge/constants.py,sha256=p_-LfE_u1oT--tzjI5PrHGXd5u9DHM7dcHsfcUgvzvY,5108
|
|
7
|
-
ttsforge/conversion.py,sha256=oZ1FyyyeChb6EoCrVJ-maogXqLq537iUBne5oa_epdw,41629
|
|
8
|
-
ttsforge/input_reader.py,sha256=b49SBT-mL4SnR74D8xwyWHC_smPhsJ5jpPAj4QQ5WKo,14068
|
|
9
|
-
ttsforge/kokoro_lang.py,sha256=8603b5whfk0KzGrNK7pqRjzoH1Ge9TKoX7AMzKsX0sk,376
|
|
10
|
-
ttsforge/kokoro_runner.py,sha256=ZGBx70_rHcfwKiUgywa_3-7d5u-wQ_0pPOukQRuACu0,4390
|
|
11
|
-
ttsforge/name_extractor.py,sha256=vBVp2OT8sdYdbczs0SQdEcZff2sN1sk5URi5gBwJrcE,9584
|
|
12
|
-
ttsforge/phoneme_conversion.py,sha256=so5Iex2lme9tHvZiCysLBAkBH1tp42O8pkibLSrHzh8,38280
|
|
13
|
-
ttsforge/phonemes.py,sha256=EUZ1Qr-0rPThRpSeuJQe5Z3J3nz7rX1Xs3Rjjw19qIQ,15517
|
|
14
|
-
ttsforge/ssmd_generator.py,sha256=Dmuvy6T8WVyzHvaNWsPDdyyTIZCpyX2pOsaVUPk8s08,13326
|
|
15
|
-
ttsforge/utils.py,sha256=3BiNFyScV3Dy_xhVm2EigpxUb4Z6YwIQPzzxwDzfCzI,24942
|
|
16
|
-
ttsforge/cli/__init__.py,sha256=CTqYeUAJaKV7YTYqcmr7-VxjwJfjLcnPYM2OKyws0Oc,2103
|
|
17
|
-
ttsforge/cli/commands_conversion.py,sha256=H1fX1M52RdSiSnGfIknr-dBGx-MeMaVf5J6rMDVrgWU,63457
|
|
18
|
-
ttsforge/cli/commands_phonemes.py,sha256=_0PQd9_hjOvzkzx2qM5BEG1fNyZYTjKc5nrZZ41HV4k,32373
|
|
19
|
-
ttsforge/cli/commands_utility.py,sha256=_8KMUjVYVqp63PH_gjOjS-fw6ZCujaMDXFxoKfUzlko,48013
|
|
20
|
-
ttsforge/cli/helpers.py,sha256=5Co2EvDhYspKhjW2-P3sNxj9MFFgWyTFeqOyJbPy2yA,2697
|
|
21
|
-
ttsforge/vocab/__init__.py,sha256=lMgS0dY9VbOYI20LnPjjqrWcjLIQ1FKkR4-xcXsvrqc,3641
|
|
22
|
-
ttsforge-0.1.0.dist-info/licenses/LICENSE,sha256=9csb1sDNn0HdUPKgOTUwtb4CkvYPcFXHnkxKCS99EWQ,1074
|
|
23
|
-
ttsforge-0.1.0.dist-info/METADATA,sha256=cQJf57NrwNZJUZacAQgnokqO1cnSC_Mi1b7ZWjcZ0no,19577
|
|
24
|
-
ttsforge-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
25
|
-
ttsforge-0.1.0.dist-info/entry_points.txt,sha256=SrcNdlhQpoUCzPzhVbOmMzATQeV7j7XYl0DPrVjZ-ks,47
|
|
26
|
-
ttsforge-0.1.0.dist-info/top_level.txt,sha256=rNLi-3muicHF8UvZu_FuA2ML_Dz9sVPCjik2E8XnCVk,9
|
|
27
|
-
ttsforge-0.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|