ttsforge 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ttsforge/__init__.py +114 -0
- ttsforge/_version.py +34 -0
- ttsforge/audio_merge.py +180 -0
- ttsforge/audio_player.py +473 -0
- ttsforge/chapter_selection.py +75 -0
- ttsforge/cli/__init__.py +73 -0
- ttsforge/cli/commands_conversion.py +1927 -0
- ttsforge/cli/commands_phonemes.py +1033 -0
- ttsforge/cli/commands_utility.py +1389 -0
- ttsforge/cli/helpers.py +76 -0
- ttsforge/constants.py +164 -0
- ttsforge/conversion.py +1090 -0
- ttsforge/input_reader.py +408 -0
- ttsforge/kokoro_lang.py +12 -0
- ttsforge/kokoro_runner.py +125 -0
- ttsforge/name_extractor.py +305 -0
- ttsforge/phoneme_conversion.py +978 -0
- ttsforge/phonemes.py +486 -0
- ttsforge/ssmd_generator.py +422 -0
- ttsforge/utils.py +785 -0
- ttsforge/vocab/__init__.py +139 -0
- ttsforge-0.1.0.dist-info/METADATA +659 -0
- ttsforge-0.1.0.dist-info/RECORD +27 -0
- ttsforge-0.1.0.dist-info/WHEEL +5 -0
- ttsforge-0.1.0.dist-info/entry_points.txt +2 -0
- ttsforge-0.1.0.dist-info/licenses/LICENSE +21 -0
- ttsforge-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""Vocabulary management for ttsforge tokenizer.
|
|
2
|
+
|
|
3
|
+
This module provides a compatibility layer that wraps kokorog2p's vocabulary
|
|
4
|
+
functions for backward compatibility with existing ttsforge code.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
import kokorog2p
|
|
13
|
+
from kokorog2p.vocab import (
|
|
14
|
+
get_vocab as _get_kokoro_vocab,
|
|
15
|
+
get_vocab_reverse as _get_vocab_reverse,
|
|
16
|
+
get_config as _get_kokoro_config,
|
|
17
|
+
N_TOKENS,
|
|
18
|
+
PAD_IDX,
|
|
19
|
+
encode,
|
|
20
|
+
decode,
|
|
21
|
+
validate_for_kokoro,
|
|
22
|
+
filter_for_kokoro,
|
|
23
|
+
phonemes_to_ids,
|
|
24
|
+
ids_to_phonemes,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
pass
|
|
29
|
+
|
|
30
|
+
# Version label reported for the vocabulary (kept for compatibility).
DEFAULT_VERSION = "v1.0"

# Version strings accepted for backward compatibility; at present the
# default version is the only member.
SUPPORTED_VERSIONS = {DEFAULT_VERSION}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_config_path() -> Path:
    """Locate kokorog2p's bundled ``kokoro_config.json`` (compatibility shim).

    The path is built from the location of the ``kokorog2p.data`` module
    file; no check is made that the file actually exists.

    Returns:
        ``kokoro_config.json`` inside the directory containing the
        ``kokorog2p.data`` module file.

    Note:
        The result points at the config embedded in kokorog2p, not at a
        config.json downloaded by onnx_backend.
    """
    # Imported here, at call time, rather than at module import.
    import kokorog2p.data

    data_module_file = Path(kokorog2p.data.__file__)
    data_dir = data_module_file.parent
    return data_dir / "kokoro_config.json"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def is_config_downloaded() -> bool:
    """Report whether the vocabulary config is available.

    Kept as a compatibility function: no check is performed and the
    answer is unconditional.

    Returns:
        Always ``True`` (kokorog2p embeds the vocabulary).
    """
    config_available = True
    return config_available
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def load_vocab(config_path: Path | str | None = None) -> dict[str, int]:
    """Load the vocabulary from kokorog2p.

    Args:
        config_path: Kept for backward compatibility. Its value never
            changes which vocabulary is returned; it is only inspected so
            that unsupported version strings can be rejected. ``None``,
            ``Path`` objects, and strings that are not version-shaped
            (for example a file path such as ``"vocab/config.json"``)
            are ignored.

    Returns:
        Dictionary mapping phoneme strings to token IDs.

    Raises:
        ValueError: If ``config_path`` is a version-shaped string (``v``
            followed by dot-separated numbers, e.g. ``"v2.0"``) that is
            not in ``SUPPORTED_VERSIONS``.
    """
    import re

    if isinstance(config_path, str) and config_path not in SUPPORTED_VERSIONS:
        # Reject only strings that really are version-shaped ("v2.0").
        # A looser test (starts with "v" and contains ".") would also
        # reject ordinary paths such as "vocab/config.json", which are
        # meant to be ignored for backward compatibility.
        if re.fullmatch(r"v\d+(?:\.\d+)+", config_path):
            raise ValueError(
                f"Unknown vocabulary version: {config_path}. "
                f"Supported versions: {', '.join(sorted(SUPPORTED_VERSIONS))}"
            )

    # Every accepted input yields the same kokorog2p vocabulary.
    return _get_kokoro_vocab()
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def get_vocab_info(config_path: Path | str | None = None) -> dict:
    """Summarize the kokorog2p vocabulary as a metadata dictionary.

    Args:
        config_path: Not used; accepted only for backward compatibility.

    Returns:
        A dictionary with the keys ``version``, ``path``, ``num_tokens``,
        ``max_token_id``, ``n_tokens``, ``downloaded`` and ``backend``.
    """
    token_map = _get_kokoro_vocab()
    config_location = str(get_config_path())
    token_count = len(token_map)

    # An empty vocabulary reports 0 as its largest token ID.
    if token_map:
        highest_id = max(token_map.values())
    else:
        highest_id = 0

    return {
        "version": DEFAULT_VERSION,
        "path": config_location,
        "num_tokens": token_count,
        "max_token_id": highest_id,
        "n_tokens": N_TOKENS,
        "downloaded": True,
        "backend": "kokorog2p",
    }
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def list_versions() -> list[str]:
    """List all available vocabulary versions.

    The list is derived from ``SUPPORTED_VERSIONS`` so that it cannot
    drift from the set of version strings this module treats as
    supported.

    Returns:
        The supported version strings as a sorted list. Currently only
        "v1.0" is supported.
    """
    return sorted(SUPPORTED_VERSIONS)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# Public API of this module: the compatibility helpers defined here plus
# the vocabulary names imported from kokorog2p.vocab, re-exported for
# convenience. The underscore-prefixed import aliases are not exported.
__all__ = [
    # Compatibility functions and constants defined in this module
    "DEFAULT_VERSION",
    "SUPPORTED_VERSIONS",
    "get_config_path",
    "is_config_downloaded",
    "load_vocab",
    "get_vocab_info",
    "list_versions",
    # Names re-exported unchanged from kokorog2p.vocab
    "N_TOKENS",
    "PAD_IDX",
    "encode",
    "decode",
    "validate_for_kokoro",
    "filter_for_kokoro",
    "phonemes_to_ids",
    "ids_to_phonemes",
]
|