ttsforge 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,139 @@
1
+ """Vocabulary management for ttsforge tokenizer.
2
+
3
+ This module provides a compatibility layer that wraps kokorog2p's vocabulary
4
+ functions for backward compatibility with existing ttsforge code.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+ from typing import TYPE_CHECKING
11
+
12
+ import kokorog2p
13
+ from kokorog2p.vocab import (
14
+ get_vocab as _get_kokoro_vocab,
15
+ get_vocab_reverse as _get_vocab_reverse,
16
+ get_config as _get_kokoro_config,
17
+ N_TOKENS,
18
+ PAD_IDX,
19
+ encode,
20
+ decode,
21
+ validate_for_kokoro,
22
+ filter_for_kokoro,
23
+ phonemes_to_ids,
24
+ ids_to_phonemes,
25
+ )
26
+
if TYPE_CHECKING:
    # Placeholder: this module currently has no type-checking-only imports.
    pass
29
+
# Version identifier reported for the vocabulary (kept for compatibility).
DEFAULT_VERSION = "v1.0"

# Version strings accepted by the compatibility API; the default version is
# the only member.
SUPPORTED_VERSIONS = {DEFAULT_VERSION}
35
+
36
+
def get_config_path() -> Path:
    """Locate the Kokoro config file shipped alongside kokorog2p's data package.

    Returns:
        Path to ``kokoro_config.json`` in the directory that contains the
        ``kokorog2p.data`` package.

    Note:
        The returned path points into kokorog2p's embedded data, not at a
        separately downloaded ``config.json``.
    """
    # Imported lazily so the data package is only loaded when the path is needed.
    import kokorog2p.data as bundled_data

    data_dir = Path(bundled_data.__file__).parent
    return data_dir / "kokoro_config.json"
50
+
51
+
def is_config_downloaded() -> bool:
    """Report whether the vocabulary config is available.

    Kept as a compatibility function: the answer is unconditionally ``True``
    because the vocabulary is embedded in kokorog2p and needs no download.

    Returns:
        Always ``True``.
    """
    # No download step exists any more, so there is nothing to check.
    return True
59
+
60
+
def load_vocab(config_path: Path | str | None = None) -> dict[str, int]:
    """Load vocabulary from kokorog2p.

    Args:
        config_path: Kept for backward compatibility; the vocabulary always
            comes from kokorog2p. ``None``, ``Path`` objects, file-path
            strings and supported version strings are accepted and ignored.
            A string shaped like a version (for example ``"v2.0"``) that is
            not in ``SUPPORTED_VERSIONS`` is rejected.

    Returns:
        Dictionary mapping phoneme strings to token IDs.

    Raises:
        ValueError: If an unknown version string is provided.
    """
    import re

    if isinstance(config_path, str) and config_path not in SUPPORTED_VERSIONS:
        # Reject only strings that are really shaped like a version:
        # "v<digits>.<digits>" with an optional suffix and no path separator.
        # A looser "starts with 'v' and contains '.'" test would also reject
        # legacy path arguments such as "vocab/config.json".
        if re.fullmatch(r"v\d+\.\d+[^/\\]*", config_path):
            raise ValueError(
                f"Unknown vocabulary version: {config_path}. "
                f"Supported versions: {', '.join(sorted(SUPPORTED_VERSIONS))}"
            )

    # Every accepted argument resolves to the same kokorog2p vocabulary.
    return _get_kokoro_vocab()
88
+
89
+
def get_vocab_info(config_path: Path | str | None = None) -> dict:
    """Summarize the active vocabulary as a metadata dictionary.

    Args:
        config_path: Accepted for backward compatibility; not used.

    Returns:
        Dictionary with the keys ``version``, ``path``, ``num_tokens``,
        ``max_token_id``, ``n_tokens``, ``downloaded`` and ``backend``.
    """
    token_ids = _get_kokoro_vocab()
    config_location = str(get_config_path())

    # An empty vocabulary has no maximum; report 0 in that case.
    if token_ids:
        highest_id = max(token_ids.values())
    else:
        highest_id = 0

    return dict(
        version=DEFAULT_VERSION,
        path=config_location,
        num_tokens=len(token_ids),
        max_token_id=highest_id,
        n_tokens=N_TOKENS,
        downloaded=True,
        backend="kokorog2p",
    )
109
+
110
+
def list_versions() -> list[str]:
    """List all available vocabulary versions.

    The list is derived from ``SUPPORTED_VERSIONS`` so it always agrees with
    the set of version strings that ``load_vocab`` accepts.

    Returns:
        Sorted list of the supported version strings.
    """
    return sorted(SUPPORTED_VERSIONS)
118
+
119
+
# Public API of this module: the ttsforge compatibility helpers defined here,
# followed by the names imported from kokorog2p.vocab and re-exported for
# convenience.
__all__ = [
    # Compatibility constants and functions defined in this module
    "DEFAULT_VERSION",
    "SUPPORTED_VERSIONS",
    "get_config_path",
    "is_config_downloaded",
    "load_vocab",
    "get_vocab_info",
    "list_versions",
    # Names re-exported from kokorog2p.vocab
    "N_TOKENS",
    "PAD_IDX",
    "encode",
    "decode",
    "validate_for_kokoro",
    "filter_for_kokoro",
    "phonemes_to_ids",
    "ids_to_phonemes",
]