xinference 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this release of xinference has been flagged as potentially problematic; see the registry listing for details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +4 -7
- xinference/client/handlers.py +3 -0
- xinference/core/chat_interface.py +6 -1
- xinference/core/model.py +2 -0
- xinference/core/scheduler.py +4 -7
- xinference/core/supervisor.py +114 -23
- xinference/core/worker.py +70 -4
- xinference/deploy/local.py +2 -1
- xinference/model/audio/core.py +11 -0
- xinference/model/audio/cosyvoice.py +16 -5
- xinference/model/audio/kokoro.py +139 -0
- xinference/model/audio/melotts.py +110 -0
- xinference/model/audio/model_spec.json +80 -0
- xinference/model/audio/model_spec_modelscope.json +18 -0
- xinference/model/audio/whisper.py +35 -10
- xinference/model/llm/llama_cpp/core.py +21 -14
- xinference/model/llm/llm_family.json +527 -1
- xinference/model/llm/llm_family.py +4 -1
- xinference/model/llm/llm_family_modelscope.json +495 -3
- xinference/model/llm/memory.py +1 -1
- xinference/model/llm/mlx/core.py +24 -6
- xinference/model/llm/transformers/core.py +9 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +20 -3
- xinference/model/llm/transformers/utils.py +22 -11
- xinference/model/llm/utils.py +115 -1
- xinference/model/llm/vllm/core.py +14 -4
- xinference/model/llm/vllm/xavier/block.py +3 -4
- xinference/model/llm/vllm/xavier/block_tracker.py +71 -58
- xinference/model/llm/vllm/xavier/collective.py +74 -0
- xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
- xinference/model/llm/vllm/xavier/executor.py +18 -16
- xinference/model/llm/vllm/xavier/scheduler.py +79 -63
- xinference/model/llm/vllm/xavier/test/test_xavier.py +60 -35
- xinference/model/llm/vllm/xavier/transfer.py +53 -32
- xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
- xinference/thirdparty/melo/__init__.py +0 -0
- xinference/thirdparty/melo/api.py +135 -0
- xinference/thirdparty/melo/app.py +61 -0
- xinference/thirdparty/melo/attentions.py +459 -0
- xinference/thirdparty/melo/commons.py +160 -0
- xinference/thirdparty/melo/configs/config.json +94 -0
- xinference/thirdparty/melo/data/example/metadata.list +20 -0
- xinference/thirdparty/melo/data_utils.py +413 -0
- xinference/thirdparty/melo/download_utils.py +67 -0
- xinference/thirdparty/melo/infer.py +25 -0
- xinference/thirdparty/melo/init_downloads.py +14 -0
- xinference/thirdparty/melo/losses.py +58 -0
- xinference/thirdparty/melo/main.py +36 -0
- xinference/thirdparty/melo/mel_processing.py +174 -0
- xinference/thirdparty/melo/models.py +1030 -0
- xinference/thirdparty/melo/modules.py +598 -0
- xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
- xinference/thirdparty/melo/monotonic_align/core.py +46 -0
- xinference/thirdparty/melo/preprocess_text.py +135 -0
- xinference/thirdparty/melo/split_utils.py +174 -0
- xinference/thirdparty/melo/text/__init__.py +35 -0
- xinference/thirdparty/melo/text/chinese.py +199 -0
- xinference/thirdparty/melo/text/chinese_bert.py +107 -0
- xinference/thirdparty/melo/text/chinese_mix.py +253 -0
- xinference/thirdparty/melo/text/cleaner.py +36 -0
- xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
- xinference/thirdparty/melo/text/cmudict.rep +129530 -0
- xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
- xinference/thirdparty/melo/text/english.py +284 -0
- xinference/thirdparty/melo/text/english_bert.py +39 -0
- xinference/thirdparty/melo/text/english_utils/__init__.py +0 -0
- xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
- xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
- xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
- xinference/thirdparty/melo/text/es_phonemizer/__init__.py +0 -0
- xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
- xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
- xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
- xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
- xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
- xinference/thirdparty/melo/text/fr_phonemizer/__init__.py +0 -0
- xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
- xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
- xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
- xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
- xinference/thirdparty/melo/text/french.py +94 -0
- xinference/thirdparty/melo/text/french_bert.py +39 -0
- xinference/thirdparty/melo/text/japanese.py +647 -0
- xinference/thirdparty/melo/text/japanese_bert.py +49 -0
- xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
- xinference/thirdparty/melo/text/korean.py +192 -0
- xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
- xinference/thirdparty/melo/text/spanish.py +122 -0
- xinference/thirdparty/melo/text/spanish_bert.py +39 -0
- xinference/thirdparty/melo/text/symbols.py +290 -0
- xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
- xinference/thirdparty/melo/train.py +635 -0
- xinference/thirdparty/melo/train.sh +19 -0
- xinference/thirdparty/melo/transforms.py +209 -0
- xinference/thirdparty/melo/utils.py +424 -0
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.1eb206d1.js → main.b0936c54.js} +3 -3
- xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/METADATA +37 -27
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/RECORD +122 -45
- xinference/web/ui/build/static/js/main.1eb206d1.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2213d49de260e1f67c888081b18f120f5225462b829ae57c9e05a05cec83689d.json +0 -1
- /xinference/web/ui/build/static/js/{main.1eb206d1.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/LICENSE +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/WHEEL +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/entry_points.txt +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/top_level.txt +0 -0
xinference/thirdparty/melo/text/cmudict_cache.pickle
Binary file (no diff rendered).
xinference/thirdparty/melo/text/english.py
@@ -0,0 +1,284 @@
+import pickle
+import os
+import re
+from g2p_en import G2p
+
+from . import symbols
+
+from .english_utils.abbreviations import expand_abbreviations
+from .english_utils.time_norm import expand_time_english
+from .english_utils.number_norm import normalize_numbers
+from .japanese import distribute_phone
+
+from transformers import AutoTokenizer
+
+current_file_path = os.path.dirname(__file__)
+CMU_DICT_PATH = os.path.join(current_file_path, "cmudict.rep")
+CACHE_PATH = os.path.join(current_file_path, "cmudict_cache.pickle")
+_g2p = G2p()
+
+arpa = {
+    "AH0",
+    "S",
+    "AH1",
+    "EY2",
+    "AE2",
+    "EH0",
+    "OW2",
+    "UH0",
+    "NG",
+    "B",
+    "G",
+    "AY0",
+    "M",
+    "AA0",
+    "F",
+    "AO0",
+    "ER2",
+    "UH1",
+    "IY1",
+    "AH2",
+    "DH",
+    "IY0",
+    "EY1",
+    "IH0",
+    "K",
+    "N",
+    "W",
+    "IY2",
+    "T",
+    "AA1",
+    "ER1",
+    "EH2",
+    "OY0",
+    "UH2",
+    "UW1",
+    "Z",
+    "AW2",
+    "AW1",
+    "V",
+    "UW2",
+    "AA2",
+    "ER",
+    "AW0",
+    "UW0",
+    "R",
+    "OW1",
+    "EH1",
+    "ZH",
+    "AE0",
+    "IH2",
+    "IH",
+    "Y",
+    "JH",
+    "P",
+    "AY1",
+    "EY0",
+    "OY2",
+    "TH",
+    "HH",
+    "D",
+    "ER0",
+    "CH",
+    "AO1",
+    "AE1",
+    "AO2",
+    "OY1",
+    "AY2",
+    "IH1",
+    "OW0",
+    "L",
+    "SH",
+}
+
+
+def post_replace_ph(ph):
+    rep_map = {
+        ":": ",",
+        ";": ",",
+        ",": ",",
+        "。": ".",
+        "!": "!",
+        "?": "?",
+        "\n": ".",
+        "·": ",",
+        "、": ",",
+        "...": "…",
+        "v": "V",
+    }
+    if ph in rep_map.keys():
+        ph = rep_map[ph]
+    if ph in symbols:
+        return ph
+    if ph not in symbols:
+        ph = "UNK"
+    return ph
+
+
+def read_dict():
+    g2p_dict = {}
+    start_line = 49
+    with open(CMU_DICT_PATH) as f:
+        line = f.readline()
+        line_index = 1
+        while line:
+            if line_index >= start_line:
+                line = line.strip()
+                word_split = line.split("  ")
+                word = word_split[0]
+
+                syllable_split = word_split[1].split(" - ")
+                g2p_dict[word] = []
+                for syllable in syllable_split:
+                    phone_split = syllable.split(" ")
+                    g2p_dict[word].append(phone_split)
+
+            line_index = line_index + 1
+            line = f.readline()
+
+    return g2p_dict
+
+
+def cache_dict(g2p_dict, file_path):
+    with open(file_path, "wb") as pickle_file:
+        pickle.dump(g2p_dict, pickle_file)
+
+
+def get_dict():
+    if os.path.exists(CACHE_PATH):
+        with open(CACHE_PATH, "rb") as pickle_file:
+            g2p_dict = pickle.load(pickle_file)
+    else:
+        g2p_dict = read_dict()
+        cache_dict(g2p_dict, CACHE_PATH)
+
+    return g2p_dict
+
+
+eng_dict = get_dict()
+
+
+def refine_ph(phn):
+    tone = 0
+    if re.search(r"\d$", phn):
+        tone = int(phn[-1]) + 1
+        phn = phn[:-1]
+    return phn.lower(), tone
+
+
+def refine_syllables(syllables):
+    tones = []
+    phonemes = []
+    for phn_list in syllables:
+        for i in range(len(phn_list)):
+            phn = phn_list[i]
+            phn, tone = refine_ph(phn)
+            phonemes.append(phn)
+            tones.append(tone)
+    return phonemes, tones
+
+
+def text_normalize(text):
+    text = text.lower()
+    text = expand_time_english(text)
+    text = normalize_numbers(text)
+    text = expand_abbreviations(text)
+    return text
+
+model_id = 'bert-base-uncased'
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+def g2p_old(text):
+    tokenized = tokenizer.tokenize(text)
+    # import pdb; pdb.set_trace()
+    phones = []
+    tones = []
+    words = re.split(r"([,;.\-\?\!\s+])", text)
+    for w in words:
+        if w.upper() in eng_dict:
+            phns, tns = refine_syllables(eng_dict[w.upper()])
+            phones += phns
+            tones += tns
+        else:
+            phone_list = list(filter(lambda p: p != " ", _g2p(w)))
+            for ph in phone_list:
+                if ph in arpa:
+                    ph, tn = refine_ph(ph)
+                    phones.append(ph)
+                    tones.append(tn)
+                else:
+                    phones.append(ph)
+                    tones.append(0)
+    # todo: implement word2ph
+    word2ph = [1 for i in phones]
+
+    phones = [post_replace_ph(i) for i in phones]
+    return phones, tones, word2ph
+
+def g2p(text, pad_start_end=True, tokenized=None):
+    if tokenized is None:
+        tokenized = tokenizer.tokenize(text)
+    # import pdb; pdb.set_trace()
+    phs = []
+    ph_groups = []
+    for t in tokenized:
+        if not t.startswith("#"):
+            ph_groups.append([t])
+        else:
+            ph_groups[-1].append(t.replace("#", ""))
+
+    phones = []
+    tones = []
+    word2ph = []
+    for group in ph_groups:
+        w = "".join(group)
+        phone_len = 0
+        word_len = len(group)
+        if w.upper() in eng_dict:
+            phns, tns = refine_syllables(eng_dict[w.upper()])
+            phones += phns
+            tones += tns
+            phone_len += len(phns)
+        else:
+            phone_list = list(filter(lambda p: p != " ", _g2p(w)))
+            for ph in phone_list:
+                if ph in arpa:
+                    ph, tn = refine_ph(ph)
+                    phones.append(ph)
+                    tones.append(tn)
+                else:
+                    phones.append(ph)
+                    tones.append(0)
+                phone_len += 1
+        aaa = distribute_phone(phone_len, word_len)
+        word2ph += aaa
+    phones = [post_replace_ph(i) for i in phones]
+
+    if pad_start_end:
+        phones = ["_"] + phones + ["_"]
+        tones = [0] + tones + [0]
+        word2ph = [1] + word2ph + [1]
+    return phones, tones, word2ph
+
+def get_bert_feature(text, word2ph, device=None):
+    from text import english_bert
+
+    return english_bert.get_bert_feature(text, word2ph, device=device)
+
+if __name__ == "__main__":
+    # print(get_dict())
+    # print(eng_word_to_phoneme("hello"))
+    from text.english_bert import get_bert_feature
+    text = "In this paper, we propose 1 DSPGAN, a N-F-T GAN-based universal vocoder."
+    text = text_normalize(text)
+    phones, tones, word2ph = g2p(text)
+    import pdb; pdb.set_trace()
+    bert = get_bert_feature(text, word2ph)
+
+    print(phones, tones, word2ph, bert.shape)
+
+    # all_phones = set()
+    # for k, syllables in eng_dict.items():
+    #     for group in syllables:
+    #         for ph in group:
+    #             all_phones.add(ph)
+    # print(all_phones)
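A minimal usage sketch for the module above (illustrative, not part of the diff: it assumes the wheel is installed together with the melo dependencies such as g2p_en and transformers, and that bert-base-uncased can be fetched on first import):

# Hypothetical example exercising text_normalize() and g2p() from english.py.
from xinference.thirdparty.melo.text import english

text = english.text_normalize("Dr. Smith left at 10:30 with $45.")
phones, tones, word2ph = english.g2p(text)

# g2p() yields one tone per phone; word2ph maps each BERT token to the
# number of phones it produced, padded with "_" sentinels at both ends.
assert len(phones) == len(tones) == sum(word2ph)
print(phones[:8], tones[:8], word2ph[:8])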
xinference/thirdparty/melo/text/english_bert.py
@@ -0,0 +1,39 @@
+import torch
+from transformers import AutoTokenizer, AutoModelForMaskedLM
+import sys
+
+model_id = 'bert-base-uncased'
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = None
+
+def get_bert_feature(text, word2ph, device=None):
+    global model
+    if (
+        sys.platform == "darwin"
+        and torch.backends.mps.is_available()
+        and device == "cpu"
+    ):
+        device = "mps"
+    if not device:
+        device = "cuda"
+    if model is None:
+        model = AutoModelForMaskedLM.from_pretrained(model_id).to(
+            device
+        )
+    with torch.no_grad():
+        inputs = tokenizer(text, return_tensors="pt")
+        for i in inputs:
+            inputs[i] = inputs[i].to(device)
+        res = model(**inputs, output_hidden_states=True)
+        res = torch.cat(res["hidden_states"][-3:-2], -1)[0].cpu()
+
+    assert inputs["input_ids"].shape[-1] == len(word2ph)
+    word2phone = word2ph
+    phone_level_feature = []
+    for i in range(len(word2phone)):
+        repeat_feature = res[i].repeat(word2phone[i], 1)
+        phone_level_feature.append(repeat_feature)
+
+    phone_level_feature = torch.cat(phone_level_feature, dim=0)
+
+    return phone_level_feature.T
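The token-to-phone expansion at the heart of get_bert_feature() is easy to check in isolation. A standalone sketch (not code from the diff) of the same repeat-and-concatenate step:

import torch

# Each token-level hidden state is repeated once per phone that token produced.
hidden = torch.randn(4, 768)   # 4 BERT tokens, 768-dim hidden states
word2ph = [1, 3, 2, 1]         # phones contributed by each token

phone_level = torch.cat(
    [hidden[i].repeat(word2ph[i], 1) for i in range(len(word2ph))], dim=0
)
assert phone_level.shape == (sum(word2ph), 768)  # (7, 768); the real code then takes .T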
xinference/thirdparty/melo/text/english_utils/__init__.py
File without changes (new empty file).
xinference/thirdparty/melo/text/english_utils/abbreviations.py
@@ -0,0 +1,35 @@
+import re
+
+# List of (regular expression, replacement) pairs for abbreviations in english:
+abbreviations_en = [
+    (re.compile("\\b%s\\." % x[0], re.IGNORECASE), x[1])
+    for x in [
+        ("mrs", "misess"),
+        ("mr", "mister"),
+        ("dr", "doctor"),
+        ("st", "saint"),
+        ("co", "company"),
+        ("jr", "junior"),
+        ("maj", "major"),
+        ("gen", "general"),
+        ("drs", "doctors"),
+        ("rev", "reverend"),
+        ("lt", "lieutenant"),
+        ("hon", "honorable"),
+        ("sgt", "sergeant"),
+        ("capt", "captain"),
+        ("esq", "esquire"),
+        ("ltd", "limited"),
+        ("col", "colonel"),
+        ("ft", "fort"),
+    ]
+]
+
+def expand_abbreviations(text, lang="en"):
+    if lang == "en":
+        _abbreviations = abbreviations_en
+    else:
+        raise NotImplementedError()
+    for regex, replacement in _abbreviations:
+        text = re.sub(regex, replacement, text)
+    return text
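The compiled patterns above are case-insensitive and only fire on the abbreviation followed by a period, and the replacement is always lowercase; this is harmless in practice because text_normalize() in english.py lower-cases its input first. An illustrative call, assuming the installed wheel:

from xinference.thirdparty.melo.text.english_utils.abbreviations import expand_abbreviations

print(expand_abbreviations("Dr. Smith met Mr. Jones."))
# Expected: "doctor Smith met mister Jones."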
xinference/thirdparty/melo/text/english_utils/number_norm.py
@@ -0,0 +1,97 @@
+""" from https://github.com/keithito/tacotron """
+
+import re
+from typing import Dict
+
+import inflect
+
+_inflect = inflect.engine()
+_comma_number_re = re.compile(r"([0-9][0-9\,]+[0-9])")
+_decimal_number_re = re.compile(r"([0-9]+\.[0-9]+)")
+_currency_re = re.compile(r"(£|\$|¥)([0-9\,\.]*[0-9]+)")
+_ordinal_re = re.compile(r"[0-9]+(st|nd|rd|th)")
+_number_re = re.compile(r"-?[0-9]+")
+
+
+def _remove_commas(m):
+    return m.group(1).replace(",", "")
+
+
+def _expand_decimal_point(m):
+    return m.group(1).replace(".", " point ")
+
+
+def __expand_currency(value: str, inflection: Dict[float, str]) -> str:
+    parts = value.replace(",", "").split(".")
+    if len(parts) > 2:
+        return f"{value} {inflection[2]}"  # Unexpected format
+    text = []
+    integer = int(parts[0]) if parts[0] else 0
+    if integer > 0:
+        integer_unit = inflection.get(integer, inflection[2])
+        text.append(f"{integer} {integer_unit}")
+    fraction = int(parts[1]) if len(parts) > 1 and parts[1] else 0
+    if fraction > 0:
+        fraction_unit = inflection.get(fraction / 100, inflection[0.02])
+        text.append(f"{fraction} {fraction_unit}")
+    if len(text) == 0:
+        return f"zero {inflection[2]}"
+    return " ".join(text)
+
+
+def _expand_currency(m: "re.Match") -> str:
+    currencies = {
+        "$": {
+            0.01: "cent",
+            0.02: "cents",
+            1: "dollar",
+            2: "dollars",
+        },
+        "€": {
+            0.01: "cent",
+            0.02: "cents",
+            1: "euro",
+            2: "euros",
+        },
+        "£": {
+            0.01: "penny",
+            0.02: "pence",
+            1: "pound sterling",
+            2: "pounds sterling",
+        },
+        "¥": {
+            # TODO rin
+            0.02: "sen",
+            2: "yen",
+        },
+    }
+    unit = m.group(1)
+    currency = currencies[unit]
+    value = m.group(2)
+    return __expand_currency(value, currency)
+
+
+def _expand_ordinal(m):
+    return _inflect.number_to_words(m.group(0))
+
+
+def _expand_number(m):
+    num = int(m.group(0))
+    if 1000 < num < 3000:
+        if num == 2000:
+            return "two thousand"
+        if 2000 < num < 2010:
+            return "two thousand " + _inflect.number_to_words(num % 100)
+        if num % 100 == 0:
+            return _inflect.number_to_words(num // 100) + " hundred"
+        return _inflect.number_to_words(num, andword="", zero="oh", group=2).replace(", ", " ")
+    return _inflect.number_to_words(num, andword="")
+
+
+def normalize_numbers(text):
+    text = re.sub(_comma_number_re, _remove_commas, text)
+    text = re.sub(_currency_re, _expand_currency, text)
+    text = re.sub(_decimal_number_re, _expand_decimal_point, text)
+    text = re.sub(_ordinal_re, _expand_ordinal, text)
+    text = re.sub(_number_re, _expand_number, text)
+    return text
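Note the substitution order in normalize_numbers(): commas are stripped first, currency is expanded before plain decimals, and bare digits are expanded last, which also spells out the digits emitted by the currency step. An illustrative call, assuming the installed wheel:

from xinference.thirdparty.melo.text.english_utils.number_norm import normalize_numbers

print(normalize_numbers("$3.50 for 2 tickets, row 3rd"))
# Expected: "three dollars fifty cents for two tickets, row third"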
xinference/thirdparty/melo/text/english_utils/time_norm.py
@@ -0,0 +1,47 @@
+import re
+
+import inflect
+
+_inflect = inflect.engine()
+
+_time_re = re.compile(
+    r"""\b
+    ((0?[0-9])|(1[0-1])|(1[2-9])|(2[0-3]))  # hours
+    :
+    ([0-5][0-9])                            # minutes
+    \s*(a\\.m\\.|am|pm|p\\.m\\.|a\\.m|p\\.m)? # am/pm
+    \b""",
+    re.IGNORECASE | re.X,
+)
+
+
+def _expand_num(n: int) -> str:
+    return _inflect.number_to_words(n)
+
+
+def _expand_time_english(match: "re.Match") -> str:
+    hour = int(match.group(1))
+    past_noon = hour >= 12
+    time = []
+    if hour > 12:
+        hour -= 12
+    elif hour == 0:
+        hour = 12
+        past_noon = True
+    time.append(_expand_num(hour))
+
+    minute = int(match.group(6))
+    if minute > 0:
+        if minute < 10:
+            time.append("oh")
+        time.append(_expand_num(minute))
+    am_pm = match.group(7)
+    if am_pm is None:
+        time.append("p m" if past_noon else "a m")
+    else:
+        time.extend(list(am_pm.replace(".", "")))
+    return " ".join(time)
+
+
+def expand_time_english(text: str) -> str:
+    return re.sub(_time_re, _expand_time_english, text)
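Because english.py's text_normalize() calls expand_time_english() before normalize_numbers(), clock times are spelled out before the bare-number pass can split them. An illustrative call, assuming the installed wheel:

from xinference.thirdparty.melo.text.english_utils.time_norm import expand_time_english

print(expand_time_english("Lunch at 12:45, meeting at 9:05 am"))
# Expected: "Lunch at twelve forty-five p m, meeting at nine oh five a m"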
xinference/thirdparty/melo/text/es_phonemizer/__init__.py
File without changes (new empty file).
xinference/thirdparty/melo/text/es_phonemizer/base.py
@@ -0,0 +1,140 @@
+import abc
+from typing import List, Tuple
+
+from .punctuation import Punctuation
+
+
+class BasePhonemizer(abc.ABC):
+    """Base phonemizer class
+
+    Phonemization follows the following steps:
+        1. Preprocessing:
+            - remove empty lines
+            - remove punctuation
+            - keep track of punctuation marks
+
+        2. Phonemization:
+            - convert text to phonemes
+
+        3. Postprocessing:
+            - join phonemes
+            - restore punctuation marks
+
+    Args:
+        language (str):
+            Language used by the phonemizer.
+
+        punctuations (List[str]):
+            List of punctuation marks to be preserved.
+
+        keep_puncs (bool):
+            Whether to preserve punctuation marks or not.
+    """
+
+    def __init__(self, language, punctuations=Punctuation.default_puncs(), keep_puncs=False):
+        # ensure the backend is installed on the system
+        if not self.is_available():
+            raise RuntimeError("{} not installed on your system".format(self.name()))  # pragma: nocover
+
+        # ensure the backend support the requested language
+        self._language = self._init_language(language)
+
+        # setup punctuation processing
+        self._keep_puncs = keep_puncs
+        self._punctuator = Punctuation(punctuations)
+
+    def _init_language(self, language):
+        """Language initialization
+
+        This method may be overloaded in child classes (see Segments backend)
+
+        """
+        if not self.is_supported_language(language):
+            raise RuntimeError(f'language "{language}" is not supported by the ' f"{self.name()} backend")
+        return language
+
+    @property
+    def language(self):
+        """The language code configured to be used for phonemization"""
+        return self._language
+
+    @staticmethod
+    @abc.abstractmethod
+    def name():
+        """The name of the backend"""
+        ...
+
+    @classmethod
+    @abc.abstractmethod
+    def is_available(cls):
+        """Returns True if the backend is installed, False otherwise"""
+        ...
+
+    @classmethod
+    @abc.abstractmethod
+    def version(cls):
+        """Return the backend version as a tuple (major, minor, patch)"""
+        ...
+
+    @staticmethod
+    @abc.abstractmethod
+    def supported_languages():
+        """Return a dict of language codes -> name supported by the backend"""
+        ...
+
+    def is_supported_language(self, language):
+        """Returns True if `language` is supported by the backend"""
+        return language in self.supported_languages()
+
+    @abc.abstractmethod
+    def _phonemize(self, text, separator):
+        """The main phonemization method"""
+
+    def _phonemize_preprocess(self, text) -> Tuple[List[str], List]:
+        """Preprocess the text before phonemization
+
+        1. remove spaces
+        2. remove punctuation
+
+        Override this if you need a different behaviour
+        """
+        text = text.strip()
+        if self._keep_puncs:
+            # a tuple (text, punctuation marks)
+            return self._punctuator.strip_to_restore(text)
+        return [self._punctuator.strip(text)], []
+
+    def _phonemize_postprocess(self, phonemized, punctuations) -> str:
+        """Postprocess the raw phonemized output
+
+        Override this if you need a different behaviour
+        """
+        if self._keep_puncs:
+            return self._punctuator.restore(phonemized, punctuations)[0]
+        return phonemized[0]
+
+    def phonemize(self, text: str, separator="|", language: str = None) -> str:  # pylint: disable=unused-argument
+        """Returns the `text` phonemized for the given language
+
+        Args:
+            text (str):
+                Text to be phonemized.
+
+            separator (str):
+                string separator used between phonemes. Default to '_'.
+
+        Returns:
+            (str): Phonemized text
+        """
+        text, punctuations = self._phonemize_preprocess(text)
+        phonemized = []
+        for t in text:
+            p = self._phonemize(t, separator)
+            phonemized.append(p)
+        phonemized = self._phonemize_postprocess(phonemized, punctuations)
+        return phonemized
+
+    def print_logs(self, level: int = 0):
+        indent = "\t" * level
+        print(f"{indent}| > phoneme language: {self.language}")
+        print(f"{indent}| > phoneme backend: {self.name()}")
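The contract this base class imposes on concrete backends (gruut_wrapper.py in this diff implements it for real) can be summarized with a toy subclass. This is a hypothetical sketch, not code from the package; every abstract member must be supplied:

class UpperCaseBackend(BasePhonemizer):
    # Toy backend: "phonemizes" by upper-casing, just to satisfy the ABC.

    @staticmethod
    def name():
        return "uppercase"

    @classmethod
    def is_available(cls):
        return True

    @classmethod
    def version(cls):
        return (0, 0, 1)

    @staticmethod
    def supported_languages():
        return {"en-us": "English (US)"}

    def _phonemize(self, text, separator):
        # The base class has already stripped punctuation by this point.
        return separator.join(text.upper().split())

# UpperCaseBackend("en-us").phonemize("hello world")  # -> "HELLO|WORLD"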