xinference 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +4 -7
- xinference/client/handlers.py +3 -0
- xinference/core/chat_interface.py +6 -1
- xinference/core/model.py +2 -0
- xinference/core/scheduler.py +4 -7
- xinference/core/supervisor.py +114 -23
- xinference/core/worker.py +70 -4
- xinference/deploy/local.py +2 -1
- xinference/model/audio/core.py +11 -0
- xinference/model/audio/cosyvoice.py +16 -5
- xinference/model/audio/kokoro.py +139 -0
- xinference/model/audio/melotts.py +110 -0
- xinference/model/audio/model_spec.json +80 -0
- xinference/model/audio/model_spec_modelscope.json +18 -0
- xinference/model/audio/whisper.py +35 -10
- xinference/model/llm/llama_cpp/core.py +21 -14
- xinference/model/llm/llm_family.json +527 -1
- xinference/model/llm/llm_family.py +4 -1
- xinference/model/llm/llm_family_modelscope.json +495 -3
- xinference/model/llm/memory.py +1 -1
- xinference/model/llm/mlx/core.py +24 -6
- xinference/model/llm/transformers/core.py +9 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +20 -3
- xinference/model/llm/transformers/utils.py +22 -11
- xinference/model/llm/utils.py +115 -1
- xinference/model/llm/vllm/core.py +14 -4
- xinference/model/llm/vllm/xavier/block.py +3 -4
- xinference/model/llm/vllm/xavier/block_tracker.py +71 -58
- xinference/model/llm/vllm/xavier/collective.py +74 -0
- xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
- xinference/model/llm/vllm/xavier/executor.py +18 -16
- xinference/model/llm/vllm/xavier/scheduler.py +79 -63
- xinference/model/llm/vllm/xavier/test/test_xavier.py +60 -35
- xinference/model/llm/vllm/xavier/transfer.py +53 -32
- xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
- xinference/thirdparty/melo/__init__.py +0 -0
- xinference/thirdparty/melo/api.py +135 -0
- xinference/thirdparty/melo/app.py +61 -0
- xinference/thirdparty/melo/attentions.py +459 -0
- xinference/thirdparty/melo/commons.py +160 -0
- xinference/thirdparty/melo/configs/config.json +94 -0
- xinference/thirdparty/melo/data/example/metadata.list +20 -0
- xinference/thirdparty/melo/data_utils.py +413 -0
- xinference/thirdparty/melo/download_utils.py +67 -0
- xinference/thirdparty/melo/infer.py +25 -0
- xinference/thirdparty/melo/init_downloads.py +14 -0
- xinference/thirdparty/melo/losses.py +58 -0
- xinference/thirdparty/melo/main.py +36 -0
- xinference/thirdparty/melo/mel_processing.py +174 -0
- xinference/thirdparty/melo/models.py +1030 -0
- xinference/thirdparty/melo/modules.py +598 -0
- xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
- xinference/thirdparty/melo/monotonic_align/core.py +46 -0
- xinference/thirdparty/melo/preprocess_text.py +135 -0
- xinference/thirdparty/melo/split_utils.py +174 -0
- xinference/thirdparty/melo/text/__init__.py +35 -0
- xinference/thirdparty/melo/text/chinese.py +199 -0
- xinference/thirdparty/melo/text/chinese_bert.py +107 -0
- xinference/thirdparty/melo/text/chinese_mix.py +253 -0
- xinference/thirdparty/melo/text/cleaner.py +36 -0
- xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
- xinference/thirdparty/melo/text/cmudict.rep +129530 -0
- xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
- xinference/thirdparty/melo/text/english.py +284 -0
- xinference/thirdparty/melo/text/english_bert.py +39 -0
- xinference/thirdparty/melo/text/english_utils/__init__.py +0 -0
- xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
- xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
- xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
- xinference/thirdparty/melo/text/es_phonemizer/__init__.py +0 -0
- xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
- xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
- xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
- xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
- xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
- xinference/thirdparty/melo/text/fr_phonemizer/__init__.py +0 -0
- xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
- xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
- xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
- xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
- xinference/thirdparty/melo/text/french.py +94 -0
- xinference/thirdparty/melo/text/french_bert.py +39 -0
- xinference/thirdparty/melo/text/japanese.py +647 -0
- xinference/thirdparty/melo/text/japanese_bert.py +49 -0
- xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
- xinference/thirdparty/melo/text/korean.py +192 -0
- xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
- xinference/thirdparty/melo/text/spanish.py +122 -0
- xinference/thirdparty/melo/text/spanish_bert.py +39 -0
- xinference/thirdparty/melo/text/symbols.py +290 -0
- xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
- xinference/thirdparty/melo/train.py +635 -0
- xinference/thirdparty/melo/train.sh +19 -0
- xinference/thirdparty/melo/transforms.py +209 -0
- xinference/thirdparty/melo/utils.py +424 -0
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.1eb206d1.js → main.b0936c54.js} +3 -3
- xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/METADATA +37 -27
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/RECORD +122 -45
- xinference/web/ui/build/static/js/main.1eb206d1.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2213d49de260e1f67c888081b18f120f5225462b829ae57c9e05a05cec83689d.json +0 -1
- /xinference/web/ui/build/static/js/{main.1eb206d1.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/LICENSE +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/WHEEL +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/entry_points.txt +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/top_level.txt +0 -0
xinference/thirdparty/melo/text/french_bert.py
@@ -0,0 +1,39 @@
+import torch
+from transformers import AutoTokenizer, AutoModelForMaskedLM
+import sys
+
+model_id = 'dbmdz/bert-base-french-europeana-cased'
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = None
+
+def get_bert_feature(text, word2ph, device=None):
+    global model
+    if (
+        sys.platform == "darwin"
+        and torch.backends.mps.is_available()
+        and device == "cpu"
+    ):
+        device = "mps"
+    if not device:
+        device = "cuda"
+    if model is None:
+        model = AutoModelForMaskedLM.from_pretrained(model_id).to(
+            device
+        )
+    with torch.no_grad():
+        inputs = tokenizer(text, return_tensors="pt")
+        for i in inputs:
+            inputs[i] = inputs[i].to(device)
+        res = model(**inputs, output_hidden_states=True)
+        res = torch.cat(res["hidden_states"][-3:-2], -1)[0].cpu()
+
+    assert inputs["input_ids"].shape[-1] == len(word2ph)
+    word2phone = word2ph
+    phone_level_feature = []
+    for i in range(len(word2phone)):
+        repeat_feature = res[i].repeat(word2phone[i], 1)
+        phone_level_feature.append(repeat_feature)
+
+    phone_level_feature = torch.cat(phone_level_feature, dim=0)
+
+    return phone_level_feature.T
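
For orientation, here is a minimal usage sketch for the get_bert_feature function added above; it is hypothetical and not part of the diff. word2ph needs one phone count per BERT token, [CLS] and [SEP] included, or the assert on input_ids fails, so the sketch derives the token count from the tokenizer; the phone counts themselves are made up for illustration. Note that importing the module already downloads the tokenizer, and passing device="cpu" flips to "mps" on macOS when MPS is available.

# Hypothetical usage sketch (not part of the diff).
from transformers import AutoTokenizer
from xinference.thirdparty.melo.text.french_bert import get_bert_feature

text = "Bonjour le monde"
tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-french-europeana-cased")
n_tokens = tokenizer(text, return_tensors="pt")["input_ids"].shape[-1]

# One (illustrative) phone count per BERT token, [CLS] and [SEP] included.
word2ph = [2] * n_tokens

feature = get_bert_feature(text, word2ph, device="cpu")
print(feature.shape)  # (hidden_size, sum(word2ph)): one column per phone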
@@ -0,0 +1,647 @@
|
|
|
1
|
+
# Convert Japanese text to phonemes which is
|
|
2
|
+
# compatible with Julius https://github.com/julius-speech/segmentation-kit
|
|
3
|
+
import re
|
|
4
|
+
import unicodedata
|
|
5
|
+
|
|
6
|
+
from transformers import AutoTokenizer
|
|
7
|
+
|
|
8
|
+
from . import symbols
|
|
9
|
+
punctuation = ["!", "?", "…", ",", ".", "'", "-"]
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
import MeCab
|
|
13
|
+
except ImportError as e:
|
|
14
|
+
raise ImportError("Japanese requires mecab-python3 and unidic-lite.") from e
|
|
15
|
+
from num2words import num2words
|
|
16
|
+
|
|
17
|
+
_CONVRULES = [
|
|
18
|
+
# Conversion of 2 letters
|
|
19
|
+
"アァ/ a a",
|
|
20
|
+
"イィ/ i i",
|
|
21
|
+
"イェ/ i e",
|
|
22
|
+
"イャ/ y a",
|
|
23
|
+
"ウゥ/ u:",
|
|
24
|
+
"エェ/ e e",
|
|
25
|
+
"オォ/ o:",
|
|
26
|
+
"カァ/ k a:",
|
|
27
|
+
"キィ/ k i:",
|
|
28
|
+
"クゥ/ k u:",
|
|
29
|
+
"クャ/ ky a",
|
|
30
|
+
"クュ/ ky u",
|
|
31
|
+
"クョ/ ky o",
|
|
32
|
+
"ケェ/ k e:",
|
|
33
|
+
"コォ/ k o:",
|
|
34
|
+
"ガァ/ g a:",
|
|
35
|
+
"ギィ/ g i:",
|
|
36
|
+
"グゥ/ g u:",
|
|
37
|
+
"グャ/ gy a",
|
|
38
|
+
"グュ/ gy u",
|
|
39
|
+
"グョ/ gy o",
|
|
40
|
+
"ゲェ/ g e:",
|
|
41
|
+
"ゴォ/ g o:",
|
|
42
|
+
"サァ/ s a:",
|
|
43
|
+
"シィ/ sh i:",
|
|
44
|
+
"スゥ/ s u:",
|
|
45
|
+
"スャ/ sh a",
|
|
46
|
+
"スュ/ sh u",
|
|
47
|
+
"スョ/ sh o",
|
|
48
|
+
"セェ/ s e:",
|
|
49
|
+
"ソォ/ s o:",
|
|
50
|
+
"ザァ/ z a:",
|
|
51
|
+
"ジィ/ j i:",
|
|
52
|
+
"ズゥ/ z u:",
|
|
53
|
+
"ズャ/ zy a",
|
|
54
|
+
"ズュ/ zy u",
|
|
55
|
+
"ズョ/ zy o",
|
|
56
|
+
"ゼェ/ z e:",
|
|
57
|
+
"ゾォ/ z o:",
|
|
58
|
+
"タァ/ t a:",
|
|
59
|
+
"チィ/ ch i:",
|
|
60
|
+
"ツァ/ ts a",
|
|
61
|
+
"ツィ/ ts i",
|
|
62
|
+
"ツゥ/ ts u:",
|
|
63
|
+
"ツャ/ ch a",
|
|
64
|
+
"ツュ/ ch u",
|
|
65
|
+
"ツョ/ ch o",
|
|
66
|
+
"ツェ/ ts e",
|
|
67
|
+
"ツォ/ ts o",
|
|
68
|
+
"テェ/ t e:",
|
|
69
|
+
"トォ/ t o:",
|
|
70
|
+
"ダァ/ d a:",
|
|
71
|
+
"ヂィ/ j i:",
|
|
72
|
+
"ヅゥ/ d u:",
|
|
73
|
+
"ヅャ/ zy a",
|
|
74
|
+
"ヅュ/ zy u",
|
|
75
|
+
"ヅョ/ zy o",
|
|
76
|
+
"デェ/ d e:",
|
|
77
|
+
"ドォ/ d o:",
|
|
78
|
+
"ナァ/ n a:",
|
|
79
|
+
"ニィ/ n i:",
|
|
80
|
+
"ヌゥ/ n u:",
|
|
81
|
+
"ヌャ/ ny a",
|
|
82
|
+
"ヌュ/ ny u",
|
|
83
|
+
"ヌョ/ ny o",
|
|
84
|
+
"ネェ/ n e:",
|
|
85
|
+
"ノォ/ n o:",
|
|
86
|
+
"ハァ/ h a:",
|
|
87
|
+
"ヒィ/ h i:",
|
|
88
|
+
"フゥ/ f u:",
|
|
89
|
+
"フャ/ hy a",
|
|
90
|
+
"フュ/ hy u",
|
|
91
|
+
"フョ/ hy o",
|
|
92
|
+
"ヘェ/ h e:",
|
|
93
|
+
"ホォ/ h o:",
|
|
94
|
+
"バァ/ b a:",
|
|
95
|
+
"ビィ/ b i:",
|
|
96
|
+
"ブゥ/ b u:",
|
|
97
|
+
"フャ/ hy a",
|
|
98
|
+
"ブュ/ by u",
|
|
99
|
+
"フョ/ hy o",
|
|
100
|
+
"ベェ/ b e:",
|
|
101
|
+
"ボォ/ b o:",
|
|
102
|
+
"パァ/ p a:",
|
|
103
|
+
"ピィ/ p i:",
|
|
104
|
+
"プゥ/ p u:",
|
|
105
|
+
"プャ/ py a",
|
|
106
|
+
"プュ/ py u",
|
|
107
|
+
"プョ/ py o",
|
|
108
|
+
"ペェ/ p e:",
|
|
109
|
+
"ポォ/ p o:",
|
|
110
|
+
"マァ/ m a:",
|
|
111
|
+
"ミィ/ m i:",
|
|
112
|
+
"ムゥ/ m u:",
|
|
113
|
+
"ムャ/ my a",
|
|
114
|
+
"ムュ/ my u",
|
|
115
|
+
"ムョ/ my o",
|
|
116
|
+
"メェ/ m e:",
|
|
117
|
+
"モォ/ m o:",
|
|
118
|
+
"ヤァ/ y a:",
|
|
119
|
+
"ユゥ/ y u:",
|
|
120
|
+
"ユャ/ y a:",
|
|
121
|
+
"ユュ/ y u:",
|
|
122
|
+
"ユョ/ y o:",
|
|
123
|
+
"ヨォ/ y o:",
|
|
124
|
+
"ラァ/ r a:",
|
|
125
|
+
"リィ/ r i:",
|
|
126
|
+
"ルゥ/ r u:",
|
|
127
|
+
"ルャ/ ry a",
|
|
128
|
+
"ルュ/ ry u",
|
|
129
|
+
"ルョ/ ry o",
|
|
130
|
+
"レェ/ r e:",
|
|
131
|
+
"ロォ/ r o:",
|
|
132
|
+
"ワァ/ w a:",
|
|
133
|
+
"ヲォ/ o:",
|
|
134
|
+
"ディ/ d i",
|
|
135
|
+
"デェ/ d e:",
|
|
136
|
+
"デャ/ dy a",
|
|
137
|
+
"デュ/ dy u",
|
|
138
|
+
"デョ/ dy o",
|
|
139
|
+
"ティ/ t i",
|
|
140
|
+
"テェ/ t e:",
|
|
141
|
+
"テャ/ ty a",
|
|
142
|
+
"テュ/ ty u",
|
|
143
|
+
"テョ/ ty o",
|
|
144
|
+
"スィ/ s i",
|
|
145
|
+
"ズァ/ z u a",
|
|
146
|
+
"ズィ/ z i",
|
|
147
|
+
"ズゥ/ z u",
|
|
148
|
+
"ズャ/ zy a",
|
|
149
|
+
"ズュ/ zy u",
|
|
150
|
+
"ズョ/ zy o",
|
|
151
|
+
"ズェ/ z e",
|
|
152
|
+
"ズォ/ z o",
|
|
153
|
+
"キャ/ ky a",
|
|
154
|
+
"キュ/ ky u",
|
|
155
|
+
"キョ/ ky o",
|
|
156
|
+
"シャ/ sh a",
|
|
157
|
+
"シュ/ sh u",
|
|
158
|
+
"シェ/ sh e",
|
|
159
|
+
"ショ/ sh o",
|
|
160
|
+
"チャ/ ch a",
|
|
161
|
+
"チュ/ ch u",
|
|
162
|
+
"チェ/ ch e",
|
|
163
|
+
"チョ/ ch o",
|
|
164
|
+
"トゥ/ t u",
|
|
165
|
+
"トャ/ ty a",
|
|
166
|
+
"トュ/ ty u",
|
|
167
|
+
"トョ/ ty o",
|
|
168
|
+
"ドァ/ d o a",
|
|
169
|
+
"ドゥ/ d u",
|
|
170
|
+
"ドャ/ dy a",
|
|
171
|
+
"ドュ/ dy u",
|
|
172
|
+
"ドョ/ dy o",
|
|
173
|
+
"ドォ/ d o:",
|
|
174
|
+
"ニャ/ ny a",
|
|
175
|
+
"ニュ/ ny u",
|
|
176
|
+
"ニョ/ ny o",
|
|
177
|
+
"ヒャ/ hy a",
|
|
178
|
+
"ヒュ/ hy u",
|
|
179
|
+
"ヒョ/ hy o",
|
|
180
|
+
"ミャ/ my a",
|
|
181
|
+
"ミュ/ my u",
|
|
182
|
+
"ミョ/ my o",
|
|
183
|
+
"リャ/ ry a",
|
|
184
|
+
"リュ/ ry u",
|
|
185
|
+
"リョ/ ry o",
|
|
186
|
+
"ギャ/ gy a",
|
|
187
|
+
"ギュ/ gy u",
|
|
188
|
+
"ギョ/ gy o",
|
|
189
|
+
"ヂェ/ j e",
|
|
190
|
+
"ヂャ/ j a",
|
|
191
|
+
"ヂュ/ j u",
|
|
192
|
+
"ヂョ/ j o",
|
|
193
|
+
"ジェ/ j e",
|
|
194
|
+
"ジャ/ j a",
|
|
195
|
+
"ジュ/ j u",
|
|
196
|
+
"ジョ/ j o",
|
|
197
|
+
"ビャ/ by a",
|
|
198
|
+
"ビュ/ by u",
|
|
199
|
+
"ビョ/ by o",
|
|
200
|
+
"ピャ/ py a",
|
|
201
|
+
"ピュ/ py u",
|
|
202
|
+
"ピョ/ py o",
|
|
203
|
+
"ウァ/ u a",
|
|
204
|
+
"ウィ/ w i",
|
|
205
|
+
"ウェ/ w e",
|
|
206
|
+
"ウォ/ w o",
|
|
207
|
+
"ファ/ f a",
|
|
208
|
+
"フィ/ f i",
|
|
209
|
+
"フゥ/ f u",
|
|
210
|
+
"フャ/ hy a",
|
|
211
|
+
"フュ/ hy u",
|
|
212
|
+
"フョ/ hy o",
|
|
213
|
+
"フェ/ f e",
|
|
214
|
+
"フォ/ f o",
|
|
215
|
+
"ヴァ/ b a",
|
|
216
|
+
"ヴィ/ b i",
|
|
217
|
+
"ヴェ/ b e",
|
|
218
|
+
"ヴォ/ b o",
|
|
219
|
+
"ヴュ/ by u",
|
|
220
|
+
# Conversion of 1 letter
|
|
221
|
+
"ア/ a",
|
|
222
|
+
"イ/ i",
|
|
223
|
+
"ウ/ u",
|
|
224
|
+
"エ/ e",
|
|
225
|
+
"オ/ o",
|
|
226
|
+
"カ/ k a",
|
|
227
|
+
"キ/ k i",
|
|
228
|
+
"ク/ k u",
|
|
229
|
+
"ケ/ k e",
|
|
230
|
+
"コ/ k o",
|
|
231
|
+
"サ/ s a",
|
|
232
|
+
"シ/ sh i",
|
|
233
|
+
"ス/ s u",
|
|
234
|
+
"セ/ s e",
|
|
235
|
+
"ソ/ s o",
|
|
236
|
+
"タ/ t a",
|
|
237
|
+
"チ/ ch i",
|
|
238
|
+
"ツ/ ts u",
|
|
239
|
+
"テ/ t e",
|
|
240
|
+
"ト/ t o",
|
|
241
|
+
"ナ/ n a",
|
|
242
|
+
"ニ/ n i",
|
|
243
|
+
"ヌ/ n u",
|
|
244
|
+
"ネ/ n e",
|
|
245
|
+
"ノ/ n o",
|
|
246
|
+
"ハ/ h a",
|
|
247
|
+
"ヒ/ h i",
|
|
248
|
+
"フ/ f u",
|
|
249
|
+
"ヘ/ h e",
|
|
250
|
+
"ホ/ h o",
|
|
251
|
+
"マ/ m a",
|
|
252
|
+
"ミ/ m i",
|
|
253
|
+
"ム/ m u",
|
|
254
|
+
"メ/ m e",
|
|
255
|
+
"モ/ m o",
|
|
256
|
+
"ラ/ r a",
|
|
257
|
+
"リ/ r i",
|
|
258
|
+
"ル/ r u",
|
|
259
|
+
"レ/ r e",
|
|
260
|
+
"ロ/ r o",
|
|
261
|
+
"ガ/ g a",
|
|
262
|
+
"ギ/ g i",
|
|
263
|
+
"グ/ g u",
|
|
264
|
+
"ゲ/ g e",
|
|
265
|
+
"ゴ/ g o",
|
|
266
|
+
"ザ/ z a",
|
|
267
|
+
"ジ/ j i",
|
|
268
|
+
"ズ/ z u",
|
|
269
|
+
"ゼ/ z e",
|
|
270
|
+
"ゾ/ z o",
|
|
271
|
+
"ダ/ d a",
|
|
272
|
+
"ヂ/ j i",
|
|
273
|
+
"ヅ/ z u",
|
|
274
|
+
"デ/ d e",
|
|
275
|
+
"ド/ d o",
|
|
276
|
+
"バ/ b a",
|
|
277
|
+
"ビ/ b i",
|
|
278
|
+
"ブ/ b u",
|
|
279
|
+
"ベ/ b e",
|
|
280
|
+
"ボ/ b o",
|
|
281
|
+
"パ/ p a",
|
|
282
|
+
"ピ/ p i",
|
|
283
|
+
"プ/ p u",
|
|
284
|
+
"ペ/ p e",
|
|
285
|
+
"ポ/ p o",
|
|
286
|
+
"ヤ/ y a",
|
|
287
|
+
"ユ/ y u",
|
|
288
|
+
"ヨ/ y o",
|
|
289
|
+
"ワ/ w a",
|
|
290
|
+
"ヰ/ i",
|
|
291
|
+
"ヱ/ e",
|
|
292
|
+
"ヲ/ o",
|
|
293
|
+
"ン/ N",
|
|
294
|
+
"ッ/ q",
|
|
295
|
+
"ヴ/ b u",
|
|
296
|
+
"ー/:",
|
|
297
|
+
# Try converting broken text
|
|
298
|
+
"ァ/ a",
|
|
299
|
+
"ィ/ i",
|
|
300
|
+
"ゥ/ u",
|
|
301
|
+
"ェ/ e",
|
|
302
|
+
"ォ/ o",
|
|
303
|
+
"ヮ/ w a",
|
|
304
|
+
"ォ/ o",
|
|
305
|
+
# Try converting broken text
|
|
306
|
+
"ャ/ y a",
|
|
307
|
+
"ョ/ y o",
|
|
308
|
+
"ュ/ y u",
|
|
309
|
+
"琦/ ch i",
|
|
310
|
+
"ヶ/ k e",
|
|
311
|
+
"髙/ t a k a",
|
|
312
|
+
"煞/ sh y a",
|
|
313
|
+
# Symbols
|
|
314
|
+
"、/ ,",
|
|
315
|
+
"。/ .",
|
|
316
|
+
"!/ !",
|
|
317
|
+
"?/ ?",
|
|
318
|
+
"・/ ,",
|
|
319
|
+
]
|
|
320
|
+
|
|
321
|
+
_COLON_RX = re.compile(":+")
|
|
322
|
+
_REJECT_RX = re.compile("[^ a-zA-Z:,.?]")
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def _makerulemap():
|
|
326
|
+
l = [tuple(x.split("/")) for x in _CONVRULES]
|
|
327
|
+
return tuple({k: v for k, v in l if len(k) == i} for i in (1, 2))
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
_RULEMAP1, _RULEMAP2 = _makerulemap()
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def kata2phoneme(text: str) -> str:
|
|
334
|
+
"""Convert katakana text to phonemes."""
|
|
335
|
+
text = text.strip()
|
|
336
|
+
res = []
|
|
337
|
+
while text:
|
|
338
|
+
if len(text) >= 2:
|
|
339
|
+
x = _RULEMAP2.get(text[:2])
|
|
340
|
+
if x is not None:
|
|
341
|
+
text = text[2:]
|
|
342
|
+
res += x.split(" ")[1:]
|
|
343
|
+
continue
|
|
344
|
+
x = _RULEMAP1.get(text[0])
|
|
345
|
+
if x is not None:
|
|
346
|
+
text = text[1:]
|
|
347
|
+
res += x.split(" ")[1:]
|
|
348
|
+
continue
|
|
349
|
+
res.append(text[0])
|
|
350
|
+
text = text[1:]
|
|
351
|
+
# res = _COLON_RX.sub(":", res)
|
|
352
|
+
return res
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
_KATAKANA = "".join(chr(ch) for ch in range(ord("ァ"), ord("ン") + 1))
|
|
356
|
+
_HIRAGANA = "".join(chr(ch) for ch in range(ord("ぁ"), ord("ん") + 1))
|
|
357
|
+
_HIRA2KATATRANS = str.maketrans(_HIRAGANA, _KATAKANA)
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def hira2kata(text: str) -> str:
|
|
361
|
+
text = text.translate(_HIRA2KATATRANS)
|
|
362
|
+
return text.replace("う゛", "ヴ")
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
_SYMBOL_TOKENS = set(list("・、。?!"))
|
|
366
|
+
_NO_YOMI_TOKENS = set(list("「」『』―()[][]"))
|
|
367
|
+
_TAGGER = MeCab.Tagger()
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def text2kata(text: str) -> str:
|
|
371
|
+
parsed = _TAGGER.parse(text)
|
|
372
|
+
res = []
|
|
373
|
+
for line in parsed.split("\n"):
|
|
374
|
+
if line == "EOS":
|
|
375
|
+
break
|
|
376
|
+
parts = line.split("\t")
|
|
377
|
+
|
|
378
|
+
word, yomi = parts[0], parts[1]
|
|
379
|
+
if yomi:
|
|
380
|
+
try:
|
|
381
|
+
res.append(yomi.split(',')[6])
|
|
382
|
+
except:
|
|
383
|
+
import pdb; pdb.set_trace()
|
|
384
|
+
else:
|
|
385
|
+
if word in _SYMBOL_TOKENS:
|
|
386
|
+
res.append(word)
|
|
387
|
+
elif word in ("っ", "ッ"):
|
|
388
|
+
res.append("ッ")
|
|
389
|
+
elif word in _NO_YOMI_TOKENS:
|
|
390
|
+
pass
|
|
391
|
+
else:
|
|
392
|
+
res.append(word)
|
|
393
|
+
return hira2kata("".join(res))
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
_ALPHASYMBOL_YOMI = {
|
|
397
|
+
"#": "シャープ",
|
|
398
|
+
"%": "パーセント",
|
|
399
|
+
"&": "アンド",
|
|
400
|
+
"+": "プラス",
|
|
401
|
+
"-": "マイナス",
|
|
402
|
+
":": "コロン",
|
|
403
|
+
";": "セミコロン",
|
|
404
|
+
"<": "小なり",
|
|
405
|
+
"=": "イコール",
|
|
406
|
+
">": "大なり",
|
|
407
|
+
"@": "アット",
|
|
408
|
+
"a": "エー",
|
|
409
|
+
"b": "ビー",
|
|
410
|
+
"c": "シー",
|
|
411
|
+
"d": "ディー",
|
|
412
|
+
"e": "イー",
|
|
413
|
+
"f": "エフ",
|
|
414
|
+
"g": "ジー",
|
|
415
|
+
"h": "エイチ",
|
|
416
|
+
"i": "アイ",
|
|
417
|
+
"j": "ジェー",
|
|
418
|
+
"k": "ケー",
|
|
419
|
+
"l": "エル",
|
|
420
|
+
"m": "エム",
|
|
421
|
+
"n": "エヌ",
|
|
422
|
+
"o": "オー",
|
|
423
|
+
"p": "ピー",
|
|
424
|
+
"q": "キュー",
|
|
425
|
+
"r": "アール",
|
|
426
|
+
"s": "エス",
|
|
427
|
+
"t": "ティー",
|
|
428
|
+
"u": "ユー",
|
|
429
|
+
"v": "ブイ",
|
|
430
|
+
"w": "ダブリュー",
|
|
431
|
+
"x": "エックス",
|
|
432
|
+
"y": "ワイ",
|
|
433
|
+
"z": "ゼット",
|
|
434
|
+
"α": "アルファ",
|
|
435
|
+
"β": "ベータ",
|
|
436
|
+
"γ": "ガンマ",
|
|
437
|
+
"δ": "デルタ",
|
|
438
|
+
"ε": "イプシロン",
|
|
439
|
+
"ζ": "ゼータ",
|
|
440
|
+
"η": "イータ",
|
|
441
|
+
"θ": "シータ",
|
|
442
|
+
"ι": "イオタ",
|
|
443
|
+
"κ": "カッパ",
|
|
444
|
+
"λ": "ラムダ",
|
|
445
|
+
"μ": "ミュー",
|
|
446
|
+
"ν": "ニュー",
|
|
447
|
+
"ξ": "クサイ",
|
|
448
|
+
"ο": "オミクロン",
|
|
449
|
+
"π": "パイ",
|
|
450
|
+
"ρ": "ロー",
|
|
451
|
+
"σ": "シグマ",
|
|
452
|
+
"τ": "タウ",
|
|
453
|
+
"υ": "ウプシロン",
|
|
454
|
+
"φ": "ファイ",
|
|
455
|
+
"χ": "カイ",
|
|
456
|
+
"ψ": "プサイ",
|
|
457
|
+
"ω": "オメガ",
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
_NUMBER_WITH_SEPARATOR_RX = re.compile("[0-9]{1,3}(,[0-9]{3})+")
|
|
462
|
+
_CURRENCY_MAP = {"$": "ドル", "¥": "円", "£": "ポンド", "€": "ユーロ"}
|
|
463
|
+
_CURRENCY_RX = re.compile(r"([$¥£€])([0-9.]*[0-9])")
|
|
464
|
+
_NUMBER_RX = re.compile(r"[0-9]+(\.[0-9]+)?")
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
def japanese_convert_numbers_to_words(text: str) -> str:
|
|
468
|
+
res = _NUMBER_WITH_SEPARATOR_RX.sub(lambda m: m[0].replace(",", ""), text)
|
|
469
|
+
res = _CURRENCY_RX.sub(lambda m: m[2] + _CURRENCY_MAP.get(m[1], m[1]), res)
|
|
470
|
+
res = _NUMBER_RX.sub(lambda m: num2words(m[0], lang="ja"), res)
|
|
471
|
+
return res
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
def japanese_convert_alpha_symbols_to_words(text: str) -> str:
|
|
475
|
+
return "".join([_ALPHASYMBOL_YOMI.get(ch, ch) for ch in text.lower()])
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
def japanese_text_to_phonemes(text: str) -> str:
|
|
479
|
+
"""Convert Japanese text to phonemes."""
|
|
480
|
+
res = unicodedata.normalize("NFKC", text)
|
|
481
|
+
res = japanese_convert_numbers_to_words(res)
|
|
482
|
+
res = japanese_convert_alpha_symbols_to_words(res)
|
|
483
|
+
res = text2kata(res)
|
|
484
|
+
res = kata2phoneme(res)
|
|
485
|
+
return res
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
def is_japanese_character(char):
|
|
489
|
+
# 定义日语文字系统的 Unicode 范围
|
|
490
|
+
japanese_ranges = [
|
|
491
|
+
(0x3040, 0x309F), # 平假名
|
|
492
|
+
(0x30A0, 0x30FF), # 片假名
|
|
493
|
+
(0x4E00, 0x9FFF), # 汉字 (CJK Unified Ideographs)
|
|
494
|
+
(0x3400, 0x4DBF), # 汉字扩展 A
|
|
495
|
+
(0x20000, 0x2A6DF), # 汉字扩展 B
|
|
496
|
+
# 可以根据需要添加其他汉字扩展范围
|
|
497
|
+
]
|
|
498
|
+
|
|
499
|
+
# 将字符的 Unicode 编码转换为整数
|
|
500
|
+
char_code = ord(char)
|
|
501
|
+
|
|
502
|
+
# 检查字符是否在任何一个日语范围内
|
|
503
|
+
for start, end in japanese_ranges:
|
|
504
|
+
if start <= char_code <= end:
|
|
505
|
+
return True
|
|
506
|
+
|
|
507
|
+
return False
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
rep_map = {
|
|
511
|
+
":": ",",
|
|
512
|
+
";": ",",
|
|
513
|
+
",": ",",
|
|
514
|
+
"。": ".",
|
|
515
|
+
"!": "!",
|
|
516
|
+
"?": "?",
|
|
517
|
+
"\n": ".",
|
|
518
|
+
"·": ",",
|
|
519
|
+
"、": ",",
|
|
520
|
+
"...": "…",
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def replace_punctuation(text):
|
|
525
|
+
pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
|
|
526
|
+
|
|
527
|
+
replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
|
|
528
|
+
|
|
529
|
+
replaced_text = re.sub(
|
|
530
|
+
r"[^\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF\u3400-\u4DBF"
|
|
531
|
+
+ "".join(punctuation)
|
|
532
|
+
+ r"]+",
|
|
533
|
+
"",
|
|
534
|
+
replaced_text,
|
|
535
|
+
)
|
|
536
|
+
|
|
537
|
+
return replaced_text
|
|
538
|
+
|
|
539
|
+
from pykakasi import kakasi
|
|
540
|
+
# Initialize kakasi object
|
|
541
|
+
kakasi = kakasi()
|
|
542
|
+
# Set options for converting Chinese characters to Katakana
|
|
543
|
+
kakasi.setMode("J", "K") # Chinese to Katakana
|
|
544
|
+
kakasi.setMode("H", "K") # Hiragana to Katakana
|
|
545
|
+
# Convert Chinese characters to Katakana
|
|
546
|
+
conv = kakasi.getConverter()
|
|
547
|
+
|
|
548
|
+
def text_normalize(text):
|
|
549
|
+
res = unicodedata.normalize("NFKC", text)
|
|
550
|
+
res = japanese_convert_numbers_to_words(res)
|
|
551
|
+
res = "".join([i for i in res if is_japanese_character(i)])
|
|
552
|
+
res = replace_punctuation(res)
|
|
553
|
+
res = conv.do(res)
|
|
554
|
+
return res
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
def distribute_phone(n_phone, n_word):
|
|
558
|
+
phones_per_word = [0] * n_word
|
|
559
|
+
for task in range(n_phone):
|
|
560
|
+
min_tasks = min(phones_per_word)
|
|
561
|
+
min_index = phones_per_word.index(min_tasks)
|
|
562
|
+
phones_per_word[min_index] += 1
|
|
563
|
+
return phones_per_word
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
# tokenizer = AutoTokenizer.from_pretrained('cl-tohoku/bert-base-japanese-v3')
|
|
568
|
+
|
|
569
|
+
model_id = 'tohoku-nlp/bert-base-japanese-v3'
|
|
570
|
+
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
|
571
|
+
def g2p(norm_text):
|
|
572
|
+
|
|
573
|
+
tokenized = tokenizer.tokenize(norm_text)
|
|
574
|
+
phs = []
|
|
575
|
+
ph_groups = []
|
|
576
|
+
for t in tokenized:
|
|
577
|
+
if not t.startswith("#"):
|
|
578
|
+
ph_groups.append([t])
|
|
579
|
+
else:
|
|
580
|
+
ph_groups[-1].append(t.replace("#", ""))
|
|
581
|
+
word2ph = []
|
|
582
|
+
for group in ph_groups:
|
|
583
|
+
text = ""
|
|
584
|
+
for ch in group:
|
|
585
|
+
text += ch
|
|
586
|
+
if text == '[UNK]':
|
|
587
|
+
phs += ['_']
|
|
588
|
+
word2ph += [1]
|
|
589
|
+
continue
|
|
590
|
+
elif text in punctuation:
|
|
591
|
+
phs += [text]
|
|
592
|
+
word2ph += [1]
|
|
593
|
+
continue
|
|
594
|
+
# import pdb; pdb.set_trace()
|
|
595
|
+
# phonemes = japanese_text_to_phonemes(text)
|
|
596
|
+
phonemes = kata2phoneme(text)
|
|
597
|
+
# phonemes = [i for i in phonemes if i in symbols]
|
|
598
|
+
for i in phonemes:
|
|
599
|
+
assert i in symbols, (group, norm_text, tokenized, i)
|
|
600
|
+
phone_len = len(phonemes)
|
|
601
|
+
word_len = len(group)
|
|
602
|
+
|
|
603
|
+
aaa = distribute_phone(phone_len, word_len)
|
|
604
|
+
assert len(aaa) == word_len
|
|
605
|
+
word2ph += aaa
|
|
606
|
+
|
|
607
|
+
phs += phonemes
|
|
608
|
+
phones = ["_"] + phs + ["_"]
|
|
609
|
+
tones = [0 for i in phones]
|
|
610
|
+
word2ph = [1] + word2ph + [1]
|
|
611
|
+
assert len(word2ph) == len(tokenized) + 2
|
|
612
|
+
return phones, tones, word2ph
|
|
613
|
+
|
|
614
|
+
def get_bert_feature(text, word2ph, device):
|
|
615
|
+
from text import japanese_bert
|
|
616
|
+
|
|
617
|
+
return japanese_bert.get_bert_feature(text, word2ph, device=device)
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
if __name__ == "__main__":
|
|
621
|
+
# tokenizer = AutoTokenizer.from_pretrained("./bert/bert-base-japanese-v3")
|
|
622
|
+
text = "こんにちは、世界!..."
|
|
623
|
+
text = 'ええ、僕はおきなと申します。こちらの小さいわらべは杏子。ご挨拶が遅れてしまいすみません。あなたの名は?'
|
|
624
|
+
text = 'あの、お前以外のみんなは、全員生きてること?'
|
|
625
|
+
from text.japanese_bert import get_bert_feature
|
|
626
|
+
|
|
627
|
+
text = text_normalize(text)
|
|
628
|
+
print(text)
|
|
629
|
+
phones, tones, word2ph = g2p(text)
|
|
630
|
+
bert = get_bert_feature(text, word2ph)
|
|
631
|
+
|
|
632
|
+
print(phones, tones, word2ph, bert.shape)
|
|
633
|
+
|
|
634
|
+
# if __name__ == '__main__':
|
|
635
|
+
# from pykakasi import kakasi
|
|
636
|
+
# # Initialize kakasi object
|
|
637
|
+
# kakasi = kakasi()
|
|
638
|
+
|
|
639
|
+
# # Set options for converting Chinese characters to Katakana
|
|
640
|
+
# kakasi.setMode("J", "H") # Chinese to Katakana
|
|
641
|
+
# kakasi.setMode("K", "H") # Hiragana to Katakana
|
|
642
|
+
|
|
643
|
+
# # Convert Chinese characters to Katakana
|
|
644
|
+
# conv = kakasi.getConverter()
|
|
645
|
+
# katakana_text = conv.do('ええ、僕はおきなと申します。こちらの小さいわらべは杏子。ご挨拶が遅れてしまいすみません。あなたの名は?') # Replace with your Chinese text
|
|
646
|
+
|
|
647
|
+
# print(katakana_text) # Output: ニーハオセカイ
|