xinference 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (124)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +4 -7
  3. xinference/client/handlers.py +3 -0
  4. xinference/core/chat_interface.py +6 -1
  5. xinference/core/model.py +2 -0
  6. xinference/core/scheduler.py +4 -7
  7. xinference/core/supervisor.py +114 -23
  8. xinference/core/worker.py +70 -4
  9. xinference/deploy/local.py +2 -1
  10. xinference/model/audio/core.py +11 -0
  11. xinference/model/audio/cosyvoice.py +16 -5
  12. xinference/model/audio/kokoro.py +139 -0
  13. xinference/model/audio/melotts.py +110 -0
  14. xinference/model/audio/model_spec.json +80 -0
  15. xinference/model/audio/model_spec_modelscope.json +18 -0
  16. xinference/model/audio/whisper.py +35 -10
  17. xinference/model/llm/llama_cpp/core.py +21 -14
  18. xinference/model/llm/llm_family.json +527 -1
  19. xinference/model/llm/llm_family.py +4 -1
  20. xinference/model/llm/llm_family_modelscope.json +495 -3
  21. xinference/model/llm/memory.py +1 -1
  22. xinference/model/llm/mlx/core.py +24 -6
  23. xinference/model/llm/transformers/core.py +9 -1
  24. xinference/model/llm/transformers/qwen2_audio.py +3 -1
  25. xinference/model/llm/transformers/qwen2_vl.py +20 -3
  26. xinference/model/llm/transformers/utils.py +22 -11
  27. xinference/model/llm/utils.py +115 -1
  28. xinference/model/llm/vllm/core.py +14 -4
  29. xinference/model/llm/vllm/xavier/block.py +3 -4
  30. xinference/model/llm/vllm/xavier/block_tracker.py +71 -58
  31. xinference/model/llm/vllm/xavier/collective.py +74 -0
  32. xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
  33. xinference/model/llm/vllm/xavier/executor.py +18 -16
  34. xinference/model/llm/vllm/xavier/scheduler.py +79 -63
  35. xinference/model/llm/vllm/xavier/test/test_xavier.py +60 -35
  36. xinference/model/llm/vllm/xavier/transfer.py +53 -32
  37. xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
  38. xinference/thirdparty/melo/__init__.py +0 -0
  39. xinference/thirdparty/melo/api.py +135 -0
  40. xinference/thirdparty/melo/app.py +61 -0
  41. xinference/thirdparty/melo/attentions.py +459 -0
  42. xinference/thirdparty/melo/commons.py +160 -0
  43. xinference/thirdparty/melo/configs/config.json +94 -0
  44. xinference/thirdparty/melo/data/example/metadata.list +20 -0
  45. xinference/thirdparty/melo/data_utils.py +413 -0
  46. xinference/thirdparty/melo/download_utils.py +67 -0
  47. xinference/thirdparty/melo/infer.py +25 -0
  48. xinference/thirdparty/melo/init_downloads.py +14 -0
  49. xinference/thirdparty/melo/losses.py +58 -0
  50. xinference/thirdparty/melo/main.py +36 -0
  51. xinference/thirdparty/melo/mel_processing.py +174 -0
  52. xinference/thirdparty/melo/models.py +1030 -0
  53. xinference/thirdparty/melo/modules.py +598 -0
  54. xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
  55. xinference/thirdparty/melo/monotonic_align/core.py +46 -0
  56. xinference/thirdparty/melo/preprocess_text.py +135 -0
  57. xinference/thirdparty/melo/split_utils.py +174 -0
  58. xinference/thirdparty/melo/text/__init__.py +35 -0
  59. xinference/thirdparty/melo/text/chinese.py +199 -0
  60. xinference/thirdparty/melo/text/chinese_bert.py +107 -0
  61. xinference/thirdparty/melo/text/chinese_mix.py +253 -0
  62. xinference/thirdparty/melo/text/cleaner.py +36 -0
  63. xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
  64. xinference/thirdparty/melo/text/cmudict.rep +129530 -0
  65. xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
  66. xinference/thirdparty/melo/text/english.py +284 -0
  67. xinference/thirdparty/melo/text/english_bert.py +39 -0
  68. xinference/thirdparty/melo/text/english_utils/__init__.py +0 -0
  69. xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
  70. xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
  71. xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
  72. xinference/thirdparty/melo/text/es_phonemizer/__init__.py +0 -0
  73. xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
  74. xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
  75. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
  76. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
  77. xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
  78. xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
  79. xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
  80. xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
  81. xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
  82. xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
  83. xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
  84. xinference/thirdparty/melo/text/fr_phonemizer/__init__.py +0 -0
  85. xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
  86. xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
  87. xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
  88. xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
  89. xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
  90. xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
  91. xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
  92. xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
  93. xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
  94. xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
  95. xinference/thirdparty/melo/text/french.py +94 -0
  96. xinference/thirdparty/melo/text/french_bert.py +39 -0
  97. xinference/thirdparty/melo/text/japanese.py +647 -0
  98. xinference/thirdparty/melo/text/japanese_bert.py +49 -0
  99. xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
  100. xinference/thirdparty/melo/text/korean.py +192 -0
  101. xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
  102. xinference/thirdparty/melo/text/spanish.py +122 -0
  103. xinference/thirdparty/melo/text/spanish_bert.py +39 -0
  104. xinference/thirdparty/melo/text/symbols.py +290 -0
  105. xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
  106. xinference/thirdparty/melo/train.py +635 -0
  107. xinference/thirdparty/melo/train.sh +19 -0
  108. xinference/thirdparty/melo/transforms.py +209 -0
  109. xinference/thirdparty/melo/utils.py +424 -0
  110. xinference/types.py +2 -0
  111. xinference/web/ui/build/asset-manifest.json +3 -3
  112. xinference/web/ui/build/index.html +1 -1
  113. xinference/web/ui/build/static/js/{main.1eb206d1.js → main.b0936c54.js} +3 -3
  114. xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
  115. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
  116. {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/METADATA +37 -27
  117. {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/RECORD +122 -45
  118. xinference/web/ui/build/static/js/main.1eb206d1.js.map +0 -1
  119. xinference/web/ui/node_modules/.cache/babel-loader/2213d49de260e1f67c888081b18f120f5225462b829ae57c9e05a05cec83689d.json +0 -1
  120. /xinference/web/ui/build/static/js/{main.1eb206d1.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
  121. {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/LICENSE +0 -0
  122. {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/WHEEL +0 -0
  123. {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/entry_points.txt +0 -0
  124. {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/top_level.txt +0 -0
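
The notable additions in this release are two new TTS backends (kokoro.py, melotts.py, backed by the vendored xinference/thirdparty/melo tree and the expanded audio model_spec.json) and the new Xavier collective/collective_manager modules for vLLM. Below is a minimal sketch of exercising one of the new audio models through the standard xinference client API; the model name "MeloTTS-English" is an assumption inferred from the new spec entries, so check the registrations on your own server.

from xinference.client import Client

# Assumes a local xinference 1.2.2 server is running; "MeloTTS-English" is a
# guessed registration name -- list the audio models on your server to confirm.
client = Client("http://localhost:9997")
model_uid = client.launch_model(model_name="MeloTTS-English", model_type="audio")
model = client.get_model(model_uid)

# Audio models expose speech(); it returns encoded audio bytes (mp3 by default).
audio_bytes = model.speech("Hello from the 1.2.2 wheel.")
with open("out.mp3", "wb") as f:
    f.write(audio_bytes)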
xinference/thirdparty/melo/text/french_bert.py
@@ -0,0 +1,39 @@
+ import torch
+ from transformers import AutoTokenizer, AutoModelForMaskedLM
+ import sys
+
+ model_id = 'dbmdz/bert-base-french-europeana-cased'
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = None
+
+ def get_bert_feature(text, word2ph, device=None):
+     global model
+     if (
+         sys.platform == "darwin"
+         and torch.backends.mps.is_available()
+         and device == "cpu"
+     ):
+         device = "mps"
+     if not device:
+         device = "cuda"
+     if model is None:
+         model = AutoModelForMaskedLM.from_pretrained(model_id).to(
+             device
+         )
+     with torch.no_grad():
+         inputs = tokenizer(text, return_tensors="pt")
+         for i in inputs:
+             inputs[i] = inputs[i].to(device)
+         res = model(**inputs, output_hidden_states=True)
+         res = torch.cat(res["hidden_states"][-3:-2], -1)[0].cpu()
+
+     assert inputs["input_ids"].shape[-1] == len(word2ph)
+     word2phone = word2ph
+     phone_level_feature = []
+     for i in range(len(word2phone)):
+         repeat_feature = res[i].repeat(word2phone[i], 1)
+         phone_level_feature.append(repeat_feature)
+
+     phone_level_feature = torch.cat(phone_level_feature, dim=0)
+
+     return phone_level_feature.T
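
This mirrors the other per-language *_bert.py helpers in melo/text: lazily load the masked LM into a module-level global, take a hidden-state layer as per-token features, then repeat each token's vector word2ph[i] times so the token-level features line up one-to-one with the phoneme sequence. A self-contained sketch of that expansion step with dummy tensors (no model download needed):

import torch

# Dummy per-token features: 4 tokens, 8 dims each (stands in for `res` above).
res = torch.arange(32, dtype=torch.float32).reshape(4, 8)
word2ph = [1, 3, 2, 1]  # phonemes per token, as produced by g2p()

# Repeat each token's vector once per phoneme, then concatenate.
phone_level_feature = torch.cat(
    [res[i].repeat(word2ph[i], 1) for i in range(len(word2ph))], dim=0
)
print(phone_level_feature.shape)  # torch.Size([7, 8]) -> one row per phoneme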
xinference/thirdparty/melo/text/japanese.py
@@ -0,0 +1,647 @@
+ # Convert Japanese text to phonemes which is
+ # compatible with Julius https://github.com/julius-speech/segmentation-kit
+ import re
+ import unicodedata
+
+ from transformers import AutoTokenizer
+
+ from . import symbols
+ punctuation = ["!", "?", "…", ",", ".", "'", "-"]
+
+ try:
+     import MeCab
+ except ImportError as e:
+     raise ImportError("Japanese requires mecab-python3 and unidic-lite.") from e
+ from num2words import num2words
+
+ _CONVRULES = [
+     # Conversion of 2 letters
+     "アァ/ a a",
+     "イィ/ i i",
+     "イェ/ i e",
+     "イャ/ y a",
+     "ウゥ/ u:",
+     "エェ/ e e",
+     "オォ/ o:",
+     "カァ/ k a:",
+     "キィ/ k i:",
+     "クゥ/ k u:",
+     "クャ/ ky a",
+     "クュ/ ky u",
+     "クョ/ ky o",
+     "ケェ/ k e:",
+     "コォ/ k o:",
+     "ガァ/ g a:",
+     "ギィ/ g i:",
+     "グゥ/ g u:",
+     "グャ/ gy a",
+     "グュ/ gy u",
+     "グョ/ gy o",
+     "ゲェ/ g e:",
+     "ゴォ/ g o:",
+     "サァ/ s a:",
+     "シィ/ sh i:",
+     "スゥ/ s u:",
+     "スャ/ sh a",
+     "スュ/ sh u",
+     "スョ/ sh o",
+     "セェ/ s e:",
+     "ソォ/ s o:",
+     "ザァ/ z a:",
+     "ジィ/ j i:",
+     "ズゥ/ z u:",
+     "ズャ/ zy a",
+     "ズュ/ zy u",
+     "ズョ/ zy o",
+     "ゼェ/ z e:",
+     "ゾォ/ z o:",
+     "タァ/ t a:",
+     "チィ/ ch i:",
+     "ツァ/ ts a",
+     "ツィ/ ts i",
+     "ツゥ/ ts u:",
+     "ツャ/ ch a",
+     "ツュ/ ch u",
+     "ツョ/ ch o",
+     "ツェ/ ts e",
+     "ツォ/ ts o",
+     "テェ/ t e:",
+     "トォ/ t o:",
+     "ダァ/ d a:",
+     "ヂィ/ j i:",
+     "ヅゥ/ d u:",
+     "ヅャ/ zy a",
+     "ヅュ/ zy u",
+     "ヅョ/ zy o",
+     "デェ/ d e:",
+     "ドォ/ d o:",
+     "ナァ/ n a:",
+     "ニィ/ n i:",
+     "ヌゥ/ n u:",
+     "ヌャ/ ny a",
+     "ヌュ/ ny u",
+     "ヌョ/ ny o",
+     "ネェ/ n e:",
+     "ノォ/ n o:",
+     "ハァ/ h a:",
+     "ヒィ/ h i:",
+     "フゥ/ f u:",
+     "フャ/ hy a",
+     "フュ/ hy u",
+     "フョ/ hy o",
+     "ヘェ/ h e:",
+     "ホォ/ h o:",
+     "バァ/ b a:",
+     "ビィ/ b i:",
+     "ブゥ/ b u:",
+     "フャ/ hy a",
+     "ブュ/ by u",
+     "フョ/ hy o",
+     "ベェ/ b e:",
+     "ボォ/ b o:",
+     "パァ/ p a:",
+     "ピィ/ p i:",
+     "プゥ/ p u:",
+     "プャ/ py a",
+     "プュ/ py u",
+     "プョ/ py o",
+     "ペェ/ p e:",
+     "ポォ/ p o:",
+     "マァ/ m a:",
+     "ミィ/ m i:",
+     "ムゥ/ m u:",
+     "ムャ/ my a",
+     "ムュ/ my u",
+     "ムョ/ my o",
+     "メェ/ m e:",
+     "モォ/ m o:",
+     "ヤァ/ y a:",
+     "ユゥ/ y u:",
+     "ユャ/ y a:",
+     "ユュ/ y u:",
+     "ユョ/ y o:",
+     "ヨォ/ y o:",
+     "ラァ/ r a:",
+     "リィ/ r i:",
+     "ルゥ/ r u:",
+     "ルャ/ ry a",
+     "ルュ/ ry u",
+     "ルョ/ ry o",
+     "レェ/ r e:",
+     "ロォ/ r o:",
+     "ワァ/ w a:",
+     "ヲォ/ o:",
+     "ディ/ d i",
+     "デェ/ d e:",
+     "デャ/ dy a",
+     "デュ/ dy u",
+     "デョ/ dy o",
+     "ティ/ t i",
+     "テェ/ t e:",
+     "テャ/ ty a",
+     "テュ/ ty u",
+     "テョ/ ty o",
+     "スィ/ s i",
+     "ズァ/ z u a",
+     "ズィ/ z i",
+     "ズゥ/ z u",
+     "ズャ/ zy a",
+     "ズュ/ zy u",
+     "ズョ/ zy o",
+     "ズェ/ z e",
+     "ズォ/ z o",
+     "キャ/ ky a",
+     "キュ/ ky u",
+     "キョ/ ky o",
+     "シャ/ sh a",
+     "シュ/ sh u",
+     "シェ/ sh e",
+     "ショ/ sh o",
+     "チャ/ ch a",
+     "チュ/ ch u",
+     "チェ/ ch e",
+     "チョ/ ch o",
+     "トゥ/ t u",
+     "トャ/ ty a",
+     "トュ/ ty u",
+     "トョ/ ty o",
+     "ドァ/ d o a",
+     "ドゥ/ d u",
+     "ドャ/ dy a",
+     "ドュ/ dy u",
+     "ドョ/ dy o",
+     "ドォ/ d o:",
+     "ニャ/ ny a",
+     "ニュ/ ny u",
+     "ニョ/ ny o",
+     "ヒャ/ hy a",
+     "ヒュ/ hy u",
+     "ヒョ/ hy o",
+     "ミャ/ my a",
+     "ミュ/ my u",
+     "ミョ/ my o",
+     "リャ/ ry a",
+     "リュ/ ry u",
+     "リョ/ ry o",
+     "ギャ/ gy a",
+     "ギュ/ gy u",
+     "ギョ/ gy o",
+     "ヂェ/ j e",
+     "ヂャ/ j a",
+     "ヂュ/ j u",
+     "ヂョ/ j o",
+     "ジェ/ j e",
+     "ジャ/ j a",
+     "ジュ/ j u",
+     "ジョ/ j o",
+     "ビャ/ by a",
+     "ビュ/ by u",
+     "ビョ/ by o",
+     "ピャ/ py a",
+     "ピュ/ py u",
+     "ピョ/ py o",
+     "ウァ/ u a",
+     "ウィ/ w i",
+     "ウェ/ w e",
+     "ウォ/ w o",
+     "ファ/ f a",
+     "フィ/ f i",
+     "フゥ/ f u",
+     "フャ/ hy a",
+     "フュ/ hy u",
+     "フョ/ hy o",
+     "フェ/ f e",
+     "フォ/ f o",
+     "ヴァ/ b a",
+     "ヴィ/ b i",
+     "ヴェ/ b e",
+     "ヴォ/ b o",
+     "ヴュ/ by u",
+     # Conversion of 1 letter
+     "ア/ a",
+     "イ/ i",
+     "ウ/ u",
+     "エ/ e",
+     "オ/ o",
+     "カ/ k a",
+     "キ/ k i",
+     "ク/ k u",
+     "ケ/ k e",
+     "コ/ k o",
+     "サ/ s a",
+     "シ/ sh i",
+     "ス/ s u",
+     "セ/ s e",
+     "ソ/ s o",
+     "タ/ t a",
+     "チ/ ch i",
+     "ツ/ ts u",
+     "テ/ t e",
+     "ト/ t o",
+     "ナ/ n a",
+     "ニ/ n i",
+     "ヌ/ n u",
+     "ネ/ n e",
+     "ノ/ n o",
+     "ハ/ h a",
+     "ヒ/ h i",
+     "フ/ f u",
+     "ヘ/ h e",
+     "ホ/ h o",
+     "マ/ m a",
+     "ミ/ m i",
+     "ム/ m u",
+     "メ/ m e",
+     "モ/ m o",
+     "ラ/ r a",
+     "リ/ r i",
+     "ル/ r u",
+     "レ/ r e",
+     "ロ/ r o",
+     "ガ/ g a",
+     "ギ/ g i",
+     "グ/ g u",
+     "ゲ/ g e",
+     "ゴ/ g o",
+     "ザ/ z a",
+     "ジ/ j i",
+     "ズ/ z u",
+     "ゼ/ z e",
+     "ゾ/ z o",
+     "ダ/ d a",
+     "ヂ/ j i",
+     "ヅ/ z u",
+     "デ/ d e",
+     "ド/ d o",
+     "バ/ b a",
+     "ビ/ b i",
+     "ブ/ b u",
+     "ベ/ b e",
+     "ボ/ b o",
+     "パ/ p a",
+     "ピ/ p i",
+     "プ/ p u",
+     "ペ/ p e",
+     "ポ/ p o",
+     "ヤ/ y a",
+     "ユ/ y u",
+     "ヨ/ y o",
+     "ワ/ w a",
+     "ヰ/ i",
+     "ヱ/ e",
+     "ヲ/ o",
+     "ン/ N",
+     "ッ/ q",
+     "ヴ/ b u",
+     "ー/:",
+     # Try converting broken text
+     "ァ/ a",
+     "ィ/ i",
+     "ゥ/ u",
+     "ェ/ e",
+     "ォ/ o",
+     "ヮ/ w a",
+     "ォ/ o",
+     # Try converting broken text
+     "ャ/ y a",
+     "ョ/ y o",
+     "ュ/ y u",
+     "琦/ ch i",
+     "ヶ/ k e",
+     "髙/ t a k a",
+     "煞/ sh y a",
+     # Symbols
+     "、/ ,",
+     "。/ .",
+     "!/ !",
+     "?/ ?",
+     "・/ ,",
+ ]
+
+ _COLON_RX = re.compile(":+")
+ _REJECT_RX = re.compile("[^ a-zA-Z:,.?]")
+
+
+ def _makerulemap():
+     l = [tuple(x.split("/")) for x in _CONVRULES]
+     return tuple({k: v for k, v in l if len(k) == i} for i in (1, 2))
+
+
+ _RULEMAP1, _RULEMAP2 = _makerulemap()
+
+
+ def kata2phoneme(text: str) -> str:
+     """Convert katakana text to phonemes."""
+     text = text.strip()
+     res = []
+     while text:
+         if len(text) >= 2:
+             x = _RULEMAP2.get(text[:2])
+             if x is not None:
+                 text = text[2:]
+                 res += x.split(" ")[1:]
+                 continue
+         x = _RULEMAP1.get(text[0])
+         if x is not None:
+             text = text[1:]
+             res += x.split(" ")[1:]
+             continue
+         res.append(text[0])
+         text = text[1:]
+     # res = _COLON_RX.sub(":", res)
+     return res
+
+
+ _KATAKANA = "".join(chr(ch) for ch in range(ord("ァ"), ord("ン") + 1))
+ _HIRAGANA = "".join(chr(ch) for ch in range(ord("ぁ"), ord("ん") + 1))
+ _HIRA2KATATRANS = str.maketrans(_HIRAGANA, _KATAKANA)
+
+
+ def hira2kata(text: str) -> str:
+     text = text.translate(_HIRA2KATATRANS)
+     return text.replace("う゛", "ヴ")
+
+
+ _SYMBOL_TOKENS = set(list("・、。?!"))
+ _NO_YOMI_TOKENS = set(list("「」『』―()[][]"))
+ _TAGGER = MeCab.Tagger()
+
+
+ def text2kata(text: str) -> str:
+     parsed = _TAGGER.parse(text)
+     res = []
+     for line in parsed.split("\n"):
+         if line == "EOS":
+             break
+         parts = line.split("\t")
+
+         word, yomi = parts[0], parts[1]
+         if yomi:
+             try:
+                 res.append(yomi.split(',')[6])
+             except:
+                 import pdb; pdb.set_trace()
+         else:
+             if word in _SYMBOL_TOKENS:
+                 res.append(word)
+             elif word in ("っ", "ッ"):
+                 res.append("ッ")
+             elif word in _NO_YOMI_TOKENS:
+                 pass
+             else:
+                 res.append(word)
+     return hira2kata("".join(res))
+
+
+ _ALPHASYMBOL_YOMI = {
+     "#": "シャープ",
+     "%": "パーセント",
+     "&": "アンド",
+     "+": "プラス",
+     "-": "マイナス",
+     ":": "コロン",
+     ";": "セミコロン",
+     "<": "小なり",
+     "=": "イコール",
+     ">": "大なり",
+     "@": "アット",
+     "a": "エー",
+     "b": "ビー",
+     "c": "シー",
+     "d": "ディー",
+     "e": "イー",
+     "f": "エフ",
+     "g": "ジー",
+     "h": "エイチ",
+     "i": "アイ",
+     "j": "ジェー",
+     "k": "ケー",
+     "l": "エル",
+     "m": "エム",
+     "n": "エヌ",
+     "o": "オー",
+     "p": "ピー",
+     "q": "キュー",
+     "r": "アール",
+     "s": "エス",
+     "t": "ティー",
+     "u": "ユー",
+     "v": "ブイ",
+     "w": "ダブリュー",
+     "x": "エックス",
+     "y": "ワイ",
+     "z": "ゼット",
+     "α": "アルファ",
+     "β": "ベータ",
+     "γ": "ガンマ",
+     "δ": "デルタ",
+     "ε": "イプシロン",
+     "ζ": "ゼータ",
+     "η": "イータ",
+     "θ": "シータ",
+     "ι": "イオタ",
+     "κ": "カッパ",
+     "λ": "ラムダ",
+     "μ": "ミュー",
+     "ν": "ニュー",
+     "ξ": "クサイ",
+     "ο": "オミクロン",
+     "π": "パイ",
+     "ρ": "ロー",
+     "σ": "シグマ",
+     "τ": "タウ",
+     "υ": "ウプシロン",
+     "φ": "ファイ",
+     "χ": "カイ",
+     "ψ": "プサイ",
+     "ω": "オメガ",
+ }
+
+
+ _NUMBER_WITH_SEPARATOR_RX = re.compile("[0-9]{1,3}(,[0-9]{3})+")
+ _CURRENCY_MAP = {"$": "ドル", "¥": "円", "£": "ポンド", "€": "ユーロ"}
+ _CURRENCY_RX = re.compile(r"([$¥£€])([0-9.]*[0-9])")
+ _NUMBER_RX = re.compile(r"[0-9]+(\.[0-9]+)?")
+
+
+ def japanese_convert_numbers_to_words(text: str) -> str:
+     res = _NUMBER_WITH_SEPARATOR_RX.sub(lambda m: m[0].replace(",", ""), text)
+     res = _CURRENCY_RX.sub(lambda m: m[2] + _CURRENCY_MAP.get(m[1], m[1]), res)
+     res = _NUMBER_RX.sub(lambda m: num2words(m[0], lang="ja"), res)
+     return res
+
+
+ def japanese_convert_alpha_symbols_to_words(text: str) -> str:
+     return "".join([_ALPHASYMBOL_YOMI.get(ch, ch) for ch in text.lower()])
+
+
+ def japanese_text_to_phonemes(text: str) -> str:
+     """Convert Japanese text to phonemes."""
+     res = unicodedata.normalize("NFKC", text)
+     res = japanese_convert_numbers_to_words(res)
+     res = japanese_convert_alpha_symbols_to_words(res)
+     res = text2kata(res)
+     res = kata2phoneme(res)
+     return res
+
+
+ def is_japanese_character(char):
+     # Define the Unicode ranges of the Japanese writing systems
+     japanese_ranges = [
+         (0x3040, 0x309F),  # Hiragana
+         (0x30A0, 0x30FF),  # Katakana
+         (0x4E00, 0x9FFF),  # Kanji (CJK Unified Ideographs)
+         (0x3400, 0x4DBF),  # Kanji Extension A
+         (0x20000, 0x2A6DF),  # Kanji Extension B
+         # More kanji extension ranges can be added as needed
+     ]
+
+     # Convert the character's Unicode code point to an integer
+     char_code = ord(char)
+
+     # Check whether the character falls within any of the Japanese ranges
+     for start, end in japanese_ranges:
+         if start <= char_code <= end:
+             return True
+
+     return False
+
+
+ rep_map = {
+     ":": ",",
+     ";": ",",
+     ",": ",",
+     "。": ".",
+     "!": "!",
+     "?": "?",
+     "\n": ".",
+     "·": ",",
+     "、": ",",
+     "...": "…",
+ }
+
+
+ def replace_punctuation(text):
+     pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
+
+     replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
+
+     replaced_text = re.sub(
+         r"[^\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF\u3400-\u4DBF"
+         + "".join(punctuation)
+         + r"]+",
+         "",
+         replaced_text,
+     )
+
+     return replaced_text
+
+ from pykakasi import kakasi
+ # Initialize kakasi object
+ kakasi = kakasi()
+ # Set options for converting Chinese characters to Katakana
+ kakasi.setMode("J", "K")  # Chinese to Katakana
+ kakasi.setMode("H", "K")  # Hiragana to Katakana
+ # Convert Chinese characters to Katakana
+ conv = kakasi.getConverter()
+
+ def text_normalize(text):
+     res = unicodedata.normalize("NFKC", text)
+     res = japanese_convert_numbers_to_words(res)
+     res = "".join([i for i in res if is_japanese_character(i)])
+     res = replace_punctuation(res)
+     res = conv.do(res)
+     return res
+
+
+ def distribute_phone(n_phone, n_word):
+     phones_per_word = [0] * n_word
+     for task in range(n_phone):
+         min_tasks = min(phones_per_word)
+         min_index = phones_per_word.index(min_tasks)
+         phones_per_word[min_index] += 1
+     return phones_per_word
+
+
+
+ # tokenizer = AutoTokenizer.from_pretrained('cl-tohoku/bert-base-japanese-v3')
+
+ model_id = 'tohoku-nlp/bert-base-japanese-v3'
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ def g2p(norm_text):
+
+     tokenized = tokenizer.tokenize(norm_text)
+     phs = []
+     ph_groups = []
+     for t in tokenized:
+         if not t.startswith("#"):
+             ph_groups.append([t])
+         else:
+             ph_groups[-1].append(t.replace("#", ""))
+     word2ph = []
+     for group in ph_groups:
+         text = ""
+         for ch in group:
+             text += ch
+         if text == '[UNK]':
+             phs += ['_']
+             word2ph += [1]
+             continue
+         elif text in punctuation:
+             phs += [text]
+             word2ph += [1]
+             continue
+         # import pdb; pdb.set_trace()
+         # phonemes = japanese_text_to_phonemes(text)
+         phonemes = kata2phoneme(text)
+         # phonemes = [i for i in phonemes if i in symbols]
+         for i in phonemes:
+             assert i in symbols, (group, norm_text, tokenized, i)
+         phone_len = len(phonemes)
+         word_len = len(group)
+
+         aaa = distribute_phone(phone_len, word_len)
+         assert len(aaa) == word_len
+         word2ph += aaa
+
+         phs += phonemes
+     phones = ["_"] + phs + ["_"]
+     tones = [0 for i in phones]
+     word2ph = [1] + word2ph + [1]
+     assert len(word2ph) == len(tokenized) + 2
+     return phones, tones, word2ph
+
+ def get_bert_feature(text, word2ph, device):
+     from text import japanese_bert
+
+     return japanese_bert.get_bert_feature(text, word2ph, device=device)
+
+
+ if __name__ == "__main__":
+     # tokenizer = AutoTokenizer.from_pretrained("./bert/bert-base-japanese-v3")
+     text = "こんにちは、世界!..."
+     text = 'ええ、僕はおきなと申します。こちらの小さいわらべは杏子。ご挨拶が遅れてしまいすみません。あなたの名は?'
+     text = 'あの、お前以外のみんなは、全員生きてること?'
+     from text.japanese_bert import get_bert_feature
+
+     text = text_normalize(text)
+     print(text)
+     phones, tones, word2ph = g2p(text)
+     bert = get_bert_feature(text, word2ph)
+
+     print(phones, tones, word2ph, bert.shape)
+
+ # if __name__ == '__main__':
+ #     from pykakasi import kakasi
+ #     # Initialize kakasi object
+ #     kakasi = kakasi()
+
+ #     # Set options for converting Chinese characters to Katakana
+ #     kakasi.setMode("J", "H")  # Chinese to Katakana
+ #     kakasi.setMode("K", "H")  # Hiragana to Katakana
+
+ #     # Convert Chinese characters to Katakana
+ #     conv = kakasi.getConverter()
+ #     katakana_text = conv.do('ええ、僕はおきなと申します。こちらの小さいわらべは杏子。ご挨拶が遅れてしまいすみません。あなたの名は?')  # Replace with your Chinese text
+
+ #     print(katakana_text)  # Output: ニーハオセカイ
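
Two details of this module carry most of the logic: kata2phoneme() consumes the katakana string greedily, trying the two-character rule map (_RULEMAP2) before the one-character map so digraphs such as キャ are matched before キ alone, and distribute_phone() splits a subword group's phonemes across its pieces by always topping up the currently smallest bucket. A standalone sketch of that balancing step and its output:

def distribute_phone(n_phone: int, n_word: int) -> list[int]:
    # Greedy balancing: each phoneme is assigned to the subword piece
    # that currently has the fewest phonemes.
    phones_per_word = [0] * n_word
    for _ in range(n_phone):
        min_index = phones_per_word.index(min(phones_per_word))
        phones_per_word[min_index] += 1
    return phones_per_word

print(distribute_phone(7, 3))  # [3, 2, 2] -- a near-even split, earlier pieces filled first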