xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic; see the registry's advisory page for more details.

Files changed (373)
  1. xinference/_compat.py +24 -2
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +219 -77
  4. xinference/client/restful/restful_client.py +47 -2
  5. xinference/constants.py +1 -0
  6. xinference/core/chat_interface.py +6 -1
  7. xinference/core/model.py +124 -34
  8. xinference/core/supervisor.py +180 -12
  9. xinference/core/utils.py +73 -4
  10. xinference/core/worker.py +102 -4
  11. xinference/deploy/cmdline.py +3 -1
  12. xinference/deploy/test/test_cmdline.py +56 -0
  13. xinference/isolation.py +24 -0
  14. xinference/model/audio/__init__.py +12 -0
  15. xinference/model/audio/core.py +37 -4
  16. xinference/model/audio/cosyvoice.py +39 -6
  17. xinference/model/audio/f5tts.py +200 -0
  18. xinference/model/audio/f5tts_mlx.py +260 -0
  19. xinference/model/audio/fish_speech.py +70 -110
  20. xinference/model/audio/melotts.py +110 -0
  21. xinference/model/audio/model_spec.json +179 -3
  22. xinference/model/audio/model_spec_modelscope.json +27 -0
  23. xinference/model/audio/utils.py +32 -0
  24. xinference/model/audio/whisper.py +35 -10
  25. xinference/model/audio/whisper_mlx.py +208 -0
  26. xinference/model/embedding/core.py +322 -6
  27. xinference/model/embedding/model_spec.json +8 -1
  28. xinference/model/embedding/model_spec_modelscope.json +9 -1
  29. xinference/model/image/core.py +69 -1
  30. xinference/model/image/model_spec.json +145 -4
  31. xinference/model/image/model_spec_modelscope.json +150 -4
  32. xinference/model/image/stable_diffusion/core.py +50 -15
  33. xinference/model/llm/__init__.py +6 -2
  34. xinference/model/llm/llm_family.json +1055 -93
  35. xinference/model/llm/llm_family.py +15 -36
  36. xinference/model/llm/llm_family_modelscope.json +1031 -78
  37. xinference/model/llm/memory.py +1 -1
  38. xinference/model/llm/mlx/core.py +285 -47
  39. xinference/model/llm/sglang/core.py +2 -0
  40. xinference/model/llm/transformers/chatglm.py +9 -5
  41. xinference/model/llm/transformers/cogagent.py +272 -0
  42. xinference/model/llm/transformers/core.py +3 -0
  43. xinference/model/llm/transformers/glm_edge_v.py +230 -0
  44. xinference/model/llm/transformers/qwen2_vl.py +12 -1
  45. xinference/model/llm/transformers/utils.py +16 -8
  46. xinference/model/llm/utils.py +55 -4
  47. xinference/model/llm/vllm/core.py +137 -12
  48. xinference/model/llm/vllm/xavier/__init__.py +13 -0
  49. xinference/model/llm/vllm/xavier/allocator.py +74 -0
  50. xinference/model/llm/vllm/xavier/block.py +111 -0
  51. xinference/model/llm/vllm/xavier/block_manager.py +71 -0
  52. xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
  53. xinference/model/llm/vllm/xavier/collective.py +74 -0
  54. xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
  55. xinference/model/llm/vllm/xavier/engine.py +247 -0
  56. xinference/model/llm/vllm/xavier/executor.py +134 -0
  57. xinference/model/llm/vllm/xavier/scheduler.py +438 -0
  58. xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
  59. xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
  60. xinference/model/llm/vllm/xavier/transfer.py +319 -0
  61. xinference/model/rerank/core.py +11 -4
  62. xinference/model/video/diffusers.py +14 -0
  63. xinference/model/video/model_spec.json +15 -0
  64. xinference/model/video/model_spec_modelscope.json +16 -0
  65. xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
  66. xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
  67. xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
  68. xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
  69. xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
  70. xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
  71. xinference/thirdparty/cosyvoice/bin/train.py +42 -8
  72. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
  73. xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
  74. xinference/thirdparty/cosyvoice/cli/model.py +330 -80
  75. xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
  76. xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
  77. xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
  78. xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
  79. xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
  80. xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
  81. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
  82. xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
  83. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
  84. xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
  85. xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
  86. xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
  87. xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
  88. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
  89. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
  90. xinference/thirdparty/cosyvoice/utils/common.py +28 -1
  91. xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
  92. xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
  93. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
  94. xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
  95. xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
  96. xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
  97. xinference/thirdparty/f5_tts/api.py +166 -0
  98. xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
  99. xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
  100. xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
  101. xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
  102. xinference/thirdparty/f5_tts/eval/README.md +49 -0
  103. xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
  104. xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
  105. xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
  106. xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
  107. xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
  108. xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
  109. xinference/thirdparty/f5_tts/infer/README.md +191 -0
  110. xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
  111. xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
  112. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
  113. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
  114. xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
  115. xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
  116. xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
  117. xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
  118. xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
  119. xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
  120. xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
  121. xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
  122. xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
  123. xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
  124. xinference/thirdparty/f5_tts/model/__init__.py +10 -0
  125. xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
  126. xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
  127. xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
  128. xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
  129. xinference/thirdparty/f5_tts/model/cfm.py +285 -0
  130. xinference/thirdparty/f5_tts/model/dataset.py +319 -0
  131. xinference/thirdparty/f5_tts/model/modules.py +658 -0
  132. xinference/thirdparty/f5_tts/model/trainer.py +366 -0
  133. xinference/thirdparty/f5_tts/model/utils.py +185 -0
  134. xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
  135. xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
  136. xinference/thirdparty/f5_tts/socket_server.py +159 -0
  137. xinference/thirdparty/f5_tts/train/README.md +77 -0
  138. xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
  139. xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
  140. xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
  141. xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
  142. xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
  143. xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
  144. xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
  145. xinference/thirdparty/f5_tts/train/train.py +75 -0
  146. xinference/thirdparty/fish_speech/fish_speech/conversation.py +266 -1
  147. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
  148. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
  149. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
  150. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
  151. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
  152. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +137 -29
  153. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
  154. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
  155. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +17 -11
  156. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
  157. xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
  158. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
  159. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
  160. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
  161. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
  162. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +2 -2
  163. xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +34 -18
  164. xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
  165. xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
  166. xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
  167. xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
  168. xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
  169. xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
  170. xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
  171. xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
  172. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
  173. xinference/thirdparty/fish_speech/tools/llama/generate.py +484 -72
  174. xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
  175. xinference/thirdparty/fish_speech/tools/schema.py +170 -0
  176. xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
  177. xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
  178. xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
  179. xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
  180. xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
  181. xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
  182. xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
  183. xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
  184. xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
  185. xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
  186. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
  187. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
  188. xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
  189. xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
  190. xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
  191. xinference/thirdparty/matcha/utils/utils.py +2 -2
  192. xinference/thirdparty/melo/api.py +135 -0
  193. xinference/thirdparty/melo/app.py +61 -0
  194. xinference/thirdparty/melo/attentions.py +459 -0
  195. xinference/thirdparty/melo/commons.py +160 -0
  196. xinference/thirdparty/melo/configs/config.json +94 -0
  197. xinference/thirdparty/melo/data/example/metadata.list +20 -0
  198. xinference/thirdparty/melo/data_utils.py +413 -0
  199. xinference/thirdparty/melo/download_utils.py +67 -0
  200. xinference/thirdparty/melo/infer.py +25 -0
  201. xinference/thirdparty/melo/init_downloads.py +14 -0
  202. xinference/thirdparty/melo/losses.py +58 -0
  203. xinference/thirdparty/melo/main.py +36 -0
  204. xinference/thirdparty/melo/mel_processing.py +174 -0
  205. xinference/thirdparty/melo/models.py +1030 -0
  206. xinference/thirdparty/melo/modules.py +598 -0
  207. xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
  208. xinference/thirdparty/melo/monotonic_align/core.py +46 -0
  209. xinference/thirdparty/melo/preprocess_text.py +135 -0
  210. xinference/thirdparty/melo/split_utils.py +174 -0
  211. xinference/thirdparty/melo/text/__init__.py +35 -0
  212. xinference/thirdparty/melo/text/chinese.py +199 -0
  213. xinference/thirdparty/melo/text/chinese_bert.py +107 -0
  214. xinference/thirdparty/melo/text/chinese_mix.py +253 -0
  215. xinference/thirdparty/melo/text/cleaner.py +36 -0
  216. xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
  217. xinference/thirdparty/melo/text/cmudict.rep +129530 -0
  218. xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
  219. xinference/thirdparty/melo/text/english.py +284 -0
  220. xinference/thirdparty/melo/text/english_bert.py +39 -0
  221. xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
  222. xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
  223. xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
  224. xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
  225. xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
  226. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
  227. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
  228. xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
  229. xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
  230. xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
  231. xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
  232. xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
  233. xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
  234. xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
  235. xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
  236. xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
  237. xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
  238. xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
  239. xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
  240. xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
  241. xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
  242. xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
  243. xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
  244. xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
  245. xinference/thirdparty/melo/text/french.py +94 -0
  246. xinference/thirdparty/melo/text/french_bert.py +39 -0
  247. xinference/thirdparty/melo/text/japanese.py +647 -0
  248. xinference/thirdparty/melo/text/japanese_bert.py +49 -0
  249. xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
  250. xinference/thirdparty/melo/text/korean.py +192 -0
  251. xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
  252. xinference/thirdparty/melo/text/spanish.py +122 -0
  253. xinference/thirdparty/melo/text/spanish_bert.py +39 -0
  254. xinference/thirdparty/melo/text/symbols.py +290 -0
  255. xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
  256. xinference/thirdparty/melo/train.py +635 -0
  257. xinference/thirdparty/melo/train.sh +19 -0
  258. xinference/thirdparty/melo/transforms.py +209 -0
  259. xinference/thirdparty/melo/utils.py +424 -0
  260. xinference/types.py +17 -1
  261. xinference/web/ui/build/asset-manifest.json +6 -6
  262. xinference/web/ui/build/index.html +1 -1
  263. xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
  264. xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
  265. xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
  266. xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
  267. xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
  268. xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
  269. xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
  270. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
  271. xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
  272. xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
  273. xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
  274. xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
  275. xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
  276. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
  277. xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
  278. xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
  279. xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
  280. xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
  281. xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
  282. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
  283. xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
  284. xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
  285. xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
  286. xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
  287. xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
  288. xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
  289. xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
  290. xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
  291. xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
  292. xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
  293. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
  294. xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
  295. xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
  296. xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
  297. xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
  298. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
  299. xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
  300. xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
  301. xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
  302. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
  303. xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
  304. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
  305. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
  306. xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
  307. xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
  308. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
  309. xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
  310. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
  311. xinference/web/ui/node_modules/.package-lock.json +67 -3
  312. xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
  313. xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
  314. xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
  315. xinference/web/ui/node_modules/i18next/package.json +129 -0
  316. xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
  317. xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
  318. xinference/web/ui/node_modules/react-i18next/package.json +162 -0
  319. xinference/web/ui/node_modules/void-elements/package.json +34 -0
  320. xinference/web/ui/package-lock.json +69 -3
  321. xinference/web/ui/package.json +2 -0
  322. xinference/web/ui/src/locales/en.json +186 -0
  323. xinference/web/ui/src/locales/zh.json +186 -0
  324. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/METADATA +96 -36
  325. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/RECORD +335 -146
  326. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/WHEEL +1 -1
  327. xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
  328. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  329. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  330. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  331. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  332. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  333. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  334. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  335. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  336. xinference/thirdparty/fish_speech/tools/api.py +0 -440
  337. xinference/thirdparty/fish_speech/tools/commons.py +0 -35
  338. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  339. xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -34
  340. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  341. xinference/thirdparty/fish_speech/tools/webui.py +0 -485
  342. xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
  343. xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
  344. xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
  345. xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
  346. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
  347. xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
  348. xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
  349. xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
  350. xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
  351. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
  352. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
  353. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
  354. xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
  355. xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
  356. xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
  357. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
  358. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
  359. xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
  360. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
  361. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
  362. xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
  363. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
  364. xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
  365. /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
  366. /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
  367. /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
  368. /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
  369. /xinference/thirdparty/{fish_speech/fish_speech/configs → melo/text/fr_phonemizer}/__init__.py +0 -0
  370. /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
  371. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
  372. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
  373. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
import sys

import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

# French BERT checkpoint used to extract contextual features for TTS.
model_id = 'dbmdz/bert-base-french-europeana-cased'
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Lazily initialized on the first call to get_bert_feature so importing this
# module does not download/load the full masked-LM weights.
model = None


def get_bert_feature(text, word2ph, device=None):
    """Return phone-level BERT features for ``text``.

    Parameters
    ----------
    text : str
        Input sentence to encode.
    word2ph : sequence of int
        For each token produced by the tokenizer, the number of phonemes it
        maps to; its length must equal the tokenized sequence length.
    device : str or None
        Target device. ``"cpu"`` on macOS is upgraded to ``"mps"`` when
        available; ``None`` selects CUDA when available, else CPU.

    Returns
    -------
    torch.Tensor
        Feature matrix of shape ``(hidden_dim, sum(word2ph))`` — token
        features repeated per phoneme, transposed.
    """
    global model
    # On macOS, prefer the Metal backend over an explicitly requested CPU
    # device when it is available.
    if (
        sys.platform == "darwin"
        and torch.backends.mps.is_available()
        and device == "cpu"
    ):
        device = "mps"
    if not device:
        # FIX: fall back to CPU when CUDA is absent. The original
        # unconditionally chose "cuda", which crashes on CPU-only machines.
        device = "cuda" if torch.cuda.is_available() else "cpu"
    if model is None:
        model = AutoModelForMaskedLM.from_pretrained(model_id).to(device)
    with torch.no_grad():
        inputs = tokenizer(text, return_tensors="pt")
        for key in inputs:
            inputs[key] = inputs[key].to(device)
        res = model(**inputs, output_hidden_states=True)
        # Third-from-last hidden layer; torch.cat over the one-element slice
        # preserves the original formulation exactly.
        res = torch.cat(res["hidden_states"][-3:-2], -1)[0].cpu()

    # Tokenizer output and the phoneme mapping must be aligned one-to-one.
    assert inputs["input_ids"].shape[-1] == len(word2ph)
    phone_level_feature = []
    for i, repeats in enumerate(word2ph):
        # Repeat each token's feature vector once per phoneme it covers.
        phone_level_feature.append(res[i].repeat(repeats, 1))
    phone_level_feature = torch.cat(phone_level_feature, dim=0)

    return phone_level_feature.T
@@ -0,0 +1,647 @@
1
+ # Convert Japanese text to phonemes which is
2
+ # compatible with Julius https://github.com/julius-speech/segmentation-kit
3
+ import re
4
+ import unicodedata
5
+
6
+ from transformers import AutoTokenizer
7
+
8
+ from . import symbols
9
+ punctuation = ["!", "?", "…", ",", ".", "'", "-"]
10
+
11
+ try:
12
+ import MeCab
13
+ except ImportError as e:
14
+ raise ImportError("Japanese requires mecab-python3 and unidic-lite.") from e
15
+ from num2words import num2words
16
+
17
+ _CONVRULES = [
18
+ # Conversion of 2 letters
19
+ "アァ/ a a",
20
+ "イィ/ i i",
21
+ "イェ/ i e",
22
+ "イャ/ y a",
23
+ "ウゥ/ u:",
24
+ "エェ/ e e",
25
+ "オォ/ o:",
26
+ "カァ/ k a:",
27
+ "キィ/ k i:",
28
+ "クゥ/ k u:",
29
+ "クャ/ ky a",
30
+ "クュ/ ky u",
31
+ "クョ/ ky o",
32
+ "ケェ/ k e:",
33
+ "コォ/ k o:",
34
+ "ガァ/ g a:",
35
+ "ギィ/ g i:",
36
+ "グゥ/ g u:",
37
+ "グャ/ gy a",
38
+ "グュ/ gy u",
39
+ "グョ/ gy o",
40
+ "ゲェ/ g e:",
41
+ "ゴォ/ g o:",
42
+ "サァ/ s a:",
43
+ "シィ/ sh i:",
44
+ "スゥ/ s u:",
45
+ "スャ/ sh a",
46
+ "スュ/ sh u",
47
+ "スョ/ sh o",
48
+ "セェ/ s e:",
49
+ "ソォ/ s o:",
50
+ "ザァ/ z a:",
51
+ "ジィ/ j i:",
52
+ "ズゥ/ z u:",
53
+ "ズャ/ zy a",
54
+ "ズュ/ zy u",
55
+ "ズョ/ zy o",
56
+ "ゼェ/ z e:",
57
+ "ゾォ/ z o:",
58
+ "タァ/ t a:",
59
+ "チィ/ ch i:",
60
+ "ツァ/ ts a",
61
+ "ツィ/ ts i",
62
+ "ツゥ/ ts u:",
63
+ "ツャ/ ch a",
64
+ "ツュ/ ch u",
65
+ "ツョ/ ch o",
66
+ "ツェ/ ts e",
67
+ "ツォ/ ts o",
68
+ "テェ/ t e:",
69
+ "トォ/ t o:",
70
+ "ダァ/ d a:",
71
+ "ヂィ/ j i:",
72
+ "ヅゥ/ d u:",
73
+ "ヅャ/ zy a",
74
+ "ヅュ/ zy u",
75
+ "ヅョ/ zy o",
76
+ "デェ/ d e:",
77
+ "ドォ/ d o:",
78
+ "ナァ/ n a:",
79
+ "ニィ/ n i:",
80
+ "ヌゥ/ n u:",
81
+ "ヌャ/ ny a",
82
+ "ヌュ/ ny u",
83
+ "ヌョ/ ny o",
84
+ "ネェ/ n e:",
85
+ "ノォ/ n o:",
86
+ "ハァ/ h a:",
87
+ "ヒィ/ h i:",
88
+ "フゥ/ f u:",
89
+ "フャ/ hy a",
90
+ "フュ/ hy u",
91
+ "フョ/ hy o",
92
+ "ヘェ/ h e:",
93
+ "ホォ/ h o:",
94
+ "バァ/ b a:",
95
+ "ビィ/ b i:",
96
+ "ブゥ/ b u:",
97
+ "フャ/ hy a",
98
+ "ブュ/ by u",
99
+ "フョ/ hy o",
100
+ "ベェ/ b e:",
101
+ "ボォ/ b o:",
102
+ "パァ/ p a:",
103
+ "ピィ/ p i:",
104
+ "プゥ/ p u:",
105
+ "プャ/ py a",
106
+ "プュ/ py u",
107
+ "プョ/ py o",
108
+ "ペェ/ p e:",
109
+ "ポォ/ p o:",
110
+ "マァ/ m a:",
111
+ "ミィ/ m i:",
112
+ "ムゥ/ m u:",
113
+ "ムャ/ my a",
114
+ "ムュ/ my u",
115
+ "ムョ/ my o",
116
+ "メェ/ m e:",
117
+ "モォ/ m o:",
118
+ "ヤァ/ y a:",
119
+ "ユゥ/ y u:",
120
+ "ユャ/ y a:",
121
+ "ユュ/ y u:",
122
+ "ユョ/ y o:",
123
+ "ヨォ/ y o:",
124
+ "ラァ/ r a:",
125
+ "リィ/ r i:",
126
+ "ルゥ/ r u:",
127
+ "ルャ/ ry a",
128
+ "ルュ/ ry u",
129
+ "ルョ/ ry o",
130
+ "レェ/ r e:",
131
+ "ロォ/ r o:",
132
+ "ワァ/ w a:",
133
+ "ヲォ/ o:",
134
+ "ディ/ d i",
135
+ "デェ/ d e:",
136
+ "デャ/ dy a",
137
+ "デュ/ dy u",
138
+ "デョ/ dy o",
139
+ "ティ/ t i",
140
+ "テェ/ t e:",
141
+ "テャ/ ty a",
142
+ "テュ/ ty u",
143
+ "テョ/ ty o",
144
+ "スィ/ s i",
145
+ "ズァ/ z u a",
146
+ "ズィ/ z i",
147
+ "ズゥ/ z u",
148
+ "ズャ/ zy a",
149
+ "ズュ/ zy u",
150
+ "ズョ/ zy o",
151
+ "ズェ/ z e",
152
+ "ズォ/ z o",
153
+ "キャ/ ky a",
154
+ "キュ/ ky u",
155
+ "キョ/ ky o",
156
+ "シャ/ sh a",
157
+ "シュ/ sh u",
158
+ "シェ/ sh e",
159
+ "ショ/ sh o",
160
+ "チャ/ ch a",
161
+ "チュ/ ch u",
162
+ "チェ/ ch e",
163
+ "チョ/ ch o",
164
+ "トゥ/ t u",
165
+ "トャ/ ty a",
166
+ "トュ/ ty u",
167
+ "トョ/ ty o",
168
+ "ドァ/ d o a",
169
+ "ドゥ/ d u",
170
+ "ドャ/ dy a",
171
+ "ドュ/ dy u",
172
+ "ドョ/ dy o",
173
+ "ドォ/ d o:",
174
+ "ニャ/ ny a",
175
+ "ニュ/ ny u",
176
+ "ニョ/ ny o",
177
+ "ヒャ/ hy a",
178
+ "ヒュ/ hy u",
179
+ "ヒョ/ hy o",
180
+ "ミャ/ my a",
181
+ "ミュ/ my u",
182
+ "ミョ/ my o",
183
+ "リャ/ ry a",
184
+ "リュ/ ry u",
185
+ "リョ/ ry o",
186
+ "ギャ/ gy a",
187
+ "ギュ/ gy u",
188
+ "ギョ/ gy o",
189
+ "ヂェ/ j e",
190
+ "ヂャ/ j a",
191
+ "ヂュ/ j u",
192
+ "ヂョ/ j o",
193
+ "ジェ/ j e",
194
+ "ジャ/ j a",
195
+ "ジュ/ j u",
196
+ "ジョ/ j o",
197
+ "ビャ/ by a",
198
+ "ビュ/ by u",
199
+ "ビョ/ by o",
200
+ "ピャ/ py a",
201
+ "ピュ/ py u",
202
+ "ピョ/ py o",
203
+ "ウァ/ u a",
204
+ "ウィ/ w i",
205
+ "ウェ/ w e",
206
+ "ウォ/ w o",
207
+ "ファ/ f a",
208
+ "フィ/ f i",
209
+ "フゥ/ f u",
210
+ "フャ/ hy a",
211
+ "フュ/ hy u",
212
+ "フョ/ hy o",
213
+ "フェ/ f e",
214
+ "フォ/ f o",
215
+ "ヴァ/ b a",
216
+ "ヴィ/ b i",
217
+ "ヴェ/ b e",
218
+ "ヴォ/ b o",
219
+ "ヴュ/ by u",
220
+ # Conversion of 1 letter
221
+ "ア/ a",
222
+ "イ/ i",
223
+ "ウ/ u",
224
+ "エ/ e",
225
+ "オ/ o",
226
+ "カ/ k a",
227
+ "キ/ k i",
228
+ "ク/ k u",
229
+ "ケ/ k e",
230
+ "コ/ k o",
231
+ "サ/ s a",
232
+ "シ/ sh i",
233
+ "ス/ s u",
234
+ "セ/ s e",
235
+ "ソ/ s o",
236
+ "タ/ t a",
237
+ "チ/ ch i",
238
+ "ツ/ ts u",
239
+ "テ/ t e",
240
+ "ト/ t o",
241
+ "ナ/ n a",
242
+ "ニ/ n i",
243
+ "ヌ/ n u",
244
+ "ネ/ n e",
245
+ "ノ/ n o",
246
+ "ハ/ h a",
247
+ "ヒ/ h i",
248
+ "フ/ f u",
249
+ "ヘ/ h e",
250
+ "ホ/ h o",
251
+ "マ/ m a",
252
+ "ミ/ m i",
253
+ "ム/ m u",
254
+ "メ/ m e",
255
+ "モ/ m o",
256
+ "ラ/ r a",
257
+ "リ/ r i",
258
+ "ル/ r u",
259
+ "レ/ r e",
260
+ "ロ/ r o",
261
+ "ガ/ g a",
262
+ "ギ/ g i",
263
+ "グ/ g u",
264
+ "ゲ/ g e",
265
+ "ゴ/ g o",
266
+ "ザ/ z a",
267
+ "ジ/ j i",
268
+ "ズ/ z u",
269
+ "ゼ/ z e",
270
+ "ゾ/ z o",
271
+ "ダ/ d a",
272
+ "ヂ/ j i",
273
+ "ヅ/ z u",
274
+ "デ/ d e",
275
+ "ド/ d o",
276
+ "バ/ b a",
277
+ "ビ/ b i",
278
+ "ブ/ b u",
279
+ "ベ/ b e",
280
+ "ボ/ b o",
281
+ "パ/ p a",
282
+ "ピ/ p i",
283
+ "プ/ p u",
284
+ "ペ/ p e",
285
+ "ポ/ p o",
286
+ "ヤ/ y a",
287
+ "ユ/ y u",
288
+ "ヨ/ y o",
289
+ "ワ/ w a",
290
+ "ヰ/ i",
291
+ "ヱ/ e",
292
+ "ヲ/ o",
293
+ "ン/ N",
294
+ "ッ/ q",
295
+ "ヴ/ b u",
296
+ "ー/:",
297
+ # Try converting broken text
298
+ "ァ/ a",
299
+ "ィ/ i",
300
+ "ゥ/ u",
301
+ "ェ/ e",
302
+ "ォ/ o",
303
+ "ヮ/ w a",
304
+ "ォ/ o",
305
+ # Try converting broken text
306
+ "ャ/ y a",
307
+ "ョ/ y o",
308
+ "ュ/ y u",
309
+ "琦/ ch i",
310
+ "ヶ/ k e",
311
+ "髙/ t a k a",
312
+ "煞/ sh y a",
313
+ # Symbols
314
+ "、/ ,",
315
+ "。/ .",
316
+ "!/ !",
317
+ "?/ ?",
318
+ "・/ ,",
319
+ ]
320
+
321
+ _COLON_RX = re.compile(":+")
322
+ _REJECT_RX = re.compile("[^ a-zA-Z:,.?]")
323
+
324
+
325
def _makerulemap():
    """Split _CONVRULES into two lookup tables, keyed by kana-prefix length.

    Returns a (1-char map, 2-char map) tuple; values keep their leading
    "/ ..."-style phoneme string.
    """
    pairs = [tuple(rule.split("/")) for rule in _CONVRULES]
    return tuple(
        {kana: phoneme for kana, phoneme in pairs if len(kana) == width}
        for width in (1, 2)
    )


_RULEMAP1, _RULEMAP2 = _makerulemap()
331
+
332
+
333
def kata2phoneme(text: str) -> list:
    """Convert katakana text to a list of phoneme tokens.

    Longest-match conversion: two-character rules (_RULEMAP2) are tried
    before single-character rules (_RULEMAP1); characters with no rule
    pass through unchanged.

    NOTE: the original annotation claimed ``-> str``, but the function has
    always returned a list of phoneme strings; the annotation is corrected
    here without changing behavior.
    """
    text = text.strip()
    res = []
    while text:
        if len(text) >= 2:
            x = _RULEMAP2.get(text[:2])
            if x is not None:
                text = text[2:]
                # Rule values are usually " p1 p2"; splitting on spaces and
                # dropping the empty head yields the phoneme tokens (rules
                # whose value has no leading space contribute nothing).
                res += x.split(" ")[1:]
                continue
        x = _RULEMAP1.get(text[0])
        if x is not None:
            text = text[1:]
            res += x.split(" ")[1:]
            continue
        # No rule matched: keep the raw character.
        res.append(text[0])
        text = text[1:]
    # res = _COLON_RX.sub(":", res)
    return res
353
+
354
+
355
# Parallel katakana / hiragana alphabets used to build a translation table.
_KATAKANA = "".join(chr(cp) for cp in range(ord("ァ"), ord("ン") + 1))
_HIRAGANA = "".join(chr(cp) for cp in range(ord("ぁ"), ord("ん") + 1))
_HIRA2KATATRANS = str.maketrans(_HIRAGANA, _KATAKANA)


def hira2kata(text: str) -> str:
    """Transliterate hiragana characters in *text* to katakana."""
    converted = text.translate(_HIRA2KATATRANS)
    # NOTE(review): because translation runs first, hiragana う゛ has already
    # become ウ゛ here, so this replace only fires on literal う゛ sequences
    # that survived translation; kept for parity with upstream.
    return converted.replace("う゛", "ヴ")
363
+
364
+
365
+ _SYMBOL_TOKENS = set(list("・、。?!"))
366
+ _NO_YOMI_TOKENS = set(list("「」『』―()[][]"))
367
+ _TAGGER = MeCab.Tagger()
368
+
369
+
370
def text2kata(text: str) -> str:
    """Convert Japanese text to katakana using MeCab morpheme readings.

    Each MeCab line is "surface\tfeatures"; field 6 of the comma-separated
    feature string is the reading. Symbols map to themselves, sokuon
    normalizes to ッ, and bracket-like tokens with no reading are dropped.
    """
    parsed = _TAGGER.parse(text)
    res = []
    for line in parsed.split("\n"):
        if line == "EOS":
            break
        parts = line.split("\t")

        word, yomi = parts[0], parts[1]
        if yomi:
            try:
                res.append(yomi.split(',')[6])
            except IndexError:
                # Feature string too short to carry a reading: fall back to
                # the surface form. (The original code dropped into pdb
                # here, which would hang any non-interactive process.)
                res.append(word)
        else:
            if word in _SYMBOL_TOKENS:
                res.append(word)
            elif word in ("っ", "ッ"):
                res.append("ッ")
            elif word in _NO_YOMI_TOKENS:
                pass
            else:
                res.append(word)
    return hira2kata("".join(res))
394
+
395
+
396
+ _ALPHASYMBOL_YOMI = {
397
+ "#": "シャープ",
398
+ "%": "パーセント",
399
+ "&": "アンド",
400
+ "+": "プラス",
401
+ "-": "マイナス",
402
+ ":": "コロン",
403
+ ";": "セミコロン",
404
+ "<": "小なり",
405
+ "=": "イコール",
406
+ ">": "大なり",
407
+ "@": "アット",
408
+ "a": "エー",
409
+ "b": "ビー",
410
+ "c": "シー",
411
+ "d": "ディー",
412
+ "e": "イー",
413
+ "f": "エフ",
414
+ "g": "ジー",
415
+ "h": "エイチ",
416
+ "i": "アイ",
417
+ "j": "ジェー",
418
+ "k": "ケー",
419
+ "l": "エル",
420
+ "m": "エム",
421
+ "n": "エヌ",
422
+ "o": "オー",
423
+ "p": "ピー",
424
+ "q": "キュー",
425
+ "r": "アール",
426
+ "s": "エス",
427
+ "t": "ティー",
428
+ "u": "ユー",
429
+ "v": "ブイ",
430
+ "w": "ダブリュー",
431
+ "x": "エックス",
432
+ "y": "ワイ",
433
+ "z": "ゼット",
434
+ "α": "アルファ",
435
+ "β": "ベータ",
436
+ "γ": "ガンマ",
437
+ "δ": "デルタ",
438
+ "ε": "イプシロン",
439
+ "ζ": "ゼータ",
440
+ "η": "イータ",
441
+ "θ": "シータ",
442
+ "ι": "イオタ",
443
+ "κ": "カッパ",
444
+ "λ": "ラムダ",
445
+ "μ": "ミュー",
446
+ "ν": "ニュー",
447
+ "ξ": "クサイ",
448
+ "ο": "オミクロン",
449
+ "π": "パイ",
450
+ "ρ": "ロー",
451
+ "σ": "シグマ",
452
+ "τ": "タウ",
453
+ "υ": "ウプシロン",
454
+ "φ": "ファイ",
455
+ "χ": "カイ",
456
+ "ψ": "プサイ",
457
+ "ω": "オメガ",
458
+ }
459
+
460
+
461
+ _NUMBER_WITH_SEPARATOR_RX = re.compile("[0-9]{1,3}(,[0-9]{3})+")
462
+ _CURRENCY_MAP = {"$": "ドル", "¥": "円", "£": "ポンド", "€": "ユーロ"}
463
+ _CURRENCY_RX = re.compile(r"([$¥£€])([0-9.]*[0-9])")
464
+ _NUMBER_RX = re.compile(r"[0-9]+(\.[0-9]+)?")
465
+
466
+
467
def japanese_convert_numbers_to_words(text: str) -> str:
    """Spell out digit sequences and currency amounts as Japanese words.

    Thousands separators are stripped first, then currency symbols are moved
    after the amount and named, then remaining numbers go through num2words.
    """
    out = _NUMBER_WITH_SEPARATOR_RX.sub(lambda m: m[0].replace(",", ""), text)
    out = _CURRENCY_RX.sub(lambda m: m[2] + _CURRENCY_MAP.get(m[1], m[1]), out)
    return _NUMBER_RX.sub(lambda m: num2words(m[0], lang="ja"), out)
472
+
473
+
474
def japanese_convert_alpha_symbols_to_words(text: str) -> str:
    """Replace Latin letters and symbols with their katakana readings."""
    return "".join(_ALPHASYMBOL_YOMI.get(ch, ch) for ch in text.lower())
476
+
477
+
478
def japanese_text_to_phonemes(text: str) -> str:
    """Full pipeline: normalize, verbalize numbers/symbols, then kana -> phonemes."""
    normalized = unicodedata.normalize("NFKC", text)
    normalized = japanese_convert_numbers_to_words(normalized)
    normalized = japanese_convert_alpha_symbols_to_words(normalized)
    return kata2phoneme(text2kata(normalized))
486
+
487
+
488
def is_japanese_character(char):
    """Return True if *char* lies in a hiragana, katakana, or CJK ideograph range."""
    japanese_ranges = [
        (0x3040, 0x309F),    # hiragana
        (0x30A0, 0x30FF),    # katakana
        (0x4E00, 0x9FFF),    # CJK unified ideographs
        (0x3400, 0x4DBF),    # CJK extension A
        (0x20000, 0x2A6DF),  # CJK extension B
        # further ideograph extensions can be added here as needed
    ]
    code = ord(char)
    return any(lo <= code <= hi for lo, hi in japanese_ranges)
508
+
509
+
510
+ rep_map = {
511
+ ":": ",",
512
+ ";": ",",
513
+ ",": ",",
514
+ "。": ".",
515
+ "!": "!",
516
+ "?": "?",
517
+ "\n": ".",
518
+ "·": ",",
519
+ "、": ",",
520
+ "...": "…",
521
+ }
522
+
523
+
524
def replace_punctuation(text):
    """Map Japanese punctuation to ASCII-ish equivalents (via rep_map) and
    drop every character that is neither Japanese text nor allowed punctuation."""
    mapper = re.compile("|".join(re.escape(key) for key in rep_map))
    replaced = mapper.sub(lambda m: rep_map[m.group()], text)

    # Keep only hiragana/katakana/CJK ranges plus the allowed punctuation set.
    keep_pattern = (
        r"[^\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF\u3400-\u4DBF"
        + "".join(punctuation)
        + r"]+"
    )
    return re.sub(keep_pattern, "", replaced)
538
+
539
+ from pykakasi import kakasi
540
+ # Initialize kakasi object
541
+ kakasi = kakasi()
542
+ # Set options for converting Chinese characters to Katakana
543
+ kakasi.setMode("J", "K") # Chinese to Katakana
544
+ kakasi.setMode("H", "K") # Hiragana to Katakana
545
+ # Convert Chinese characters to Katakana
546
+ conv = kakasi.getConverter()
547
+
548
def text_normalize(text):
    """NFKC-normalize, spell out numbers, keep only Japanese characters,
    normalize punctuation, then convert everything to katakana via kakasi."""
    out = unicodedata.normalize("NFKC", text)
    out = japanese_convert_numbers_to_words(out)
    out = "".join(ch for ch in out if is_japanese_character(ch))
    out = replace_punctuation(out)
    return conv.do(out)
555
+
556
+
557
def distribute_phone(n_phone, n_word):
    """Spread n_phone phonemes across n_word words as evenly as possible.

    Each phoneme is assigned to the currently least-loaded word (earliest
    index on ties), so counts differ by at most one and earlier words get
    the extras.
    """
    counts = [0] * n_word
    for _ in range(n_phone):
        target = counts.index(min(counts))
        counts[target] += 1
    return counts
564
+
565
+
566
+
567
+ # tokenizer = AutoTokenizer.from_pretrained('cl-tohoku/bert-base-japanese-v3')
568
+
569
+ model_id = 'tohoku-nlp/bert-base-japanese-v3'
570
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
571
def g2p(norm_text):
    """Grapheme-to-phoneme for already-normalized Japanese text.

    Returns (phones, tones, word2ph): phones padded with "_" at both ends,
    all tones zero, and word2ph aligning each BERT token (plus the two
    padding positions) with its phoneme count.
    """
    tokenized = tokenizer.tokenize(norm_text)

    # Merge "#"-prefixed sub-word pieces back onto their head token.
    ph_groups = []
    for tok in tokenized:
        if tok.startswith("#"):
            ph_groups[-1].append(tok.replace("#", ""))
        else:
            ph_groups.append([tok])

    phs = []
    word2ph = []
    for group in ph_groups:
        merged = "".join(group)
        if merged == '[UNK]':
            # Unknown tokens become a single pad phoneme.
            phs += ['_']
            word2ph += [1]
            continue
        elif merged in punctuation:
            phs += [merged]
            word2ph += [1]
            continue
        phonemes = kata2phoneme(merged)
        # Every produced phoneme must be in the model's symbol inventory.
        for ph in phonemes:
            assert ph in symbols, (group, norm_text, tokenized, ph)

        per_word = distribute_phone(len(phonemes), len(group))
        assert len(per_word) == len(group)
        word2ph += per_word
        phs += phonemes

    phones = ["_"] + phs + ["_"]
    tones = [0] * len(phones)
    word2ph = [1] + word2ph + [1]
    assert len(word2ph) == len(tokenized) + 2
    return phones, tones, word2ph
613
+
614
def get_bert_feature(text, word2ph, device):
    """Delegate to the shared Japanese BERT feature extractor."""
    from text.japanese_bert import get_bert_feature as _extract

    return _extract(text, word2ph, device=device)
618
+
619
+
620
if __name__ == "__main__":
    # Smoke-test the pipeline on a few sample sentences (only the last
    # assignment is effective; earlier ones are kept as alternate samples).
    # tokenizer = AutoTokenizer.from_pretrained("./bert/bert-base-japanese-v3")
    text = "こんにちは、世界!..."
    text = 'ええ、僕はおきなと申します。こちらの小さいわらべは杏子。ご挨拶が遅れてしまいすみません。あなたの名は?'
    text = 'あの、お前以外のみんなは、全員生きてること?'
    from text.japanese_bert import get_bert_feature

    text = text_normalize(text)
    print(text)
    phones, tones, word2ph = g2p(text)
    bert = get_bert_feature(text, word2ph)

    print(phones, tones, word2ph, bert.shape)
633
+
634
+ # if __name__ == '__main__':
635
+ # from pykakasi import kakasi
636
+ # # Initialize kakasi object
637
+ # kakasi = kakasi()
638
+
639
+ # # Set options for converting Chinese characters to Katakana
640
+ # kakasi.setMode("J", "H") # Chinese to Katakana
641
+ # kakasi.setMode("K", "H") # Hiragana to Katakana
642
+
643
+ # # Convert Chinese characters to Katakana
644
+ # conv = kakasi.getConverter()
645
+ # katakana_text = conv.do('ええ、僕はおきなと申します。こちらの小さいわらべは杏子。ご挨拶が遅れてしまいすみません。あなたの名は?') # Replace with your Chinese text
646
+
647
+ # print(katakana_text) # Output: ニーハオセカイ