xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic; see the registry's advisory page for more details.

Files changed (373) hide show
  1. xinference/_compat.py +24 -2
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +219 -77
  4. xinference/client/restful/restful_client.py +47 -2
  5. xinference/constants.py +1 -0
  6. xinference/core/chat_interface.py +6 -1
  7. xinference/core/model.py +124 -34
  8. xinference/core/supervisor.py +180 -12
  9. xinference/core/utils.py +73 -4
  10. xinference/core/worker.py +102 -4
  11. xinference/deploy/cmdline.py +3 -1
  12. xinference/deploy/test/test_cmdline.py +56 -0
  13. xinference/isolation.py +24 -0
  14. xinference/model/audio/__init__.py +12 -0
  15. xinference/model/audio/core.py +37 -4
  16. xinference/model/audio/cosyvoice.py +39 -6
  17. xinference/model/audio/f5tts.py +200 -0
  18. xinference/model/audio/f5tts_mlx.py +260 -0
  19. xinference/model/audio/fish_speech.py +70 -110
  20. xinference/model/audio/melotts.py +110 -0
  21. xinference/model/audio/model_spec.json +179 -3
  22. xinference/model/audio/model_spec_modelscope.json +27 -0
  23. xinference/model/audio/utils.py +32 -0
  24. xinference/model/audio/whisper.py +35 -10
  25. xinference/model/audio/whisper_mlx.py +208 -0
  26. xinference/model/embedding/core.py +322 -6
  27. xinference/model/embedding/model_spec.json +8 -1
  28. xinference/model/embedding/model_spec_modelscope.json +9 -1
  29. xinference/model/image/core.py +69 -1
  30. xinference/model/image/model_spec.json +145 -4
  31. xinference/model/image/model_spec_modelscope.json +150 -4
  32. xinference/model/image/stable_diffusion/core.py +50 -15
  33. xinference/model/llm/__init__.py +6 -2
  34. xinference/model/llm/llm_family.json +1055 -93
  35. xinference/model/llm/llm_family.py +15 -36
  36. xinference/model/llm/llm_family_modelscope.json +1031 -78
  37. xinference/model/llm/memory.py +1 -1
  38. xinference/model/llm/mlx/core.py +285 -47
  39. xinference/model/llm/sglang/core.py +2 -0
  40. xinference/model/llm/transformers/chatglm.py +9 -5
  41. xinference/model/llm/transformers/cogagent.py +272 -0
  42. xinference/model/llm/transformers/core.py +3 -0
  43. xinference/model/llm/transformers/glm_edge_v.py +230 -0
  44. xinference/model/llm/transformers/qwen2_vl.py +12 -1
  45. xinference/model/llm/transformers/utils.py +16 -8
  46. xinference/model/llm/utils.py +55 -4
  47. xinference/model/llm/vllm/core.py +137 -12
  48. xinference/model/llm/vllm/xavier/__init__.py +13 -0
  49. xinference/model/llm/vllm/xavier/allocator.py +74 -0
  50. xinference/model/llm/vllm/xavier/block.py +111 -0
  51. xinference/model/llm/vllm/xavier/block_manager.py +71 -0
  52. xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
  53. xinference/model/llm/vllm/xavier/collective.py +74 -0
  54. xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
  55. xinference/model/llm/vllm/xavier/engine.py +247 -0
  56. xinference/model/llm/vllm/xavier/executor.py +134 -0
  57. xinference/model/llm/vllm/xavier/scheduler.py +438 -0
  58. xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
  59. xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
  60. xinference/model/llm/vllm/xavier/transfer.py +319 -0
  61. xinference/model/rerank/core.py +11 -4
  62. xinference/model/video/diffusers.py +14 -0
  63. xinference/model/video/model_spec.json +15 -0
  64. xinference/model/video/model_spec_modelscope.json +16 -0
  65. xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
  66. xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
  67. xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
  68. xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
  69. xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
  70. xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
  71. xinference/thirdparty/cosyvoice/bin/train.py +42 -8
  72. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
  73. xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
  74. xinference/thirdparty/cosyvoice/cli/model.py +330 -80
  75. xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
  76. xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
  77. xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
  78. xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
  79. xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
  80. xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
  81. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
  82. xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
  83. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
  84. xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
  85. xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
  86. xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
  87. xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
  88. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
  89. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
  90. xinference/thirdparty/cosyvoice/utils/common.py +28 -1
  91. xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
  92. xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
  93. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
  94. xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
  95. xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
  96. xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
  97. xinference/thirdparty/f5_tts/api.py +166 -0
  98. xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
  99. xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
  100. xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
  101. xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
  102. xinference/thirdparty/f5_tts/eval/README.md +49 -0
  103. xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
  104. xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
  105. xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
  106. xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
  107. xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
  108. xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
  109. xinference/thirdparty/f5_tts/infer/README.md +191 -0
  110. xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
  111. xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
  112. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
  113. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
  114. xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
  115. xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
  116. xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
  117. xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
  118. xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
  119. xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
  120. xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
  121. xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
  122. xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
  123. xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
  124. xinference/thirdparty/f5_tts/model/__init__.py +10 -0
  125. xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
  126. xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
  127. xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
  128. xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
  129. xinference/thirdparty/f5_tts/model/cfm.py +285 -0
  130. xinference/thirdparty/f5_tts/model/dataset.py +319 -0
  131. xinference/thirdparty/f5_tts/model/modules.py +658 -0
  132. xinference/thirdparty/f5_tts/model/trainer.py +366 -0
  133. xinference/thirdparty/f5_tts/model/utils.py +185 -0
  134. xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
  135. xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
  136. xinference/thirdparty/f5_tts/socket_server.py +159 -0
  137. xinference/thirdparty/f5_tts/train/README.md +77 -0
  138. xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
  139. xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
  140. xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
  141. xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
  142. xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
  143. xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
  144. xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
  145. xinference/thirdparty/f5_tts/train/train.py +75 -0
  146. xinference/thirdparty/fish_speech/fish_speech/conversation.py +266 -1
  147. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
  148. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
  149. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
  150. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
  151. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
  152. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +137 -29
  153. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
  154. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
  155. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +17 -11
  156. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
  157. xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
  158. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
  159. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
  160. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
  161. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
  162. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +2 -2
  163. xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +34 -18
  164. xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
  165. xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
  166. xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
  167. xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
  168. xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
  169. xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
  170. xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
  171. xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
  172. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
  173. xinference/thirdparty/fish_speech/tools/llama/generate.py +484 -72
  174. xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
  175. xinference/thirdparty/fish_speech/tools/schema.py +170 -0
  176. xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
  177. xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
  178. xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
  179. xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
  180. xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
  181. xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
  182. xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
  183. xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
  184. xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
  185. xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
  186. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
  187. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
  188. xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
  189. xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
  190. xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
  191. xinference/thirdparty/matcha/utils/utils.py +2 -2
  192. xinference/thirdparty/melo/api.py +135 -0
  193. xinference/thirdparty/melo/app.py +61 -0
  194. xinference/thirdparty/melo/attentions.py +459 -0
  195. xinference/thirdparty/melo/commons.py +160 -0
  196. xinference/thirdparty/melo/configs/config.json +94 -0
  197. xinference/thirdparty/melo/data/example/metadata.list +20 -0
  198. xinference/thirdparty/melo/data_utils.py +413 -0
  199. xinference/thirdparty/melo/download_utils.py +67 -0
  200. xinference/thirdparty/melo/infer.py +25 -0
  201. xinference/thirdparty/melo/init_downloads.py +14 -0
  202. xinference/thirdparty/melo/losses.py +58 -0
  203. xinference/thirdparty/melo/main.py +36 -0
  204. xinference/thirdparty/melo/mel_processing.py +174 -0
  205. xinference/thirdparty/melo/models.py +1030 -0
  206. xinference/thirdparty/melo/modules.py +598 -0
  207. xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
  208. xinference/thirdparty/melo/monotonic_align/core.py +46 -0
  209. xinference/thirdparty/melo/preprocess_text.py +135 -0
  210. xinference/thirdparty/melo/split_utils.py +174 -0
  211. xinference/thirdparty/melo/text/__init__.py +35 -0
  212. xinference/thirdparty/melo/text/chinese.py +199 -0
  213. xinference/thirdparty/melo/text/chinese_bert.py +107 -0
  214. xinference/thirdparty/melo/text/chinese_mix.py +253 -0
  215. xinference/thirdparty/melo/text/cleaner.py +36 -0
  216. xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
  217. xinference/thirdparty/melo/text/cmudict.rep +129530 -0
  218. xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
  219. xinference/thirdparty/melo/text/english.py +284 -0
  220. xinference/thirdparty/melo/text/english_bert.py +39 -0
  221. xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
  222. xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
  223. xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
  224. xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
  225. xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
  226. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
  227. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
  228. xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
  229. xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
  230. xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
  231. xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
  232. xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
  233. xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
  234. xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
  235. xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
  236. xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
  237. xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
  238. xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
  239. xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
  240. xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
  241. xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
  242. xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
  243. xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
  244. xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
  245. xinference/thirdparty/melo/text/french.py +94 -0
  246. xinference/thirdparty/melo/text/french_bert.py +39 -0
  247. xinference/thirdparty/melo/text/japanese.py +647 -0
  248. xinference/thirdparty/melo/text/japanese_bert.py +49 -0
  249. xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
  250. xinference/thirdparty/melo/text/korean.py +192 -0
  251. xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
  252. xinference/thirdparty/melo/text/spanish.py +122 -0
  253. xinference/thirdparty/melo/text/spanish_bert.py +39 -0
  254. xinference/thirdparty/melo/text/symbols.py +290 -0
  255. xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
  256. xinference/thirdparty/melo/train.py +635 -0
  257. xinference/thirdparty/melo/train.sh +19 -0
  258. xinference/thirdparty/melo/transforms.py +209 -0
  259. xinference/thirdparty/melo/utils.py +424 -0
  260. xinference/types.py +17 -1
  261. xinference/web/ui/build/asset-manifest.json +6 -6
  262. xinference/web/ui/build/index.html +1 -1
  263. xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
  264. xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
  265. xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
  266. xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
  267. xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
  268. xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
  269. xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
  270. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
  271. xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
  272. xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
  273. xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
  274. xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
  275. xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
  276. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
  277. xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
  278. xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
  279. xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
  280. xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
  281. xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
  282. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
  283. xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
  284. xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
  285. xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
  286. xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
  287. xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
  288. xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
  289. xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
  290. xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
  291. xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
  292. xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
  293. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
  294. xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
  295. xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
  296. xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
  297. xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
  298. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
  299. xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
  300. xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
  301. xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
  302. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
  303. xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
  304. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
  305. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
  306. xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
  307. xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
  308. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
  309. xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
  310. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
  311. xinference/web/ui/node_modules/.package-lock.json +67 -3
  312. xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
  313. xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
  314. xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
  315. xinference/web/ui/node_modules/i18next/package.json +129 -0
  316. xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
  317. xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
  318. xinference/web/ui/node_modules/react-i18next/package.json +162 -0
  319. xinference/web/ui/node_modules/void-elements/package.json +34 -0
  320. xinference/web/ui/package-lock.json +69 -3
  321. xinference/web/ui/package.json +2 -0
  322. xinference/web/ui/src/locales/en.json +186 -0
  323. xinference/web/ui/src/locales/zh.json +186 -0
  324. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/METADATA +96 -36
  325. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/RECORD +335 -146
  326. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/WHEEL +1 -1
  327. xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
  328. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  329. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  330. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  331. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  332. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  333. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  334. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  335. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  336. xinference/thirdparty/fish_speech/tools/api.py +0 -440
  337. xinference/thirdparty/fish_speech/tools/commons.py +0 -35
  338. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  339. xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -34
  340. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  341. xinference/thirdparty/fish_speech/tools/webui.py +0 -485
  342. xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
  343. xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
  344. xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
  345. xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
  346. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
  347. xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
  348. xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
  349. xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
  350. xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
  351. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
  352. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
  353. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
  354. xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
  355. xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
  356. xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
  357. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
  358. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
  359. xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
  360. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
  361. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
  362. xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
  363. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
  364. xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
  365. /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
  366. /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
  367. /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
  368. /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
  369. /xinference/thirdparty/{fish_speech/fish_speech/configs → melo/text/fr_phonemizer}/__init__.py +0 -0
  370. /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
  371. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
  372. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
  373. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,284 @@
1
+ import pickle
2
+ import os
3
+ import re
4
+ from g2p_en import G2p
5
+
6
+ from . import symbols
7
+
8
+ from .english_utils.abbreviations import expand_abbreviations
9
+ from .english_utils.time_norm import expand_time_english
10
+ from .english_utils.number_norm import normalize_numbers
11
+ from .japanese import distribute_phone
12
+
13
+ from transformers import AutoTokenizer
14
+
15
# Paths to the bundled CMU pronouncing dictionary and its pickled cache,
# resolved relative to this module's directory.
current_file_path = os.path.dirname(__file__)
CMU_DICT_PATH = os.path.join(current_file_path, "cmudict.rep")
CACHE_PATH = os.path.join(current_file_path, "cmudict_cache.pickle")
# Module-level grapheme-to-phoneme model (g2p_en); used as a fallback for
# words that are missing from the CMU dictionary.
_g2p = G2p()
19
+
20
# ARPAbet phone symbols (including stress digits 0/1/2) that the g2p_en
# fallback can emit.  Phones in this set get their stress digit split off
# into a tone value by refine_ph(); anything else is kept verbatim.
arpa = {
    "AH0",
    "S",
    "AH1",
    "EY2",
    "AE2",
    "EH0",
    "OW2",
    "UH0",
    "NG",
    "B",
    "G",
    "AY0",
    "M",
    "AA0",
    "F",
    "AO0",
    "ER2",
    "UH1",
    "IY1",
    "AH2",
    "DH",
    "IY0",
    "EY1",
    "IH0",
    "K",
    "N",
    "W",
    "IY2",
    "T",
    "AA1",
    "ER1",
    "EH2",
    "OY0",
    "UH2",
    "UW1",
    "Z",
    "AW2",
    "AW1",
    "V",
    "UW2",
    "AA2",
    "ER",
    "AW0",
    "UW0",
    "R",
    "OW1",
    "EH1",
    "ZH",
    "AE0",
    "IH2",
    "IH",
    "Y",
    "JH",
    "P",
    "AY1",
    "EY0",
    "OY2",
    "TH",
    "HH",
    "D",
    "ER0",
    "CH",
    "AO1",
    "AE1",
    "AO2",
    "OY1",
    "AY2",
    "IH1",
    "OW0",
    "L",
    "SH",
}
93
+
94
+
95
def post_replace_ph(ph):
    """Normalize a single phone/punctuation token.

    Maps punctuation variants (including CJK punctuation) to their
    canonical single-character symbols, then replaces any token that is
    not in the model's symbol inventory with the placeholder "UNK".

    :param ph: a phone or punctuation string produced by g2p.
    :return: the normalized symbol, or "UNK" if it is not in ``symbols``.
    """
    rep_map = {
        ":": ",",
        ";": ",",
        ",": ",",
        "。": ".",
        "!": "!",
        "?": "?",
        "\n": ".",
        "·": ",",
        "、": ",",
        "...": "…",
        "v": "V",
    }
    # Single dict lookup replaces the original's `in rep_map.keys()` test
    # followed by a second subscript lookup.
    ph = rep_map.get(ph, ph)
    # The original tested membership in `symbols` twice (once to return,
    # once to substitute); one test is sufficient and equivalent.
    if ph not in symbols:
        ph = "UNK"
    return ph
116
+
117
+
118
def read_dict():
    """Parse the bundled ``cmudict.rep`` file into a pronunciation dict.

    Skips the file's header (everything before line 49) and builds a
    mapping of WORD -> list of syllables, each syllable being a list of
    phone strings.
    """
    g2p_dict = {}
    start_line = 49
    with open(CMU_DICT_PATH) as dict_file:
        for line_number, raw_line in enumerate(dict_file, start=1):
            # Header lines carry licensing text, not entries.
            if line_number < start_line:
                continue
            fields = raw_line.strip().split(" ")
            word = fields[0]
            # Syllables are separated by " - "; phones within a syllable
            # by single spaces.
            g2p_dict[word] = [
                syllable.split(" ") for syllable in fields[1].split(" - ")
            ]
    return g2p_dict
140
+
141
+
142
def cache_dict(g2p_dict, file_path):
    """Serialize *g2p_dict* to *file_path* as a pickle for fast reloads."""
    with open(file_path, "wb") as out_file:
        pickle.dump(g2p_dict, out_file)
145
+
146
+
147
def get_dict():
    """Return the CMU pronunciation dict, building the pickle cache on demand.

    On the first call (no cache file yet) the raw dictionary is parsed and
    cached; afterwards the pickle is loaded directly.
    """
    if not os.path.exists(CACHE_PATH):
        g2p_dict = read_dict()
        cache_dict(g2p_dict, CACHE_PATH)
        return g2p_dict
    # NOTE(review): pickle.load on a locally generated cache file — safe as
    # long as CACHE_PATH is never user-supplied.
    with open(CACHE_PATH, "rb") as cache_file:
        return pickle.load(cache_file)
156
+
157
+
158
+ eng_dict = get_dict()
159
+
160
+
161
def refine_ph(phn):
    """Split an ARPAbet phone into (lowercase phone, tone).

    A trailing stress digit N is stripped and becomes tone N + 1; phones
    without a stress digit get tone 0.
    """
    stress = re.search(r"\d$", phn)
    if stress is None:
        return phn.lower(), 0
    return phn[:-1].lower(), int(stress.group()) + 1
167
+
168
+
169
def refine_syllables(syllables):
    """Flatten nested syllable phone lists into parallel (phonemes, tones)."""
    phonemes = []
    tones = []
    for syllable in syllables:
        for raw_phone in syllable:
            phone, tone = refine_ph(raw_phone)
            phonemes.append(phone)
            tones.append(tone)
    return phonemes, tones
179
+
180
+
181
def text_normalize(text):
    """Lowercase text and expand times, numbers and abbreviations for TTS."""
    # Applied in order: casing first so the expanders see lowercase input.
    for transform in (str.lower, expand_time_english, normalize_numbers, expand_abbreviations):
        text = transform(text)
    return text
187
+
188
# Shared HuggingFace wordpiece tokenizer used by g2p() to group subword
# tokens back into whole words (downloads the vocab on first use).
model_id = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_id)
190
def g2p_old(text):
    """Legacy word-level grapheme-to-phoneme conversion.

    Splits on punctuation/whitespace, looks each word up in `eng_dict`, and
    falls back to the `_g2p` engine for out-of-vocabulary words.

    Returns:
        tuple: (phones, tones, word2ph) — word2ph is a placeholder of all 1s.
    """
    # Removed: an unused `tokenized = tokenizer.tokenize(text)` local that
    # cost a tokenizer pass without affecting the result.
    phones = []
    tones = []
    words = re.split(r"([,;.\-\?\!\s+])", text)
    for w in words:
        if w.upper() in eng_dict:
            # Dictionary hit: phones and tones come from the CMU entry.
            phns, tns = refine_syllables(eng_dict[w.upper()])
            phones += phns
            tones += tns
        else:
            # OOV fallback: run the neural g2p and split stress digits off
            # any ARPA phones it emits.
            phone_list = list(filter(lambda p: p != " ", _g2p(w)))
            for ph in phone_list:
                if ph in arpa:
                    ph, tn = refine_ph(ph)
                    phones.append(ph)
                    tones.append(tn)
                else:
                    phones.append(ph)
                    tones.append(0)
    # TODO: implement a real word2ph alignment; every phone currently maps
    # to exactly one word position.
    word2ph = [1 for i in phones]

    phones = [post_replace_ph(i) for i in phones]
    return phones, tones, word2ph
216
+
217
def g2p(text, pad_start_end=True, tokenized=None):
    """Grapheme-to-phoneme conversion aligned to BERT wordpieces.

    Args:
        text: input string (tokenized with the module tokenizer when
            `tokenized` is not supplied).
        pad_start_end: when True, wrap the output in "_" padding symbols.
        tokenized: optional pre-computed wordpiece token list.

    Returns:
        tuple: (phones, tones, word2ph) where word2ph[i] is the number of
        phones attributed to wordpiece i (plus the padding entries).
    """
    # Removed: an unused `phs = []` local and a commented-out pdb trace.
    if tokenized is None:
        tokenized = tokenizer.tokenize(text)
    # Merge "##"-prefixed wordpiece continuations back into whole words.
    ph_groups = []
    for t in tokenized:
        if not t.startswith("#"):
            ph_groups.append([t])
        else:
            ph_groups[-1].append(t.replace("#", ""))

    phones = []
    tones = []
    word2ph = []
    for group in ph_groups:
        w = "".join(group)
        phone_len = 0
        word_len = len(group)
        if w.upper() in eng_dict:
            # Dictionary hit: phones and tones come from the CMU entry.
            phns, tns = refine_syllables(eng_dict[w.upper()])
            phones += phns
            tones += tns
            phone_len += len(phns)
        else:
            # OOV fallback via the `_g2p` engine.
            phone_list = list(filter(lambda p: p != " ", _g2p(w)))
            for ph in phone_list:
                if ph in arpa:
                    ph, tn = refine_ph(ph)
                    phones.append(ph)
                    tones.append(tn)
                else:
                    phones.append(ph)
                    tones.append(0)
                phone_len += 1
        # Distribute this word's phones across its wordpiece tokens so
        # word2ph stays aligned with the BERT input.
        word2ph += distribute_phone(phone_len, word_len)
    phones = [post_replace_ph(i) for i in phones]

    if pad_start_end:
        phones = ["_"] + phones + ["_"]
        tones = [0] + tones + [0]
        word2ph = [1] + word2ph + [1]
    return phones, tones, word2ph
261
+
262
def get_bert_feature(text, word2ph, device=None):
    """Delegate phone-level BERT feature extraction to `text.english_bert`.

    Imported lazily so the BERT model is only loaded when features are needed.
    """
    from text import english_bert as _english_bert

    return _english_bert.get_bert_feature(text, word2ph, device=device)
266
+
267
+ if __name__ == "__main__":
268
+ # print(get_dict())
269
+ # print(eng_word_to_phoneme("hello"))
270
+ from text.english_bert import get_bert_feature
271
+ text = "In this paper, we propose 1 DSPGAN, a N-F-T GAN-based universal vocoder."
272
+ text = text_normalize(text)
273
+ phones, tones, word2ph = g2p(text)
274
+ import pdb; pdb.set_trace()
275
+ bert = get_bert_feature(text, word2ph)
276
+
277
+ print(phones, tones, word2ph, bert.shape)
278
+
279
+ # all_phones = set()
280
+ # for k, syllables in eng_dict.items():
281
+ # for group in syllables:
282
+ # for ph in group:
283
+ # all_phones.add(ph)
284
+ # print(all_phones)
@@ -0,0 +1,39 @@
1
+ import torch
2
+ from transformers import AutoTokenizer, AutoModelForMaskedLM
3
+ import sys
4
+
5
# BERT backbone whose hidden states provide the phone-level features.
model_id = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Lazily-initialized model handle; loaded on the first get_bert_feature call.
model = None
8
+
9
def get_bert_feature(text, word2ph, device=None):
    """Return phone-level BERT features for `text`.

    Each token's hidden-state vector is repeated word2ph[i] times so the
    output has one column per phone: shape (hidden_dim, sum(word2ph)).

    NOTE(review): assumes len(word2ph) equals the tokenized input length
    including special tokens — enforced only by the assert below.
    """
    global model
    # On macOS, upgrade an explicit "cpu" request to Apple MPS when available.
    if (
        sys.platform == "darwin"
        and torch.backends.mps.is_available()
        and device == "cpu"
    ):
        device = "mps"
    if not device:
        device = "cuda"  # NOTE(review): no CUDA availability check — fails on CPU-only hosts
    if model is None:
        # One-time lazy load, cached in the module-level `model`.
        model = AutoModelForMaskedLM.from_pretrained(model_id).to(
            device
        )
    with torch.no_grad():
        inputs = tokenizer(text, return_tensors="pt")
        for i in inputs:
            inputs[i] = inputs[i].to(device)
        res = model(**inputs, output_hidden_states=True)
        # [-3:-2] selects a single layer (third from last); cat over one
        # element is a no-op concatenation kept from the multi-layer variant.
        res = torch.cat(res["hidden_states"][-3:-2], -1)[0].cpu()

    assert inputs["input_ids"].shape[-1] == len(word2ph)
    word2phone = word2ph
    phone_level_feature = []
    for i in range(len(word2phone)):
        # Repeat token i's feature once per phone aligned to it.
        repeat_feature = res[i].repeat(word2phone[i], 1)
        phone_level_feature.append(repeat_feature)

    phone_level_feature = torch.cat(phone_level_feature, dim=0)

    return phone_level_feature.T
@@ -0,0 +1,35 @@
1
+ import re
2
+
3
+ # List of (regular expression, replacement) pairs for abbreviations in english:
4
# (compiled regex, replacement) pairs for English abbreviations; each pattern
# matches the abbreviation plus its trailing period at a word boundary.
abbreviations_en = [
    (re.compile(r"\b%s\." % abbrev, re.IGNORECASE), expansion)
    for abbrev, expansion in [
        ("mrs", "misess"),
        ("mr", "mister"),
        ("dr", "doctor"),
        ("st", "saint"),
        ("co", "company"),
        ("jr", "junior"),
        ("maj", "major"),
        ("gen", "general"),
        ("drs", "doctors"),
        ("rev", "reverend"),
        ("lt", "lieutenant"),
        ("hon", "honorable"),
        ("sgt", "sergeant"),
        ("capt", "captain"),
        ("esq", "esquire"),
        ("ltd", "limited"),
        ("col", "colonel"),
        ("ft", "fort"),
    ]
]
27
+
28
+ def expand_abbreviations(text, lang="en"):
29
+ if lang == "en":
30
+ _abbreviations = abbreviations_en
31
+ else:
32
+ raise NotImplementedError()
33
+ for regex, replacement in _abbreviations:
34
+ text = re.sub(regex, replacement, text)
35
+ return text
@@ -0,0 +1,97 @@
1
+ """ from https://github.com/keithito/tacotron """
2
+
3
+ import re
4
+ from typing import Dict
5
+
6
+ import inflect
7
+
8
# Shared inflect engine for number-to-words conversion.
_inflect = inflect.engine()
# Patterns applied in order by normalize_numbers below.
_comma_number_re = re.compile(r"([0-9][0-9\,]+[0-9])")  # 1,234-style groupings
_decimal_number_re = re.compile(r"([0-9]+\.[0-9]+)")  # decimal fractions
_currency_re = re.compile(r"(£|\$|¥)([0-9\,\.]*[0-9]+)")  # symbol + amount
_ordinal_re = re.compile(r"[0-9]+(st|nd|rd|th)")  # 1st, 2nd, 3rd, ...
_number_re = re.compile(r"-?[0-9]+")  # plain (possibly negative) integers
14
+
15
+
16
def _remove_commas(m):
    """Strip thousands separators from a matched number ("1,234" -> "1234")."""
    digits = m.group(1)
    return digits.replace(",", "")
18
+
19
+
20
def _expand_decimal_point(m):
    """Read a decimal number aloud: "3.14" -> "3 point 14"."""
    return " point ".join(m.group(1).split("."))
22
+
23
+
24
def __expand_currency(value: str, inflection: Dict[float, str]) -> str:
    """Spell out a currency amount using the unit names in `inflection`.

    `inflection` maps 1/2 to singular/plural whole units and 0.01/0.02 to
    singular/plural fractional units.
    """
    parts = value.replace(",", "").split(".")
    if len(parts) > 2:
        # More than one decimal point: unexpected, append the plural unit.
        return f"{value} {inflection[2]}"
    words = []
    whole = int(parts[0]) if parts[0] else 0
    if whole > 0:
        whole_unit = inflection.get(whole, inflection[2])
        words.append(f"{whole} {whole_unit}")
    cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
    if cents > 0:
        cent_unit = inflection.get(cents / 100, inflection[0.02])
        words.append(f"{cents} {cent_unit}")
    if not words:
        return f"zero {inflection[2]}"
    return " ".join(words)


def _expand_currency(m: "re.Match") -> str:
    """Expand a matched currency expression ("$1.50" -> "1 dollar 50 cents")."""
    currencies = {
        "$": {
            0.01: "cent",
            0.02: "cents",
            1: "dollar",
            2: "dollars",
        },
        "€": {
            0.01: "cent",
            0.02: "cents",
            1: "euro",
            2: "euros",
        },
        "£": {
            0.01: "penny",
            0.02: "pence",
            1: "pound sterling",
            2: "pounds sterling",
        },
        "¥": {
            # TODO rin
            0.02: "sen",
            2: "yen",
        },
    }
    symbol = m.group(1)
    amount = m.group(2)
    return __expand_currency(amount, currencies[symbol])
72
+
73
+
74
def _expand_ordinal(m):
    # inflect renders the matched ordinal text ("1st") directly ("first").
    return _inflect.number_to_words(m.group(0))
76
+
77
+
78
def _expand_number(m):
    """Spell out a matched integer, reading 1001-2999 as years.

    E.g. 1999 -> "nineteen ninety nine", 2000 -> "two thousand",
    2005 -> "two thousand five"; other numbers read normally.
    """
    num = int(m.group(0))
    if not 1000 < num < 3000:
        return _inflect.number_to_words(num, andword="")
    if num == 2000:
        return "two thousand"
    if 2000 < num < 2010:
        return "two thousand " + _inflect.number_to_words(num % 100)
    if num % 100 == 0:
        return _inflect.number_to_words(num // 100) + " hundred"
    # Year-style reading in two-digit groups ("19, 99" -> "nineteen ninety nine").
    return _inflect.number_to_words(num, andword="", zero="oh", group=2).replace(", ", " ")
89
+
90
+
91
def normalize_numbers(text):
    """Replace numeric expressions in `text` with their spoken English form."""
    # Order matters: commas are stripped first so later patterns see plain
    # digit runs, and currency/decimals are handled before bare integers.
    for pattern, replacement in (
        (_comma_number_re, _remove_commas),
        (_currency_re, _expand_currency),
        (_decimal_number_re, _expand_decimal_point),
        (_ordinal_re, _expand_ordinal),
        (_number_re, _expand_number),
    ):
        text = pattern.sub(replacement, text)
    return text
@@ -0,0 +1,47 @@
1
+ import re
2
+
3
+ import inflect
4
+
5
# Shared inflect engine for spelling out hour/minute values.
_inflect = inflect.engine()
6
+
7
# Matches clock times like "9:30", "12:05 pm", "7:45 a.m.".
# Fix: the pattern previously used `\\.` inside a raw string, which matches a
# literal backslash plus any character, so dotted suffixes ("a.m.", "p.m.")
# could never match; in a raw string the escaped dot is spelled `\.`.
_time_re = re.compile(
    r"""\b
    ((0?[0-9])|(1[0-1])|(1[2-9])|(2[0-3]))  # hours
    :
    ([0-5][0-9])                            # minutes
    \s*(a\.m\.|am|pm|p\.m\.|a\.m|p\.m)?     # am/pm
    \b""",
    re.IGNORECASE | re.X,
)
16
+
17
+
18
def _expand_num(n: int) -> str:
    # Spell out an integer ("9" -> "nine") via the shared inflect engine.
    return _inflect.number_to_words(n)
20
+
21
+
22
+ def _expand_time_english(match: "re.Match") -> str:
23
+ hour = int(match.group(1))
24
+ past_noon = hour >= 12
25
+ time = []
26
+ if hour > 12:
27
+ hour -= 12
28
+ elif hour == 0:
29
+ hour = 12
30
+ past_noon = True
31
+ time.append(_expand_num(hour))
32
+
33
+ minute = int(match.group(6))
34
+ if minute > 0:
35
+ if minute < 10:
36
+ time.append("oh")
37
+ time.append(_expand_num(minute))
38
+ am_pm = match.group(7)
39
+ if am_pm is None:
40
+ time.append("p m" if past_noon else "a m")
41
+ else:
42
+ time.extend(list(am_pm.replace(".", "")))
43
+ return " ".join(time)
44
+
45
+
46
def expand_time_english(text: str) -> str:
    """Expand clock times like "9:30 am" within `text` into spoken words."""
    return _time_re.sub(_expand_time_english, text)
@@ -0,0 +1,140 @@
1
+ import abc
2
+ from typing import List, Tuple
3
+
4
+ from .punctuation import Punctuation
5
+
6
+
7
class BasePhonemizer(abc.ABC):
    """Base phonemizer class

    Phonemization follows the following steps:
    1. Preprocessing:
        - remove empty lines
        - remove punctuation
        - keep track of punctuation marks

    2. Phonemization:
        - convert text to phonemes

    3. Postprocessing:
        - join phonemes
        - restore punctuation marks

    Args:
        language (str):
            Language used by the phonemizer.

        punctuations (List[str]):
            List of punctuation marks to be preserved.

        keep_puncs (bool):
            Whether to preserve punctuation marks or not.
    """

    # NOTE(review): the default `punctuations` value is evaluated once at
    # class-definition time; safe only if default_puncs() returns a value
    # that is never mutated — confirm in Punctuation.
    def __init__(self, language, punctuations=Punctuation.default_puncs(), keep_puncs=False):
        # ensure the backend is installed on the system
        if not self.is_available():
            raise RuntimeError("{} not installed on your system".format(self.name()))  # pragma: nocover

        # ensure the backend support the requested language
        self._language = self._init_language(language)

        # setup punctuation processing
        self._keep_puncs = keep_puncs
        self._punctuator = Punctuation(punctuations)

    def _init_language(self, language):
        """Language initialization

        This method may be overloaded in child classes (see Segments backend)

        Raises:
            RuntimeError: if `language` is not in supported_languages().
        """
        if not self.is_supported_language(language):
            raise RuntimeError(f'language "{language}" is not supported by the ' f"{self.name()} backend")
        return language

    @property
    def language(self):
        """The language code configured to be used for phonemization"""
        return self._language

    @staticmethod
    @abc.abstractmethod
    def name():
        """The name of the backend"""
        ...

    @classmethod
    @abc.abstractmethod
    def is_available(cls):
        """Returns True if the backend is installed, False otherwise"""
        ...

    @classmethod
    @abc.abstractmethod
    def version(cls):
        """Return the backend version as a tuple (major, minor, patch)"""
        ...

    @staticmethod
    @abc.abstractmethod
    def supported_languages():
        """Return a dict of language codes -> name supported by the backend"""
        ...

    def is_supported_language(self, language):
        """Returns True if `language` is supported by the backend"""
        return language in self.supported_languages()

    @abc.abstractmethod
    def _phonemize(self, text, separator):
        """The main phonemization method"""

    def _phonemize_preprocess(self, text) -> Tuple[List[str], List]:
        """Preprocess the text before phonemization

        1. remove spaces
        2. remove punctuation

        Override this if you need a different behaviour
        """
        text = text.strip()
        if self._keep_puncs:
            # a tuple (text, punctuation marks)
            return self._punctuator.strip_to_restore(text)
        return [self._punctuator.strip(text)], []

    def _phonemize_postprocess(self, phonemized, punctuations) -> str:
        """Postprocess the raw phonemized output

        Override this if you need a different behaviour
        """
        if self._keep_puncs:
            # Re-insert the punctuation marks captured during preprocessing.
            return self._punctuator.restore(phonemized, punctuations)[0]
        return phonemized[0]

    def phonemize(self, text: str, separator="|", language: str = None) -> str:  # pylint: disable=unused-argument
        """Returns the `text` phonemized for the given language

        Args:
            text (str):
                Text to be phonemized.

            separator (str):
                string separator used between phonemes. Default to '_'.

        Returns:
            (str): Phonemized text
        """
        # preprocess -> per-segment phonemize -> postprocess pipeline.
        text, punctuations = self._phonemize_preprocess(text)
        phonemized = []
        for t in text:
            p = self._phonemize(t, separator)
            phonemized.append(p)
        phonemized = self._phonemize_postprocess(phonemized, punctuations)
        return phonemized

    def print_logs(self, level: int = 0):
        # Human-readable config dump, indented `level` tab stops deep.
        indent = "\t" * level
        print(f"{indent}| > phoneme language: {self.language}")
        print(f"{indent}| > phoneme backend: {self.name()}")