xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff shows the content of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only and reflects the changes between those versions.

Files changed (373)
  1. xinference/_compat.py +24 -2
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +219 -77
  4. xinference/client/restful/restful_client.py +47 -2
  5. xinference/constants.py +1 -0
  6. xinference/core/chat_interface.py +6 -1
  7. xinference/core/model.py +124 -34
  8. xinference/core/supervisor.py +180 -12
  9. xinference/core/utils.py +73 -4
  10. xinference/core/worker.py +102 -4
  11. xinference/deploy/cmdline.py +3 -1
  12. xinference/deploy/test/test_cmdline.py +56 -0
  13. xinference/isolation.py +24 -0
  14. xinference/model/audio/__init__.py +12 -0
  15. xinference/model/audio/core.py +37 -4
  16. xinference/model/audio/cosyvoice.py +39 -6
  17. xinference/model/audio/f5tts.py +200 -0
  18. xinference/model/audio/f5tts_mlx.py +260 -0
  19. xinference/model/audio/fish_speech.py +70 -110
  20. xinference/model/audio/melotts.py +110 -0
  21. xinference/model/audio/model_spec.json +179 -3
  22. xinference/model/audio/model_spec_modelscope.json +27 -0
  23. xinference/model/audio/utils.py +32 -0
  24. xinference/model/audio/whisper.py +35 -10
  25. xinference/model/audio/whisper_mlx.py +208 -0
  26. xinference/model/embedding/core.py +322 -6
  27. xinference/model/embedding/model_spec.json +8 -1
  28. xinference/model/embedding/model_spec_modelscope.json +9 -1
  29. xinference/model/image/core.py +69 -1
  30. xinference/model/image/model_spec.json +145 -4
  31. xinference/model/image/model_spec_modelscope.json +150 -4
  32. xinference/model/image/stable_diffusion/core.py +50 -15
  33. xinference/model/llm/__init__.py +6 -2
  34. xinference/model/llm/llm_family.json +1055 -93
  35. xinference/model/llm/llm_family.py +15 -36
  36. xinference/model/llm/llm_family_modelscope.json +1031 -78
  37. xinference/model/llm/memory.py +1 -1
  38. xinference/model/llm/mlx/core.py +285 -47
  39. xinference/model/llm/sglang/core.py +2 -0
  40. xinference/model/llm/transformers/chatglm.py +9 -5
  41. xinference/model/llm/transformers/cogagent.py +272 -0
  42. xinference/model/llm/transformers/core.py +3 -0
  43. xinference/model/llm/transformers/glm_edge_v.py +230 -0
  44. xinference/model/llm/transformers/qwen2_vl.py +12 -1
  45. xinference/model/llm/transformers/utils.py +16 -8
  46. xinference/model/llm/utils.py +55 -4
  47. xinference/model/llm/vllm/core.py +137 -12
  48. xinference/model/llm/vllm/xavier/__init__.py +13 -0
  49. xinference/model/llm/vllm/xavier/allocator.py +74 -0
  50. xinference/model/llm/vllm/xavier/block.py +111 -0
  51. xinference/model/llm/vllm/xavier/block_manager.py +71 -0
  52. xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
  53. xinference/model/llm/vllm/xavier/collective.py +74 -0
  54. xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
  55. xinference/model/llm/vllm/xavier/engine.py +247 -0
  56. xinference/model/llm/vllm/xavier/executor.py +134 -0
  57. xinference/model/llm/vllm/xavier/scheduler.py +438 -0
  58. xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
  59. xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
  60. xinference/model/llm/vllm/xavier/transfer.py +319 -0
  61. xinference/model/rerank/core.py +11 -4
  62. xinference/model/video/diffusers.py +14 -0
  63. xinference/model/video/model_spec.json +15 -0
  64. xinference/model/video/model_spec_modelscope.json +16 -0
  65. xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
  66. xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
  67. xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
  68. xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
  69. xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
  70. xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
  71. xinference/thirdparty/cosyvoice/bin/train.py +42 -8
  72. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
  73. xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
  74. xinference/thirdparty/cosyvoice/cli/model.py +330 -80
  75. xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
  76. xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
  77. xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
  78. xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
  79. xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
  80. xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
  81. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
  82. xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
  83. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
  84. xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
  85. xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
  86. xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
  87. xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
  88. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
  89. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
  90. xinference/thirdparty/cosyvoice/utils/common.py +28 -1
  91. xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
  92. xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
  93. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
  94. xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
  95. xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
  96. xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
  97. xinference/thirdparty/f5_tts/api.py +166 -0
  98. xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
  99. xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
  100. xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
  101. xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
  102. xinference/thirdparty/f5_tts/eval/README.md +49 -0
  103. xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
  104. xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
  105. xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
  106. xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
  107. xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
  108. xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
  109. xinference/thirdparty/f5_tts/infer/README.md +191 -0
  110. xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
  111. xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
  112. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
  113. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
  114. xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
  115. xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
  116. xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
  117. xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
  118. xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
  119. xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
  120. xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
  121. xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
  122. xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
  123. xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
  124. xinference/thirdparty/f5_tts/model/__init__.py +10 -0
  125. xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
  126. xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
  127. xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
  128. xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
  129. xinference/thirdparty/f5_tts/model/cfm.py +285 -0
  130. xinference/thirdparty/f5_tts/model/dataset.py +319 -0
  131. xinference/thirdparty/f5_tts/model/modules.py +658 -0
  132. xinference/thirdparty/f5_tts/model/trainer.py +366 -0
  133. xinference/thirdparty/f5_tts/model/utils.py +185 -0
  134. xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
  135. xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
  136. xinference/thirdparty/f5_tts/socket_server.py +159 -0
  137. xinference/thirdparty/f5_tts/train/README.md +77 -0
  138. xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
  139. xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
  140. xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
  141. xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
  142. xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
  143. xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
  144. xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
  145. xinference/thirdparty/f5_tts/train/train.py +75 -0
  146. xinference/thirdparty/fish_speech/fish_speech/conversation.py +266 -1
  147. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
  148. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
  149. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
  150. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
  151. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
  152. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +137 -29
  153. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
  154. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
  155. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +17 -11
  156. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
  157. xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
  158. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
  159. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
  160. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
  161. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
  162. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +2 -2
  163. xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +34 -18
  164. xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
  165. xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
  166. xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
  167. xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
  168. xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
  169. xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
  170. xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
  171. xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
  172. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
  173. xinference/thirdparty/fish_speech/tools/llama/generate.py +484 -72
  174. xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
  175. xinference/thirdparty/fish_speech/tools/schema.py +170 -0
  176. xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
  177. xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
  178. xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
  179. xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
  180. xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
  181. xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
  182. xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
  183. xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
  184. xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
  185. xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
  186. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
  187. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
  188. xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
  189. xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
  190. xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
  191. xinference/thirdparty/matcha/utils/utils.py +2 -2
  192. xinference/thirdparty/melo/api.py +135 -0
  193. xinference/thirdparty/melo/app.py +61 -0
  194. xinference/thirdparty/melo/attentions.py +459 -0
  195. xinference/thirdparty/melo/commons.py +160 -0
  196. xinference/thirdparty/melo/configs/config.json +94 -0
  197. xinference/thirdparty/melo/data/example/metadata.list +20 -0
  198. xinference/thirdparty/melo/data_utils.py +413 -0
  199. xinference/thirdparty/melo/download_utils.py +67 -0
  200. xinference/thirdparty/melo/infer.py +25 -0
  201. xinference/thirdparty/melo/init_downloads.py +14 -0
  202. xinference/thirdparty/melo/losses.py +58 -0
  203. xinference/thirdparty/melo/main.py +36 -0
  204. xinference/thirdparty/melo/mel_processing.py +174 -0
  205. xinference/thirdparty/melo/models.py +1030 -0
  206. xinference/thirdparty/melo/modules.py +598 -0
  207. xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
  208. xinference/thirdparty/melo/monotonic_align/core.py +46 -0
  209. xinference/thirdparty/melo/preprocess_text.py +135 -0
  210. xinference/thirdparty/melo/split_utils.py +174 -0
  211. xinference/thirdparty/melo/text/__init__.py +35 -0
  212. xinference/thirdparty/melo/text/chinese.py +199 -0
  213. xinference/thirdparty/melo/text/chinese_bert.py +107 -0
  214. xinference/thirdparty/melo/text/chinese_mix.py +253 -0
  215. xinference/thirdparty/melo/text/cleaner.py +36 -0
  216. xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
  217. xinference/thirdparty/melo/text/cmudict.rep +129530 -0
  218. xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
  219. xinference/thirdparty/melo/text/english.py +284 -0
  220. xinference/thirdparty/melo/text/english_bert.py +39 -0
  221. xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
  222. xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
  223. xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
  224. xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
  225. xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
  226. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
  227. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
  228. xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
  229. xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
  230. xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
  231. xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
  232. xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
  233. xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
  234. xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
  235. xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
  236. xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
  237. xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
  238. xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
  239. xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
  240. xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
  241. xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
  242. xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
  243. xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
  244. xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
  245. xinference/thirdparty/melo/text/french.py +94 -0
  246. xinference/thirdparty/melo/text/french_bert.py +39 -0
  247. xinference/thirdparty/melo/text/japanese.py +647 -0
  248. xinference/thirdparty/melo/text/japanese_bert.py +49 -0
  249. xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
  250. xinference/thirdparty/melo/text/korean.py +192 -0
  251. xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
  252. xinference/thirdparty/melo/text/spanish.py +122 -0
  253. xinference/thirdparty/melo/text/spanish_bert.py +39 -0
  254. xinference/thirdparty/melo/text/symbols.py +290 -0
  255. xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
  256. xinference/thirdparty/melo/train.py +635 -0
  257. xinference/thirdparty/melo/train.sh +19 -0
  258. xinference/thirdparty/melo/transforms.py +209 -0
  259. xinference/thirdparty/melo/utils.py +424 -0
  260. xinference/types.py +17 -1
  261. xinference/web/ui/build/asset-manifest.json +6 -6
  262. xinference/web/ui/build/index.html +1 -1
  263. xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
  264. xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
  265. xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
  266. xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
  267. xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
  268. xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
  269. xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
  270. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
  271. xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
  272. xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
  273. xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
  274. xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
  275. xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
  276. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
  277. xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
  278. xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
  279. xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
  280. xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
  281. xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
  282. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
  283. xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
  284. xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
  285. xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
  286. xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
  287. xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
  288. xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
  289. xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
  290. xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
  291. xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
  292. xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
  293. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
  294. xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
  295. xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
  296. xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
  297. xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
  298. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
  299. xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
  300. xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
  301. xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
  302. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
  303. xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
  304. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
  305. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
  306. xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
  307. xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
  308. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
  309. xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
  310. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
  311. xinference/web/ui/node_modules/.package-lock.json +67 -3
  312. xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
  313. xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
  314. xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
  315. xinference/web/ui/node_modules/i18next/package.json +129 -0
  316. xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
  317. xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
  318. xinference/web/ui/node_modules/react-i18next/package.json +162 -0
  319. xinference/web/ui/node_modules/void-elements/package.json +34 -0
  320. xinference/web/ui/package-lock.json +69 -3
  321. xinference/web/ui/package.json +2 -0
  322. xinference/web/ui/src/locales/en.json +186 -0
  323. xinference/web/ui/src/locales/zh.json +186 -0
  324. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/METADATA +96 -36
  325. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/RECORD +335 -146
  326. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/WHEEL +1 -1
  327. xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
  328. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  329. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  330. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  331. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  332. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  333. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  334. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  335. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  336. xinference/thirdparty/fish_speech/tools/api.py +0 -440
  337. xinference/thirdparty/fish_speech/tools/commons.py +0 -35
  338. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  339. xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -34
  340. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  341. xinference/thirdparty/fish_speech/tools/webui.py +0 -485
  342. xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
  343. xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
  344. xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
  345. xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
  346. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
  347. xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
  348. xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
  349. xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
  350. xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
  351. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
  352. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
  353. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
  354. xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
  355. xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
  356. xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
  357. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
  358. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
  359. xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
  360. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
  361. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
  362. xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
  363. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
  364. xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
  365. /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
  366. /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
  367. /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
  368. /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
  369. /xinference/thirdparty/{fish_speech/fish_speech/configs → melo/text/fr_phonemizer}/__init__.py +0 -0
  370. /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
  371. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
  372. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
  373. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
xinference/core/utils.py CHANGED
@@ -11,11 +11,13 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+ import asyncio
  import logging
  import os
  import random
  import string
  import uuid
+ import weakref
  from enum import Enum
  from typing import Dict, Generator, List, Optional, Tuple, Union

@@ -23,7 +25,10 @@ import orjson
  from pynvml import nvmlDeviceGetCount, nvmlInit, nvmlShutdown

  from .._compat import BaseModel
- from ..constants import XINFERENCE_LOG_ARG_MAX_LENGTH
+ from ..constants import (
+     XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
+     XINFERENCE_LOG_ARG_MAX_LENGTH,
+ )

  logger = logging.getLogger(__name__)

@@ -49,13 +54,24 @@ def log_async(
  ):
      import time
      from functools import wraps
+     from inspect import signature

      def decorator(func):
          func_name = func.__name__
+         sig = signature(func)

          @wraps(func)
          async def wrapped(*args, **kwargs):
-             request_id_str = kwargs.get("request_id", "")
+             request_id_str = kwargs.get("request_id")
+             if not request_id_str:
+                 # sometimes `request_id` not in kwargs
+                 # we try to bind the arguments
+                 try:
+                     bound_args = sig.bind_partial(*args, **kwargs)
+                     arguments = bound_args.arguments
+                 except TypeError:
+                     arguments = {}
+                 request_id_str = arguments.get("request_id", "")
              if not request_id_str:
                  request_id_str = uuid.uuid1()
              if func_name == "text_to_image":
@@ -260,8 +276,8 @@ def get_nvidia_gpu_info() -> Dict:


  def assign_replica_gpu(
-     _replica_model_uid: str, replica: int, gpu_idx: Union[int, List[int]]
- ) -> List[int]:
+     _replica_model_uid: str, replica: int, gpu_idx: Optional[Union[int, List[int]]]
+ ) -> Optional[List[int]]:
      model_uid, rep_id = parse_replica_model_uid(_replica_model_uid)
      rep_id, replica = int(rep_id), int(replica)
      if isinstance(gpu_idx, int):
@@ -269,3 +285,56 @@
      if isinstance(gpu_idx, list) and gpu_idx:
          return gpu_idx[rep_id::replica]
      return gpu_idx
+
+
+ class CancelMixin:
+     _CANCEL_TASK_NAME = "abort_block"
+
+     def __init__(self):
+         self._running_tasks: weakref.WeakValueDictionary[
+             str, asyncio.Task
+         ] = weakref.WeakValueDictionary()
+
+     def _add_running_task(self, request_id: Optional[str]):
+         """Add current asyncio task to the running task.
+         :param request_id: The corresponding request id.
+         """
+         if request_id is None:
+             return
+         running_task = self._running_tasks.get(request_id)
+         if running_task is not None:
+             if running_task.get_name() == self._CANCEL_TASK_NAME:
+                 raise Exception(f"The request has been aborted: {request_id}")
+             raise Exception(f"Duplicate request id: {request_id}")
+         current_task = asyncio.current_task()
+         assert current_task is not None
+         self._running_tasks[request_id] = current_task
+
+     def _cancel_running_task(
+         self,
+         request_id: Optional[str],
+         block_duration: int = XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
+     ):
+         """Cancel the running asyncio task.
+         :param request_id: The request id to cancel.
+         :param block_duration: The duration seconds to ensure the request can't be executed.
+         """
+         if request_id is None:
+             return
+         running_task = self._running_tasks.pop(request_id, None)
+         if running_task is not None:
+             running_task.cancel()
+
+         async def block_task():
+             """This task is for blocking the request for a duration."""
+             try:
+                 await asyncio.sleep(block_duration)
+                 logger.info("Abort block end for request: %s", request_id)
+             except asyncio.CancelledError:
+                 logger.info("Abort block is cancelled for request: %s", request_id)
+
+         if block_duration > 0:
+             logger.info("Abort block start for request: %s", request_id)
+             self._running_tasks[request_id] = asyncio.create_task(
+                 block_task(), name=self._CANCEL_TASK_NAME
+             )
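The CancelMixin above lets a model actor abort an in-flight request by its request id and then keep that id blocked for a while. The snippet below is a minimal standalone sketch of the same register-then-cancel pattern (plain asyncio, no xinference imports; all names are illustrative), not the xinference API itself:

    import asyncio

    running = {}  # request_id -> asyncio.Task, analogous to CancelMixin._running_tasks

    async def handle(request_id: str):
        running[request_id] = asyncio.current_task()  # what _add_running_task does
        try:
            await asyncio.sleep(10)  # stand-in for a long-running inference call
        except asyncio.CancelledError:
            print(f"request {request_id} aborted")
            raise
        finally:
            running.pop(request_id, None)

    async def abort(request_id: str):
        task = running.get(request_id)  # what _cancel_running_task does
        if task is not None:
            task.cancel()

    async def main():
        t = asyncio.create_task(handle("req-1"))
        await asyncio.sleep(0.1)  # let the handler start and register itself
        await abort("req-1")
        await asyncio.gather(t, return_exceptions=True)

    asyncio.run(main())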
xinference/core/worker.py CHANGED
@@ -22,8 +22,9 @@ import signal
  import threading
  import time
  from collections import defaultdict
+ from dataclasses import dataclass
  from logging import getLogger
- from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union
+ from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union, no_type_check

  import xoscar as xo
  from async_timeout import timeout
@@ -58,6 +59,11 @@ else:
      MODEL_ACTOR_AUTO_RECOVER_LIMIT = None


+ @dataclass
+ class ModelStatus:
+     last_error: str = ""
+
+
  class WorkerActor(xo.StatelessActor):
      def __init__(
          self,
@@ -90,6 +96,7 @@ class WorkerActor(xo.StatelessActor):
          # attributes maintained after model launched:
          self._model_uid_to_model: Dict[str, xo.ActorRefType["ModelActor"]] = {}
          self._model_uid_to_model_spec: Dict[str, ModelDescription] = {}
+         self._model_uid_to_model_status: Dict[str, ModelStatus] = {}
          self._gpu_to_model_uid: Dict[int, str] = {}
          self._gpu_to_embedding_model_uids: Dict[int, Set[str]] = defaultdict(set)
          # Dict structure: gpu_index: {(replica_model_uid, model_type)}
@@ -177,12 +184,12 @@ class WorkerActor(xo.StatelessActor):
                              self._model_uid_to_recover_count[model_uid] = (
                                  recover_count - 1
                              )
-                             await self.launch_builtin_model(**launch_args)
+                             await self.recover_model(launch_args)
                          else:
                              logger.warning("Stop recreating model actor.")
                      else:
                          logger.warning("Recreating model actor %s ...", model_uid)
-                         await self.launch_builtin_model(**launch_args)
+                         await self.recover_model(launch_args)
                  break

      @classmethod
@@ -866,6 +873,9 @@ class WorkerActor(xo.StatelessActor):
          )

          try:
+             xavier_config: Optional[Dict] = kwargs.pop("xavier_config", None)
+             if xavier_config is not None:
+                 xavier_config["rank_address"] = subpool_address
              model, model_description = await asyncio.to_thread(
                  create_model_instance,
                  subpool_address,
@@ -893,6 +903,7 @@
                  model=model,
                  model_description=model_description,
                  request_limits=request_limits,
+                 xavier_config=xavier_config,
              )
              await model_ref.load()
          except:
@@ -902,6 +913,7 @@
              raise
          self._model_uid_to_model[model_uid] = model_ref
          self._model_uid_to_model_spec[model_uid] = model_description
+         self._model_uid_to_model_status[model_uid] = ModelStatus()
          self._model_uid_to_addr[model_uid] = subpool_address
          self._model_uid_to_recover_count.setdefault(
              model_uid, MODEL_ACTOR_AUTO_RECOVER_LIMIT
@@ -921,13 +933,18 @@
              origin_uid,
              {"model_ability": abilities, "status": LaunchStatus.READY.name},
          )
+         return subpool_address

      @log_async(logger=logger, level=logging.INFO)
      async def terminate_model(self, model_uid: str, is_model_die=False):
          # Terminate model while its launching is not allow
          if model_uid in self._model_uid_launching_guard:
              raise ValueError(f"{model_uid} is launching")
-         origin_uid, _ = parse_replica_model_uid(model_uid)
+         # In special cases, if the suffix is `-rank0`, this is the Xavier's rank 0 model actor.
+         if model_uid.endswith("-rank0"):
+             origin_uid = model_uid.removesuffix("-rank0")
+         else:
+             origin_uid, _ = parse_replica_model_uid(model_uid)
          try:
              _ = await self.get_supervisor_ref()
              if self._event_collector_ref is not None:
@@ -976,6 +993,7 @@
                  status = LaunchStatus.ERROR.name
              else:
                  status = LaunchStatus.TERMINATED.name
+             self._model_uid_to_model_status.pop(model_uid, None)

              if self._status_guard_ref is None:
                  _ = await self.get_supervisor_ref()
@@ -1010,6 +1028,9 @@

      @log_sync(logger=logger)
      def get_model(self, model_uid: str) -> xo.ActorRefType["ModelActor"]:
+         model_status = self._model_uid_to_model_status.get(model_uid)
+         if model_status and model_status.last_error:
+             raise Exception(model_status.last_error)
          model_ref = self._model_uid_to_model.get(model_uid, None)
          if model_ref is None:
              raise ValueError(f"Model not found, uid: {model_uid}")
@@ -1138,6 +1159,83 @@
          }
          return ret

+     def update_model_status(self, model_uid: str, **kwargs):
+         model_status = self._model_uid_to_model_status.get(model_uid)
+         if model_status is not None:
+             for k, v in kwargs.items():
+                 setattr(model_status, k, v)
+
+     def get_model_status(self, model_uid: str):
+         return self._model_uid_to_model_status.get(model_uid)
+
      @staticmethod
      def record_metrics(name, op, kwargs):
          record_metrics(name, op, kwargs)
+
+     async def start_transfer_for_vllm(
+         self, rep_model_uid: str, rank_addresses: List[str]
+     ):
+         model_ref = self._model_uid_to_model[rep_model_uid]
+         await model_ref.start_transfer_for_vllm(rank_addresses)
+
+     @log_async(logger=logger, level=logging.INFO)
+     async def launch_rank0_model(
+         self, rep_model_uid: str, xavier_config: Dict[str, Any]
+     ) -> Tuple[str, int]:
+         from ..model.llm.vllm.xavier.collective_manager import Rank0ModelActor
+
+         if os.name != "nt" and platform.system() != "Darwin":
+             # Linux
+             start_method = "forkserver"
+         else:
+             # Windows and macOS
+             start_method = "spawn"
+         subpool_address = await self._main_pool.append_sub_pool(
+             start_method=start_method
+         )
+
+         store_address = subpool_address.split(":")[0]
+         # Note that `store_port` needs to be generated on the worker,
+         # as the TCP store is on rank 0, not on the supervisor.
+         store_port = xo.utils.get_next_port()
+         self._model_uid_launching_guard[rep_model_uid] = True
+         try:
+             try:
+                 xavier_config["rank_address"] = subpool_address
+                 xavier_config["store_address"] = store_address
+                 xavier_config["store_port"] = store_port
+                 model_ref = await xo.create_actor(
+                     Rank0ModelActor,
+                     address=subpool_address,
+                     uid=rep_model_uid,
+                     xavier_config=xavier_config,
+                 )
+             except:
+                 await self._main_pool.remove_sub_pool(subpool_address)
+                 raise
+             self._model_uid_to_model[rep_model_uid] = model_ref
+             self._model_uid_to_addr[rep_model_uid] = subpool_address
+         finally:
+             del self._model_uid_launching_guard[rep_model_uid]
+         return subpool_address, store_port
+
+     @no_type_check
+     async def recover_model(self, launch_args: Dict[str, Any]):
+         rep_model_uid = launch_args.get("model_uid")
+         origin_uid, _ = parse_replica_model_uid(rep_model_uid)
+         xavier_config: Optional[Dict[str, Any]] = launch_args.get("xavier_config", None)
+         is_xavier: bool = xavier_config is not None
+         supervisor_ref = await self.get_supervisor_ref(add_worker=False)
+         if is_xavier:
+             rank = xavier_config.get("rank")
+             await supervisor_ref.call_collective_manager(
+                 origin_uid, "unregister_rank", rank
+             )
+         subpool_address = await self.launch_builtin_model(**launch_args)
+         if is_xavier:
+             model_ref = self._model_uid_to_model[rep_model_uid]
+             await model_ref.start_transfer_for_vllm([])
+             rank = xavier_config.get("rank")
+             await supervisor_ref.call_collective_manager(
+                 origin_uid, "register_rank", rank, subpool_address, update=True
+             )
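The worker changes above record a per-model ModelStatus and surface its last_error from get_model instead of handing back a broken actor reference. A stripped-down, synchronous sketch of that bookkeeping (class and variable names here are shortened stand-ins, not the actor implementation):

    from dataclasses import dataclass
    from typing import Dict

    @dataclass
    class ModelStatus:
        last_error: str = ""

    class Worker:
        def __init__(self):
            self._models: Dict[str, object] = {}
            self._status: Dict[str, ModelStatus] = {}

        def launch(self, uid: str, model: object):
            self._models[uid] = model
            self._status[uid] = ModelStatus()

        def update_model_status(self, uid: str, **kwargs):
            status = self._status.get(uid)
            if status is not None:
                for k, v in kwargs.items():
                    setattr(status, k, v)

        def get_model(self, uid: str):
            status = self._status.get(uid)
            if status and status.last_error:
                # a model that died keeps reporting why, instead of a dangling ref
                raise Exception(status.last_error)
            return self._models[uid]

    w = Worker()
    w.launch("demo-model-0", object())
    w.update_model_status("demo-model-0", last_error="CUDA out of memory")
    try:
        w.get_model("demo-model-0")
    except Exception as e:
        print(e)  # -> CUDA out of memory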
xinference/deploy/cmdline.py CHANGED
@@ -846,7 +846,9 @@ def model_launch(
      kwargs = {}
      for i in range(0, len(ctx.args), 2):
          if not ctx.args[i].startswith("--"):
-             raise ValueError("You must specify extra kwargs with `--` prefix.")
+             raise ValueError(
+                 f"You must specify extra kwargs with `--` prefix. There is an error in parameter passing that is {ctx.args[i]}."
+             )
          kwargs[ctx.args[i][2:]] = handle_click_args_type(ctx.args[i + 1])
      print(f"Launch model name: {model_name} with kwargs: {kwargs}", file=sys.stderr)

xinference/deploy/test/test_cmdline.py CHANGED
@@ -23,6 +23,7 @@ from ..cmdline import (
      list_model_registrations,
      model_chat,
      model_generate,
+     model_launch,
      model_list,
      model_terminate,
      register_model,
@@ -311,3 +312,58 @@ def test_remove_cache(setup):

      assert result.exit_code == 0
      assert "Cache directory qwen1.5-chat has been deleted."
+
+
+ def test_launch_error_in_passing_parameters():
+     runner = CliRunner()
+
+     # Known parameter but not provided with value.
+     result = runner.invoke(
+         model_launch,
+         [
+             "--model-engine",
+             "transformers",
+             "--model-name",
+             "qwen2.5-instruct",
+             "--model-uid",
+             "-s",
+             "0.5",
+             "-f",
+             "gptq",
+             "-q",
+             "INT4",
+             "111",
+             "-l",
+         ],
+     )
+     assert result.exit_code == 1
+     assert (
+         "You must specify extra kwargs with `--` prefix. There is an error in parameter passing that is 0.5."
+         in str(result)
+     )
+
+     # Unknown parameter
+     result = runner.invoke(
+         model_launch,
+         [
+             "--model-engine",
+             "transformers",
+             "--model-name",
+             "qwen2.5-instruct",
+             "--model-uid",
+             "123",
+             "-s",
+             "0.5",
+             "-f",
+             "gptq",
+             "-q",
+             "INT4",
+             "-l",
+             "111",
+         ],
+     )
+     assert result.exit_code == 1
+     assert (
+         "You must specify extra kwargs with `--` prefix. There is an error in parameter passing that is -l."
+         in str(result)
+     )
xinference/isolation.py CHANGED
@@ -37,6 +37,30 @@ class Isolation:
          asyncio.set_event_loop(self._loop)
          self._stopped = asyncio.Event()
          self._loop.run_until_complete(self._stopped.wait())
+         self._cancel_all_tasks(self._loop)
+
+     @staticmethod
+     def _cancel_all_tasks(loop):
+         to_cancel = asyncio.all_tasks(loop)
+         if not to_cancel:
+             return
+
+         for task in to_cancel:
+             task.cancel()
+
+         loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))
+
+         for task in to_cancel:
+             if task.cancelled():
+                 continue
+             if task.exception() is not None:
+                 loop.call_exception_handler(
+                     {
+                         "message": "unhandled exception during asyncio.run() shutdown",
+                         "exception": task.exception(),
+                         "task": task,
+                     }
+                 )

      def start(self):
          if self._threaded:
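The _cancel_all_tasks helper mirrors what asyncio.run() does at shutdown: cancel whatever is still pending, let the tasks unwind, and only then close the loop. A self-contained sketch of that shutdown sequence outside the Isolation class (illustrative names and timings, not xinference code):

    import asyncio

    async def worker():
        try:
            await asyncio.sleep(3600)  # a task that would otherwise outlive the loop
        except asyncio.CancelledError:
            print("worker cancelled")
            raise

    loop = asyncio.new_event_loop()
    loop.create_task(worker())
    loop.run_until_complete(asyncio.sleep(0.1))  # let the task start

    pending = asyncio.all_tasks(loop)
    for task in pending:
        task.cancel()
    # gather with return_exceptions=True so cancellations do not blow up shutdown
    loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
    loop.close()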
xinference/model/audio/__init__.py CHANGED
@@ -15,6 +15,8 @@
  import codecs
  import json
  import os
+ import platform
+ import sys
  import warnings
  from typing import Any, Dict

@@ -55,6 +57,14 @@ def register_custom_model():
              warnings.warn(f"{user_defined_audio_dir}/{f} has error, {e}")


+ def _need_filter(spec: dict):
+     if (sys.platform != "darwin" or platform.processor() != "arm") and spec.get(
+         "engine", ""
+     ).upper() == "MLX":
+         return True
+     return False
+
+
  def _install():
      _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
      _model_spec_modelscope_json = os.path.join(
@@ -64,6 +74,7 @@
          dict(
              (spec["model_name"], AudioModelFamilyV1(**spec))
              for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
+             if not _need_filter(spec)
          )
      )
      for model_name, model_spec in BUILTIN_AUDIO_MODELS.items():
@@ -75,6 +86,7 @@
              for spec in json.load(
                  codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
              )
+             if not _need_filter(spec)
          )
      )
      for model_name, model_spec in MODELSCOPE_AUDIO_MODELS.items():
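_need_filter keeps MLX-only audio specs out of the registries on machines that are not Apple-silicon Macs (sys.platform "darwin" plus an "arm" processor). A small illustration of the predicate with made-up spec dicts (the model names below are only examples):

    import platform
    import sys

    def _need_filter(spec: dict):
        if (sys.platform != "darwin" or platform.processor() != "arm") and spec.get(
            "engine", ""
        ).upper() == "MLX":
            return True
        return False

    specs = [
        {"model_name": "whisper-large-v3"},                        # no engine -> always kept
        {"model_name": "whisper-large-v3-mlx", "engine": "MLX"},   # kept only on Apple silicon
    ]
    kept = [s["model_name"] for s in specs if not _need_filter(s)]
    print(kept)  # on a Linux/x86 host: ['whisper-large-v3']; on an Apple-silicon Mac: both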
xinference/model/audio/core.py CHANGED
@@ -21,9 +21,13 @@ from ..core import CacheableModelSpec, ModelDescription
  from ..utils import valid_model_revision
  from .chattts import ChatTTSModel
  from .cosyvoice import CosyVoiceModel
+ from .f5tts import F5TTSModel
+ from .f5tts_mlx import F5TTSMLXModel
  from .fish_speech import FishSpeechModel
  from .funasr import FunASRModel
+ from .melotts import MeloTTSModel
  from .whisper import WhisperModel
+ from .whisper_mlx import WhisperMLXModel

  logger = logging.getLogger(__name__)

@@ -43,11 +47,13 @@ class AudioModelFamilyV1(CacheableModelSpec):
      model_family: str
      model_name: str
      model_id: str
-     model_revision: str
+     model_revision: Optional[str]
      multilingual: bool
+     language: Optional[str]
      model_ability: Optional[str]
      default_model_config: Optional[Dict[str, Any]]
      default_transcription_config: Optional[Dict[str, Any]]
+     engine: Optional[str]


  class AudioModelDescription(ModelDescription):
@@ -160,17 +166,38 @@ def create_audio_model_instance(
      model_path: Optional[str] = None,
      **kwargs,
  ) -> Tuple[
-     Union[WhisperModel, FunASRModel, ChatTTSModel, CosyVoiceModel, FishSpeechModel],
+     Union[
+         WhisperModel,
+         WhisperMLXModel,
+         FunASRModel,
+         ChatTTSModel,
+         CosyVoiceModel,
+         FishSpeechModel,
+         F5TTSModel,
+         F5TTSMLXModel,
+         MeloTTSModel,
+     ],
      AudioModelDescription,
  ]:
      model_spec = match_audio(model_name, download_hub)
      if model_path is None:
          model_path = cache(model_spec)
      model: Union[
-         WhisperModel, FunASRModel, ChatTTSModel, CosyVoiceModel, FishSpeechModel
+         WhisperModel,
+         WhisperMLXModel,
+         FunASRModel,
+         ChatTTSModel,
+         CosyVoiceModel,
+         FishSpeechModel,
+         F5TTSModel,
+         F5TTSMLXModel,
+         MeloTTSModel,
      ]
      if model_spec.model_family == "whisper":
-         model = WhisperModel(model_uid, model_path, model_spec, **kwargs)
+         if not model_spec.engine:
+             model = WhisperModel(model_uid, model_path, model_spec, **kwargs)
+         else:
+             model = WhisperMLXModel(model_uid, model_path, model_spec, **kwargs)
      elif model_spec.model_family == "funasr":
          model = FunASRModel(model_uid, model_path, model_spec, **kwargs)
      elif model_spec.model_family == "ChatTTS":
@@ -179,6 +206,12 @@
          model = CosyVoiceModel(model_uid, model_path, model_spec, **kwargs)
      elif model_spec.model_family == "FishAudio":
          model = FishSpeechModel(model_uid, model_path, model_spec, **kwargs)
+     elif model_spec.model_family == "F5-TTS":
+         model = F5TTSModel(model_uid, model_path, model_spec, **kwargs)
+     elif model_spec.model_family == "F5-TTS-MLX":
+         model = F5TTSMLXModel(model_uid, model_path, model_spec, **kwargs)
+     elif model_spec.model_family == "MeloTTS":
+         model = MeloTTSModel(model_uid, model_path, model_spec, **kwargs)
      else:
          raise Exception(f"Unsupported audio model family: {model_spec.model_family}")
      model_description = AudioModelDescription(
xinference/model/audio/cosyvoice.py CHANGED
@@ -39,6 +39,7 @@ class CosyVoiceModel:
          self._device = device
          self._model = None
          self._kwargs = kwargs
+         self._is_cosyvoice2 = False

      @property
      def model_ability(self):
@@ -48,14 +49,32 @@
          import os
          import sys

+         import torch
+
          # The yaml config loaded from model has hard-coded the import paths. please refer to: load_hyperpyyaml
-         sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../thirdparty"))
+         thirdparty_dir = os.path.join(os.path.dirname(__file__), "../../thirdparty")
+         sys.path.insert(0, thirdparty_dir)
+
+         if "CosyVoice2" in self._model_spec.model_name:
+             from cosyvoice.cli.cosyvoice import CosyVoice2 as CosyVoice
+
+             self._is_cosyvoice2 = True
+         else:
+             from cosyvoice.cli.cosyvoice import CosyVoice

-         from cosyvoice.cli.cosyvoice import CosyVoice
+             self._is_cosyvoice2 = False

-         self._model = CosyVoice(
-             self._model_path, load_jit=self._kwargs.get("load_jit", False)
+         # Unify this configuration name as 'compile' to be compatible with the name 'load_jit'.
+         load_jit = self._kwargs.get("load_jit", False) or self._kwargs.get(
+             "compile", False
          )
+         logger.info("Loading CosyVoice model, compile=%s...", load_jit)
+         self._model = CosyVoice(self._model_path, load_jit=load_jit)
+         if self._is_cosyvoice2:
+             spk2info_file = os.path.join(thirdparty_dir, "cosyvoice/bin/spk2info.pt")
+             self._model.frontend.spk2info = torch.load(
+                 spk2info_file, map_location=self._device
+             )

      def _speech_handle(
          self,
@@ -78,6 +97,15 @@
                  output = self._model.inference_zero_shot(
                      input, prompt_text, prompt_speech_16k, stream=stream
                  )
+             elif instruct_text:
+                 assert self._is_cosyvoice2
+                 logger.info("CosyVoice inference_instruct")
+                 output = self._model.inference_instruct2(
+                     input,
+                     instruct_text=instruct_text,
+                     prompt_speech_16k=prompt_speech_16k,
+                     stream=stream,
+                 )
              else:
                  logger.info("CosyVoice inference_cross_lingual")
                  output = self._model.inference_cross_lingual(
@@ -87,6 +115,7 @@
              available_speakers = self._model.list_avaliable_spks()
              if not voice:
                  voice = available_speakers[0]
+                 logger.info("Auto select speaker: %s", voice)
              else:
                  assert (
                      voice in available_speakers
@@ -106,7 +135,9 @@
          def _generator_stream():
              with BytesIO() as out:
                  writer = torchaudio.io.StreamWriter(out, format=response_format)
-                 writer.add_audio_stream(sample_rate=22050, num_channels=1)
+                 writer.add_audio_stream(
+                     sample_rate=self._model.sample_rate, num_channels=1
+                 )
                  i = 0
                  last_pos = 0
                  with writer.open():
@@ -125,7 +156,7 @@
              chunks = [o["tts_speech"] for o in output]
              t = torch.cat(chunks, dim=1)
              with BytesIO() as out:
-                 torchaudio.save(out, t, 22050, format=response_format)
+                 torchaudio.save(out, t, self._model.sample_rate, format=response_format)
              return out.getvalue()

          return _generator_stream() if stream else _generator_block()
@@ -163,6 +194,8 @@
              assert (
                  prompt_text is None
              ), "CosyVoice Instruct model does not support prompt_text"
+         elif self._is_cosyvoice2:
+             pass
          else:
              # inference_zero_shot
              # inference_cross_lingual
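The streaming and blocking audio paths above now use the loaded model's own sample_rate instead of a hard-coded 22050 Hz, which matters when the loaded model (for example CosyVoice 2) does not generate 22.05 kHz audio. A standalone sketch of the same torchaudio streaming pattern, with the sample rate and tensor contents as placeholders rather than real model output:

    from io import BytesIO

    import torch
    import torchaudio

    sample_rate = 24000                  # placeholder; the real code reads model.sample_rate
    chunk = torch.zeros(1, sample_rate)  # one second of silence, shape (channels, frames)

    with BytesIO() as out:
        writer = torchaudio.io.StreamWriter(out, format="wav")
        writer.add_audio_stream(sample_rate=sample_rate, num_channels=1)
        with writer.open():
            writer.write_audio_chunk(0, chunk.T)  # StreamWriter expects (frames, channels)
        data = out.getvalue()

    print(len(data))  # encoded bytes, ready to be returned or streamed to the client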