xinference 1.0.1__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (343)
  1. xinference/_compat.py +2 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +77 -71
  4. xinference/core/chat_interface.py +6 -1
  5. xinference/core/model.py +79 -19
  6. xinference/core/supervisor.py +172 -10
  7. xinference/core/utils.py +12 -8
  8. xinference/core/worker.py +102 -4
  9. xinference/deploy/cmdline.py +3 -1
  10. xinference/deploy/test/test_cmdline.py +56 -0
  11. xinference/isolation.py +24 -0
  12. xinference/model/audio/core.py +16 -0
  13. xinference/model/audio/cosyvoice.py +39 -6
  14. xinference/model/audio/f5tts.py +200 -0
  15. xinference/model/audio/f5tts_mlx.py +260 -0
  16. xinference/model/audio/fish_speech.py +36 -111
  17. xinference/model/audio/melotts.py +110 -0
  18. xinference/model/audio/model_spec.json +99 -3
  19. xinference/model/audio/model_spec_modelscope.json +27 -0
  20. xinference/model/audio/utils.py +32 -0
  21. xinference/model/audio/whisper.py +35 -10
  22. xinference/model/embedding/core.py +203 -142
  23. xinference/model/embedding/model_spec.json +7 -0
  24. xinference/model/embedding/model_spec_modelscope.json +8 -0
  25. xinference/model/image/core.py +69 -1
  26. xinference/model/image/model_spec.json +145 -4
  27. xinference/model/image/model_spec_modelscope.json +150 -4
  28. xinference/model/image/stable_diffusion/core.py +45 -13
  29. xinference/model/llm/__init__.py +4 -2
  30. xinference/model/llm/llm_family.json +536 -53
  31. xinference/model/llm/llm_family.py +15 -36
  32. xinference/model/llm/llm_family_modelscope.json +454 -20
  33. xinference/model/llm/memory.py +1 -1
  34. xinference/model/llm/mlx/core.py +248 -52
  35. xinference/model/llm/sglang/core.py +1 -0
  36. xinference/model/llm/transformers/chatglm.py +9 -5
  37. xinference/model/llm/transformers/cogagent.py +272 -0
  38. xinference/model/llm/transformers/core.py +2 -0
  39. xinference/model/llm/transformers/qwen2_vl.py +12 -1
  40. xinference/model/llm/transformers/utils.py +16 -8
  41. xinference/model/llm/utils.py +36 -4
  42. xinference/model/llm/vllm/core.py +53 -10
  43. xinference/model/llm/vllm/xavier/__init__.py +13 -0
  44. xinference/model/llm/vllm/xavier/allocator.py +74 -0
  45. xinference/model/llm/vllm/xavier/block.py +111 -0
  46. xinference/model/llm/vllm/xavier/block_manager.py +71 -0
  47. xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
  48. xinference/model/llm/vllm/xavier/collective.py +74 -0
  49. xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
  50. xinference/model/llm/vllm/xavier/engine.py +247 -0
  51. xinference/model/llm/vllm/xavier/executor.py +134 -0
  52. xinference/model/llm/vllm/xavier/scheduler.py +438 -0
  53. xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
  54. xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
  55. xinference/model/llm/vllm/xavier/transfer.py +319 -0
  56. xinference/model/video/diffusers.py +14 -0
  57. xinference/model/video/model_spec.json +15 -0
  58. xinference/model/video/model_spec_modelscope.json +16 -0
  59. xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
  60. xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
  61. xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
  62. xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
  63. xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
  64. xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
  65. xinference/thirdparty/cosyvoice/bin/train.py +42 -8
  66. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
  67. xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
  68. xinference/thirdparty/cosyvoice/cli/model.py +330 -80
  69. xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
  70. xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
  71. xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
  72. xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
  73. xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
  74. xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
  75. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
  76. xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
  77. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
  78. xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
  79. xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
  80. xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
  81. xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
  82. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
  83. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
  84. xinference/thirdparty/cosyvoice/utils/common.py +28 -1
  85. xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
  86. xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
  87. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
  88. xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
  89. xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
  90. xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
  91. xinference/thirdparty/f5_tts/api.py +166 -0
  92. xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
  93. xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
  94. xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
  95. xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
  96. xinference/thirdparty/f5_tts/eval/README.md +49 -0
  97. xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
  98. xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
  99. xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
  100. xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
  101. xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
  102. xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
  103. xinference/thirdparty/f5_tts/infer/README.md +191 -0
  104. xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
  105. xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
  106. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
  107. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
  108. xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
  109. xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
  110. xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
  111. xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
  112. xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
  113. xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
  114. xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
  115. xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
  116. xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
  117. xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
  118. xinference/thirdparty/f5_tts/model/__init__.py +10 -0
  119. xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
  120. xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
  121. xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
  122. xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
  123. xinference/thirdparty/f5_tts/model/cfm.py +285 -0
  124. xinference/thirdparty/f5_tts/model/dataset.py +319 -0
  125. xinference/thirdparty/f5_tts/model/modules.py +658 -0
  126. xinference/thirdparty/f5_tts/model/trainer.py +366 -0
  127. xinference/thirdparty/f5_tts/model/utils.py +185 -0
  128. xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
  129. xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
  130. xinference/thirdparty/f5_tts/socket_server.py +159 -0
  131. xinference/thirdparty/f5_tts/train/README.md +77 -0
  132. xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
  133. xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
  134. xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
  135. xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
  136. xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
  137. xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
  138. xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
  139. xinference/thirdparty/f5_tts/train/train.py +75 -0
  140. xinference/thirdparty/fish_speech/fish_speech/conversation.py +94 -83
  141. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +63 -20
  142. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +1 -26
  143. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
  144. xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
  145. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
  146. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1 -1
  147. xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +7 -13
  148. xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
  149. xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
  150. xinference/thirdparty/fish_speech/tools/fish_e2e.py +2 -2
  151. xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
  152. xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
  153. xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
  154. xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
  155. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
  156. xinference/thirdparty/fish_speech/tools/llama/generate.py +117 -89
  157. xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
  158. xinference/thirdparty/fish_speech/tools/schema.py +11 -28
  159. xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
  160. xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
  161. xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
  162. xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
  163. xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
  164. xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
  165. xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
  166. xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
  167. xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
  168. xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
  169. xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
  170. xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
  171. xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
  172. xinference/thirdparty/matcha/utils/utils.py +2 -2
  173. xinference/thirdparty/melo/api.py +135 -0
  174. xinference/thirdparty/melo/app.py +61 -0
  175. xinference/thirdparty/melo/attentions.py +459 -0
  176. xinference/thirdparty/melo/commons.py +160 -0
  177. xinference/thirdparty/melo/configs/config.json +94 -0
  178. xinference/thirdparty/melo/data/example/metadata.list +20 -0
  179. xinference/thirdparty/melo/data_utils.py +413 -0
  180. xinference/thirdparty/melo/download_utils.py +67 -0
  181. xinference/thirdparty/melo/infer.py +25 -0
  182. xinference/thirdparty/melo/init_downloads.py +14 -0
  183. xinference/thirdparty/melo/losses.py +58 -0
  184. xinference/thirdparty/melo/main.py +36 -0
  185. xinference/thirdparty/melo/mel_processing.py +174 -0
  186. xinference/thirdparty/melo/models.py +1030 -0
  187. xinference/thirdparty/melo/modules.py +598 -0
  188. xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
  189. xinference/thirdparty/melo/monotonic_align/core.py +46 -0
  190. xinference/thirdparty/melo/preprocess_text.py +135 -0
  191. xinference/thirdparty/melo/split_utils.py +174 -0
  192. xinference/thirdparty/melo/text/__init__.py +35 -0
  193. xinference/thirdparty/melo/text/chinese.py +199 -0
  194. xinference/thirdparty/melo/text/chinese_bert.py +107 -0
  195. xinference/thirdparty/melo/text/chinese_mix.py +253 -0
  196. xinference/thirdparty/melo/text/cleaner.py +36 -0
  197. xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
  198. xinference/thirdparty/melo/text/cmudict.rep +129530 -0
  199. xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
  200. xinference/thirdparty/melo/text/english.py +284 -0
  201. xinference/thirdparty/melo/text/english_bert.py +39 -0
  202. xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
  203. xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
  204. xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
  205. xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
  206. xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
  207. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
  208. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
  209. xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
  210. xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
  211. xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
  212. xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
  213. xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
  214. xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
  215. xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
  216. xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
  217. xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
  218. xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
  219. xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
  220. xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
  221. xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
  222. xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
  223. xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
  224. xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
  225. xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
  226. xinference/thirdparty/melo/text/french.py +94 -0
  227. xinference/thirdparty/melo/text/french_bert.py +39 -0
  228. xinference/thirdparty/melo/text/japanese.py +647 -0
  229. xinference/thirdparty/melo/text/japanese_bert.py +49 -0
  230. xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
  231. xinference/thirdparty/melo/text/korean.py +192 -0
  232. xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
  233. xinference/thirdparty/melo/text/spanish.py +122 -0
  234. xinference/thirdparty/melo/text/spanish_bert.py +39 -0
  235. xinference/thirdparty/melo/text/symbols.py +290 -0
  236. xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
  237. xinference/thirdparty/melo/train.py +635 -0
  238. xinference/thirdparty/melo/train.sh +19 -0
  239. xinference/thirdparty/melo/transforms.py +209 -0
  240. xinference/thirdparty/melo/utils.py +424 -0
  241. xinference/types.py +15 -0
  242. xinference/web/ui/build/asset-manifest.json +6 -6
  243. xinference/web/ui/build/index.html +1 -1
  244. xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
  245. xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
  246. xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
  247. xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
  248. xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
  249. xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
  250. xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
  251. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
  252. xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
  253. xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
  254. xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
  255. xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
  256. xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
  257. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
  258. xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
  259. xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
  260. xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
  261. xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
  262. xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
  263. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
  264. xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
  265. xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
  266. xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
  267. xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
  268. xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
  269. xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
  270. xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
  271. xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
  272. xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
  273. xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
  274. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
  275. xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
  276. xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
  277. xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
  278. xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
  279. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
  280. xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
  281. xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
  282. xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
  283. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
  284. xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
  285. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
  286. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
  287. xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
  288. xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
  289. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
  290. xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
  291. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
  292. xinference/web/ui/node_modules/.package-lock.json +67 -3
  293. xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
  294. xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
  295. xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
  296. xinference/web/ui/node_modules/i18next/package.json +129 -0
  297. xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
  298. xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
  299. xinference/web/ui/node_modules/react-i18next/package.json +162 -0
  300. xinference/web/ui/node_modules/void-elements/package.json +34 -0
  301. xinference/web/ui/package-lock.json +69 -3
  302. xinference/web/ui/package.json +2 -0
  303. xinference/web/ui/src/locales/en.json +186 -0
  304. xinference/web/ui/src/locales/zh.json +186 -0
  305. {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/METADATA +68 -32
  306. {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/RECORD +316 -122
  307. xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
  308. xinference/thirdparty/fish_speech/tools/api.py +0 -943
  309. xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -95
  310. xinference/thirdparty/fish_speech/tools/webui.py +0 -548
  311. xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
  312. xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
  313. xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
  314. xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
  315. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
  316. xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
  317. xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
  318. xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
  319. xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
  320. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
  321. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
  322. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
  323. xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
  324. xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
  325. xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
  326. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
  327. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
  328. xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
  329. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
  330. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
  331. xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
  332. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
  333. xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
  334. /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
  335. /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
  336. /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
  337. /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
  338. /xinference/thirdparty/{fish_speech/tools → melo/text/fr_phonemizer}/__init__.py +0 -0
  339. /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
  340. {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
  341. {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/WHEEL +0 -0
  342. {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
  343. {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
@@ -267,6 +267,14 @@ class SupervisorActor(xo.StatelessActor):
267
267
  signal.SIGTERM, lambda: asyncio.create_task(signal_handler())
268
268
  )
269
269
 
270
+ from ..model.llm.vllm.xavier.block_tracker import VLLMBlockTracker
271
+ from ..model.llm.vllm.xavier.collective_manager import CollectiveManager
272
+
273
+ self._block_tracker_mapping: Dict[str, xo.ActorRefType[VLLMBlockTracker]] = {}
274
+ self._collective_manager_mapping: Dict[
275
+ str, xo.ActorRefType[CollectiveManager]
276
+ ] = {}
277
+
270
278
  @typing.no_type_check
271
279
  async def get_cluster_device_info(self, detailed: bool = False) -> List:
272
280
  import psutil
@@ -959,29 +967,83 @@ class SupervisorActor(xo.StatelessActor):
959
967
  if model_uid is None:
960
968
  model_uid = self._gen_model_uid(model_name)
961
969
 
970
+ # Xavier-related
971
+ enable_xavier: bool = (
972
+ bool(kwargs.pop("enable_xavier", False))
973
+ and model_engine is not None
974
+ and model_engine.lower() == "vllm"
975
+ )
976
+ store_address = None
977
+ store_port = None
978
+ world_size = None
979
+ if enable_xavier:
980
+ if replica <= 1:
981
+ logger.warning(f"Enabling xavier when `replica<=1` is meaningless.")
982
+ enable_xavier = False
983
+ else:
984
+ from ..model.llm.vllm.xavier.block_tracker import VLLMBlockTracker
985
+ from ..model.llm.vllm.xavier.collective_manager import CollectiveManager
986
+
987
+ self._block_tracker_mapping[model_uid] = await xo.create_actor(
988
+ VLLMBlockTracker,
989
+ address=self.address,
990
+ uid=f"{VLLMBlockTracker.default_uid()}-{model_uid}",
991
+ )
992
+ world_size = replica + 1
993
+ logger.info(f"Going to start xavier with world size: {world_size}")
994
+ self._collective_manager_mapping[model_uid] = await xo.create_actor(
995
+ CollectiveManager,
996
+ address=self.address,
997
+ uid=f"{CollectiveManager.default_uid()}-{model_uid}",
998
+ model_uid=model_uid,
999
+ )
1000
+ logger.info(f"Start collective manager for {model_uid} done.")
1001
+
962
1002
  model_size = str(model_size_in_billions) if model_size_in_billions else ""
963
1003
  logger.debug(
964
1004
  f"Enter launch_builtin_model, model_uid: {model_uid}, model_name: {model_name}, model_size: {model_size}, "
965
- f"model_format: {model_format}, quantization: {quantization}, replica: {replica}, "
1005
+ f"model_format: {model_format}, quantization: {quantization}, replica: {replica}, enable_xavier: {enable_xavier}, "
966
1006
  f"kwargs: {kwargs}"
967
1007
  )
968
1008
 
969
- async def _launch_one_model(_replica_model_uid):
1009
+ async def _launch_one_model(worker_ref, _replica_model_uid, rank: int):
970
1010
  if _replica_model_uid in self._replica_model_uid_to_worker:
971
1011
  raise ValueError(
972
1012
  f"Model is already in the model list, uid: {_replica_model_uid}"
973
1013
  )
1014
+
1015
+ nonlocal store_address
1016
+ nonlocal store_port
1017
+ xavier_config = (
1018
+ {
1019
+ "block_tracker_uid": self._block_tracker_mapping[model_uid].uid,
1020
+ "block_tracker_address": self._block_tracker_mapping[
1021
+ model_uid
1022
+ ].address,
1023
+ "rank": rank,
1024
+ "world_size": world_size,
1025
+ "store_address": store_address,
1026
+ "store_port": store_port,
1027
+ }
1028
+ if enable_xavier
1029
+ else None
1030
+ )
1031
+
1032
+ if enable_xavier and rank == 0:
1033
+ rank0_address, _port = await worker_ref.launch_rank0_model(
1034
+ _replica_model_uid, xavier_config
1035
+ )
1036
+ self._replica_model_uid_to_worker[_replica_model_uid] = worker_ref
1037
+ store_address = rank0_address.split(":")[0]
1038
+ store_port = _port
1039
+ return rank0_address
1040
+
974
1041
  replica_gpu_idx = assign_replica_gpu(_replica_model_uid, replica, gpu_idx)
975
1042
  nonlocal model_type
976
1043
 
977
- worker_ref = (
978
- target_ip_worker_ref
979
- if target_ip_worker_ref is not None
980
- else await self._choose_worker()
981
- )
982
1044
  # LLM as default for compatibility
983
1045
  model_type = model_type or "LLM"
984
- await worker_ref.launch_builtin_model(
1046
+ subpool_address = await worker_ref.launch_builtin_model(
985
1047
  model_uid=_replica_model_uid,
986
1048
  model_name=model_name,
987
1049
  model_size_in_billions=model_size_in_billions,
@@ -995,14 +1057,64 @@ class SupervisorActor(xo.StatelessActor):
995
1057
  gpu_idx=replica_gpu_idx,
996
1058
  download_hub=download_hub,
997
1059
  model_path=model_path,
1060
+ xavier_config=xavier_config,
998
1061
  **kwargs,
999
1062
  )
1000
1063
  self._replica_model_uid_to_worker[_replica_model_uid] = worker_ref
1064
+ return subpool_address
1001
1065
 
1002
1066
  async def _launch_model():
1003
1067
  try:
1004
- for rep_model_uid in iter_replica_model_uid(model_uid, replica):
1005
- await _launch_one_model(rep_model_uid)
1068
+ worker_refs = []
1069
+ rank_addresses = []
1070
+ for _idx, rep_model_uid in enumerate(
1071
+ iter_replica_model_uid(model_uid, replica)
1072
+ ):
1073
+ worker_ref = (
1074
+ target_ip_worker_ref
1075
+ if target_ip_worker_ref is not None
1076
+ else await self._choose_worker()
1077
+ )
1078
+ if enable_xavier and _idx == 0:
1079
+ """
1080
+ Start the rank 0 model actor on the worker that holds the rank 1 replica,
1081
+ solely for constructing the collective communication world.
1082
+ """
1083
+ _uid = model_uid + "-rank0"
1084
+ rank0_address = await _launch_one_model(worker_ref, _uid, 0)
1085
+ worker_refs.append((worker_ref, _uid))
1086
+ rank_addresses.append(rank0_address)
1087
+
1088
+ subpool_address = await _launch_one_model(
1089
+ worker_ref, rep_model_uid, _idx + 1
1090
+ )
1091
+ worker_refs.append((worker_ref, rep_model_uid))
1092
+ rank_addresses.append(subpool_address)
1093
+
1094
+ # For xavier, start all the vllm instances first,
1095
+ # and then start the transfer component,
1096
+ # because the transfer actor needs all the rank addresses used for collective communication
1097
+ if enable_xavier:
1098
+ logger.debug(f"Init transfer component for xavier...")
1099
+ collective_manager_ref = self._collective_manager_mapping[model_uid]
1100
+ tasks = []
1101
+ for worker_ref, rep_model_uid in worker_refs:
1102
+ tasks.append(
1103
+ worker_ref.start_transfer_for_vllm(
1104
+ rep_model_uid, rank_addresses
1105
+ )
1106
+ )
1107
+ # Here you must use asyncio.gather, not a for loop,
1108
+ # or you will get stuck.
1109
+ await asyncio.gather(*tasks)
1110
+
1111
+ # init collective_manager
1112
+ for idx, addr in enumerate(rank_addresses):
1113
+ await collective_manager_ref.register_rank(
1114
+ idx, addr, update=False
1115
+ )
1116
+
1117
+ logger.debug(f"Init transfer component for xavier done.")
1006
1118
  except Exception:
1007
1119
  # terminate_model will remove the replica info.
1008
1120
  await self.terminate_model(model_uid, suppress_exception=True)
@@ -1131,6 +1243,38 @@ class SupervisorActor(xo.StatelessActor):
1131
1243
  raise
1132
1244
  self._model_uid_to_replica_info.pop(model_uid, None)
1133
1245
 
1246
+ # clear for xavier
1247
+ rank0_uid = model_uid + "-rank0"
1248
+ if rank0_uid in self._replica_model_uid_to_worker:
1249
+ await _terminate_one_model(rank0_uid)
1250
+
1251
+ collective_manager_ref = self._collective_manager_mapping.pop(model_uid, None)
1252
+ if collective_manager_ref is not None:
1253
+ try:
1254
+ await xo.destroy_actor(collective_manager_ref)
1255
+ except Exception as e:
1256
+ logger.debug(
1257
+ "Destroy collective_manager_ref failed, model uid: %s, error: %s",
1258
+ model_uid,
1259
+ e,
1260
+ )
1261
+ finally:
1262
+ logger.debug(
1263
+ f"Destroy collective_manager_ref done. model uid: {model_uid}"
1264
+ )
1265
+ block_tracker_ref = self._block_tracker_mapping.pop(model_uid, None)
1266
+ if block_tracker_ref is not None:
1267
+ try:
1268
+ await xo.destroy_actor(block_tracker_ref)
1269
+ except Exception as e:
1270
+ logger.debug(
1271
+ "Destroy block_tracker_ref failed, model uid: %s, error: %s",
1272
+ model_uid,
1273
+ e,
1274
+ )
1275
+ finally:
1276
+ logger.debug(f"Destroy block_tracker_ref done. model uid: {model_uid}")
1277
+
1134
1278
  @log_async(logger=logger)
1135
1279
  async def get_model(self, model_uid: str) -> xo.ActorRefType["ModelActor"]:
1136
1280
  replica_info = self._model_uid_to_replica_info.get(model_uid, None)
@@ -1148,6 +1292,15 @@ class SupervisorActor(xo.StatelessActor):
1148
1292
  )
1149
1293
  return await worker_ref.get_model(model_uid=replica_model_uid)
1150
1294
 
1295
@log_async(logger=logger)
async def get_model_status(self, replica_model_uid: str):
    """
    Fetch the status of a single replica from the worker that hosts it.

    The replica is looked up in ``self._replica_model_uid_to_worker`` and the
    request is forwarded to that worker's ``get_model_status``.

    Raises ``ValueError`` when no worker is recorded for ``replica_model_uid``.
    """
    hosting_worker = self._replica_model_uid_to_worker.get(replica_model_uid)
    if hosting_worker is not None:
        return await hosting_worker.get_model_status(replica_model_uid)
    raise ValueError(
        f"Model not found in the model list, uid: {replica_model_uid}"
    )
1303
+
1151
1304
  @log_async(logger=logger)
1152
1305
  async def describe_model(self, model_uid: str) -> Dict[str, Any]:
1153
1306
  replica_info = self._model_uid_to_replica_info.get(model_uid, None)
@@ -1377,3 +1530,12 @@ class SupervisorActor(xo.StatelessActor):
1377
1530
 
1378
1531
  async def get_progress(self, request_id: str) -> float:
1379
1532
  return await self._progress_tracker.get_progress(request_id)
1533
+
1534
async def call_collective_manager(
    self, model_uid: str, func_name: str, *args, **kwargs
):
    """
    Used by worker.

    Call the method named ``func_name`` on the collective manager stored for
    ``model_uid`` in ``self._collective_manager_mapping``, forwarding
    ``*args`` and ``**kwargs``, and return whatever that call produces.

    A ``model_uid`` without an entry surfaces as the mapping's own lookup
    error (``KeyError`` for a plain dict).
    """
    collective_manager_ref = self._collective_manager_mapping[model_uid]
    method = getattr(collective_manager_ref, func_name)
    # Hand the result back to the caller instead of silently dropping it;
    # methods that return nothing still yield None, as before.
    return await method(*args, **kwargs)
xinference/core/utils.py CHANGED
@@ -62,12 +62,16 @@ def log_async(
62
62
 
63
63
  @wraps(func)
64
64
  async def wrapped(*args, **kwargs):
65
- try:
66
- bound_args = sig.bind_partial(*args, **kwargs)
67
- arguments = bound_args.arguments
68
- except TypeError:
69
- arguments = {}
70
- request_id_str = arguments.get("request_id", "")
65
+ request_id_str = kwargs.get("request_id")
66
+ if not request_id_str:
67
+ # sometimes `request_id` not in kwargs
68
+ # we try to bind the arguments
69
+ try:
70
+ bound_args = sig.bind_partial(*args, **kwargs)
71
+ arguments = bound_args.arguments
72
+ except TypeError:
73
+ arguments = {}
74
+ request_id_str = arguments.get("request_id", "")
71
75
  if not request_id_str:
72
76
  request_id_str = uuid.uuid1()
73
77
  if func_name == "text_to_image":
@@ -272,8 +276,8 @@ def get_nvidia_gpu_info() -> Dict:
272
276
 
273
277
 
274
278
  def assign_replica_gpu(
275
- _replica_model_uid: str, replica: int, gpu_idx: Union[int, List[int]]
276
- ) -> List[int]:
279
+ _replica_model_uid: str, replica: int, gpu_idx: Optional[Union[int, List[int]]]
280
+ ) -> Optional[List[int]]:
277
281
  model_uid, rep_id = parse_replica_model_uid(_replica_model_uid)
278
282
  rep_id, replica = int(rep_id), int(replica)
279
283
  if isinstance(gpu_idx, int):
xinference/core/worker.py CHANGED
@@ -22,8 +22,9 @@ import signal
22
22
  import threading
23
23
  import time
24
24
  from collections import defaultdict
25
+ from dataclasses import dataclass
25
26
  from logging import getLogger
26
- from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union
27
+ from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union, no_type_check
27
28
 
28
29
  import xoscar as xo
29
30
  from async_timeout import timeout
@@ -58,6 +59,11 @@ else:
58
59
  MODEL_ACTOR_AUTO_RECOVER_LIMIT = None
59
60
 
60
61
 
62
@dataclass
class ModelStatus:
    """Mutable status record stored per model uid."""

    # Error text recorded for the model; the empty string means no error.
    last_error: str = ""
65
+
66
+
61
67
  class WorkerActor(xo.StatelessActor):
62
68
  def __init__(
63
69
  self,
@@ -90,6 +96,7 @@ class WorkerActor(xo.StatelessActor):
90
96
  # attributes maintained after model launched:
91
97
  self._model_uid_to_model: Dict[str, xo.ActorRefType["ModelActor"]] = {}
92
98
  self._model_uid_to_model_spec: Dict[str, ModelDescription] = {}
99
+ self._model_uid_to_model_status: Dict[str, ModelStatus] = {}
93
100
  self._gpu_to_model_uid: Dict[int, str] = {}
94
101
  self._gpu_to_embedding_model_uids: Dict[int, Set[str]] = defaultdict(set)
95
102
  # Dict structure: gpu_index: {(replica_model_uid, model_type)}
@@ -177,12 +184,12 @@ class WorkerActor(xo.StatelessActor):
177
184
  self._model_uid_to_recover_count[model_uid] = (
178
185
  recover_count - 1
179
186
  )
180
- await self.launch_builtin_model(**launch_args)
187
+ await self.recover_model(launch_args)
181
188
  else:
182
189
  logger.warning("Stop recreating model actor.")
183
190
  else:
184
191
  logger.warning("Recreating model actor %s ...", model_uid)
185
- await self.launch_builtin_model(**launch_args)
192
+ await self.recover_model(launch_args)
186
193
  break
187
194
 
188
195
  @classmethod
@@ -866,6 +873,9 @@ class WorkerActor(xo.StatelessActor):
866
873
  )
867
874
 
868
875
  try:
876
+ xavier_config: Optional[Dict] = kwargs.pop("xavier_config", None)
877
+ if xavier_config is not None:
878
+ xavier_config["rank_address"] = subpool_address
869
879
  model, model_description = await asyncio.to_thread(
870
880
  create_model_instance,
871
881
  subpool_address,
@@ -893,6 +903,7 @@ class WorkerActor(xo.StatelessActor):
893
903
  model=model,
894
904
  model_description=model_description,
895
905
  request_limits=request_limits,
906
+ xavier_config=xavier_config,
896
907
  )
897
908
  await model_ref.load()
898
909
  except:
@@ -902,6 +913,7 @@ class WorkerActor(xo.StatelessActor):
902
913
  raise
903
914
  self._model_uid_to_model[model_uid] = model_ref
904
915
  self._model_uid_to_model_spec[model_uid] = model_description
916
+ self._model_uid_to_model_status[model_uid] = ModelStatus()
905
917
  self._model_uid_to_addr[model_uid] = subpool_address
906
918
  self._model_uid_to_recover_count.setdefault(
907
919
  model_uid, MODEL_ACTOR_AUTO_RECOVER_LIMIT
@@ -921,13 +933,18 @@ class WorkerActor(xo.StatelessActor):
921
933
  origin_uid,
922
934
  {"model_ability": abilities, "status": LaunchStatus.READY.name},
923
935
  )
936
+ return subpool_address
924
937
 
925
938
  @log_async(logger=logger, level=logging.INFO)
926
939
  async def terminate_model(self, model_uid: str, is_model_die=False):
927
940
  # Terminate model while its launching is not allow
928
941
  if model_uid in self._model_uid_launching_guard:
929
942
  raise ValueError(f"{model_uid} is launching")
930
- origin_uid, _ = parse_replica_model_uid(model_uid)
943
+ # In special cases, if the suffix is `-rank0`, this is the Xavier's rank 0 model actor.
944
+ if model_uid.endswith("-rank0"):
945
+ origin_uid = model_uid.removesuffix("-rank0")
946
+ else:
947
+ origin_uid, _ = parse_replica_model_uid(model_uid)
931
948
  try:
932
949
  _ = await self.get_supervisor_ref()
933
950
  if self._event_collector_ref is not None:
@@ -976,6 +993,7 @@ class WorkerActor(xo.StatelessActor):
976
993
  status = LaunchStatus.ERROR.name
977
994
  else:
978
995
  status = LaunchStatus.TERMINATED.name
996
+ self._model_uid_to_model_status.pop(model_uid, None)
979
997
 
980
998
  if self._status_guard_ref is None:
981
999
  _ = await self.get_supervisor_ref()
@@ -1010,6 +1028,9 @@ class WorkerActor(xo.StatelessActor):
1010
1028
 
1011
1029
  @log_sync(logger=logger)
1012
1030
  def get_model(self, model_uid: str) -> xo.ActorRefType["ModelActor"]:
1031
+ model_status = self._model_uid_to_model_status.get(model_uid)
1032
+ if model_status and model_status.last_error:
1033
+ raise Exception(model_status.last_error)
1013
1034
  model_ref = self._model_uid_to_model.get(model_uid, None)
1014
1035
  if model_ref is None:
1015
1036
  raise ValueError(f"Model not found, uid: {model_uid}")
@@ -1138,6 +1159,83 @@ class WorkerActor(xo.StatelessActor):
1138
1159
  }
1139
1160
  return ret
1140
1161
 
1162
def update_model_status(self, model_uid: str, **kwargs):
    """
    Set attributes on the status object tracked for ``model_uid``.

    Each keyword argument is written onto the status object with ``setattr``.
    When no status is tracked for ``model_uid`` the call does nothing.
    """
    tracked = self._model_uid_to_model_status.get(model_uid)
    if tracked is None:
        return
    for field_name in kwargs:
        setattr(tracked, field_name, kwargs[field_name])
1167
+
1168
def get_model_status(self, model_uid: str):
    """Return the status object tracked for ``model_uid``, or ``None`` if there is none."""
    statuses = self._model_uid_to_model_status
    return statuses.get(model_uid, None)
1170
+
1141
1171
  @staticmethod
1142
1172
  def record_metrics(name, op, kwargs):
1143
1173
  record_metrics(name, op, kwargs)
1174
+
1175
async def start_transfer_for_vllm(
    self, rep_model_uid: str, rank_addresses: List[str]
):
    """
    Forward ``rank_addresses`` to the model actor registered as ``rep_model_uid``.

    The actor is taken from ``self._model_uid_to_model`` and its own
    ``start_transfer_for_vllm`` is awaited; nothing is returned.
    """
    target_actor = self._model_uid_to_model[rep_model_uid]
    pending_call = target_actor.start_transfer_for_vllm(rank_addresses)
    await pending_call
1180
+
1181
@log_async(logger=logger, level=logging.INFO)
async def launch_rank0_model(
    self, rep_model_uid: str, xavier_config: Dict[str, Any]
) -> Tuple[str, int]:
    """
    Create the Xavier rank 0 model actor in a new sub pool of this worker.

    ``xavier_config`` is updated in place with ``rank_address``,
    ``store_address`` and ``store_port`` before it is passed to the actor.
    While the launch is running, ``rep_model_uid`` is present in
    ``self._model_uid_launching_guard``.

    Returns the sub pool address and the store port picked on this worker.
    If any step after the sub pool was appended fails, the sub pool is
    removed again and the error is re-raised.
    """
    from ..model.llm.vllm.xavier.collective_manager import Rank0ModelActor

    if os.name != "nt" and platform.system() != "Darwin":
        # Linux
        start_method = "forkserver"
    else:
        # Windows and macOS
        start_method = "spawn"
    subpool_address = await self._main_pool.append_sub_pool(
        start_method=start_method
    )

    self._model_uid_launching_guard[rep_model_uid] = True
    try:
        try:
            store_address = subpool_address.split(":")[0]
            # Note that `store_port` needs to be generated on the worker,
            # as the TCP store is on rank 0, not on the supervisor.
            # It is allocated inside the guarded region so that a failure
            # here also releases the sub pool created above.
            store_port = xo.utils.get_next_port()
            xavier_config["rank_address"] = subpool_address
            xavier_config["store_address"] = store_address
            xavier_config["store_port"] = store_port
            model_ref = await xo.create_actor(
                Rank0ModelActor,
                address=subpool_address,
                uid=rep_model_uid,
                xavier_config=xavier_config,
            )
        except BaseException:
            # Deliberately broad: the sub pool must be cleaned up on any
            # failure, including cancellation, before re-raising.
            await self._main_pool.remove_sub_pool(subpool_address)
            raise
        self._model_uid_to_model[rep_model_uid] = model_ref
        self._model_uid_to_addr[rep_model_uid] = subpool_address
    finally:
        del self._model_uid_launching_guard[rep_model_uid]
    return subpool_address, store_port
1221
+
1222
@no_type_check
async def recover_model(self, launch_args: Dict[str, Any]):
    """
    Relaunch a model from the keyword arguments of a previous launch.

    Without an ``xavier_config`` entry in ``launch_args`` this only calls
    ``launch_builtin_model``. With one, the replica's rank is first
    unregistered through the supervisor's ``call_collective_manager``, the
    model is relaunched, its transfer component is started with an empty
    address list, and the rank is registered again with the new sub pool
    address and ``update=True``.
    """
    rep_model_uid = launch_args.get("model_uid")
    origin_uid, _ = parse_replica_model_uid(rep_model_uid)
    xavier_config: Optional[Dict[str, Any]] = launch_args.get("xavier_config")
    supervisor_ref = await self.get_supervisor_ref(add_worker=False)

    if xavier_config is None:
        # Plain model: a relaunch is all that is needed.
        await self.launch_builtin_model(**launch_args)
        return

    rank = xavier_config.get("rank")
    await supervisor_ref.call_collective_manager(
        origin_uid, "unregister_rank", rank
    )
    subpool_address = await self.launch_builtin_model(**launch_args)
    model_ref = self._model_uid_to_model[rep_model_uid]
    await model_ref.start_transfer_for_vllm([])
    await supervisor_ref.call_collective_manager(
        origin_uid, "register_rank", rank, subpool_address, update=True
    )
1241
+ )
@@ -846,7 +846,9 @@ def model_launch(
846
846
  kwargs = {}
847
847
  for i in range(0, len(ctx.args), 2):
848
848
  if not ctx.args[i].startswith("--"):
849
- raise ValueError("You must specify extra kwargs with `--` prefix.")
849
+ raise ValueError(
850
+ f"You must specify extra kwargs with `--` prefix. There is an error in parameter passing that is {ctx.args[i]}."
851
+ )
850
852
  kwargs[ctx.args[i][2:]] = handle_click_args_type(ctx.args[i + 1])
851
853
  print(f"Launch model name: {model_name} with kwargs: {kwargs}", file=sys.stderr)
852
854
 
@@ -23,6 +23,7 @@ from ..cmdline import (
23
23
  list_model_registrations,
24
24
  model_chat,
25
25
  model_generate,
26
+ model_launch,
26
27
  model_list,
27
28
  model_terminate,
28
29
  register_model,
@@ -311,3 +312,58 @@ def test_remove_cache(setup):
311
312
 
312
313
  assert result.exit_code == 0
313
314
  assert "Cache directory qwen1.5-chat has been deleted."
315
+
316
+
317
def test_launch_error_in_passing_parameters():
    """Malformed extra arguments make ``model_launch`` exit with 1 and name the bad token."""
    cases = (
        # Known parameter but not provided with value.
        (
            [
                "--model-engine",
                "transformers",
                "--model-name",
                "qwen2.5-instruct",
                "--model-uid",
                "-s",
                "0.5",
                "-f",
                "gptq",
                "-q",
                "INT4",
                "111",
                "-l",
            ],
            "You must specify extra kwargs with `--` prefix. There is an error in parameter passing that is 0.5.",
        ),
        # Unknown parameter
        (
            [
                "--model-engine",
                "transformers",
                "--model-name",
                "qwen2.5-instruct",
                "--model-uid",
                "123",
                "-s",
                "0.5",
                "-f",
                "gptq",
                "-q",
                "INT4",
                "-l",
                "111",
            ],
            "You must specify extra kwargs with `--` prefix. There is an error in parameter passing that is -l.",
        ),
    )

    runner = CliRunner()
    for cli_args, expected_message in cases:
        result = runner.invoke(model_launch, cli_args)
        assert result.exit_code == 1
        assert expected_message in str(result)
xinference/isolation.py CHANGED
@@ -37,6 +37,30 @@ class Isolation:
37
37
  asyncio.set_event_loop(self._loop)
38
38
  self._stopped = asyncio.Event()
39
39
  self._loop.run_until_complete(self._stopped.wait())
40
+ self._cancel_all_tasks(self._loop)
41
+
42
@staticmethod
def _cancel_all_tasks(loop):
    """
    Cancel every unfinished task of ``loop`` and run the loop until they end.

    Tasks that finished with an exception instead of being cancelled are
    reported through ``loop.call_exception_handler``. Does nothing when the
    loop has no unfinished tasks.
    """
    pending = asyncio.all_tasks(loop)
    if pending:
        for task in pending:
            task.cancel()

        # return_exceptions=True keeps one failing task from aborting the wait.
        loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))

        for task in pending:
            if task.cancelled():
                continue
            error = task.exception()
            if error is None:
                continue
            loop.call_exception_handler(
                {
                    "message": "unhandled exception during asyncio.run() shutdown",
                    "exception": error,
                    "task": task,
                }
            )
40
64
 
41
65
  def start(self):
42
66
  if self._threaded:
@@ -21,8 +21,11 @@ from ..core import CacheableModelSpec, ModelDescription
21
21
  from ..utils import valid_model_revision
22
22
  from .chattts import ChatTTSModel
23
23
  from .cosyvoice import CosyVoiceModel
24
+ from .f5tts import F5TTSModel
25
+ from .f5tts_mlx import F5TTSMLXModel
24
26
  from .fish_speech import FishSpeechModel
25
27
  from .funasr import FunASRModel
28
+ from .melotts import MeloTTSModel
26
29
  from .whisper import WhisperModel
27
30
  from .whisper_mlx import WhisperMLXModel
28
31
 
@@ -46,6 +49,7 @@ class AudioModelFamilyV1(CacheableModelSpec):
46
49
  model_id: str
47
50
  model_revision: Optional[str]
48
51
  multilingual: bool
52
+ language: Optional[str]
49
53
  model_ability: Optional[str]
50
54
  default_model_config: Optional[Dict[str, Any]]
51
55
  default_transcription_config: Optional[Dict[str, Any]]
@@ -169,6 +173,9 @@ def create_audio_model_instance(
169
173
  ChatTTSModel,
170
174
  CosyVoiceModel,
171
175
  FishSpeechModel,
176
+ F5TTSModel,
177
+ F5TTSMLXModel,
178
+ MeloTTSModel,
172
179
  ],
173
180
  AudioModelDescription,
174
181
  ]:
@@ -182,6 +189,9 @@ def create_audio_model_instance(
182
189
  ChatTTSModel,
183
190
  CosyVoiceModel,
184
191
  FishSpeechModel,
192
+ F5TTSModel,
193
+ F5TTSMLXModel,
194
+ MeloTTSModel,
185
195
  ]
186
196
  if model_spec.model_family == "whisper":
187
197
  if not model_spec.engine:
@@ -196,6 +206,12 @@ def create_audio_model_instance(
196
206
  model = CosyVoiceModel(model_uid, model_path, model_spec, **kwargs)
197
207
  elif model_spec.model_family == "FishAudio":
198
208
  model = FishSpeechModel(model_uid, model_path, model_spec, **kwargs)
209
+ elif model_spec.model_family == "F5-TTS":
210
+ model = F5TTSModel(model_uid, model_path, model_spec, **kwargs)
211
+ elif model_spec.model_family == "F5-TTS-MLX":
212
+ model = F5TTSMLXModel(model_uid, model_path, model_spec, **kwargs)
213
+ elif model_spec.model_family == "MeloTTS":
214
+ model = MeloTTSModel(model_uid, model_path, model_spec, **kwargs)
199
215
  else:
200
216
  raise Exception(f"Unsupported audio model family: {model_spec.model_family}")
201
217
  model_description = AudioModelDescription(