xinference 1.0.1__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_compat.py +2 -0
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +77 -71
- xinference/core/chat_interface.py +6 -1
- xinference/core/model.py +79 -19
- xinference/core/supervisor.py +172 -10
- xinference/core/utils.py +12 -8
- xinference/core/worker.py +102 -4
- xinference/deploy/cmdline.py +3 -1
- xinference/deploy/test/test_cmdline.py +56 -0
- xinference/isolation.py +24 -0
- xinference/model/audio/core.py +16 -0
- xinference/model/audio/cosyvoice.py +39 -6
- xinference/model/audio/f5tts.py +200 -0
- xinference/model/audio/f5tts_mlx.py +260 -0
- xinference/model/audio/fish_speech.py +36 -111
- xinference/model/audio/melotts.py +110 -0
- xinference/model/audio/model_spec.json +99 -3
- xinference/model/audio/model_spec_modelscope.json +27 -0
- xinference/model/audio/utils.py +32 -0
- xinference/model/audio/whisper.py +35 -10
- xinference/model/embedding/core.py +203 -142
- xinference/model/embedding/model_spec.json +7 -0
- xinference/model/embedding/model_spec_modelscope.json +8 -0
- xinference/model/image/core.py +69 -1
- xinference/model/image/model_spec.json +145 -4
- xinference/model/image/model_spec_modelscope.json +150 -4
- xinference/model/image/stable_diffusion/core.py +45 -13
- xinference/model/llm/__init__.py +4 -2
- xinference/model/llm/llm_family.json +536 -53
- xinference/model/llm/llm_family.py +15 -36
- xinference/model/llm/llm_family_modelscope.json +454 -20
- xinference/model/llm/memory.py +1 -1
- xinference/model/llm/mlx/core.py +248 -52
- xinference/model/llm/sglang/core.py +1 -0
- xinference/model/llm/transformers/chatglm.py +9 -5
- xinference/model/llm/transformers/cogagent.py +272 -0
- xinference/model/llm/transformers/core.py +2 -0
- xinference/model/llm/transformers/qwen2_vl.py +12 -1
- xinference/model/llm/transformers/utils.py +16 -8
- xinference/model/llm/utils.py +36 -4
- xinference/model/llm/vllm/core.py +53 -10
- xinference/model/llm/vllm/xavier/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/allocator.py +74 -0
- xinference/model/llm/vllm/xavier/block.py +111 -0
- xinference/model/llm/vllm/xavier/block_manager.py +71 -0
- xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
- xinference/model/llm/vllm/xavier/collective.py +74 -0
- xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
- xinference/model/llm/vllm/xavier/engine.py +247 -0
- xinference/model/llm/vllm/xavier/executor.py +134 -0
- xinference/model/llm/vllm/xavier/scheduler.py +438 -0
- xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
- xinference/model/llm/vllm/xavier/transfer.py +319 -0
- xinference/model/video/diffusers.py +14 -0
- xinference/model/video/model_spec.json +15 -0
- xinference/model/video/model_spec_modelscope.json +16 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
- xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
- xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
- xinference/thirdparty/cosyvoice/bin/train.py +42 -8
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
- xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
- xinference/thirdparty/cosyvoice/cli/model.py +330 -80
- xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
- xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
- xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
- xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
- xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
- xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
- xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
- xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
- xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
- xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
- xinference/thirdparty/cosyvoice/utils/common.py +28 -1
- xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
- xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
- xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
- xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
- xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
- xinference/thirdparty/f5_tts/api.py +166 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
- xinference/thirdparty/f5_tts/eval/README.md +49 -0
- xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
- xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
- xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
- xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
- xinference/thirdparty/f5_tts/infer/README.md +191 -0
- xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
- xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
- xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
- xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
- xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
- xinference/thirdparty/f5_tts/model/__init__.py +10 -0
- xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
- xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
- xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
- xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
- xinference/thirdparty/f5_tts/model/cfm.py +285 -0
- xinference/thirdparty/f5_tts/model/dataset.py +319 -0
- xinference/thirdparty/f5_tts/model/modules.py +658 -0
- xinference/thirdparty/f5_tts/model/trainer.py +366 -0
- xinference/thirdparty/f5_tts/model/utils.py +185 -0
- xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
- xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
- xinference/thirdparty/f5_tts/socket_server.py +159 -0
- xinference/thirdparty/f5_tts/train/README.md +77 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
- xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
- xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
- xinference/thirdparty/f5_tts/train/train.py +75 -0
- xinference/thirdparty/fish_speech/fish_speech/conversation.py +94 -83
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +63 -20
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +1 -26
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
- xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1 -1
- xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +7 -13
- xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
- xinference/thirdparty/fish_speech/tools/fish_e2e.py +2 -2
- xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
- xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
- xinference/thirdparty/fish_speech/tools/llama/generate.py +117 -89
- xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
- xinference/thirdparty/fish_speech/tools/schema.py +11 -28
- xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
- xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
- xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
- xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
- xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
- xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
- xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
- xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
- xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
- xinference/thirdparty/matcha/utils/utils.py +2 -2
- xinference/thirdparty/melo/api.py +135 -0
- xinference/thirdparty/melo/app.py +61 -0
- xinference/thirdparty/melo/attentions.py +459 -0
- xinference/thirdparty/melo/commons.py +160 -0
- xinference/thirdparty/melo/configs/config.json +94 -0
- xinference/thirdparty/melo/data/example/metadata.list +20 -0
- xinference/thirdparty/melo/data_utils.py +413 -0
- xinference/thirdparty/melo/download_utils.py +67 -0
- xinference/thirdparty/melo/infer.py +25 -0
- xinference/thirdparty/melo/init_downloads.py +14 -0
- xinference/thirdparty/melo/losses.py +58 -0
- xinference/thirdparty/melo/main.py +36 -0
- xinference/thirdparty/melo/mel_processing.py +174 -0
- xinference/thirdparty/melo/models.py +1030 -0
- xinference/thirdparty/melo/modules.py +598 -0
- xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
- xinference/thirdparty/melo/monotonic_align/core.py +46 -0
- xinference/thirdparty/melo/preprocess_text.py +135 -0
- xinference/thirdparty/melo/split_utils.py +174 -0
- xinference/thirdparty/melo/text/__init__.py +35 -0
- xinference/thirdparty/melo/text/chinese.py +199 -0
- xinference/thirdparty/melo/text/chinese_bert.py +107 -0
- xinference/thirdparty/melo/text/chinese_mix.py +253 -0
- xinference/thirdparty/melo/text/cleaner.py +36 -0
- xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
- xinference/thirdparty/melo/text/cmudict.rep +129530 -0
- xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
- xinference/thirdparty/melo/text/english.py +284 -0
- xinference/thirdparty/melo/text/english_bert.py +39 -0
- xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
- xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
- xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
- xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
- xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
- xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
- xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
- xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
- xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
- xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
- xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
- xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
- xinference/thirdparty/melo/text/french.py +94 -0
- xinference/thirdparty/melo/text/french_bert.py +39 -0
- xinference/thirdparty/melo/text/japanese.py +647 -0
- xinference/thirdparty/melo/text/japanese_bert.py +49 -0
- xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
- xinference/thirdparty/melo/text/korean.py +192 -0
- xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
- xinference/thirdparty/melo/text/spanish.py +122 -0
- xinference/thirdparty/melo/text/spanish_bert.py +39 -0
- xinference/thirdparty/melo/text/symbols.py +290 -0
- xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
- xinference/thirdparty/melo/train.py +635 -0
- xinference/thirdparty/melo/train.sh +19 -0
- xinference/thirdparty/melo/transforms.py +209 -0
- xinference/thirdparty/melo/utils.py +424 -0
- xinference/types.py +15 -0
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
- xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
- xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
- xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +67 -3
- xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
- xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
- xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
- xinference/web/ui/node_modules/i18next/package.json +129 -0
- xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
- xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
- xinference/web/ui/node_modules/react-i18next/package.json +162 -0
- xinference/web/ui/node_modules/void-elements/package.json +34 -0
- xinference/web/ui/package-lock.json +69 -3
- xinference/web/ui/package.json +2 -0
- xinference/web/ui/src/locales/en.json +186 -0
- xinference/web/ui/src/locales/zh.json +186 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/METADATA +68 -32
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/RECORD +316 -122
- xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
- xinference/thirdparty/fish_speech/tools/api.py +0 -943
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -95
- xinference/thirdparty/fish_speech/tools/webui.py +0 -548
- xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
- xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
- xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
- xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
- /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
- /xinference/thirdparty/{fish_speech/tools → melo/text/fr_phonemizer}/__init__.py +0 -0
- /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/WHEEL +0 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
xinference/core/supervisor.py
CHANGED
|
@@ -267,6 +267,14 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
267
267
|
signal.SIGTERM, lambda: asyncio.create_task(signal_handler())
|
|
268
268
|
)
|
|
269
269
|
|
|
270
|
+
from ..model.llm.vllm.xavier.block_tracker import VLLMBlockTracker
|
|
271
|
+
from ..model.llm.vllm.xavier.collective_manager import CollectiveManager
|
|
272
|
+
|
|
273
|
+
self._block_tracker_mapping: Dict[str, xo.ActorRefType[VLLMBlockTracker]] = {}
|
|
274
|
+
self._collective_manager_mapping: Dict[
|
|
275
|
+
str, xo.ActorRefType[CollectiveManager]
|
|
276
|
+
] = {}
|
|
277
|
+
|
|
270
278
|
@typing.no_type_check
|
|
271
279
|
async def get_cluster_device_info(self, detailed: bool = False) -> List:
|
|
272
280
|
import psutil
|
|
@@ -959,29 +967,83 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
959
967
|
if model_uid is None:
|
|
960
968
|
model_uid = self._gen_model_uid(model_name)
|
|
961
969
|
|
|
970
|
+
# Xavier-related
|
|
971
|
+
enable_xavier: bool = (
|
|
972
|
+
bool(kwargs.pop("enable_xavier", False))
|
|
973
|
+
and model_engine is not None
|
|
974
|
+
and model_engine.lower() == "vllm"
|
|
975
|
+
)
|
|
976
|
+
store_address = None
|
|
977
|
+
store_port = None
|
|
978
|
+
world_size = None
|
|
979
|
+
if enable_xavier:
|
|
980
|
+
if replica <= 1:
|
|
981
|
+
logger.warning(f"Enabling xavier when `replica<=1` is meaningless.")
|
|
982
|
+
enable_xavier = False
|
|
983
|
+
else:
|
|
984
|
+
from ..model.llm.vllm.xavier.block_tracker import VLLMBlockTracker
|
|
985
|
+
from ..model.llm.vllm.xavier.collective_manager import CollectiveManager
|
|
986
|
+
|
|
987
|
+
self._block_tracker_mapping[model_uid] = await xo.create_actor(
|
|
988
|
+
VLLMBlockTracker,
|
|
989
|
+
address=self.address,
|
|
990
|
+
uid=f"{VLLMBlockTracker.default_uid()}-{model_uid}",
|
|
991
|
+
)
|
|
992
|
+
world_size = replica + 1
|
|
993
|
+
logger.info(f"Going to start xavier with world size: {world_size}")
|
|
994
|
+
self._collective_manager_mapping[model_uid] = await xo.create_actor(
|
|
995
|
+
CollectiveManager,
|
|
996
|
+
address=self.address,
|
|
997
|
+
uid=f"{CollectiveManager.default_uid()}-{model_uid}",
|
|
998
|
+
model_uid=model_uid,
|
|
999
|
+
)
|
|
1000
|
+
logger.info(f"Start collective manager for {model_uid} done.")
|
|
1001
|
+
|
|
962
1002
|
model_size = str(model_size_in_billions) if model_size_in_billions else ""
|
|
963
1003
|
logger.debug(
|
|
964
1004
|
f"Enter launch_builtin_model, model_uid: {model_uid}, model_name: {model_name}, model_size: {model_size}, "
|
|
965
|
-
f"model_format: {model_format}, quantization: {quantization}, replica: {replica}, "
|
|
1005
|
+
f"model_format: {model_format}, quantization: {quantization}, replica: {replica}, enable_xavier: {enable_xavier}, "
|
|
966
1006
|
f"kwargs: {kwargs}"
|
|
967
1007
|
)
|
|
968
1008
|
|
|
969
|
-
async def _launch_one_model(_replica_model_uid):
|
|
1009
|
+
async def _launch_one_model(worker_ref, _replica_model_uid, rank: int):
|
|
970
1010
|
if _replica_model_uid in self._replica_model_uid_to_worker:
|
|
971
1011
|
raise ValueError(
|
|
972
1012
|
f"Model is already in the model list, uid: {_replica_model_uid}"
|
|
973
1013
|
)
|
|
1014
|
+
|
|
1015
|
+
nonlocal store_address
|
|
1016
|
+
nonlocal store_port
|
|
1017
|
+
xavier_config = (
|
|
1018
|
+
{
|
|
1019
|
+
"block_tracker_uid": self._block_tracker_mapping[model_uid].uid,
|
|
1020
|
+
"block_tracker_address": self._block_tracker_mapping[
|
|
1021
|
+
model_uid
|
|
1022
|
+
].address,
|
|
1023
|
+
"rank": rank,
|
|
1024
|
+
"world_size": world_size,
|
|
1025
|
+
"store_address": store_address,
|
|
1026
|
+
"store_port": store_port,
|
|
1027
|
+
}
|
|
1028
|
+
if enable_xavier
|
|
1029
|
+
else None
|
|
1030
|
+
)
|
|
1031
|
+
|
|
1032
|
+
if enable_xavier and rank == 0:
|
|
1033
|
+
rank0_address, _port = await worker_ref.launch_rank0_model(
|
|
1034
|
+
_replica_model_uid, xavier_config
|
|
1035
|
+
)
|
|
1036
|
+
self._replica_model_uid_to_worker[_replica_model_uid] = worker_ref
|
|
1037
|
+
store_address = rank0_address.split(":")[0]
|
|
1038
|
+
store_port = _port
|
|
1039
|
+
return rank0_address
|
|
1040
|
+
|
|
974
1041
|
replica_gpu_idx = assign_replica_gpu(_replica_model_uid, replica, gpu_idx)
|
|
975
1042
|
nonlocal model_type
|
|
976
1043
|
|
|
977
|
-
worker_ref = (
|
|
978
|
-
target_ip_worker_ref
|
|
979
|
-
if target_ip_worker_ref is not None
|
|
980
|
-
else await self._choose_worker()
|
|
981
|
-
)
|
|
982
1044
|
# LLM as default for compatibility
|
|
983
1045
|
model_type = model_type or "LLM"
|
|
984
|
-
await worker_ref.launch_builtin_model(
|
|
1046
|
+
subpool_address = await worker_ref.launch_builtin_model(
|
|
985
1047
|
model_uid=_replica_model_uid,
|
|
986
1048
|
model_name=model_name,
|
|
987
1049
|
model_size_in_billions=model_size_in_billions,
|
|
@@ -995,14 +1057,64 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
995
1057
|
gpu_idx=replica_gpu_idx,
|
|
996
1058
|
download_hub=download_hub,
|
|
997
1059
|
model_path=model_path,
|
|
1060
|
+
xavier_config=xavier_config,
|
|
998
1061
|
**kwargs,
|
|
999
1062
|
)
|
|
1000
1063
|
self._replica_model_uid_to_worker[_replica_model_uid] = worker_ref
|
|
1064
|
+
return subpool_address
|
|
1001
1065
|
|
|
1002
1066
|
async def _launch_model():
|
|
1003
1067
|
try:
|
|
1004
|
-
|
|
1005
|
-
|
|
1068
|
+
worker_refs = []
|
|
1069
|
+
rank_addresses = []
|
|
1070
|
+
for _idx, rep_model_uid in enumerate(
|
|
1071
|
+
iter_replica_model_uid(model_uid, replica)
|
|
1072
|
+
):
|
|
1073
|
+
worker_ref = (
|
|
1074
|
+
target_ip_worker_ref
|
|
1075
|
+
if target_ip_worker_ref is not None
|
|
1076
|
+
else await self._choose_worker()
|
|
1077
|
+
)
|
|
1078
|
+
if enable_xavier and _idx == 0:
|
|
1079
|
+
"""
|
|
1080
|
+
Start the rank 0 model actor on the worker that holds the rank 1 replica,
|
|
1081
|
+
solely for constructing the collective communication world.
|
|
1082
|
+
"""
|
|
1083
|
+
_uid = model_uid + "-rank0"
|
|
1084
|
+
rank0_address = await _launch_one_model(worker_ref, _uid, 0)
|
|
1085
|
+
worker_refs.append((worker_ref, _uid))
|
|
1086
|
+
rank_addresses.append(rank0_address)
|
|
1087
|
+
|
|
1088
|
+
subpool_address = await _launch_one_model(
|
|
1089
|
+
worker_ref, rep_model_uid, _idx + 1
|
|
1090
|
+
)
|
|
1091
|
+
worker_refs.append((worker_ref, rep_model_uid))
|
|
1092
|
+
rank_addresses.append(subpool_address)
|
|
1093
|
+
|
|
1094
|
+
# For xavier, start all the vllm instances first,
|
|
1095
|
+
# and then start the transfer component,
|
|
1096
|
+
# because the transfer actor needs all the rank addresses used for collective communication
|
|
1097
|
+
if enable_xavier:
|
|
1098
|
+
logger.debug(f"Init transfer component for xavier...")
|
|
1099
|
+
collective_manager_ref = self._collective_manager_mapping[model_uid]
|
|
1100
|
+
tasks = []
|
|
1101
|
+
for worker_ref, rep_model_uid in worker_refs:
|
|
1102
|
+
tasks.append(
|
|
1103
|
+
worker_ref.start_transfer_for_vllm(
|
|
1104
|
+
rep_model_uid, rank_addresses
|
|
1105
|
+
)
|
|
1106
|
+
)
|
|
1107
|
+
# Here you must use asyncio.gather, not a for loop,
|
|
1108
|
+
# or you will get stuck.
|
|
1109
|
+
await asyncio.gather(*tasks)
|
|
1110
|
+
|
|
1111
|
+
# init collective_manager
|
|
1112
|
+
for idx, addr in enumerate(rank_addresses):
|
|
1113
|
+
await collective_manager_ref.register_rank(
|
|
1114
|
+
idx, addr, update=False
|
|
1115
|
+
)
|
|
1116
|
+
|
|
1117
|
+
logger.debug(f"Init transfer component for xavier done.")
|
|
1006
1118
|
except Exception:
|
|
1007
1119
|
# terminate_model will remove the replica info.
|
|
1008
1120
|
await self.terminate_model(model_uid, suppress_exception=True)
|
|
@@ -1131,6 +1243,38 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
1131
1243
|
raise
|
|
1132
1244
|
self._model_uid_to_replica_info.pop(model_uid, None)
|
|
1133
1245
|
|
|
1246
|
+
# clear for xavier
|
|
1247
|
+
rank0_uid = model_uid + "-rank0"
|
|
1248
|
+
if rank0_uid in self._replica_model_uid_to_worker:
|
|
1249
|
+
await _terminate_one_model(rank0_uid)
|
|
1250
|
+
|
|
1251
|
+
collective_manager_ref = self._collective_manager_mapping.pop(model_uid, None)
|
|
1252
|
+
if collective_manager_ref is not None:
|
|
1253
|
+
try:
|
|
1254
|
+
await xo.destroy_actor(collective_manager_ref)
|
|
1255
|
+
except Exception as e:
|
|
1256
|
+
logger.debug(
|
|
1257
|
+
"Destroy collective_manager_ref failed, model uid: %s, error: %s",
|
|
1258
|
+
model_uid,
|
|
1259
|
+
e,
|
|
1260
|
+
)
|
|
1261
|
+
finally:
|
|
1262
|
+
logger.debug(
|
|
1263
|
+
f"Destroy collective_manager_ref done. model uid: {model_uid}"
|
|
1264
|
+
)
|
|
1265
|
+
block_tracker_ref = self._block_tracker_mapping.pop(model_uid, None)
|
|
1266
|
+
if block_tracker_ref is not None:
|
|
1267
|
+
try:
|
|
1268
|
+
await xo.destroy_actor(block_tracker_ref)
|
|
1269
|
+
except Exception as e:
|
|
1270
|
+
logger.debug(
|
|
1271
|
+
"Destroy block_tracker_ref failed, model uid: %s, error: %s",
|
|
1272
|
+
model_uid,
|
|
1273
|
+
e,
|
|
1274
|
+
)
|
|
1275
|
+
finally:
|
|
1276
|
+
logger.debug(f"Destroy block_tracker_ref done. model uid: {model_uid}")
|
|
1277
|
+
|
|
1134
1278
|
@log_async(logger=logger)
|
|
1135
1279
|
async def get_model(self, model_uid: str) -> xo.ActorRefType["ModelActor"]:
|
|
1136
1280
|
replica_info = self._model_uid_to_replica_info.get(model_uid, None)
|
|
@@ -1148,6 +1292,15 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
1148
1292
|
)
|
|
1149
1293
|
return await worker_ref.get_model(model_uid=replica_model_uid)
|
|
1150
1294
|
|
|
1295
|
+
@log_async(logger=logger)
async def get_model_status(self, replica_model_uid: str):
    """
    Return the status of one model replica.

    The worker recorded for ``replica_model_uid`` is looked up and asked
    for the status; its answer is returned unchanged.

    :param replica_model_uid: uid of the replica to look up.
    :raises ValueError: if no worker is recorded for ``replica_model_uid``.
    """
    owner = self._replica_model_uid_to_worker.get(replica_model_uid)
    if owner is not None:
        return await owner.get_model_status(replica_model_uid)
    raise ValueError(f"Model not found in the model list, uid: {replica_model_uid}")
|
|
1303
|
+
|
|
1151
1304
|
@log_async(logger=logger)
|
|
1152
1305
|
async def describe_model(self, model_uid: str) -> Dict[str, Any]:
|
|
1153
1306
|
replica_info = self._model_uid_to_replica_info.get(model_uid, None)
|
|
@@ -1377,3 +1530,12 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
1377
1530
|
|
|
1378
1531
|
async def get_progress(self, request_id: str) -> float:
    """
    Return the progress that the progress tracker reports for ``request_id``.
    """
    tracker = self._progress_tracker
    progress = await tracker.get_progress(request_id)
    return progress
|
|
1533
|
+
|
|
1534
|
+
async def call_collective_manager(
    self, model_uid: str, func_name: str, *args, **kwargs
):
    """
    Used by worker.

    Looks up the collective manager registered for ``model_uid`` and awaits
    the method named ``func_name`` on it, forwarding ``*args`` and
    ``**kwargs``. The method's result is discarded, so this always returns
    ``None``.

    :raises KeyError: if no collective manager is registered for ``model_uid``.
    """
    manager = self._collective_manager_mapping[model_uid]
    target = getattr(manager, func_name)
    await target(*args, **kwargs)
|
xinference/core/utils.py
CHANGED
|
@@ -62,12 +62,16 @@ def log_async(
|
|
|
62
62
|
|
|
63
63
|
@wraps(func)
|
|
64
64
|
async def wrapped(*args, **kwargs):
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
65
|
+
request_id_str = kwargs.get("request_id")
|
|
66
|
+
if not request_id_str:
|
|
67
|
+
# sometimes `request_id` not in kwargs
|
|
68
|
+
# we try to bind the arguments
|
|
69
|
+
try:
|
|
70
|
+
bound_args = sig.bind_partial(*args, **kwargs)
|
|
71
|
+
arguments = bound_args.arguments
|
|
72
|
+
except TypeError:
|
|
73
|
+
arguments = {}
|
|
74
|
+
request_id_str = arguments.get("request_id", "")
|
|
71
75
|
if not request_id_str:
|
|
72
76
|
request_id_str = uuid.uuid1()
|
|
73
77
|
if func_name == "text_to_image":
|
|
@@ -272,8 +276,8 @@ def get_nvidia_gpu_info() -> Dict:
|
|
|
272
276
|
|
|
273
277
|
|
|
274
278
|
def assign_replica_gpu(
|
|
275
|
-
_replica_model_uid: str, replica: int, gpu_idx: Union[int, List[int]]
|
|
276
|
-
) -> List[int]:
|
|
279
|
+
_replica_model_uid: str, replica: int, gpu_idx: Optional[Union[int, List[int]]]
|
|
280
|
+
) -> Optional[List[int]]:
|
|
277
281
|
model_uid, rep_id = parse_replica_model_uid(_replica_model_uid)
|
|
278
282
|
rep_id, replica = int(rep_id), int(replica)
|
|
279
283
|
if isinstance(gpu_idx, int):
|
xinference/core/worker.py
CHANGED
|
@@ -22,8 +22,9 @@ import signal
|
|
|
22
22
|
import threading
|
|
23
23
|
import time
|
|
24
24
|
from collections import defaultdict
|
|
25
|
+
from dataclasses import dataclass
|
|
25
26
|
from logging import getLogger
|
|
26
|
-
from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union
|
|
27
|
+
from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union, no_type_check
|
|
27
28
|
|
|
28
29
|
import xoscar as xo
|
|
29
30
|
from async_timeout import timeout
|
|
@@ -58,6 +59,11 @@ else:
|
|
|
58
59
|
MODEL_ACTOR_AUTO_RECOVER_LIMIT = None
|
|
59
60
|
|
|
60
61
|
|
|
62
|
+
@dataclass
class ModelStatus:
    """
    Mutable status record kept for a model.
    """

    # Text of the last error recorded for the model; empty by default.
    last_error: str = ""
|
|
65
|
+
|
|
66
|
+
|
|
61
67
|
class WorkerActor(xo.StatelessActor):
|
|
62
68
|
def __init__(
|
|
63
69
|
self,
|
|
@@ -90,6 +96,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
90
96
|
# attributes maintained after model launched:
|
|
91
97
|
self._model_uid_to_model: Dict[str, xo.ActorRefType["ModelActor"]] = {}
|
|
92
98
|
self._model_uid_to_model_spec: Dict[str, ModelDescription] = {}
|
|
99
|
+
self._model_uid_to_model_status: Dict[str, ModelStatus] = {}
|
|
93
100
|
self._gpu_to_model_uid: Dict[int, str] = {}
|
|
94
101
|
self._gpu_to_embedding_model_uids: Dict[int, Set[str]] = defaultdict(set)
|
|
95
102
|
# Dict structure: gpu_index: {(replica_model_uid, model_type)}
|
|
@@ -177,12 +184,12 @@ class WorkerActor(xo.StatelessActor):
|
|
|
177
184
|
self._model_uid_to_recover_count[model_uid] = (
|
|
178
185
|
recover_count - 1
|
|
179
186
|
)
|
|
180
|
-
await self.
|
|
187
|
+
await self.recover_model(launch_args)
|
|
181
188
|
else:
|
|
182
189
|
logger.warning("Stop recreating model actor.")
|
|
183
190
|
else:
|
|
184
191
|
logger.warning("Recreating model actor %s ...", model_uid)
|
|
185
|
-
await self.
|
|
192
|
+
await self.recover_model(launch_args)
|
|
186
193
|
break
|
|
187
194
|
|
|
188
195
|
@classmethod
|
|
@@ -866,6 +873,9 @@ class WorkerActor(xo.StatelessActor):
|
|
|
866
873
|
)
|
|
867
874
|
|
|
868
875
|
try:
|
|
876
|
+
xavier_config: Optional[Dict] = kwargs.pop("xavier_config", None)
|
|
877
|
+
if xavier_config is not None:
|
|
878
|
+
xavier_config["rank_address"] = subpool_address
|
|
869
879
|
model, model_description = await asyncio.to_thread(
|
|
870
880
|
create_model_instance,
|
|
871
881
|
subpool_address,
|
|
@@ -893,6 +903,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
893
903
|
model=model,
|
|
894
904
|
model_description=model_description,
|
|
895
905
|
request_limits=request_limits,
|
|
906
|
+
xavier_config=xavier_config,
|
|
896
907
|
)
|
|
897
908
|
await model_ref.load()
|
|
898
909
|
except:
|
|
@@ -902,6 +913,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
902
913
|
raise
|
|
903
914
|
self._model_uid_to_model[model_uid] = model_ref
|
|
904
915
|
self._model_uid_to_model_spec[model_uid] = model_description
|
|
916
|
+
self._model_uid_to_model_status[model_uid] = ModelStatus()
|
|
905
917
|
self._model_uid_to_addr[model_uid] = subpool_address
|
|
906
918
|
self._model_uid_to_recover_count.setdefault(
|
|
907
919
|
model_uid, MODEL_ACTOR_AUTO_RECOVER_LIMIT
|
|
@@ -921,13 +933,18 @@ class WorkerActor(xo.StatelessActor):
|
|
|
921
933
|
origin_uid,
|
|
922
934
|
{"model_ability": abilities, "status": LaunchStatus.READY.name},
|
|
923
935
|
)
|
|
936
|
+
return subpool_address
|
|
924
937
|
|
|
925
938
|
@log_async(logger=logger, level=logging.INFO)
|
|
926
939
|
async def terminate_model(self, model_uid: str, is_model_die=False):
|
|
927
940
|
# Terminating a model while it is launching is not allowed
|
|
928
941
|
if model_uid in self._model_uid_launching_guard:
|
|
929
942
|
raise ValueError(f"{model_uid} is launching")
|
|
930
|
-
|
|
943
|
+
# In special cases, if the suffix is `-rank0`, this is the Xavier's rank 0 model actor.
|
|
944
|
+
if model_uid.endswith("-rank0"):
|
|
945
|
+
origin_uid = model_uid.removesuffix("-rank0")
|
|
946
|
+
else:
|
|
947
|
+
origin_uid, _ = parse_replica_model_uid(model_uid)
|
|
931
948
|
try:
|
|
932
949
|
_ = await self.get_supervisor_ref()
|
|
933
950
|
if self._event_collector_ref is not None:
|
|
@@ -976,6 +993,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
976
993
|
status = LaunchStatus.ERROR.name
|
|
977
994
|
else:
|
|
978
995
|
status = LaunchStatus.TERMINATED.name
|
|
996
|
+
self._model_uid_to_model_status.pop(model_uid, None)
|
|
979
997
|
|
|
980
998
|
if self._status_guard_ref is None:
|
|
981
999
|
_ = await self.get_supervisor_ref()
|
|
@@ -1010,6 +1028,9 @@ class WorkerActor(xo.StatelessActor):
|
|
|
1010
1028
|
|
|
1011
1029
|
@log_sync(logger=logger)
|
|
1012
1030
|
def get_model(self, model_uid: str) -> xo.ActorRefType["ModelActor"]:
|
|
1031
|
+
model_status = self._model_uid_to_model_status.get(model_uid)
|
|
1032
|
+
if model_status and model_status.last_error:
|
|
1033
|
+
raise Exception(model_status.last_error)
|
|
1013
1034
|
model_ref = self._model_uid_to_model.get(model_uid, None)
|
|
1014
1035
|
if model_ref is None:
|
|
1015
1036
|
raise ValueError(f"Model not found, uid: {model_uid}")
|
|
@@ -1138,6 +1159,83 @@ class WorkerActor(xo.StatelessActor):
|
|
|
1138
1159
|
}
|
|
1139
1160
|
return ret
|
|
1140
1161
|
|
|
1162
|
+
def update_model_status(self, model_uid: str, **kwargs):
    """
    Set attributes on the status record stored for ``model_uid``.

    Each keyword argument is written onto the record with ``setattr``.
    Nothing happens when no record is stored for ``model_uid``.
    """
    status = self._model_uid_to_model_status.get(model_uid)
    if status is None:
        return
    for attr_name in kwargs:
        setattr(status, attr_name, kwargs[attr_name])
|
|
1167
|
+
|
|
1168
|
+
def get_model_status(self, model_uid: str):
    """
    Return the status record stored for ``model_uid``, or ``None`` if there is none.
    """
    statuses = self._model_uid_to_model_status
    return statuses.get(model_uid)
|
|
1170
|
+
|
|
1141
1171
|
@staticmethod
def record_metrics(name, op, kwargs):
    """
    Forward ``name``, ``op`` and ``kwargs`` unchanged to ``record_metrics``.

    NOTE(review): inside the class body this name resolves to the
    ``record_metrics`` visible at module scope, not to this static method.
    Confirm against the file's imports.
    """
    record_metrics(name, op, kwargs)
|
|
1174
|
+
|
|
1175
|
+
async def start_transfer_for_vllm(
    self, rep_model_uid: str, rank_addresses: List[str]
):
    """
    Ask the model actor stored for ``rep_model_uid`` to start its transfer component.

    :param rep_model_uid: uid used as the key in ``self._model_uid_to_model``.
    :param rank_addresses: passed through unchanged to the model actor.
    :raises KeyError: if no model actor is stored for ``rep_model_uid``.
    """
    await self._model_uid_to_model[rep_model_uid].start_transfer_for_vllm(
        rank_addresses
    )
|
|
1180
|
+
|
|
1181
|
+
@log_async(logger=logger, level=logging.INFO)
async def launch_rank0_model(
    self, rep_model_uid: str, xavier_config: Dict[str, Any]
) -> Tuple[str, int]:
    """
    Create a ``Rank0ModelActor`` in a freshly appended sub pool.

    ``xavier_config`` is modified in place: ``rank_address``,
    ``store_address`` and ``store_port`` are written into it before the
    actor is created. While the launch runs, ``rep_model_uid`` is present in
    ``self._model_uid_launching_guard``; the entry is removed again whether
    the launch succeeds or fails.

    :param rep_model_uid: uid given to the created actor and used as the key
        under which the actor ref and its address are recorded.
    :param xavier_config: configuration dict handed to the actor.
    :return: ``(subpool_address, store_port)``.
    """
    from ..model.llm.vllm.xavier.collective_manager import Rank0ModelActor

    # Windows and macOS use "spawn"; everything else (Linux) uses "forkserver".
    use_spawn = os.name == "nt" or platform.system() == "Darwin"
    start_method = "spawn" if use_spawn else "forkserver"
    subpool_address = await self._main_pool.append_sub_pool(
        start_method=start_method
    )

    # Host part of the sub pool address.
    store_address = subpool_address.split(":")[0]
    # Note that `store_port` needs to be generated on the worker,
    # as the TCP store is on rank 0, not on the supervisor.
    store_port = xo.utils.get_next_port()

    self._model_uid_launching_guard[rep_model_uid] = True
    try:
        try:
            xavier_config["rank_address"] = subpool_address
            xavier_config["store_address"] = store_address
            xavier_config["store_port"] = store_port
            rank0_ref = await xo.create_actor(
                Rank0ModelActor,
                address=subpool_address,
                uid=rep_model_uid,
                xavier_config=xavier_config,
            )
        except BaseException:
            # Do not leave the sub pool behind when the actor cannot be
            # created; the original error is re-raised.
            await self._main_pool.remove_sub_pool(subpool_address)
            raise
        self._model_uid_to_model[rep_model_uid] = rank0_ref
        self._model_uid_to_addr[rep_model_uid] = subpool_address
    finally:
        del self._model_uid_launching_guard[rep_model_uid]
    return subpool_address, store_port
|
|
1221
|
+
|
|
1222
|
+
@no_type_check
async def recover_model(self, launch_args: Dict[str, Any]):
    """
    Launch a model again from the arguments it was launched with.

    When ``launch_args`` carries an ``xavier_config``, the rank named in
    that config is unregistered from the collective manager (through the
    supervisor) before the relaunch. After the relaunch the transfer
    component is started with an empty address list and the rank is
    registered again with the new sub pool address.

    :param launch_args: keyword arguments forwarded to
        ``launch_builtin_model``; ``model_uid`` and, optionally,
        ``xavier_config`` are read from it.
    """
    rep_model_uid = launch_args.get("model_uid")
    origin_uid, _ = parse_replica_model_uid(rep_model_uid)
    xavier_config: Optional[Dict[str, Any]] = launch_args.get("xavier_config", None)
    supervisor_ref = await self.get_supervisor_ref(add_worker=False)

    if xavier_config is None:
        # Plain model: relaunching is all there is to do.
        await self.launch_builtin_model(**launch_args)
        return

    await supervisor_ref.call_collective_manager(
        origin_uid, "unregister_rank", xavier_config.get("rank")
    )
    new_address = await self.launch_builtin_model(**launch_args)
    relaunched_ref = self._model_uid_to_model[rep_model_uid]
    await relaunched_ref.start_transfer_for_vllm([])
    # The rank is read again after the relaunch, as the original code does.
    await supervisor_ref.call_collective_manager(
        origin_uid,
        "register_rank",
        xavier_config.get("rank"),
        new_address,
        update=True,
    )
|
xinference/deploy/cmdline.py
CHANGED
|
@@ -846,7 +846,9 @@ def model_launch(
|
|
|
846
846
|
kwargs = {}
|
|
847
847
|
for i in range(0, len(ctx.args), 2):
|
|
848
848
|
if not ctx.args[i].startswith("--"):
|
|
849
|
-
raise ValueError(
|
|
849
|
+
raise ValueError(
|
|
850
|
+
f"You must specify extra kwargs with `--` prefix. There is an error in parameter passing that is {ctx.args[i]}."
|
|
851
|
+
)
|
|
850
852
|
kwargs[ctx.args[i][2:]] = handle_click_args_type(ctx.args[i + 1])
|
|
851
853
|
print(f"Launch model name: {model_name} with kwargs: {kwargs}", file=sys.stderr)
|
|
852
854
|
|
|
@@ -23,6 +23,7 @@ from ..cmdline import (
|
|
|
23
23
|
list_model_registrations,
|
|
24
24
|
model_chat,
|
|
25
25
|
model_generate,
|
|
26
|
+
model_launch,
|
|
26
27
|
model_list,
|
|
27
28
|
model_terminate,
|
|
28
29
|
register_model,
|
|
@@ -311,3 +312,58 @@ def test_remove_cache(setup):
|
|
|
311
312
|
|
|
312
313
|
assert result.exit_code == 0
|
|
313
314
|
assert "Cache directory qwen1.5-chat has been deleted."
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def test_launch_error_in_passing_parameters():
    """
    Check that `model_launch` rejects malformed extra arguments.

    Each scenario invokes the command and expects exit code 1 plus the
    error text that names the offending argument.
    """
    cli = CliRunner()
    shared_prefix = [
        "--model-engine",
        "transformers",
        "--model-name",
        "qwen2.5-instruct",
        "--model-uid",
    ]
    scenarios = [
        # Known parameter but not provided with value.
        # NOTE(review): presumably `--model-uid` takes `-s` as its value, so
        # `0.5` becomes the first extra argument -- confirm.
        (
            shared_prefix + ["-s", "0.5", "-f", "gptq", "-q", "INT4", "111", "-l"],
            "You must specify extra kwargs with `--` prefix. There is an error in parameter passing that is 0.5.",
        ),
        # Unknown parameter
        (
            shared_prefix
            + ["123", "-s", "0.5", "-f", "gptq", "-q", "INT4", "-l", "111"],
            "You must specify extra kwargs with `--` prefix. There is an error in parameter passing that is -l.",
        ),
    ]

    for argv, expected_message in scenarios:
        outcome = cli.invoke(model_launch, argv)
        assert outcome.exit_code == 1
        assert expected_message in str(outcome)
|
xinference/isolation.py
CHANGED
|
@@ -37,6 +37,30 @@ class Isolation:
|
|
|
37
37
|
asyncio.set_event_loop(self._loop)
|
|
38
38
|
self._stopped = asyncio.Event()
|
|
39
39
|
self._loop.run_until_complete(self._stopped.wait())
|
|
40
|
+
self._cancel_all_tasks(self._loop)
|
|
41
|
+
|
|
42
|
+
@staticmethod
|
|
43
|
+
def _cancel_all_tasks(loop):
|
|
44
|
+
to_cancel = asyncio.all_tasks(loop)
|
|
45
|
+
if not to_cancel:
|
|
46
|
+
return
|
|
47
|
+
|
|
48
|
+
for task in to_cancel:
|
|
49
|
+
task.cancel()
|
|
50
|
+
|
|
51
|
+
loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))
|
|
52
|
+
|
|
53
|
+
for task in to_cancel:
|
|
54
|
+
if task.cancelled():
|
|
55
|
+
continue
|
|
56
|
+
if task.exception() is not None:
|
|
57
|
+
loop.call_exception_handler(
|
|
58
|
+
{
|
|
59
|
+
"message": "unhandled exception during asyncio.run() shutdown",
|
|
60
|
+
"exception": task.exception(),
|
|
61
|
+
"task": task,
|
|
62
|
+
}
|
|
63
|
+
)
|
|
40
64
|
|
|
41
65
|
def start(self):
|
|
42
66
|
if self._threaded:
|
xinference/model/audio/core.py
CHANGED
|
@@ -21,8 +21,11 @@ from ..core import CacheableModelSpec, ModelDescription
|
|
|
21
21
|
from ..utils import valid_model_revision
|
|
22
22
|
from .chattts import ChatTTSModel
|
|
23
23
|
from .cosyvoice import CosyVoiceModel
|
|
24
|
+
from .f5tts import F5TTSModel
|
|
25
|
+
from .f5tts_mlx import F5TTSMLXModel
|
|
24
26
|
from .fish_speech import FishSpeechModel
|
|
25
27
|
from .funasr import FunASRModel
|
|
28
|
+
from .melotts import MeloTTSModel
|
|
26
29
|
from .whisper import WhisperModel
|
|
27
30
|
from .whisper_mlx import WhisperMLXModel
|
|
28
31
|
|
|
@@ -46,6 +49,7 @@ class AudioModelFamilyV1(CacheableModelSpec):
|
|
|
46
49
|
model_id: str
|
|
47
50
|
model_revision: Optional[str]
|
|
48
51
|
multilingual: bool
|
|
52
|
+
language: Optional[str]
|
|
49
53
|
model_ability: Optional[str]
|
|
50
54
|
default_model_config: Optional[Dict[str, Any]]
|
|
51
55
|
default_transcription_config: Optional[Dict[str, Any]]
|
|
@@ -169,6 +173,9 @@ def create_audio_model_instance(
|
|
|
169
173
|
ChatTTSModel,
|
|
170
174
|
CosyVoiceModel,
|
|
171
175
|
FishSpeechModel,
|
|
176
|
+
F5TTSModel,
|
|
177
|
+
F5TTSMLXModel,
|
|
178
|
+
MeloTTSModel,
|
|
172
179
|
],
|
|
173
180
|
AudioModelDescription,
|
|
174
181
|
]:
|
|
@@ -182,6 +189,9 @@ def create_audio_model_instance(
|
|
|
182
189
|
ChatTTSModel,
|
|
183
190
|
CosyVoiceModel,
|
|
184
191
|
FishSpeechModel,
|
|
192
|
+
F5TTSModel,
|
|
193
|
+
F5TTSMLXModel,
|
|
194
|
+
MeloTTSModel,
|
|
185
195
|
]
|
|
186
196
|
if model_spec.model_family == "whisper":
|
|
187
197
|
if not model_spec.engine:
|
|
@@ -196,6 +206,12 @@ def create_audio_model_instance(
|
|
|
196
206
|
model = CosyVoiceModel(model_uid, model_path, model_spec, **kwargs)
|
|
197
207
|
elif model_spec.model_family == "FishAudio":
|
|
198
208
|
model = FishSpeechModel(model_uid, model_path, model_spec, **kwargs)
|
|
209
|
+
elif model_spec.model_family == "F5-TTS":
|
|
210
|
+
model = F5TTSModel(model_uid, model_path, model_spec, **kwargs)
|
|
211
|
+
elif model_spec.model_family == "F5-TTS-MLX":
|
|
212
|
+
model = F5TTSMLXModel(model_uid, model_path, model_spec, **kwargs)
|
|
213
|
+
elif model_spec.model_family == "MeloTTS":
|
|
214
|
+
model = MeloTTSModel(model_uid, model_path, model_spec, **kwargs)
|
|
199
215
|
else:
|
|
200
216
|
raise Exception(f"Unsupported audio model family: {model_spec.model_family}")
|
|
201
217
|
model_description = AudioModelDescription(
|