xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference has been flagged as potentially problematic.
- xinference/_compat.py +24 -2
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +219 -77
- xinference/client/restful/restful_client.py +47 -2
- xinference/constants.py +1 -0
- xinference/core/chat_interface.py +6 -1
- xinference/core/model.py +124 -34
- xinference/core/supervisor.py +180 -12
- xinference/core/utils.py +73 -4
- xinference/core/worker.py +102 -4
- xinference/deploy/cmdline.py +3 -1
- xinference/deploy/test/test_cmdline.py +56 -0
- xinference/isolation.py +24 -0
- xinference/model/audio/__init__.py +12 -0
- xinference/model/audio/core.py +37 -4
- xinference/model/audio/cosyvoice.py +39 -6
- xinference/model/audio/f5tts.py +200 -0
- xinference/model/audio/f5tts_mlx.py +260 -0
- xinference/model/audio/fish_speech.py +70 -110
- xinference/model/audio/melotts.py +110 -0
- xinference/model/audio/model_spec.json +179 -3
- xinference/model/audio/model_spec_modelscope.json +27 -0
- xinference/model/audio/utils.py +32 -0
- xinference/model/audio/whisper.py +35 -10
- xinference/model/audio/whisper_mlx.py +208 -0
- xinference/model/embedding/core.py +322 -6
- xinference/model/embedding/model_spec.json +8 -1
- xinference/model/embedding/model_spec_modelscope.json +9 -1
- xinference/model/image/core.py +69 -1
- xinference/model/image/model_spec.json +145 -4
- xinference/model/image/model_spec_modelscope.json +150 -4
- xinference/model/image/stable_diffusion/core.py +50 -15
- xinference/model/llm/__init__.py +6 -2
- xinference/model/llm/llm_family.json +1055 -93
- xinference/model/llm/llm_family.py +15 -36
- xinference/model/llm/llm_family_modelscope.json +1031 -78
- xinference/model/llm/memory.py +1 -1
- xinference/model/llm/mlx/core.py +285 -47
- xinference/model/llm/sglang/core.py +2 -0
- xinference/model/llm/transformers/chatglm.py +9 -5
- xinference/model/llm/transformers/cogagent.py +272 -0
- xinference/model/llm/transformers/core.py +3 -0
- xinference/model/llm/transformers/glm_edge_v.py +230 -0
- xinference/model/llm/transformers/qwen2_vl.py +12 -1
- xinference/model/llm/transformers/utils.py +16 -8
- xinference/model/llm/utils.py +55 -4
- xinference/model/llm/vllm/core.py +137 -12
- xinference/model/llm/vllm/xavier/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/allocator.py +74 -0
- xinference/model/llm/vllm/xavier/block.py +111 -0
- xinference/model/llm/vllm/xavier/block_manager.py +71 -0
- xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
- xinference/model/llm/vllm/xavier/collective.py +74 -0
- xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
- xinference/model/llm/vllm/xavier/engine.py +247 -0
- xinference/model/llm/vllm/xavier/executor.py +134 -0
- xinference/model/llm/vllm/xavier/scheduler.py +438 -0
- xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
- xinference/model/llm/vllm/xavier/transfer.py +319 -0
- xinference/model/rerank/core.py +11 -4
- xinference/model/video/diffusers.py +14 -0
- xinference/model/video/model_spec.json +15 -0
- xinference/model/video/model_spec_modelscope.json +16 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
- xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
- xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
- xinference/thirdparty/cosyvoice/bin/train.py +42 -8
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
- xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
- xinference/thirdparty/cosyvoice/cli/model.py +330 -80
- xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
- xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
- xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
- xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
- xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
- xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
- xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
- xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
- xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
- xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
- xinference/thirdparty/cosyvoice/utils/common.py +28 -1
- xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
- xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
- xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
- xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
- xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
- xinference/thirdparty/f5_tts/api.py +166 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
- xinference/thirdparty/f5_tts/eval/README.md +49 -0
- xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
- xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
- xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
- xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
- xinference/thirdparty/f5_tts/infer/README.md +191 -0
- xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
- xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
- xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
- xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
- xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
- xinference/thirdparty/f5_tts/model/__init__.py +10 -0
- xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
- xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
- xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
- xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
- xinference/thirdparty/f5_tts/model/cfm.py +285 -0
- xinference/thirdparty/f5_tts/model/dataset.py +319 -0
- xinference/thirdparty/f5_tts/model/modules.py +658 -0
- xinference/thirdparty/f5_tts/model/trainer.py +366 -0
- xinference/thirdparty/f5_tts/model/utils.py +185 -0
- xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
- xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
- xinference/thirdparty/f5_tts/socket_server.py +159 -0
- xinference/thirdparty/f5_tts/train/README.md +77 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
- xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
- xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
- xinference/thirdparty/f5_tts/train/train.py +75 -0
- xinference/thirdparty/fish_speech/fish_speech/conversation.py +266 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +137 -29
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +17 -11
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
- xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
- xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +2 -2
- xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +34 -18
- xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
- xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
- xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
- xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
- xinference/thirdparty/fish_speech/tools/llama/generate.py +484 -72
- xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
- xinference/thirdparty/fish_speech/tools/schema.py +170 -0
- xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
- xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
- xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
- xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
- xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
- xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
- xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
- xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
- xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
- xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
- xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
- xinference/thirdparty/matcha/utils/utils.py +2 -2
- xinference/thirdparty/melo/api.py +135 -0
- xinference/thirdparty/melo/app.py +61 -0
- xinference/thirdparty/melo/attentions.py +459 -0
- xinference/thirdparty/melo/commons.py +160 -0
- xinference/thirdparty/melo/configs/config.json +94 -0
- xinference/thirdparty/melo/data/example/metadata.list +20 -0
- xinference/thirdparty/melo/data_utils.py +413 -0
- xinference/thirdparty/melo/download_utils.py +67 -0
- xinference/thirdparty/melo/infer.py +25 -0
- xinference/thirdparty/melo/init_downloads.py +14 -0
- xinference/thirdparty/melo/losses.py +58 -0
- xinference/thirdparty/melo/main.py +36 -0
- xinference/thirdparty/melo/mel_processing.py +174 -0
- xinference/thirdparty/melo/models.py +1030 -0
- xinference/thirdparty/melo/modules.py +598 -0
- xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
- xinference/thirdparty/melo/monotonic_align/core.py +46 -0
- xinference/thirdparty/melo/preprocess_text.py +135 -0
- xinference/thirdparty/melo/split_utils.py +174 -0
- xinference/thirdparty/melo/text/__init__.py +35 -0
- xinference/thirdparty/melo/text/chinese.py +199 -0
- xinference/thirdparty/melo/text/chinese_bert.py +107 -0
- xinference/thirdparty/melo/text/chinese_mix.py +253 -0
- xinference/thirdparty/melo/text/cleaner.py +36 -0
- xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
- xinference/thirdparty/melo/text/cmudict.rep +129530 -0
- xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
- xinference/thirdparty/melo/text/english.py +284 -0
- xinference/thirdparty/melo/text/english_bert.py +39 -0
- xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
- xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
- xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
- xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
- xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
- xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
- xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
- xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
- xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
- xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
- xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
- xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
- xinference/thirdparty/melo/text/french.py +94 -0
- xinference/thirdparty/melo/text/french_bert.py +39 -0
- xinference/thirdparty/melo/text/japanese.py +647 -0
- xinference/thirdparty/melo/text/japanese_bert.py +49 -0
- xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
- xinference/thirdparty/melo/text/korean.py +192 -0
- xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
- xinference/thirdparty/melo/text/spanish.py +122 -0
- xinference/thirdparty/melo/text/spanish_bert.py +39 -0
- xinference/thirdparty/melo/text/symbols.py +290 -0
- xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
- xinference/thirdparty/melo/train.py +635 -0
- xinference/thirdparty/melo/train.sh +19 -0
- xinference/thirdparty/melo/transforms.py +209 -0
- xinference/thirdparty/melo/utils.py +424 -0
- xinference/types.py +17 -1
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
- xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
- xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
- xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +67 -3
- xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
- xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
- xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
- xinference/web/ui/node_modules/i18next/package.json +129 -0
- xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
- xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
- xinference/web/ui/node_modules/react-i18next/package.json +162 -0
- xinference/web/ui/node_modules/void-elements/package.json +34 -0
- xinference/web/ui/package-lock.json +69 -3
- xinference/web/ui/package.json +2 -0
- xinference/web/ui/src/locales/en.json +186 -0
- xinference/web/ui/src/locales/zh.json +186 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/METADATA +96 -36
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/RECORD +335 -146
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/WHEEL +1 -1
- xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
- xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/api.py +0 -440
- xinference/thirdparty/fish_speech/tools/commons.py +0 -35
- xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -34
- xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/webui.py +0 -485
- xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
- xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
- xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
- xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
- /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
- /xinference/thirdparty/{fish_speech/fish_speech/configs → melo/text/fr_phonemizer}/__init__.py +0 -0
- /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
xinference/client/restful/restful_client.py
CHANGED

```diff
@@ -126,6 +126,43 @@ class RESTfulEmbeddingModelHandle(RESTfulModelHandle):
         response_data = response.json()
         return response_data

+    def convert_ids_to_tokens(
+        self, input: Union[List, List[List]], **kwargs
+    ) -> List[str]:
+        """
+        Convert token IDs to human readable tokens via RESTful APIs.
+
+        Parameters
+        ----------
+        input: Union[List, List[List]]
+            Input token IDs to convert, can be a single list of token IDs or a list of token ID lists.
+            To convert multiple sequences in a single request, pass a list of token ID lists.
+
+        Returns
+        -------
+        list
+            A list of decoded tokens in human readable format.
+
+        Raises
+        ------
+        RuntimeError
+            Report the failure of token conversion and provide the error message.
+
+        """
+        url = f"{self._base_url}/v1/convert_ids_to_tokens"
+        request_body = {
+            "model": self._model_uid,
+            "input": input,
+        }
+        request_body.update(kwargs)
+        response = requests.post(url, json=request_body, headers=self.auth_headers)
+        if response.status_code != 200:
+            raise RuntimeError(
+                f"Failed to decode token ids, detail: {_get_error_string(response)}"
+            )
+        response_data = response.json()
+        return response_data
+

 class RESTfulRerankModelHandle(RESTfulModelHandle):
     def rerank(
@@ -174,6 +211,7 @@ class RESTfulRerankModelHandle(RESTfulModelHandle):
             "max_chunks_per_doc": max_chunks_per_doc,
             "return_documents": return_documents,
             "return_len": return_len,
+            "kwargs": json.dumps(kwargs),
         }
         request_body.update(kwargs)
         response = requests.post(url, json=request_body, headers=self.auth_headers)
@@ -703,6 +741,8 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
             The speed of the generated audio.
         stream: bool
             Use stream or not.
+        prompt_speech: bytes
+            The audio bytes to be provided to the model.

         Returns
         -------
@@ -1357,7 +1397,7 @@ class Client:
         response_data = response.json()
         return response_data

-    def abort_request(self, model_uid: str, request_id: str):
+    def abort_request(self, model_uid: str, request_id: str, block_duration: int = 30):
         """
         Abort a request.
         Abort a submitted request. If the request is finished or not found, this method will be a no-op.
@@ -1369,13 +1409,18 @@
             Model uid.
         request_id: str
             Request id.
+        block_duration: int
+            The duration to make the request id abort. If set to 0, the abort_request will be immediate, which may
+            prevent it from taking effect if it arrives before the request operation.
         Returns
         -------
         Dict
             Return empty dict.
         """
         url = f"{self.base_url}/v1/models/{model_uid}/requests/{request_id}/abort"
-        response = requests.post(url, headers=self._headers)
+        response = requests.post(
+            url, headers=self._headers, json={"block_duration": block_duration}
+        )
         if response.status_code != 200:
             raise RuntimeError(
                 f"Failed to abort request, detail: {_get_error_string(response)}"
```
xinference/core/chat_interface.py
CHANGED

```diff
@@ -13,6 +13,7 @@
 # limitations under the License.

 import base64
+import html
 import logging
 import os
 from io import BytesIO
@@ -137,7 +138,11 @@ class GradioInterface:
             if "content" not in delta:
                 continue
             else:
-                response_content += delta["content"]
+                # some model like deepseek-r1-distill-qwen
+                # will generate <think>...</think> ...
+                # in gradio, no output will be rendered,
+                # thus escape html tags in advance
+                response_content += html.escape(delta["content"])
             yield response_content

         yield response_content
```
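The fix is mechanical: `html.escape` turns the tag delimiters into entities, so Gradio renders the reasoning text instead of swallowing `<think>` as markup. For example:

```python
import html

delta = "<think>step-by-step reasoning</think> final answer"
print(html.escape(delta))
# &lt;think&gt;step-by-step reasoning&lt;/think&gt; final answer
```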
xinference/core/model.py
CHANGED

```diff
@@ -35,12 +35,14 @@ from typing import (
     List,
     Optional,
     Union,
+    no_type_check,
 )

 import sse_starlette.sse
 import xoscar as xo

 from ..constants import (
+    XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
     XINFERENCE_LAUNCH_MODEL_RETRY,
     XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE,
 )
@@ -57,7 +59,7 @@ import logging
 logger = logging.getLogger(__name__)

 from ..device_utils import empty_cache
-from .utils import json_dumps, log_async
+from .utils import CancelMixin, json_dumps, log_async

 try:
     from torch.cuda import OutOfMemoryError
@@ -77,6 +79,9 @@ XINFERENCE_BATCHING_ALLOWED_VISION_MODELS = [
 ]

 XINFERENCE_TEXT_TO_IMAGE_BATCHING_ALLOWED_MODELS = ["FLUX.1-dev", "FLUX.1-schnell"]
+XINFERENCE_TEST_OUT_OF_MEMORY_ERROR = bool(
+    os.getenv("XINFERENCE_TEST_OUT_OF_MEMORY_ERROR", False)
+)


 def request_limit(fn):
@@ -90,21 +95,26 @@ def request_limit(fn):
         logger.debug(
             f"Request {fn.__name__}, current serve request count: {self._serve_count}, request limit: {self._request_limits} for the model {self.model_uid()}"
         )
-        if self._request_limits is not None:
-            if 1 + self._serve_count <= self._request_limits:
-                self._serve_count += 1
-            else:
-                raise RuntimeError(
-                    f"Rate limit reached for the model. Request limit {self._request_limits} for the model: {self.model_uid()}"
-                )
+        if 1 + self._serve_count <= self._request_limits:
+            self._serve_count += 1
+        else:
+            raise RuntimeError(
+                f"Rate limit reached for the model. Request limit {self._request_limits} for the model: {self.model_uid()}"
+            )
+        ret = None
         try:
             ret = await fn(self, *args, **kwargs)
         finally:
-            if self._request_limits is not None:
+            if ret is not None and (
+                inspect.isasyncgen(ret) or inspect.isgenerator(ret)
+            ):
+                # stream case, let client call model_ref to decrease self._serve_count
+                pass
+            else:
                 self._serve_count -= 1
-            logger.debug(
-                f"After request {fn.__name__}, current serve request count: {self._serve_count} for the model {self.model_uid()}"
-            )
+                logger.debug(
+                    f"After request {fn.__name__}, current serve request count: {self._serve_count} for the model {self.model_uid()}"
+                )
         return ret

     return wrapped_func
```
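Two behavioral changes are folded into this hunk: `_request_limits` is now always numeric (see the `float("inf")` default further down), so the `is not None` guards disappear, and a generator result keeps its concurrency slot until the consumer releases it. A condensed sketch of the pattern, not the exact Xinference code:

```python
import functools
import inspect

def request_limit(fn):
    # Condensed restatement of the accounting above (illustrative only).
    @functools.wraps(fn)
    async def wrapped(self, *args, **kwargs):
        if 1 + self._serve_count > self._request_limits:
            raise RuntimeError("Rate limit reached for the model.")
        self._serve_count += 1
        ret = None
        try:
            ret = await fn(self, *args, **kwargs)
        finally:
            if ret is not None and (
                inspect.isasyncgen(ret) or inspect.isgenerator(ret)
            ):
                # Stream still being consumed: the caller releases the slot
                # later via ModelActor.decrease_serve_count().
                pass
            else:
                self._serve_count -= 1
        return ret

    return wrapped
```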
```diff
@@ -112,20 +122,25 @@ def request_limit(fn):

 def oom_check(fn):
     @functools.wraps(fn)
-    def _wrapper(*args, **kwargs):
+    def _wrapper(self, *args, **kwargs):
         try:
-            return fn(*args, **kwargs)
-        except OutOfMemoryError:
-            logger.exception("Model actor is out of memory.")
-            os._exit(1)
+            if XINFERENCE_TEST_OUT_OF_MEMORY_ERROR:
+                raise OutOfMemoryError("Test Out of Memory Error")
+            return fn(self, *args, **kwargs)
+        except OutOfMemoryError as ex:
+            assert self._loop is not None
+            asyncio.run_coroutine_threadsafe(
+                self._handle_oom_error(ex), loop=self._loop
+            )

     @functools.wraps(fn)
-    async def _async_wrapper(*args, **kwargs):
+    async def _async_wrapper(self, *args, **kwargs):
         try:
-            return await fn(*args, **kwargs)
-        except OutOfMemoryError:
-            logger.exception("Model actor is out of memory.")
-            os._exit(1)
+            if XINFERENCE_TEST_OUT_OF_MEMORY_ERROR:
+                raise OutOfMemoryError("Test Out of Memory Error")
+            return await fn(self, *args, **kwargs)
+        except OutOfMemoryError as ex:
+            await self._handle_oom_error(ex)

     assert not inspect.isasyncgen(fn)
     assert not inspect.isgenerator(fn)
@@ -136,7 +151,7 @@ def oom_check(fn):
     return _wrapper


-class ModelActor(xo.StatelessActor):
+class ModelActor(xo.StatelessActor, CancelMixin):
     _replica_model_uid: Optional[str]

     @classmethod
@@ -172,6 +187,16 @@ class ModelActor(xo.StatelessActor):
         if hasattr(self._model, "stop") and callable(self._model.stop):
             self._model.stop()

+        if isinstance(self._model, LLMVLLMModel):
+            if self._transfer_ref is not None:
+                try:
+                    await xo.destroy_actor(self._transfer_ref)
+                    del self._transfer_ref
+                except Exception as e:
+                    logger.debug(
+                        f"Destroy transfer actor failed, address: {self.address}, error: {e}"
+                    )
+
         if (
             isinstance(self._model, (LLMPytorchModel, LLMVLLMModel, SGLANGModel))
             and self._model.model_spec.model_format == "pytorch"
@@ -200,6 +225,7 @@ class ModelActor(xo.StatelessActor):
         replica_model_uid: str,
         model_description: Optional["ModelDescription"] = None,
         request_limits: Optional[int] = None,
+        xavier_config: Optional[Dict] = None,
     ):
         super().__init__()
         from ..model.llm.lmdeploy.core import LMDeployModel
@@ -214,7 +240,9 @@ class ModelActor(xo.StatelessActor):
         self._model_description = (
             model_description.to_dict() if model_description else {}
         )
-        self._request_limits = request_limits
+        self._request_limits = (
+            float("inf") if request_limits is None else request_limits
+        )
         self._pending_requests: asyncio.Queue = asyncio.Queue()
         self._handle_pending_requests_task = None
         self._lock = (
@@ -239,6 +267,11 @@ class ModelActor(xo.StatelessActor):
         self._scheduler_ref = None
         self._text_to_image_scheduler_ref = None

+        if isinstance(self._model, VLLMModel):
+            self._xavier_config = xavier_config
+            self._model.set_xavier_config(xavier_config)
+        self._transfer_ref = None
+
     async def __post_create__(self):
         self._loop = asyncio.get_running_loop()

@@ -267,6 +300,32 @@ class ModelActor(xo.StatelessActor):
     def __repr__(self) -> str:
         return f"ModelActor({self._replica_model_uid})"

+    def decrease_serve_count(self):
+        self._serve_count -= 1
+
+    @no_type_check
+    async def start_transfer_for_vllm(self, rank_addresses: List[str]):
+        from ..model.llm.vllm.core import VLLMModel
+        from ..model.llm.vllm.xavier.transfer import TransferActor
+
+        assert isinstance(self._model, VLLMModel)
+        rank = self._xavier_config.get("rank")  # type: ignore
+        self._transfer_ref = await xo.create_actor(
+            TransferActor,
+            address=self.address,
+            uid=f"{TransferActor.default_uid()}-{rank}",
+            rank=rank,
+            world_size=self._xavier_config.get("world_size"),  # type: ignore
+            rank_address=self._xavier_config.get("rank_address"),  # type: ignore
+            store_address=self._xavier_config.get("store_address"),  # type: ignore
+            store_port=self._xavier_config.get("store_port"),  # type: ignore
+            world_addresses=rank_addresses,
+        )
+        await self._model.init_xavier()
+        logger.debug(
+            f"Init transfer actor: {self._transfer_ref.address}, rank: {rank} done for vllm."  # type: ignore
+        )
+
     async def _record_completion_metrics(
         self, duration, completion_tokens, prompt_tokens
     ):
```
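For orientation, the `xavier_config` read here is the dict assembled per replica by the supervisor (see supervisor.py below); a sketch of its shape with placeholder values — the field names come from this diff, and `rank_address` is assumed to be filled in on the worker side before the TransferActor reads it:

```python
# Field names are taken from this diff; values are placeholders.
xavier_config = {
    "block_tracker_uid": "VLLMBlockTracker-my-model-uid",
    "block_tracker_address": "10.0.0.5:34120",
    "rank": 1,                    # rank 0 is the bootstrap-only actor
    "world_size": 3,              # replica + 1, since rank 0 is extra
    "store_address": "10.0.0.5",  # host of the rank-0 store
    "store_port": 41234,
    # "rank_address" is added later, before start_transfer_for_vllm runs.
}
```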
```diff
@@ -429,11 +488,24 @@ class ModelActor(xo.StatelessActor):
                 )
             )

+    async def _handle_oom_error(self, ex):
+        error_message = (
+            f"Model actor is out of memory, model id: {self.model_uid()}, error: {ex}"
+        )
+        logger.exception(error_message)
+        worker_ref = await self._get_worker_ref()
+        await worker_ref.update_model_status(
+            self._replica_model_uid, last_error=error_message
+        )
+        os._exit(1)
+
     def _to_generator(self, output_type: str, gen: types.GeneratorType):
         start_time = time.time()
         time_to_first_token = None
         final_usage = None
         try:
+            if XINFERENCE_TEST_OUT_OF_MEMORY_ERROR:
+                raise OutOfMemoryError("Test Out of Memory Error")
             for v in gen:
                 if time_to_first_token is None:
                     time_to_first_token = (time.time() - start_time) * 1000
@@ -445,11 +517,11 @@ class ModelActor(xo.StatelessActor):
                     output_type == "binary"
                 ), f"Unknown output type '{output_type}'"
                 yield sse_starlette.sse.ensure_bytes(v, None)
-        except OutOfMemoryError:
-            logger.exception(
-                "Model actor is out of memory, model id: %s", self.model_uid()
+        except OutOfMemoryError as ex:
+            assert self._loop is not None
+            asyncio.run_coroutine_threadsafe(
+                self._handle_oom_error(ex), loop=self._loop
             )
-            os._exit(1)
         finally:
             if self._loop is not None and time_to_first_token is not None:
                 coro = self.record_metrics(
@@ -471,6 +543,8 @@ class ModelActor(xo.StatelessActor):
         time_to_first_token = None
         final_usage = None
         try:
+            if XINFERENCE_TEST_OUT_OF_MEMORY_ERROR:
+                raise OutOfMemoryError("Test Out of Memory Error")
             async for v in gen:
                 if time_to_first_token is None:
                     time_to_first_token = (time.time() - start_time) * 1000
@@ -483,11 +557,8 @@ class ModelActor(xo.StatelessActor):
                     output_type == "binary"
                 ), f"Unknown output type '{output_type}'"
                 yield await asyncio.to_thread(sse_starlette.sse.ensure_bytes, v, None)
-        except OutOfMemoryError:
-            logger.exception(
-                "Model actor is out of memory, model id: %s", self.model_uid()
-            )
-            os._exit(1)
+        except OutOfMemoryError as ex:
+            await self._handle_oom_error(ex)
         finally:
             coros = []
             if time_to_first_token is not None:
```
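The sync generator path cannot `await _handle_oom_error` because it executes outside the actor's event loop; the diff hands the coroutine back to the loop instead. A self-contained sketch of that pattern (names are illustrative):

```python
import asyncio

async def handle_error(msg: str) -> None:
    print(f"handled on the event loop: {msg}")

def blocking_work(loop: asyncio.AbstractEventLoop) -> None:
    try:
        raise MemoryError("simulated OOM")
    except MemoryError as ex:
        # From a plain thread, schedule the coroutine onto the loop
        # and (optionally) wait for its result.
        future = asyncio.run_coroutine_threadsafe(handle_error(str(ex)), loop)
        future.result()

async def main() -> None:
    loop = asyncio.get_running_loop()
    await asyncio.to_thread(blocking_work, loop)

asyncio.run(main())
```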
```diff
@@ -553,6 +624,7 @@ class ModelActor(xo.StatelessActor):

     @oom_check
     async def _call_wrapper(self, output_type: str, fn: Callable, *args, **kwargs):
+        self._add_running_task(kwargs.get("request_id"))
         if self._lock is None:
             if inspect.iscoroutinefunction(fn):
                 ret = await fn(*args, **kwargs)
@@ -761,9 +833,14 @@ class ModelActor(xo.StatelessActor):
                 prompt_tokens,
             )

-    async def abort_request(self, request_id: str) -> str:
+    async def abort_request(
+        self,
+        request_id: str,
+        block_duration: int = XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
+    ) -> str:
         from .utils import AbortRequestMessage

+        self._cancel_running_task(request_id, block_duration)
         if self.allow_batching():
             if self._scheduler_ref is None:
                 return AbortRequestMessage.NOT_FOUND.name
@@ -787,6 +864,19 @@ class ModelActor(xo.StatelessActor):
                 f"Model {self._model.model_spec} is not for creating embedding."
             )

+    @request_limit
+    @log_async(logger=logger)
+    async def convert_ids_to_tokens(
+        self, input: Union[List, List[List]], *args, **kwargs
+    ):
+        kwargs.pop("request_id", None)
+        if hasattr(self._model, "convert_ids_to_tokens"):
+            return await self._call_wrapper_json(
+                self._model.convert_ids_to_tokens, input, *args, **kwargs
+            )
+
+        raise AttributeError(f"Model {self._model.model_spec} can convert token id.")
+
     @request_limit
     @log_async(logger=logger)
     async def rerank(
```
xinference/core/supervisor.py
CHANGED

```diff
@@ -35,6 +35,7 @@ from typing import (
 import xoscar as xo

 from ..constants import (
+    XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
     XINFERENCE_DISABLE_HEALTH_CHECK,
     XINFERENCE_HEALTH_CHECK_FAILURE_THRESHOLD,
     XINFERENCE_HEALTH_CHECK_INTERVAL,
@@ -266,6 +267,14 @@ class SupervisorActor(xo.StatelessActor):
             signal.SIGTERM, lambda: asyncio.create_task(signal_handler())
         )

+        from ..model.llm.vllm.xavier.block_tracker import VLLMBlockTracker
+        from ..model.llm.vllm.xavier.collective_manager import CollectiveManager
+
+        self._block_tracker_mapping: Dict[str, xo.ActorRefType[VLLMBlockTracker]] = {}
+        self._collective_manager_mapping: Dict[
+            str, xo.ActorRefType[CollectiveManager]
+        ] = {}
+
     @typing.no_type_check
     async def get_cluster_device_info(self, detailed: bool = False) -> List:
         import psutil
@@ -958,29 +967,83 @@
         if model_uid is None:
             model_uid = self._gen_model_uid(model_name)

+        # Xavier-related
+        enable_xavier: bool = (
+            bool(kwargs.pop("enable_xavier", False))
+            and model_engine is not None
+            and model_engine.lower() == "vllm"
+        )
+        store_address = None
+        store_port = None
+        world_size = None
+        if enable_xavier:
+            if replica <= 1:
+                logger.warning(f"Enabling xavier when `replica<=1` is meaningless.")
+                enable_xavier = False
+            else:
+                from ..model.llm.vllm.xavier.block_tracker import VLLMBlockTracker
+                from ..model.llm.vllm.xavier.collective_manager import CollectiveManager
+
+                self._block_tracker_mapping[model_uid] = await xo.create_actor(
+                    VLLMBlockTracker,
+                    address=self.address,
+                    uid=f"{VLLMBlockTracker.default_uid()}-{model_uid}",
+                )
+                world_size = replica + 1
+                logger.info(f"Going to start xavier with world size: {world_size}")
+                self._collective_manager_mapping[model_uid] = await xo.create_actor(
+                    CollectiveManager,
+                    address=self.address,
+                    uid=f"{CollectiveManager.default_uid()}-{model_uid}",
+                    model_uid=model_uid,
+                )
+                logger.info(f"Start collective manager for {model_uid} done.")
+
         model_size = str(model_size_in_billions) if model_size_in_billions else ""
         logger.debug(
             f"Enter launch_builtin_model, model_uid: {model_uid}, model_name: {model_name}, model_size: {model_size}, "
-            f"model_format: {model_format}, quantization: {quantization}, replica: {replica}, "
+            f"model_format: {model_format}, quantization: {quantization}, replica: {replica}, enable_xavier: {enable_xavier}, "
             f"kwargs: {kwargs}"
         )

-        async def _launch_one_model(_replica_model_uid):
+        async def _launch_one_model(worker_ref, _replica_model_uid, rank: int):
             if _replica_model_uid in self._replica_model_uid_to_worker:
                 raise ValueError(
                     f"Model is already in the model list, uid: {_replica_model_uid}"
                 )
+
+            nonlocal store_address
+            nonlocal store_port
+            xavier_config = (
+                {
+                    "block_tracker_uid": self._block_tracker_mapping[model_uid].uid,
+                    "block_tracker_address": self._block_tracker_mapping[
+                        model_uid
+                    ].address,
+                    "rank": rank,
+                    "world_size": world_size,
+                    "store_address": store_address,
+                    "store_port": store_port,
+                }
+                if enable_xavier
+                else None
+            )
+
+            if enable_xavier and rank == 0:
+                rank0_address, _port = await worker_ref.launch_rank0_model(
+                    _replica_model_uid, xavier_config
+                )
+                self._replica_model_uid_to_worker[_replica_model_uid] = worker_ref
+                store_address = rank0_address.split(":")[0]
+                store_port = _port
+                return rank0_address
+
             replica_gpu_idx = assign_replica_gpu(_replica_model_uid, replica, gpu_idx)
             nonlocal model_type

-            worker_ref = (
-                target_ip_worker_ref
-                if target_ip_worker_ref is not None
-                else await self._choose_worker()
-            )
             # LLM as default for compatibility
             model_type = model_type or "LLM"
-            await worker_ref.launch_builtin_model(
+            subpool_address = await worker_ref.launch_builtin_model(
                 model_uid=_replica_model_uid,
                 model_name=model_name,
                 model_size_in_billions=model_size_in_billions,
@@ -994,14 +1057,64 @@
                 gpu_idx=replica_gpu_idx,
                 download_hub=download_hub,
                 model_path=model_path,
+                xavier_config=xavier_config,
                 **kwargs,
             )
             self._replica_model_uid_to_worker[_replica_model_uid] = worker_ref
+            return subpool_address

         async def _launch_model():
             try:
-                for rep_model_uid in iter_replica_model_uid(model_uid, replica):
-                    await _launch_one_model(rep_model_uid)
+                worker_refs = []
+                rank_addresses = []
+                for _idx, rep_model_uid in enumerate(
+                    iter_replica_model_uid(model_uid, replica)
+                ):
+                    worker_ref = (
+                        target_ip_worker_ref
+                        if target_ip_worker_ref is not None
+                        else await self._choose_worker()
+                    )
+                    if enable_xavier and _idx == 0:
+                        """
+                        Start the rank 0 model actor on the worker that holds the rank 1 replica,
+                        solely for constructing the collective communication world.
+                        """
+                        _uid = model_uid + "-rank0"
+                        rank0_address = await _launch_one_model(worker_ref, _uid, 0)
+                        worker_refs.append((worker_ref, _uid))
+                        rank_addresses.append(rank0_address)
+
+                    subpool_address = await _launch_one_model(
+                        worker_ref, rep_model_uid, _idx + 1
+                    )
+                    worker_refs.append((worker_ref, rep_model_uid))
+                    rank_addresses.append(subpool_address)
+
+                # For xavier, start all the vllm instances first,
+                # and then start the transfer component,
+                # because the transfer actor needs all the rank addresses used for collective communication
+                if enable_xavier:
+                    logger.debug(f"Init transfer component for xavier...")
+                    collective_manager_ref = self._collective_manager_mapping[model_uid]
+                    tasks = []
+                    for worker_ref, rep_model_uid in worker_refs:
+                        tasks.append(
+                            worker_ref.start_transfer_for_vllm(
+                                rep_model_uid, rank_addresses
+                            )
+                        )
+                    # Here you must use asyncio.gather, not a for loop,
+                    # or you will get stuck.
+                    await asyncio.gather(*tasks)
+
+                    # init collective_manager
+                    for idx, addr in enumerate(rank_addresses):
+                        await collective_manager_ref.register_rank(
+                            idx, addr, update=False
+                        )
+
+                    logger.debug(f"Init transfer component for xavier done.")
             except Exception:
                 # terminate_model will remove the replica info.
                 await self.terminate_model(model_uid, suppress_exception=True)
```
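The comment about `asyncio.gather` is worth unpacking: each `start_transfer_for_vllm` call blocks until every rank has joined the collective world, so awaiting them one at a time deadlocks on the first rank. A toy demonstration of why concurrent scheduling is required (Python 3.11+ for `asyncio.Barrier`):

```python
import asyncio

async def join_world(rank: int, barrier: asyncio.Barrier) -> int:
    # Stand-in for start_transfer_for_vllm: returns only once
    # every rank has reached the rendezvous point.
    await barrier.wait()
    return rank

async def main() -> None:
    barrier = asyncio.Barrier(3)
    # Sequentially awaiting join_world(0, ...) would hang forever,
    # because ranks 1 and 2 never get a chance to reach the barrier.
    ranks = await asyncio.gather(*(join_world(r, barrier) for r in range(3)))
    print(ranks)  # [0, 1, 2]

asyncio.run(main())
```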
```diff
@@ -1130,6 +1243,38 @@ class SupervisorActor(xo.StatelessActor):
                 raise
         self._model_uid_to_replica_info.pop(model_uid, None)

+        # clear for xavier
+        rank0_uid = model_uid + "-rank0"
+        if rank0_uid in self._replica_model_uid_to_worker:
+            await _terminate_one_model(rank0_uid)
+
+        collective_manager_ref = self._collective_manager_mapping.pop(model_uid, None)
+        if collective_manager_ref is not None:
+            try:
+                await xo.destroy_actor(collective_manager_ref)
+            except Exception as e:
+                logger.debug(
+                    "Destroy collective_manager_ref failed, model uid: %s, error: %s",
+                    model_uid,
+                    e,
+                )
+            finally:
+                logger.debug(
+                    f"Destroy collective_manager_ref done. model uid: {model_uid}"
+                )
+        block_tracker_ref = self._block_tracker_mapping.pop(model_uid, None)
+        if block_tracker_ref is not None:
+            try:
+                await xo.destroy_actor(block_tracker_ref)
+            except Exception as e:
+                logger.debug(
+                    "Destroy block_tracker_ref failed, model uid: %s, error: %s",
+                    model_uid,
+                    e,
+                )
+            finally:
+                logger.debug(f"Destroy block_tracker_ref done. model uid: {model_uid}")
+
     @log_async(logger=logger)
     async def get_model(self, model_uid: str) -> xo.ActorRefType["ModelActor"]:
         replica_info = self._model_uid_to_replica_info.get(model_uid, None)
@@ -1147,6 +1292,15 @@ class SupervisorActor(xo.StatelessActor):
         )
         return await worker_ref.get_model(model_uid=replica_model_uid)

+    @log_async(logger=logger)
+    async def get_model_status(self, replica_model_uid: str):
+        worker_ref = self._replica_model_uid_to_worker.get(replica_model_uid, None)
+        if worker_ref is None:
+            raise ValueError(
+                f"Model not found in the model list, uid: {replica_model_uid}"
+            )
+        return await worker_ref.get_model_status(replica_model_uid)
+
     @log_async(logger=logger)
     async def describe_model(self, model_uid: str) -> Dict[str, Any]:
         replica_info = self._model_uid_to_replica_info.get(model_uid, None)
@@ -1213,7 +1367,12 @@ class SupervisorActor(xo.StatelessActor):
         return cached_models

     @log_async(logger=logger)
-    async def abort_request(self, model_uid: str, request_id: str) -> Dict:
+    async def abort_request(
+        self,
+        model_uid: str,
+        request_id: str,
+        block_duration: int = XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
+    ) -> Dict:
         from .scheduler import AbortRequestMessage

         res = {"msg": AbortRequestMessage.NO_OP.name}
@@ -1228,7 +1387,7 @@ class SupervisorActor(xo.StatelessActor):
             if worker_ref is None:
                 continue
             model_ref = await worker_ref.get_model(model_uid=rep_mid)
-            result_info = await model_ref.abort_request(request_id)
+            result_info = await model_ref.abort_request(request_id, block_duration)
             res["msg"] = result_info
             if result_info == AbortRequestMessage.DONE.name:
                 break
@@ -1371,3 +1530,12 @@ class SupervisorActor(xo.StatelessActor):

     async def get_progress(self, request_id: str) -> float:
         return await self._progress_tracker.get_progress(request_id)
+
+    async def call_collective_manager(
+        self, model_uid: str, func_name: str, *args, **kwargs
+    ):
+        """
+        Used by worker.
+        """
+        collective_manager_ref = self._collective_manager_mapping[model_uid]
+        await getattr(collective_manager_ref, func_name)(*args, **kwargs)
```