xinference 1.0.1__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_compat.py +2 -0
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +77 -71
- xinference/core/chat_interface.py +6 -1
- xinference/core/model.py +79 -19
- xinference/core/supervisor.py +172 -10
- xinference/core/utils.py +12 -8
- xinference/core/worker.py +102 -4
- xinference/deploy/cmdline.py +3 -1
- xinference/deploy/test/test_cmdline.py +56 -0
- xinference/isolation.py +24 -0
- xinference/model/audio/core.py +16 -0
- xinference/model/audio/cosyvoice.py +39 -6
- xinference/model/audio/f5tts.py +200 -0
- xinference/model/audio/f5tts_mlx.py +260 -0
- xinference/model/audio/fish_speech.py +36 -111
- xinference/model/audio/melotts.py +110 -0
- xinference/model/audio/model_spec.json +99 -3
- xinference/model/audio/model_spec_modelscope.json +27 -0
- xinference/model/audio/utils.py +32 -0
- xinference/model/audio/whisper.py +35 -10
- xinference/model/embedding/core.py +203 -142
- xinference/model/embedding/model_spec.json +7 -0
- xinference/model/embedding/model_spec_modelscope.json +8 -0
- xinference/model/image/core.py +69 -1
- xinference/model/image/model_spec.json +145 -4
- xinference/model/image/model_spec_modelscope.json +150 -4
- xinference/model/image/stable_diffusion/core.py +45 -13
- xinference/model/llm/__init__.py +4 -2
- xinference/model/llm/llm_family.json +536 -53
- xinference/model/llm/llm_family.py +15 -36
- xinference/model/llm/llm_family_modelscope.json +454 -20
- xinference/model/llm/memory.py +1 -1
- xinference/model/llm/mlx/core.py +248 -52
- xinference/model/llm/sglang/core.py +1 -0
- xinference/model/llm/transformers/chatglm.py +9 -5
- xinference/model/llm/transformers/cogagent.py +272 -0
- xinference/model/llm/transformers/core.py +2 -0
- xinference/model/llm/transformers/qwen2_vl.py +12 -1
- xinference/model/llm/transformers/utils.py +16 -8
- xinference/model/llm/utils.py +36 -4
- xinference/model/llm/vllm/core.py +53 -10
- xinference/model/llm/vllm/xavier/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/allocator.py +74 -0
- xinference/model/llm/vllm/xavier/block.py +111 -0
- xinference/model/llm/vllm/xavier/block_manager.py +71 -0
- xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
- xinference/model/llm/vllm/xavier/collective.py +74 -0
- xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
- xinference/model/llm/vllm/xavier/engine.py +247 -0
- xinference/model/llm/vllm/xavier/executor.py +134 -0
- xinference/model/llm/vllm/xavier/scheduler.py +438 -0
- xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
- xinference/model/llm/vllm/xavier/transfer.py +319 -0
- xinference/model/video/diffusers.py +14 -0
- xinference/model/video/model_spec.json +15 -0
- xinference/model/video/model_spec_modelscope.json +16 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
- xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
- xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
- xinference/thirdparty/cosyvoice/bin/train.py +42 -8
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
- xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
- xinference/thirdparty/cosyvoice/cli/model.py +330 -80
- xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
- xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
- xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
- xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
- xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
- xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
- xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
- xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
- xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
- xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
- xinference/thirdparty/cosyvoice/utils/common.py +28 -1
- xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
- xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
- xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
- xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
- xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
- xinference/thirdparty/f5_tts/api.py +166 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
- xinference/thirdparty/f5_tts/eval/README.md +49 -0
- xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
- xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
- xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
- xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
- xinference/thirdparty/f5_tts/infer/README.md +191 -0
- xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
- xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
- xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
- xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
- xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
- xinference/thirdparty/f5_tts/model/__init__.py +10 -0
- xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
- xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
- xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
- xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
- xinference/thirdparty/f5_tts/model/cfm.py +285 -0
- xinference/thirdparty/f5_tts/model/dataset.py +319 -0
- xinference/thirdparty/f5_tts/model/modules.py +658 -0
- xinference/thirdparty/f5_tts/model/trainer.py +366 -0
- xinference/thirdparty/f5_tts/model/utils.py +185 -0
- xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
- xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
- xinference/thirdparty/f5_tts/socket_server.py +159 -0
- xinference/thirdparty/f5_tts/train/README.md +77 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
- xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
- xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
- xinference/thirdparty/f5_tts/train/train.py +75 -0
- xinference/thirdparty/fish_speech/fish_speech/conversation.py +94 -83
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +63 -20
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +1 -26
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
- xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1 -1
- xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +7 -13
- xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
- xinference/thirdparty/fish_speech/tools/fish_e2e.py +2 -2
- xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
- xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
- xinference/thirdparty/fish_speech/tools/llama/generate.py +117 -89
- xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
- xinference/thirdparty/fish_speech/tools/schema.py +11 -28
- xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
- xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
- xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
- xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
- xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
- xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
- xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
- xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
- xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
- xinference/thirdparty/matcha/utils/utils.py +2 -2
- xinference/thirdparty/melo/api.py +135 -0
- xinference/thirdparty/melo/app.py +61 -0
- xinference/thirdparty/melo/attentions.py +459 -0
- xinference/thirdparty/melo/commons.py +160 -0
- xinference/thirdparty/melo/configs/config.json +94 -0
- xinference/thirdparty/melo/data/example/metadata.list +20 -0
- xinference/thirdparty/melo/data_utils.py +413 -0
- xinference/thirdparty/melo/download_utils.py +67 -0
- xinference/thirdparty/melo/infer.py +25 -0
- xinference/thirdparty/melo/init_downloads.py +14 -0
- xinference/thirdparty/melo/losses.py +58 -0
- xinference/thirdparty/melo/main.py +36 -0
- xinference/thirdparty/melo/mel_processing.py +174 -0
- xinference/thirdparty/melo/models.py +1030 -0
- xinference/thirdparty/melo/modules.py +598 -0
- xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
- xinference/thirdparty/melo/monotonic_align/core.py +46 -0
- xinference/thirdparty/melo/preprocess_text.py +135 -0
- xinference/thirdparty/melo/split_utils.py +174 -0
- xinference/thirdparty/melo/text/__init__.py +35 -0
- xinference/thirdparty/melo/text/chinese.py +199 -0
- xinference/thirdparty/melo/text/chinese_bert.py +107 -0
- xinference/thirdparty/melo/text/chinese_mix.py +253 -0
- xinference/thirdparty/melo/text/cleaner.py +36 -0
- xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
- xinference/thirdparty/melo/text/cmudict.rep +129530 -0
- xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
- xinference/thirdparty/melo/text/english.py +284 -0
- xinference/thirdparty/melo/text/english_bert.py +39 -0
- xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
- xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
- xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
- xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
- xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
- xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
- xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
- xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
- xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
- xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
- xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
- xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
- xinference/thirdparty/melo/text/french.py +94 -0
- xinference/thirdparty/melo/text/french_bert.py +39 -0
- xinference/thirdparty/melo/text/japanese.py +647 -0
- xinference/thirdparty/melo/text/japanese_bert.py +49 -0
- xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
- xinference/thirdparty/melo/text/korean.py +192 -0
- xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
- xinference/thirdparty/melo/text/spanish.py +122 -0
- xinference/thirdparty/melo/text/spanish_bert.py +39 -0
- xinference/thirdparty/melo/text/symbols.py +290 -0
- xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
- xinference/thirdparty/melo/train.py +635 -0
- xinference/thirdparty/melo/train.sh +19 -0
- xinference/thirdparty/melo/transforms.py +209 -0
- xinference/thirdparty/melo/utils.py +424 -0
- xinference/types.py +15 -0
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
- xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
- xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
- xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +67 -3
- xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
- xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
- xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
- xinference/web/ui/node_modules/i18next/package.json +129 -0
- xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
- xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
- xinference/web/ui/node_modules/react-i18next/package.json +162 -0
- xinference/web/ui/node_modules/void-elements/package.json +34 -0
- xinference/web/ui/package-lock.json +69 -3
- xinference/web/ui/package.json +2 -0
- xinference/web/ui/src/locales/en.json +186 -0
- xinference/web/ui/src/locales/zh.json +186 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/METADATA +68 -32
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/RECORD +316 -122
- xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
- xinference/thirdparty/fish_speech/tools/api.py +0 -943
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -95
- xinference/thirdparty/fish_speech/tools/webui.py +0 -548
- xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
- xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
- xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
- xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
- /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
- /xinference/thirdparty/{fish_speech/tools → melo/text/fr_phonemizer}/__init__.py +0 -0
- /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/WHEEL +0 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# Copyright 2022-2025 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import asyncio
|
|
15
|
+
import logging
|
|
16
|
+
from typing import Any, Dict, Optional
|
|
17
|
+
|
|
18
|
+
import xoscar as xo
|
|
19
|
+
from vllm.core.block.interfaces import BlockId
|
|
20
|
+
from vllm.core.block.prefix_caching_block import (
|
|
21
|
+
BlockTracker,
|
|
22
|
+
PrefixCachingBlockAllocator,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
from .....isolation import Isolation
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class XavierInnerBlockTracker(BlockTracker):
    """Track the status of a block inside the prefix caching allocator.

    Two fixed attributes, `transferred` and `executed`, are added on top of
    the `BlockTracker` class to mark the status of the corresponding
    `block_id`.

    The attributes cannot be set directly on the `Block` object because
    `Block` objects are dynamically allocated with each scheduling. The
    `Block` objects executed in two different scheduling steps may have the
    same `id`, `hash`, etc., but the instance objects may differ.
    The BlockTracker object inside vllm is one-to-one with the block_id.
    """

    # Declare only the attributes introduced by this subclass.
    # NOTE(review): `active`, `last_accessed` and `computed` are presumably
    # already provided by `BlockTracker` (confirm against the vllm version in
    # use). Re-declaring a slot that a base class defines hides the base
    # descriptor and is documented by Python as undefined behaviour.
    __slots__ = ("transferred", "executed")

    def __init__(self):
        super().__init__()
        # Both flags start cleared for every tracked block.
        self.transferred = False
        self.executed = False
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class XavierPrefixCachingBlockAllocator(PrefixCachingBlockAllocator):
    """Prefix caching allocator that reports evicted blocks to a tracker actor.

    Every entry of the inherited `_block_tracker` mapping is replaced with an
    `XavierInnerBlockTracker`, so each `block_id` also carries the
    `transferred` and `executed` flags.
    """

    def __init__(self, *args, run_isolation: bool = False, **kwargs):
        """Build the allocator.

        Args:
            *args: Forwarded unchanged to `PrefixCachingBlockAllocator`.
            run_isolation: When true, start an `Isolation` on a fresh event
                loop; it is later used to run `unregister_block` from the
                synchronous eviction path.
            **kwargs: Forwarded unchanged to `PrefixCachingBlockAllocator`.
        """
        super().__init__(*args, **kwargs)
        # Only values are reassigned, so the key set is stable while iterating.
        for _id in self._block_tracker:
            self._block_tracker[_id] = XavierInnerBlockTracker()

        self._xavier_config: Optional[Dict[str, Any]] = None
        # Resolved lazily by `_get_block_tracker_ref`.
        self._block_tracker_ref = None
        if run_isolation:
            self._isolation = Isolation(
                asyncio.new_event_loop(), threaded=True, daemon=True
            )
            self._isolation.start()
        else:
            self._isolation = None  # type: ignore

    def __del__(self):
        # `__del__` also runs for instances whose `__init__` raised before
        # `_isolation` was assigned, so the attribute may be missing.
        isolation = getattr(self, "_isolation", None)
        if isolation is not None:
            isolation.stop()

    @property
    def xavier_config(self):
        """The xavier configuration dict, or `None` until it has been set."""
        return self._xavier_config

    @xavier_config.setter
    def xavier_config(self, v: Dict[str, Any]):
        self._xavier_config = v

    async def _get_block_tracker_ref(self):
        """Return the tracker actor reference, resolving it on first use.

        The address and uid are read from `xavier_config` under the keys
        `block_tracker_address` and `block_tracker_uid`; the resolved
        reference is cached on the instance.
        """
        if self._block_tracker_ref is None:
            block_tracker_address = self.xavier_config.get("block_tracker_address")
            block_tracker_uid = self.xavier_config.get("block_tracker_uid")
            self._block_tracker_ref = await xo.actor_ref(
                address=block_tracker_address, uid=block_tracker_uid
            )
        return self._block_tracker_ref

    async def unregister_block(self, block_id: int):
        """Ask the tracker actor to unregister `block_id`.

        The call passes the `virtual_engine` and `rank` entries of
        `xavier_config` together with the block id. `xavier_config` must
        already be set.
        """
        assert self._xavier_config is not None
        tracker_ref = await self._get_block_tracker_ref()
        await tracker_ref.unregister_block(
            self.xavier_config.get("virtual_engine"),
            self.xavier_config.get("rank"),
            block_id,
        )

    def _maybe_allocate_evicted_block_id(self) -> Optional[BlockId]:
        """
        This is the only entry point where the `block_id` is evicted from the cache.
        Therefore, when the `block_id` is evicted, the tracker actor needs to unregister the block information.
        At the same time, make sure to reset the attributes corresponding to that `block_id`.

        The reset and the unregistration only happen when an isolation is
        running; the value returned by the parent implementation is returned
        unchanged in every case.
        """
        evicted_block_id = super()._maybe_allocate_evicted_block_id()
        # Lazy %-style arguments: the message is only built when DEBUG is on.
        logger.debug("block_id: %s will be evicted from the cache.", evicted_block_id)
        if evicted_block_id is not None and self._isolation is not None:
            tracker = self._block_tracker[evicted_block_id]
            assert isinstance(tracker, XavierInnerBlockTracker)
            tracker.transferred = False
            tracker.executed = False
            # The unregistration coroutine is handed to the isolation because
            # this method itself is synchronous.
            self._isolation.call(self.unregister_block(evicted_block_id))
            logger.debug("block_id: %s will be used again.", evicted_block_id)
        return evicted_block_id
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# Copyright 2022-2025 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import logging
|
|
15
|
+
from typing import Any, Dict, Optional
|
|
16
|
+
|
|
17
|
+
from vllm.core.block.cpu_gpu_block_allocator import CpuGpuBlockAllocator
|
|
18
|
+
from vllm.core.block.interfaces import Block
|
|
19
|
+
from vllm.core.block_manager import SelfAttnBlockSpaceManager
|
|
20
|
+
from vllm.sequence import SequenceGroup, SequenceStatus
|
|
21
|
+
from vllm.utils import Device
|
|
22
|
+
|
|
23
|
+
from .allocator import XavierCpuGpuBlockAllocator
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class XavierBlockManager(SelfAttnBlockSpaceManager):
    """Block space manager that exposes per-block xavier status flags."""

    def __init__(self, *args, **kwargs):
        # Monkey patch: must happen before the parent constructor runs so the
        # replacement `create` is the one in effect while it executes.
        CpuGpuBlockAllocator.create = XavierCpuGpuBlockAllocator.create
        super().__init__(*args, **kwargs)
        self._xavier_config: Optional[Dict[str, Any]] = None
        logger.debug("Init xavier block manager done.")

    @property
    def xavier_config(self):
        """The xavier configuration dict, or `None` until it has been set."""
        return self._xavier_config

    @xavier_config.setter
    def xavier_config(self, value: Dict[str, Any]):
        self._xavier_config = value
        # Keep the block allocator's copy of the configuration in sync.
        self.block_allocator.xavier_config = value

    def get_block_by_block_id(self, seq_id: int, block_id: int) -> Optional[Block]:
        """Return the block with `block_id` from the block table of `seq_id`.

        Returns `None` when the table holds no block with that id. A
        `seq_id` missing from `block_tables` propagates the lookup error.
        """
        table = self.block_tables[seq_id]
        for b in table.blocks:
            if b.block_id == block_id:
                return b
        return None

    def _gpu_block_tracker(self, block_id: int):
        """Return the GPU allocator's tracker object for `block_id`."""
        return self.block_allocator._allocators[Device.GPU]._block_tracker[block_id]

    def get_block_status_by_block_id(self, status_name: str, block_id: int) -> bool:
        """Read the attribute `status_name` from the tracker of `block_id`."""
        return getattr(self._gpu_block_tracker(block_id), status_name)

    def set_block_status_by_block_id(
        self, status_name: str, block_id: int, status: bool
    ) -> None:
        """Set the attribute `status_name` on the tracker of `block_id`.

        The attribute must already exist on the tracker with a non-`None`
        value.
        """
        tracker = self._gpu_block_tracker(block_id)
        assert getattr(tracker, status_name, None) is not None
        setattr(tracker, status_name, status)

    def allocate(self, seq_group: SequenceGroup) -> None:
        """
        If every waiting sequence of the `seq_group` has a truthy `transferred` attribute,
        it indicates that the `seq_group` has gone through the transfer process,
        so the block allocation logic should not be executed again.

        A `seq_group` with no waiting sequences is skipped as well, because
        `all()` over an empty iterable is true.
        """
        waiting_seqs = seq_group.get_seqs(status=SequenceStatus.WAITING)
        if all(getattr(s, "transferred", False) for s in waiting_seqs):
            return
        super().allocate(seq_group)
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# Copyright 2022-2025 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import random
|
|
15
|
+
from typing import Dict, List, Optional, Set, Tuple
|
|
16
|
+
|
|
17
|
+
import xoscar as xo
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class VLLMBlockTracker(xo.StatelessActor):
    """Actor that records which rank holds which hashed block.

    Two mirrored indexes are kept per virtual engine:

    * ``hash -> {(rank, block_id)}`` is used to answer queries;
    * ``rank -> {(hash, block_id)}`` is used to remove entries by rank.

    Ranks listed in ``_unavailable_ranks`` stay in the indexes but are left
    out of query results.
    """

    @classmethod
    def default_uid(cls):
        return "vllm-block-tracker-actor"

    def __init__(self):
        super().__init__()
        # engine -> hash -> (rank, block_id)
        self._hash_to_rank_and_block_id: Dict[int, Dict[int, Set[Tuple[int, int]]]] = {}
        # engine -> rank -> (hash, block_id)
        self._rank_to_hash_and_block_id: Dict[int, Dict[int, Set[Tuple[int, int]]]] = {}
        self._unavailable_ranks: Set[int] = set()

    def register_blocks(
        self, virtual_engine: int, block_infos: List[Tuple[int, int]], rank: int
    ):
        """Record that ``rank`` holds every ``(hash, block_id)`` in ``block_infos``.

        Both indexes are updated; the per-engine and per-rank containers are
        created on first use, even when ``block_infos`` is empty.
        """
        # Update query meta
        hash_index = self._hash_to_rank_and_block_id.setdefault(virtual_engine, {})
        for hash_content, block_id in block_infos:
            hash_index.setdefault(hash_content, set()).add((rank, block_id))

        # Update remove meta
        rank_index = self._rank_to_hash_and_block_id.setdefault(virtual_engine, {})
        rank_index.setdefault(rank, set()).update(block_infos)

    def query_blocks(
        self, virtual_engine: int, hash_contents: List[Tuple[int, int]]
    ) -> Dict[int, Set[Tuple[int, int, int]]]:
        """Pick, for each requested hash, one available rank that holds it.

        ``hash_contents`` holds ``(hash, id)`` pairs; the second element is
        passed through unchanged into the result.

        Returns a mapping ``rank -> {(hash, block_id, id)}``. Hashes that are
        unknown, or held only by unavailable ranks, are omitted.
        """
        hash_index = self._hash_to_rank_and_block_id.get(virtual_engine)
        if hash_index is None:
            return {}

        remote: Dict[int, Set[Tuple[int, int, int]]] = {}
        for hash_content, _id in hash_contents:
            holders = hash_index.get(hash_content)
            if not holders:
                continue
            # exclude ranks that are in the recovery process
            candidates = [
                (r, b) for r, b in holders if r not in self._unavailable_ranks
            ]
            if not candidates:
                continue
            # TODO: Randomly select here, and try to distribute requests as evenly as possible.
            # There may be better methods in the future.
            rank, block_id = random.choice(candidates)
            remote.setdefault(rank, set()).add((hash_content, block_id, _id))
        return remote

    def unregister_block(self, virtual_engine: int, rank: int, block_id: int):
        """Forget everything recorded for ``block_id`` on ``rank``.

        Every ``(hash, block_id)`` entry of the rank is removed from both
        indexes. Stopping at the first match would make the outcome depend on
        set iteration order whenever the same block id had been registered
        under more than one hash, and would leave the other entries queryable.
        Unknown engines, ranks or block ids are ignored.
        """
        rank_index = self._rank_to_hash_and_block_id.get(virtual_engine)
        hash_index = self._hash_to_rank_and_block_id.get(virtual_engine)
        if rank_index is None or hash_index is None:
            return

        entries = rank_index.get(rank)
        if entries is None:
            return

        # Snapshot the matches first so the set is not mutated while iterating.
        stale = [entry for entry in entries if entry[1] == block_id]
        for hash_content, _id in stale:
            # Update remove meta
            entries.discard((hash_content, _id))
            # Update query meta
            holders = hash_index.get(hash_content)
            if holders is not None:
                holders.discard((rank, block_id))

    def unregister_rank(self, rank: int):
        """
        This rank is in the recovery process, and its query results will be excluded.
        """
        self._unavailable_ranks.add(rank)

    def register_rank(self, rank: int):
        """
        After recovery is successful, clear all stale data of the rank and mark the rank as available.
        """
        for rank_index in self._rank_to_hash_and_block_id.values():
            rank_index.pop(rank, None)

        for hash_index in self._hash_to_rank_and_block_id.values():
            for holders in hash_index.values():
                stale = [entry for entry in holders if entry[0] == rank]
                if stale:
                    holders.difference_update(stale)

        self._unavailable_ranks.discard(rank)
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Copyright 2022-2025 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import logging
|
|
15
|
+
from typing import List, Optional
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class CollectiveRank:
    """One participant in the xoscar pygloo collective group.

    The constructor only stores the configuration. ``init_rank`` creates the
    rendezvous context, the TCP device and the TCP store, and
    ``connect_full_mesh`` connects the context through a prefixed view of
    that store.
    """

    def __init__(
        self,
        rank: int,
        world_size: int,
        rank_address: str,
        store_address: str,
        store_port: int,
        world_addresses: List[str],
    ):
        # Identity of this rank inside the group.
        self._rank = rank
        self._world_size = world_size
        self._rank_address = rank_address
        self._world_addresses = world_addresses
        # Where the rendezvous TCP store is reached.
        self._store_address = store_address
        self._store_port = store_port
        # Filled in by init_rank().
        self._device = None
        self._tcp_store = None
        self._context = None

    def init_rank(self):
        """Create the context, device and TCP store for this rank.

        Rank 0 opens the store as the server. When world addresses are already
        known, the full mesh is connected right away.
        """
        from xoscar.collective import xoscar_pygloo as xp

        self._context = xp.rendezvous.Context(self._rank, self._world_size)

        # Only the part of the rank address before the first ":" is used.
        host = self._rank_address.partition(":")[0]
        tcp_attr = xp.transport.tcp.attr(host)
        self._device = xp.transport.tcp.CreateDevice(tcp_attr)

        store_options = xp.rendezvous.TCPStoreOptions()
        store_options.port = self._store_port
        store_options.numWorkers = self._world_size
        store_options.isServer = self._rank == 0
        store_options.waitWorkers = False

        self._tcp_store = xp.rendezvous.TCPStore(self._store_address, store_options)
        if self._world_addresses:
            self.connect_full_mesh()

    def connect_full_mesh(
        self, prefix: Optional[str] = None, world_addresses: Optional[List[str]] = None
    ):
        """Connect this rank's context to the group.

        ``prefix`` namespaces the keys used in the store; when it is empty or
        ``None`` the world size is used instead. A non-``None``
        ``world_addresses`` replaces the stored address list. ``init_rank``
        must have run first.
        """
        from xoscar.collective import xoscar_pygloo as xp

        assert self._device is not None
        assert self._tcp_store is not None
        assert self._context is not None
        if world_addresses is not None:
            self._world_addresses = world_addresses
        store_prefix = prefix if prefix else str(self._world_size)
        prefix_store = xp.rendezvous.PrefixStore(store_prefix, self._tcp_store)
        self._context.connectFullMesh(prefix_store, self._device)
        logger.debug(
            f"Rank {self._rank} arrives successfully, world addresses: {self._world_addresses}"
        )
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# Copyright 2022-2025 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import asyncio
|
|
15
|
+
import logging
|
|
16
|
+
import traceback
|
|
17
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, no_type_check
|
|
18
|
+
|
|
19
|
+
import xoscar as xo
|
|
20
|
+
|
|
21
|
+
from .block_tracker import VLLMBlockTracker
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from .transfer import Rank0TransferActor, TransferActor
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Rank0ModelActor(xo.StatelessActor):
    """Actor for rank 0 that owns a ``Rank0TransferActor``.

    The transfer actor is created on demand by ``start_transfer_for_vllm`` at
    this actor's own address and is destroyed together with this actor.
    """

    @classmethod
    def default_uid(cls):
        return "rank0-model-actor"

    def __init__(self, xavier_config: Dict[str, Any]):
        super().__init__()
        self._rank = 0
        self._xavier_config = xavier_config
        self._transfer_ref: Optional[xo.ActorRefType["Rank0TransferActor"]] = None

    async def __pre_destroy__(self):
        """Destroy the transfer actor, if one was started.

        This is best effort: a failure is only logged and the reference is
        kept. On success the reference is reset to ``None`` rather than
        deleted, so later ``is not None`` checks on the attribute keep working
        instead of raising ``AttributeError``.
        """
        if self._transfer_ref is None:
            return
        try:
            await xo.destroy_actor(self._transfer_ref)
        except Exception as e:
            logger.debug(
                f"Destroy transfer actor failed, rank: {self._rank}, address: {self.address}, error: {e}"
            )
        else:
            self._transfer_ref = None

    @no_type_check
    async def start_transfer_for_vllm(self, rank_addresses: List[str]):
        """Create the rank-0 transfer actor next to this actor.

        World size and the rank/store endpoints are read from the Xavier
        config with ``dict.get``, so a missing key is passed on as ``None``.
        ``rank_addresses`` is handed over as the world address list.
        """
        from .transfer import Rank0TransferActor

        self._transfer_ref = await xo.create_actor(
            Rank0TransferActor,
            address=self.address,
            uid=f"{Rank0TransferActor.default_uid()}-{self._rank}",
            rank=self._rank,
            world_size=self._xavier_config.get("world_size"),  # type: ignore
            rank_address=self._xavier_config.get("rank_address"),  # type: ignore
            store_address=self._xavier_config.get("store_address"),  # type: ignore
            store_port=self._xavier_config.get("store_port"),  # type: ignore
            world_addresses=rank_addresses,
        )
        logger.debug(
            f"Init transfer actor: {self._transfer_ref.address}, rank: {self._rank} done for vllm."  # type: ignore
        )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def with_lock(method):
    """Decorate an async method so it runs while holding ``self._lock``.

    The instance must expose ``_lock`` as an async context manager. The lock
    is held for the whole awaited call and released on return or exception.
    ``functools.wraps`` keeps the wrapped method's name, docstring and
    ``__wrapped__`` reference, so logs and introspection show the real method
    rather than ``wrapper``.
    """
    # Imported locally so the decorator does not rely on a module-level import.
    import functools

    @functools.wraps(method)
    async def wrapper(self, *args, **kwargs):
        async with self._lock:
            return await method(self, *args, **kwargs)

    return wrapper
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class CollectiveManager(xo.StatelessActor):
    """Actor that tracks each rank's transfer actor for one model.

    It keeps a ``rank -> transfer actor reference`` map, forwards rank
    availability changes to the block tracker, and can ask every known rank
    to rebuild the collective communication world.
    """

    @classmethod
    def default_uid(cls):
        return "xavier-collective-manager"

    def __init__(self, model_uid: str):
        super().__init__()
        self._model_uid = model_uid
        self._lock = asyncio.Lock()
        self._rank_to_ref: Dict[int, xo.ActorRefType["TransferActor"]] = {}
        self._tracker_ref: Optional[xo.ActorRefType["VLLMBlockTracker"]] = None

    async def __post_create__(self):
        """Look up the model's block tracker at this actor's own address."""
        tracker_uid = f"{VLLMBlockTracker.default_uid()}-{self._model_uid}"
        self._tracker_ref = await xo.actor_ref(address=self.address, uid=tracker_uid)

    async def unregister_rank(self, rank: int):
        """Drop the rank's reference and tell the tracker to unregister it."""
        self._rank_to_ref.pop(rank, None)
        await self._tracker_ref.unregister_rank(rank)  # type: ignore
        logger.debug(f"Unregister rank: {rank}")

    async def register_rank(self, rank: int, address: str, update: bool = False):
        """Remember the transfer actor of ``rank`` living at ``address``.

        With ``update=True`` the collective world is rebuilt and the tracker
        is then told to register the rank again.
        """
        from .transfer import TransferActor

        transfer_uid = f"{TransferActor.default_uid()}-{rank}"
        self._rank_to_ref[rank] = await xo.actor_ref(address=address, uid=transfer_uid)
        logger.debug(f"Register rank: {rank}, address: {address}")
        if not update:
            return
        await self._update_world()
        # NOTE(review): the tracker call is assumed to belong to the
        # update=True path only - confirm against the upstream file.
        await self._tracker_ref.register_rank(rank)  # type: ignore

    @with_lock
    async def _update_world(self):
        """
        Locking is used to prevent chaos when multiple replicas trigger recovery simultaneously.
        """
        from .....core.utils import gen_random_string

        prefix = gen_random_string(6)
        # Work on a snapshot so registrations arriving meanwhile do not change
        # the set of ranks being reconnected.
        snapshot = self._rank_to_ref.copy()
        world_addresses = [snapshot[rank].address for rank in sorted(snapshot)]
        tasks = [
            ref.connect_full_mesh(prefix, world_addresses) for ref in snapshot.values()
        ]
        try:
            logger.debug(
                f"Rebuild collective communication with world_addresses: {world_addresses}, prefix: {prefix}"
            )
            await asyncio.gather(*tasks)
            logger.debug(
                f"Rebuild collective communication with world_addresses: {world_addresses}, prefix: {prefix} done."
            )
        except Exception as e:
            # The exception here is most likely due to another replica
            # triggering recovery during the recovery process, causing
            # `connect_full_mesh` to time out.
            # Simply log the exception and let the subsequent update process
            # handle the reconstruction of the collective communication world.
            logger.error(
                f"Rebuild collective communication with world_addresses: {world_addresses} failed. "
                f"Exception: {e}"
            )
            # Print the complete error stack
            traceback.print_exception(type(e), e, e.__traceback__)
|