xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_compat.py +24 -2
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +219 -77
- xinference/client/restful/restful_client.py +47 -2
- xinference/constants.py +1 -0
- xinference/core/chat_interface.py +6 -1
- xinference/core/model.py +124 -34
- xinference/core/supervisor.py +180 -12
- xinference/core/utils.py +73 -4
- xinference/core/worker.py +102 -4
- xinference/deploy/cmdline.py +3 -1
- xinference/deploy/test/test_cmdline.py +56 -0
- xinference/isolation.py +24 -0
- xinference/model/audio/__init__.py +12 -0
- xinference/model/audio/core.py +37 -4
- xinference/model/audio/cosyvoice.py +39 -6
- xinference/model/audio/f5tts.py +200 -0
- xinference/model/audio/f5tts_mlx.py +260 -0
- xinference/model/audio/fish_speech.py +70 -110
- xinference/model/audio/melotts.py +110 -0
- xinference/model/audio/model_spec.json +179 -3
- xinference/model/audio/model_spec_modelscope.json +27 -0
- xinference/model/audio/utils.py +32 -0
- xinference/model/audio/whisper.py +35 -10
- xinference/model/audio/whisper_mlx.py +208 -0
- xinference/model/embedding/core.py +322 -6
- xinference/model/embedding/model_spec.json +8 -1
- xinference/model/embedding/model_spec_modelscope.json +9 -1
- xinference/model/image/core.py +69 -1
- xinference/model/image/model_spec.json +145 -4
- xinference/model/image/model_spec_modelscope.json +150 -4
- xinference/model/image/stable_diffusion/core.py +50 -15
- xinference/model/llm/__init__.py +6 -2
- xinference/model/llm/llm_family.json +1055 -93
- xinference/model/llm/llm_family.py +15 -36
- xinference/model/llm/llm_family_modelscope.json +1031 -78
- xinference/model/llm/memory.py +1 -1
- xinference/model/llm/mlx/core.py +285 -47
- xinference/model/llm/sglang/core.py +2 -0
- xinference/model/llm/transformers/chatglm.py +9 -5
- xinference/model/llm/transformers/cogagent.py +272 -0
- xinference/model/llm/transformers/core.py +3 -0
- xinference/model/llm/transformers/glm_edge_v.py +230 -0
- xinference/model/llm/transformers/qwen2_vl.py +12 -1
- xinference/model/llm/transformers/utils.py +16 -8
- xinference/model/llm/utils.py +55 -4
- xinference/model/llm/vllm/core.py +137 -12
- xinference/model/llm/vllm/xavier/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/allocator.py +74 -0
- xinference/model/llm/vllm/xavier/block.py +111 -0
- xinference/model/llm/vllm/xavier/block_manager.py +71 -0
- xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
- xinference/model/llm/vllm/xavier/collective.py +74 -0
- xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
- xinference/model/llm/vllm/xavier/engine.py +247 -0
- xinference/model/llm/vllm/xavier/executor.py +134 -0
- xinference/model/llm/vllm/xavier/scheduler.py +438 -0
- xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
- xinference/model/llm/vllm/xavier/transfer.py +319 -0
- xinference/model/rerank/core.py +11 -4
- xinference/model/video/diffusers.py +14 -0
- xinference/model/video/model_spec.json +15 -0
- xinference/model/video/model_spec_modelscope.json +16 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
- xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
- xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
- xinference/thirdparty/cosyvoice/bin/train.py +42 -8
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
- xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
- xinference/thirdparty/cosyvoice/cli/model.py +330 -80
- xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
- xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
- xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
- xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
- xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
- xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
- xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
- xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
- xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
- xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
- xinference/thirdparty/cosyvoice/utils/common.py +28 -1
- xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
- xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
- xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
- xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
- xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
- xinference/thirdparty/f5_tts/api.py +166 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
- xinference/thirdparty/f5_tts/eval/README.md +49 -0
- xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
- xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
- xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
- xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
- xinference/thirdparty/f5_tts/infer/README.md +191 -0
- xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
- xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
- xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
- xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
- xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
- xinference/thirdparty/f5_tts/model/__init__.py +10 -0
- xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
- xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
- xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
- xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
- xinference/thirdparty/f5_tts/model/cfm.py +285 -0
- xinference/thirdparty/f5_tts/model/dataset.py +319 -0
- xinference/thirdparty/f5_tts/model/modules.py +658 -0
- xinference/thirdparty/f5_tts/model/trainer.py +366 -0
- xinference/thirdparty/f5_tts/model/utils.py +185 -0
- xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
- xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
- xinference/thirdparty/f5_tts/socket_server.py +159 -0
- xinference/thirdparty/f5_tts/train/README.md +77 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
- xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
- xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
- xinference/thirdparty/f5_tts/train/train.py +75 -0
- xinference/thirdparty/fish_speech/fish_speech/conversation.py +266 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +137 -29
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +17 -11
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
- xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
- xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +2 -2
- xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +34 -18
- xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
- xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
- xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
- xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
- xinference/thirdparty/fish_speech/tools/llama/generate.py +484 -72
- xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
- xinference/thirdparty/fish_speech/tools/schema.py +170 -0
- xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
- xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
- xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
- xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
- xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
- xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
- xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
- xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
- xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
- xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
- xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
- xinference/thirdparty/matcha/utils/utils.py +2 -2
- xinference/thirdparty/melo/api.py +135 -0
- xinference/thirdparty/melo/app.py +61 -0
- xinference/thirdparty/melo/attentions.py +459 -0
- xinference/thirdparty/melo/commons.py +160 -0
- xinference/thirdparty/melo/configs/config.json +94 -0
- xinference/thirdparty/melo/data/example/metadata.list +20 -0
- xinference/thirdparty/melo/data_utils.py +413 -0
- xinference/thirdparty/melo/download_utils.py +67 -0
- xinference/thirdparty/melo/infer.py +25 -0
- xinference/thirdparty/melo/init_downloads.py +14 -0
- xinference/thirdparty/melo/losses.py +58 -0
- xinference/thirdparty/melo/main.py +36 -0
- xinference/thirdparty/melo/mel_processing.py +174 -0
- xinference/thirdparty/melo/models.py +1030 -0
- xinference/thirdparty/melo/modules.py +598 -0
- xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
- xinference/thirdparty/melo/monotonic_align/core.py +46 -0
- xinference/thirdparty/melo/preprocess_text.py +135 -0
- xinference/thirdparty/melo/split_utils.py +174 -0
- xinference/thirdparty/melo/text/__init__.py +35 -0
- xinference/thirdparty/melo/text/chinese.py +199 -0
- xinference/thirdparty/melo/text/chinese_bert.py +107 -0
- xinference/thirdparty/melo/text/chinese_mix.py +253 -0
- xinference/thirdparty/melo/text/cleaner.py +36 -0
- xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
- xinference/thirdparty/melo/text/cmudict.rep +129530 -0
- xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
- xinference/thirdparty/melo/text/english.py +284 -0
- xinference/thirdparty/melo/text/english_bert.py +39 -0
- xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
- xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
- xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
- xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
- xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
- xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
- xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
- xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
- xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
- xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
- xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
- xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
- xinference/thirdparty/melo/text/french.py +94 -0
- xinference/thirdparty/melo/text/french_bert.py +39 -0
- xinference/thirdparty/melo/text/japanese.py +647 -0
- xinference/thirdparty/melo/text/japanese_bert.py +49 -0
- xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
- xinference/thirdparty/melo/text/korean.py +192 -0
- xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
- xinference/thirdparty/melo/text/spanish.py +122 -0
- xinference/thirdparty/melo/text/spanish_bert.py +39 -0
- xinference/thirdparty/melo/text/symbols.py +290 -0
- xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
- xinference/thirdparty/melo/train.py +635 -0
- xinference/thirdparty/melo/train.sh +19 -0
- xinference/thirdparty/melo/transforms.py +209 -0
- xinference/thirdparty/melo/utils.py +424 -0
- xinference/types.py +17 -1
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
- xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
- xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
- xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +67 -3
- xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
- xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
- xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
- xinference/web/ui/node_modules/i18next/package.json +129 -0
- xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
- xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
- xinference/web/ui/node_modules/react-i18next/package.json +162 -0
- xinference/web/ui/node_modules/void-elements/package.json +34 -0
- xinference/web/ui/package-lock.json +69 -3
- xinference/web/ui/package.json +2 -0
- xinference/web/ui/src/locales/en.json +186 -0
- xinference/web/ui/src/locales/zh.json +186 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/METADATA +96 -36
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/RECORD +335 -146
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/WHEEL +1 -1
- xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
- xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/api.py +0 -440
- xinference/thirdparty/fish_speech/tools/commons.py +0 -35
- xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -34
- xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/webui.py +0 -485
- xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
- xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
- xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
- xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
- /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
- /xinference/thirdparty/{fish_speech/fish_speech/configs → melo/text/fr_phonemizer}/__init__.py +0 -0
- /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# Copyright 2022-2025 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import os
|
|
15
|
+
import sys
|
|
16
|
+
|
|
17
|
+
import pytest
|
|
18
|
+
import xoscar as xo
|
|
19
|
+
|
|
20
|
+
from ..block_tracker import VLLMBlockTracker
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ExtendedBlockTracker(VLLMBlockTracker):
|
|
24
|
+
def get_hash_to_rank_and_block_id(self):
|
|
25
|
+
return self._hash_to_rank_and_block_id
|
|
26
|
+
|
|
27
|
+
def get_rank_to_hash_and_block_id(self):
|
|
28
|
+
return self._rank_to_hash_and_block_id
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@pytest.fixture
|
|
32
|
+
async def actor_pool_context():
|
|
33
|
+
start_method = (
|
|
34
|
+
os.environ.get("POOL_START_METHOD", "forkserver")
|
|
35
|
+
if sys.platform != "win32"
|
|
36
|
+
else None
|
|
37
|
+
)
|
|
38
|
+
pool = await xo.create_actor_pool(
|
|
39
|
+
"127.0.0.1", n_process=2, subprocess_start_method=start_method
|
|
40
|
+
)
|
|
41
|
+
async with pool:
|
|
42
|
+
yield pool
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@pytest.mark.asyncio
|
|
46
|
+
async def test_block_tracker(actor_pool_context):
|
|
47
|
+
actor_pool = actor_pool_context
|
|
48
|
+
addr = actor_pool.external_address
|
|
49
|
+
tracker_ref: xo.ActorRefType[ExtendedBlockTracker] = await xo.create_actor(
|
|
50
|
+
ExtendedBlockTracker,
|
|
51
|
+
address=addr,
|
|
52
|
+
uid=VLLMBlockTracker.default_uid(),
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
virtual_engine = 0
|
|
56
|
+
rank = 0
|
|
57
|
+
block_infos = [(123, 0), (456, 1), (789, 2)]
|
|
58
|
+
|
|
59
|
+
# register blocks
|
|
60
|
+
await tracker_ref.register_blocks(virtual_engine, block_infos, rank)
|
|
61
|
+
|
|
62
|
+
# query blocks
|
|
63
|
+
res = await tracker_ref.query_blocks(virtual_engine, [(123, 4), (789, 5)])
|
|
64
|
+
assert len(res) == 1
|
|
65
|
+
assert rank in res
|
|
66
|
+
assert len(res[rank]) == 2
|
|
67
|
+
assert {x[0] for x in res[rank]} == {123, 789}
|
|
68
|
+
assert {x[1] for x in res[rank]} == {0, 2}
|
|
69
|
+
assert {x[2] for x in res[rank]} == {4, 5}
|
|
70
|
+
|
|
71
|
+
# query with extra info
|
|
72
|
+
res = await tracker_ref.query_blocks(virtual_engine, [(123, 4), (789, 5), (110, 6)])
|
|
73
|
+
assert len(res) == 1
|
|
74
|
+
assert rank in res
|
|
75
|
+
assert len(res[rank]) == 2
|
|
76
|
+
assert {x[0] for x in res[rank]} == {123, 789}
|
|
77
|
+
assert {x[1] for x in res[rank]} == {0, 2}
|
|
78
|
+
assert {x[2] for x in res[rank]} == {4, 5}
|
|
79
|
+
|
|
80
|
+
# unregister block
|
|
81
|
+
await tracker_ref.unregister_block(virtual_engine, rank, 1)
|
|
82
|
+
res = await tracker_ref.query_blocks(virtual_engine, [(123, 4), (456, 7)])
|
|
83
|
+
assert len(res) == 1
|
|
84
|
+
assert rank in res
|
|
85
|
+
assert len(res[rank]) == 1
|
|
86
|
+
assert {x[0] for x in res[rank]} == {123}
|
|
87
|
+
assert {x[1] for x in res[rank]} == {
|
|
88
|
+
0,
|
|
89
|
+
}
|
|
90
|
+
assert {x[2] for x in res[rank]} == {
|
|
91
|
+
4,
|
|
92
|
+
}
|
|
93
|
+
# nothing happens
|
|
94
|
+
await tracker_ref.unregister_block(virtual_engine, rank, 3)
|
|
95
|
+
res = await tracker_ref.query_blocks(virtual_engine, [(123, 4), (456, 7)])
|
|
96
|
+
assert len(res) == 1
|
|
97
|
+
assert rank in res
|
|
98
|
+
assert len(res[rank]) == 1
|
|
99
|
+
assert {x[0] for x in res[rank]} == {123}
|
|
100
|
+
assert {x[1] for x in res[rank]} == {
|
|
101
|
+
0,
|
|
102
|
+
}
|
|
103
|
+
assert {x[2] for x in res[rank]} == {
|
|
104
|
+
4,
|
|
105
|
+
}
|
|
106
|
+
# query returns empty
|
|
107
|
+
res = await tracker_ref.query_blocks(virtual_engine, [(456, 8)])
|
|
108
|
+
assert res == {}
|
|
109
|
+
|
|
110
|
+
# check internal data
|
|
111
|
+
hash_to_rank_and_block_id = await tracker_ref.get_hash_to_rank_and_block_id()
|
|
112
|
+
assert virtual_engine in hash_to_rank_and_block_id
|
|
113
|
+
assert hash_to_rank_and_block_id[virtual_engine] == {
|
|
114
|
+
123: {
|
|
115
|
+
(rank, 0),
|
|
116
|
+
},
|
|
117
|
+
456: set(),
|
|
118
|
+
789: {(rank, 2)},
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
rank_to_hash_and_block_id = await tracker_ref.get_rank_to_hash_and_block_id()
|
|
122
|
+
assert virtual_engine in rank_to_hash_and_block_id
|
|
123
|
+
assert rank_to_hash_and_block_id[virtual_engine] == {rank: {(123, 0), (789, 2)}}
|
|
124
|
+
|
|
125
|
+
# register blocks
|
|
126
|
+
new_rank = 1
|
|
127
|
+
block_infos = [(111, 7), (222, 8), (333, 9), (123, 10)]
|
|
128
|
+
await tracker_ref.register_blocks(virtual_engine, block_infos, new_rank)
|
|
129
|
+
|
|
130
|
+
# test unregister rank
|
|
131
|
+
await tracker_ref.unregister_rank(0)
|
|
132
|
+
res = await tracker_ref.query_blocks(virtual_engine, [(789, 5)])
|
|
133
|
+
assert len(res) == 0
|
|
134
|
+
res = await tracker_ref.query_blocks(virtual_engine, [(123, 6)])
|
|
135
|
+
assert len(res) == 1
|
|
136
|
+
assert new_rank in res
|
|
137
|
+
|
|
138
|
+
# check internal data
|
|
139
|
+
rank_to_hash_and_block_id = await tracker_ref.get_rank_to_hash_and_block_id()
|
|
140
|
+
assert rank in rank_to_hash_and_block_id[virtual_engine]
|
|
141
|
+
assert new_rank in rank_to_hash_and_block_id[virtual_engine]
|
|
142
|
+
|
|
143
|
+
# test register rank
|
|
144
|
+
await tracker_ref.register_rank(0)
|
|
145
|
+
rank_to_hash_and_block_id = await tracker_ref.get_rank_to_hash_and_block_id()
|
|
146
|
+
assert rank not in rank_to_hash_and_block_id[virtual_engine]
|
|
147
|
+
assert new_rank in rank_to_hash_and_block_id[virtual_engine]
|
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
# Copyright 2022-2025 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import asyncio
|
|
15
|
+
import logging
|
|
16
|
+
from functools import lru_cache
|
|
17
|
+
from queue import Queue
|
|
18
|
+
from typing import Dict, List, Optional, no_type_check
|
|
19
|
+
|
|
20
|
+
import torch
|
|
21
|
+
import xoscar as xo
|
|
22
|
+
from vllm.core.scheduler import Scheduler
|
|
23
|
+
from vllm.utils import TORCH_DTYPE_TO_NUMPY_DTYPE, Device
|
|
24
|
+
from vllm.worker.cache_engine import CacheEngine
|
|
25
|
+
|
|
26
|
+
from .collective import CollectiveRank
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class BufferTransferMixin:
    """Manage a pool of CPU staging buffers for moving KV-cache blocks.

    Blocks swapped out of the GPU KV cache are staged in these buffers
    before being sent over the collective transport, and incoming blocks
    are staged here before being swapped back into the GPU cache.
    """

    def __init__(self):
        self.num_buffer: int = 0  # number of pre-allocated staging buffers
        self.buffers: List[torch.Tensor] = []
        self.buffer_queue: Optional[Queue] = None  # indices of free buffers
        self.transfer_block_num = 0  # max KV blocks staged per transfer chunk
        self.num_attn_layers = 0

    def init_buffer(
        self, num_buffer: int, buffer_shape, buffer_dtype, buffer_device, pin_memory
    ):
        """Allocate ``num_buffer`` zero-filled staging tensors.

        ``buffer_shape`` is expected to be
        ``(transfer_block_num, num_attn_layers, 2, *kv_cache_shape[2:])``.
        """
        # bfloat16 is staged as float16 (same element size).
        # NOTE(review): presumably because the gloo transport has no
        # bfloat16 mapping — confirm against TypeMappingGloo.
        if buffer_dtype is torch.bfloat16:
            buffer_dtype = torch.float16

        self.num_buffer = num_buffer
        self.transfer_block_num = buffer_shape[0]
        self.num_attn_layers = buffer_shape[1]

        self.buffers = [
            torch.zeros(
                size=buffer_shape,
                dtype=buffer_dtype,
                device=buffer_device,
                pin_memory=pin_memory,
            )
            for _ in range(self.num_buffer)
        ]

        # Every buffer starts out free.
        self.buffer_queue = Queue()
        for i in range(self.num_buffer):
            self.buffer_queue.put_nowait(i)
        logger.debug(
            f"Init buffer done. "
            f"transfer_block_num: {self.transfer_block_num}, "
            f"num_buffer: {self.num_buffer}, "
            f"buffer_dtype: {buffer_dtype}, "
            f"buffer_shape: {buffer_shape}"
        )

    @no_type_check
    def get_buffer_index(self) -> int:
        """Acquire a free buffer index, blocking until one is available."""
        return self.buffer_queue.get()

    @no_type_check
    def free_buffer_index(self, index: int) -> None:
        """Return a buffer index to the free pool."""
        self.buffer_queue.put_nowait(index)

    def get_swap_buffer(self, index: int, num_blocks: int) -> torch.Tensor:
        """Return a view of buffer ``index`` covering ``num_blocks`` blocks.

        The view is laid out as
        ``(num_attn_layers, 2, num_blocks, *per_block_dims)`` so each layer
        slice can be handed to vllm's swap interface.
        """
        buf = self.buffers[index]
        buffer = buf[:num_blocks].view(
            self.num_attn_layers, 2, num_blocks, *buf.shape[3:]
        )
        return buffer

    @staticmethod
    @lru_cache(maxsize=None)
    def get_gloo_dtype(input_dtype: torch.dtype):
        """Map a torch dtype to the corresponding gloo collective dtype.

        Cached at class level: the original applied ``lru_cache`` to the
        *instance* method, which keys the cache on ``self`` and keeps every
        instance alive for the cache's lifetime (ruff B019).  The mapping
        depends only on the dtype, so a staticmethod cache is both correct
        and leak-free; ``self.get_gloo_dtype(dt)`` still works for callers.
        """
        from xoscar.collective.common import TypeMappingGloo

        return TypeMappingGloo[TORCH_DTYPE_TO_NUMPY_DTYPE[input_dtype]]
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class TransferActor(xo.StatelessActor, BufferTransferMixin, CollectiveRank):
    """Per-rank actor that transfers KV-cache blocks between vllm workers.

    Combines three roles: an xoscar RPC endpoint (``xo.StatelessActor``),
    the CPU staging-buffer pool (``BufferTransferMixin``) and membership in
    the gloo collective world (``CollectiveRank``).  Data flows
    GPU -> staging buffer -> gloo point-to-point -> staging buffer -> GPU.
    """

    @classmethod
    def default_uid(cls):
        # NOTE(review): `recv` looks actors up as f"{default_uid()}-{rank}",
        # so the uid actually registered presumably has the rank appended by
        # the creation site — confirm where these actors are created.
        return f"vllm-transfer-actor"

    def __init__(
        self,
        rank: int,
        world_size: int,
        rank_address: str,
        store_address: str,
        store_port: int,
        world_addresses: List[str],
    ):
        # Initialize the actor/mixin chain, then explicitly initialize the
        # collective-rank state (CollectiveRank is not on the super() path
        # reached by this call order, so it is invoked directly).
        super().__init__()
        CollectiveRank.__init__(
            self,
            rank,
            world_size,
            rank_address,
            store_address,
            store_port,
            world_addresses,
        )
        # One CacheEngine / Scheduler per vllm virtual engine; populated by
        # `setup` after the vllm engine is constructed.
        self._cache_engine: Optional[List[CacheEngine]] = None
        self._scheduler: Optional[List[Scheduler]] = None
        # Dedicated CUDA stream so swaps overlap with the default stream.
        self._swap_stream = torch.cuda.Stream()

    async def __post_create__(self):
        # Join the collective world as soon as the actor exists.
        self.init_rank()

    def setup(
        self,
        cache_engine: List[CacheEngine],
        scheduler: List[Scheduler],
        num_buffer: int,
        buffer_shape,
        buffer_dtype,
        buffer_device,
        pin_memory: bool,
    ):
        """Attach vllm's cache engines/schedulers and allocate staging buffers."""
        self._cache_engine = cache_engine
        self._scheduler = scheduler
        self.init_buffer(
            num_buffer, buffer_shape, buffer_dtype, buffer_device, pin_memory
        )

    async def __pre_destroy__(self):
        # Tear down gloo connections on actor destruction.
        # NOTE(review): `_context` is presumably created by
        # CollectiveRank.init_rank — confirm in .collective.
        self._context.closeConnections()

    def _get_cache_engine(self, virtual_engine: int) -> CacheEngine:
        # `setup` must have been called before any transfer.
        return self._cache_engine[virtual_engine]  # type: ignore

    @staticmethod
    def _get_swap_block_ids(src_to_dst: Dict[int, int], is_sender: bool) -> List[int]:
        """Return this side's block ids from the src->dst mapping, sorted.

        The sender uses the keys, the receiver the values.  Both sides sort
        the same mapping, so chunked iteration visits corresponding blocks
        in the same order on both ends.
        """
        return list(sorted([r if is_sender else l for r, l in src_to_dst.items()]))

    def _swap_out_to_buffer(
        self, cache_engine: CacheEngine, cpu_buf_index: int, block_ids: List[int]
    ) -> torch.Tensor:
        """Copy `block_ids` from the GPU KV cache into a CPU staging buffer.

        Returns the staging-buffer view holding the copied blocks, packed
        densely: GPU block `block_ids[i]` lands at buffer position `i`.
        """
        num_blocks = len(block_ids)
        # Mapping rows are (gpu_block_num, dense_buffer_index).
        src_to_dst = torch.tensor(
            [(block_num, idx) for idx, block_num in enumerate(block_ids)],
            device="cpu",
            dtype=torch.int64,
        ).view(-1, 2)
        cpu_buf = self.get_swap_buffer(cpu_buf_index, num_blocks)
        # Issue per-layer swaps on the dedicated stream, then block until
        # they finish so the buffer is safe to hand to gloo.
        with torch.cuda.stream(self._swap_stream):
            for i in range(self.num_attn_layers):
                cache_engine.attn_backend.swap_blocks(
                    cache_engine.gpu_cache[i], cpu_buf[i], src_to_dst
                )
        torch.cuda.Stream.synchronize(self._swap_stream)
        return cpu_buf

    def _swap_in_from_buffer(
        self, cache_engine: CacheEngine, cpu_buf: torch.Tensor, block_ids: List[int]
    ) -> None:
        """Copy densely-packed blocks from a staging buffer into the GPU cache.

        Buffer position `i` is written to GPU block `block_ids[i]` — the
        inverse of `_swap_out_to_buffer`.
        """
        src_to_dst = torch.tensor(
            [(idx, block_num) for idx, block_num in enumerate(block_ids)],
            device="cpu",
            dtype=torch.int64,
        ).view(-1, 2)
        with torch.cuda.stream(self._swap_stream):
            for i in range(self.num_attn_layers):
                cache_engine.attn_backend.swap_blocks(
                    cpu_buf[i], cache_engine.gpu_cache[i], src_to_dst
                )
        torch.cuda.Stream.synchronize(self._swap_stream)

    def _incr_count_for_block_id(self, virtual_engine: int, block_ids: List[int]):
        """
        The reference count of the `block_id` involved in the transfer is incremented by 1
        to ensure it is not reclaimed.
        """
        scheduler = self._scheduler[virtual_engine]  # type: ignore
        # Reaches into vllm's private GPU block allocator to pin the blocks
        # for the duration of the transfer.
        gpu_allocator = scheduler.block_manager.block_allocator._allocators[Device.GPU]

        for _id in block_ids:
            gpu_allocator._refcounter.incr(_id)

    def _decr_count_for_block_id(self, virtual_engine: int, block_ids: List[int]):
        """
        After the transfer, the reference count is decremented by 1.
        """
        scheduler = self._scheduler[virtual_engine]  # type: ignore
        gpu_allocator = scheduler.block_manager.block_allocator._allocators[Device.GPU]

        for _id in block_ids:
            gpu_allocator._refcounter.decr(_id)

    async def do_send(
        self, virtual_engine: int, to_rank: int, src_to_dst: Dict[int, int]
    ):
        """
        Sending logic: GPU -> Buffer -> Gloo send.
        GPU -> Buffer is directly handled using the internal `swap_out` interface of vllm.
        """
        from xoscar.collective import xoscar_pygloo as xp

        cache_engine = self._get_cache_engine(virtual_engine)

        # Pin the source blocks so vllm cannot reclaim them mid-transfer.
        block_ids = self._get_swap_block_ids(src_to_dst, is_sender=True)
        self._incr_count_for_block_id(virtual_engine, block_ids)
        cpu_buf_index = self.get_buffer_index()
        total_blocks: int = len(block_ids)

        try:
            # Send in chunks of at most `transfer_block_num` blocks, reusing
            # the same staging buffer for each chunk.
            for start_idx in range(0, total_blocks, self.transfer_block_num):
                offset = min(self.transfer_block_num, total_blocks - start_idx)
                send_block_ids = block_ids[start_idx : start_idx + offset]
                sendbuf = self._swap_out_to_buffer(
                    cache_engine, cpu_buf_index, send_block_ids
                )
                assert sendbuf.is_contiguous()
                # gloo takes a raw pointer; the numpy view shares memory
                # with the (CPU) staging tensor.
                sendptr = sendbuf.numpy().ctypes.data
                data_size = sendbuf.numel()
                datatype = self.get_gloo_dtype(sendbuf.dtype)
                peer = to_rank
                xp.send(self._context, sendptr, data_size, datatype, peer)
        finally:
            # Always unpin the blocks and recycle the staging buffer, even
            # if the transfer failed partway.
            self._decr_count_for_block_id(virtual_engine, block_ids)
            self.free_buffer_index(cpu_buf_index)

    async def do_recv(
        self, virtual_engine: int, from_rank: int, src_to_dst: Dict[int, int]
    ):
        """
        Receiving logic: Gloo recv -> Buffer -> GPU.
        Buffer -> GPU is directly handled using the internal `swap_in` interface of vllm.
        """
        from xoscar.collective import xoscar_pygloo as xp

        cache_engine = self._get_cache_engine(virtual_engine)

        # Pin the destination blocks for the duration of the transfer.
        block_ids = self._get_swap_block_ids(src_to_dst, is_sender=False)
        self._incr_count_for_block_id(virtual_engine, block_ids)
        total_blocks = len(block_ids)
        cpu_buf_index = self.get_buffer_index()

        try:
            # Chunking mirrors `do_send`; both sides iterate the sorted ids,
            # so chunk i received here corresponds to chunk i sent there.
            for start_idx in range(0, total_blocks, self.transfer_block_num):
                offset = min(self.transfer_block_num, total_blocks - start_idx)
                recv_block_ids = block_ids[start_idx : start_idx + offset]
                recvbuf = self.get_swap_buffer(cpu_buf_index, len(recv_block_ids))
                assert recvbuf.is_contiguous()
                recvptr = recvbuf.numpy().ctypes.data
                data_size = recvbuf.numel()
                datatype = self.get_gloo_dtype(recvbuf.dtype)
                peer = from_rank
                xp.recv(self._context, recvptr, data_size, datatype, peer)

                self._swap_in_from_buffer(cache_engine, recvbuf, recv_block_ids)
        finally:
            self._decr_count_for_block_id(virtual_engine, block_ids)
            self.free_buffer_index(cpu_buf_index)

    async def recv(
        self, virtual_engine: int, from_rank: int, src_to_dst: Dict[int, int]
    ):
        """
        This is the external entry point for the call.
        The transfer logic is as follows:
        the receiver requests the sender to send the data directly to itself in a point-to-point manner.
        """
        from_address = self._world_addresses[from_rank]
        sender_ref = await xo.actor_ref(
            address=from_address, uid=f"{TransferActor.default_uid()}-{from_rank}"
        )
        # Run the remote send and the local recv concurrently; gloo's
        # point-to-point send/recv pair completes them in lockstep.
        await asyncio.gather(
            sender_ref.do_send(virtual_engine, self._rank, src_to_dst),
            self.do_recv(virtual_engine, from_rank, src_to_dst),
        )
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
class Rank0TransferActor(xo.StatelessActor, CollectiveRank):
    """
    The rank-0 transfer actor only participates in constructing the
    collective communication world; it never moves KV-cache data itself,
    so it only needs the `CollectiveRank` behavior (no buffers and no
    cache engines).

    Note: the original docstring referred to a `CollectiveWorld` class,
    but the class actually inherited is `CollectiveRank`.
    """

    @classmethod
    def default_uid(cls):
        # Plain string literal: the original used an f-string with no
        # placeholders (ruff F541); the runtime value is unchanged.
        return "vllm-transfer-actor"

    def __init__(
        self,
        rank: int,
        world_size: int,
        rank_address: str,
        store_address: str,
        store_port: int,
        world_addresses: List[str],
    ):
        # Only the collective-rank state is needed; there is no buffer or
        # cache-engine setup for rank 0.
        CollectiveRank.__init__(
            self,
            rank,
            world_size,
            rank_address,
            store_address,
            store_port,
            world_addresses,
        )

    async def __post_create__(self):
        # Join the collective world as soon as the actor is created.
        self.init_rank()
|
xinference/model/rerank/core.py
CHANGED
|
@@ -179,6 +179,7 @@ class RerankModel:
|
|
|
179
179
|
return rerank_type
|
|
180
180
|
|
|
181
181
|
def load(self):
|
|
182
|
+
logger.info("Loading rerank model: %s", self._model_path)
|
|
182
183
|
flash_attn_installed = importlib.util.find_spec("flash_attn") is not None
|
|
183
184
|
if (
|
|
184
185
|
self._auto_detect_type(self._model_path) != "normal"
|
|
@@ -189,6 +190,7 @@ class RerankModel:
|
|
|
189
190
|
"will force set `use_fp16` to True"
|
|
190
191
|
)
|
|
191
192
|
self._use_fp16 = True
|
|
193
|
+
|
|
192
194
|
if self._model_spec.type == "normal":
|
|
193
195
|
try:
|
|
194
196
|
import sentence_transformers
|
|
@@ -250,22 +252,27 @@ class RerankModel:
|
|
|
250
252
|
**kwargs,
|
|
251
253
|
) -> Rerank:
|
|
252
254
|
assert self._model is not None
|
|
253
|
-
if kwargs:
|
|
254
|
-
raise ValueError("rerank hasn't support extra parameter.")
|
|
255
255
|
if max_chunks_per_doc is not None:
|
|
256
256
|
raise ValueError("rerank hasn't support `max_chunks_per_doc` parameter.")
|
|
257
|
+
logger.info("Rerank with kwargs: %s, model: %s", kwargs, self._model)
|
|
257
258
|
sentence_combinations = [[query, doc] for doc in documents]
|
|
258
259
|
# reset n tokens
|
|
259
260
|
self._model.model.n_tokens = 0
|
|
260
261
|
if self._model_spec.type == "normal":
|
|
261
262
|
similarity_scores = self._model.predict(
|
|
262
|
-
sentence_combinations,
|
|
263
|
+
sentence_combinations,
|
|
264
|
+
convert_to_numpy=False,
|
|
265
|
+
convert_to_tensor=True,
|
|
266
|
+
**kwargs,
|
|
263
267
|
).cpu()
|
|
264
268
|
if similarity_scores.dtype == torch.bfloat16:
|
|
265
269
|
similarity_scores = similarity_scores.float()
|
|
266
270
|
else:
|
|
267
271
|
# Related issue: https://github.com/xorbitsai/inference/issues/1775
|
|
268
|
-
similarity_scores = self._model.compute_score(
|
|
272
|
+
similarity_scores = self._model.compute_score(
|
|
273
|
+
sentence_combinations, **kwargs
|
|
274
|
+
)
|
|
275
|
+
|
|
269
276
|
if not isinstance(similarity_scores, Sequence):
|
|
270
277
|
similarity_scores = [similarity_scores]
|
|
271
278
|
elif (
|
|
@@ -91,6 +91,20 @@ class DiffUsersVideoModel:
|
|
|
91
91
|
pipeline = self._model = CogVideoXPipeline.from_pretrained(
|
|
92
92
|
self._model_path, **kwargs
|
|
93
93
|
)
|
|
94
|
+
elif self._model_spec.model_family == "HunyuanVideo":
|
|
95
|
+
from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
|
|
96
|
+
|
|
97
|
+
transformer_torch_dtype = kwargs.pop("transformer_torch_dtype")
|
|
98
|
+
if isinstance(transformer_torch_dtype, str):
|
|
99
|
+
transformer_torch_dtype = getattr(torch, transformer_torch_dtype)
|
|
100
|
+
transformer = HunyuanVideoTransformer3DModel.from_pretrained(
|
|
101
|
+
self._model_path,
|
|
102
|
+
subfolder="transformer",
|
|
103
|
+
torch_dtype=transformer_torch_dtype,
|
|
104
|
+
)
|
|
105
|
+
pipeline = self._model = HunyuanVideoPipeline.from_pretrained(
|
|
106
|
+
self._model_path, transformer=transformer, **kwargs
|
|
107
|
+
)
|
|
94
108
|
else:
|
|
95
109
|
raise Exception(
|
|
96
110
|
f"Unsupported model family: {self._model_spec.model_family}"
|
|
@@ -30,5 +30,20 @@
|
|
|
30
30
|
"default_generate_config": {
|
|
31
31
|
"guidance_scale": 7
|
|
32
32
|
}
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"model_name": "HunyuanVideo",
|
|
36
|
+
"model_family": "HunyuanVideo",
|
|
37
|
+
"model_id": "hunyuanvideo-community/HunyuanVideo",
|
|
38
|
+
"model_revision": "e8c2aaa66fe3742a32c11a6766aecbf07c56e773",
|
|
39
|
+
"model_ability": [
|
|
40
|
+
"text2video"
|
|
41
|
+
],
|
|
42
|
+
"default_model_config": {
|
|
43
|
+
"transformer_torch_dtype": "bfloat16",
|
|
44
|
+
"torch_dtype": "float16"
|
|
45
|
+
},
|
|
46
|
+
"default_generate_config": {
|
|
47
|
+
}
|
|
33
48
|
}
|
|
34
49
|
]
|
|
@@ -32,5 +32,21 @@
|
|
|
32
32
|
"default_generate_config": {
|
|
33
33
|
"guidance_scale": 7
|
|
34
34
|
}
|
|
35
|
+
},
|
|
36
|
+
{
|
|
37
|
+
"model_name": "HunyuanVideo",
|
|
38
|
+
"model_family": "HunyuanVideo",
|
|
39
|
+
"model_hub": "modelscope",
|
|
40
|
+
"model_id": "Xorbits/HunyuanVideo",
|
|
41
|
+
"model_revision": "master",
|
|
42
|
+
"model_ability": [
|
|
43
|
+
"text2video"
|
|
44
|
+
],
|
|
45
|
+
"default_model_config": {
|
|
46
|
+
"transformer_torch_dtype": "bfloat16",
|
|
47
|
+
"torch_dtype": "float16"
|
|
48
|
+
},
|
|
49
|
+
"default_generate_config": {
|
|
50
|
+
}
|
|
35
51
|
}
|
|
36
52
|
]
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# Copyright (c) 2020 Mobvoi Inc (Di Wu)
|
|
2
|
+
# Copyright (c) 2024 Alibaba Inc (authors: Xiang Lyu)
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
import argparse
|
|
18
|
+
import glob
|
|
19
|
+
|
|
20
|
+
import yaml
|
|
21
|
+
import torch
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_args():
    """Parse command-line arguments for checkpoint averaging.

    Returns:
        argparse.Namespace with ``dst_model`` (output path), ``src_path``
        (directory holding checkpoints and their yaml summaries),
        ``val_best`` (select by best validation loss) and ``num``
        (how many checkpoints to average).
    """
    parser = argparse.ArgumentParser(description='average model')
    parser.add_argument('--dst_model', required=True,
                        help='path to write the averaged model')
    parser.add_argument('--src_path',
                        required=True,
                        help='src model path for average')
    # Original help texts for --val_best and --num were copy-paste
    # leftovers ('averaged model' / 'nums for averaged model').
    parser.add_argument('--val_best',
                        action="store_true",
                        help='average the checkpoints with the best '
                             '(lowest) validation loss')
    parser.add_argument('--num',
                        default=5,
                        type=int,
                        help='number of checkpoints to average')

    args = parser.parse_args()
    print(args)
    return args
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def main():
    """Average the ``--num`` best checkpoints (by validation loss) under
    ``--src_path`` and save the averaged state dict to ``--dst_model``.

    Checkpoint quality is read from the per-checkpoint yaml summaries
    (``loss_dict.loss``, ``epoch``, ``step``, ``tag``); the ``--num``
    checkpoints with the lowest loss are summed key-by-key and divided
    by ``--num``.
    """
    args = get_args()
    val_scores = []
    # Candidate checkpoint paths.  Defined up front: the original only
    # bound `path_list` inside the `--val_best` branch, so running without
    # that flag crashed with a NameError further down.
    path_list = []
    if args.val_best:
        yamls = glob.glob('{}/*.yaml'.format(args.src_path))
        # Skip config files; only per-checkpoint summaries carry scores.
        yamls = [
            f for f in yamls
            if not (os.path.basename(f).startswith('train')
                    or os.path.basename(f).startswith('init'))
        ]
        for y in yamls:
            with open(y, 'r') as f:
                dic_yaml = yaml.load(f, Loader=yaml.BaseLoader)
                loss = float(dic_yaml['loss_dict']['loss'])
                epoch = int(dic_yaml['epoch'])
                step = int(dic_yaml['step'])
                tag = dic_yaml['tag']
                val_scores += [[epoch, step, loss, tag]]
        # Ascending by loss: smallest validation loss first.
        sorted_val_scores = sorted(val_scores, key=lambda x: x[2])
        print("best val (epoch, step, loss, tag) = " +
              str(sorted_val_scores[:args.num]))
        path_list = [
            args.src_path + '/epoch_{}_whole.pt'.format(score[0])
            for score in sorted_val_scores[:args.num]
        ]
        print(path_list)
    num = args.num
    if len(path_list) != num:
        # The original used a bare `assert` here (stripped under `python -O`)
        # and crashed with NameError when --val_best was omitted; fail with
        # an actionable message instead.
        raise SystemExit(
            'expected {} checkpoints but found {}; '
            'pass --val_best and check --src_path'.format(num, len(path_list))
        )
    avg = {}
    for path in path_list:
        print('Processing {}'.format(path))
        # NOTE(review): torch.load unpickles arbitrary objects — only run
        # this script on trusted checkpoints.
        states = torch.load(path, map_location=torch.device('cpu'))
        for k in states:
            if k not in avg:
                avg[k] = states[k].clone()
            else:
                avg[k] += states[k]
    # average
    for k in avg:
        if avg[k] is not None:
            # pytorch 1.6 use true_divide instead of /=
            avg[k] = torch.true_divide(avg[k], num)
    print('Saving to {}'.format(args.dst_model))
    torch.save(avg, args.dst_model)


if __name__ == '__main__':
    main()
|