xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference has been flagged as potentially problematic; see the registry advisory for details.
- xinference/_compat.py +24 -2
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +219 -77
- xinference/client/restful/restful_client.py +47 -2
- xinference/constants.py +1 -0
- xinference/core/chat_interface.py +6 -1
- xinference/core/model.py +124 -34
- xinference/core/supervisor.py +180 -12
- xinference/core/utils.py +73 -4
- xinference/core/worker.py +102 -4
- xinference/deploy/cmdline.py +3 -1
- xinference/deploy/test/test_cmdline.py +56 -0
- xinference/isolation.py +24 -0
- xinference/model/audio/__init__.py +12 -0
- xinference/model/audio/core.py +37 -4
- xinference/model/audio/cosyvoice.py +39 -6
- xinference/model/audio/f5tts.py +200 -0
- xinference/model/audio/f5tts_mlx.py +260 -0
- xinference/model/audio/fish_speech.py +70 -110
- xinference/model/audio/melotts.py +110 -0
- xinference/model/audio/model_spec.json +179 -3
- xinference/model/audio/model_spec_modelscope.json +27 -0
- xinference/model/audio/utils.py +32 -0
- xinference/model/audio/whisper.py +35 -10
- xinference/model/audio/whisper_mlx.py +208 -0
- xinference/model/embedding/core.py +322 -6
- xinference/model/embedding/model_spec.json +8 -1
- xinference/model/embedding/model_spec_modelscope.json +9 -1
- xinference/model/image/core.py +69 -1
- xinference/model/image/model_spec.json +145 -4
- xinference/model/image/model_spec_modelscope.json +150 -4
- xinference/model/image/stable_diffusion/core.py +50 -15
- xinference/model/llm/__init__.py +6 -2
- xinference/model/llm/llm_family.json +1055 -93
- xinference/model/llm/llm_family.py +15 -36
- xinference/model/llm/llm_family_modelscope.json +1031 -78
- xinference/model/llm/memory.py +1 -1
- xinference/model/llm/mlx/core.py +285 -47
- xinference/model/llm/sglang/core.py +2 -0
- xinference/model/llm/transformers/chatglm.py +9 -5
- xinference/model/llm/transformers/cogagent.py +272 -0
- xinference/model/llm/transformers/core.py +3 -0
- xinference/model/llm/transformers/glm_edge_v.py +230 -0
- xinference/model/llm/transformers/qwen2_vl.py +12 -1
- xinference/model/llm/transformers/utils.py +16 -8
- xinference/model/llm/utils.py +55 -4
- xinference/model/llm/vllm/core.py +137 -12
- xinference/model/llm/vllm/xavier/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/allocator.py +74 -0
- xinference/model/llm/vllm/xavier/block.py +111 -0
- xinference/model/llm/vllm/xavier/block_manager.py +71 -0
- xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
- xinference/model/llm/vllm/xavier/collective.py +74 -0
- xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
- xinference/model/llm/vllm/xavier/engine.py +247 -0
- xinference/model/llm/vllm/xavier/executor.py +134 -0
- xinference/model/llm/vllm/xavier/scheduler.py +438 -0
- xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
- xinference/model/llm/vllm/xavier/transfer.py +319 -0
- xinference/model/rerank/core.py +11 -4
- xinference/model/video/diffusers.py +14 -0
- xinference/model/video/model_spec.json +15 -0
- xinference/model/video/model_spec_modelscope.json +16 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
- xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
- xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
- xinference/thirdparty/cosyvoice/bin/train.py +42 -8
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
- xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
- xinference/thirdparty/cosyvoice/cli/model.py +330 -80
- xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
- xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
- xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
- xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
- xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
- xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
- xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
- xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
- xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
- xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
- xinference/thirdparty/cosyvoice/utils/common.py +28 -1
- xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
- xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
- xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
- xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
- xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
- xinference/thirdparty/f5_tts/api.py +166 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
- xinference/thirdparty/f5_tts/eval/README.md +49 -0
- xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
- xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
- xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
- xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
- xinference/thirdparty/f5_tts/infer/README.md +191 -0
- xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
- xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
- xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
- xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
- xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
- xinference/thirdparty/f5_tts/model/__init__.py +10 -0
- xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
- xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
- xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
- xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
- xinference/thirdparty/f5_tts/model/cfm.py +285 -0
- xinference/thirdparty/f5_tts/model/dataset.py +319 -0
- xinference/thirdparty/f5_tts/model/modules.py +658 -0
- xinference/thirdparty/f5_tts/model/trainer.py +366 -0
- xinference/thirdparty/f5_tts/model/utils.py +185 -0
- xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
- xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
- xinference/thirdparty/f5_tts/socket_server.py +159 -0
- xinference/thirdparty/f5_tts/train/README.md +77 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
- xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
- xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
- xinference/thirdparty/f5_tts/train/train.py +75 -0
- xinference/thirdparty/fish_speech/fish_speech/conversation.py +266 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +137 -29
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +17 -11
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
- xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
- xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +2 -2
- xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +34 -18
- xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
- xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
- xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
- xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
- xinference/thirdparty/fish_speech/tools/llama/generate.py +484 -72
- xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
- xinference/thirdparty/fish_speech/tools/schema.py +170 -0
- xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
- xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
- xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
- xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
- xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
- xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
- xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
- xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
- xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
- xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
- xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
- xinference/thirdparty/matcha/utils/utils.py +2 -2
- xinference/thirdparty/melo/api.py +135 -0
- xinference/thirdparty/melo/app.py +61 -0
- xinference/thirdparty/melo/attentions.py +459 -0
- xinference/thirdparty/melo/commons.py +160 -0
- xinference/thirdparty/melo/configs/config.json +94 -0
- xinference/thirdparty/melo/data/example/metadata.list +20 -0
- xinference/thirdparty/melo/data_utils.py +413 -0
- xinference/thirdparty/melo/download_utils.py +67 -0
- xinference/thirdparty/melo/infer.py +25 -0
- xinference/thirdparty/melo/init_downloads.py +14 -0
- xinference/thirdparty/melo/losses.py +58 -0
- xinference/thirdparty/melo/main.py +36 -0
- xinference/thirdparty/melo/mel_processing.py +174 -0
- xinference/thirdparty/melo/models.py +1030 -0
- xinference/thirdparty/melo/modules.py +598 -0
- xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
- xinference/thirdparty/melo/monotonic_align/core.py +46 -0
- xinference/thirdparty/melo/preprocess_text.py +135 -0
- xinference/thirdparty/melo/split_utils.py +174 -0
- xinference/thirdparty/melo/text/__init__.py +35 -0
- xinference/thirdparty/melo/text/chinese.py +199 -0
- xinference/thirdparty/melo/text/chinese_bert.py +107 -0
- xinference/thirdparty/melo/text/chinese_mix.py +253 -0
- xinference/thirdparty/melo/text/cleaner.py +36 -0
- xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
- xinference/thirdparty/melo/text/cmudict.rep +129530 -0
- xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
- xinference/thirdparty/melo/text/english.py +284 -0
- xinference/thirdparty/melo/text/english_bert.py +39 -0
- xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
- xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
- xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
- xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
- xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
- xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
- xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
- xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
- xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
- xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
- xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
- xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
- xinference/thirdparty/melo/text/french.py +94 -0
- xinference/thirdparty/melo/text/french_bert.py +39 -0
- xinference/thirdparty/melo/text/japanese.py +647 -0
- xinference/thirdparty/melo/text/japanese_bert.py +49 -0
- xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
- xinference/thirdparty/melo/text/korean.py +192 -0
- xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
- xinference/thirdparty/melo/text/spanish.py +122 -0
- xinference/thirdparty/melo/text/spanish_bert.py +39 -0
- xinference/thirdparty/melo/text/symbols.py +290 -0
- xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
- xinference/thirdparty/melo/train.py +635 -0
- xinference/thirdparty/melo/train.sh +19 -0
- xinference/thirdparty/melo/transforms.py +209 -0
- xinference/thirdparty/melo/utils.py +424 -0
- xinference/types.py +17 -1
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
- xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
- xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
- xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +67 -3
- xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
- xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
- xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
- xinference/web/ui/node_modules/i18next/package.json +129 -0
- xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
- xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
- xinference/web/ui/node_modules/react-i18next/package.json +162 -0
- xinference/web/ui/node_modules/void-elements/package.json +34 -0
- xinference/web/ui/package-lock.json +69 -3
- xinference/web/ui/package.json +2 -0
- xinference/web/ui/src/locales/en.json +186 -0
- xinference/web/ui/src/locales/zh.json +186 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/METADATA +96 -36
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/RECORD +335 -146
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/WHEEL +1 -1
- xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
- xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/api.py +0 -440
- xinference/thirdparty/fish_speech/tools/commons.py +0 -35
- xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -34
- xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/webui.py +0 -485
- xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
- xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
- xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
- xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
- /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
- /xinference/thirdparty/{fish_speech/fish_speech/configs → melo/text/fr_phonemizer}/__init__.py +0 -0
- /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
xinference/core/utils.py
CHANGED
|
@@ -11,11 +11,13 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
+
import asyncio
|
|
14
15
|
import logging
|
|
15
16
|
import os
|
|
16
17
|
import random
|
|
17
18
|
import string
|
|
18
19
|
import uuid
|
|
20
|
+
import weakref
|
|
19
21
|
from enum import Enum
|
|
20
22
|
from typing import Dict, Generator, List, Optional, Tuple, Union
|
|
21
23
|
|
|
@@ -23,7 +25,10 @@ import orjson
|
|
|
23
25
|
from pynvml import nvmlDeviceGetCount, nvmlInit, nvmlShutdown
|
|
24
26
|
|
|
25
27
|
from .._compat import BaseModel
|
|
26
|
-
from ..constants import
|
|
28
|
+
from ..constants import (
|
|
29
|
+
XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
|
|
30
|
+
XINFERENCE_LOG_ARG_MAX_LENGTH,
|
|
31
|
+
)
|
|
27
32
|
|
|
28
33
|
logger = logging.getLogger(__name__)
|
|
29
34
|
|
|
@@ -49,13 +54,24 @@ def log_async(
|
|
|
49
54
|
):
|
|
50
55
|
import time
|
|
51
56
|
from functools import wraps
|
|
57
|
+
from inspect import signature
|
|
52
58
|
|
|
53
59
|
def decorator(func):
|
|
54
60
|
func_name = func.__name__
|
|
61
|
+
sig = signature(func)
|
|
55
62
|
|
|
56
63
|
@wraps(func)
|
|
57
64
|
async def wrapped(*args, **kwargs):
|
|
58
|
-
request_id_str = kwargs.get("request_id"
|
|
65
|
+
request_id_str = kwargs.get("request_id")
|
|
66
|
+
if not request_id_str:
|
|
67
|
+
# sometimes `request_id` not in kwargs
|
|
68
|
+
# we try to bind the arguments
|
|
69
|
+
try:
|
|
70
|
+
bound_args = sig.bind_partial(*args, **kwargs)
|
|
71
|
+
arguments = bound_args.arguments
|
|
72
|
+
except TypeError:
|
|
73
|
+
arguments = {}
|
|
74
|
+
request_id_str = arguments.get("request_id", "")
|
|
59
75
|
if not request_id_str:
|
|
60
76
|
request_id_str = uuid.uuid1()
|
|
61
77
|
if func_name == "text_to_image":
|
|
@@ -260,8 +276,8 @@ def get_nvidia_gpu_info() -> Dict:
|
|
|
260
276
|
|
|
261
277
|
|
|
262
278
|
def assign_replica_gpu(
|
|
263
|
-
_replica_model_uid: str, replica: int, gpu_idx: Union[int, List[int]]
|
|
264
|
-
) -> List[int]:
|
|
279
|
+
_replica_model_uid: str, replica: int, gpu_idx: Optional[Union[int, List[int]]]
|
|
280
|
+
) -> Optional[List[int]]:
|
|
265
281
|
model_uid, rep_id = parse_replica_model_uid(_replica_model_uid)
|
|
266
282
|
rep_id, replica = int(rep_id), int(replica)
|
|
267
283
|
if isinstance(gpu_idx, int):
|
|
@@ -269,3 +285,56 @@ def assign_replica_gpu(
|
|
|
269
285
|
if isinstance(gpu_idx, list) and gpu_idx:
|
|
270
286
|
return gpu_idx[rep_id::replica]
|
|
271
287
|
return gpu_idx
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
class CancelMixin:
    """Mixin that tracks in-flight asyncio tasks per request id so that a
    request can be cancelled and, optionally, blocked from re-execution for a
    duration after being aborted.
    """

    # Name given to the placeholder task that blocks an aborted request id.
    _CANCEL_TASK_NAME = "abort_block"

    def __init__(self):
        # Weak values: an entry disappears automatically once its task object
        # is garbage-collected after completion, so finished requests do not
        # accumulate here.
        self._running_tasks: weakref.WeakValueDictionary[
            str, asyncio.Task
        ] = weakref.WeakValueDictionary()
        # Strong references to the short-lived "abort block" tasks. The event
        # loop keeps only weak references to tasks (see asyncio.create_task
        # docs), and the WeakValueDictionary above would otherwise allow a
        # block task to be garbage-collected in the middle of its sleep.
        self._block_tasks: set = set()

    def _add_running_task(self, request_id: Optional[str]):
        """Register the current asyncio task under ``request_id``.

        :param request_id: The corresponding request id; no-op when ``None``.
        :raises Exception: if the request id was aborted (a block task is
            still registered for it) or is already running.
        """
        if request_id is None:
            return
        existing = self._running_tasks.get(request_id)
        if existing is not None:
            if existing.get_name() == self._CANCEL_TASK_NAME:
                raise Exception(f"The request has been aborted: {request_id}")
            raise Exception(f"Duplicate request id: {request_id}")
        current_task = asyncio.current_task()
        assert current_task is not None
        self._running_tasks[request_id] = current_task

    def _cancel_running_task(
        self,
        request_id: Optional[str],
        block_duration: int = XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
    ):
        """Cancel the running asyncio task registered for ``request_id``.

        :param request_id: The request id to cancel; no-op when ``None``.
        :param block_duration: The duration in seconds during which the same
            request id is prevented from being executed again.
        """
        if request_id is None:
            return
        running_task = self._running_tasks.pop(request_id, None)
        if running_task is not None:
            running_task.cancel()

        async def block_task():
            """This task is for blocking the request for a duration."""
            try:
                await asyncio.sleep(block_duration)
                logger.info("Abort block end for request: %s", request_id)
            except asyncio.CancelledError:
                logger.info("Abort block is cancelled for request: %s", request_id)

        if block_duration > 0:
            logger.info("Abort block start for request: %s", request_id)
            task = asyncio.create_task(block_task(), name=self._CANCEL_TASK_NAME)
            self._running_tasks[request_id] = task
            # Hold a strong reference until the block task finishes; a task
            # referenced only weakly may be garbage-collected mid-execution.
            self._block_tasks.add(task)
            task.add_done_callback(self._block_tasks.discard)
xinference/core/worker.py
CHANGED
|
@@ -22,8 +22,9 @@ import signal
|
|
|
22
22
|
import threading
|
|
23
23
|
import time
|
|
24
24
|
from collections import defaultdict
|
|
25
|
+
from dataclasses import dataclass
|
|
25
26
|
from logging import getLogger
|
|
26
|
-
from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union
|
|
27
|
+
from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union, no_type_check
|
|
27
28
|
|
|
28
29
|
import xoscar as xo
|
|
29
30
|
from async_timeout import timeout
|
|
@@ -58,6 +59,11 @@ else:
|
|
|
58
59
|
MODEL_ACTOR_AUTO_RECOVER_LIMIT = None
|
|
59
60
|
|
|
60
61
|
|
|
62
|
+
@dataclass
|
|
63
|
+
class ModelStatus:
|
|
64
|
+
last_error: str = ""
|
|
65
|
+
|
|
66
|
+
|
|
61
67
|
class WorkerActor(xo.StatelessActor):
|
|
62
68
|
def __init__(
|
|
63
69
|
self,
|
|
@@ -90,6 +96,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
90
96
|
# attributes maintained after model launched:
|
|
91
97
|
self._model_uid_to_model: Dict[str, xo.ActorRefType["ModelActor"]] = {}
|
|
92
98
|
self._model_uid_to_model_spec: Dict[str, ModelDescription] = {}
|
|
99
|
+
self._model_uid_to_model_status: Dict[str, ModelStatus] = {}
|
|
93
100
|
self._gpu_to_model_uid: Dict[int, str] = {}
|
|
94
101
|
self._gpu_to_embedding_model_uids: Dict[int, Set[str]] = defaultdict(set)
|
|
95
102
|
# Dict structure: gpu_index: {(replica_model_uid, model_type)}
|
|
@@ -177,12 +184,12 @@ class WorkerActor(xo.StatelessActor):
|
|
|
177
184
|
self._model_uid_to_recover_count[model_uid] = (
|
|
178
185
|
recover_count - 1
|
|
179
186
|
)
|
|
180
|
-
await self.
|
|
187
|
+
await self.recover_model(launch_args)
|
|
181
188
|
else:
|
|
182
189
|
logger.warning("Stop recreating model actor.")
|
|
183
190
|
else:
|
|
184
191
|
logger.warning("Recreating model actor %s ...", model_uid)
|
|
185
|
-
await self.
|
|
192
|
+
await self.recover_model(launch_args)
|
|
186
193
|
break
|
|
187
194
|
|
|
188
195
|
@classmethod
|
|
@@ -866,6 +873,9 @@ class WorkerActor(xo.StatelessActor):
|
|
|
866
873
|
)
|
|
867
874
|
|
|
868
875
|
try:
|
|
876
|
+
xavier_config: Optional[Dict] = kwargs.pop("xavier_config", None)
|
|
877
|
+
if xavier_config is not None:
|
|
878
|
+
xavier_config["rank_address"] = subpool_address
|
|
869
879
|
model, model_description = await asyncio.to_thread(
|
|
870
880
|
create_model_instance,
|
|
871
881
|
subpool_address,
|
|
@@ -893,6 +903,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
893
903
|
model=model,
|
|
894
904
|
model_description=model_description,
|
|
895
905
|
request_limits=request_limits,
|
|
906
|
+
xavier_config=xavier_config,
|
|
896
907
|
)
|
|
897
908
|
await model_ref.load()
|
|
898
909
|
except:
|
|
@@ -902,6 +913,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
902
913
|
raise
|
|
903
914
|
self._model_uid_to_model[model_uid] = model_ref
|
|
904
915
|
self._model_uid_to_model_spec[model_uid] = model_description
|
|
916
|
+
self._model_uid_to_model_status[model_uid] = ModelStatus()
|
|
905
917
|
self._model_uid_to_addr[model_uid] = subpool_address
|
|
906
918
|
self._model_uid_to_recover_count.setdefault(
|
|
907
919
|
model_uid, MODEL_ACTOR_AUTO_RECOVER_LIMIT
|
|
@@ -921,13 +933,18 @@ class WorkerActor(xo.StatelessActor):
|
|
|
921
933
|
origin_uid,
|
|
922
934
|
{"model_ability": abilities, "status": LaunchStatus.READY.name},
|
|
923
935
|
)
|
|
936
|
+
return subpool_address
|
|
924
937
|
|
|
925
938
|
@log_async(logger=logger, level=logging.INFO)
|
|
926
939
|
async def terminate_model(self, model_uid: str, is_model_die=False):
|
|
927
940
|
# Terminate model while its launching is not allow
|
|
928
941
|
if model_uid in self._model_uid_launching_guard:
|
|
929
942
|
raise ValueError(f"{model_uid} is launching")
|
|
930
|
-
|
|
943
|
+
# In special cases, if the suffix is `-rank0`, this is the Xavier's rank 0 model actor.
|
|
944
|
+
if model_uid.endswith("-rank0"):
|
|
945
|
+
origin_uid = model_uid.removesuffix("-rank0")
|
|
946
|
+
else:
|
|
947
|
+
origin_uid, _ = parse_replica_model_uid(model_uid)
|
|
931
948
|
try:
|
|
932
949
|
_ = await self.get_supervisor_ref()
|
|
933
950
|
if self._event_collector_ref is not None:
|
|
@@ -976,6 +993,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
976
993
|
status = LaunchStatus.ERROR.name
|
|
977
994
|
else:
|
|
978
995
|
status = LaunchStatus.TERMINATED.name
|
|
996
|
+
self._model_uid_to_model_status.pop(model_uid, None)
|
|
979
997
|
|
|
980
998
|
if self._status_guard_ref is None:
|
|
981
999
|
_ = await self.get_supervisor_ref()
|
|
@@ -1010,6 +1028,9 @@ class WorkerActor(xo.StatelessActor):
|
|
|
1010
1028
|
|
|
1011
1029
|
@log_sync(logger=logger)
|
|
1012
1030
|
def get_model(self, model_uid: str) -> xo.ActorRefType["ModelActor"]:
|
|
1031
|
+
model_status = self._model_uid_to_model_status.get(model_uid)
|
|
1032
|
+
if model_status and model_status.last_error:
|
|
1033
|
+
raise Exception(model_status.last_error)
|
|
1013
1034
|
model_ref = self._model_uid_to_model.get(model_uid, None)
|
|
1014
1035
|
if model_ref is None:
|
|
1015
1036
|
raise ValueError(f"Model not found, uid: {model_uid}")
|
|
@@ -1138,6 +1159,83 @@ class WorkerActor(xo.StatelessActor):
|
|
|
1138
1159
|
}
|
|
1139
1160
|
return ret
|
|
1140
1161
|
|
|
1162
|
+
def update_model_status(self, model_uid: str, **kwargs):
|
|
1163
|
+
model_status = self._model_uid_to_model_status.get(model_uid)
|
|
1164
|
+
if model_status is not None:
|
|
1165
|
+
for k, v in kwargs.items():
|
|
1166
|
+
setattr(model_status, k, v)
|
|
1167
|
+
|
|
1168
|
+
def get_model_status(self, model_uid: str):
|
|
1169
|
+
return self._model_uid_to_model_status.get(model_uid)
|
|
1170
|
+
|
|
1141
1171
|
@staticmethod
|
|
1142
1172
|
def record_metrics(name, op, kwargs):
|
|
1143
1173
|
record_metrics(name, op, kwargs)
|
|
1174
|
+
|
|
1175
|
+
async def start_transfer_for_vllm(
|
|
1176
|
+
self, rep_model_uid: str, rank_addresses: List[str]
|
|
1177
|
+
):
|
|
1178
|
+
model_ref = self._model_uid_to_model[rep_model_uid]
|
|
1179
|
+
await model_ref.start_transfer_for_vllm(rank_addresses)
|
|
1180
|
+
|
|
1181
|
+
@log_async(logger=logger, level=logging.INFO)
|
|
1182
|
+
async def launch_rank0_model(
|
|
1183
|
+
self, rep_model_uid: str, xavier_config: Dict[str, Any]
|
|
1184
|
+
) -> Tuple[str, int]:
|
|
1185
|
+
from ..model.llm.vllm.xavier.collective_manager import Rank0ModelActor
|
|
1186
|
+
|
|
1187
|
+
if os.name != "nt" and platform.system() != "Darwin":
|
|
1188
|
+
# Linux
|
|
1189
|
+
start_method = "forkserver"
|
|
1190
|
+
else:
|
|
1191
|
+
# Windows and macOS
|
|
1192
|
+
start_method = "spawn"
|
|
1193
|
+
subpool_address = await self._main_pool.append_sub_pool(
|
|
1194
|
+
start_method=start_method
|
|
1195
|
+
)
|
|
1196
|
+
|
|
1197
|
+
store_address = subpool_address.split(":")[0]
|
|
1198
|
+
# Note that `store_port` needs to be generated on the worker,
|
|
1199
|
+
# as the TCP store is on rank 0, not on the supervisor.
|
|
1200
|
+
store_port = xo.utils.get_next_port()
|
|
1201
|
+
self._model_uid_launching_guard[rep_model_uid] = True
|
|
1202
|
+
try:
|
|
1203
|
+
try:
|
|
1204
|
+
xavier_config["rank_address"] = subpool_address
|
|
1205
|
+
xavier_config["store_address"] = store_address
|
|
1206
|
+
xavier_config["store_port"] = store_port
|
|
1207
|
+
model_ref = await xo.create_actor(
|
|
1208
|
+
Rank0ModelActor,
|
|
1209
|
+
address=subpool_address,
|
|
1210
|
+
uid=rep_model_uid,
|
|
1211
|
+
xavier_config=xavier_config,
|
|
1212
|
+
)
|
|
1213
|
+
except:
|
|
1214
|
+
await self._main_pool.remove_sub_pool(subpool_address)
|
|
1215
|
+
raise
|
|
1216
|
+
self._model_uid_to_model[rep_model_uid] = model_ref
|
|
1217
|
+
self._model_uid_to_addr[rep_model_uid] = subpool_address
|
|
1218
|
+
finally:
|
|
1219
|
+
del self._model_uid_launching_guard[rep_model_uid]
|
|
1220
|
+
return subpool_address, store_port
|
|
1221
|
+
|
|
1222
|
+
@no_type_check
|
|
1223
|
+
async def recover_model(self, launch_args: Dict[str, Any]):
|
|
1224
|
+
rep_model_uid = launch_args.get("model_uid")
|
|
1225
|
+
origin_uid, _ = parse_replica_model_uid(rep_model_uid)
|
|
1226
|
+
xavier_config: Optional[Dict[str, Any]] = launch_args.get("xavier_config", None)
|
|
1227
|
+
is_xavier: bool = xavier_config is not None
|
|
1228
|
+
supervisor_ref = await self.get_supervisor_ref(add_worker=False)
|
|
1229
|
+
if is_xavier:
|
|
1230
|
+
rank = xavier_config.get("rank")
|
|
1231
|
+
await supervisor_ref.call_collective_manager(
|
|
1232
|
+
origin_uid, "unregister_rank", rank
|
|
1233
|
+
)
|
|
1234
|
+
subpool_address = await self.launch_builtin_model(**launch_args)
|
|
1235
|
+
if is_xavier:
|
|
1236
|
+
model_ref = self._model_uid_to_model[rep_model_uid]
|
|
1237
|
+
await model_ref.start_transfer_for_vllm([])
|
|
1238
|
+
rank = xavier_config.get("rank")
|
|
1239
|
+
await supervisor_ref.call_collective_manager(
|
|
1240
|
+
origin_uid, "register_rank", rank, subpool_address, update=True
|
|
1241
|
+
)
|
xinference/deploy/cmdline.py
CHANGED
|
@@ -846,7 +846,9 @@ def model_launch(
|
|
|
846
846
|
kwargs = {}
|
|
847
847
|
for i in range(0, len(ctx.args), 2):
|
|
848
848
|
if not ctx.args[i].startswith("--"):
|
|
849
|
-
raise ValueError(
|
|
849
|
+
raise ValueError(
|
|
850
|
+
f"You must specify extra kwargs with `--` prefix. There is an error in parameter passing that is {ctx.args[i]}."
|
|
851
|
+
)
|
|
850
852
|
kwargs[ctx.args[i][2:]] = handle_click_args_type(ctx.args[i + 1])
|
|
851
853
|
print(f"Launch model name: {model_name} with kwargs: {kwargs}", file=sys.stderr)
|
|
852
854
|
|
|
@@ -23,6 +23,7 @@ from ..cmdline import (
|
|
|
23
23
|
list_model_registrations,
|
|
24
24
|
model_chat,
|
|
25
25
|
model_generate,
|
|
26
|
+
model_launch,
|
|
26
27
|
model_list,
|
|
27
28
|
model_terminate,
|
|
28
29
|
register_model,
|
|
@@ -311,3 +312,58 @@ def test_remove_cache(setup):
|
|
|
311
312
|
|
|
312
313
|
assert result.exit_code == 0
|
|
313
314
|
assert "Cache directory qwen1.5-chat has been deleted."
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def test_launch_error_in_passing_parameters():
|
|
318
|
+
runner = CliRunner()
|
|
319
|
+
|
|
320
|
+
# Known parameter but not provided with value.
|
|
321
|
+
result = runner.invoke(
|
|
322
|
+
model_launch,
|
|
323
|
+
[
|
|
324
|
+
"--model-engine",
|
|
325
|
+
"transformers",
|
|
326
|
+
"--model-name",
|
|
327
|
+
"qwen2.5-instruct",
|
|
328
|
+
"--model-uid",
|
|
329
|
+
"-s",
|
|
330
|
+
"0.5",
|
|
331
|
+
"-f",
|
|
332
|
+
"gptq",
|
|
333
|
+
"-q",
|
|
334
|
+
"INT4",
|
|
335
|
+
"111",
|
|
336
|
+
"-l",
|
|
337
|
+
],
|
|
338
|
+
)
|
|
339
|
+
assert result.exit_code == 1
|
|
340
|
+
assert (
|
|
341
|
+
"You must specify extra kwargs with `--` prefix. There is an error in parameter passing that is 0.5."
|
|
342
|
+
in str(result)
|
|
343
|
+
)
|
|
344
|
+
|
|
345
|
+
# Unknown parameter
|
|
346
|
+
result = runner.invoke(
|
|
347
|
+
model_launch,
|
|
348
|
+
[
|
|
349
|
+
"--model-engine",
|
|
350
|
+
"transformers",
|
|
351
|
+
"--model-name",
|
|
352
|
+
"qwen2.5-instruct",
|
|
353
|
+
"--model-uid",
|
|
354
|
+
"123",
|
|
355
|
+
"-s",
|
|
356
|
+
"0.5",
|
|
357
|
+
"-f",
|
|
358
|
+
"gptq",
|
|
359
|
+
"-q",
|
|
360
|
+
"INT4",
|
|
361
|
+
"-l",
|
|
362
|
+
"111",
|
|
363
|
+
],
|
|
364
|
+
)
|
|
365
|
+
assert result.exit_code == 1
|
|
366
|
+
assert (
|
|
367
|
+
"You must specify extra kwargs with `--` prefix. There is an error in parameter passing that is -l."
|
|
368
|
+
in str(result)
|
|
369
|
+
)
|
xinference/isolation.py
CHANGED
|
@@ -37,6 +37,30 @@ class Isolation:
|
|
|
37
37
|
asyncio.set_event_loop(self._loop)
|
|
38
38
|
self._stopped = asyncio.Event()
|
|
39
39
|
self._loop.run_until_complete(self._stopped.wait())
|
|
40
|
+
self._cancel_all_tasks(self._loop)
|
|
41
|
+
|
|
42
|
+
@staticmethod
|
|
43
|
+
def _cancel_all_tasks(loop):
|
|
44
|
+
to_cancel = asyncio.all_tasks(loop)
|
|
45
|
+
if not to_cancel:
|
|
46
|
+
return
|
|
47
|
+
|
|
48
|
+
for task in to_cancel:
|
|
49
|
+
task.cancel()
|
|
50
|
+
|
|
51
|
+
loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))
|
|
52
|
+
|
|
53
|
+
for task in to_cancel:
|
|
54
|
+
if task.cancelled():
|
|
55
|
+
continue
|
|
56
|
+
if task.exception() is not None:
|
|
57
|
+
loop.call_exception_handler(
|
|
58
|
+
{
|
|
59
|
+
"message": "unhandled exception during asyncio.run() shutdown",
|
|
60
|
+
"exception": task.exception(),
|
|
61
|
+
"task": task,
|
|
62
|
+
}
|
|
63
|
+
)
|
|
40
64
|
|
|
41
65
|
def start(self):
|
|
42
66
|
if self._threaded:
|
|
@@ -15,6 +15,8 @@
|
|
|
15
15
|
import codecs
|
|
16
16
|
import json
|
|
17
17
|
import os
|
|
18
|
+
import platform
|
|
19
|
+
import sys
|
|
18
20
|
import warnings
|
|
19
21
|
from typing import Any, Dict
|
|
20
22
|
|
|
@@ -55,6 +57,14 @@ def register_custom_model():
|
|
|
55
57
|
warnings.warn(f"{user_defined_audio_dir}/{f} has error, {e}")
|
|
56
58
|
|
|
57
59
|
|
|
60
|
+
def _need_filter(spec: dict):
|
|
61
|
+
if (sys.platform != "darwin" or platform.processor() != "arm") and spec.get(
|
|
62
|
+
"engine", ""
|
|
63
|
+
).upper() == "MLX":
|
|
64
|
+
return True
|
|
65
|
+
return False
|
|
66
|
+
|
|
67
|
+
|
|
58
68
|
def _install():
|
|
59
69
|
_model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
|
|
60
70
|
_model_spec_modelscope_json = os.path.join(
|
|
@@ -64,6 +74,7 @@ def _install():
|
|
|
64
74
|
dict(
|
|
65
75
|
(spec["model_name"], AudioModelFamilyV1(**spec))
|
|
66
76
|
for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
|
|
77
|
+
if not _need_filter(spec)
|
|
67
78
|
)
|
|
68
79
|
)
|
|
69
80
|
for model_name, model_spec in BUILTIN_AUDIO_MODELS.items():
|
|
@@ -75,6 +86,7 @@ def _install():
|
|
|
75
86
|
for spec in json.load(
|
|
76
87
|
codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
|
|
77
88
|
)
|
|
89
|
+
if not _need_filter(spec)
|
|
78
90
|
)
|
|
79
91
|
)
|
|
80
92
|
for model_name, model_spec in MODELSCOPE_AUDIO_MODELS.items():
|
xinference/model/audio/core.py
CHANGED
|
@@ -21,9 +21,13 @@ from ..core import CacheableModelSpec, ModelDescription
|
|
|
21
21
|
from ..utils import valid_model_revision
|
|
22
22
|
from .chattts import ChatTTSModel
|
|
23
23
|
from .cosyvoice import CosyVoiceModel
|
|
24
|
+
from .f5tts import F5TTSModel
|
|
25
|
+
from .f5tts_mlx import F5TTSMLXModel
|
|
24
26
|
from .fish_speech import FishSpeechModel
|
|
25
27
|
from .funasr import FunASRModel
|
|
28
|
+
from .melotts import MeloTTSModel
|
|
26
29
|
from .whisper import WhisperModel
|
|
30
|
+
from .whisper_mlx import WhisperMLXModel
|
|
27
31
|
|
|
28
32
|
logger = logging.getLogger(__name__)
|
|
29
33
|
|
|
@@ -43,11 +47,13 @@ class AudioModelFamilyV1(CacheableModelSpec):
|
|
|
43
47
|
model_family: str
|
|
44
48
|
model_name: str
|
|
45
49
|
model_id: str
|
|
46
|
-
model_revision: str
|
|
50
|
+
model_revision: Optional[str]
|
|
47
51
|
multilingual: bool
|
|
52
|
+
language: Optional[str]
|
|
48
53
|
model_ability: Optional[str]
|
|
49
54
|
default_model_config: Optional[Dict[str, Any]]
|
|
50
55
|
default_transcription_config: Optional[Dict[str, Any]]
|
|
56
|
+
engine: Optional[str]
|
|
51
57
|
|
|
52
58
|
|
|
53
59
|
class AudioModelDescription(ModelDescription):
|
|
@@ -160,17 +166,38 @@ def create_audio_model_instance(
|
|
|
160
166
|
model_path: Optional[str] = None,
|
|
161
167
|
**kwargs,
|
|
162
168
|
) -> Tuple[
|
|
163
|
-
Union[
|
|
169
|
+
Union[
|
|
170
|
+
WhisperModel,
|
|
171
|
+
WhisperMLXModel,
|
|
172
|
+
FunASRModel,
|
|
173
|
+
ChatTTSModel,
|
|
174
|
+
CosyVoiceModel,
|
|
175
|
+
FishSpeechModel,
|
|
176
|
+
F5TTSModel,
|
|
177
|
+
F5TTSMLXModel,
|
|
178
|
+
MeloTTSModel,
|
|
179
|
+
],
|
|
164
180
|
AudioModelDescription,
|
|
165
181
|
]:
|
|
166
182
|
model_spec = match_audio(model_name, download_hub)
|
|
167
183
|
if model_path is None:
|
|
168
184
|
model_path = cache(model_spec)
|
|
169
185
|
model: Union[
|
|
170
|
-
WhisperModel,
|
|
186
|
+
WhisperModel,
|
|
187
|
+
WhisperMLXModel,
|
|
188
|
+
FunASRModel,
|
|
189
|
+
ChatTTSModel,
|
|
190
|
+
CosyVoiceModel,
|
|
191
|
+
FishSpeechModel,
|
|
192
|
+
F5TTSModel,
|
|
193
|
+
F5TTSMLXModel,
|
|
194
|
+
MeloTTSModel,
|
|
171
195
|
]
|
|
172
196
|
if model_spec.model_family == "whisper":
|
|
173
|
-
|
|
197
|
+
if not model_spec.engine:
|
|
198
|
+
model = WhisperModel(model_uid, model_path, model_spec, **kwargs)
|
|
199
|
+
else:
|
|
200
|
+
model = WhisperMLXModel(model_uid, model_path, model_spec, **kwargs)
|
|
174
201
|
elif model_spec.model_family == "funasr":
|
|
175
202
|
model = FunASRModel(model_uid, model_path, model_spec, **kwargs)
|
|
176
203
|
elif model_spec.model_family == "ChatTTS":
|
|
@@ -179,6 +206,12 @@ def create_audio_model_instance(
|
|
|
179
206
|
model = CosyVoiceModel(model_uid, model_path, model_spec, **kwargs)
|
|
180
207
|
elif model_spec.model_family == "FishAudio":
|
|
181
208
|
model = FishSpeechModel(model_uid, model_path, model_spec, **kwargs)
|
|
209
|
+
elif model_spec.model_family == "F5-TTS":
|
|
210
|
+
model = F5TTSModel(model_uid, model_path, model_spec, **kwargs)
|
|
211
|
+
elif model_spec.model_family == "F5-TTS-MLX":
|
|
212
|
+
model = F5TTSMLXModel(model_uid, model_path, model_spec, **kwargs)
|
|
213
|
+
elif model_spec.model_family == "MeloTTS":
|
|
214
|
+
model = MeloTTSModel(model_uid, model_path, model_spec, **kwargs)
|
|
182
215
|
else:
|
|
183
216
|
raise Exception(f"Unsupported audio model family: {model_spec.model_family}")
|
|
184
217
|
model_description = AudioModelDescription(
|
|
@@ -39,6 +39,7 @@ class CosyVoiceModel:
|
|
|
39
39
|
self._device = device
|
|
40
40
|
self._model = None
|
|
41
41
|
self._kwargs = kwargs
|
|
42
|
+
self._is_cosyvoice2 = False
|
|
42
43
|
|
|
43
44
|
@property
|
|
44
45
|
def model_ability(self):
|
|
@@ -48,14 +49,32 @@ class CosyVoiceModel:
|
|
|
48
49
|
import os
|
|
49
50
|
import sys
|
|
50
51
|
|
|
52
|
+
import torch
|
|
53
|
+
|
|
51
54
|
# The yaml config loaded from model has hard-coded the import paths. please refer to: load_hyperpyyaml
|
|
52
|
-
|
|
55
|
+
thirdparty_dir = os.path.join(os.path.dirname(__file__), "../../thirdparty")
|
|
56
|
+
sys.path.insert(0, thirdparty_dir)
|
|
57
|
+
|
|
58
|
+
if "CosyVoice2" in self._model_spec.model_name:
|
|
59
|
+
from cosyvoice.cli.cosyvoice import CosyVoice2 as CosyVoice
|
|
60
|
+
|
|
61
|
+
self._is_cosyvoice2 = True
|
|
62
|
+
else:
|
|
63
|
+
from cosyvoice.cli.cosyvoice import CosyVoice
|
|
53
64
|
|
|
54
|
-
|
|
65
|
+
self._is_cosyvoice2 = False
|
|
55
66
|
|
|
56
|
-
|
|
57
|
-
|
|
67
|
+
# Unify this configuration name as 'compile' to be compatible with the name 'load_jit'.
|
|
68
|
+
load_jit = self._kwargs.get("load_jit", False) or self._kwargs.get(
|
|
69
|
+
"compile", False
|
|
58
70
|
)
|
|
71
|
+
logger.info("Loading CosyVoice model, compile=%s...", load_jit)
|
|
72
|
+
self._model = CosyVoice(self._model_path, load_jit=load_jit)
|
|
73
|
+
if self._is_cosyvoice2:
|
|
74
|
+
spk2info_file = os.path.join(thirdparty_dir, "cosyvoice/bin/spk2info.pt")
|
|
75
|
+
self._model.frontend.spk2info = torch.load(
|
|
76
|
+
spk2info_file, map_location=self._device
|
|
77
|
+
)
|
|
59
78
|
|
|
60
79
|
def _speech_handle(
|
|
61
80
|
self,
|
|
@@ -78,6 +97,15 @@ class CosyVoiceModel:
|
|
|
78
97
|
output = self._model.inference_zero_shot(
|
|
79
98
|
input, prompt_text, prompt_speech_16k, stream=stream
|
|
80
99
|
)
|
|
100
|
+
elif instruct_text:
|
|
101
|
+
assert self._is_cosyvoice2
|
|
102
|
+
logger.info("CosyVoice inference_instruct")
|
|
103
|
+
output = self._model.inference_instruct2(
|
|
104
|
+
input,
|
|
105
|
+
instruct_text=instruct_text,
|
|
106
|
+
prompt_speech_16k=prompt_speech_16k,
|
|
107
|
+
stream=stream,
|
|
108
|
+
)
|
|
81
109
|
else:
|
|
82
110
|
logger.info("CosyVoice inference_cross_lingual")
|
|
83
111
|
output = self._model.inference_cross_lingual(
|
|
@@ -87,6 +115,7 @@ class CosyVoiceModel:
|
|
|
87
115
|
available_speakers = self._model.list_avaliable_spks()
|
|
88
116
|
if not voice:
|
|
89
117
|
voice = available_speakers[0]
|
|
118
|
+
logger.info("Auto select speaker: %s", voice)
|
|
90
119
|
else:
|
|
91
120
|
assert (
|
|
92
121
|
voice in available_speakers
|
|
@@ -106,7 +135,9 @@ class CosyVoiceModel:
|
|
|
106
135
|
def _generator_stream():
|
|
107
136
|
with BytesIO() as out:
|
|
108
137
|
writer = torchaudio.io.StreamWriter(out, format=response_format)
|
|
109
|
-
writer.add_audio_stream(
|
|
138
|
+
writer.add_audio_stream(
|
|
139
|
+
sample_rate=self._model.sample_rate, num_channels=1
|
|
140
|
+
)
|
|
110
141
|
i = 0
|
|
111
142
|
last_pos = 0
|
|
112
143
|
with writer.open():
|
|
@@ -125,7 +156,7 @@ class CosyVoiceModel:
|
|
|
125
156
|
chunks = [o["tts_speech"] for o in output]
|
|
126
157
|
t = torch.cat(chunks, dim=1)
|
|
127
158
|
with BytesIO() as out:
|
|
128
|
-
torchaudio.save(out, t,
|
|
159
|
+
torchaudio.save(out, t, self._model.sample_rate, format=response_format)
|
|
129
160
|
return out.getvalue()
|
|
130
161
|
|
|
131
162
|
return _generator_stream() if stream else _generator_block()
|
|
@@ -163,6 +194,8 @@ class CosyVoiceModel:
|
|
|
163
194
|
assert (
|
|
164
195
|
prompt_text is None
|
|
165
196
|
), "CosyVoice Instruct model does not support prompt_text"
|
|
197
|
+
elif self._is_cosyvoice2:
|
|
198
|
+
pass
|
|
166
199
|
else:
|
|
167
200
|
# inference_zero_shot
|
|
168
201
|
# inference_cross_lingual
|