xinference 1.0.1__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_compat.py +2 -0
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +77 -71
- xinference/core/chat_interface.py +6 -1
- xinference/core/model.py +79 -19
- xinference/core/supervisor.py +172 -10
- xinference/core/utils.py +12 -8
- xinference/core/worker.py +102 -4
- xinference/deploy/cmdline.py +3 -1
- xinference/deploy/test/test_cmdline.py +56 -0
- xinference/isolation.py +24 -0
- xinference/model/audio/core.py +16 -0
- xinference/model/audio/cosyvoice.py +39 -6
- xinference/model/audio/f5tts.py +200 -0
- xinference/model/audio/f5tts_mlx.py +260 -0
- xinference/model/audio/fish_speech.py +36 -111
- xinference/model/audio/melotts.py +110 -0
- xinference/model/audio/model_spec.json +99 -3
- xinference/model/audio/model_spec_modelscope.json +27 -0
- xinference/model/audio/utils.py +32 -0
- xinference/model/audio/whisper.py +35 -10
- xinference/model/embedding/core.py +203 -142
- xinference/model/embedding/model_spec.json +7 -0
- xinference/model/embedding/model_spec_modelscope.json +8 -0
- xinference/model/image/core.py +69 -1
- xinference/model/image/model_spec.json +145 -4
- xinference/model/image/model_spec_modelscope.json +150 -4
- xinference/model/image/stable_diffusion/core.py +45 -13
- xinference/model/llm/__init__.py +4 -2
- xinference/model/llm/llm_family.json +536 -53
- xinference/model/llm/llm_family.py +15 -36
- xinference/model/llm/llm_family_modelscope.json +454 -20
- xinference/model/llm/memory.py +1 -1
- xinference/model/llm/mlx/core.py +248 -52
- xinference/model/llm/sglang/core.py +1 -0
- xinference/model/llm/transformers/chatglm.py +9 -5
- xinference/model/llm/transformers/cogagent.py +272 -0
- xinference/model/llm/transformers/core.py +2 -0
- xinference/model/llm/transformers/qwen2_vl.py +12 -1
- xinference/model/llm/transformers/utils.py +16 -8
- xinference/model/llm/utils.py +36 -4
- xinference/model/llm/vllm/core.py +53 -10
- xinference/model/llm/vllm/xavier/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/allocator.py +74 -0
- xinference/model/llm/vllm/xavier/block.py +111 -0
- xinference/model/llm/vllm/xavier/block_manager.py +71 -0
- xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
- xinference/model/llm/vllm/xavier/collective.py +74 -0
- xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
- xinference/model/llm/vllm/xavier/engine.py +247 -0
- xinference/model/llm/vllm/xavier/executor.py +134 -0
- xinference/model/llm/vllm/xavier/scheduler.py +438 -0
- xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
- xinference/model/llm/vllm/xavier/transfer.py +319 -0
- xinference/model/video/diffusers.py +14 -0
- xinference/model/video/model_spec.json +15 -0
- xinference/model/video/model_spec_modelscope.json +16 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
- xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
- xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
- xinference/thirdparty/cosyvoice/bin/train.py +42 -8
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
- xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
- xinference/thirdparty/cosyvoice/cli/model.py +330 -80
- xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
- xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
- xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
- xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
- xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
- xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
- xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
- xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
- xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
- xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
- xinference/thirdparty/cosyvoice/utils/common.py +28 -1
- xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
- xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
- xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
- xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
- xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
- xinference/thirdparty/f5_tts/api.py +166 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
- xinference/thirdparty/f5_tts/eval/README.md +49 -0
- xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
- xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
- xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
- xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
- xinference/thirdparty/f5_tts/infer/README.md +191 -0
- xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
- xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
- xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
- xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
- xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
- xinference/thirdparty/f5_tts/model/__init__.py +10 -0
- xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
- xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
- xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
- xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
- xinference/thirdparty/f5_tts/model/cfm.py +285 -0
- xinference/thirdparty/f5_tts/model/dataset.py +319 -0
- xinference/thirdparty/f5_tts/model/modules.py +658 -0
- xinference/thirdparty/f5_tts/model/trainer.py +366 -0
- xinference/thirdparty/f5_tts/model/utils.py +185 -0
- xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
- xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
- xinference/thirdparty/f5_tts/socket_server.py +159 -0
- xinference/thirdparty/f5_tts/train/README.md +77 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
- xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
- xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
- xinference/thirdparty/f5_tts/train/train.py +75 -0
- xinference/thirdparty/fish_speech/fish_speech/conversation.py +94 -83
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +63 -20
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +1 -26
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
- xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1 -1
- xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +7 -13
- xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
- xinference/thirdparty/fish_speech/tools/fish_e2e.py +2 -2
- xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
- xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
- xinference/thirdparty/fish_speech/tools/llama/generate.py +117 -89
- xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
- xinference/thirdparty/fish_speech/tools/schema.py +11 -28
- xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
- xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
- xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
- xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
- xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
- xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
- xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
- xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
- xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
- xinference/thirdparty/matcha/utils/utils.py +2 -2
- xinference/thirdparty/melo/api.py +135 -0
- xinference/thirdparty/melo/app.py +61 -0
- xinference/thirdparty/melo/attentions.py +459 -0
- xinference/thirdparty/melo/commons.py +160 -0
- xinference/thirdparty/melo/configs/config.json +94 -0
- xinference/thirdparty/melo/data/example/metadata.list +20 -0
- xinference/thirdparty/melo/data_utils.py +413 -0
- xinference/thirdparty/melo/download_utils.py +67 -0
- xinference/thirdparty/melo/infer.py +25 -0
- xinference/thirdparty/melo/init_downloads.py +14 -0
- xinference/thirdparty/melo/losses.py +58 -0
- xinference/thirdparty/melo/main.py +36 -0
- xinference/thirdparty/melo/mel_processing.py +174 -0
- xinference/thirdparty/melo/models.py +1030 -0
- xinference/thirdparty/melo/modules.py +598 -0
- xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
- xinference/thirdparty/melo/monotonic_align/core.py +46 -0
- xinference/thirdparty/melo/preprocess_text.py +135 -0
- xinference/thirdparty/melo/split_utils.py +174 -0
- xinference/thirdparty/melo/text/__init__.py +35 -0
- xinference/thirdparty/melo/text/chinese.py +199 -0
- xinference/thirdparty/melo/text/chinese_bert.py +107 -0
- xinference/thirdparty/melo/text/chinese_mix.py +253 -0
- xinference/thirdparty/melo/text/cleaner.py +36 -0
- xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
- xinference/thirdparty/melo/text/cmudict.rep +129530 -0
- xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
- xinference/thirdparty/melo/text/english.py +284 -0
- xinference/thirdparty/melo/text/english_bert.py +39 -0
- xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
- xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
- xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
- xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
- xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
- xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
- xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
- xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
- xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
- xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
- xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
- xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
- xinference/thirdparty/melo/text/french.py +94 -0
- xinference/thirdparty/melo/text/french_bert.py +39 -0
- xinference/thirdparty/melo/text/japanese.py +647 -0
- xinference/thirdparty/melo/text/japanese_bert.py +49 -0
- xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
- xinference/thirdparty/melo/text/korean.py +192 -0
- xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
- xinference/thirdparty/melo/text/spanish.py +122 -0
- xinference/thirdparty/melo/text/spanish_bert.py +39 -0
- xinference/thirdparty/melo/text/symbols.py +290 -0
- xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
- xinference/thirdparty/melo/train.py +635 -0
- xinference/thirdparty/melo/train.sh +19 -0
- xinference/thirdparty/melo/transforms.py +209 -0
- xinference/thirdparty/melo/utils.py +424 -0
- xinference/types.py +15 -0
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
- xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
- xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
- xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +67 -3
- xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
- xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
- xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
- xinference/web/ui/node_modules/i18next/package.json +129 -0
- xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
- xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
- xinference/web/ui/node_modules/react-i18next/package.json +162 -0
- xinference/web/ui/node_modules/void-elements/package.json +34 -0
- xinference/web/ui/package-lock.json +69 -3
- xinference/web/ui/package.json +2 -0
- xinference/web/ui/src/locales/en.json +186 -0
- xinference/web/ui/src/locales/zh.json +186 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/METADATA +68 -32
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/RECORD +316 -122
- xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
- xinference/thirdparty/fish_speech/tools/api.py +0 -943
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -95
- xinference/thirdparty/fish_speech/tools/webui.py +0 -548
- xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
- xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
- xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
- xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
- /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
- /xinference/thirdparty/{fish_speech/tools → melo/text/fr_phonemizer}/__init__.py +0 -0
- /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/WHEEL +0 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
xinference/_compat.py
CHANGED
|
@@ -72,6 +72,7 @@ OpenAIChatCompletionToolParam = create_model_from_typeddict(ChatCompletionToolPa
|
|
|
72
72
|
OpenAIChatCompletionNamedToolChoiceParam = create_model_from_typeddict(
|
|
73
73
|
ChatCompletionNamedToolChoiceParam
|
|
74
74
|
)
|
|
75
|
+
from openai._types import Body
|
|
75
76
|
|
|
76
77
|
|
|
77
78
|
class JSONSchema(BaseModel):
|
|
@@ -120,4 +121,5 @@ class CreateChatCompletionOpenAI(BaseModel):
|
|
|
120
121
|
tools: Optional[Iterable[OpenAIChatCompletionToolParam]] # type: ignore
|
|
121
122
|
top_logprobs: Optional[int]
|
|
122
123
|
top_p: Optional[float]
|
|
124
|
+
extra_body: Optional[Body]
|
|
123
125
|
user: Optional[str]
|
xinference/_version.py
CHANGED
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "
|
|
11
|
+
"date": "2025-01-24T16:52:57+0800",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "1.
|
|
14
|
+
"full-revisionid": "a57b99b07b40d1082f69a8fc5b968d56bc3636bc",
|
|
15
|
+
"version": "1.2.1"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
xinference/api/restful_api.py
CHANGED
|
@@ -94,9 +94,9 @@ class CreateCompletionRequest(CreateCompletion):
|
|
|
94
94
|
|
|
95
95
|
class CreateEmbeddingRequest(BaseModel):
|
|
96
96
|
model: str
|
|
97
|
-
input: Union[
|
|
98
|
-
|
|
99
|
-
)
|
|
97
|
+
input: Union[
|
|
98
|
+
str, List[str], List[int], List[List[int]], Dict[str, str], List[Dict[str, str]]
|
|
99
|
+
] = Field(description="The input to embed.")
|
|
100
100
|
user: Optional[str] = None
|
|
101
101
|
|
|
102
102
|
class Config:
|
|
@@ -1214,6 +1214,19 @@ class RESTfulAPI(CancelMixin):
|
|
|
1214
1214
|
async def get_address(self) -> JSONResponse:
|
|
1215
1215
|
return JSONResponse(content=self._supervisor_address)
|
|
1216
1216
|
|
|
1217
|
+
async def _get_model_last_error(self, replica_model_uid: bytes, e: Exception):
|
|
1218
|
+
if not isinstance(e, xo.ServerClosed):
|
|
1219
|
+
return e
|
|
1220
|
+
try:
|
|
1221
|
+
model_status = await (await self._get_supervisor_ref()).get_model_status(
|
|
1222
|
+
replica_model_uid.decode("utf-8")
|
|
1223
|
+
)
|
|
1224
|
+
if model_status is not None and model_status.last_error:
|
|
1225
|
+
return Exception(model_status.last_error)
|
|
1226
|
+
except Exception as ex:
|
|
1227
|
+
return ex
|
|
1228
|
+
return e
|
|
1229
|
+
|
|
1217
1230
|
async def create_completion(self, request: Request) -> Response:
|
|
1218
1231
|
raw_body = await request.json()
|
|
1219
1232
|
body = CreateCompletionRequest.parse_obj(raw_body)
|
|
@@ -1272,6 +1285,7 @@ class RESTfulAPI(CancelMixin):
|
|
|
1272
1285
|
)
|
|
1273
1286
|
return
|
|
1274
1287
|
except Exception as ex:
|
|
1288
|
+
ex = await self._get_model_last_error(model.uid, ex)
|
|
1275
1289
|
logger.exception("Completion stream got an error: %s", ex)
|
|
1276
1290
|
await self._report_error_event(model_uid, str(ex))
|
|
1277
1291
|
# https://github.com/openai/openai-python/blob/e0aafc6c1a45334ac889fe3e54957d309c3af93f/src/openai/_streaming.py#L107
|
|
@@ -1286,6 +1300,7 @@ class RESTfulAPI(CancelMixin):
|
|
|
1286
1300
|
data = await model.generate(body.prompt, kwargs, raw_params=raw_kwargs)
|
|
1287
1301
|
return Response(data, media_type="application/json")
|
|
1288
1302
|
except Exception as e:
|
|
1303
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1289
1304
|
logger.error(e, exc_info=True)
|
|
1290
1305
|
await self._report_error_event(model_uid, str(e))
|
|
1291
1306
|
self.handle_request_limit_error(e)
|
|
@@ -1317,14 +1332,11 @@ class RESTfulAPI(CancelMixin):
|
|
|
1317
1332
|
try:
|
|
1318
1333
|
embedding = await model.create_embedding(body.input, **kwargs)
|
|
1319
1334
|
return Response(embedding, media_type="application/json")
|
|
1320
|
-
except RuntimeError as re:
|
|
1321
|
-
logger.error(re, exc_info=True)
|
|
1322
|
-
await self._report_error_event(model_uid, str(re))
|
|
1323
|
-
self.handle_request_limit_error(re)
|
|
1324
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1325
1335
|
except Exception as e:
|
|
1336
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1326
1337
|
logger.error(e, exc_info=True)
|
|
1327
1338
|
await self._report_error_event(model_uid, str(e))
|
|
1339
|
+
self.handle_request_limit_error(e)
|
|
1328
1340
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1329
1341
|
|
|
1330
1342
|
async def convert_ids_to_tokens(self, request: Request) -> Response:
|
|
@@ -1352,14 +1364,11 @@ class RESTfulAPI(CancelMixin):
|
|
|
1352
1364
|
try:
|
|
1353
1365
|
decoded_texts = await model.convert_ids_to_tokens(body.input, **kwargs)
|
|
1354
1366
|
return Response(decoded_texts, media_type="application/json")
|
|
1355
|
-
except RuntimeError as re:
|
|
1356
|
-
logger.error(re, exc_info=True)
|
|
1357
|
-
await self._report_error_event(model_uid, str(re))
|
|
1358
|
-
self.handle_request_limit_error(re)
|
|
1359
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1360
1367
|
except Exception as e:
|
|
1368
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1361
1369
|
logger.error(e, exc_info=True)
|
|
1362
1370
|
await self._report_error_event(model_uid, str(e))
|
|
1371
|
+
self.handle_request_limit_error(e)
|
|
1363
1372
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1364
1373
|
|
|
1365
1374
|
async def rerank(self, request: Request) -> Response:
|
|
@@ -1393,14 +1402,11 @@ class RESTfulAPI(CancelMixin):
|
|
|
1393
1402
|
**parsed_kwargs,
|
|
1394
1403
|
)
|
|
1395
1404
|
return Response(scores, media_type="application/json")
|
|
1396
|
-
except RuntimeError as re:
|
|
1397
|
-
logger.error(re, exc_info=True)
|
|
1398
|
-
await self._report_error_event(model_uid, str(re))
|
|
1399
|
-
self.handle_request_limit_error(re)
|
|
1400
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1401
1405
|
except Exception as e:
|
|
1406
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1402
1407
|
logger.error(e, exc_info=True)
|
|
1403
1408
|
await self._report_error_event(model_uid, str(e))
|
|
1409
|
+
self.handle_request_limit_error(e)
|
|
1404
1410
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1405
1411
|
|
|
1406
1412
|
async def create_transcriptions(
|
|
@@ -1445,13 +1451,11 @@ class RESTfulAPI(CancelMixin):
|
|
|
1445
1451
|
**parsed_kwargs,
|
|
1446
1452
|
)
|
|
1447
1453
|
return Response(content=transcription, media_type="application/json")
|
|
1448
|
-
except RuntimeError as re:
|
|
1449
|
-
logger.error(re, exc_info=True)
|
|
1450
|
-
await self._report_error_event(model_uid, str(re))
|
|
1451
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1452
1454
|
except Exception as e:
|
|
1455
|
+
e = await self._get_model_last_error(model_ref.uid, e)
|
|
1453
1456
|
logger.error(e, exc_info=True)
|
|
1454
1457
|
await self._report_error_event(model_uid, str(e))
|
|
1458
|
+
self.handle_request_limit_error(e)
|
|
1455
1459
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1456
1460
|
|
|
1457
1461
|
async def create_translations(
|
|
@@ -1496,13 +1500,11 @@ class RESTfulAPI(CancelMixin):
|
|
|
1496
1500
|
**parsed_kwargs,
|
|
1497
1501
|
)
|
|
1498
1502
|
return Response(content=translation, media_type="application/json")
|
|
1499
|
-
except RuntimeError as re:
|
|
1500
|
-
logger.error(re, exc_info=True)
|
|
1501
|
-
await self._report_error_event(model_uid, str(re))
|
|
1502
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1503
1503
|
except Exception as e:
|
|
1504
|
+
e = await self._get_model_last_error(model_ref.uid, e)
|
|
1504
1505
|
logger.error(e, exc_info=True)
|
|
1505
1506
|
await self._report_error_event(model_uid, str(e))
|
|
1507
|
+
self.handle_request_limit_error(e)
|
|
1506
1508
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1507
1509
|
|
|
1508
1510
|
async def create_speech(
|
|
@@ -1558,14 +1560,11 @@ class RESTfulAPI(CancelMixin):
|
|
|
1558
1560
|
)
|
|
1559
1561
|
else:
|
|
1560
1562
|
return Response(media_type="application/octet-stream", content=out)
|
|
1561
|
-
except RuntimeError as re:
|
|
1562
|
-
logger.error(re, exc_info=True)
|
|
1563
|
-
await self._report_error_event(model_uid, str(re))
|
|
1564
|
-
self.handle_request_limit_error(re)
|
|
1565
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1566
1563
|
except Exception as e:
|
|
1564
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1567
1565
|
logger.error(e, exc_info=True)
|
|
1568
1566
|
await self._report_error_event(model_uid, str(e))
|
|
1567
|
+
self.handle_request_limit_error(e)
|
|
1569
1568
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1570
1569
|
|
|
1571
1570
|
async def get_progress(self, request_id: str) -> JSONResponse:
|
|
@@ -1611,14 +1610,11 @@ class RESTfulAPI(CancelMixin):
|
|
|
1611
1610
|
logger.error(err_str)
|
|
1612
1611
|
await self._report_error_event(model_uid, err_str)
|
|
1613
1612
|
raise HTTPException(status_code=409, detail=err_str)
|
|
1614
|
-
except RuntimeError as re:
|
|
1615
|
-
logger.error(re, exc_info=True)
|
|
1616
|
-
await self._report_error_event(model_uid, str(re))
|
|
1617
|
-
self.handle_request_limit_error(re)
|
|
1618
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1619
1613
|
except Exception as e:
|
|
1614
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1620
1615
|
logger.error(e, exc_info=True)
|
|
1621
1616
|
await self._report_error_event(model_uid, str(e))
|
|
1617
|
+
self.handle_request_limit_error(e)
|
|
1622
1618
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1623
1619
|
|
|
1624
1620
|
async def sdapi_options(self, request: Request) -> Response:
|
|
@@ -1689,14 +1685,11 @@ class RESTfulAPI(CancelMixin):
|
|
|
1689
1685
|
**kwargs,
|
|
1690
1686
|
)
|
|
1691
1687
|
return Response(content=image_list, media_type="application/json")
|
|
1692
|
-
except RuntimeError as re:
|
|
1693
|
-
logger.error(re, exc_info=True)
|
|
1694
|
-
await self._report_error_event(model_uid, str(re))
|
|
1695
|
-
self.handle_request_limit_error(re)
|
|
1696
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1697
1688
|
except Exception as e:
|
|
1689
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1698
1690
|
logger.error(e, exc_info=True)
|
|
1699
1691
|
await self._report_error_event(model_uid, str(e))
|
|
1692
|
+
self.handle_request_limit_error(e)
|
|
1700
1693
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1701
1694
|
|
|
1702
1695
|
async def sdapi_img2img(self, request: Request) -> Response:
|
|
@@ -1723,14 +1716,11 @@ class RESTfulAPI(CancelMixin):
|
|
|
1723
1716
|
**kwargs,
|
|
1724
1717
|
)
|
|
1725
1718
|
return Response(content=image_list, media_type="application/json")
|
|
1726
|
-
except RuntimeError as re:
|
|
1727
|
-
logger.error(re, exc_info=True)
|
|
1728
|
-
await self._report_error_event(model_uid, str(re))
|
|
1729
|
-
self.handle_request_limit_error(re)
|
|
1730
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1731
1719
|
except Exception as e:
|
|
1720
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1732
1721
|
logger.error(e, exc_info=True)
|
|
1733
1722
|
await self._report_error_event(model_uid, str(e))
|
|
1723
|
+
self.handle_request_limit_error(e)
|
|
1734
1724
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1735
1725
|
|
|
1736
1726
|
async def create_variations(
|
|
@@ -1779,13 +1769,11 @@ class RESTfulAPI(CancelMixin):
|
|
|
1779
1769
|
logger.error(err_str)
|
|
1780
1770
|
await self._report_error_event(model_uid, err_str)
|
|
1781
1771
|
raise HTTPException(status_code=409, detail=err_str)
|
|
1782
|
-
except RuntimeError as re:
|
|
1783
|
-
logger.error(re, exc_info=True)
|
|
1784
|
-
await self._report_error_event(model_uid, str(re))
|
|
1785
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1786
1772
|
except Exception as e:
|
|
1773
|
+
e = await self._get_model_last_error(model_ref.uid, e)
|
|
1787
1774
|
logger.error(e, exc_info=True)
|
|
1788
1775
|
await self._report_error_event(model_uid, str(e))
|
|
1776
|
+
self.handle_request_limit_error(e)
|
|
1789
1777
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1790
1778
|
|
|
1791
1779
|
async def create_inpainting(
|
|
@@ -1841,13 +1829,11 @@ class RESTfulAPI(CancelMixin):
|
|
|
1841
1829
|
logger.error(err_str)
|
|
1842
1830
|
await self._report_error_event(model_uid, err_str)
|
|
1843
1831
|
raise HTTPException(status_code=409, detail=err_str)
|
|
1844
|
-
except RuntimeError as re:
|
|
1845
|
-
logger.error(re, exc_info=True)
|
|
1846
|
-
await self._report_error_event(model_uid, str(re))
|
|
1847
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1848
1832
|
except Exception as e:
|
|
1833
|
+
e = await self._get_model_last_error(model_ref.uid, e)
|
|
1849
1834
|
logger.error(e, exc_info=True)
|
|
1850
1835
|
await self._report_error_event(model_uid, str(e))
|
|
1836
|
+
self.handle_request_limit_error(e)
|
|
1851
1837
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1852
1838
|
|
|
1853
1839
|
async def create_ocr(
|
|
@@ -1887,13 +1873,11 @@ class RESTfulAPI(CancelMixin):
|
|
|
1887
1873
|
logger.error(err_str)
|
|
1888
1874
|
await self._report_error_event(model_uid, err_str)
|
|
1889
1875
|
raise HTTPException(status_code=409, detail=err_str)
|
|
1890
|
-
except RuntimeError as re:
|
|
1891
|
-
logger.error(re, exc_info=True)
|
|
1892
|
-
await self._report_error_event(model_uid, str(re))
|
|
1893
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1894
1876
|
except Exception as e:
|
|
1877
|
+
e = await self._get_model_last_error(model_ref.uid, e)
|
|
1895
1878
|
logger.error(e, exc_info=True)
|
|
1896
1879
|
await self._report_error_event(model_uid, str(e))
|
|
1880
|
+
self.handle_request_limit_error(e)
|
|
1897
1881
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1898
1882
|
|
|
1899
1883
|
async def create_flexible_infer(self, request: Request) -> Response:
|
|
@@ -1920,14 +1904,11 @@ class RESTfulAPI(CancelMixin):
|
|
|
1920
1904
|
try:
|
|
1921
1905
|
result = await model.infer(**kwargs)
|
|
1922
1906
|
return Response(result, media_type="application/json")
|
|
1923
|
-
except RuntimeError as re:
|
|
1924
|
-
logger.error(re, exc_info=True)
|
|
1925
|
-
await self._report_error_event(model_uid, str(re))
|
|
1926
|
-
self.handle_request_limit_error(re)
|
|
1927
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1928
1907
|
except Exception as e:
|
|
1908
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1929
1909
|
logger.error(e, exc_info=True)
|
|
1930
1910
|
await self._report_error_event(model_uid, str(e))
|
|
1911
|
+
self.handle_request_limit_error(e)
|
|
1931
1912
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1932
1913
|
|
|
1933
1914
|
async def create_videos(self, request: Request) -> Response:
|
|
@@ -1952,14 +1933,11 @@ class RESTfulAPI(CancelMixin):
|
|
|
1952
1933
|
**kwargs,
|
|
1953
1934
|
)
|
|
1954
1935
|
return Response(content=video_list, media_type="application/json")
|
|
1955
|
-
except RuntimeError as re:
|
|
1956
|
-
logger.error(re, exc_info=True)
|
|
1957
|
-
await self._report_error_event(model_uid, str(re))
|
|
1958
|
-
self.handle_request_limit_error(re)
|
|
1959
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1960
1936
|
except Exception as e:
|
|
1937
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1961
1938
|
logger.error(e, exc_info=True)
|
|
1962
1939
|
await self._report_error_event(model_uid, str(e))
|
|
1940
|
+
self.handle_request_limit_error(e)
|
|
1963
1941
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1964
1942
|
|
|
1965
1943
|
async def create_chat_completion(self, request: Request) -> Response:
|
|
@@ -2044,7 +2022,6 @@ class RESTfulAPI(CancelMixin):
|
|
|
2044
2022
|
)
|
|
2045
2023
|
if body.tools and body.stream:
|
|
2046
2024
|
is_vllm = await model.is_vllm_backend()
|
|
2047
|
-
|
|
2048
2025
|
if not (
|
|
2049
2026
|
(is_vllm and model_family in QWEN_TOOL_CALL_FAMILY)
|
|
2050
2027
|
or (not is_vllm and model_family in GLM4_TOOL_CALL_FAMILY)
|
|
@@ -2054,7 +2031,8 @@ class RESTfulAPI(CancelMixin):
|
|
|
2054
2031
|
detail="Streaming support for tool calls is available only when using "
|
|
2055
2032
|
"Qwen models with vLLM backend or GLM4-chat models without vLLM backend.",
|
|
2056
2033
|
)
|
|
2057
|
-
|
|
2034
|
+
if "skip_special_tokens" in raw_kwargs and await model.is_vllm_backend():
|
|
2035
|
+
kwargs["skip_special_tokens"] = raw_kwargs["skip_special_tokens"]
|
|
2058
2036
|
if body.stream:
|
|
2059
2037
|
|
|
2060
2038
|
async def stream_results():
|
|
@@ -2084,6 +2062,7 @@ class RESTfulAPI(CancelMixin):
|
|
|
2084
2062
|
# TODO: Cannot yield here. Yield here would leads to error for the next streaming request.
|
|
2085
2063
|
return
|
|
2086
2064
|
except Exception as ex:
|
|
2065
|
+
ex = await self._get_model_last_error(model.uid, ex)
|
|
2087
2066
|
logger.exception("Chat completion stream got an error: %s", ex)
|
|
2088
2067
|
await self._report_error_event(model_uid, str(ex))
|
|
2089
2068
|
# https://github.com/openai/openai-python/blob/e0aafc6c1a45334ac889fe3e54957d309c3af93f/src/openai/_streaming.py#L107
|
|
@@ -2102,6 +2081,7 @@ class RESTfulAPI(CancelMixin):
|
|
|
2102
2081
|
)
|
|
2103
2082
|
return Response(content=data, media_type="application/json")
|
|
2104
2083
|
except Exception as e:
|
|
2084
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
2105
2085
|
logger.error(e, exc_info=True)
|
|
2106
2086
|
await self._report_error_event(model_uid, str(e))
|
|
2107
2087
|
self.handle_request_limit_error(e)
|
|
@@ -2346,7 +2326,8 @@ class RESTfulAPI(CancelMixin):
|
|
|
2346
2326
|
@staticmethod
|
|
2347
2327
|
def extract_guided_params(raw_body: dict) -> dict:
|
|
2348
2328
|
kwargs = {}
|
|
2349
|
-
|
|
2329
|
+
raw_extra_body: dict = raw_body.get("extra_body") # type: ignore
|
|
2330
|
+
if raw_body.get("guided_json"):
|
|
2350
2331
|
kwargs["guided_json"] = raw_body.get("guided_json")
|
|
2351
2332
|
if raw_body.get("guided_regex") is not None:
|
|
2352
2333
|
kwargs["guided_regex"] = raw_body.get("guided_regex")
|
|
@@ -2362,6 +2343,31 @@ class RESTfulAPI(CancelMixin):
|
|
|
2362
2343
|
kwargs["guided_whitespace_pattern"] = raw_body.get(
|
|
2363
2344
|
"guided_whitespace_pattern"
|
|
2364
2345
|
)
|
|
2346
|
+
# Parse OpenAI extra_body
|
|
2347
|
+
if raw_extra_body is not None:
|
|
2348
|
+
if raw_extra_body.get("guided_json"):
|
|
2349
|
+
kwargs["guided_json"] = raw_extra_body.get("guided_json")
|
|
2350
|
+
if raw_extra_body.get("guided_regex") is not None:
|
|
2351
|
+
kwargs["guided_regex"] = raw_extra_body.get("guided_regex")
|
|
2352
|
+
if raw_extra_body.get("guided_choice") is not None:
|
|
2353
|
+
kwargs["guided_choice"] = raw_extra_body.get("guided_choice")
|
|
2354
|
+
if raw_extra_body.get("guided_grammar") is not None:
|
|
2355
|
+
kwargs["guided_grammar"] = raw_extra_body.get("guided_grammar")
|
|
2356
|
+
if raw_extra_body.get("guided_json_object") is not None:
|
|
2357
|
+
kwargs["guided_json_object"] = raw_extra_body.get("guided_json_object")
|
|
2358
|
+
if raw_extra_body.get("guided_decoding_backend") is not None:
|
|
2359
|
+
kwargs["guided_decoding_backend"] = raw_extra_body.get(
|
|
2360
|
+
"guided_decoding_backend"
|
|
2361
|
+
)
|
|
2362
|
+
if raw_extra_body.get("guided_whitespace_pattern") is not None:
|
|
2363
|
+
kwargs["guided_whitespace_pattern"] = raw_extra_body.get(
|
|
2364
|
+
"guided_whitespace_pattern"
|
|
2365
|
+
)
|
|
2366
|
+
if raw_extra_body.get("platform") is not None:
|
|
2367
|
+
kwargs["platform"] = raw_extra_body.get("platform")
|
|
2368
|
+
if raw_extra_body.get("format") is not None:
|
|
2369
|
+
kwargs["format"] = raw_extra_body.get("format")
|
|
2370
|
+
|
|
2365
2371
|
return kwargs
|
|
2366
2372
|
|
|
2367
2373
|
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import base64
|
|
16
|
+
import html
|
|
16
17
|
import logging
|
|
17
18
|
import os
|
|
18
19
|
from io import BytesIO
|
|
@@ -137,7 +138,11 @@ class GradioInterface:
|
|
|
137
138
|
if "content" not in delta:
|
|
138
139
|
continue
|
|
139
140
|
else:
|
|
140
|
-
|
|
141
|
+
# some model like deepseek-r1-distill-qwen
|
|
142
|
+
# will generate <think>...</think> ...
|
|
143
|
+
# in gradio, no output will be rendered,
|
|
144
|
+
# thus escape html tags in advance
|
|
145
|
+
response_content += html.escape(delta["content"])
|
|
141
146
|
yield response_content
|
|
142
147
|
|
|
143
148
|
yield response_content
|
xinference/core/model.py
CHANGED
|
@@ -35,6 +35,7 @@ from typing import (
|
|
|
35
35
|
List,
|
|
36
36
|
Optional,
|
|
37
37
|
Union,
|
|
38
|
+
no_type_check,
|
|
38
39
|
)
|
|
39
40
|
|
|
40
41
|
import sse_starlette.sse
|
|
@@ -78,6 +79,9 @@ XINFERENCE_BATCHING_ALLOWED_VISION_MODELS = [
|
|
|
78
79
|
]
|
|
79
80
|
|
|
80
81
|
XINFERENCE_TEXT_TO_IMAGE_BATCHING_ALLOWED_MODELS = ["FLUX.1-dev", "FLUX.1-schnell"]
|
|
82
|
+
XINFERENCE_TEST_OUT_OF_MEMORY_ERROR = bool(
|
|
83
|
+
os.getenv("XINFERENCE_TEST_OUT_OF_MEMORY_ERROR", False)
|
|
84
|
+
)
|
|
81
85
|
|
|
82
86
|
|
|
83
87
|
def request_limit(fn):
|
|
@@ -118,20 +122,25 @@ def request_limit(fn):
|
|
|
118
122
|
|
|
119
123
|
def oom_check(fn):
|
|
120
124
|
@functools.wraps(fn)
|
|
121
|
-
def _wrapper(*args, **kwargs):
|
|
125
|
+
def _wrapper(self, *args, **kwargs):
|
|
122
126
|
try:
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
+
if XINFERENCE_TEST_OUT_OF_MEMORY_ERROR:
|
|
128
|
+
raise OutOfMemoryError("Test Out of Memory Error")
|
|
129
|
+
return fn(self, *args, **kwargs)
|
|
130
|
+
except OutOfMemoryError as ex:
|
|
131
|
+
assert self._loop is not None
|
|
132
|
+
asyncio.run_coroutine_threadsafe(
|
|
133
|
+
self._handle_oom_error(ex), loop=self._loop
|
|
134
|
+
)
|
|
127
135
|
|
|
128
136
|
@functools.wraps(fn)
|
|
129
|
-
async def _async_wrapper(*args, **kwargs):
|
|
137
|
+
async def _async_wrapper(self, *args, **kwargs):
|
|
130
138
|
try:
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
139
|
+
if XINFERENCE_TEST_OUT_OF_MEMORY_ERROR:
|
|
140
|
+
raise OutOfMemoryError("Test Out of Memory Error")
|
|
141
|
+
return await fn(self, *args, **kwargs)
|
|
142
|
+
except OutOfMemoryError as ex:
|
|
143
|
+
await self._handle_oom_error(ex)
|
|
135
144
|
|
|
136
145
|
assert not inspect.isasyncgen(fn)
|
|
137
146
|
assert not inspect.isgenerator(fn)
|
|
@@ -178,6 +187,16 @@ class ModelActor(xo.StatelessActor, CancelMixin):
|
|
|
178
187
|
if hasattr(self._model, "stop") and callable(self._model.stop):
|
|
179
188
|
self._model.stop()
|
|
180
189
|
|
|
190
|
+
if isinstance(self._model, LLMVLLMModel):
|
|
191
|
+
if self._transfer_ref is not None:
|
|
192
|
+
try:
|
|
193
|
+
await xo.destroy_actor(self._transfer_ref)
|
|
194
|
+
del self._transfer_ref
|
|
195
|
+
except Exception as e:
|
|
196
|
+
logger.debug(
|
|
197
|
+
f"Destroy transfer actor failed, address: {self.address}, error: {e}"
|
|
198
|
+
)
|
|
199
|
+
|
|
181
200
|
if (
|
|
182
201
|
isinstance(self._model, (LLMPytorchModel, LLMVLLMModel, SGLANGModel))
|
|
183
202
|
and self._model.model_spec.model_format == "pytorch"
|
|
@@ -206,6 +225,7 @@ class ModelActor(xo.StatelessActor, CancelMixin):
|
|
|
206
225
|
replica_model_uid: str,
|
|
207
226
|
model_description: Optional["ModelDescription"] = None,
|
|
208
227
|
request_limits: Optional[int] = None,
|
|
228
|
+
xavier_config: Optional[Dict] = None,
|
|
209
229
|
):
|
|
210
230
|
super().__init__()
|
|
211
231
|
from ..model.llm.lmdeploy.core import LMDeployModel
|
|
@@ -247,6 +267,11 @@ class ModelActor(xo.StatelessActor, CancelMixin):
|
|
|
247
267
|
self._scheduler_ref = None
|
|
248
268
|
self._text_to_image_scheduler_ref = None
|
|
249
269
|
|
|
270
|
+
if isinstance(self._model, VLLMModel):
|
|
271
|
+
self._xavier_config = xavier_config
|
|
272
|
+
self._model.set_xavier_config(xavier_config)
|
|
273
|
+
self._transfer_ref = None
|
|
274
|
+
|
|
250
275
|
async def __post_create__(self):
|
|
251
276
|
self._loop = asyncio.get_running_loop()
|
|
252
277
|
|
|
@@ -278,6 +303,29 @@ class ModelActor(xo.StatelessActor, CancelMixin):
|
|
|
278
303
|
def decrease_serve_count(self):
|
|
279
304
|
self._serve_count -= 1
|
|
280
305
|
|
|
306
|
+
@no_type_check
|
|
307
|
+
async def start_transfer_for_vllm(self, rank_addresses: List[str]):
|
|
308
|
+
from ..model.llm.vllm.core import VLLMModel
|
|
309
|
+
from ..model.llm.vllm.xavier.transfer import TransferActor
|
|
310
|
+
|
|
311
|
+
assert isinstance(self._model, VLLMModel)
|
|
312
|
+
rank = self._xavier_config.get("rank") # type: ignore
|
|
313
|
+
self._transfer_ref = await xo.create_actor(
|
|
314
|
+
TransferActor,
|
|
315
|
+
address=self.address,
|
|
316
|
+
uid=f"{TransferActor.default_uid()}-{rank}",
|
|
317
|
+
rank=rank,
|
|
318
|
+
world_size=self._xavier_config.get("world_size"), # type: ignore
|
|
319
|
+
rank_address=self._xavier_config.get("rank_address"), # type: ignore
|
|
320
|
+
store_address=self._xavier_config.get("store_address"), # type: ignore
|
|
321
|
+
store_port=self._xavier_config.get("store_port"), # type: ignore
|
|
322
|
+
world_addresses=rank_addresses,
|
|
323
|
+
)
|
|
324
|
+
await self._model.init_xavier()
|
|
325
|
+
logger.debug(
|
|
326
|
+
f"Init transfer actor: {self._transfer_ref.address}, rank: {rank} done for vllm." # type: ignore
|
|
327
|
+
)
|
|
328
|
+
|
|
281
329
|
async def _record_completion_metrics(
|
|
282
330
|
self, duration, completion_tokens, prompt_tokens
|
|
283
331
|
):
|
|
@@ -440,11 +488,24 @@ class ModelActor(xo.StatelessActor, CancelMixin):
|
|
|
440
488
|
)
|
|
441
489
|
)
|
|
442
490
|
|
|
491
|
+
async def _handle_oom_error(self, ex):
|
|
492
|
+
error_message = (
|
|
493
|
+
f"Model actor is out of memory, model id: {self.model_uid()}, error: {ex}"
|
|
494
|
+
)
|
|
495
|
+
logger.exception(error_message)
|
|
496
|
+
worker_ref = await self._get_worker_ref()
|
|
497
|
+
await worker_ref.update_model_status(
|
|
498
|
+
self._replica_model_uid, last_error=error_message
|
|
499
|
+
)
|
|
500
|
+
os._exit(1)
|
|
501
|
+
|
|
443
502
|
def _to_generator(self, output_type: str, gen: types.GeneratorType):
|
|
444
503
|
start_time = time.time()
|
|
445
504
|
time_to_first_token = None
|
|
446
505
|
final_usage = None
|
|
447
506
|
try:
|
|
507
|
+
if XINFERENCE_TEST_OUT_OF_MEMORY_ERROR:
|
|
508
|
+
raise OutOfMemoryError("Test Out of Memory Error")
|
|
448
509
|
for v in gen:
|
|
449
510
|
if time_to_first_token is None:
|
|
450
511
|
time_to_first_token = (time.time() - start_time) * 1000
|
|
@@ -456,11 +517,11 @@ class ModelActor(xo.StatelessActor, CancelMixin):
|
|
|
456
517
|
output_type == "binary"
|
|
457
518
|
), f"Unknown output type '{output_type}'"
|
|
458
519
|
yield sse_starlette.sse.ensure_bytes(v, None)
|
|
459
|
-
except OutOfMemoryError:
|
|
460
|
-
|
|
461
|
-
|
|
520
|
+
except OutOfMemoryError as ex:
|
|
521
|
+
assert self._loop is not None
|
|
522
|
+
asyncio.run_coroutine_threadsafe(
|
|
523
|
+
self._handle_oom_error(ex), loop=self._loop
|
|
462
524
|
)
|
|
463
|
-
os._exit(1)
|
|
464
525
|
finally:
|
|
465
526
|
if self._loop is not None and time_to_first_token is not None:
|
|
466
527
|
coro = self.record_metrics(
|
|
@@ -482,6 +543,8 @@ class ModelActor(xo.StatelessActor, CancelMixin):
|
|
|
482
543
|
time_to_first_token = None
|
|
483
544
|
final_usage = None
|
|
484
545
|
try:
|
|
546
|
+
if XINFERENCE_TEST_OUT_OF_MEMORY_ERROR:
|
|
547
|
+
raise OutOfMemoryError("Test Out of Memory Error")
|
|
485
548
|
async for v in gen:
|
|
486
549
|
if time_to_first_token is None:
|
|
487
550
|
time_to_first_token = (time.time() - start_time) * 1000
|
|
@@ -494,11 +557,8 @@ class ModelActor(xo.StatelessActor, CancelMixin):
|
|
|
494
557
|
output_type == "binary"
|
|
495
558
|
), f"Unknown output type '{output_type}'"
|
|
496
559
|
yield await asyncio.to_thread(sse_starlette.sse.ensure_bytes, v, None)
|
|
497
|
-
except OutOfMemoryError:
|
|
498
|
-
|
|
499
|
-
"Model actor is out of memory, model id: %s", self.model_uid()
|
|
500
|
-
)
|
|
501
|
-
os._exit(1)
|
|
560
|
+
except OutOfMemoryError as ex:
|
|
561
|
+
await self._handle_oom_error(ex)
|
|
502
562
|
finally:
|
|
503
563
|
coros = []
|
|
504
564
|
if time_to_first_token is not None:
|