xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_compat.py +24 -2
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +219 -77
- xinference/client/restful/restful_client.py +47 -2
- xinference/constants.py +1 -0
- xinference/core/chat_interface.py +6 -1
- xinference/core/model.py +124 -34
- xinference/core/supervisor.py +180 -12
- xinference/core/utils.py +73 -4
- xinference/core/worker.py +102 -4
- xinference/deploy/cmdline.py +3 -1
- xinference/deploy/test/test_cmdline.py +56 -0
- xinference/isolation.py +24 -0
- xinference/model/audio/__init__.py +12 -0
- xinference/model/audio/core.py +37 -4
- xinference/model/audio/cosyvoice.py +39 -6
- xinference/model/audio/f5tts.py +200 -0
- xinference/model/audio/f5tts_mlx.py +260 -0
- xinference/model/audio/fish_speech.py +70 -110
- xinference/model/audio/melotts.py +110 -0
- xinference/model/audio/model_spec.json +179 -3
- xinference/model/audio/model_spec_modelscope.json +27 -0
- xinference/model/audio/utils.py +32 -0
- xinference/model/audio/whisper.py +35 -10
- xinference/model/audio/whisper_mlx.py +208 -0
- xinference/model/embedding/core.py +322 -6
- xinference/model/embedding/model_spec.json +8 -1
- xinference/model/embedding/model_spec_modelscope.json +9 -1
- xinference/model/image/core.py +69 -1
- xinference/model/image/model_spec.json +145 -4
- xinference/model/image/model_spec_modelscope.json +150 -4
- xinference/model/image/stable_diffusion/core.py +50 -15
- xinference/model/llm/__init__.py +6 -2
- xinference/model/llm/llm_family.json +1055 -93
- xinference/model/llm/llm_family.py +15 -36
- xinference/model/llm/llm_family_modelscope.json +1031 -78
- xinference/model/llm/memory.py +1 -1
- xinference/model/llm/mlx/core.py +285 -47
- xinference/model/llm/sglang/core.py +2 -0
- xinference/model/llm/transformers/chatglm.py +9 -5
- xinference/model/llm/transformers/cogagent.py +272 -0
- xinference/model/llm/transformers/core.py +3 -0
- xinference/model/llm/transformers/glm_edge_v.py +230 -0
- xinference/model/llm/transformers/qwen2_vl.py +12 -1
- xinference/model/llm/transformers/utils.py +16 -8
- xinference/model/llm/utils.py +55 -4
- xinference/model/llm/vllm/core.py +137 -12
- xinference/model/llm/vllm/xavier/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/allocator.py +74 -0
- xinference/model/llm/vllm/xavier/block.py +111 -0
- xinference/model/llm/vllm/xavier/block_manager.py +71 -0
- xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
- xinference/model/llm/vllm/xavier/collective.py +74 -0
- xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
- xinference/model/llm/vllm/xavier/engine.py +247 -0
- xinference/model/llm/vllm/xavier/executor.py +134 -0
- xinference/model/llm/vllm/xavier/scheduler.py +438 -0
- xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
- xinference/model/llm/vllm/xavier/transfer.py +319 -0
- xinference/model/rerank/core.py +11 -4
- xinference/model/video/diffusers.py +14 -0
- xinference/model/video/model_spec.json +15 -0
- xinference/model/video/model_spec_modelscope.json +16 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
- xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
- xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
- xinference/thirdparty/cosyvoice/bin/train.py +42 -8
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
- xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
- xinference/thirdparty/cosyvoice/cli/model.py +330 -80
- xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
- xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
- xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
- xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
- xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
- xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
- xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
- xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
- xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
- xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
- xinference/thirdparty/cosyvoice/utils/common.py +28 -1
- xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
- xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
- xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
- xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
- xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
- xinference/thirdparty/f5_tts/api.py +166 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
- xinference/thirdparty/f5_tts/eval/README.md +49 -0
- xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
- xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
- xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
- xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
- xinference/thirdparty/f5_tts/infer/README.md +191 -0
- xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
- xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
- xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
- xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
- xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
- xinference/thirdparty/f5_tts/model/__init__.py +10 -0
- xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
- xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
- xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
- xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
- xinference/thirdparty/f5_tts/model/cfm.py +285 -0
- xinference/thirdparty/f5_tts/model/dataset.py +319 -0
- xinference/thirdparty/f5_tts/model/modules.py +658 -0
- xinference/thirdparty/f5_tts/model/trainer.py +366 -0
- xinference/thirdparty/f5_tts/model/utils.py +185 -0
- xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
- xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
- xinference/thirdparty/f5_tts/socket_server.py +159 -0
- xinference/thirdparty/f5_tts/train/README.md +77 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
- xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
- xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
- xinference/thirdparty/f5_tts/train/train.py +75 -0
- xinference/thirdparty/fish_speech/fish_speech/conversation.py +266 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +137 -29
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +17 -11
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
- xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
- xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +2 -2
- xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +34 -18
- xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
- xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
- xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
- xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
- xinference/thirdparty/fish_speech/tools/llama/generate.py +484 -72
- xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
- xinference/thirdparty/fish_speech/tools/schema.py +170 -0
- xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
- xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
- xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
- xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
- xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
- xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
- xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
- xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
- xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
- xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
- xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
- xinference/thirdparty/matcha/utils/utils.py +2 -2
- xinference/thirdparty/melo/api.py +135 -0
- xinference/thirdparty/melo/app.py +61 -0
- xinference/thirdparty/melo/attentions.py +459 -0
- xinference/thirdparty/melo/commons.py +160 -0
- xinference/thirdparty/melo/configs/config.json +94 -0
- xinference/thirdparty/melo/data/example/metadata.list +20 -0
- xinference/thirdparty/melo/data_utils.py +413 -0
- xinference/thirdparty/melo/download_utils.py +67 -0
- xinference/thirdparty/melo/infer.py +25 -0
- xinference/thirdparty/melo/init_downloads.py +14 -0
- xinference/thirdparty/melo/losses.py +58 -0
- xinference/thirdparty/melo/main.py +36 -0
- xinference/thirdparty/melo/mel_processing.py +174 -0
- xinference/thirdparty/melo/models.py +1030 -0
- xinference/thirdparty/melo/modules.py +598 -0
- xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
- xinference/thirdparty/melo/monotonic_align/core.py +46 -0
- xinference/thirdparty/melo/preprocess_text.py +135 -0
- xinference/thirdparty/melo/split_utils.py +174 -0
- xinference/thirdparty/melo/text/__init__.py +35 -0
- xinference/thirdparty/melo/text/chinese.py +199 -0
- xinference/thirdparty/melo/text/chinese_bert.py +107 -0
- xinference/thirdparty/melo/text/chinese_mix.py +253 -0
- xinference/thirdparty/melo/text/cleaner.py +36 -0
- xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
- xinference/thirdparty/melo/text/cmudict.rep +129530 -0
- xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
- xinference/thirdparty/melo/text/english.py +284 -0
- xinference/thirdparty/melo/text/english_bert.py +39 -0
- xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
- xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
- xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
- xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
- xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
- xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
- xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
- xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
- xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
- xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
- xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
- xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
- xinference/thirdparty/melo/text/french.py +94 -0
- xinference/thirdparty/melo/text/french_bert.py +39 -0
- xinference/thirdparty/melo/text/japanese.py +647 -0
- xinference/thirdparty/melo/text/japanese_bert.py +49 -0
- xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
- xinference/thirdparty/melo/text/korean.py +192 -0
- xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
- xinference/thirdparty/melo/text/spanish.py +122 -0
- xinference/thirdparty/melo/text/spanish_bert.py +39 -0
- xinference/thirdparty/melo/text/symbols.py +290 -0
- xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
- xinference/thirdparty/melo/train.py +635 -0
- xinference/thirdparty/melo/train.sh +19 -0
- xinference/thirdparty/melo/transforms.py +209 -0
- xinference/thirdparty/melo/utils.py +424 -0
- xinference/types.py +17 -1
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
- xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
- xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
- xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +67 -3
- xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
- xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
- xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
- xinference/web/ui/node_modules/i18next/package.json +129 -0
- xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
- xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
- xinference/web/ui/node_modules/react-i18next/package.json +162 -0
- xinference/web/ui/node_modules/void-elements/package.json +34 -0
- xinference/web/ui/package-lock.json +69 -3
- xinference/web/ui/package.json +2 -0
- xinference/web/ui/src/locales/en.json +186 -0
- xinference/web/ui/src/locales/zh.json +186 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/METADATA +96 -36
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/RECORD +335 -146
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/WHEEL +1 -1
- xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
- xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/api.py +0 -440
- xinference/thirdparty/fish_speech/tools/commons.py +0 -35
- xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -34
- xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/webui.py +0 -485
- xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
- xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
- xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
- xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
- /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
- /xinference/thirdparty/{fish_speech/fish_speech/configs → melo/text/fr_phonemizer}/__init__.py +0 -0
- /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
xinference/_compat.py
CHANGED
|
@@ -60,6 +60,10 @@ from openai.types.chat.chat_completion_stream_options_param import (
|
|
|
60
60
|
ChatCompletionStreamOptionsParam,
|
|
61
61
|
)
|
|
62
62
|
from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
|
|
63
|
+
from openai.types.shared_params.response_format_json_object import (
|
|
64
|
+
ResponseFormatJSONObject,
|
|
65
|
+
)
|
|
66
|
+
from openai.types.shared_params.response_format_text import ResponseFormatText
|
|
63
67
|
|
|
64
68
|
OpenAIChatCompletionStreamOptionsParam = create_model_from_typeddict(
|
|
65
69
|
ChatCompletionStreamOptionsParam
|
|
@@ -68,6 +72,24 @@ OpenAIChatCompletionToolParam = create_model_from_typeddict(ChatCompletionToolPa
|
|
|
68
72
|
OpenAIChatCompletionNamedToolChoiceParam = create_model_from_typeddict(
|
|
69
73
|
ChatCompletionNamedToolChoiceParam
|
|
70
74
|
)
|
|
75
|
+
from openai._types import Body
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class JSONSchema(BaseModel):
|
|
79
|
+
name: str
|
|
80
|
+
description: Optional[str] = None
|
|
81
|
+
schema_: Optional[Dict[str, object]] = Field(alias="schema", default=None)
|
|
82
|
+
strict: Optional[bool] = None
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class ResponseFormatJSONSchema(BaseModel):
|
|
86
|
+
json_schema: JSONSchema
|
|
87
|
+
type: Literal["json_schema"]
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
ResponseFormat = Union[
|
|
91
|
+
ResponseFormatText, ResponseFormatJSONObject, ResponseFormatJSONSchema
|
|
92
|
+
]
|
|
71
93
|
|
|
72
94
|
|
|
73
95
|
class CreateChatCompletionOpenAI(BaseModel):
|
|
@@ -84,8 +106,7 @@ class CreateChatCompletionOpenAI(BaseModel):
|
|
|
84
106
|
n: Optional[int]
|
|
85
107
|
parallel_tool_calls: Optional[bool]
|
|
86
108
|
presence_penalty: Optional[float]
|
|
87
|
-
|
|
88
|
-
# response_format: ResponseFormat
|
|
109
|
+
response_format: Optional[ResponseFormat]
|
|
89
110
|
seed: Optional[int]
|
|
90
111
|
service_tier: Optional[Literal["auto", "default"]]
|
|
91
112
|
stop: Union[Optional[str], List[str]]
|
|
@@ -100,4 +121,5 @@ class CreateChatCompletionOpenAI(BaseModel):
|
|
|
100
121
|
tools: Optional[Iterable[OpenAIChatCompletionToolParam]] # type: ignore
|
|
101
122
|
top_logprobs: Optional[int]
|
|
102
123
|
top_p: Optional[float]
|
|
124
|
+
extra_body: Optional[Body]
|
|
103
125
|
user: Optional[str]
|
xinference/_version.py
CHANGED
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "
|
|
11
|
+
"date": "2025-01-24T16:52:57+0800",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "
|
|
14
|
+
"full-revisionid": "a57b99b07b40d1082f69a8fc5b968d56bc3636bc",
|
|
15
|
+
"version": "1.2.1"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
xinference/api/restful_api.py
CHANGED
|
@@ -52,10 +52,14 @@ from xoscar.utils import get_next_port
|
|
|
52
52
|
|
|
53
53
|
from .._compat import BaseModel, Field
|
|
54
54
|
from .._version import get_versions
|
|
55
|
-
from ..constants import
|
|
55
|
+
from ..constants import (
|
|
56
|
+
XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
|
|
57
|
+
XINFERENCE_DEFAULT_ENDPOINT_PORT,
|
|
58
|
+
XINFERENCE_DISABLE_METRICS,
|
|
59
|
+
)
|
|
56
60
|
from ..core.event import Event, EventCollectorActor, EventType
|
|
57
61
|
from ..core.supervisor import SupervisorActor
|
|
58
|
-
from ..core.utils import json_dumps
|
|
62
|
+
from ..core.utils import CancelMixin, json_dumps
|
|
59
63
|
from ..types import (
|
|
60
64
|
ChatCompletion,
|
|
61
65
|
Completion,
|
|
@@ -90,9 +94,9 @@ class CreateCompletionRequest(CreateCompletion):
|
|
|
90
94
|
|
|
91
95
|
class CreateEmbeddingRequest(BaseModel):
|
|
92
96
|
model: str
|
|
93
|
-
input: Union[
|
|
94
|
-
|
|
95
|
-
)
|
|
97
|
+
input: Union[
|
|
98
|
+
str, List[str], List[int], List[List[int]], Dict[str, str], List[Dict[str, str]]
|
|
99
|
+
] = Field(description="The input to embed.")
|
|
96
100
|
user: Optional[str] = None
|
|
97
101
|
|
|
98
102
|
class Config:
|
|
@@ -111,6 +115,7 @@ class RerankRequest(BaseModel):
|
|
|
111
115
|
return_documents: Optional[bool] = False
|
|
112
116
|
return_len: Optional[bool] = False
|
|
113
117
|
max_chunks_per_doc: Optional[int] = None
|
|
118
|
+
kwargs: Optional[str] = None
|
|
114
119
|
|
|
115
120
|
|
|
116
121
|
class TextToImageRequest(BaseModel):
|
|
@@ -206,7 +211,7 @@ class BuildGradioImageInterfaceRequest(BaseModel):
|
|
|
206
211
|
model_ability: List[str]
|
|
207
212
|
|
|
208
213
|
|
|
209
|
-
class RESTfulAPI:
|
|
214
|
+
class RESTfulAPI(CancelMixin):
|
|
210
215
|
def __init__(
|
|
211
216
|
self,
|
|
212
217
|
supervisor_address: str,
|
|
@@ -484,6 +489,16 @@ class RESTfulAPI:
|
|
|
484
489
|
else None
|
|
485
490
|
),
|
|
486
491
|
)
|
|
492
|
+
self._router.add_api_route(
|
|
493
|
+
"/v1/convert_ids_to_tokens",
|
|
494
|
+
self.convert_ids_to_tokens,
|
|
495
|
+
methods=["POST"],
|
|
496
|
+
dependencies=(
|
|
497
|
+
[Security(self._auth_service, scopes=["models:read"])]
|
|
498
|
+
if self.is_authenticated()
|
|
499
|
+
else None
|
|
500
|
+
),
|
|
501
|
+
)
|
|
487
502
|
self._router.add_api_route(
|
|
488
503
|
"/v1/rerank",
|
|
489
504
|
self.rerank,
|
|
@@ -1199,6 +1214,19 @@ class RESTfulAPI:
|
|
|
1199
1214
|
async def get_address(self) -> JSONResponse:
|
|
1200
1215
|
return JSONResponse(content=self._supervisor_address)
|
|
1201
1216
|
|
|
1217
|
+
async def _get_model_last_error(self, replica_model_uid: bytes, e: Exception):
|
|
1218
|
+
if not isinstance(e, xo.ServerClosed):
|
|
1219
|
+
return e
|
|
1220
|
+
try:
|
|
1221
|
+
model_status = await (await self._get_supervisor_ref()).get_model_status(
|
|
1222
|
+
replica_model_uid.decode("utf-8")
|
|
1223
|
+
)
|
|
1224
|
+
if model_status is not None and model_status.last_error:
|
|
1225
|
+
return Exception(model_status.last_error)
|
|
1226
|
+
except Exception as ex:
|
|
1227
|
+
return ex
|
|
1228
|
+
return e
|
|
1229
|
+
|
|
1202
1230
|
async def create_completion(self, request: Request) -> Response:
|
|
1203
1231
|
raw_body = await request.json()
|
|
1204
1232
|
body = CreateCompletionRequest.parse_obj(raw_body)
|
|
@@ -1214,6 +1242,9 @@ class RESTfulAPI:
|
|
|
1214
1242
|
raw_kwargs = {k: v for k, v in raw_body.items() if k not in exclude}
|
|
1215
1243
|
kwargs = body.dict(exclude_unset=True, exclude=exclude)
|
|
1216
1244
|
|
|
1245
|
+
# guided_decoding params
|
|
1246
|
+
kwargs.update(self.extract_guided_params(raw_body=raw_body))
|
|
1247
|
+
|
|
1217
1248
|
# TODO: Decide if this default value override is necessary #1061
|
|
1218
1249
|
if body.max_tokens is None:
|
|
1219
1250
|
kwargs["max_tokens"] = max_tokens_field.default
|
|
@@ -1254,11 +1285,14 @@ class RESTfulAPI:
|
|
|
1254
1285
|
)
|
|
1255
1286
|
return
|
|
1256
1287
|
except Exception as ex:
|
|
1288
|
+
ex = await self._get_model_last_error(model.uid, ex)
|
|
1257
1289
|
logger.exception("Completion stream got an error: %s", ex)
|
|
1258
1290
|
await self._report_error_event(model_uid, str(ex))
|
|
1259
1291
|
# https://github.com/openai/openai-python/blob/e0aafc6c1a45334ac889fe3e54957d309c3af93f/src/openai/_streaming.py#L107
|
|
1260
1292
|
yield dict(data=json.dumps({"error": str(ex)}))
|
|
1261
1293
|
return
|
|
1294
|
+
finally:
|
|
1295
|
+
await model.decrease_serve_count()
|
|
1262
1296
|
|
|
1263
1297
|
return EventSourceResponse(stream_results())
|
|
1264
1298
|
else:
|
|
@@ -1266,6 +1300,7 @@ class RESTfulAPI:
|
|
|
1266
1300
|
data = await model.generate(body.prompt, kwargs, raw_params=raw_kwargs)
|
|
1267
1301
|
return Response(data, media_type="application/json")
|
|
1268
1302
|
except Exception as e:
|
|
1303
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1269
1304
|
logger.error(e, exc_info=True)
|
|
1270
1305
|
await self._report_error_event(model_uid, str(e))
|
|
1271
1306
|
self.handle_request_limit_error(e)
|
|
@@ -1297,25 +1332,49 @@ class RESTfulAPI:
|
|
|
1297
1332
|
try:
|
|
1298
1333
|
embedding = await model.create_embedding(body.input, **kwargs)
|
|
1299
1334
|
return Response(embedding, media_type="application/json")
|
|
1300
|
-
except
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
self.
|
|
1304
|
-
|
|
1335
|
+
except Exception as e:
|
|
1336
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1337
|
+
logger.error(e, exc_info=True)
|
|
1338
|
+
await self._report_error_event(model_uid, str(e))
|
|
1339
|
+
self.handle_request_limit_error(e)
|
|
1340
|
+
raise HTTPException(status_code=500, detail=str(e))
|
|
1341
|
+
|
|
1342
|
+
async def convert_ids_to_tokens(self, request: Request) -> Response:
|
|
1343
|
+
payload = await request.json()
|
|
1344
|
+
body = CreateEmbeddingRequest.parse_obj(payload)
|
|
1345
|
+
model_uid = body.model
|
|
1346
|
+
exclude = {
|
|
1347
|
+
"model",
|
|
1348
|
+
"input",
|
|
1349
|
+
"user",
|
|
1350
|
+
}
|
|
1351
|
+
kwargs = {key: value for key, value in payload.items() if key not in exclude}
|
|
1352
|
+
|
|
1353
|
+
try:
|
|
1354
|
+
model = await (await self._get_supervisor_ref()).get_model(model_uid)
|
|
1355
|
+
except ValueError as ve:
|
|
1356
|
+
logger.error(str(ve), exc_info=True)
|
|
1357
|
+
await self._report_error_event(model_uid, str(ve))
|
|
1358
|
+
raise HTTPException(status_code=400, detail=str(ve))
|
|
1305
1359
|
except Exception as e:
|
|
1306
1360
|
logger.error(e, exc_info=True)
|
|
1307
1361
|
await self._report_error_event(model_uid, str(e))
|
|
1308
1362
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1309
1363
|
|
|
1364
|
+
try:
|
|
1365
|
+
decoded_texts = await model.convert_ids_to_tokens(body.input, **kwargs)
|
|
1366
|
+
return Response(decoded_texts, media_type="application/json")
|
|
1367
|
+
except Exception as e:
|
|
1368
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1369
|
+
logger.error(e, exc_info=True)
|
|
1370
|
+
await self._report_error_event(model_uid, str(e))
|
|
1371
|
+
self.handle_request_limit_error(e)
|
|
1372
|
+
raise HTTPException(status_code=500, detail=str(e))
|
|
1373
|
+
|
|
1310
1374
|
async def rerank(self, request: Request) -> Response:
|
|
1311
1375
|
payload = await request.json()
|
|
1312
1376
|
body = RerankRequest.parse_obj(payload)
|
|
1313
1377
|
model_uid = body.model
|
|
1314
|
-
kwargs = {
|
|
1315
|
-
key: value
|
|
1316
|
-
for key, value in payload.items()
|
|
1317
|
-
if key not in RerankRequest.__annotations__.keys()
|
|
1318
|
-
}
|
|
1319
1378
|
|
|
1320
1379
|
try:
|
|
1321
1380
|
model = await (await self._get_supervisor_ref()).get_model(model_uid)
|
|
@@ -1329,6 +1388,10 @@ class RESTfulAPI:
|
|
|
1329
1388
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1330
1389
|
|
|
1331
1390
|
try:
|
|
1391
|
+
if body.kwargs is not None:
|
|
1392
|
+
parsed_kwargs = json.loads(body.kwargs)
|
|
1393
|
+
else:
|
|
1394
|
+
parsed_kwargs = {}
|
|
1332
1395
|
scores = await model.rerank(
|
|
1333
1396
|
body.documents,
|
|
1334
1397
|
body.query,
|
|
@@ -1336,17 +1399,14 @@ class RESTfulAPI:
|
|
|
1336
1399
|
max_chunks_per_doc=body.max_chunks_per_doc,
|
|
1337
1400
|
return_documents=body.return_documents,
|
|
1338
1401
|
return_len=body.return_len,
|
|
1339
|
-
**
|
|
1402
|
+
**parsed_kwargs,
|
|
1340
1403
|
)
|
|
1341
1404
|
return Response(scores, media_type="application/json")
|
|
1342
|
-
except RuntimeError as re:
|
|
1343
|
-
logger.error(re, exc_info=True)
|
|
1344
|
-
await self._report_error_event(model_uid, str(re))
|
|
1345
|
-
self.handle_request_limit_error(re)
|
|
1346
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1347
1405
|
except Exception as e:
|
|
1406
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1348
1407
|
logger.error(e, exc_info=True)
|
|
1349
1408
|
await self._report_error_event(model_uid, str(e))
|
|
1409
|
+
self.handle_request_limit_error(e)
|
|
1350
1410
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1351
1411
|
|
|
1352
1412
|
async def create_transcriptions(
|
|
@@ -1391,13 +1451,11 @@ class RESTfulAPI:
|
|
|
1391
1451
|
**parsed_kwargs,
|
|
1392
1452
|
)
|
|
1393
1453
|
return Response(content=transcription, media_type="application/json")
|
|
1394
|
-
except RuntimeError as re:
|
|
1395
|
-
logger.error(re, exc_info=True)
|
|
1396
|
-
await self._report_error_event(model_uid, str(re))
|
|
1397
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1398
1454
|
except Exception as e:
|
|
1455
|
+
e = await self._get_model_last_error(model_ref.uid, e)
|
|
1399
1456
|
logger.error(e, exc_info=True)
|
|
1400
1457
|
await self._report_error_event(model_uid, str(e))
|
|
1458
|
+
self.handle_request_limit_error(e)
|
|
1401
1459
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1402
1460
|
|
|
1403
1461
|
async def create_translations(
|
|
@@ -1442,13 +1500,11 @@ class RESTfulAPI:
|
|
|
1442
1500
|
**parsed_kwargs,
|
|
1443
1501
|
)
|
|
1444
1502
|
return Response(content=translation, media_type="application/json")
|
|
1445
|
-
except RuntimeError as re:
|
|
1446
|
-
logger.error(re, exc_info=True)
|
|
1447
|
-
await self._report_error_event(model_uid, str(re))
|
|
1448
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1449
1503
|
except Exception as e:
|
|
1504
|
+
e = await self._get_model_last_error(model_ref.uid, e)
|
|
1450
1505
|
logger.error(e, exc_info=True)
|
|
1451
1506
|
await self._report_error_event(model_uid, str(e))
|
|
1507
|
+
self.handle_request_limit_error(e)
|
|
1452
1508
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1453
1509
|
|
|
1454
1510
|
async def create_speech(
|
|
@@ -1491,19 +1547,24 @@ class RESTfulAPI:
|
|
|
1491
1547
|
**parsed_kwargs,
|
|
1492
1548
|
)
|
|
1493
1549
|
if body.stream:
|
|
1550
|
+
|
|
1551
|
+
async def stream_results():
|
|
1552
|
+
try:
|
|
1553
|
+
async for item in out:
|
|
1554
|
+
yield item
|
|
1555
|
+
finally:
|
|
1556
|
+
await model.decrease_serve_count()
|
|
1557
|
+
|
|
1494
1558
|
return EventSourceResponse(
|
|
1495
|
-
media_type="application/octet-stream", content=
|
|
1559
|
+
media_type="application/octet-stream", content=stream_results()
|
|
1496
1560
|
)
|
|
1497
1561
|
else:
|
|
1498
1562
|
return Response(media_type="application/octet-stream", content=out)
|
|
1499
|
-
except RuntimeError as re:
|
|
1500
|
-
logger.error(re, exc_info=True)
|
|
1501
|
-
await self._report_error_event(model_uid, str(re))
|
|
1502
|
-
self.handle_request_limit_error(re)
|
|
1503
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1504
1563
|
except Exception as e:
|
|
1564
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1505
1565
|
logger.error(e, exc_info=True)
|
|
1506
1566
|
await self._report_error_event(model_uid, str(e))
|
|
1567
|
+
self.handle_request_limit_error(e)
|
|
1507
1568
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1508
1569
|
|
|
1509
1570
|
async def get_progress(self, request_id: str) -> JSONResponse:
|
|
@@ -1531,8 +1592,11 @@ class RESTfulAPI:
|
|
|
1531
1592
|
await self._report_error_event(model_uid, str(e))
|
|
1532
1593
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1533
1594
|
|
|
1595
|
+
request_id = None
|
|
1534
1596
|
try:
|
|
1535
1597
|
kwargs = json.loads(body.kwargs) if body.kwargs else {}
|
|
1598
|
+
request_id = kwargs.get("request_id")
|
|
1599
|
+
self._add_running_task(request_id)
|
|
1536
1600
|
image_list = await model.text_to_image(
|
|
1537
1601
|
prompt=body.prompt,
|
|
1538
1602
|
n=body.n,
|
|
@@ -1541,14 +1605,16 @@ class RESTfulAPI:
|
|
|
1541
1605
|
**kwargs,
|
|
1542
1606
|
)
|
|
1543
1607
|
return Response(content=image_list, media_type="application/json")
|
|
1544
|
-
except
|
|
1545
|
-
|
|
1546
|
-
|
|
1547
|
-
self.
|
|
1548
|
-
raise HTTPException(status_code=
|
|
1608
|
+
except asyncio.CancelledError:
|
|
1609
|
+
err_str = f"The request has been cancelled: {request_id}"
|
|
1610
|
+
logger.error(err_str)
|
|
1611
|
+
await self._report_error_event(model_uid, err_str)
|
|
1612
|
+
raise HTTPException(status_code=409, detail=err_str)
|
|
1549
1613
|
except Exception as e:
|
|
1614
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1550
1615
|
logger.error(e, exc_info=True)
|
|
1551
1616
|
await self._report_error_event(model_uid, str(e))
|
|
1617
|
+
self.handle_request_limit_error(e)
|
|
1552
1618
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1553
1619
|
|
|
1554
1620
|
async def sdapi_options(self, request: Request) -> Response:
|
|
@@ -1619,14 +1685,11 @@ class RESTfulAPI:
|
|
|
1619
1685
|
**kwargs,
|
|
1620
1686
|
)
|
|
1621
1687
|
return Response(content=image_list, media_type="application/json")
|
|
1622
|
-
except RuntimeError as re:
|
|
1623
|
-
logger.error(re, exc_info=True)
|
|
1624
|
-
await self._report_error_event(model_uid, str(re))
|
|
1625
|
-
self.handle_request_limit_error(re)
|
|
1626
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1627
1688
|
except Exception as e:
|
|
1689
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1628
1690
|
logger.error(e, exc_info=True)
|
|
1629
1691
|
await self._report_error_event(model_uid, str(e))
|
|
1692
|
+
self.handle_request_limit_error(e)
|
|
1630
1693
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1631
1694
|
|
|
1632
1695
|
async def sdapi_img2img(self, request: Request) -> Response:
|
|
@@ -1653,14 +1716,11 @@ class RESTfulAPI:
|
|
|
1653
1716
|
**kwargs,
|
|
1654
1717
|
)
|
|
1655
1718
|
return Response(content=image_list, media_type="application/json")
|
|
1656
|
-
except RuntimeError as re:
|
|
1657
|
-
logger.error(re, exc_info=True)
|
|
1658
|
-
await self._report_error_event(model_uid, str(re))
|
|
1659
|
-
self.handle_request_limit_error(re)
|
|
1660
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1661
1719
|
except Exception as e:
|
|
1720
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1662
1721
|
logger.error(e, exc_info=True)
|
|
1663
1722
|
await self._report_error_event(model_uid, str(e))
|
|
1723
|
+
self.handle_request_limit_error(e)
|
|
1664
1724
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1665
1725
|
|
|
1666
1726
|
async def create_variations(
|
|
@@ -1686,11 +1746,14 @@ class RESTfulAPI:
|
|
|
1686
1746
|
await self._report_error_event(model_uid, str(e))
|
|
1687
1747
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1688
1748
|
|
|
1749
|
+
request_id = None
|
|
1689
1750
|
try:
|
|
1690
1751
|
if kwargs is not None:
|
|
1691
1752
|
parsed_kwargs = json.loads(kwargs)
|
|
1692
1753
|
else:
|
|
1693
1754
|
parsed_kwargs = {}
|
|
1755
|
+
request_id = parsed_kwargs.get("request_id")
|
|
1756
|
+
self._add_running_task(request_id)
|
|
1694
1757
|
image_list = await model_ref.image_to_image(
|
|
1695
1758
|
image=Image.open(image.file),
|
|
1696
1759
|
prompt=prompt,
|
|
@@ -1701,13 +1764,16 @@ class RESTfulAPI:
|
|
|
1701
1764
|
**parsed_kwargs,
|
|
1702
1765
|
)
|
|
1703
1766
|
return Response(content=image_list, media_type="application/json")
|
|
1704
|
-
except
|
|
1705
|
-
|
|
1706
|
-
|
|
1707
|
-
|
|
1767
|
+
except asyncio.CancelledError:
|
|
1768
|
+
err_str = f"The request has been cancelled: {request_id}"
|
|
1769
|
+
logger.error(err_str)
|
|
1770
|
+
await self._report_error_event(model_uid, err_str)
|
|
1771
|
+
raise HTTPException(status_code=409, detail=err_str)
|
|
1708
1772
|
except Exception as e:
|
|
1773
|
+
e = await self._get_model_last_error(model_ref.uid, e)
|
|
1709
1774
|
logger.error(e, exc_info=True)
|
|
1710
1775
|
await self._report_error_event(model_uid, str(e))
|
|
1776
|
+
self.handle_request_limit_error(e)
|
|
1711
1777
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1712
1778
|
|
|
1713
1779
|
async def create_inpainting(
|
|
@@ -1734,11 +1800,14 @@ class RESTfulAPI:
|
|
|
1734
1800
|
await self._report_error_event(model_uid, str(e))
|
|
1735
1801
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1736
1802
|
|
|
1803
|
+
request_id = None
|
|
1737
1804
|
try:
|
|
1738
1805
|
if kwargs is not None:
|
|
1739
1806
|
parsed_kwargs = json.loads(kwargs)
|
|
1740
1807
|
else:
|
|
1741
1808
|
parsed_kwargs = {}
|
|
1809
|
+
request_id = parsed_kwargs.get("request_id")
|
|
1810
|
+
self._add_running_task(request_id)
|
|
1742
1811
|
im = Image.open(image.file)
|
|
1743
1812
|
mask_im = Image.open(mask_image.file)
|
|
1744
1813
|
if not size:
|
|
@@ -1755,13 +1824,16 @@ class RESTfulAPI:
|
|
|
1755
1824
|
**parsed_kwargs,
|
|
1756
1825
|
)
|
|
1757
1826
|
return Response(content=image_list, media_type="application/json")
|
|
1758
|
-
except
|
|
1759
|
-
|
|
1760
|
-
|
|
1761
|
-
|
|
1827
|
+
except asyncio.CancelledError:
|
|
1828
|
+
err_str = f"The request has been cancelled: {request_id}"
|
|
1829
|
+
logger.error(err_str)
|
|
1830
|
+
await self._report_error_event(model_uid, err_str)
|
|
1831
|
+
raise HTTPException(status_code=409, detail=err_str)
|
|
1762
1832
|
except Exception as e:
|
|
1833
|
+
e = await self._get_model_last_error(model_ref.uid, e)
|
|
1763
1834
|
logger.error(e, exc_info=True)
|
|
1764
1835
|
await self._report_error_event(model_uid, str(e))
|
|
1836
|
+
self.handle_request_limit_error(e)
|
|
1765
1837
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1766
1838
|
|
|
1767
1839
|
async def create_ocr(
|
|
@@ -1782,24 +1854,30 @@ class RESTfulAPI:
|
|
|
1782
1854
|
await self._report_error_event(model_uid, str(e))
|
|
1783
1855
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1784
1856
|
|
|
1857
|
+
request_id = None
|
|
1785
1858
|
try:
|
|
1786
1859
|
if kwargs is not None:
|
|
1787
1860
|
parsed_kwargs = json.loads(kwargs)
|
|
1788
1861
|
else:
|
|
1789
1862
|
parsed_kwargs = {}
|
|
1863
|
+
request_id = parsed_kwargs.get("request_id")
|
|
1864
|
+
self._add_running_task(request_id)
|
|
1790
1865
|
im = Image.open(image.file)
|
|
1791
1866
|
text = await model_ref.ocr(
|
|
1792
1867
|
image=im,
|
|
1793
1868
|
**parsed_kwargs,
|
|
1794
1869
|
)
|
|
1795
1870
|
return Response(content=text, media_type="text/plain")
|
|
1796
|
-
except
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
|
|
1871
|
+
except asyncio.CancelledError:
|
|
1872
|
+
err_str = f"The request has been cancelled: {request_id}"
|
|
1873
|
+
logger.error(err_str)
|
|
1874
|
+
await self._report_error_event(model_uid, err_str)
|
|
1875
|
+
raise HTTPException(status_code=409, detail=err_str)
|
|
1800
1876
|
except Exception as e:
|
|
1877
|
+
e = await self._get_model_last_error(model_ref.uid, e)
|
|
1801
1878
|
logger.error(e, exc_info=True)
|
|
1802
1879
|
await self._report_error_event(model_uid, str(e))
|
|
1880
|
+
self.handle_request_limit_error(e)
|
|
1803
1881
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1804
1882
|
|
|
1805
1883
|
async def create_flexible_infer(self, request: Request) -> Response:
|
|
@@ -1826,14 +1904,11 @@ class RESTfulAPI:
|
|
|
1826
1904
|
try:
|
|
1827
1905
|
result = await model.infer(**kwargs)
|
|
1828
1906
|
return Response(result, media_type="application/json")
|
|
1829
|
-
except RuntimeError as re:
|
|
1830
|
-
logger.error(re, exc_info=True)
|
|
1831
|
-
await self._report_error_event(model_uid, str(re))
|
|
1832
|
-
self.handle_request_limit_error(re)
|
|
1833
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1834
1907
|
except Exception as e:
|
|
1908
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1835
1909
|
logger.error(e, exc_info=True)
|
|
1836
1910
|
await self._report_error_event(model_uid, str(e))
|
|
1911
|
+
self.handle_request_limit_error(e)
|
|
1837
1912
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1838
1913
|
|
|
1839
1914
|
async def create_videos(self, request: Request) -> Response:
|
|
@@ -1858,14 +1933,11 @@ class RESTfulAPI:
|
|
|
1858
1933
|
**kwargs,
|
|
1859
1934
|
)
|
|
1860
1935
|
return Response(content=video_list, media_type="application/json")
|
|
1861
|
-
except RuntimeError as re:
|
|
1862
|
-
logger.error(re, exc_info=True)
|
|
1863
|
-
await self._report_error_event(model_uid, str(re))
|
|
1864
|
-
self.handle_request_limit_error(re)
|
|
1865
|
-
raise HTTPException(status_code=400, detail=str(re))
|
|
1866
1936
|
except Exception as e:
|
|
1937
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
1867
1938
|
logger.error(e, exc_info=True)
|
|
1868
1939
|
await self._report_error_event(model_uid, str(e))
|
|
1940
|
+
self.handle_request_limit_error(e)
|
|
1869
1941
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1870
1942
|
|
|
1871
1943
|
async def create_chat_completion(self, request: Request) -> Response:
|
|
@@ -1880,9 +1952,13 @@ class RESTfulAPI:
|
|
|
1880
1952
|
"logit_bias_type",
|
|
1881
1953
|
"user",
|
|
1882
1954
|
}
|
|
1955
|
+
|
|
1883
1956
|
raw_kwargs = {k: v for k, v in raw_body.items() if k not in exclude}
|
|
1884
1957
|
kwargs = body.dict(exclude_unset=True, exclude=exclude)
|
|
1885
1958
|
|
|
1959
|
+
# guided_decoding params
|
|
1960
|
+
kwargs.update(self.extract_guided_params(raw_body=raw_body))
|
|
1961
|
+
|
|
1886
1962
|
# TODO: Decide if this default value override is necessary #1061
|
|
1887
1963
|
if body.max_tokens is None:
|
|
1888
1964
|
kwargs["max_tokens"] = max_tokens_field.default
|
|
@@ -1946,7 +2022,6 @@ class RESTfulAPI:
|
|
|
1946
2022
|
)
|
|
1947
2023
|
if body.tools and body.stream:
|
|
1948
2024
|
is_vllm = await model.is_vllm_backend()
|
|
1949
|
-
|
|
1950
2025
|
if not (
|
|
1951
2026
|
(is_vllm and model_family in QWEN_TOOL_CALL_FAMILY)
|
|
1952
2027
|
or (not is_vllm and model_family in GLM4_TOOL_CALL_FAMILY)
|
|
@@ -1956,7 +2031,8 @@ class RESTfulAPI:
|
|
|
1956
2031
|
detail="Streaming support for tool calls is available only when using "
|
|
1957
2032
|
"Qwen models with vLLM backend or GLM4-chat models without vLLM backend.",
|
|
1958
2033
|
)
|
|
1959
|
-
|
|
2034
|
+
if "skip_special_tokens" in raw_kwargs and await model.is_vllm_backend():
|
|
2035
|
+
kwargs["skip_special_tokens"] = raw_kwargs["skip_special_tokens"]
|
|
1960
2036
|
if body.stream:
|
|
1961
2037
|
|
|
1962
2038
|
async def stream_results():
|
|
@@ -1986,11 +2062,14 @@ class RESTfulAPI:
|
|
|
1986
2062
|
# TODO: Cannot yield here. Yield here would leads to error for the next streaming request.
|
|
1987
2063
|
return
|
|
1988
2064
|
except Exception as ex:
|
|
2065
|
+
ex = await self._get_model_last_error(model.uid, ex)
|
|
1989
2066
|
logger.exception("Chat completion stream got an error: %s", ex)
|
|
1990
2067
|
await self._report_error_event(model_uid, str(ex))
|
|
1991
2068
|
# https://github.com/openai/openai-python/blob/e0aafc6c1a45334ac889fe3e54957d309c3af93f/src/openai/_streaming.py#L107
|
|
1992
2069
|
yield dict(data=json.dumps({"error": str(ex)}))
|
|
1993
2070
|
return
|
|
2071
|
+
finally:
|
|
2072
|
+
await model.decrease_serve_count()
|
|
1994
2073
|
|
|
1995
2074
|
return EventSourceResponse(stream_results())
|
|
1996
2075
|
else:
|
|
@@ -2002,6 +2081,7 @@ class RESTfulAPI:
|
|
|
2002
2081
|
)
|
|
2003
2082
|
return Response(content=data, media_type="application/json")
|
|
2004
2083
|
except Exception as e:
|
|
2084
|
+
e = await self._get_model_last_error(model.uid, e)
|
|
2005
2085
|
logger.error(e, exc_info=True)
|
|
2006
2086
|
await self._report_error_event(model_uid, str(e))
|
|
2007
2087
|
self.handle_request_limit_error(e)
|
|
@@ -2111,10 +2191,25 @@ class RESTfulAPI:
|
|
|
2111
2191
|
logger.error(e, exc_info=True)
|
|
2112
2192
|
raise HTTPException(status_code=500, detail=str(e))
|
|
2113
2193
|
|
|
2114
|
-
async def abort_request(
|
|
2194
|
+
async def abort_request(
|
|
2195
|
+
self, request: Request, model_uid: str, request_id: str
|
|
2196
|
+
) -> JSONResponse:
|
|
2115
2197
|
try:
|
|
2198
|
+
payload = await request.json()
|
|
2199
|
+
block_duration = payload.get(
|
|
2200
|
+
"block_duration", XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION
|
|
2201
|
+
)
|
|
2202
|
+
logger.info(
|
|
2203
|
+
"Abort request with model uid: %s, request id: %s, block duration: %s",
|
|
2204
|
+
model_uid,
|
|
2205
|
+
request_id,
|
|
2206
|
+
block_duration,
|
|
2207
|
+
)
|
|
2116
2208
|
supervisor_ref = await self._get_supervisor_ref()
|
|
2117
|
-
res = await supervisor_ref.abort_request(
|
|
2209
|
+
res = await supervisor_ref.abort_request(
|
|
2210
|
+
model_uid, request_id, block_duration
|
|
2211
|
+
)
|
|
2212
|
+
self._cancel_running_task(request_id, block_duration)
|
|
2118
2213
|
return JSONResponse(content=res)
|
|
2119
2214
|
except Exception as e:
|
|
2120
2215
|
logger.error(e, exc_info=True)
|
|
@@ -2228,6 +2323,53 @@ class RESTfulAPI:
|
|
|
2228
2323
|
logger.error(e, exc_info=True)
|
|
2229
2324
|
raise HTTPException(status_code=500, detail=str(e))
|
|
2230
2325
|
|
|
2326
|
+
@staticmethod
|
|
2327
|
+
def extract_guided_params(raw_body: dict) -> dict:
|
|
2328
|
+
kwargs = {}
|
|
2329
|
+
raw_extra_body: dict = raw_body.get("extra_body") # type: ignore
|
|
2330
|
+
if raw_body.get("guided_json"):
|
|
2331
|
+
kwargs["guided_json"] = raw_body.get("guided_json")
|
|
2332
|
+
if raw_body.get("guided_regex") is not None:
|
|
2333
|
+
kwargs["guided_regex"] = raw_body.get("guided_regex")
|
|
2334
|
+
if raw_body.get("guided_choice") is not None:
|
|
2335
|
+
kwargs["guided_choice"] = raw_body.get("guided_choice")
|
|
2336
|
+
if raw_body.get("guided_grammar") is not None:
|
|
2337
|
+
kwargs["guided_grammar"] = raw_body.get("guided_grammar")
|
|
2338
|
+
if raw_body.get("guided_json_object") is not None:
|
|
2339
|
+
kwargs["guided_json_object"] = raw_body.get("guided_json_object")
|
|
2340
|
+
if raw_body.get("guided_decoding_backend") is not None:
|
|
2341
|
+
kwargs["guided_decoding_backend"] = raw_body.get("guided_decoding_backend")
|
|
2342
|
+
if raw_body.get("guided_whitespace_pattern") is not None:
|
|
2343
|
+
kwargs["guided_whitespace_pattern"] = raw_body.get(
|
|
2344
|
+
"guided_whitespace_pattern"
|
|
2345
|
+
)
|
|
2346
|
+
# Parse OpenAI extra_body
|
|
2347
|
+
if raw_extra_body is not None:
|
|
2348
|
+
if raw_extra_body.get("guided_json"):
|
|
2349
|
+
kwargs["guided_json"] = raw_extra_body.get("guided_json")
|
|
2350
|
+
if raw_extra_body.get("guided_regex") is not None:
|
|
2351
|
+
kwargs["guided_regex"] = raw_extra_body.get("guided_regex")
|
|
2352
|
+
if raw_extra_body.get("guided_choice") is not None:
|
|
2353
|
+
kwargs["guided_choice"] = raw_extra_body.get("guided_choice")
|
|
2354
|
+
if raw_extra_body.get("guided_grammar") is not None:
|
|
2355
|
+
kwargs["guided_grammar"] = raw_extra_body.get("guided_grammar")
|
|
2356
|
+
if raw_extra_body.get("guided_json_object") is not None:
|
|
2357
|
+
kwargs["guided_json_object"] = raw_extra_body.get("guided_json_object")
|
|
2358
|
+
if raw_extra_body.get("guided_decoding_backend") is not None:
|
|
2359
|
+
kwargs["guided_decoding_backend"] = raw_extra_body.get(
|
|
2360
|
+
"guided_decoding_backend"
|
|
2361
|
+
)
|
|
2362
|
+
if raw_extra_body.get("guided_whitespace_pattern") is not None:
|
|
2363
|
+
kwargs["guided_whitespace_pattern"] = raw_extra_body.get(
|
|
2364
|
+
"guided_whitespace_pattern"
|
|
2365
|
+
)
|
|
2366
|
+
if raw_extra_body.get("platform") is not None:
|
|
2367
|
+
kwargs["platform"] = raw_extra_body.get("platform")
|
|
2368
|
+
if raw_extra_body.get("format") is not None:
|
|
2369
|
+
kwargs["format"] = raw_extra_body.get("format")
|
|
2370
|
+
|
|
2371
|
+
return kwargs
|
|
2372
|
+
|
|
2231
2373
|
|
|
2232
2374
|
def run(
|
|
2233
2375
|
supervisor_address: str,
|