xinference 1.10.0__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +473 -31
- xinference/client/restful/async_restful_client.py +178 -8
- xinference/client/restful/restful_client.py +151 -3
- xinference/core/supervisor.py +99 -53
- xinference/core/worker.py +10 -0
- xinference/deploy/cmdline.py +15 -0
- xinference/model/audio/core.py +21 -6
- xinference/model/audio/indextts2.py +166 -0
- xinference/model/audio/model_spec.json +58 -21
- xinference/model/image/model_spec.json +159 -90
- xinference/model/image/stable_diffusion/core.py +13 -4
- xinference/model/llm/__init__.py +6 -2
- xinference/model/llm/llm_family.json +1299 -174
- xinference/model/llm/mlx/distributed_models/core.py +41 -0
- xinference/model/llm/mlx/distributed_models/qwen2.py +1 -2
- xinference/model/llm/sglang/core.py +44 -11
- xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +94 -32
- xinference/model/llm/tool_parsers/qwen_tool_parser.py +29 -4
- xinference/model/llm/transformers/chatglm.py +3 -0
- xinference/model/llm/transformers/core.py +129 -36
- xinference/model/llm/transformers/multimodal/minicpmv45.py +340 -0
- xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
- xinference/model/llm/transformers/utils.py +23 -0
- xinference/model/llm/utils.py +48 -32
- xinference/model/llm/vllm/core.py +207 -72
- xinference/model/utils.py +74 -31
- xinference/thirdparty/audiotools/__init__.py +10 -0
- xinference/thirdparty/audiotools/core/__init__.py +4 -0
- xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
- xinference/thirdparty/audiotools/core/display.py +194 -0
- xinference/thirdparty/audiotools/core/dsp.py +390 -0
- xinference/thirdparty/audiotools/core/effects.py +647 -0
- xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
- xinference/thirdparty/audiotools/core/loudness.py +320 -0
- xinference/thirdparty/audiotools/core/playback.py +252 -0
- xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
- xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
- xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
- xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
- xinference/thirdparty/audiotools/core/util.py +671 -0
- xinference/thirdparty/audiotools/core/whisper.py +97 -0
- xinference/thirdparty/audiotools/data/__init__.py +3 -0
- xinference/thirdparty/audiotools/data/datasets.py +517 -0
- xinference/thirdparty/audiotools/data/preprocess.py +81 -0
- xinference/thirdparty/audiotools/data/transforms.py +1592 -0
- xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
- xinference/thirdparty/audiotools/metrics/distance.py +131 -0
- xinference/thirdparty/audiotools/metrics/quality.py +159 -0
- xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
- xinference/thirdparty/audiotools/ml/__init__.py +5 -0
- xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
- xinference/thirdparty/audiotools/ml/decorators.py +440 -0
- xinference/thirdparty/audiotools/ml/experiment.py +90 -0
- xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
- xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
- xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
- xinference/thirdparty/audiotools/post.py +140 -0
- xinference/thirdparty/audiotools/preference.py +600 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +1 -1
- xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
- xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
- xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
- xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
- xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
- xinference/thirdparty/indextts/__init__.py +0 -0
- xinference/thirdparty/indextts/cli.py +65 -0
- xinference/thirdparty/indextts/gpt/__init__.py +0 -0
- xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
- xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
- xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
- xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
- xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
- xinference/thirdparty/indextts/gpt/model.py +713 -0
- xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
- xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
- xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
- xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
- xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
- xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
- xinference/thirdparty/indextts/infer.py +690 -0
- xinference/thirdparty/indextts/infer_v2.py +739 -0
- xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
- xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
- xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
- xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
- xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
- xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
- xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
- xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
- xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
- xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
- xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
- xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
- xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
- xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
- xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
- xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
- xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
- xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
- xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
- xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
- xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
- xinference/thirdparty/indextts/utils/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/arch_util.py +120 -0
- xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
- xinference/thirdparty/indextts/utils/common.py +121 -0
- xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
- xinference/thirdparty/indextts/utils/front.py +536 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
- xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
- xinference/thirdparty/indextts/utils/text_utils.py +41 -0
- xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
- xinference/thirdparty/indextts/utils/utils.py +93 -0
- xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
- xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
- xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
- xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
- xinference/thirdparty/melo/text/chinese_mix.py +2 -2
- xinference/types.py +9 -0
- xinference/ui/gradio/media_interface.py +66 -8
- xinference/ui/web/ui/build/asset-manifest.json +6 -6
- xinference/ui/web/ui/build/index.html +1 -1
- xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
- xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
- xinference/ui/web/ui/build/static/js/main.45e78536.js +3 -0
- xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.45e78536.js.LICENSE.txt} +0 -7
- xinference/ui/web/ui/build/static/js/main.45e78536.js.map +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/ea2a26361204e70cf1018d6990fb6354bed82b3ac69690391e0f100385e7abb7.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
- xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
- xinference/ui/web/ui/package-lock.json +0 -34
- xinference/ui/web/ui/package.json +0 -1
- xinference/ui/web/ui/src/locales/en.json +9 -3
- xinference/ui/web/ui/src/locales/ja.json +9 -3
- xinference/ui/web/ui/src/locales/ko.json +9 -3
- xinference/ui/web/ui/src/locales/zh.json +9 -3
- {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/METADATA +24 -6
- {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/RECORD +296 -77
- xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
- xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
- xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
- xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
- xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
- xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
- xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
- xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
- xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
- xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
- xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
- xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
- xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
- xinference/ui/web/ui/node_modules/select/bower.json +0 -13
- xinference/ui/web/ui/node_modules/select/package.json +0 -29
- xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
- {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/WHEEL +0 -0
- {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/top_level.txt +0 -0
xinference/model/llm/vllm/core.py CHANGED

```diff
@@ -131,7 +131,7 @@ except ImportError:
     VLLM_INSTALLED = False
     VLLM_VERSION = None
 
-VLLM_SUPPORTED_VISION_MODEL_LIST: List[str] = []
+VLLM_SUPPORTED_MULTI_MODEL_LIST: List[str] = []
 VLLM_SUPPORTED_MODELS = [
     "llama-2",
     "llama-3",
```
```diff
@@ -229,34 +229,37 @@ if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.5.3"):
     VLLM_SUPPORTED_CHAT_MODELS.append("HuatuoGPT-o1-LLaMA-3.1")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.6.1"):
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("internvl2")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("InternVL2.5")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("InternVL2.5-MPO")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("InternVL3")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("internvl2")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("InternVL2.5")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("InternVL2.5-MPO")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("InternVL3")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.6.2"):
     VLLM_SUPPORTED_CHAT_MODELS.append("minicpm3-4b")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.6.3"):
     VLLM_SUPPORTED_MODELS.append("llama-3.2-vision")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("llama-3.2-vision-instruct")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("qwen2-vl-instruct")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("QvQ-72B-Preview")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("llama-3.2-vision-instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2-vl-instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("QvQ-72B-Preview")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2-audio")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.7.0"):
     VLLM_SUPPORTED_CHAT_MODELS.append("internlm3-instruct")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.7.2"):
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("qwen2.5-vl-instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2.5-vl-instruct")
     VLLM_SUPPORTED_CHAT_MODELS.append("moonlight-16b-a3b-instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2-audio-instruct")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.7.3"):
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen2.5-instruct-1m")
     VLLM_SUPPORTED_CHAT_MODELS.append("qwenLong-l1")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2.5-omni")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.0"):
     VLLM_SUPPORTED_CHAT_MODELS.append("gemma-3-1b-it")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("gemma-3-it")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("gemma-3-it")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.4"):
     VLLM_SUPPORTED_CHAT_MODELS.append("glm4-0414")
```
```diff
@@ -264,12 +267,15 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.4"):
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.5"):
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen3")
 
+if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.0"):
+    VLLM_SUPPORTED_CHAT_MODELS.append("Baichuan-M2")
+
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.1"):
     VLLM_SUPPORTED_CHAT_MODELS.append("minicpm4")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.2"):
     VLLM_SUPPORTED_CHAT_MODELS.append("Ernie4.5")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("glm-4.1v-thinking")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("glm-4.1v-thinking")
     VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Instruct")
     VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Thinking")
     VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Coder")
```
```diff
@@ -277,15 +283,22 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.2"):
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.0"):
     VLLM_SUPPORTED_CHAT_MODELS.append("glm-4.5")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("glm-4.5v")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("glm-4.5v")
     VLLM_SUPPORTED_CHAT_MODELS.append("KAT-V1")
 
 if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.10.0"):
     VLLM_SUPPORTED_CHAT_MODELS.append("gpt-oss")
-    VLLM_SUPPORTED_CHAT_MODELS.append("seed-oss")
 
-if VLLM_INSTALLED and VLLM_VERSION
+if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.2"):
     VLLM_SUPPORTED_CHAT_MODELS.append("seed-oss")
+    VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Instruct")
+    VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Thinking")
+
+if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.11.0"):
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-VL-Thinking")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-VL-Instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-Omni-Thinking")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-Omni-Instruct")
 
 
 class VLLMModel(LLM):
```
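The block above gates each model family on the installed vLLM version via `packaging.version`. For orientation, a minimal self-contained sketch of the same pattern; `SUPPORTED_SINCE` and `is_supported` are illustrative names (not xinference API), with thresholds taken from the diff:

```python
# Sketch of the version-gating idea used above, under assumed names.
from typing import Dict
from packaging import version

SUPPORTED_SINCE: Dict[str, str] = {
    "qwen3": "0.8.5",            # per the >= 0.8.5 block
    "seed-oss": "0.10.2",        # per the >= 0.10.2 block
    "Qwen3-VL-Instruct": "0.11.0",  # per the >= 0.11.0 block
}

def is_supported(model_name: str, installed_vllm: str) -> bool:
    """True if the installed vLLM is new enough for the given model family."""
    required = SUPPORTED_SINCE.get(model_name)
    return required is not None and version.parse(installed_vllm) >= version.parse(required)

assert is_supported("Qwen3-VL-Instruct", "0.11.0")
assert not is_supported("Qwen3-VL-Instruct", "0.10.2")
```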
```diff
@@ -537,7 +550,7 @@ class VLLMModel(LLM):
             # patch vllm Executor.get_class
             Executor.get_class = lambda vllm_config: executor_cls
             self._engine = AsyncLLMEngine.from_engine_args(engine_args)
-        except:
+        except:  # noqa: E722
             logger.exception("Creating vllm engine failed")
             self._loading_error = sys.exc_info()
 
```
```diff
@@ -706,7 +719,7 @@ class VLLMModel(LLM):
         logger.info("Detecting vLLM is not health, prepare to quit the process")
         try:
             self.stop()
-        except:
+        except:  # noqa: E722
             # ignore error when stop
             pass
         # Just kill the process and let xinference auto-recover the model
```
```diff
@@ -849,7 +862,7 @@ class VLLMModel(LLM):
         if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8", "bnb"]:
             return False
         if llm_spec.model_format == "pytorch":
-            if quantization != "none" and
+            if quantization != "none" and quantization is not None:
                 return False
         if llm_spec.model_format == "awq":
             # Currently, only 4-bit weight quantization is supported for AWQ, but got 8 bits.
```
```diff
@@ -934,9 +947,21 @@ class VLLMModel(LLM):
 
     async def _get_tokenizer(self, lora_request: Any) -> Any:
         try:
-
+            # vLLM 0.11.0+ get_tokenizer doesn't accept lora_request parameter
+            if (
+                VLLM_VERSION >= version.parse("0.11.0")
+                or VLLM_VERSION.base_version >= "0.11.0"
+            ):
+                return await self._engine.get_tokenizer()  # type: ignore
+            else:
+                return await self._engine.get_tokenizer(lora_request)  # type: ignore
         except AttributeError:
-
+            # Fallback to get_tokenizer_async for older versions
+            try:
+                return await self._engine.get_tokenizer_async(lora_request)  # type: ignore
+            except (AttributeError, TypeError):
+                # If all else fails, try without parameters
+                return await self._engine.get_tokenizer()  # type: ignore
 
     def _tokenize(self, tokenizer: Any, prompt: str, config: dict) -> List[int]:
         truncate_prompt_tokens = config.get("truncate_prompt_tokens")
```
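The hunk above layers version checks over `AttributeError`/`TypeError` fallbacks. An alternative way to express the same compatibility idea is to inspect the callable's signature instead of comparing version strings; a sketch under assumed names (`engine` is a hypothetical object exposing either the old one-argument or the new zero-argument `get_tokenizer`):

```python
# Feature-detection sketch: dispatch on the method's arity rather than version.
import inspect

async def get_tokenizer_compat(engine, lora_request=None):
    sig = inspect.signature(engine.get_tokenizer)
    if len(sig.parameters) == 0:
        # Newer API: no lora_request parameter.
        return await engine.get_tokenizer()
    # Older API: tokenizer may vary per LoRA adapter.
    return await engine.get_tokenizer(lora_request)
```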
```diff
@@ -968,7 +993,10 @@ class VLLMModel(LLM):
             from vllm import TokensPrompt
 
             token_ids = await asyncio.to_thread(
-                self._tokenize,
+                self._tokenize,
+                tokenizer,
+                prompt,  # type: ignore
+                config,
             )
             return TokensPrompt(prompt_token_ids=token_ids)
 
```
```diff
@@ -1017,23 +1045,90 @@ class VLLMModel(LLM):
             # guided decoding only available for vllm >= 0.6.3
             from vllm.sampling_params import GuidedDecodingParams
 
-
-
-
-
-
-
-
-
-
-
+            # Extract guided decoding parameters
+            guided_params: dict[str, Any] = {}
+            guided_json = sanitized_generate_config.pop("guided_json", None)
+            if guided_json:
+                guided_params["json"] = guided_json
+
+            guided_regex = sanitized_generate_config.pop("guided_regex", None)
+            if guided_regex:
+                guided_params["regex"] = guided_regex
+
+            guided_choice = sanitized_generate_config.pop("guided_choice", None)
+            if guided_choice:
+                guided_params["choice"] = guided_choice
+
+            guided_grammar = sanitized_generate_config.pop("guided_grammar", None)
+            if guided_grammar:
+                guided_params["grammar"] = guided_grammar
+
+            guided_json_object = sanitized_generate_config.pop(
+                "guided_json_object", None
+            )
+            if guided_json_object:
+                guided_params["json_object"] = guided_json_object
+
+            guided_backend = sanitized_generate_config.pop(
+                "guided_decoding_backend", None
             )
+            if guided_backend:
+                guided_params["_backend"] = guided_backend
 
-
-
+            guided_whitespace_pattern = sanitized_generate_config.pop(
+                "guided_whitespace_pattern", None
             )
+            if guided_whitespace_pattern:
+                guided_params["whitespace_pattern"] = guided_whitespace_pattern
+
+            # Create GuidedDecodingParams if we have any guided parameters
+            guided_options = None
+            if guided_params:
+                try:
+                    guided_options = GuidedDecodingParams(**guided_params)
+                except Exception as e:
+                    logger.warning(f"Failed to create GuidedDecodingParams: {e}")
+                    guided_options = None
+
+            try:
+                import inspect
+
+                sp_sig = inspect.signature(SamplingParams)
+                # For v0.9.2 and similar versions, prioritize guided_decoding over structured_outputs
+                # structured_outputs was introduced later (around v0.11.0) and may not accept
+                # GuidedDecodingParams in earlier versions even if the parameter exists
+                if "guided_decoding" in sp_sig.parameters:
+                    sampling_params = SamplingParams(
+                        guided_decoding=guided_options, **sanitized_generate_config
+                    )
+                elif "structured_outputs" in sp_sig.parameters:
+                    try:
+                        sampling_params = SamplingParams(
+                            structured_outputs=guided_options,
+                            **sanitized_generate_config,
+                        )
+                    except TypeError as e:
+                        if "structured_outputs" in str(e):
+                            # structured_outputs parameter exists but doesn't accept GuidedDecodingParams
+                            # Fall back to no guided decoding
+                            logger.warning(
+                                f"structured_outputs parameter failed: {e}. "
+                                "Falling back to no guided decoding for vLLM version compatibility."
+                            )
+                            sampling_params = SamplingParams(
+                                **sanitized_generate_config
+                            )
+                        else:
+                            raise
+                else:
+                    sampling_params = SamplingParams(**sanitized_generate_config)
+            except Exception as e:
+                logger.warning(
+                    f"Failed to create SamplingParams with guided decoding: {e}"
+                )
+                sampling_params = SamplingParams(**sanitized_generate_config)
         else:
-            # ignore generate configs
+            # ignore generate configs for older versions
             sanitized_generate_config.pop("guided_json", None)
             sanitized_generate_config.pop("guided_regex", None)
             sanitized_generate_config.pop("guided_choice", None)
```
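The essence of the hunk above is mapping OpenAI-style `guided_*` request keys onto vLLM's `GuidedDecodingParams`. A standalone sketch of that mapping, assuming vLLM >= 0.6.3 (where `GuidedDecodingParams` and the `guided_decoding` parameter of `SamplingParams` exist); the `request` dict is illustrative:

```python
# Sketch: translate request-level guided_* keys into GuidedDecodingParams.
from vllm.sampling_params import GuidedDecodingParams, SamplingParams

request = {
    "max_tokens": 128,
    "guided_choice": ["positive", "negative", "neutral"],
}

# Pop guided_* keys so the leftovers can be passed straight to SamplingParams.
key_map = {"guided_json": "json", "guided_regex": "regex",
           "guided_choice": "choice", "guided_grammar": "grammar"}
guided_kwargs = {dst: request.pop(src)
                 for src, dst in key_map.items() if src in request}

guided = GuidedDecodingParams(**guided_kwargs) if guided_kwargs else None
params = SamplingParams(guided_decoding=guided, **request)
```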
```diff
@@ -1049,7 +1144,9 @@ class VLLMModel(LLM):
         # this requires tokenizing
         tokenizer = await self._get_tokenizer(lora_request)
         prompt_or_token_ids = await self._gen_tokens_prompt(
-            tokenizer,
+            tokenizer,
+            prompt,
+            sanitized_generate_config,  # type: ignore
         )
         sampling_params.max_tokens = max_tokens = self._context_length - len(  # type: ignore
             prompt_or_token_ids["prompt_token_ids"]  # type: ignore
```
```diff
@@ -1204,11 +1301,10 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         ]:
             return False
         if llm_spec.model_format == "pytorch":
-            if quantization != "none" and
+            if quantization != "none" and quantization is not None:
                 return False
         if llm_spec.model_format == "awq":
-
-            if "4" not in quantization:
+            if not any(q in quantization for q in ("4", "8")):
                 return False
         if llm_spec.model_format == "gptq":
             if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.3.3"):
```
```diff
@@ -1236,6 +1332,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
     ) -> Dict:
         if not generate_config:
             generate_config = {}
+
         if "reasoning" in getattr(self.model_family, "model_ability", []):
             generate_config.pop("stop", None)
             generate_config.pop("stop_token_ids", None)
```
```diff
@@ -1249,6 +1346,19 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
             generate_config["stop_token_ids"] = (
                 self.model_family.stop_token_ids.copy()
             )
+
+        # if response_format exists,generate guided_json
+        if "response_format" in generate_config:
+            resp_format = generate_config["response_format"]
+            if (
+                isinstance(resp_format, dict)
+                and resp_format.get("type") == "json_schema"
+                and "json_schema" in resp_format
+            ):
+                schema = resp_format["json_schema"].get("schema_")
+                if schema:
+                    generate_config["guided_json"] = schema
+
         return generate_config
 
     @staticmethod
```
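The added block converts an OpenAI-style `response_format` of type `json_schema` into vLLM guided decoding. A sketch of the request shape it handles; the schema contents here are illustrative, while the `schema_` key name comes directly from the diff:

```python
# Sketch: the response_format -> guided_json conversion performed above.
generate_config = {
    "response_format": {
        "type": "json_schema",
        "json_schema": {
            "name": "weather",
            "schema_": {
                "type": "object",
                "properties": {"city": {"type": "string"},
                               "temp_c": {"type": "number"}},
                "required": ["city", "temp_c"],
            },
        },
    }
}

resp_format = generate_config["response_format"]
if resp_format.get("type") == "json_schema" and "json_schema" in resp_format:
    schema = resp_format["json_schema"].get("schema_")
    if schema:
        generate_config["guided_json"] = schema

assert generate_config["guided_json"]["required"] == ["city", "temp_c"]
```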
```diff
@@ -1354,7 +1464,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         return self._to_chat_completion(c, self.reasoning_parser)
 
 
-class VLLMVisionModel(VLLMModel, ChatModelMixin):
+class VLLMMultiModel(VLLMModel, ChatModelMixin):
     @classmethod
     def match_json(
         cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
```
```diff
@@ -1366,11 +1476,10 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
         if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8", "bnb"]:
             return False
         if llm_spec.model_format == "pytorch":
-            if quantization != "none" and
+            if quantization != "none" and quantization is not None:
                 return False
         if llm_spec.model_format == "awq":
-
-            if "4" not in quantization:
+            if not any(q in quantization for q in ("4", "8")):
                 return False
         if llm_spec.model_format == "gptq":
             if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.3.3"):
```
```diff
@@ -1380,12 +1489,16 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
             if "4" not in quantization:
                 return False
         if isinstance(llm_family, CustomLLMFamilyV2):
-            if llm_family.model_family not in VLLM_SUPPORTED_VISION_MODEL_LIST:
+            if llm_family.model_family not in VLLM_SUPPORTED_MULTI_MODEL_LIST:
                 return False
         else:
-            if llm_family.model_name not in VLLM_SUPPORTED_VISION_MODEL_LIST:
+            if llm_family.model_name not in VLLM_SUPPORTED_MULTI_MODEL_LIST:
                 return False
-        if "vision" not in llm_family.model_ability:
+        if (
+            "vision" not in llm_family.model_ability
+            and "audio" not in llm_family.model_ability
+            and "omni" not in llm_family.model_ability
+        ):
             return False
         return VLLM_INSTALLED
```
```diff
@@ -1394,13 +1507,21 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
     ) -> VLLMModelConfig:
         model_config = super()._sanitize_model_config(model_config)
         if VLLM_VERSION >= version.parse("0.5.5"):
-            model_config
-
-
-
-
-
-
+            if model_config.get("limit_mm_per_prompt"):
+                model_config["limit_mm_per_prompt"] = json.loads(
+                    model_config.get("limit_mm_per_prompt")  # type: ignore
+                )
+            else:
+                if "omni" in self.model_family.model_ability:
+                    model_config["limit_mm_per_prompt"] = {
+                        "image": 2,
+                        "video": 2,
+                        "audio": 2,
+                    }
+                elif "vision" in self.model_family.model_ability:
+                    model_config["limit_mm_per_prompt"] = {"image": 2, "video": 2}
+                elif "audio" in self.model_family.model_ability:
+                    model_config["limit_mm_per_prompt"] = {"audio": 2}
         return model_config
 
     def _sanitize_chat_config(
```
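The hunk above parses a user-supplied `limit_mm_per_prompt` JSON string and otherwise picks a per-ability default. A standalone sketch of that behavior; `model_ability` stands in for `self.model_family.model_ability`:

```python
# Sketch: resolve limit_mm_per_prompt from user config or ability defaults.
import json

def resolve_limit(model_config: dict, model_ability: list) -> dict:
    if model_config.get("limit_mm_per_prompt"):
        # User value arrives as a JSON string, e.g. '{"image": 4}'.
        model_config["limit_mm_per_prompt"] = json.loads(
            model_config["limit_mm_per_prompt"]
        )
    elif "omni" in model_ability:
        model_config["limit_mm_per_prompt"] = {"image": 2, "video": 2, "audio": 2}
    elif "vision" in model_ability:
        model_config["limit_mm_per_prompt"] = {"image": 2, "video": 2}
    elif "audio" in model_ability:
        model_config["limit_mm_per_prompt"] = {"audio": 2}
    return model_config

print(resolve_limit({"limit_mm_per_prompt": '{"image": 4}'}, ["vision"]))
# {'limit_mm_per_prompt': {'image': 4}}
```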
```diff
@@ -1434,7 +1555,10 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
             multi_modal_data = prompt.get("multi_modal_data")
 
             token_ids = await asyncio.to_thread(
-                self._tokenize,
+                self._tokenize,
+                tokenizer,
+                prompt_str,
+                config,  # type: ignore
             )
             return TokensPrompt(
                 prompt_token_ids=token_ids, multi_modal_data=multi_modal_data
```
```diff
@@ -1450,9 +1574,13 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
         tools = generate_config.pop("tools", []) if generate_config else None
 
         model_family = self.model_family.model_family or self.model_family.model_name
-
+        audios, images, videos = None, None, None
         if "internvl" not in model_family.lower():
-            from
+            from qwen_omni_utils import (
+                process_audio_info,
+                process_mm_info,
+                process_vision_info,
+            )
 
             messages = self._transform_messages(messages)
 
```
```diff
@@ -1467,29 +1595,36 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
             if tools and model_family in QWEN_TOOL_CALL_FAMILY:
                 full_context_kwargs["tools"] = tools
             assert self.model_family.chat_template is not None
+            if "omni" in self.model_family.model_ability:
+                audios, images, videos = process_mm_info(
+                    messages, use_audio_in_video=True
+                )
+            elif "audio" in self.model_family.model_ability:
+                audios = process_audio_info(messages, use_audio_in_video=False)
+            elif "vision" in self.model_family.model_ability:
+                images, videos = process_vision_info(  # type: ignore
+                    messages, return_video_kwargs=False
+                )
+
             prompt = self.get_full_context(
                 messages, self.model_family.chat_template, **full_context_kwargs
             )
-            images, video_inputs = process_vision_info(messages)
-            if video_inputs:
-                raise ValueError("Not support video input now.")
-        else:
-            prompt, images = self.get_specific_prompt(model_family, messages)
 
-        if not images:
-            inputs = {
-                "prompt": prompt,
-            }
-        elif len(images) == 1:
-            inputs = {
-                "prompt": prompt,
-                "multi_modal_data": {"image": images[-1]},  # type: ignore
-            }
         else:
-
-
-
-
+            prompt, images = self.get_specific_prompt(model_family, messages)
+        inputs = {"prompt": prompt, "multi_modal_data": {}, "mm_processor_kwargs": {}}
+        if images:
+            inputs["multi_modal_data"]["image"] = images
+        if videos:
+            inputs["multi_modal_data"]["video"] = videos
+        if audios:
+            inputs["multi_modal_data"]["audio"] = audios
+        if "omni" in self.model_family.model_ability:
+            inputs["mm_processor_kwargs"]["use_audio_in_video"] = True
+        if inputs["multi_modal_data"] == {}:
+            inputs.pop("multi_modal_data")
+        if inputs["mm_processor_kwargs"] == {}:
+            inputs.pop("mm_processor_kwargs")
         generate_config = self._sanitize_chat_config(generate_config)
 
         stream = generate_config.get("stream", None)
```
xinference/model/utils.py CHANGED

```diff
@@ -315,6 +315,11 @@ def set_all_random_seed(seed: int):
 
 
 class CancellableDownloader:
+    _global_lock = threading.Lock()
+    _active_instances = 0
+    _original_update = None  # Class-level original update method
+    _patch_lock = threading.Lock()  # Additional lock for patching operations
+
     def __init__(
         self,
         cancel_error_cls: Type[BaseException] = asyncio.CancelledError,
```
```diff
@@ -325,23 +330,23 @@ class CancellableDownloader:
         self._cancelled = threading.Event()
         self._done_event = threading.Event()
         self._cancel_error_cls = cancel_error_cls
-        self._original_update = None
         # progress for tqdm that is main
         self._main_progresses: Set[tqdm] = set()
         # progress for file downloader
         # mainly when tqdm unit is set
         self._download_progresses: Set[tqdm] = set()
-        # tqdm
-        self.
+        # Instance-specific tqdm tracking
+        self._patched_instances: Set[int] = set()
 
     def reset(self):
         self._main_progresses.clear()
         self._download_progresses.clear()
 
     def get_progress(self) -> float:
-        if self.
-            # directly return 1.0 when
+        if self.done:
+            # directly return 1.0 when finished
             return 1.0
+        # Don't return 1.0 when cancelled, calculate actual progress
 
         tasks = finished_tasks = 0
         for main_progress in self._main_progresses:
```
```diff
@@ -376,6 +381,7 @@ class CancellableDownloader:
 
     def cancel(self):
         self._cancelled.set()
+        self._done_event.set()
 
     @property
     def cancelled(self):
```
```diff
@@ -392,39 +398,76 @@ class CancellableDownloader:
             raise self._cancel_error_cls(error_msg)
 
     def patch_tqdm(self):
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Use class-level patching to avoid conflicts
+        with self._patch_lock:
+            if self._original_update is None:
+                self._original_update = original_update = tqdm.update
+
+                # Thread-safe patched update
+                def patched_update(tqdm_instance, n):
+                    import gc
+
+                    # Get all CancellableDownloader instances and check for cancellation
+                    downloaders = [
+                        obj
+                        for obj in gc.get_objects()
+                        if isinstance(obj, CancellableDownloader)
+                    ]
+
+                    for downloader in downloaders:
+                        # if download cancelled, throw error
+                        if getattr(downloader, "cancelled", False):
+                            downloader.raise_error()
+
+                        progresses = None
+                        if not getattr(tqdm_instance, "disable", False):
+                            unit = getattr(tqdm_instance, "unit", "it")
+                            if unit == "it":
+                                progresses = getattr(
+                                    downloader, "_main_progresses", None
+                                )
+                            else:
+                                progresses = getattr(
+                                    downloader, "_download_progresses", None
+                                )
+
+                        if progresses is not None:
+                            progresses.add(tqdm_instance)
+                        else:
+                            logger.debug(
+                                f"No progresses found for downloader {downloader}"
+                            )
+
+                    # Call original update with safety check
+                    return original_update(tqdm_instance, n)
+
+                tqdm.update = patched_update
 
     def unpatch_tqdm(self):
-
-
-
-
+        with self._patch_lock:
+            if self._original_update is not None and self._active_instances == 0:
+                tqdm.update = self._original_update
+                self._original_update = None
 
     def __enter__(self):
-
+        # Use global lock to prevent concurrent patching
+        with self._global_lock:
+            if self._active_instances == 0:
+                self.patch_tqdm()
+            self._active_instances += 1
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-
-        self.
-
+        # Use global lock to prevent concurrent unpatching
+        with self._global_lock:
+            self._active_instances -= 1
+            if self._active_instances == 0:
+                self.unpatch_tqdm()
+            try:
+                self._done_event.set()
+                self.reset()
+            except Exception as e:
+                logger.debug(f"Error during CancellableDownloader cleanup: {e}")
 
 
 def get_engine_params_by_name(
```