xinference 1.10.0__py3-none-any.whl → 1.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +11 -28
- xinference/client/restful/async_restful_client.py +20 -3
- xinference/client/restful/restful_client.py +20 -3
- xinference/core/supervisor.py +87 -53
- xinference/core/worker.py +10 -0
- xinference/deploy/cmdline.py +15 -0
- xinference/model/audio/core.py +21 -6
- xinference/model/audio/indextts2.py +166 -0
- xinference/model/audio/model_spec.json +38 -1
- xinference/model/image/model_spec.json +69 -0
- xinference/model/image/stable_diffusion/core.py +13 -4
- xinference/model/llm/__init__.py +4 -0
- xinference/model/llm/llm_family.json +464 -2
- xinference/model/llm/sglang/core.py +30 -11
- xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +94 -32
- xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
- xinference/model/llm/utils.py +12 -9
- xinference/model/llm/vllm/core.py +93 -17
- xinference/thirdparty/audiotools/__init__.py +10 -0
- xinference/thirdparty/audiotools/core/__init__.py +4 -0
- xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
- xinference/thirdparty/audiotools/core/display.py +194 -0
- xinference/thirdparty/audiotools/core/dsp.py +390 -0
- xinference/thirdparty/audiotools/core/effects.py +647 -0
- xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
- xinference/thirdparty/audiotools/core/loudness.py +320 -0
- xinference/thirdparty/audiotools/core/playback.py +252 -0
- xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
- xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
- xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
- xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
- xinference/thirdparty/audiotools/core/util.py +671 -0
- xinference/thirdparty/audiotools/core/whisper.py +97 -0
- xinference/thirdparty/audiotools/data/__init__.py +3 -0
- xinference/thirdparty/audiotools/data/datasets.py +517 -0
- xinference/thirdparty/audiotools/data/preprocess.py +81 -0
- xinference/thirdparty/audiotools/data/transforms.py +1592 -0
- xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
- xinference/thirdparty/audiotools/metrics/distance.py +131 -0
- xinference/thirdparty/audiotools/metrics/quality.py +159 -0
- xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
- xinference/thirdparty/audiotools/ml/__init__.py +5 -0
- xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
- xinference/thirdparty/audiotools/ml/decorators.py +440 -0
- xinference/thirdparty/audiotools/ml/experiment.py +90 -0
- xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
- xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
- xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
- xinference/thirdparty/audiotools/post.py +140 -0
- xinference/thirdparty/audiotools/preference.py +600 -0
- xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
- xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
- xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
- xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
- xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
- xinference/thirdparty/indextts/__init__.py +0 -0
- xinference/thirdparty/indextts/cli.py +65 -0
- xinference/thirdparty/indextts/gpt/__init__.py +0 -0
- xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
- xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
- xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
- xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
- xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
- xinference/thirdparty/indextts/gpt/model.py +713 -0
- xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
- xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
- xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
- xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
- xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
- xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
- xinference/thirdparty/indextts/infer.py +690 -0
- xinference/thirdparty/indextts/infer_v2.py +739 -0
- xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
- xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
- xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
- xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
- xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
- xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
- xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
- xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
- xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
- xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
- xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
- xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
- xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
- xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
- xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
- xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
- xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
- xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
- xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
- xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
- xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
- xinference/thirdparty/indextts/utils/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/arch_util.py +120 -0
- xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
- xinference/thirdparty/indextts/utils/common.py +121 -0
- xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
- xinference/thirdparty/indextts/utils/front.py +536 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
- xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
- xinference/thirdparty/indextts/utils/text_utils.py +41 -0
- xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
- xinference/thirdparty/indextts/utils/utils.py +93 -0
- xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
- xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
- xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
- xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
- xinference/ui/gradio/media_interface.py +66 -8
- xinference/ui/web/ui/build/asset-manifest.json +6 -6
- xinference/ui/web/ui/build/index.html +1 -1
- xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
- xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
- xinference/ui/web/ui/build/static/js/main.d192c4f3.js +3 -0
- xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.d192c4f3.js.LICENSE.txt} +0 -7
- xinference/ui/web/ui/build/static/js/main.d192c4f3.js.map +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f995a2425dfb0822fd07127f66ffe9b026883bc156b402eb8bd0b83d52460a93.json +1 -0
- xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
- xinference/ui/web/ui/package-lock.json +0 -34
- xinference/ui/web/ui/package.json +0 -1
- xinference/ui/web/ui/src/locales/en.json +9 -3
- xinference/ui/web/ui/src/locales/ja.json +9 -3
- xinference/ui/web/ui/src/locales/ko.json +9 -3
- xinference/ui/web/ui/src/locales/zh.json +9 -3
- {xinference-1.10.0.dist-info → xinference-1.10.1.dist-info}/METADATA +18 -2
- {xinference-1.10.0.dist-info → xinference-1.10.1.dist-info}/RECORD +285 -67
- xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
- xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
- xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
- xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
- xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
- xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
- xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
- xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
- xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
- xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
- xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
- xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
- xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
- xinference/ui/web/ui/node_modules/select/bower.json +0 -13
- xinference/ui/web/ui/node_modules/select/package.json +0 -29
- xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
- {xinference-1.10.0.dist-info → xinference-1.10.1.dist-info}/WHEEL +0 -0
- {xinference-1.10.0.dist-info → xinference-1.10.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.10.0.dist-info → xinference-1.10.1.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.10.0.dist-info → xinference-1.10.1.dist-info}/top_level.txt +0 -0
|
@@ -23,12 +23,27 @@ class DeepseekR1ToolParser(ToolParser):
|
|
|
23
23
|
Initialize the DeepSeek R1 tool parser.
|
|
24
24
|
"""
|
|
25
25
|
super().__init__()
|
|
26
|
+
|
|
27
|
+
# Sentinel tokens for streaming mode
|
|
28
|
+
self.think_start_token: str = "<think>"
|
|
29
|
+
self.think_end_token: str = "</think>"
|
|
30
|
+
self.tool_call_start_token: str = "<|tool▁call▁begin|>"
|
|
31
|
+
self.tool_call_end_token: str = "<|tool▁call▁end|>"
|
|
32
|
+
|
|
26
33
|
# Regex pattern to match DeepSeek R1 tool call format
|
|
27
34
|
self.tool_calls_regex = (
|
|
28
35
|
r"<\|tool▁call▁begin|>function<\|tool▁sep|>([^\n]+)\n"
|
|
29
36
|
r"```json\n(.*?)\n```<\|tool▁call▁end|>"
|
|
30
37
|
)
|
|
31
38
|
|
|
39
|
+
# Regex pattern to match the entire tool-calls wrapper block.
|
|
40
|
+
# We intentionally do NOT match <think> blocks here so that the
|
|
41
|
+
# "text before" chunk will include both the think block and any
|
|
42
|
+
# narrative text up to the tool calls wrapper, yielding exactly two
|
|
43
|
+
# blocks when there is a single tool calls section:
|
|
44
|
+
# [before_text_including_think, tool_calls_wrapper_block]
|
|
45
|
+
self.content_regex = r"(<\|tool▁calls▁begin|>.*?<\|tool▁calls▁end|>)"
|
|
46
|
+
|
|
32
47
|
def extract_tool_calls(
|
|
33
48
|
self, model_output: str
|
|
34
49
|
) -> List[Tuple[Optional[str], Optional[str], Optional[dict]]]:
|
|
@@ -56,49 +71,96 @@ class DeepseekR1ToolParser(ToolParser):
|
|
|
56
71
|
>>> print(result)
|
|
57
72
|
[(None, 'get_current_weather', {'location': 'Beijing'})]
|
|
58
73
|
"""
|
|
59
|
-
|
|
60
|
-
if not
|
|
61
|
-
# No tool calls found, return the original output as content
|
|
74
|
+
# If no tool call tokens, return original output as content
|
|
75
|
+
if self.tool_call_start_token not in model_output:
|
|
62
76
|
return [(model_output, None, None)]
|
|
63
77
|
|
|
78
|
+
# Get all content blocks (text, thinking blocks, tool calls)
|
|
79
|
+
function_calls = self._get_function_calls(model_output)
|
|
80
|
+
|
|
64
81
|
# Use set for deduplication of identical tool calls
|
|
65
82
|
tool_calls = set()
|
|
66
83
|
results: List[Tuple[Optional[str], Optional[str], Optional[dict]]] = []
|
|
67
84
|
|
|
68
|
-
for
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
85
|
+
for content_block in function_calls:
|
|
86
|
+
# Check if this block is a tool call
|
|
87
|
+
if (
|
|
88
|
+
self.tool_call_start_token in content_block
|
|
89
|
+
and self.tool_call_end_token in content_block
|
|
90
|
+
):
|
|
91
|
+
# Extract function name and arguments from tool call block
|
|
92
|
+
matches = re.findall(self.tool_calls_regex, content_block, re.DOTALL)
|
|
93
|
+
if not matches:
|
|
94
|
+
# Malformed tool call, treat as regular content
|
|
95
|
+
results.append((content_block, None, None))
|
|
96
|
+
continue
|
|
97
|
+
|
|
98
|
+
func_name, raw_json = matches[0] # Take the first match
|
|
99
|
+
|
|
100
|
+
func_and_args = None
|
|
101
|
+
try:
|
|
102
|
+
# Parse JSON arguments
|
|
103
|
+
func_and_args = json.loads(raw_json)
|
|
104
|
+
# Create hashable representation for deduplication
|
|
105
|
+
arguments_hashable = frozenset(func_and_args.items())
|
|
106
|
+
tool_call_tuple = (
|
|
107
|
+
None, # No content error
|
|
108
|
+
func_name,
|
|
109
|
+
func_and_args,
|
|
110
|
+
)
|
|
111
|
+
except Exception as e:
|
|
112
|
+
# JSON parsing failed, treat as raw content
|
|
113
|
+
logger.warning(
|
|
114
|
+
f"Failed to parse tool call JSON: {raw_json}, error: {e}"
|
|
115
|
+
)
|
|
116
|
+
tool_call_tuple = (raw_json, None, None)
|
|
117
|
+
arguments_hashable = None
|
|
118
|
+
|
|
119
|
+
# Create deduplication key
|
|
120
|
+
dedup_key = (
|
|
121
|
+
(func_name, arguments_hashable)
|
|
122
|
+
if func_and_args is not None
|
|
123
|
+
else raw_json
|
|
79
124
|
)
|
|
80
|
-
except Exception as e:
|
|
81
|
-
# JSON parsing failed, treat as raw content
|
|
82
|
-
logger.warning(
|
|
83
|
-
f"Failed to parse tool call JSON: {raw_json}, error: {e}"
|
|
84
|
-
)
|
|
85
|
-
tool_call_tuple = (raw_json, None, None)
|
|
86
|
-
arguments_hashable = None
|
|
87
|
-
|
|
88
|
-
# Create deduplication key
|
|
89
|
-
dedup_key = (
|
|
90
|
-
(func_name, arguments_hashable)
|
|
91
|
-
if func_and_args is not None
|
|
92
|
-
else raw_json
|
|
93
|
-
)
|
|
94
125
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
126
|
+
# Add to results if not already seen
|
|
127
|
+
if dedup_key not in tool_calls:
|
|
128
|
+
tool_calls.add(dedup_key)
|
|
129
|
+
results.append(tool_call_tuple)
|
|
130
|
+
else:
|
|
131
|
+
# This is regular content (text or thinking block), add as-is
|
|
132
|
+
if content_block.strip(): # Only add non-empty content
|
|
133
|
+
results.append((content_block, None, None))
|
|
99
134
|
|
|
100
135
|
return results
|
|
101
136
|
|
|
137
|
+
def _get_function_calls(self, model_output: str) -> List[str]:
|
|
138
|
+
"""
|
|
139
|
+
Extract all function calls and content blocks from model output.
|
|
140
|
+
|
|
141
|
+
Parses the model output to separate thinking blocks, tool calls,
|
|
142
|
+
and regular content into individual components.
|
|
143
|
+
|
|
144
|
+
Args:
|
|
145
|
+
model_output (str): The complete model output to parse.
|
|
146
|
+
|
|
147
|
+
Returns:
|
|
148
|
+
List[str]: List of content blocks (text, thinking blocks, tool calls).
|
|
149
|
+
"""
|
|
150
|
+
functions_calls = []
|
|
151
|
+
last_end = 0
|
|
152
|
+
for m in re.finditer(self.content_regex, model_output, re.DOTALL):
|
|
153
|
+
# Add any text before the current match
|
|
154
|
+
if m.start() > last_end:
|
|
155
|
+
functions_calls.append(model_output[last_end : m.start()])
|
|
156
|
+
# Add the matched content (think or tool_call block)
|
|
157
|
+
functions_calls.append(m.group(0))
|
|
158
|
+
last_end = m.end()
|
|
159
|
+
# Add any remaining text after the last match
|
|
160
|
+
if last_end < len(model_output):
|
|
161
|
+
functions_calls.append(model_output[last_end:])
|
|
162
|
+
return functions_calls
|
|
163
|
+
|
|
102
164
|
def extract_tool_calls_streaming(
|
|
103
165
|
self, previous_text: List[str], current_text: str, delta_text: str
|
|
104
166
|
) -> Optional[Any]:
|
|
@@ -27,11 +27,19 @@ logger = logging.getLogger(__name__)
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
@register_batching_multimodal_models(
|
|
30
|
-
"qwen2-vl-instruct",
|
|
30
|
+
"qwen2-vl-instruct",
|
|
31
|
+
"qwen2.5-vl-instruct",
|
|
32
|
+
"QvQ-72B-Preview",
|
|
33
|
+
"Qwen3-VL-Instruct",
|
|
34
|
+
"Qwen3-VL-Thinking",
|
|
31
35
|
)
|
|
32
36
|
@register_transformer
|
|
33
37
|
@register_non_default_model(
|
|
34
|
-
"qwen2-vl-instruct",
|
|
38
|
+
"qwen2-vl-instruct",
|
|
39
|
+
"qwen2.5-vl-instruct",
|
|
40
|
+
"QvQ-72B-Preview",
|
|
41
|
+
"Qwen3-VL-Instruct",
|
|
42
|
+
"Qwen3-VL-Thinking",
|
|
35
43
|
)
|
|
36
44
|
class Qwen2VLChatModel(PytorchMultiModalModel):
|
|
37
45
|
def _sanitize_model_config(
|
|
@@ -47,7 +55,7 @@ class Qwen2VLChatModel(PytorchMultiModalModel):
|
|
|
47
55
|
def match_json(
|
|
48
56
|
cls, model_family: "LLMFamilyV2", model_spec: "LLMSpecV1", quantization: str
|
|
49
57
|
) -> bool:
|
|
50
|
-
if model_spec.model_format not in ["pytorch", "gptq", "awq", "bnb"]:
|
|
58
|
+
if model_spec.model_format not in ["pytorch", "gptq", "awq", "bnb", "fp8"]:
|
|
51
59
|
return False
|
|
52
60
|
llm_family = model_family.model_family or model_family.model_name
|
|
53
61
|
if "qwen2-vl-instruct".lower() in llm_family.lower():
|
|
@@ -56,6 +64,8 @@ class Qwen2VLChatModel(PytorchMultiModalModel):
|
|
|
56
64
|
return True
|
|
57
65
|
if "qvq-72b-preview".lower() in llm_family.lower():
|
|
58
66
|
return True
|
|
67
|
+
if "qwen3-vl" in llm_family.lower():
|
|
68
|
+
return True
|
|
59
69
|
return False
|
|
60
70
|
|
|
61
71
|
def decide_device(self):
|
|
@@ -85,13 +95,19 @@ class Qwen2VLChatModel(PytorchMultiModalModel):
|
|
|
85
95
|
except ImportError:
|
|
86
96
|
Qwen2_5_VLForConditionalGeneration = None
|
|
87
97
|
|
|
98
|
+
try:
|
|
99
|
+
from transformers import AutoModelForImageTextToText
|
|
100
|
+
except ImportError:
|
|
101
|
+
AutoModelForImageTextToText = None
|
|
102
|
+
|
|
88
103
|
kwargs = self.apply_bnb_quantization()
|
|
89
104
|
llm_family = self.model_family.model_family or self.model_family.model_name
|
|
90
|
-
|
|
91
|
-
Qwen2_5_VLForConditionalGeneration
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
105
|
+
if "qwen2.5" in llm_family:
|
|
106
|
+
model_cls = Qwen2_5_VLForConditionalGeneration
|
|
107
|
+
elif "qwen3" in llm_family:
|
|
108
|
+
model_cls = AutoModelForImageTextToText
|
|
109
|
+
else:
|
|
110
|
+
model_cls = Qwen2VLForConditionalGeneration
|
|
95
111
|
if model_cls is None:
|
|
96
112
|
raise ImportError("`transformers` version is too old, please upgrade it")
|
|
97
113
|
device = "auto" if self._device == "cuda" else self._device
|
|
@@ -118,6 +134,16 @@ class Qwen2VLChatModel(PytorchMultiModalModel):
|
|
|
118
134
|
torch_dtype="float16",
|
|
119
135
|
**kwargs,
|
|
120
136
|
).eval()
|
|
137
|
+
elif device == "mps":
|
|
138
|
+
# MacOS special, see https://github.com/QwenLM/Qwen2.5-VL/issues/761
|
|
139
|
+
self._model = model_cls.from_pretrained(
|
|
140
|
+
self.model_path,
|
|
141
|
+
torch_dtype="bfloat16",
|
|
142
|
+
device_map=device,
|
|
143
|
+
attn_implementation="eager",
|
|
144
|
+
low_cpu_mem_usage=True,
|
|
145
|
+
trust_remote_code=True,
|
|
146
|
+
).eval()
|
|
121
147
|
else:
|
|
122
148
|
self._model = model_cls.from_pretrained(
|
|
123
149
|
self.model_path,
|
xinference/model/llm/utils.py
CHANGED
|
@@ -71,6 +71,10 @@ QWEN_TOOL_CALL_FAMILY = [
|
|
|
71
71
|
"Qwen3-Thinking",
|
|
72
72
|
"Qwen3-Instruct",
|
|
73
73
|
"Qwen3-Coder",
|
|
74
|
+
"Qwen3-VL-Instruct",
|
|
75
|
+
"Qwen3-VL-Thinking",
|
|
76
|
+
"Qwen3-Next-Instruct",
|
|
77
|
+
"Qwen3-Next-Thinking",
|
|
74
78
|
]
|
|
75
79
|
|
|
76
80
|
GLM4_TOOL_CALL_FAMILY = [
|
|
@@ -347,9 +351,7 @@ class ChatModelMixin:
|
|
|
347
351
|
assert choices is not None
|
|
348
352
|
usage = (
|
|
349
353
|
chunk["usage"]
|
|
350
|
-
if choices[0]["finish_reason"] is not None
|
|
351
|
-
and reasoning_parser
|
|
352
|
-
and reasoning_parser.check_content_parser()
|
|
354
|
+
if choices and choices[0]["finish_reason"] is not None or not choices
|
|
353
355
|
else None
|
|
354
356
|
)
|
|
355
357
|
chat_chunk = {
|
|
@@ -798,7 +800,11 @@ class ChatModelMixin:
|
|
|
798
800
|
chunk_id=None,
|
|
799
801
|
previous_texts: List[str] = [""],
|
|
800
802
|
):
|
|
803
|
+
if not c.get("choices"):
|
|
804
|
+
return c
|
|
801
805
|
_id = chunk_id if chunk_id is not None else str(uuid.uuid4())
|
|
806
|
+
tool_result = None
|
|
807
|
+
finish_reason = None
|
|
802
808
|
if isinstance(self.tool_parser, Glm4ToolParser):
|
|
803
809
|
tool_result = self.tool_parser.extract_tool_calls_streaming(
|
|
804
810
|
[],
|
|
@@ -851,11 +857,7 @@ class ChatModelMixin:
|
|
|
851
857
|
usage = c.get("usage")
|
|
852
858
|
assert "prompt_tokens" in usage
|
|
853
859
|
except Exception:
|
|
854
|
-
usage =
|
|
855
|
-
"prompt_tokens": -1,
|
|
856
|
-
"completion_tokens": -1,
|
|
857
|
-
"total_tokens": -1,
|
|
858
|
-
}
|
|
860
|
+
usage = None
|
|
859
861
|
return {
|
|
860
862
|
"id": "chat" + f"cmpl-{_id}",
|
|
861
863
|
"model": model_uid,
|
|
@@ -1009,7 +1011,8 @@ class ChatModelMixin:
|
|
|
1009
1011
|
completion_chunk, self.reasoning_parser, previous_texts
|
|
1010
1012
|
)
|
|
1011
1013
|
if (
|
|
1012
|
-
|
|
1014
|
+
chat_chunk["choices"]
|
|
1015
|
+
and "reasoning_content" in chat_chunk["choices"][0]["delta"]
|
|
1013
1016
|
and chat_chunk["choices"][0]["delta"]["reasoning_content"] is not None
|
|
1014
1017
|
):
|
|
1015
1018
|
yield chat_chunk
|
|
@@ -264,6 +264,9 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.4"):
|
|
|
264
264
|
if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.5"):
|
|
265
265
|
VLLM_SUPPORTED_CHAT_MODELS.append("qwen3")
|
|
266
266
|
|
|
267
|
+
if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.0"):
|
|
268
|
+
VLLM_SUPPORTED_CHAT_MODELS.append("Baichuan-M2")
|
|
269
|
+
|
|
267
270
|
if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.1"):
|
|
268
271
|
VLLM_SUPPORTED_CHAT_MODELS.append("minicpm4")
|
|
269
272
|
|
|
@@ -282,10 +285,15 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.0"):
|
|
|
282
285
|
|
|
283
286
|
if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.10.0"):
|
|
284
287
|
VLLM_SUPPORTED_CHAT_MODELS.append("gpt-oss")
|
|
285
|
-
VLLM_SUPPORTED_CHAT_MODELS.append("seed-oss")
|
|
286
288
|
|
|
287
|
-
if VLLM_INSTALLED and VLLM_VERSION
|
|
289
|
+
if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.2"):
|
|
288
290
|
VLLM_SUPPORTED_CHAT_MODELS.append("seed-oss")
|
|
291
|
+
VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Instruct")
|
|
292
|
+
VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Thinking")
|
|
293
|
+
|
|
294
|
+
# Qwen3-VL is only registered for vLLM releases newer than 0.10.2.
# Both the Instruct and the Thinking variants must be listed.
if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.10.2"):
    VLLM_SUPPORTED_VISION_MODEL_LIST.append("Qwen3-VL-Instruct")
    VLLM_SUPPORTED_VISION_MODEL_LIST.append("Qwen3-VL-Thinking")
|
|
289
297
|
|
|
290
298
|
|
|
291
299
|
class VLLMModel(LLM):
|
|
@@ -934,9 +942,21 @@ class VLLMModel(LLM):
|
|
|
934
942
|
|
|
935
943
|
async def _get_tokenizer(self, lora_request: Any) -> Any:
    """Return the engine's tokenizer, adapting to the installed vLLM API.

    Args:
        lora_request: LoRA request forwarded to the engine's tokenizer
            getter on vLLM versions that still accept it.

    Returns:
        Whatever the engine's tokenizer getter returns.
    """
    # Compare the *parsed* base version. base_version is a plain string, so
    # comparing it directly is lexicographic ("0.9.2" >= "0.11.0" is True)
    # and would misclassify older releases. Parsing it also treats
    # pre-release/dev builds such as "0.11.0rc1" as 0.11.0.
    no_lora_arg = version.parse(VLLM_VERSION.base_version) >= version.parse(
        "0.11.0"
    )
    try:
        # vLLM 0.11.0+ get_tokenizer doesn't accept lora_request parameter
        if no_lora_arg:
            return await self._engine.get_tokenizer()  # type: ignore
        return await self._engine.get_tokenizer(lora_request)  # type: ignore
    except AttributeError:
        # Fallback to get_tokenizer_async for older versions
        try:
            return await self._engine.get_tokenizer_async(lora_request)  # type: ignore
        except (AttributeError, TypeError):
            # If all else fails, try without parameters
            return await self._engine.get_tokenizer()  # type: ignore
|
|
940
960
|
|
|
941
961
|
def _tokenize(self, tokenizer: Any, prompt: str, config: dict) -> List[int]:
|
|
942
962
|
truncate_prompt_tokens = config.get("truncate_prompt_tokens")
|
|
@@ -1017,23 +1037,65 @@ class VLLMModel(LLM):
|
|
|
1017
1037
|
# guided decoding only available for vllm >= 0.6.3
|
|
1018
1038
|
from vllm.sampling_params import GuidedDecodingParams
|
|
1019
1039
|
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1040
|
+
# Extract guided decoding parameters
|
|
1041
|
+
guided_params: dict[str, Any] = {}
|
|
1042
|
+
guided_json = sanitized_generate_config.pop("guided_json", None)
|
|
1043
|
+
if guided_json:
|
|
1044
|
+
guided_params["json"] = guided_json
|
|
1045
|
+
|
|
1046
|
+
guided_regex = sanitized_generate_config.pop("guided_regex", None)
|
|
1047
|
+
if guided_regex:
|
|
1048
|
+
guided_params["regex"] = guided_regex
|
|
1049
|
+
|
|
1050
|
+
guided_choice = sanitized_generate_config.pop("guided_choice", None)
|
|
1051
|
+
if guided_choice:
|
|
1052
|
+
guided_params["choice"] = guided_choice
|
|
1053
|
+
|
|
1054
|
+
guided_grammar = sanitized_generate_config.pop("guided_grammar", None)
|
|
1055
|
+
if guided_grammar:
|
|
1056
|
+
guided_params["grammar"] = guided_grammar
|
|
1057
|
+
|
|
1058
|
+
guided_json_object = sanitized_generate_config.pop(
|
|
1059
|
+
"guided_json_object", None
|
|
1030
1060
|
)
|
|
1061
|
+
if guided_json_object:
|
|
1062
|
+
guided_params["json_object"] = guided_json_object
|
|
1031
1063
|
|
|
1032
|
-
|
|
1033
|
-
|
|
1064
|
+
guided_backend = sanitized_generate_config.pop(
|
|
1065
|
+
"guided_decoding_backend", None
|
|
1034
1066
|
)
|
|
1067
|
+
if guided_backend:
|
|
1068
|
+
guided_params["_backend"] = guided_backend
|
|
1069
|
+
|
|
1070
|
+
guided_whitespace_pattern = sanitized_generate_config.pop(
|
|
1071
|
+
"guided_whitespace_pattern", None
|
|
1072
|
+
)
|
|
1073
|
+
if guided_whitespace_pattern:
|
|
1074
|
+
guided_params["whitespace_pattern"] = guided_whitespace_pattern
|
|
1075
|
+
|
|
1076
|
+
# Create GuidedDecodingParams if we have any guided parameters
|
|
1077
|
+
guided_options = None
|
|
1078
|
+
if guided_params:
|
|
1079
|
+
try:
|
|
1080
|
+
guided_options = GuidedDecodingParams(**guided_params)
|
|
1081
|
+
except Exception as e:
|
|
1082
|
+
logger.warning(f"Failed to create GuidedDecodingParams: {e}")
|
|
1083
|
+
guided_options = None
|
|
1084
|
+
|
|
1085
|
+
# Use structured_outputs for vLLM >= 0.11.0, guided_decoding for older versions
|
|
1086
|
+
if (
|
|
1087
|
+
VLLM_VERSION >= version.parse("0.11.0")
|
|
1088
|
+
or VLLM_VERSION.base_version >= "0.11.0"
|
|
1089
|
+
):
|
|
1090
|
+
sampling_params = SamplingParams(
|
|
1091
|
+
structured_outputs=guided_options, **sanitized_generate_config
|
|
1092
|
+
)
|
|
1093
|
+
else:
|
|
1094
|
+
sampling_params = SamplingParams(
|
|
1095
|
+
guided_decoding=guided_options, **sanitized_generate_config
|
|
1096
|
+
)
|
|
1035
1097
|
else:
|
|
1036
|
-
# ignore generate configs
|
|
1098
|
+
# ignore generate configs for older versions
|
|
1037
1099
|
sanitized_generate_config.pop("guided_json", None)
|
|
1038
1100
|
sanitized_generate_config.pop("guided_regex", None)
|
|
1039
1101
|
sanitized_generate_config.pop("guided_choice", None)
|
|
@@ -1236,6 +1298,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
|
|
|
1236
1298
|
) -> Dict:
|
|
1237
1299
|
if not generate_config:
|
|
1238
1300
|
generate_config = {}
|
|
1301
|
+
|
|
1239
1302
|
if "reasoning" in getattr(self.model_family, "model_ability", []):
|
|
1240
1303
|
generate_config.pop("stop", None)
|
|
1241
1304
|
generate_config.pop("stop_token_ids", None)
|
|
@@ -1249,6 +1312,19 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
|
|
|
1249
1312
|
generate_config["stop_token_ids"] = (
|
|
1250
1313
|
self.model_family.stop_token_ids.copy()
|
|
1251
1314
|
)
|
|
1315
|
+
|
|
1316
|
+
# If response_format is present, derive guided_json from its JSON schema
|
|
1317
|
+
if "response_format" in generate_config:
|
|
1318
|
+
resp_format = generate_config["response_format"]
|
|
1319
|
+
if (
|
|
1320
|
+
isinstance(resp_format, dict)
|
|
1321
|
+
and resp_format.get("type") == "json_schema"
|
|
1322
|
+
and "json_schema" in resp_format
|
|
1323
|
+
):
|
|
1324
|
+
schema = resp_format["json_schema"].get("schema_")
|
|
1325
|
+
if schema:
|
|
1326
|
+
generate_config["guided_json"] = schema
|
|
1327
|
+
|
|
1252
1328
|
return generate_config
|
|
1253
1329
|
|
|
1254
1330
|
@staticmethod
|