xinference 1.9.1__py3-none-any.whl → 1.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +400 -3
- xinference/client/restful/async_restful_client.py +20 -3
- xinference/client/restful/restful_client.py +20 -3
- xinference/constants.py +2 -0
- xinference/core/supervisor.py +111 -49
- xinference/core/worker.py +10 -0
- xinference/deploy/cmdline.py +15 -0
- xinference/model/audio/core.py +26 -6
- xinference/model/audio/indextts2.py +166 -0
- xinference/model/audio/kokoro.py +1 -1
- xinference/model/audio/kokoro_zh.py +124 -0
- xinference/model/audio/model_spec.json +58 -1
- xinference/model/embedding/sentence_transformers/core.py +4 -4
- xinference/model/embedding/vllm/core.py +7 -1
- xinference/model/image/model_spec.json +71 -3
- xinference/model/image/stable_diffusion/core.py +13 -4
- xinference/model/llm/__init__.py +4 -0
- xinference/model/llm/core.py +10 -0
- xinference/model/llm/llama_cpp/core.py +1 -0
- xinference/model/llm/llm_family.json +503 -21
- xinference/model/llm/llm_family.py +1 -0
- xinference/model/llm/mlx/core.py +52 -33
- xinference/model/llm/sglang/core.py +32 -55
- xinference/model/llm/tool_parsers/__init__.py +58 -0
- xinference/model/llm/tool_parsers/abstract_tool_parser.py +33 -0
- xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +190 -0
- xinference/model/llm/tool_parsers/deepseek_v3_tool_parser.py +145 -0
- xinference/model/llm/tool_parsers/glm4_tool_parser.py +123 -0
- xinference/model/llm/tool_parsers/llama3_tool_parser.py +77 -0
- xinference/model/llm/tool_parsers/qwen_tool_parser.py +320 -0
- xinference/model/llm/transformers/core.py +1 -1
- xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
- xinference/model/llm/utils.py +138 -53
- xinference/model/llm/vllm/core.py +95 -78
- xinference/thirdparty/audiotools/__init__.py +10 -0
- xinference/thirdparty/audiotools/core/__init__.py +4 -0
- xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
- xinference/thirdparty/audiotools/core/display.py +194 -0
- xinference/thirdparty/audiotools/core/dsp.py +390 -0
- xinference/thirdparty/audiotools/core/effects.py +647 -0
- xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
- xinference/thirdparty/audiotools/core/loudness.py +320 -0
- xinference/thirdparty/audiotools/core/playback.py +252 -0
- xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
- xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
- xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
- xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
- xinference/thirdparty/audiotools/core/util.py +671 -0
- xinference/thirdparty/audiotools/core/whisper.py +97 -0
- xinference/thirdparty/audiotools/data/__init__.py +3 -0
- xinference/thirdparty/audiotools/data/datasets.py +517 -0
- xinference/thirdparty/audiotools/data/preprocess.py +81 -0
- xinference/thirdparty/audiotools/data/transforms.py +1592 -0
- xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
- xinference/thirdparty/audiotools/metrics/distance.py +131 -0
- xinference/thirdparty/audiotools/metrics/quality.py +159 -0
- xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
- xinference/thirdparty/audiotools/ml/__init__.py +5 -0
- xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
- xinference/thirdparty/audiotools/ml/decorators.py +440 -0
- xinference/thirdparty/audiotools/ml/experiment.py +90 -0
- xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
- xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
- xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
- xinference/thirdparty/audiotools/post.py +140 -0
- xinference/thirdparty/audiotools/preference.py +600 -0
- xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
- xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
- xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
- xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
- xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
- xinference/thirdparty/indextts/__init__.py +0 -0
- xinference/thirdparty/indextts/cli.py +65 -0
- xinference/thirdparty/indextts/gpt/__init__.py +0 -0
- xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
- xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
- xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
- xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
- xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
- xinference/thirdparty/indextts/gpt/model.py +713 -0
- xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
- xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
- xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
- xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
- xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
- xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
- xinference/thirdparty/indextts/infer.py +690 -0
- xinference/thirdparty/indextts/infer_v2.py +739 -0
- xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
- xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
- xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
- xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
- xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
- xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
- xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
- xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
- xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
- xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
- xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
- xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
- xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
- xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
- xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
- xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
- xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
- xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
- xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
- xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
- xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
- xinference/thirdparty/indextts/utils/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/arch_util.py +120 -0
- xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
- xinference/thirdparty/indextts/utils/common.py +121 -0
- xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
- xinference/thirdparty/indextts/utils/front.py +536 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
- xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
- xinference/thirdparty/indextts/utils/text_utils.py +41 -0
- xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
- xinference/thirdparty/indextts/utils/utils.py +93 -0
- xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
- xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
- xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
- xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
- xinference/types.py +105 -2
- xinference/ui/gradio/media_interface.py +66 -8
- xinference/ui/web/ui/build/asset-manifest.json +6 -6
- xinference/ui/web/ui/build/index.html +1 -1
- xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
- xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
- xinference/ui/web/ui/build/static/js/main.d192c4f3.js +3 -0
- xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.d192c4f3.js.LICENSE.txt} +0 -7
- xinference/ui/web/ui/build/static/js/main.d192c4f3.js.map +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f995a2425dfb0822fd07127f66ffe9b026883bc156b402eb8bd0b83d52460a93.json +1 -0
- xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
- xinference/ui/web/ui/package-lock.json +0 -34
- xinference/ui/web/ui/package.json +0 -1
- xinference/ui/web/ui/src/locales/en.json +9 -3
- xinference/ui/web/ui/src/locales/ja.json +9 -3
- xinference/ui/web/ui/src/locales/ko.json +9 -3
- xinference/ui/web/ui/src/locales/zh.json +9 -3
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/METADATA +24 -4
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/RECORD +302 -76
- xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
- xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
- xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
- xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
- xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
- xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
- xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
- xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
- xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
- xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
- xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
- xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
- xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
- xinference/ui/web/ui/node_modules/select/bower.json +0 -13
- xinference/ui/web/ui/node_modules/select/package.json +0 -29
- xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/WHEEL +0 -0
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/transformers/multimodal/qwen2_vl.py
CHANGED
@@ -27,11 +27,19 @@ logger = logging.getLogger(__name__)
 
 
 @register_batching_multimodal_models(
-    "qwen2-vl-instruct",
+    "qwen2-vl-instruct",
+    "qwen2.5-vl-instruct",
+    "QvQ-72B-Preview",
+    "Qwen3-VL-Instruct",
+    "Qwen3-VL-Thinking",
 )
 @register_transformer
 @register_non_default_model(
-    "qwen2-vl-instruct",
+    "qwen2-vl-instruct",
+    "qwen2.5-vl-instruct",
+    "QvQ-72B-Preview",
+    "Qwen3-VL-Instruct",
+    "Qwen3-VL-Thinking",
 )
 class Qwen2VLChatModel(PytorchMultiModalModel):
     def _sanitize_model_config(
@@ -47,7 +55,7 @@ class Qwen2VLChatModel(PytorchMultiModalModel):
     def match_json(
         cls, model_family: "LLMFamilyV2", model_spec: "LLMSpecV1", quantization: str
     ) -> bool:
-        if model_spec.model_format not in ["pytorch", "gptq", "awq", "bnb"]:
+        if model_spec.model_format not in ["pytorch", "gptq", "awq", "bnb", "fp8"]:
             return False
         llm_family = model_family.model_family or model_family.model_name
         if "qwen2-vl-instruct".lower() in llm_family.lower():
@@ -56,6 +64,8 @@ class Qwen2VLChatModel(PytorchMultiModalModel):
             return True
         if "qvq-72b-preview".lower() in llm_family.lower():
             return True
+        if "qwen3-vl" in llm_family.lower():
+            return True
         return False
 
     def decide_device(self):
@@ -85,13 +95,19 @@ class Qwen2VLChatModel(PytorchMultiModalModel):
         except ImportError:
             Qwen2_5_VLForConditionalGeneration = None
 
+        try:
+            from transformers import AutoModelForImageTextToText
+        except ImportError:
+            AutoModelForImageTextToText = None
+
         kwargs = self.apply_bnb_quantization()
         llm_family = self.model_family.model_family or self.model_family.model_name
-
-            Qwen2_5_VLForConditionalGeneration
-
-
-
+        if "qwen2.5" in llm_family:
+            model_cls = Qwen2_5_VLForConditionalGeneration
+        elif "qwen3" in llm_family:
+            model_cls = AutoModelForImageTextToText
+        else:
+            model_cls = Qwen2VLForConditionalGeneration
         if model_cls is None:
             raise ImportError("`transformers` version is too old, please upgrade it")
         device = "auto" if self._device == "cuda" else self._device
@@ -118,6 +134,16 @@ class Qwen2VLChatModel(PytorchMultiModalModel):
                 torch_dtype="float16",
                 **kwargs,
             ).eval()
+        elif device == "mps":
+            # MacOS special, see https://github.com/QwenLM/Qwen2.5-VL/issues/761
+            self._model = model_cls.from_pretrained(
+                self.model_path,
+                torch_dtype="bfloat16",
+                device_map=device,
+                attn_implementation="eager",
+                low_cpu_mem_usage=True,
+                trust_remote_code=True,
+            ).eval()
         else:
             self._model = model_cls.from_pretrained(
                 self.model_path,
xinference/model/llm/utils.py
CHANGED
@@ -51,6 +51,7 @@ from ...types import (
 )
 from .core import chat_context_var
 from .reasoning_parser import ReasoningParser
+from .tool_parsers.glm4_tool_parser import Glm4ToolParser
 
 logger = logging.getLogger(__name__)
 
@@ -70,6 +71,10 @@ QWEN_TOOL_CALL_FAMILY = [
     "Qwen3-Thinking",
     "Qwen3-Instruct",
     "Qwen3-Coder",
+    "Qwen3-VL-Instruct",
+    "Qwen3-VL-Thinking",
+    "Qwen3-Next-Instruct",
+    "Qwen3-Next-Thinking",
 ]
 
 GLM4_TOOL_CALL_FAMILY = [
@@ -95,6 +100,13 @@ QWEN_TOOL_CALL_SYMBOLS = ["<tool_call>", "</tool_call>"]
 
 
 class ChatModelMixin:
+
+    def __init__(self):
+        self.model_family = None
+        self.model_uid = None
+        self.reasoning_parser = None
+        self.tool_parser = None
+
     @staticmethod
     @functools.lru_cache
     def _compile_jinja_template(chat_template):
@@ -339,9 +351,7 @@ class ChatModelMixin:
         assert choices is not None
         usage = (
             chunk["usage"]
-            if choices[0]["finish_reason"] is not None
-            and reasoning_parser
-            and reasoning_parser.check_content_parser()
+            if choices and choices[0]["finish_reason"] is not None or not choices
             else None
         )
         chat_chunk = {
@@ -590,16 +600,41 @@ class ChatModelMixin:
             pos2 = content.find(QWEN_TOOL_CALL_SYMBOLS[1])
             if pos2 != -1:
                 content = content[:pos2]
+
+            # Skip empty content after extraction
+            if not content.strip():
+                continue
+
             try:
                 res = json.loads(content, strict=False)
-
-
+                if isinstance(res, dict):
+                    # Check if required fields exist
+                    if "name" in res and "arguments" in res:
+                        results.append((None, res["name"], res["arguments"]))
+                    else:
+                        logger.warning(
+                            "Missing required fields in qwen tool call: %s", content
+                        )
+                        results.append((content, None, None))
+                else:
+                    logger.warning(
+                        "Qwen tool call result is not a dict: %s", content
+                    )
+                    results.append((content, None, None))
+
             except json.JSONDecodeError as e:
                 logger.error(
                     "Can't parse single qwen tool call output: %s. Error: %s",
                     content,
                     e,
                 )
                 results.append((content, None, None))
+            except Exception as e:
+                logger.error(
+                    "Unexpected error parsing qwen tool call: %s. Error: %s",
+                    content,
+                    e,
+                )
+                results.append((content, None, None))
         return results
 
     @classmethod
@@ -757,47 +792,64 @@ class ChatModelMixin:
         logger.debug(f"Tool call content: {result}")
         return result
 
-    @classmethod
     def _post_process_completion_chunk(
-
+        self,
         model_family,
         model_uid,
         c,
         chunk_id=None,
-
-        tool_call_text: Optional[str] = None,
+        previous_texts: List[str] = [""],
     ):
+        if not c.get("choices"):
+            return c
         _id = chunk_id if chunk_id is not None else str(uuid.uuid4())
-        tool_result =
+        tool_result = None
+        finish_reason = None
+        if isinstance(self.tool_parser, Glm4ToolParser):
+            tool_result = self.tool_parser.extract_tool_calls_streaming(
+                [],
+                c,
+                c,
+            )
+        else:
+            finish_reason = c["choices"][0]["finish_reason"]
+            delta_text = c["choices"][0]["delta"]["content"]
+            current_text = (
+                previous_texts[-1] + delta_text if previous_texts else delta_text
+            )
+            tool_result = self.tool_parser.extract_tool_calls_streaming(
+                previous_texts,
+                current_text,
+                delta_text,
+            )
+            previous_texts[-1] = current_text
+        if tool_result is None and not finish_reason:
+            return None
         tool_calls = []
         failed_contents = []
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        finish_reason = "tool_calls" if tool_calls else "stop"
+        content, func, args = tool_result if tool_result else ("", None, None)
+        if func:
+            tool_calls.append(
+                {
+                    "index": 0,
+                    "id": f"call_{_id}",
+                    "type": "function",
+                    "function": {
+                        "name": func,
+                        "arguments": json.dumps(args, ensure_ascii=False),
+                    },
+                }
+            )
+        else:
+            failed_contents.append(content)
 
-
+        finish_reason = "tool_calls" if tool_calls else finish_reason
 
-
-        family = model_family.model_family or model_family.model_name
-        if tool_calls and family in QWEN_TOOL_CALL_FAMILY and content is None:
-            content = ""
+        content = "".join(failed_contents) if failed_contents else None
 
         d = {
             "role": "assistant",
-            "content": content,
+            "content": content if content else "",
             "tool_calls": tool_calls,
         }
 
@@ -805,11 +857,7 @@ class ChatModelMixin:
             usage = c.get("usage")
             assert "prompt_tokens" in usage
         except Exception:
-            usage = {
-                "prompt_tokens": -1,
-                "completion_tokens": -1,
-                "total_tokens": -1,
-            }
+            usage = None
         return {
             "id": "chat" + f"cmpl-{_id}",
             "model": model_uid,
@@ -826,29 +874,32 @@ class ChatModelMixin:
             "usage": usage,
         }
 
-    @classmethod
     def _post_process_completion(
-
+        self,
         model_family,
         model_uid,
         c,
-        reasoning_parser: Optional[ReasoningParser] = None,
     ):
-        if
-
+        if not self.tool_parser:
+            return self._get_final_chat_completion_chunk(c)
+        if self.reasoning_parser:
+            c = self.reasoning_parser.prepare_reasoning_content(c)
         _id = str(uuid.uuid4())
         reasoning_content = None
-        if reasoning_parser and reasoning_parser.check_content_parser():
+        if self.reasoning_parser and self.reasoning_parser.check_content_parser():
             text = c["choices"][0]["text"]
-            reasoning_content, content =
-                text
+            reasoning_content, content = (
+                self.reasoning_parser.extract_reasoning_content(text)
             )
             c["choices"][0]["text"] = content
 
-        tool_result = cls._eval_tool_arguments(model_family, c)
-
         tool_calls = []
         failed_contents = []
+        if isinstance(self.tool_parser, Glm4ToolParser):
+            tool_result = self.tool_parser.extract_tool_calls(c)
+        else:
+            text = c["choices"][0]["text"]
+            tool_result = self.tool_parser.extract_tool_calls(text)
         for content, func, args in tool_result:
            if func:
                 tool_calls.append(
@@ -868,14 +919,9 @@ class ChatModelMixin:
 
         content = "".join(failed_contents) if failed_contents else None
 
-        # fix: qwen tool_call content field return null
-        family = model_family.model_family or model_family.model_name
-        if tool_calls and family in QWEN_TOOL_CALL_FAMILY and content is None:
-            content = ""
-
         m = {
             "role": "assistant",
-            "content": content,
+            "content": content if content else "",
             "tool_calls": tool_calls,
         }
         # add only reasoning_content is None
@@ -943,6 +989,45 @@ class ChatModelMixin:
 
         return transformed_messages
 
+    async def _async_to_tool_completion_chunks(
+        self,
+        chunks: AsyncGenerator[CompletionChunk, None],
+        ctx: Optional[Dict[str, Any]] = None,
+    ) -> AsyncGenerator[ChatCompletionChunk, None]:
+        def set_context():
+            if ctx:
+                chat_context_var.set(ctx)
+
+        i = 0
+        previous_texts = [""]
+        previous_tools_texts = [""]
+        full_text = ""
+        if self.reasoning_parser:
+            set_context()
+            chunks = self.reasoning_parser.prepare_reasoning_content_streaming(chunks)
+        async for completion_chunk in chunks:
+            set_context()
+            chat_chunk = self._to_chat_completion_chunk(
+                completion_chunk, self.reasoning_parser, previous_texts
+            )
+            if (
+                chat_chunk["choices"]
+                and "reasoning_content" in chat_chunk["choices"][0]["delta"]
+                and chat_chunk["choices"][0]["delta"]["reasoning_content"] is not None
+            ):
+                yield chat_chunk
+                continue
+            processed_chunk = self._post_process_completion_chunk(
+                self.model_family,
+                self.model_uid,
+                chat_chunk,
+                previous_texts=previous_tools_texts,
+            )
+            if processed_chunk:
+                yield processed_chunk
+            i += 1
+        logger.debug("Chat finished, output: %s", full_text)
+
 
 def get_model_version(
     model_name: str,
xinference/model/llm/vllm/core.py
CHANGED
@@ -264,6 +264,9 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.4"):
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.5"):
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen3")
 
+if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.0"):
+    VLLM_SUPPORTED_CHAT_MODELS.append("Baichuan-M2")
+
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.1"):
     VLLM_SUPPORTED_CHAT_MODELS.append("minicpm4")
 
@@ -282,10 +285,15 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.0"):
 
 if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.10.0"):
     VLLM_SUPPORTED_CHAT_MODELS.append("gpt-oss")
-    VLLM_SUPPORTED_CHAT_MODELS.append("seed-oss")
 
-if VLLM_INSTALLED and VLLM_VERSION
+if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.2"):
     VLLM_SUPPORTED_CHAT_MODELS.append("seed-oss")
+    VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Instruct")
+    VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Thinking")
+
+if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.10.2"):
+    VLLM_SUPPORTED_VISION_MODEL_LIST.append("Qwen3-VL-Instruct")
+    VLLM_SUPPORTED_VISION_MODEL_LIST.append("Qwen3-VL-Instruct")
 
 
 class VLLMModel(LLM):
@@ -393,6 +401,7 @@ class VLLMModel(LLM):
             self.prepare_parse_reasoning_content(
                 reasoning_content, enable_thinking=enable_thinking
             )
+            self.prepare_parse_tool_calls()
 
         if (
             isinstance(self.model_spec, LlamaCppLLMSpecV2)
@@ -773,7 +782,6 @@ class VLLMModel(LLM):
         sanitized = VLLMGenerateConfig()
 
         response_format = generate_config.pop("response_format", None)
-        guided_decoding_backend = generate_config.get("guided_decoding_backend", None)
         guided_json_object = None
         guided_json = None
 
@@ -784,8 +792,6 @@ class VLLMModel(LLM):
             json_schema = response_format.get("json_schema")
             assert json_schema is not None
             guided_json = json_schema.get("json_schema")
-            if guided_decoding_backend is None:
-                guided_decoding_backend = "outlines"
 
         sanitized.setdefault("lora_name", generate_config.get("lora_name", None))
         sanitized.setdefault("n", generate_config.get("n", 1))
@@ -833,10 +839,6 @@ class VLLMModel(LLM):
             "guided_json_object",
             generate_config.get("guided_json_object", guided_json_object),
         )
-        sanitized.setdefault(
-            "guided_decoding_backend",
-            generate_config.get("guided_decoding_backend", guided_decoding_backend),
-        )
 
         return sanitized
 
@@ -940,9 +942,21 @@ class VLLMModel(LLM):
 
     async def _get_tokenizer(self, lora_request: Any) -> Any:
         try:
-
+            # vLLM 0.11.0+ get_tokenizer doesn't accept lora_request parameter
+            if (
+                VLLM_VERSION >= version.parse("0.11.0")
+                or VLLM_VERSION.base_version >= "0.11.0"
+            ):
+                return await self._engine.get_tokenizer()  # type: ignore
+            else:
+                return await self._engine.get_tokenizer(lora_request)  # type: ignore
         except AttributeError:
-
+            # Fallback to get_tokenizer_async for older versions
+            try:
+                return await self._engine.get_tokenizer_async(lora_request)  # type: ignore
+            except (AttributeError, TypeError):
+                # If all else fails, try without parameters
+                return await self._engine.get_tokenizer()  # type: ignore
 
     def _tokenize(self, tokenizer: Any, prompt: str, config: dict) -> List[int]:
         truncate_prompt_tokens = config.get("truncate_prompt_tokens")
@@ -1023,23 +1037,65 @@ class VLLMModel(LLM):
             # guided decoding only available for vllm >= 0.6.3
             from vllm.sampling_params import GuidedDecodingParams
 
-
-
-
-
-
-
-
-
-
-
+            # Extract guided decoding parameters
+            guided_params: dict[str, Any] = {}
+            guided_json = sanitized_generate_config.pop("guided_json", None)
+            if guided_json:
+                guided_params["json"] = guided_json
+
+            guided_regex = sanitized_generate_config.pop("guided_regex", None)
+            if guided_regex:
+                guided_params["regex"] = guided_regex
+
+            guided_choice = sanitized_generate_config.pop("guided_choice", None)
+            if guided_choice:
+                guided_params["choice"] = guided_choice
+
+            guided_grammar = sanitized_generate_config.pop("guided_grammar", None)
+            if guided_grammar:
+                guided_params["grammar"] = guided_grammar
+
+            guided_json_object = sanitized_generate_config.pop(
+                "guided_json_object", None
             )
+            if guided_json_object:
+                guided_params["json_object"] = guided_json_object
 
-
-
+            guided_backend = sanitized_generate_config.pop(
+                "guided_decoding_backend", None
             )
+            if guided_backend:
+                guided_params["_backend"] = guided_backend
+
+            guided_whitespace_pattern = sanitized_generate_config.pop(
+                "guided_whitespace_pattern", None
+            )
+            if guided_whitespace_pattern:
+                guided_params["whitespace_pattern"] = guided_whitespace_pattern
+
+            # Create GuidedDecodingParams if we have any guided parameters
+            guided_options = None
+            if guided_params:
+                try:
+                    guided_options = GuidedDecodingParams(**guided_params)
+                except Exception as e:
+                    logger.warning(f"Failed to create GuidedDecodingParams: {e}")
+                    guided_options = None
+
+            # Use structured_outputs for vLLM >= 0.11.0, guided_decoding for older versions
+            if (
+                VLLM_VERSION >= version.parse("0.11.0")
+                or VLLM_VERSION.base_version >= "0.11.0"
+            ):
+                sampling_params = SamplingParams(
+                    structured_outputs=guided_options, **sanitized_generate_config
+                )
+            else:
+                sampling_params = SamplingParams(
+                    guided_decoding=guided_options, **sanitized_generate_config
+                )
         else:
-            # ignore generate configs
+            # ignore generate configs for older versions
             sanitized_generate_config.pop("guided_json", None)
             sanitized_generate_config.pop("guided_regex", None)
             sanitized_generate_config.pop("guided_choice", None)
@@ -1242,6 +1298,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
     ) -> Dict:
         if not generate_config:
             generate_config = {}
+
         if "reasoning" in getattr(self.model_family, "model_ability", []):
             generate_config.pop("stop", None)
             generate_config.pop("stop_token_ids", None)
@@ -1255,6 +1312,19 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
             generate_config["stop_token_ids"] = (
                 self.model_family.stop_token_ids.copy()
             )
+
+        # if response_format exists, generate guided_json
+        if "response_format" in generate_config:
+            resp_format = generate_config["response_format"]
+            if (
+                isinstance(resp_format, dict)
+                and resp_format.get("type") == "json_schema"
+                and "json_schema" in resp_format
+            ):
+                schema = resp_format["json_schema"].get("schema_")
+                if schema:
+                    generate_config["guided_json"] = schema
+
         return generate_config
 
     @staticmethod
@@ -1291,59 +1361,6 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
 
         return processed_messages
 
-    async def _async_to_tool_completion_chunks(
-        self,
-        chunks: AsyncGenerator[CompletionChunk, None],
-        ctx: Optional[Dict[str, Any]] = {},
-    ) -> AsyncGenerator[ChatCompletionChunk, None]:
-        def set_context():
-            if ctx:
-                chat_context_var.set(ctx)
-
-        i = 0
-        previous_texts = [""]
-        tool_call = False
-        tool_call_texts = [""]
-        full_text = ""
-        if self.reasoning_parser:
-            set_context()
-            chunks = self.reasoning_parser.prepare_reasoning_content_streaming(chunks)
-        async for chunk in chunks:
-            set_context()
-            if i == 0:
-                for first_chunk in self._get_first_chat_completion_chunk(
-                    chunk, self.reasoning_parser
-                ):
-                    yield first_chunk
-            # usage
-            choices = chunk.get("choices")
-            if not choices:
-                yield self._get_final_chat_completion_chunk(chunk)
-            else:
-                full_text += chunk["choices"][0]["text"]
-                if self.is_tool_call_chunk_start(chunk):
-                    tool_call = True
-                if tool_call:
-                    tool_call_text = tool_call_texts[-1]
-                    tool_call_text += chunk["choices"][0]["text"]
-                    tool_call_texts.append(tool_call_text)
-                    if self.is_tool_call_chunk_end(chunk):
-                        yield self._post_process_completion_chunk(
-                            self.model_family,
-                            self.model_uid,
-                            chunk,
-                            reasoning_parser=self.reasoning_parser,
-                            tool_call_text=tool_call_text,
-                        )
-                        tool_call = False
-                        tool_call_texts = [""]
-                else:
-                    yield self._to_chat_completion_chunk(
-                        chunk, self.reasoning_parser, previous_texts
-                    )
-            i += 1
-        logger.debug("Chat finished, output: %s", full_text)
-
     @vllm_check
     async def async_chat(
         self,
@@ -1408,7 +1425,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         assert not isinstance(c, AsyncGenerator)
         if tools:
             return self._post_process_completion(
-                self.model_family, self.model_uid, c
+                self.model_family, self.model_uid, c
             )
         return self._to_chat_completion(c, self.reasoning_parser)