xinference 1.9.1__py3-none-any.whl → 1.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +400 -3
- xinference/client/restful/async_restful_client.py +20 -3
- xinference/client/restful/restful_client.py +20 -3
- xinference/constants.py +2 -0
- xinference/core/supervisor.py +111 -49
- xinference/core/worker.py +10 -0
- xinference/deploy/cmdline.py +15 -0
- xinference/model/audio/core.py +26 -6
- xinference/model/audio/indextts2.py +166 -0
- xinference/model/audio/kokoro.py +1 -1
- xinference/model/audio/kokoro_zh.py +124 -0
- xinference/model/audio/model_spec.json +58 -1
- xinference/model/embedding/sentence_transformers/core.py +4 -4
- xinference/model/embedding/vllm/core.py +7 -1
- xinference/model/image/model_spec.json +71 -3
- xinference/model/image/stable_diffusion/core.py +13 -4
- xinference/model/llm/__init__.py +4 -0
- xinference/model/llm/core.py +10 -0
- xinference/model/llm/llama_cpp/core.py +1 -0
- xinference/model/llm/llm_family.json +503 -21
- xinference/model/llm/llm_family.py +1 -0
- xinference/model/llm/mlx/core.py +52 -33
- xinference/model/llm/sglang/core.py +32 -55
- xinference/model/llm/tool_parsers/__init__.py +58 -0
- xinference/model/llm/tool_parsers/abstract_tool_parser.py +33 -0
- xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +190 -0
- xinference/model/llm/tool_parsers/deepseek_v3_tool_parser.py +145 -0
- xinference/model/llm/tool_parsers/glm4_tool_parser.py +123 -0
- xinference/model/llm/tool_parsers/llama3_tool_parser.py +77 -0
- xinference/model/llm/tool_parsers/qwen_tool_parser.py +320 -0
- xinference/model/llm/transformers/core.py +1 -1
- xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
- xinference/model/llm/utils.py +138 -53
- xinference/model/llm/vllm/core.py +95 -78
- xinference/thirdparty/audiotools/__init__.py +10 -0
- xinference/thirdparty/audiotools/core/__init__.py +4 -0
- xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
- xinference/thirdparty/audiotools/core/display.py +194 -0
- xinference/thirdparty/audiotools/core/dsp.py +390 -0
- xinference/thirdparty/audiotools/core/effects.py +647 -0
- xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
- xinference/thirdparty/audiotools/core/loudness.py +320 -0
- xinference/thirdparty/audiotools/core/playback.py +252 -0
- xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
- xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
- xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
- xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
- xinference/thirdparty/audiotools/core/util.py +671 -0
- xinference/thirdparty/audiotools/core/whisper.py +97 -0
- xinference/thirdparty/audiotools/data/__init__.py +3 -0
- xinference/thirdparty/audiotools/data/datasets.py +517 -0
- xinference/thirdparty/audiotools/data/preprocess.py +81 -0
- xinference/thirdparty/audiotools/data/transforms.py +1592 -0
- xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
- xinference/thirdparty/audiotools/metrics/distance.py +131 -0
- xinference/thirdparty/audiotools/metrics/quality.py +159 -0
- xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
- xinference/thirdparty/audiotools/ml/__init__.py +5 -0
- xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
- xinference/thirdparty/audiotools/ml/decorators.py +440 -0
- xinference/thirdparty/audiotools/ml/experiment.py +90 -0
- xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
- xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
- xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
- xinference/thirdparty/audiotools/post.py +140 -0
- xinference/thirdparty/audiotools/preference.py +600 -0
- xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
- xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
- xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
- xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
- xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
- xinference/thirdparty/indextts/__init__.py +0 -0
- xinference/thirdparty/indextts/cli.py +65 -0
- xinference/thirdparty/indextts/gpt/__init__.py +0 -0
- xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
- xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
- xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
- xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
- xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
- xinference/thirdparty/indextts/gpt/model.py +713 -0
- xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
- xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
- xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
- xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
- xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
- xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
- xinference/thirdparty/indextts/infer.py +690 -0
- xinference/thirdparty/indextts/infer_v2.py +739 -0
- xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
- xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
- xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
- xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
- xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
- xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
- xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
- xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
- xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
- xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
- xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
- xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
- xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
- xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
- xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
- xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
- xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
- xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
- xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
- xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
- xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
- xinference/thirdparty/indextts/utils/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/arch_util.py +120 -0
- xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
- xinference/thirdparty/indextts/utils/common.py +121 -0
- xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
- xinference/thirdparty/indextts/utils/front.py +536 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
- xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
- xinference/thirdparty/indextts/utils/text_utils.py +41 -0
- xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
- xinference/thirdparty/indextts/utils/utils.py +93 -0
- xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
- xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
- xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
- xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
- xinference/types.py +105 -2
- xinference/ui/gradio/media_interface.py +66 -8
- xinference/ui/web/ui/build/asset-manifest.json +6 -6
- xinference/ui/web/ui/build/index.html +1 -1
- xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
- xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
- xinference/ui/web/ui/build/static/js/main.d192c4f3.js +3 -0
- xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.d192c4f3.js.LICENSE.txt} +0 -7
- xinference/ui/web/ui/build/static/js/main.d192c4f3.js.map +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f995a2425dfb0822fd07127f66ffe9b026883bc156b402eb8bd0b83d52460a93.json +1 -0
- xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
- xinference/ui/web/ui/package-lock.json +0 -34
- xinference/ui/web/ui/package.json +0 -1
- xinference/ui/web/ui/src/locales/en.json +9 -3
- xinference/ui/web/ui/src/locales/ja.json +9 -3
- xinference/ui/web/ui/src/locales/ko.json +9 -3
- xinference/ui/web/ui/src/locales/zh.json +9 -3
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/METADATA +24 -4
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/RECORD +302 -76
- xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
- xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
- xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
- xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
- xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
- xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
- xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
- xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
- xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
- xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
- xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
- xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
- xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
- xinference/ui/web/ui/node_modules/select/bower.json +0 -13
- xinference/ui/web/ui/node_modules/select/package.json +0 -29
- xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/WHEEL +0 -0
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/mlx/core.py
CHANGED
|
@@ -148,6 +148,16 @@ class MLXModel(LLM):
|
|
|
148
148
|
# to call aynsc method with asyncio.run_coroutine_threadsafe
|
|
149
149
|
self._loop = loop # type: ignore
|
|
150
150
|
|
|
151
|
+
def _cleanup_memory(self):
|
|
152
|
+
import gc
|
|
153
|
+
|
|
154
|
+
import mlx.core as mx
|
|
155
|
+
|
|
156
|
+
# mandatory recycling
|
|
157
|
+
gc.collect()
|
|
158
|
+
# clear the MLX cache
|
|
159
|
+
mx.clear_cache()
|
|
160
|
+
|
|
151
161
|
@property
|
|
152
162
|
def driver_info(self) -> Optional[dict]:
|
|
153
163
|
return self._driver_info
|
|
@@ -333,6 +343,7 @@ class MLXModel(LLM):
|
|
|
333
343
|
self.prepare_parse_reasoning_content(
|
|
334
344
|
reasoning_content, enable_thinking=enable_thinking
|
|
335
345
|
)
|
|
346
|
+
self.prepare_parse_tool_calls()
|
|
336
347
|
|
|
337
348
|
kwargs = {}
|
|
338
349
|
kwargs["revision"] = self._model_config.get(
|
|
@@ -458,14 +469,18 @@ class MLXModel(LLM):
|
|
|
458
469
|
repetition_penalty=kwargs.pop("repetition_penalty"),
|
|
459
470
|
repetition_context_size=kwargs.pop("repetition_context_size"),
|
|
460
471
|
)
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
472
|
+
try:
|
|
473
|
+
yield from stream_generate(
|
|
474
|
+
self._model,
|
|
475
|
+
self._tokenizer,
|
|
476
|
+
prompt_token_ids,
|
|
477
|
+
sampler=sampler,
|
|
478
|
+
logits_processors=logits_processors,
|
|
479
|
+
**kwargs,
|
|
480
|
+
)
|
|
481
|
+
finally:
|
|
482
|
+
# after completing the inference, clear the memory.
|
|
483
|
+
self._cleanup_memory()
|
|
469
484
|
|
|
470
485
|
def _prepare_inputs(
|
|
471
486
|
self, prompt: Union[str, Dict[str, Any]], kwargs
|
|
@@ -755,7 +770,7 @@ class MLXChatModel(MLXModel, ChatModelMixin):
|
|
|
755
770
|
assert not isinstance(c, Iterator)
|
|
756
771
|
if tools:
|
|
757
772
|
return self._post_process_completion(
|
|
758
|
-
self.model_family, self.model_uid, c
|
|
773
|
+
self.model_family, self.model_uid, c
|
|
759
774
|
)
|
|
760
775
|
return self._to_chat_completion(c, self.reasoning_parser)
|
|
761
776
|
|
|
@@ -831,18 +846,32 @@ class MLXVisionModel(MLXModel, ChatModelMixin):
|
|
|
831
846
|
|
|
832
847
|
detokenizer.reset()
|
|
833
848
|
tic = time.perf_counter()
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
849
|
+
try:
|
|
850
|
+
for n, (token, logprobs) in enumerate(
|
|
851
|
+
generate_step(input_ids, self._model, pixel_values, mask, **kwargs),
|
|
852
|
+
):
|
|
853
|
+
if n == 0:
|
|
854
|
+
prompt_time = time.perf_counter() - tic
|
|
855
|
+
prompt_tps = len(input_ids) / prompt_time
|
|
856
|
+
tic = time.perf_counter()
|
|
857
|
+
if token == tokenizer.eos_token_id:
|
|
858
|
+
break
|
|
859
|
+
detokenizer.add_token(token)
|
|
860
|
+
|
|
861
|
+
# Yield the last segment if streaming
|
|
862
|
+
yield GenerationResponse(
|
|
863
|
+
text=detokenizer.last_segment,
|
|
864
|
+
token=token,
|
|
865
|
+
logprobs=logprobs,
|
|
866
|
+
from_draft=False,
|
|
867
|
+
prompt_tokens=len(input_ids),
|
|
868
|
+
prompt_tps=prompt_tps,
|
|
869
|
+
generation_tokens=n + 1,
|
|
870
|
+
generation_tps=(n + 1) / (time.perf_counter() - tic),
|
|
871
|
+
peak_memory=mx.metal.get_peak_memory() / 1e9,
|
|
872
|
+
)
|
|
844
873
|
|
|
845
|
-
|
|
874
|
+
detokenizer.finalize()
|
|
846
875
|
yield GenerationResponse(
|
|
847
876
|
text=detokenizer.last_segment,
|
|
848
877
|
token=token,
|
|
@@ -854,19 +883,9 @@ class MLXVisionModel(MLXModel, ChatModelMixin):
|
|
|
854
883
|
generation_tps=(n + 1) / (time.perf_counter() - tic),
|
|
855
884
|
peak_memory=mx.metal.get_peak_memory() / 1e9,
|
|
856
885
|
)
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
text=detokenizer.last_segment,
|
|
861
|
-
token=token,
|
|
862
|
-
logprobs=logprobs,
|
|
863
|
-
from_draft=False,
|
|
864
|
-
prompt_tokens=len(input_ids),
|
|
865
|
-
prompt_tps=prompt_tps,
|
|
866
|
-
generation_tokens=n + 1,
|
|
867
|
-
generation_tps=(n + 1) / (time.perf_counter() - tic),
|
|
868
|
-
peak_memory=mx.metal.get_peak_memory() / 1e9,
|
|
869
|
-
)
|
|
886
|
+
finally:
|
|
887
|
+
# after completing the inference, clear the memory
|
|
888
|
+
self._cleanup_memory()
|
|
870
889
|
|
|
871
890
|
def _prepare_inputs(
|
|
872
891
|
self, prompt: Union[str, Dict[str, Any]], kwargs
|
|
xinference/model/llm/sglang/core.py
CHANGED
|
@@ -73,6 +73,7 @@ class SGLANGGenerateConfig(TypedDict, total=False):
|
|
|
73
73
|
stream: bool
|
|
74
74
|
stream_options: Optional[Union[dict, None]]
|
|
75
75
|
json_schema: Optional[dict]
|
|
76
|
+
response_format: dict
|
|
76
77
|
|
|
77
78
|
|
|
78
79
|
try:
|
|
@@ -175,6 +176,7 @@ class SGLANGModel(LLM):
|
|
|
175
176
|
self.prepare_parse_reasoning_content(
|
|
176
177
|
reasoning_content, enable_thinking=enable_thinking
|
|
177
178
|
)
|
|
179
|
+
self.prepare_parse_tool_calls()
|
|
178
180
|
|
|
179
181
|
# Fix: GH#2169
|
|
180
182
|
if sgl.__version__ >= "0.2.14":
|
|
@@ -316,13 +318,16 @@ class SGLANGModel(LLM):
|
|
|
316
318
|
stream_options = generate_config.get("stream_options")
|
|
317
319
|
generate_config.setdefault("stream_options", stream_options)
|
|
318
320
|
generate_config.setdefault("ignore_eos", False)
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
.pop("json_schema",
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
321
|
+
response_format = generate_config.pop("response_format", None)
|
|
322
|
+
if response_format:
|
|
323
|
+
json_schema_config = response_format.pop("json_schema", None)
|
|
324
|
+
json_schema = None
|
|
325
|
+
if "schema_" in json_schema_config:
|
|
326
|
+
json_schema = json_schema_config.pop("schema_")
|
|
327
|
+
elif "schema" in json_schema_config:
|
|
328
|
+
json_schema = json_schema_config.pop("schema")
|
|
329
|
+
if json_schema:
|
|
330
|
+
generate_config.setdefault("json_schema", json.dumps(json_schema)) # type: ignore
|
|
326
331
|
|
|
327
332
|
return generate_config
|
|
328
333
|
|
|
@@ -355,22 +360,31 @@ class SGLANGModel(LLM):
|
|
|
355
360
|
|
|
356
361
|
@staticmethod
|
|
357
362
|
def _convert_state_to_completion_chunk(
|
|
358
|
-
request_id: str, model: str, output_text: str
|
|
363
|
+
request_id: str, model: str, output_text: str, meta_info: Dict
|
|
359
364
|
) -> CompletionChunk:
|
|
365
|
+
finish_reason = meta_info.get("finish_reason", None)
|
|
366
|
+
if isinstance(finish_reason, dict) and "type" in finish_reason:
|
|
367
|
+
finish_reason = finish_reason["type"]
|
|
360
368
|
choices: List[CompletionChoice] = [
|
|
361
369
|
CompletionChoice(
|
|
362
370
|
text=output_text,
|
|
363
371
|
index=0,
|
|
364
372
|
logprobs=None,
|
|
365
|
-
finish_reason=
|
|
373
|
+
finish_reason=finish_reason,
|
|
366
374
|
)
|
|
367
375
|
]
|
|
376
|
+
usage = CompletionUsage(
|
|
377
|
+
prompt_tokens=meta_info["prompt_tokens"],
|
|
378
|
+
completion_tokens=meta_info["completion_tokens"],
|
|
379
|
+
total_tokens=meta_info["prompt_tokens"] + meta_info["completion_tokens"],
|
|
380
|
+
)
|
|
368
381
|
chunk = CompletionChunk(
|
|
369
382
|
id=request_id,
|
|
370
383
|
object="text_completion",
|
|
371
384
|
created=int(time.time()),
|
|
372
385
|
model=model,
|
|
373
386
|
choices=choices,
|
|
387
|
+
usage=usage,
|
|
374
388
|
)
|
|
375
389
|
return chunk
|
|
376
390
|
|
|
@@ -378,12 +392,15 @@ class SGLANGModel(LLM):
|
|
|
378
392
|
def _convert_state_to_completion(
|
|
379
393
|
request_id: str, model: str, output_text: str, meta_info: Dict
|
|
380
394
|
) -> Completion:
|
|
395
|
+
finish_reason = meta_info.get("finish_reason", None)
|
|
396
|
+
if isinstance(finish_reason, dict) and "type" in finish_reason:
|
|
397
|
+
finish_reason = finish_reason["type"]
|
|
381
398
|
choices = [
|
|
382
399
|
CompletionChoice(
|
|
383
400
|
text=output_text,
|
|
384
401
|
index=0,
|
|
385
402
|
logprobs=None,
|
|
386
|
-
finish_reason=
|
|
403
|
+
finish_reason=finish_reason,
|
|
387
404
|
)
|
|
388
405
|
]
|
|
389
406
|
|
|
@@ -512,7 +529,10 @@ class SGLANGModel(LLM):
|
|
|
512
529
|
prompt, image_data, **sanitized_generate_config
|
|
513
530
|
):
|
|
514
531
|
chunk = self._convert_state_to_completion_chunk(
|
|
515
|
-
request_id,
|
|
532
|
+
request_id,
|
|
533
|
+
self.model_uid,
|
|
534
|
+
output_text=out,
|
|
535
|
+
meta_info=meta_info,
|
|
516
536
|
)
|
|
517
537
|
complete_response += out
|
|
518
538
|
finish_reason = meta_info["finish_reason"]
|
|
@@ -646,49 +666,6 @@ class SGLANGChatModel(SGLANGModel, ChatModelMixin):
|
|
|
646
666
|
def is_tool_call_chunk_end(chunk):
|
|
647
667
|
return chunk["choices"][0]["text"].endswith(QWEN_TOOL_CALL_SYMBOLS[1])
|
|
648
668
|
|
|
649
|
-
async def _async_to_tool_completion_chunks(
|
|
650
|
-
self,
|
|
651
|
-
chunks: AsyncGenerator[CompletionChunk, None],
|
|
652
|
-
) -> AsyncGenerator[ChatCompletionChunk, None]:
|
|
653
|
-
i = 0
|
|
654
|
-
previous_texts = [""]
|
|
655
|
-
tool_call = False
|
|
656
|
-
tool_call_texts = [""]
|
|
657
|
-
if self.reasoning_parser:
|
|
658
|
-
chunks = self.reasoning_parser.prepare_reasoning_content_streaming(chunks)
|
|
659
|
-
async for chunk in chunks:
|
|
660
|
-
if i == 0:
|
|
661
|
-
for first_chunk in self._get_first_chat_completion_chunk(
|
|
662
|
-
chunk, self.reasoning_parser
|
|
663
|
-
):
|
|
664
|
-
yield first_chunk
|
|
665
|
-
# usage
|
|
666
|
-
choices = chunk.get("choices")
|
|
667
|
-
if not choices:
|
|
668
|
-
yield self._get_final_chat_completion_chunk(chunk)
|
|
669
|
-
else:
|
|
670
|
-
if self.is_tool_call_chunk_start(chunk):
|
|
671
|
-
tool_call = True
|
|
672
|
-
if tool_call:
|
|
673
|
-
tool_call_text = tool_call_texts[-1]
|
|
674
|
-
tool_call_text += chunk["choices"][0]["text"]
|
|
675
|
-
tool_call_texts.append(tool_call_text)
|
|
676
|
-
if self.is_tool_call_chunk_end(chunk):
|
|
677
|
-
yield self._post_process_completion_chunk(
|
|
678
|
-
self.model_family,
|
|
679
|
-
self.model_uid,
|
|
680
|
-
chunk,
|
|
681
|
-
reasoning_parser=self.reasoning_parser,
|
|
682
|
-
tool_call_text=tool_call_text,
|
|
683
|
-
)
|
|
684
|
-
tool_call = False
|
|
685
|
-
tool_call_texts = [""]
|
|
686
|
-
else:
|
|
687
|
-
yield self._to_chat_completion_chunk(
|
|
688
|
-
chunk, self.reasoning_parser, previous_texts
|
|
689
|
-
)
|
|
690
|
-
i += 1
|
|
691
|
-
|
|
692
669
|
async def async_chat(
|
|
693
670
|
self,
|
|
694
671
|
messages: List[Dict],
|
|
@@ -731,7 +708,7 @@ class SGLANGChatModel(SGLANGModel, ChatModelMixin):
|
|
|
731
708
|
assert not isinstance(c, AsyncGenerator)
|
|
732
709
|
if tools:
|
|
733
710
|
return self._post_process_completion(
|
|
734
|
-
self.model_family, self.model_uid, c
|
|
711
|
+
self.model_family, self.model_uid, c
|
|
735
712
|
)
|
|
736
713
|
return self._to_chat_completion(c, self.reasoning_parser)
|
|
737
714
|
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from functools import wraps
|
|
2
|
+
from typing import Any, Callable, Dict, Type
|
|
3
|
+
|
|
4
|
+
# Name -> parser class lookup table, filled in by ``register_tool_parser``.
TOOL_PARSERS: Dict[str, Type[Any]] = {}


def register_tool_parser(name: str):
    """
    Build a class decorator that records a tool parser in ``TOOL_PARSERS``.

    Applying the returned decorator stores the decorated class under
    ``name`` and hands the class back untouched, so parser modules register
    themselves simply by being imported.

    Args:
        name (str): Key to store the parser class under, e.g. "qwen" or
            "glm4".

    Returns:
        Callable: A decorator taking a class and returning that same class.

    Example:
        @register_tool_parser("qwen")
        class QwenToolParser(ToolParser):
            ...

    Note:
        Registering a second class under an existing name replaces the
        earlier entry.
    """

    def _register(parser_cls: Type[Any]) -> Type[Any]:
        """Store ``parser_cls`` under ``name`` and return it unchanged."""
        TOOL_PARSERS.update({name: parser_cls})
        return parser_cls

    return _register
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# Import all tool parser modules to trigger decorator registration
|
|
51
|
+
# This ensures all tool parsers are automatically registered when this module is imported
|
|
52
|
+
from . import (
|
|
53
|
+
deepseek_r1_tool_parser,
|
|
54
|
+
deepseek_v3_tool_parser,
|
|
55
|
+
glm4_tool_parser,
|
|
56
|
+
llama3_tool_parser,
|
|
57
|
+
qwen_tool_parser,
|
|
58
|
+
)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
class ToolParser:
    """
    Base class for model-specific tool-call parsers.

    It is not meant to be used directly: both methods raise
    ``NotImplementedError`` and are expected to be overridden by derived
    classes.
    """

    def extract_tool_calls(self, model_output: str):
        """
        Extract tool calls from a complete model-generated string.

        Intended for non-streaming responses, where the entire model
        response is available before anything is sent to the client.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        message = "AbstractToolParser.extract_tool_calls has not been implemented!"
        raise NotImplementedError(message)

    def extract_tool_calls_streaming(
        self, previous_text, current_text: str, delta_text: str
    ):
        """
        Extract tool calls from an incomplete (streaming) response.

        Subclasses receive the previously seen text, the text accumulated
        so far and the newest delta, and may keep parsing state on the
        instance between calls.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        message = (
            "AbstractToolParser.extract_tool_calls_streaming has not been "
            "implemented!"
        )
        raise NotImplementedError(message)
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import re
|
|
4
|
+
from typing import Any, List, Optional, Tuple
|
|
5
|
+
|
|
6
|
+
from . import register_tool_parser
|
|
7
|
+
from .abstract_tool_parser import ToolParser
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@register_tool_parser("deepseek-r1")
class DeepseekR1ToolParser(ToolParser):
    """
    Tool parser implementation for DeepSeek R1 model.

    Recognises tool calls written as a begin marker, the literal word
    ``function``, a separator marker, the function name on the rest of the
    line, a fenced ``json`` block holding the arguments, and an end marker.
    One or more such calls may be enclosed in a "tool calls" wrapper block.
    """

    # Bar character used inside the special markers. Both the ASCII "|" and
    # the fullwidth bar (U+FF5C) are accepted.
    # NOTE(review): DeepSeek's published tokenizer appears to spell these
    # markers with the fullwidth bar - confirm against the deployed model.
    _BAR = "[|\uff5c]"

    def __init__(self):
        """
        Initialize the DeepSeek R1 tool parser.
        """
        super().__init__()

        # Sentinel tokens for streaming mode
        self.think_start_token: str = "<think>"
        self.think_end_token: str = "</think>"
        self.tool_call_start_token: str = "<|tool▁call▁begin|>"
        self.tool_call_end_token: str = "<|tool▁call▁end|>"

        bar = self._BAR

        # A single tool call: captures (function name, raw JSON arguments).
        # Every bar is matched through a character class; a bare "|" in the
        # pattern would be regex alternation and split the pattern apart.
        self.tool_calls_regex = (
            rf"<{bar}tool▁call▁begin{bar}>function<{bar}tool▁sep{bar}>([^\n]+)\n"
            rf"```json\n(.*?)\n```<{bar}tool▁call▁end{bar}>"
        )

        # The entire tool-calls wrapper block. <think> blocks are
        # intentionally NOT matched here, so the text preceding the wrapper
        # (think block plus any narrative) stays together as one chunk.
        self.content_regex = (
            rf"(<{bar}tool▁calls▁begin{bar}>.*?<{bar}tool▁calls▁end{bar}>)"
        )

        # Cheap presence check used before any real parsing is attempted.
        self._tool_call_start_regex = rf"<{bar}tool▁call▁begin{bar}>"

    def extract_tool_calls(
        self, model_output: str
    ) -> List[Tuple[Optional[str], Optional[str], Optional[dict]]]:
        """
        Extract tool calls from complete model output.

        The output is split into wrapper blocks and surrounding text. Every
        tool call found in a block is parsed; identical calls (same function
        name and same arguments) are reported once. Blocks without any
        parsable tool call are returned as plain content when non-blank.

        Args:
            model_output (str): The complete output string from the model.

        Returns:
            List[Tuple[Optional[str], Optional[str], Optional[dict]]]:
                One tuple per item, in order of appearance:
                - content (str or None): raw text when the item is not a
                  successfully parsed tool call, otherwise None
                - function_name (str or None): name of the function to call
                - arguments (dict or None): parsed function arguments

        Example:
            For an output holding a single call to ``get_current_weather``
            whose JSON block is ``{"location": "上海", "unit": "celsius"}``
            the result is::

                [(None, 'get_current_weather',
                  {'location': '上海', 'unit': 'celsius'})]
        """
        # If no tool call marker is present, return original output as content
        if re.search(self._tool_call_start_regex, model_output) is None:
            return [(model_output, None, None)]

        seen_keys = set()
        results: List[Tuple[Optional[str], Optional[str], Optional[dict]]] = []

        for content_block in self._get_function_calls(model_output):
            matches = re.findall(self.tool_calls_regex, content_block, re.DOTALL)
            if not matches:
                # Plain text, thinking block or malformed tool call: keep
                # it as content unless it is only whitespace.
                if content_block.strip():
                    results.append((content_block, None, None))
                continue

            # A wrapper block may carry several tool calls; handle each one.
            for func_name, raw_json in matches:
                tool_call_tuple, dedup_key = self._parse_tool_call(
                    func_name, raw_json
                )
                if dedup_key not in seen_keys:
                    seen_keys.add(dedup_key)
                    results.append(tool_call_tuple)

        return results

    @staticmethod
    def _parse_tool_call(func_name: str, raw_json: str) -> Tuple[tuple, Any]:
        """
        Turn one matched tool call into a result tuple plus a dedup key.

        Args:
            func_name (str): Function name captured from the tool call.
            raw_json (str): Raw text of the fenced JSON arguments block.

        Returns:
            Tuple[tuple, Any]: ``((None, func_name, arguments), key)`` on
            success, or ``((raw_json, None, None), raw_json)`` when the
            arguments are not a valid JSON object.
        """
        try:
            arguments = json.loads(raw_json)
        except ValueError as e:  # json.JSONDecodeError is a ValueError
            logger.warning(
                "Failed to parse tool call JSON: %s, error: %s", raw_json, e
            )
            return (raw_json, None, None), raw_json

        if not isinstance(arguments, dict):
            logger.warning(
                "Failed to parse tool call JSON: %s, error: %s",
                raw_json,
                "arguments must be a JSON object",
            )
            return (raw_json, None, None), raw_json

        # Canonical JSON text is hashable even when argument values are
        # nested lists/dicts, and is independent of key order.
        dedup_key = (func_name, json.dumps(arguments, sort_keys=True))
        return (None, func_name, arguments), dedup_key

    def _get_function_calls(self, model_output: str) -> List[str]:
        """
        Split model output into wrapper blocks and the text around them.

        Args:
            model_output (str): The complete model output to parse.

        Returns:
            List[str]: Non-empty pieces in their original order; each piece
            is either a whole tool-calls wrapper block or the text between,
            before or after such blocks.
        """
        # content_regex has one capturing group, so re.split keeps the
        # wrapper blocks in the result; empty separators are discarded.
        pieces = re.split(self.content_regex, model_output, flags=re.DOTALL)
        return [piece for piece in pieces if piece]

    def extract_tool_calls_streaming(
        self, previous_text: List[str], current_text: str, delta_text: str
    ) -> Optional[Any]:
        """
        Extract tool calls from streaming output.

        Not supported by this parser; use extract_tool_calls() with the
        complete output instead.

        Args:
            previous_text (List[str]): Previous text chunks from the stream.
            current_text (str): Current accumulated text.
            delta_text (str): New text delta in this chunk.

        Raises:
            NotImplementedError: Always raised as streaming is not supported.
        """
        raise NotImplementedError(
            "Streaming support for tool calls is available only when using "
            "Qwen models with vLLM backend or GLM4-chat models without vLLM backend. "
            "DeepSeek R1 does not support streaming tool call extraction."
        )
|