xinference 1.10.0__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (317)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +11 -28
  3. xinference/client/restful/async_restful_client.py +20 -3
  4. xinference/client/restful/restful_client.py +20 -3
  5. xinference/core/supervisor.py +87 -53
  6. xinference/core/worker.py +10 -0
  7. xinference/deploy/cmdline.py +15 -0
  8. xinference/model/audio/core.py +21 -6
  9. xinference/model/audio/indextts2.py +166 -0
  10. xinference/model/audio/model_spec.json +38 -1
  11. xinference/model/image/model_spec.json +69 -0
  12. xinference/model/image/stable_diffusion/core.py +13 -4
  13. xinference/model/llm/__init__.py +4 -0
  14. xinference/model/llm/llm_family.json +464 -2
  15. xinference/model/llm/sglang/core.py +30 -11
  16. xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +94 -32
  17. xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
  18. xinference/model/llm/utils.py +12 -9
  19. xinference/model/llm/vllm/core.py +93 -17
  20. xinference/thirdparty/audiotools/__init__.py +10 -0
  21. xinference/thirdparty/audiotools/core/__init__.py +4 -0
  22. xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
  23. xinference/thirdparty/audiotools/core/display.py +194 -0
  24. xinference/thirdparty/audiotools/core/dsp.py +390 -0
  25. xinference/thirdparty/audiotools/core/effects.py +647 -0
  26. xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
  27. xinference/thirdparty/audiotools/core/loudness.py +320 -0
  28. xinference/thirdparty/audiotools/core/playback.py +252 -0
  29. xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
  30. xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
  31. xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
  32. xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
  33. xinference/thirdparty/audiotools/core/util.py +671 -0
  34. xinference/thirdparty/audiotools/core/whisper.py +97 -0
  35. xinference/thirdparty/audiotools/data/__init__.py +3 -0
  36. xinference/thirdparty/audiotools/data/datasets.py +517 -0
  37. xinference/thirdparty/audiotools/data/preprocess.py +81 -0
  38. xinference/thirdparty/audiotools/data/transforms.py +1592 -0
  39. xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
  40. xinference/thirdparty/audiotools/metrics/distance.py +131 -0
  41. xinference/thirdparty/audiotools/metrics/quality.py +159 -0
  42. xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
  43. xinference/thirdparty/audiotools/ml/__init__.py +5 -0
  44. xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
  45. xinference/thirdparty/audiotools/ml/decorators.py +440 -0
  46. xinference/thirdparty/audiotools/ml/experiment.py +90 -0
  47. xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
  48. xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
  49. xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
  50. xinference/thirdparty/audiotools/post.py +140 -0
  51. xinference/thirdparty/audiotools/preference.py +600 -0
  52. xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
  53. xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
  54. xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
  55. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
  56. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
  57. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
  58. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
  59. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  60. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
  61. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
  62. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
  63. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
  64. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
  65. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
  66. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
  67. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
  68. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
  69. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
  70. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
  71. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
  72. xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
  73. xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
  74. xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
  75. xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
  76. xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
  77. xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
  78. xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
  79. xinference/thirdparty/indextts/__init__.py +0 -0
  80. xinference/thirdparty/indextts/cli.py +65 -0
  81. xinference/thirdparty/indextts/gpt/__init__.py +0 -0
  82. xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
  83. xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
  84. xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
  85. xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
  86. xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
  87. xinference/thirdparty/indextts/gpt/model.py +713 -0
  88. xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
  89. xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
  90. xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
  91. xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
  92. xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
  93. xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
  94. xinference/thirdparty/indextts/infer.py +690 -0
  95. xinference/thirdparty/indextts/infer_v2.py +739 -0
  96. xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
  97. xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
  98. xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
  99. xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
  100. xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
  101. xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
  102. xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
  103. xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
  104. xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
  105. xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
  106. xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
  107. xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
  108. xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
  109. xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
  110. xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
  111. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
  112. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
  113. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
  114. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
  115. xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
  116. xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
  117. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
  118. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
  119. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  120. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
  121. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
  122. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
  123. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
  124. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
  125. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
  126. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
  127. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
  128. xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
  129. xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
  130. xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
  131. xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
  132. xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
  133. xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
  134. xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
  135. xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
  136. xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
  137. xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
  138. xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
  139. xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
  140. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
  141. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
  142. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
  143. xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
  144. xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
  145. xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
  146. xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
  147. xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
  148. xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
  149. xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
  150. xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
  151. xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
  152. xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
  153. xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
  154. xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
  155. xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
  156. xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
  157. xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
  158. xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
  159. xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
  160. xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
  161. xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
  162. xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
  163. xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
  164. xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
  165. xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
  166. xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
  167. xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
  168. xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
  169. xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
  170. xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
  171. xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
  172. xinference/thirdparty/indextts/utils/__init__.py +0 -0
  173. xinference/thirdparty/indextts/utils/arch_util.py +120 -0
  174. xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
  175. xinference/thirdparty/indextts/utils/common.py +121 -0
  176. xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
  177. xinference/thirdparty/indextts/utils/front.py +536 -0
  178. xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
  179. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
  180. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
  181. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
  182. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
  183. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
  184. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
  185. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
  186. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
  187. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
  188. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
  189. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
  190. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
  191. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
  192. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
  193. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
  194. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
  195. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
  196. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
  197. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
  198. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
  199. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
  200. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
  201. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
  202. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
  203. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
  204. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
  205. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
  206. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
  207. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
  208. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
  209. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
  210. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
  211. xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
  212. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
  213. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
  214. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
  215. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
  216. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
  217. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
  218. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
  219. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
  220. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
  221. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
  222. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
  223. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
  224. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
  225. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
  226. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
  227. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
  228. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
  229. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
  230. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
  231. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
  232. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
  233. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
  234. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
  235. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
  236. xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
  237. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
  238. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
  239. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
  240. xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
  241. xinference/thirdparty/indextts/utils/text_utils.py +41 -0
  242. xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
  243. xinference/thirdparty/indextts/utils/utils.py +93 -0
  244. xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
  245. xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
  246. xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
  247. xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
  248. xinference/ui/gradio/media_interface.py +66 -8
  249. xinference/ui/web/ui/build/asset-manifest.json +6 -6
  250. xinference/ui/web/ui/build/index.html +1 -1
  251. xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
  252. xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
  253. xinference/ui/web/ui/build/static/js/main.d192c4f3.js +3 -0
  254. xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.d192c4f3.js.LICENSE.txt} +0 -7
  255. xinference/ui/web/ui/build/static/js/main.d192c4f3.js.map +1 -0
  256. xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
  257. xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
  258. xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
  259. xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
  260. xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
  261. xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
  262. xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
  263. xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
  264. xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
  265. xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
  266. xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
  267. xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
  268. xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
  269. xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
  270. xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
  271. xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
  272. xinference/ui/web/ui/node_modules/.cache/babel-loader/f995a2425dfb0822fd07127f66ffe9b026883bc156b402eb8bd0b83d52460a93.json +1 -0
  273. xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
  274. xinference/ui/web/ui/package-lock.json +0 -34
  275. xinference/ui/web/ui/package.json +0 -1
  276. xinference/ui/web/ui/src/locales/en.json +9 -3
  277. xinference/ui/web/ui/src/locales/ja.json +9 -3
  278. xinference/ui/web/ui/src/locales/ko.json +9 -3
  279. xinference/ui/web/ui/src/locales/zh.json +9 -3
  280. {xinference-1.10.0.dist-info → xinference-1.10.1.dist-info}/METADATA +18 -2
  281. {xinference-1.10.0.dist-info → xinference-1.10.1.dist-info}/RECORD +285 -67
  282. xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
  283. xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
  284. xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
  285. xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
  286. xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
  287. xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
  288. xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
  289. xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
  290. xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
  291. xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
  292. xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
  293. xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
  294. xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
  295. xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
  296. xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
  297. xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
  298. xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
  299. xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
  300. xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
  301. xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
  302. xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
  303. xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
  304. xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
  305. xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
  306. xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
  307. xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
  308. xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
  309. xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
  310. xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
  311. xinference/ui/web/ui/node_modules/select/bower.json +0 -13
  312. xinference/ui/web/ui/node_modules/select/package.json +0 -29
  313. xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
  314. {xinference-1.10.0.dist-info → xinference-1.10.1.dist-info}/WHEEL +0 -0
  315. {xinference-1.10.0.dist-info → xinference-1.10.1.dist-info}/entry_points.txt +0 -0
  316. {xinference-1.10.0.dist-info → xinference-1.10.1.dist-info}/licenses/LICENSE +0 -0
  317. {xinference-1.10.0.dist-info → xinference-1.10.1.dist-info}/top_level.txt +0 -0

xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py

@@ -23,12 +23,27 @@ class DeepseekR1ToolParser(ToolParser):
         Initialize the DeepSeek R1 tool parser.
         """
         super().__init__()
+
+        # Sentinel tokens for streaming mode
+        self.think_start_token: str = "<think>"
+        self.think_end_token: str = "</think>"
+        self.tool_call_start_token: str = "<|tool▁call▁begin|>"
+        self.tool_call_end_token: str = "<|tool▁call▁end|>"
+
         # Regex pattern to match DeepSeek R1 tool call format
         self.tool_calls_regex = (
             r"<\|tool▁call▁begin|>function<\|tool▁sep|>([^\n]+)\n"
             r"```json\n(.*?)\n```<\|tool▁call▁end|>"
         )

+        # Regex pattern to match the entire tool-calls wrapper block.
+        # We intentionally do NOT match <think> blocks here so that the
+        # "text before" chunk will include both the think block and any
+        # narrative text up to the tool calls wrapper, yielding exactly two
+        # blocks when there is a single tool calls section:
+        # [before_text_including_think, tool_calls_wrapper_block]
+        self.content_regex = r"(<\|tool▁calls▁begin|>.*?<\|tool▁calls▁end|>)"
+
     def extract_tool_calls(
         self, model_output: str
     ) -> List[Tuple[Optional[str], Optional[str], Optional[dict]]]:
@@ -56,49 +71,96 @@ class DeepseekR1ToolParser(ToolParser):
         >>> print(result)
         [(None, 'get_current_weather', {'location': 'Beijing'})]
         """
-        matches = re.findall(self.tool_calls_regex, model_output, re.DOTALL)
-        if not matches:
-            # No tool calls found, return the original output as content
+        # If no tool call tokens, return original output as content
+        if self.tool_call_start_token not in model_output:
             return [(model_output, None, None)]

+        # Get all content blocks (text, thinking blocks, tool calls)
+        function_calls = self._get_function_calls(model_output)
+
         # Use set for deduplication of identical tool calls
         tool_calls = set()
         results: List[Tuple[Optional[str], Optional[str], Optional[dict]]] = []

-        for func_name, raw_json in matches:
-            func_and_args = None
-            try:
-                # Parse JSON arguments
-                func_and_args = json.loads(raw_json)
-                # Create hashable representation for deduplication
-                arguments_hashable = frozenset(func_and_args.items())
-                tool_call_tuple = (
-                    None,  # No content error
-                    func_name,
-                    func_and_args,
+        for content_block in function_calls:
+            # Check if this block is a tool call
+            if (
+                self.tool_call_start_token in content_block
+                and self.tool_call_end_token in content_block
+            ):
+                # Extract function name and arguments from tool call block
+                matches = re.findall(self.tool_calls_regex, content_block, re.DOTALL)
+                if not matches:
+                    # Malformed tool call, treat as regular content
+                    results.append((content_block, None, None))
+                    continue
+
+                func_name, raw_json = matches[0]  # Take the first match
+
+                func_and_args = None
+                try:
+                    # Parse JSON arguments
+                    func_and_args = json.loads(raw_json)
+                    # Create hashable representation for deduplication
+                    arguments_hashable = frozenset(func_and_args.items())
+                    tool_call_tuple = (
+                        None,  # No content error
+                        func_name,
+                        func_and_args,
+                    )
+                except Exception as e:
+                    # JSON parsing failed, treat as raw content
+                    logger.warning(
+                        f"Failed to parse tool call JSON: {raw_json}, error: {e}"
+                    )
+                    tool_call_tuple = (raw_json, None, None)
+                    arguments_hashable = None
+
+                # Create deduplication key
+                dedup_key = (
+                    (func_name, arguments_hashable)
+                    if func_and_args is not None
+                    else raw_json
                 )
-            except Exception as e:
-                # JSON parsing failed, treat as raw content
-                logger.warning(
-                    f"Failed to parse tool call JSON: {raw_json}, error: {e}"
-                )
-                tool_call_tuple = (raw_json, None, None)
-                arguments_hashable = None
-
-            # Create deduplication key
-            dedup_key = (
-                (func_name, arguments_hashable)
-                if func_and_args is not None
-                else raw_json
-            )

-            # Add to results if not already seen
-            if dedup_key not in tool_calls:
-                tool_calls.add(dedup_key)
-                results.append(tool_call_tuple)
+                # Add to results if not already seen
+                if dedup_key not in tool_calls:
+                    tool_calls.add(dedup_key)
+                    results.append(tool_call_tuple)
+            else:
+                # This is regular content (text or thinking block), add as-is
+                if content_block.strip():  # Only add non-empty content
+                    results.append((content_block, None, None))

         return results

+    def _get_function_calls(self, model_output: str) -> List[str]:
+        """
+        Extract all function calls and content blocks from model output.
+
+        Parses the model output to separate thinking blocks, tool calls,
+        and regular content into individual components.
+
+        Args:
+            model_output (str): The complete model output to parse.
+
+        Returns:
+            List[str]: List of content blocks (text, thinking blocks, tool calls).
+        """
+        functions_calls = []
+        last_end = 0
+        for m in re.finditer(self.content_regex, model_output, re.DOTALL):
+            # Add any text before the current match
+            if m.start() > last_end:
+                functions_calls.append(model_output[last_end : m.start()])
+            # Add the matched content (think or tool_call block)
+            functions_calls.append(m.group(0))
+            last_end = m.end()
+        # Add any remaining text after the last match
+        if last_end < len(model_output):
+            functions_calls.append(model_output[last_end:])
+        return functions_calls
+
     def extract_tool_calls_streaming(
         self, previous_text: List[str], current_text: str, delta_text: str
     ) -> Optional[Any]:
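
For orientation, here is a minimal, self-contained sketch of the two-stage flow the new parser uses: first split the raw output on the <|tool▁calls▁begin|> … <|tool▁calls▁end|> wrapper, then pull the function name and JSON arguments out of each tool-call block. The regexes below are simplified stand-ins with escaped delimiters rather than the package's exact patterns, split_blocks is a made-up helper name, and the sample string is invented for illustration.

import json
import re

# Simplified stand-ins for content_regex and tool_calls_regex above.
CONTENT_RE = re.compile(r"(<\|tool▁calls▁begin\|>.*?<\|tool▁calls▁end\|>)", re.DOTALL)
CALL_RE = re.compile(
    r"<\|tool▁call▁begin\|>function<\|tool▁sep\|>([^\n]+)\n"
    r"```json\n(.*?)\n```<\|tool▁call▁end\|>",
    re.DOTALL,
)

def split_blocks(text):
    # Mirrors _get_function_calls: text before/after each wrapper stays its own block.
    blocks, last_end = [], 0
    for m in CONTENT_RE.finditer(text):
        if m.start() > last_end:
            blocks.append(text[last_end : m.start()])
        blocks.append(m.group(0))
        last_end = m.end()
    if last_end < len(text):
        blocks.append(text[last_end:])
    return blocks

sample = (
    "<think>Need the weather.</think>\n"
    "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_current_weather\n"
    '```json\n{"location": "Beijing"}\n```'
    "<|tool▁call▁end|><|tool▁calls▁end|>"
)
for block in split_blocks(sample):
    for name, raw_json in CALL_RE.findall(block):
        print(name, json.loads(raw_json))  # get_current_weather {'location': 'Beijing'}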

xinference/model/llm/transformers/multimodal/qwen2_vl.py

@@ -27,11 +27,19 @@ logger = logging.getLogger(__name__)


 @register_batching_multimodal_models(
-    "qwen2-vl-instruct", "qwen2.5-vl-instruct", "QvQ-72B-Preview"
+    "qwen2-vl-instruct",
+    "qwen2.5-vl-instruct",
+    "QvQ-72B-Preview",
+    "Qwen3-VL-Instruct",
+    "Qwen3-VL-Thinking",
 )
 @register_transformer
 @register_non_default_model(
-    "qwen2-vl-instruct", "qwen2.5-vl-instruct", "QvQ-72B-Preview"
+    "qwen2-vl-instruct",
+    "qwen2.5-vl-instruct",
+    "QvQ-72B-Preview",
+    "Qwen3-VL-Instruct",
+    "Qwen3-VL-Thinking",
 )
 class Qwen2VLChatModel(PytorchMultiModalModel):
     def _sanitize_model_config(
@@ -47,7 +55,7 @@ class Qwen2VLChatModel(PytorchMultiModalModel):
     def match_json(
         cls, model_family: "LLMFamilyV2", model_spec: "LLMSpecV1", quantization: str
     ) -> bool:
-        if model_spec.model_format not in ["pytorch", "gptq", "awq", "bnb"]:
+        if model_spec.model_format not in ["pytorch", "gptq", "awq", "bnb", "fp8"]:
             return False
         llm_family = model_family.model_family or model_family.model_name
         if "qwen2-vl-instruct".lower() in llm_family.lower():
@@ -56,6 +64,8 @@ class Qwen2VLChatModel(PytorchMultiModalModel):
             return True
         if "qvq-72b-preview".lower() in llm_family.lower():
             return True
+        if "qwen3-vl" in llm_family.lower():
+            return True
         return False

     def decide_device(self):
@@ -85,13 +95,19 @@ class Qwen2VLChatModel(PytorchMultiModalModel):
         except ImportError:
             Qwen2_5_VLForConditionalGeneration = None

+        try:
+            from transformers import AutoModelForImageTextToText
+        except ImportError:
+            AutoModelForImageTextToText = None
+
         kwargs = self.apply_bnb_quantization()
         llm_family = self.model_family.model_family or self.model_family.model_name
-        model_cls = (
-            Qwen2_5_VLForConditionalGeneration
-            if "qwen2.5" in llm_family
-            else Qwen2VLForConditionalGeneration
-        )
+        if "qwen2.5" in llm_family:
+            model_cls = Qwen2_5_VLForConditionalGeneration
+        elif "qwen3" in llm_family:
+            model_cls = AutoModelForImageTextToText
+        else:
+            model_cls = Qwen2VLForConditionalGeneration
         if model_cls is None:
             raise ImportError("`transformers` version is too old, please upgrade it")
         device = "auto" if self._device == "cuda" else self._device
@@ -118,6 +134,16 @@ class Qwen2VLChatModel(PytorchMultiModalModel):
                 torch_dtype="float16",
                 **kwargs,
             ).eval()
+        elif device == "mps":
+            # MacOS special, see https://github.com/QwenLM/Qwen2.5-VL/issues/761
+            self._model = model_cls.from_pretrained(
+                self.model_path,
+                torch_dtype="bfloat16",
+                device_map=device,
+                attn_implementation="eager",
+                low_cpu_mem_usage=True,
+                trust_remote_code=True,
+            ).eval()
         else:
             self._model = model_cls.from_pretrained(
                 self.model_path,
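
Taken together, these hunks make the Transformers backend pick its loader class by family name. A rough standalone sketch of that dispatch follows; pick_model_cls is a made-up helper name, the class names are the ones used in the diff, and whether each import succeeds depends on the installed transformers version.

def pick_model_cls(llm_family: str):
    # Mirrors the branching added above; any of these imports can fail on an
    # older transformers release, which is why the diff guards them with try/except.
    if "qwen2.5" in llm_family:
        from transformers import Qwen2_5_VLForConditionalGeneration as model_cls
    elif "qwen3" in llm_family:
        from transformers import AutoModelForImageTextToText as model_cls
    else:
        from transformers import Qwen2VLForConditionalGeneration as model_cls
    return model_cls

print(pick_model_cls("qwen3-vl-instruct"))  # AutoModelForImageTextToText on recent transformers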

xinference/model/llm/utils.py

@@ -71,6 +71,10 @@ QWEN_TOOL_CALL_FAMILY = [
     "Qwen3-Thinking",
     "Qwen3-Instruct",
     "Qwen3-Coder",
+    "Qwen3-VL-Instruct",
+    "Qwen3-VL-Thinking",
+    "Qwen3-Next-Instruct",
+    "Qwen3-Next-Thinking",
 ]

 GLM4_TOOL_CALL_FAMILY = [
@@ -347,9 +351,7 @@ class ChatModelMixin:
         assert choices is not None
         usage = (
             chunk["usage"]
-            if choices[0]["finish_reason"] is not None
-            and reasoning_parser
-            and reasoning_parser.check_content_parser()
+            if choices and choices[0]["finish_reason"] is not None or not choices
             else None
         )
         chat_chunk = {
@@ -798,7 +800,11 @@
         chunk_id=None,
         previous_texts: List[str] = [""],
     ):
+        if not c.get("choices"):
+            return c
         _id = chunk_id if chunk_id is not None else str(uuid.uuid4())
+        tool_result = None
+        finish_reason = None
         if isinstance(self.tool_parser, Glm4ToolParser):
             tool_result = self.tool_parser.extract_tool_calls_streaming(
                 [],
@@ -851,11 +857,7 @@
             usage = c.get("usage")
             assert "prompt_tokens" in usage
         except Exception:
-            usage = {
-                "prompt_tokens": -1,
-                "completion_tokens": -1,
-                "total_tokens": -1,
-            }
+            usage = None
         return {
             "id": "chat" + f"cmpl-{_id}",
             "model": model_uid,
@@ -1009,7 +1011,8 @@
                 completion_chunk, self.reasoning_parser, previous_texts
             )
             if (
-                "reasoning_content" in chat_chunk["choices"][0]["delta"]
+                chat_chunk["choices"]
+                and "reasoning_content" in chat_chunk["choices"][0]["delta"]
                 and chat_chunk["choices"][0]["delta"]["reasoning_content"] is not None
             ):
                 yield chat_chunk

xinference/model/llm/vllm/core.py

@@ -264,6 +264,9 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.4"):
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.5"):
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen3")

+if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.0"):
+    VLLM_SUPPORTED_CHAT_MODELS.append("Baichuan-M2")
+
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.1"):
     VLLM_SUPPORTED_CHAT_MODELS.append("minicpm4")

@@ -282,10 +285,15 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.0"):

 if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.10.0"):
     VLLM_SUPPORTED_CHAT_MODELS.append("gpt-oss")
-    VLLM_SUPPORTED_CHAT_MODELS.append("seed-oss")

-if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.10.1.1"):
+if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.2"):
     VLLM_SUPPORTED_CHAT_MODELS.append("seed-oss")
+    VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Instruct")
+    VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Thinking")
+
+if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.10.2"):
+    VLLM_SUPPORTED_VISION_MODEL_LIST.append("Qwen3-VL-Instruct")
+    VLLM_SUPPORTED_VISION_MODEL_LIST.append("Qwen3-VL-Instruct")


 class VLLMModel(LLM):
@@ -934,9 +942,21 @@ class VLLMModel(LLM):

     async def _get_tokenizer(self, lora_request: Any) -> Any:
         try:
-            return await self._engine.get_tokenizer(lora_request)  # type: ignore
+            # vLLM 0.11.0+ get_tokenizer doesn't accept lora_request parameter
+            if (
+                VLLM_VERSION >= version.parse("0.11.0")
+                or VLLM_VERSION.base_version >= "0.11.0"
+            ):
+                return await self._engine.get_tokenizer()  # type: ignore
+            else:
+                return await self._engine.get_tokenizer(lora_request)  # type: ignore
         except AttributeError:
-            return await self._engine.get_tokenizer_async(lora_request)  # type: ignore
+            # Fallback to get_tokenizer_async for older versions
+            try:
+                return await self._engine.get_tokenizer_async(lora_request)  # type: ignore
+            except (AttributeError, TypeError):
+                # If all else fails, try without parameters
+                return await self._engine.get_tokenizer()  # type: ignore

     def _tokenize(self, tokenizer: Any, prompt: str, config: dict) -> List[int]:
         truncate_prompt_tokens = config.get("truncate_prompt_tokens")
@@ -1017,23 +1037,65 @@
             # guided decoding only available for vllm >= 0.6.3
             from vllm.sampling_params import GuidedDecodingParams

-            guided_options = GuidedDecodingParams.from_optional(
-                json=sanitized_generate_config.pop("guided_json", None),
-                regex=sanitized_generate_config.pop("guided_regex", None),
-                choice=sanitized_generate_config.pop("guided_choice", None),
-                grammar=sanitized_generate_config.pop("guided_grammar", None),
-                json_object=sanitized_generate_config.pop("guided_json_object", None),
-                backend=sanitized_generate_config.pop("guided_decoding_backend", None),
-                whitespace_pattern=sanitized_generate_config.pop(
-                    "guided_whitespace_pattern", None
-                ),
+            # Extract guided decoding parameters
+            guided_params: dict[str, Any] = {}
+            guided_json = sanitized_generate_config.pop("guided_json", None)
+            if guided_json:
+                guided_params["json"] = guided_json
+
+            guided_regex = sanitized_generate_config.pop("guided_regex", None)
+            if guided_regex:
+                guided_params["regex"] = guided_regex
+
+            guided_choice = sanitized_generate_config.pop("guided_choice", None)
+            if guided_choice:
+                guided_params["choice"] = guided_choice
+
+            guided_grammar = sanitized_generate_config.pop("guided_grammar", None)
+            if guided_grammar:
+                guided_params["grammar"] = guided_grammar
+
+            guided_json_object = sanitized_generate_config.pop(
+                "guided_json_object", None
             )
+            if guided_json_object:
+                guided_params["json_object"] = guided_json_object

-            sampling_params = SamplingParams(
-                guided_decoding=guided_options, **sanitized_generate_config
+            guided_backend = sanitized_generate_config.pop(
+                "guided_decoding_backend", None
             )
+            if guided_backend:
+                guided_params["_backend"] = guided_backend
+
+            guided_whitespace_pattern = sanitized_generate_config.pop(
+                "guided_whitespace_pattern", None
+            )
+            if guided_whitespace_pattern:
+                guided_params["whitespace_pattern"] = guided_whitespace_pattern
+
+            # Create GuidedDecodingParams if we have any guided parameters
+            guided_options = None
+            if guided_params:
+                try:
+                    guided_options = GuidedDecodingParams(**guided_params)
+                except Exception as e:
+                    logger.warning(f"Failed to create GuidedDecodingParams: {e}")
+                    guided_options = None
+
+            # Use structured_outputs for vLLM >= 0.11.0, guided_decoding for older versions
+            if (
+                VLLM_VERSION >= version.parse("0.11.0")
+                or VLLM_VERSION.base_version >= "0.11.0"
+            ):
+                sampling_params = SamplingParams(
+                    structured_outputs=guided_options, **sanitized_generate_config
+                )
+            else:
+                sampling_params = SamplingParams(
+                    guided_decoding=guided_options, **sanitized_generate_config
+                )
         else:
-            # ignore generate configs
+            # ignore generate configs for older versions
             sanitized_generate_config.pop("guided_json", None)
             sanitized_generate_config.pop("guided_regex", None)
             sanitized_generate_config.pop("guided_choice", None)
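
As a toy illustration of the extraction above, run outside vLLM: the guided_* keys are popped from the generate config and only truthy values end up in the keyword dict that is later handed to GuidedDecodingParams (or passed as structured_outputs on vLLM 0.11.0+). The sample config here is invented.

# Toy run of the guided-parameter extraction, with an invented config.
sanitized_generate_config = {
    "max_tokens": 128,
    "guided_json": {"type": "object", "properties": {"city": {"type": "string"}}},
    "guided_whitespace_pattern": None,
}
guided_params = {}
for cfg_key, param_key in [
    ("guided_json", "json"),
    ("guided_regex", "regex"),
    ("guided_choice", "choice"),
    ("guided_grammar", "grammar"),
    ("guided_json_object", "json_object"),
    ("guided_decoding_backend", "_backend"),
    ("guided_whitespace_pattern", "whitespace_pattern"),
]:
    value = sanitized_generate_config.pop(cfg_key, None)
    if value:
        guided_params[param_key] = value

print(guided_params)              # {'json': {'type': 'object', ...}}
print(sanitized_generate_config)  # {'max_tokens': 128}, ready to splat into SamplingParams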

xinference/model/llm/vllm/core.py (continued)

@@ -1236,6 +1298,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
     ) -> Dict:
         if not generate_config:
             generate_config = {}
+
         if "reasoning" in getattr(self.model_family, "model_ability", []):
             generate_config.pop("stop", None)
             generate_config.pop("stop_token_ids", None)
@@ -1249,6 +1312,19 @@
             generate_config["stop_token_ids"] = (
                 self.model_family.stop_token_ids.copy()
             )
+
+        # if response_format exists,generate guided_json
+        if "response_format" in generate_config:
+            resp_format = generate_config["response_format"]
+            if (
+                isinstance(resp_format, dict)
+                and resp_format.get("type") == "json_schema"
+                and "json_schema" in resp_format
+            ):
+                schema = resp_format["json_schema"].get("schema_")
+                if schema:
+                    generate_config["guided_json"] = schema
+
         return generate_config

     @staticmethod
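
The response_format handling added above only fires for json_schema-typed formats whose schema sits under the schema_ key. A standalone walk-through of that mapping, with an invented schema:

# An OpenAI-style response_format of type "json_schema" becomes a guided_json entry.
generate_config = {
    "response_format": {
        "type": "json_schema",
        "json_schema": {
            "name": "weather",
            "schema_": {"type": "object", "properties": {"city": {"type": "string"}}},
        },
    }
}

resp_format = generate_config["response_format"]
if (
    isinstance(resp_format, dict)
    and resp_format.get("type") == "json_schema"
    and "json_schema" in resp_format
):
    schema = resp_format["json_schema"].get("schema_")
    if schema:
        generate_config["guided_json"] = schema

print("guided_json" in generate_config)  # True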

xinference/thirdparty/audiotools/__init__.py (new file)

@@ -0,0 +1,10 @@
+__version__ = "0.7.4"
+from .core import AudioSignal
+from .core import STFTParams
+from .core import Meter
+from .core import util
+from . import metrics
+from . import data
+from . import ml
+from .data import datasets
+from .data import transforms

xinference/thirdparty/audiotools/core/__init__.py (new file)

@@ -0,0 +1,4 @@
+from . import util
+from .audio_signal import AudioSignal
+from .audio_signal import STFTParams
+from .loudness import Meter
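
These two __init__ files expose what appears to be a vendored copy of descript-audiotools 0.7.x under xinference.thirdparty, added alongside the new indextts2 audio support. A hedged usage sketch of the exported surface, assuming torch and the other audiotools dependencies are installed:

import numpy as np
from xinference.thirdparty.audiotools import AudioSignal, STFTParams

# Wrap one second of silence at 16 kHz; in upstream audiotools 0.7.x AudioSignal
# accepts arrays, tensors, or file paths, and audio_data is a (batch, channels, samples) tensor.
signal = AudioSignal(np.zeros(16000, dtype=np.float32), sample_rate=16000)
signal.stft_params = STFTParams(window_length=512, hop_length=128)
print(signal.sample_rate, tuple(signal.audio_data.shape))  # 16000 (1, 1, 16000)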