xinference 1.9.1__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (334)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +400 -3
  3. xinference/client/restful/async_restful_client.py +20 -3
  4. xinference/client/restful/restful_client.py +20 -3
  5. xinference/constants.py +2 -0
  6. xinference/core/supervisor.py +111 -49
  7. xinference/core/worker.py +10 -0
  8. xinference/deploy/cmdline.py +15 -0
  9. xinference/model/audio/core.py +26 -6
  10. xinference/model/audio/indextts2.py +166 -0
  11. xinference/model/audio/kokoro.py +1 -1
  12. xinference/model/audio/kokoro_zh.py +124 -0
  13. xinference/model/audio/model_spec.json +58 -1
  14. xinference/model/embedding/sentence_transformers/core.py +4 -4
  15. xinference/model/embedding/vllm/core.py +7 -1
  16. xinference/model/image/model_spec.json +71 -3
  17. xinference/model/image/stable_diffusion/core.py +13 -4
  18. xinference/model/llm/__init__.py +4 -0
  19. xinference/model/llm/core.py +10 -0
  20. xinference/model/llm/llama_cpp/core.py +1 -0
  21. xinference/model/llm/llm_family.json +503 -21
  22. xinference/model/llm/llm_family.py +1 -0
  23. xinference/model/llm/mlx/core.py +52 -33
  24. xinference/model/llm/sglang/core.py +32 -55
  25. xinference/model/llm/tool_parsers/__init__.py +58 -0
  26. xinference/model/llm/tool_parsers/abstract_tool_parser.py +33 -0
  27. xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +190 -0
  28. xinference/model/llm/tool_parsers/deepseek_v3_tool_parser.py +145 -0
  29. xinference/model/llm/tool_parsers/glm4_tool_parser.py +123 -0
  30. xinference/model/llm/tool_parsers/llama3_tool_parser.py +77 -0
  31. xinference/model/llm/tool_parsers/qwen_tool_parser.py +320 -0
  32. xinference/model/llm/transformers/core.py +1 -1
  33. xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
  34. xinference/model/llm/utils.py +138 -53
  35. xinference/model/llm/vllm/core.py +95 -78
  36. xinference/thirdparty/audiotools/__init__.py +10 -0
  37. xinference/thirdparty/audiotools/core/__init__.py +4 -0
  38. xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
  39. xinference/thirdparty/audiotools/core/display.py +194 -0
  40. xinference/thirdparty/audiotools/core/dsp.py +390 -0
  41. xinference/thirdparty/audiotools/core/effects.py +647 -0
  42. xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
  43. xinference/thirdparty/audiotools/core/loudness.py +320 -0
  44. xinference/thirdparty/audiotools/core/playback.py +252 -0
  45. xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
  46. xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
  47. xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
  48. xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
  49. xinference/thirdparty/audiotools/core/util.py +671 -0
  50. xinference/thirdparty/audiotools/core/whisper.py +97 -0
  51. xinference/thirdparty/audiotools/data/__init__.py +3 -0
  52. xinference/thirdparty/audiotools/data/datasets.py +517 -0
  53. xinference/thirdparty/audiotools/data/preprocess.py +81 -0
  54. xinference/thirdparty/audiotools/data/transforms.py +1592 -0
  55. xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
  56. xinference/thirdparty/audiotools/metrics/distance.py +131 -0
  57. xinference/thirdparty/audiotools/metrics/quality.py +159 -0
  58. xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
  59. xinference/thirdparty/audiotools/ml/__init__.py +5 -0
  60. xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
  61. xinference/thirdparty/audiotools/ml/decorators.py +440 -0
  62. xinference/thirdparty/audiotools/ml/experiment.py +90 -0
  63. xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
  64. xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
  65. xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
  66. xinference/thirdparty/audiotools/post.py +140 -0
  67. xinference/thirdparty/audiotools/preference.py +600 -0
  68. xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
  69. xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
  70. xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
  71. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
  72. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
  73. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
  74. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
  75. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  76. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
  77. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
  78. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
  79. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
  80. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
  81. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
  82. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
  83. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
  84. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
  85. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
  86. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
  87. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
  88. xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
  89. xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
  90. xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
  91. xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
  92. xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
  93. xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
  94. xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
  95. xinference/thirdparty/indextts/__init__.py +0 -0
  96. xinference/thirdparty/indextts/cli.py +65 -0
  97. xinference/thirdparty/indextts/gpt/__init__.py +0 -0
  98. xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
  99. xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
  100. xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
  101. xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
  102. xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
  103. xinference/thirdparty/indextts/gpt/model.py +713 -0
  104. xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
  105. xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
  106. xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
  107. xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
  108. xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
  109. xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
  110. xinference/thirdparty/indextts/infer.py +690 -0
  111. xinference/thirdparty/indextts/infer_v2.py +739 -0
  112. xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
  113. xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
  114. xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
  115. xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
  116. xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
  117. xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
  118. xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
  119. xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
  120. xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
  121. xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
  122. xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
  123. xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
  124. xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
  125. xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
  126. xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
  127. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
  128. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
  129. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
  130. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
  131. xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
  132. xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
  133. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
  134. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
  135. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  136. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
  137. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
  138. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
  139. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
  140. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
  141. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
  142. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
  143. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
  144. xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
  145. xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
  146. xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
  147. xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
  148. xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
  149. xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
  150. xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
  151. xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
  152. xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
  153. xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
  154. xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
  155. xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
  156. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
  157. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
  158. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
  159. xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
  160. xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
  161. xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
  162. xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
  163. xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
  164. xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
  165. xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
  166. xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
  167. xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
  168. xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
  169. xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
  170. xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
  171. xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
  172. xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
  173. xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
  174. xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
  175. xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
  176. xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
  177. xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
  178. xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
  179. xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
  180. xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
  181. xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
  182. xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
  183. xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
  184. xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
  185. xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
  186. xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
  187. xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
  188. xinference/thirdparty/indextts/utils/__init__.py +0 -0
  189. xinference/thirdparty/indextts/utils/arch_util.py +120 -0
  190. xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
  191. xinference/thirdparty/indextts/utils/common.py +121 -0
  192. xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
  193. xinference/thirdparty/indextts/utils/front.py +536 -0
  194. xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
  195. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
  196. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
  197. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
  198. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
  199. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
  200. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
  201. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
  202. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
  203. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
  204. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
  205. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
  206. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
  207. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
  208. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
  209. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
  210. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
  211. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
  212. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
  213. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
  214. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
  215. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
  216. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
  217. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
  218. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
  219. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
  220. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
  221. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
  222. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
  223. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
  224. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
  225. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
  226. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
  227. xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
  228. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
  229. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
  230. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
  231. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
  232. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
  233. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
  234. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
  235. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
  236. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
  237. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
  238. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
  239. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
  240. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
  241. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
  242. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
  243. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
  244. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
  245. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
  246. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
  247. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
  248. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
  249. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
  250. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
  251. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
  252. xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
  253. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
  254. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
  255. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
  256. xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
  257. xinference/thirdparty/indextts/utils/text_utils.py +41 -0
  258. xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
  259. xinference/thirdparty/indextts/utils/utils.py +93 -0
  260. xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
  261. xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
  262. xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
  263. xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
  264. xinference/types.py +105 -2
  265. xinference/ui/gradio/media_interface.py +66 -8
  266. xinference/ui/web/ui/build/asset-manifest.json +6 -6
  267. xinference/ui/web/ui/build/index.html +1 -1
  268. xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
  269. xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
  270. xinference/ui/web/ui/build/static/js/main.d192c4f3.js +3 -0
  271. xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.d192c4f3.js.LICENSE.txt} +0 -7
  272. xinference/ui/web/ui/build/static/js/main.d192c4f3.js.map +1 -0
  273. xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
  274. xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
  275. xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
  276. xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
  277. xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
  278. xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
  279. xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
  280. xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
  281. xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
  282. xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
  283. xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
  284. xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
  285. xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
  286. xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
  287. xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
  288. xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
  289. xinference/ui/web/ui/node_modules/.cache/babel-loader/f995a2425dfb0822fd07127f66ffe9b026883bc156b402eb8bd0b83d52460a93.json +1 -0
  290. xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
  291. xinference/ui/web/ui/package-lock.json +0 -34
  292. xinference/ui/web/ui/package.json +0 -1
  293. xinference/ui/web/ui/src/locales/en.json +9 -3
  294. xinference/ui/web/ui/src/locales/ja.json +9 -3
  295. xinference/ui/web/ui/src/locales/ko.json +9 -3
  296. xinference/ui/web/ui/src/locales/zh.json +9 -3
  297. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/METADATA +24 -4
  298. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/RECORD +302 -76
  299. xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
  300. xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
  301. xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
  302. xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
  303. xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
  304. xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
  305. xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
  306. xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
  307. xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
  308. xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
  309. xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
  310. xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
  311. xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
  312. xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
  313. xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
  314. xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
  315. xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
  316. xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
  317. xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
  318. xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
  319. xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
  320. xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
  321. xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
  322. xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
  323. xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
  324. xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
  325. xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
  326. xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
  327. xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
  328. xinference/ui/web/ui/node_modules/select/bower.json +0 -13
  329. xinference/ui/web/ui/node_modules/select/package.json +0 -29
  330. xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
  331. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/WHEEL +0 -0
  332. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/entry_points.txt +0 -0
  333. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/licenses/LICENSE +0 -0
  334. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/top_level.txt +0 -0
@@ -27,11 +27,19 @@ logger = logging.getLogger(__name__)
27
27
 
28
28
 
29
29
  @register_batching_multimodal_models(
30
- "qwen2-vl-instruct", "qwen2.5-vl-instruct", "QvQ-72B-Preview"
30
+ "qwen2-vl-instruct",
31
+ "qwen2.5-vl-instruct",
32
+ "QvQ-72B-Preview",
33
+ "Qwen3-VL-Instruct",
34
+ "Qwen3-VL-Thinking",
31
35
  )
32
36
  @register_transformer
33
37
  @register_non_default_model(
34
- "qwen2-vl-instruct", "qwen2.5-vl-instruct", "QvQ-72B-Preview"
38
+ "qwen2-vl-instruct",
39
+ "qwen2.5-vl-instruct",
40
+ "QvQ-72B-Preview",
41
+ "Qwen3-VL-Instruct",
42
+ "Qwen3-VL-Thinking",
35
43
  )
36
44
  class Qwen2VLChatModel(PytorchMultiModalModel):
37
45
  def _sanitize_model_config(
@@ -47,7 +55,7 @@ class Qwen2VLChatModel(PytorchMultiModalModel):
47
55
  def match_json(
48
56
  cls, model_family: "LLMFamilyV2", model_spec: "LLMSpecV1", quantization: str
49
57
  ) -> bool:
50
- if model_spec.model_format not in ["pytorch", "gptq", "awq", "bnb"]:
58
+ if model_spec.model_format not in ["pytorch", "gptq", "awq", "bnb", "fp8"]:
51
59
  return False
52
60
  llm_family = model_family.model_family or model_family.model_name
53
61
  if "qwen2-vl-instruct".lower() in llm_family.lower():
@@ -56,6 +64,8 @@ class Qwen2VLChatModel(PytorchMultiModalModel):
56
64
  return True
57
65
  if "qvq-72b-preview".lower() in llm_family.lower():
58
66
  return True
67
+ if "qwen3-vl" in llm_family.lower():
68
+ return True
59
69
  return False
60
70
 
61
71
  def decide_device(self):
@@ -85,13 +95,19 @@ class Qwen2VLChatModel(PytorchMultiModalModel):
85
95
  except ImportError:
86
96
  Qwen2_5_VLForConditionalGeneration = None
87
97
 
98
+ try:
99
+ from transformers import AutoModelForImageTextToText
100
+ except ImportError:
101
+ AutoModelForImageTextToText = None
102
+
88
103
  kwargs = self.apply_bnb_quantization()
89
104
  llm_family = self.model_family.model_family or self.model_family.model_name
90
- model_cls = (
91
- Qwen2_5_VLForConditionalGeneration
92
- if "qwen2.5" in llm_family
93
- else Qwen2VLForConditionalGeneration
94
- )
105
+ if "qwen2.5" in llm_family:
106
+ model_cls = Qwen2_5_VLForConditionalGeneration
107
+ elif "qwen3" in llm_family:
108
+ model_cls = AutoModelForImageTextToText
109
+ else:
110
+ model_cls = Qwen2VLForConditionalGeneration
95
111
  if model_cls is None:
96
112
  raise ImportError("`transformers` version is too old, please upgrade it")
97
113
  device = "auto" if self._device == "cuda" else self._device
@@ -118,6 +134,16 @@ class Qwen2VLChatModel(PytorchMultiModalModel):
118
134
  torch_dtype="float16",
119
135
  **kwargs,
120
136
  ).eval()
137
+ elif device == "mps":
138
+ # MacOS special, see https://github.com/QwenLM/Qwen2.5-VL/issues/761
139
+ self._model = model_cls.from_pretrained(
140
+ self.model_path,
141
+ torch_dtype="bfloat16",
142
+ device_map=device,
143
+ attn_implementation="eager",
144
+ low_cpu_mem_usage=True,
145
+ trust_remote_code=True,
146
+ ).eval()
121
147
  else:
122
148
  self._model = model_cls.from_pretrained(
123
149
  self.model_path,
@@ -51,6 +51,7 @@ from ...types import (
51
51
  )
52
52
  from .core import chat_context_var
53
53
  from .reasoning_parser import ReasoningParser
54
+ from .tool_parsers.glm4_tool_parser import Glm4ToolParser
54
55
 
55
56
  logger = logging.getLogger(__name__)
56
57
 
@@ -70,6 +71,10 @@ QWEN_TOOL_CALL_FAMILY = [
70
71
  "Qwen3-Thinking",
71
72
  "Qwen3-Instruct",
72
73
  "Qwen3-Coder",
74
+ "Qwen3-VL-Instruct",
75
+ "Qwen3-VL-Thinking",
76
+ "Qwen3-Next-Instruct",
77
+ "Qwen3-Next-Thinking",
73
78
  ]
74
79
 
75
80
  GLM4_TOOL_CALL_FAMILY = [
@@ -95,6 +100,13 @@ QWEN_TOOL_CALL_SYMBOLS = ["<tool_call>", "</tool_call>"]
95
100
 
96
101
 
97
102
  class ChatModelMixin:
103
+
104
+ def __init__(self):
105
+ self.model_family = None
106
+ self.model_uid = None
107
+ self.reasoning_parser = None
108
+ self.tool_parser = None
109
+
98
110
  @staticmethod
99
111
  @functools.lru_cache
100
112
  def _compile_jinja_template(chat_template):
@@ -339,9 +351,7 @@ class ChatModelMixin:
339
351
  assert choices is not None
340
352
  usage = (
341
353
  chunk["usage"]
342
- if choices[0]["finish_reason"] is not None
343
- and reasoning_parser
344
- and reasoning_parser.check_content_parser()
354
+ if choices and choices[0]["finish_reason"] is not None or not choices
345
355
  else None
346
356
  )
347
357
  chat_chunk = {
@@ -590,16 +600,41 @@ class ChatModelMixin:
590
600
  pos2 = content.find(QWEN_TOOL_CALL_SYMBOLS[1])
591
601
  if pos2 != -1:
592
602
  content = content[:pos2]
603
+
604
+ # Skip empty content after extraction
605
+ if not content.strip():
606
+ continue
607
+
593
608
  try:
594
609
  res = json.loads(content, strict=False)
595
- results.append((None, res["name"], res["arguments"]))
596
- except Exception as e:
610
+ if isinstance(res, dict):
611
+ # Check if required fields exist
612
+ if "name" in res and "arguments" in res:
613
+ results.append((None, res["name"], res["arguments"]))
614
+ else:
615
+ logger.warning(
616
+ "Missing required fields in qwen tool call: %s", content
617
+ )
618
+ results.append((content, None, None))
619
+ else:
620
+ logger.warning(
621
+ "Qwen tool call result is not a dict: %s", content
622
+ )
623
+ results.append((content, None, None))
624
+ except json.JSONDecodeError as e:
597
625
  logger.error(
598
626
  "Can't parse single qwen tool call output: %s. Error: %s",
599
627
  content,
600
628
  e,
601
629
  )
602
630
  results.append((content, None, None))
631
+ except Exception as e:
632
+ logger.error(
633
+ "Unexpected error parsing qwen tool call: %s. Error: %s",
634
+ content,
635
+ e,
636
+ )
637
+ results.append((content, None, None))
603
638
  return results
604
639
 
605
640
  @classmethod
@@ -757,47 +792,64 @@ class ChatModelMixin:
757
792
  logger.debug(f"Tool call content: {result}")
758
793
  return result
759
794
 
760
- @classmethod
761
795
  def _post_process_completion_chunk(
762
- cls,
796
+ self,
763
797
  model_family,
764
798
  model_uid,
765
799
  c,
766
800
  chunk_id=None,
767
- reasoning_parser: Optional[ReasoningParser] = None,
768
- tool_call_text: Optional[str] = None,
801
+ previous_texts: List[str] = [""],
769
802
  ):
803
+ if not c.get("choices"):
804
+ return c
770
805
  _id = chunk_id if chunk_id is not None else str(uuid.uuid4())
771
- tool_result = cls._eval_tool_arguments(model_family, c, tool_call_text)
806
+ tool_result = None
807
+ finish_reason = None
808
+ if isinstance(self.tool_parser, Glm4ToolParser):
809
+ tool_result = self.tool_parser.extract_tool_calls_streaming(
810
+ [],
811
+ c,
812
+ c,
813
+ )
814
+ else:
815
+ finish_reason = c["choices"][0]["finish_reason"]
816
+ delta_text = c["choices"][0]["delta"]["content"]
817
+ current_text = (
818
+ previous_texts[-1] + delta_text if previous_texts else delta_text
819
+ )
820
+ tool_result = self.tool_parser.extract_tool_calls_streaming(
821
+ previous_texts,
822
+ current_text,
823
+ delta_text,
824
+ )
825
+ previous_texts[-1] = current_text
826
+ if tool_result is None and not finish_reason:
827
+ return None
772
828
  tool_calls = []
773
829
  failed_contents = []
774
- for content, func, args in tool_result:
775
- if func:
776
- tool_calls.append(
777
- {
778
- "index": 0,
779
- "id": f"call_{_id}",
780
- "type": "function",
781
- "function": {
782
- "name": func,
783
- "arguments": json.dumps(args, ensure_ascii=False),
784
- },
785
- }
786
- )
787
- else:
788
- failed_contents.append(content)
789
- finish_reason = "tool_calls" if tool_calls else "stop"
830
+ content, func, args = tool_result if tool_result else ("", None, None)
831
+ if func:
832
+ tool_calls.append(
833
+ {
834
+ "index": 0,
835
+ "id": f"call_{_id}",
836
+ "type": "function",
837
+ "function": {
838
+ "name": func,
839
+ "arguments": json.dumps(args, ensure_ascii=False),
840
+ },
841
+ }
842
+ )
843
+ else:
844
+ failed_contents.append(content)
790
845
 
791
- content = "".join(failed_contents) if failed_contents else None
846
+ finish_reason = "tool_calls" if tool_calls else finish_reason
792
847
 
793
- # fix: qwen tool_call content field return null
794
- family = model_family.model_family or model_family.model_name
795
- if tool_calls and family in QWEN_TOOL_CALL_FAMILY and content is None:
796
- content = ""
848
+ content = "".join(failed_contents) if failed_contents else None
797
849
 
798
850
  d = {
799
851
  "role": "assistant",
800
- "content": content,
852
+ "content": content if content else "",
801
853
  "tool_calls": tool_calls,
802
854
  }
803
855
 
@@ -805,11 +857,7 @@ class ChatModelMixin:
805
857
  usage = c.get("usage")
806
858
  assert "prompt_tokens" in usage
807
859
  except Exception:
808
- usage = {
809
- "prompt_tokens": -1,
810
- "completion_tokens": -1,
811
- "total_tokens": -1,
812
- }
860
+ usage = None
813
861
  return {
814
862
  "id": "chat" + f"cmpl-{_id}",
815
863
  "model": model_uid,
@@ -826,29 +874,32 @@ class ChatModelMixin:
826
874
  "usage": usage,
827
875
  }
828
876
 
829
- @classmethod
830
877
  def _post_process_completion(
831
- cls,
878
+ self,
832
879
  model_family,
833
880
  model_uid,
834
881
  c,
835
- reasoning_parser: Optional[ReasoningParser] = None,
836
882
  ):
837
- if reasoning_parser:
838
- c = reasoning_parser.prepare_reasoning_content(c)
883
+ if not self.tool_parser:
884
+ return self._get_final_chat_completion_chunk(c)
885
+ if self.reasoning_parser:
886
+ c = self.reasoning_parser.prepare_reasoning_content(c)
839
887
  _id = str(uuid.uuid4())
840
888
  reasoning_content = None
841
- if reasoning_parser and reasoning_parser.check_content_parser():
889
+ if self.reasoning_parser and self.reasoning_parser.check_content_parser():
842
890
  text = c["choices"][0]["text"]
843
- reasoning_content, content = reasoning_parser.extract_reasoning_content(
844
- text
891
+ reasoning_content, content = (
892
+ self.reasoning_parser.extract_reasoning_content(text)
845
893
  )
846
894
  c["choices"][0]["text"] = content
847
895
 
848
- tool_result = cls._eval_tool_arguments(model_family, c)
849
-
850
896
  tool_calls = []
851
897
  failed_contents = []
898
+ if isinstance(self.tool_parser, Glm4ToolParser):
899
+ tool_result = self.tool_parser.extract_tool_calls(c)
900
+ else:
901
+ text = c["choices"][0]["text"]
902
+ tool_result = self.tool_parser.extract_tool_calls(text)
852
903
  for content, func, args in tool_result:
853
904
  if func:
854
905
  tool_calls.append(
@@ -868,14 +919,9 @@ class ChatModelMixin:
868
919
 
869
920
  content = "".join(failed_contents) if failed_contents else None
870
921
 
871
- # fix: qwen tool_call content field return null
872
- family = model_family.model_family or model_family.model_name
873
- if tool_calls and family in QWEN_TOOL_CALL_FAMILY and content is None:
874
- content = ""
875
-
876
922
  m = {
877
923
  "role": "assistant",
878
- "content": content,
924
+ "content": content if content else "",
879
925
  "tool_calls": tool_calls,
880
926
  }
881
927
  # add only reasoning_content is None
@@ -943,6 +989,45 @@ class ChatModelMixin:
943
989
 
944
990
  return transformed_messages
945
991
 
992
+ async def _async_to_tool_completion_chunks(
993
+ self,
994
+ chunks: AsyncGenerator[CompletionChunk, None],
995
+ ctx: Optional[Dict[str, Any]] = None,
996
+ ) -> AsyncGenerator[ChatCompletionChunk, None]:
997
+ def set_context():
998
+ if ctx:
999
+ chat_context_var.set(ctx)
1000
+
1001
+ i = 0
1002
+ previous_texts = [""]
1003
+ previous_tools_texts = [""]
1004
+ full_text = ""
1005
+ if self.reasoning_parser:
1006
+ set_context()
1007
+ chunks = self.reasoning_parser.prepare_reasoning_content_streaming(chunks)
1008
+ async for completion_chunk in chunks:
1009
+ set_context()
1010
+ chat_chunk = self._to_chat_completion_chunk(
1011
+ completion_chunk, self.reasoning_parser, previous_texts
1012
+ )
1013
+ if (
1014
+ chat_chunk["choices"]
1015
+ and "reasoning_content" in chat_chunk["choices"][0]["delta"]
1016
+ and chat_chunk["choices"][0]["delta"]["reasoning_content"] is not None
1017
+ ):
1018
+ yield chat_chunk
1019
+ continue
1020
+ processed_chunk = self._post_process_completion_chunk(
1021
+ self.model_family,
1022
+ self.model_uid,
1023
+ chat_chunk,
1024
+ previous_texts=previous_tools_texts,
1025
+ )
1026
+ if processed_chunk:
1027
+ yield processed_chunk
1028
+ i += 1
1029
+ logger.debug("Chat finished, output: %s", full_text)
1030
+
946
1031
 
947
1032
  def get_model_version(
948
1033
  model_name: str,
@@ -264,6 +264,9 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.4"):
264
264
  if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.5"):
265
265
  VLLM_SUPPORTED_CHAT_MODELS.append("qwen3")
266
266
 
267
+ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.0"):
268
+ VLLM_SUPPORTED_CHAT_MODELS.append("Baichuan-M2")
269
+
267
270
  if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.1"):
268
271
  VLLM_SUPPORTED_CHAT_MODELS.append("minicpm4")
269
272
 
@@ -282,10 +285,15 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.0"):
282
285
 
283
286
  if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.10.0"):
284
287
  VLLM_SUPPORTED_CHAT_MODELS.append("gpt-oss")
285
- VLLM_SUPPORTED_CHAT_MODELS.append("seed-oss")
286
288
 
287
- if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.10.1.1"):
289
+ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.2"):
288
290
  VLLM_SUPPORTED_CHAT_MODELS.append("seed-oss")
291
+ VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Instruct")
292
+ VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Thinking")
293
+
294
+ if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.10.2"):
295
+ VLLM_SUPPORTED_VISION_MODEL_LIST.append("Qwen3-VL-Instruct")
296
+ VLLM_SUPPORTED_VISION_MODEL_LIST.append("Qwen3-VL-Instruct")
289
297
 
290
298
 
291
299
  class VLLMModel(LLM):
@@ -393,6 +401,7 @@ class VLLMModel(LLM):
393
401
  self.prepare_parse_reasoning_content(
394
402
  reasoning_content, enable_thinking=enable_thinking
395
403
  )
404
+ self.prepare_parse_tool_calls()
396
405
 
397
406
  if (
398
407
  isinstance(self.model_spec, LlamaCppLLMSpecV2)
@@ -773,7 +782,6 @@ class VLLMModel(LLM):
773
782
  sanitized = VLLMGenerateConfig()
774
783
 
775
784
  response_format = generate_config.pop("response_format", None)
776
- guided_decoding_backend = generate_config.get("guided_decoding_backend", None)
777
785
  guided_json_object = None
778
786
  guided_json = None
779
787
 
@@ -784,8 +792,6 @@ class VLLMModel(LLM):
784
792
  json_schema = response_format.get("json_schema")
785
793
  assert json_schema is not None
786
794
  guided_json = json_schema.get("json_schema")
787
- if guided_decoding_backend is None:
788
- guided_decoding_backend = "outlines"
789
795
 
790
796
  sanitized.setdefault("lora_name", generate_config.get("lora_name", None))
791
797
  sanitized.setdefault("n", generate_config.get("n", 1))
@@ -833,10 +839,6 @@ class VLLMModel(LLM):
833
839
  "guided_json_object",
834
840
  generate_config.get("guided_json_object", guided_json_object),
835
841
  )
836
- sanitized.setdefault(
837
- "guided_decoding_backend",
838
- generate_config.get("guided_decoding_backend", guided_decoding_backend),
839
- )
840
842
 
841
843
  return sanitized
842
844
 
@@ -940,9 +942,21 @@ class VLLMModel(LLM):
940
942
 
941
943
  async def _get_tokenizer(self, lora_request: Any) -> Any:
942
944
  try:
943
- return await self._engine.get_tokenizer(lora_request) # type: ignore
945
+ # vLLM 0.11.0+ get_tokenizer doesn't accept lora_request parameter
946
+ if (
947
+ VLLM_VERSION >= version.parse("0.11.0")
948
+ or VLLM_VERSION.base_version >= "0.11.0"
949
+ ):
950
+ return await self._engine.get_tokenizer() # type: ignore
951
+ else:
952
+ return await self._engine.get_tokenizer(lora_request) # type: ignore
944
953
  except AttributeError:
945
- return await self._engine.get_tokenizer_async(lora_request) # type: ignore
954
+ # Fallback to get_tokenizer_async for older versions
955
+ try:
956
+ return await self._engine.get_tokenizer_async(lora_request) # type: ignore
957
+ except (AttributeError, TypeError):
958
+ # If all else fails, try without parameters
959
+ return await self._engine.get_tokenizer() # type: ignore
946
960
 
947
961
  def _tokenize(self, tokenizer: Any, prompt: str, config: dict) -> List[int]:
948
962
  truncate_prompt_tokens = config.get("truncate_prompt_tokens")
@@ -1023,23 +1037,65 @@ class VLLMModel(LLM):
1023
1037
  # guided decoding only available for vllm >= 0.6.3
1024
1038
  from vllm.sampling_params import GuidedDecodingParams
1025
1039
 
1026
- guided_options = GuidedDecodingParams.from_optional(
1027
- json=sanitized_generate_config.pop("guided_json", None),
1028
- regex=sanitized_generate_config.pop("guided_regex", None),
1029
- choice=sanitized_generate_config.pop("guided_choice", None),
1030
- grammar=sanitized_generate_config.pop("guided_grammar", None),
1031
- json_object=sanitized_generate_config.pop("guided_json_object", None),
1032
- backend=sanitized_generate_config.pop("guided_decoding_backend", None),
1033
- whitespace_pattern=sanitized_generate_config.pop(
1034
- "guided_whitespace_pattern", None
1035
- ),
1040
+ # Extract guided decoding parameters
1041
+ guided_params: dict[str, Any] = {}
1042
+ guided_json = sanitized_generate_config.pop("guided_json", None)
1043
+ if guided_json:
1044
+ guided_params["json"] = guided_json
1045
+
1046
+ guided_regex = sanitized_generate_config.pop("guided_regex", None)
1047
+ if guided_regex:
1048
+ guided_params["regex"] = guided_regex
1049
+
1050
+ guided_choice = sanitized_generate_config.pop("guided_choice", None)
1051
+ if guided_choice:
1052
+ guided_params["choice"] = guided_choice
1053
+
1054
+ guided_grammar = sanitized_generate_config.pop("guided_grammar", None)
1055
+ if guided_grammar:
1056
+ guided_params["grammar"] = guided_grammar
1057
+
1058
+ guided_json_object = sanitized_generate_config.pop(
1059
+ "guided_json_object", None
1036
1060
  )
1061
+ if guided_json_object:
1062
+ guided_params["json_object"] = guided_json_object
1037
1063
 
1038
- sampling_params = SamplingParams(
1039
- guided_decoding=guided_options, **sanitized_generate_config
1064
+ guided_backend = sanitized_generate_config.pop(
1065
+ "guided_decoding_backend", None
1040
1066
  )
1067
+ if guided_backend:
1068
+ guided_params["_backend"] = guided_backend
1069
+
1070
+ guided_whitespace_pattern = sanitized_generate_config.pop(
1071
+ "guided_whitespace_pattern", None
1072
+ )
1073
+ if guided_whitespace_pattern:
1074
+ guided_params["whitespace_pattern"] = guided_whitespace_pattern
1075
+
1076
+ # Create GuidedDecodingParams if we have any guided parameters
1077
+ guided_options = None
1078
+ if guided_params:
1079
+ try:
1080
+ guided_options = GuidedDecodingParams(**guided_params)
1081
+ except Exception as e:
1082
+ logger.warning(f"Failed to create GuidedDecodingParams: {e}")
1083
+ guided_options = None
1084
+
1085
+ # Use structured_outputs for vLLM >= 0.11.0, guided_decoding for older versions
1086
+ if (
1087
+ VLLM_VERSION >= version.parse("0.11.0")
1088
+ or VLLM_VERSION.base_version >= "0.11.0"
1089
+ ):
1090
+ sampling_params = SamplingParams(
1091
+ structured_outputs=guided_options, **sanitized_generate_config
1092
+ )
1093
+ else:
1094
+ sampling_params = SamplingParams(
1095
+ guided_decoding=guided_options, **sanitized_generate_config
1096
+ )
1041
1097
  else:
1042
- # ignore generate configs
1098
+ # ignore generate configs for older versions
1043
1099
  sanitized_generate_config.pop("guided_json", None)
1044
1100
  sanitized_generate_config.pop("guided_regex", None)
1045
1101
  sanitized_generate_config.pop("guided_choice", None)
@@ -1242,6 +1298,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
1242
1298
  ) -> Dict:
1243
1299
  if not generate_config:
1244
1300
  generate_config = {}
1301
+
1245
1302
  if "reasoning" in getattr(self.model_family, "model_ability", []):
1246
1303
  generate_config.pop("stop", None)
1247
1304
  generate_config.pop("stop_token_ids", None)
@@ -1255,6 +1312,19 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
1255
1312
  generate_config["stop_token_ids"] = (
1256
1313
  self.model_family.stop_token_ids.copy()
1257
1314
  )
1315
+
1316
+ # if response_format exists,generate guided_json
1317
+ if "response_format" in generate_config:
1318
+ resp_format = generate_config["response_format"]
1319
+ if (
1320
+ isinstance(resp_format, dict)
1321
+ and resp_format.get("type") == "json_schema"
1322
+ and "json_schema" in resp_format
1323
+ ):
1324
+ schema = resp_format["json_schema"].get("schema_")
1325
+ if schema:
1326
+ generate_config["guided_json"] = schema
1327
+
1258
1328
  return generate_config
1259
1329
 
1260
1330
  @staticmethod
@@ -1291,59 +1361,6 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
1291
1361
 
1292
1362
  return processed_messages
1293
1363
 
1294
- async def _async_to_tool_completion_chunks(
1295
- self,
1296
- chunks: AsyncGenerator[CompletionChunk, None],
1297
- ctx: Optional[Dict[str, Any]] = {},
1298
- ) -> AsyncGenerator[ChatCompletionChunk, None]:
1299
- def set_context():
1300
- if ctx:
1301
- chat_context_var.set(ctx)
1302
-
1303
- i = 0
1304
- previous_texts = [""]
1305
- tool_call = False
1306
- tool_call_texts = [""]
1307
- full_text = ""
1308
- if self.reasoning_parser:
1309
- set_context()
1310
- chunks = self.reasoning_parser.prepare_reasoning_content_streaming(chunks)
1311
- async for chunk in chunks:
1312
- set_context()
1313
- if i == 0:
1314
- for first_chunk in self._get_first_chat_completion_chunk(
1315
- chunk, self.reasoning_parser
1316
- ):
1317
- yield first_chunk
1318
- # usage
1319
- choices = chunk.get("choices")
1320
- if not choices:
1321
- yield self._get_final_chat_completion_chunk(chunk)
1322
- else:
1323
- full_text += chunk["choices"][0]["text"]
1324
- if self.is_tool_call_chunk_start(chunk):
1325
- tool_call = True
1326
- if tool_call:
1327
- tool_call_text = tool_call_texts[-1]
1328
- tool_call_text += chunk["choices"][0]["text"]
1329
- tool_call_texts.append(tool_call_text)
1330
- if self.is_tool_call_chunk_end(chunk):
1331
- yield self._post_process_completion_chunk(
1332
- self.model_family,
1333
- self.model_uid,
1334
- chunk,
1335
- reasoning_parser=self.reasoning_parser,
1336
- tool_call_text=tool_call_text,
1337
- )
1338
- tool_call = False
1339
- tool_call_texts = [""]
1340
- else:
1341
- yield self._to_chat_completion_chunk(
1342
- chunk, self.reasoning_parser, previous_texts
1343
- )
1344
- i += 1
1345
- logger.debug("Chat finished, output: %s", full_text)
1346
-
1347
1364
  @vllm_check
1348
1365
  async def async_chat(
1349
1366
  self,
@@ -1408,7 +1425,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
1408
1425
  assert not isinstance(c, AsyncGenerator)
1409
1426
  if tools:
1410
1427
  return self._post_process_completion(
1411
- self.model_family, self.model_uid, c, self.reasoning_parser
1428
+ self.model_family, self.model_uid, c
1412
1429
  )
1413
1430
  return self._to_chat_completion(c, self.reasoning_parser)
1414
1431
 
@@ -0,0 +1,10 @@
1
+ __version__ = "0.7.4"
2
+ from .core import AudioSignal
3
+ from .core import STFTParams
4
+ from .core import Meter
5
+ from .core import util
6
+ from . import metrics
7
+ from . import data
8
+ from . import ml
9
+ from .data import datasets
10
+ from .data import transforms
@@ -0,0 +1,4 @@
1
+ from . import util
2
+ from .audio_signal import AudioSignal
3
+ from .audio_signal import STFTParams
4
+ from .loudness import Meter