xinference 1.10.0__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (328)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +473 -31
  3. xinference/client/restful/async_restful_client.py +178 -8
  4. xinference/client/restful/restful_client.py +151 -3
  5. xinference/core/supervisor.py +99 -53
  6. xinference/core/worker.py +10 -0
  7. xinference/deploy/cmdline.py +15 -0
  8. xinference/model/audio/core.py +21 -6
  9. xinference/model/audio/indextts2.py +166 -0
  10. xinference/model/audio/model_spec.json +58 -21
  11. xinference/model/image/model_spec.json +159 -90
  12. xinference/model/image/stable_diffusion/core.py +13 -4
  13. xinference/model/llm/__init__.py +6 -2
  14. xinference/model/llm/llm_family.json +1299 -174
  15. xinference/model/llm/mlx/distributed_models/core.py +41 -0
  16. xinference/model/llm/mlx/distributed_models/qwen2.py +1 -2
  17. xinference/model/llm/sglang/core.py +44 -11
  18. xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +94 -32
  19. xinference/model/llm/tool_parsers/qwen_tool_parser.py +29 -4
  20. xinference/model/llm/transformers/chatglm.py +3 -0
  21. xinference/model/llm/transformers/core.py +129 -36
  22. xinference/model/llm/transformers/multimodal/minicpmv45.py +340 -0
  23. xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
  24. xinference/model/llm/transformers/utils.py +23 -0
  25. xinference/model/llm/utils.py +48 -32
  26. xinference/model/llm/vllm/core.py +207 -72
  27. xinference/model/utils.py +74 -31
  28. xinference/thirdparty/audiotools/__init__.py +10 -0
  29. xinference/thirdparty/audiotools/core/__init__.py +4 -0
  30. xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
  31. xinference/thirdparty/audiotools/core/display.py +194 -0
  32. xinference/thirdparty/audiotools/core/dsp.py +390 -0
  33. xinference/thirdparty/audiotools/core/effects.py +647 -0
  34. xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
  35. xinference/thirdparty/audiotools/core/loudness.py +320 -0
  36. xinference/thirdparty/audiotools/core/playback.py +252 -0
  37. xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
  38. xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
  39. xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
  40. xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
  41. xinference/thirdparty/audiotools/core/util.py +671 -0
  42. xinference/thirdparty/audiotools/core/whisper.py +97 -0
  43. xinference/thirdparty/audiotools/data/__init__.py +3 -0
  44. xinference/thirdparty/audiotools/data/datasets.py +517 -0
  45. xinference/thirdparty/audiotools/data/preprocess.py +81 -0
  46. xinference/thirdparty/audiotools/data/transforms.py +1592 -0
  47. xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
  48. xinference/thirdparty/audiotools/metrics/distance.py +131 -0
  49. xinference/thirdparty/audiotools/metrics/quality.py +159 -0
  50. xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
  51. xinference/thirdparty/audiotools/ml/__init__.py +5 -0
  52. xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
  53. xinference/thirdparty/audiotools/ml/decorators.py +440 -0
  54. xinference/thirdparty/audiotools/ml/experiment.py +90 -0
  55. xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
  56. xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
  57. xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
  58. xinference/thirdparty/audiotools/post.py +140 -0
  59. xinference/thirdparty/audiotools/preference.py +600 -0
  60. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +1 -1
  61. xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
  62. xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
  63. xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
  64. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
  65. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
  66. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
  67. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
  68. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  69. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
  70. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
  71. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
  72. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
  73. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
  74. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
  75. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
  76. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
  77. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
  78. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
  79. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
  80. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
  81. xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
  82. xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
  83. xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
  84. xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
  85. xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
  86. xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
  87. xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
  88. xinference/thirdparty/indextts/__init__.py +0 -0
  89. xinference/thirdparty/indextts/cli.py +65 -0
  90. xinference/thirdparty/indextts/gpt/__init__.py +0 -0
  91. xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
  92. xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
  93. xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
  94. xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
  95. xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
  96. xinference/thirdparty/indextts/gpt/model.py +713 -0
  97. xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
  98. xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
  99. xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
  100. xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
  101. xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
  102. xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
  103. xinference/thirdparty/indextts/infer.py +690 -0
  104. xinference/thirdparty/indextts/infer_v2.py +739 -0
  105. xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
  106. xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
  107. xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
  108. xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
  109. xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
  110. xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
  111. xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
  112. xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
  113. xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
  114. xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
  115. xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
  116. xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
  117. xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
  118. xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
  119. xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
  120. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
  121. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
  122. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
  123. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
  124. xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
  125. xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
  126. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
  127. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
  128. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  129. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
  130. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
  131. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
  132. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
  133. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
  134. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
  135. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
  136. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
  137. xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
  138. xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
  139. xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
  140. xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
  141. xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
  142. xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
  143. xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
  144. xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
  145. xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
  146. xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
  147. xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
  148. xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
  149. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
  150. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
  151. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
  152. xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
  153. xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
  154. xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
  155. xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
  156. xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
  157. xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
  158. xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
  159. xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
  160. xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
  161. xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
  162. xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
  163. xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
  164. xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
  165. xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
  166. xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
  167. xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
  168. xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
  169. xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
  170. xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
  171. xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
  172. xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
  173. xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
  174. xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
  175. xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
  176. xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
  177. xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
  178. xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
  179. xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
  180. xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
  181. xinference/thirdparty/indextts/utils/__init__.py +0 -0
  182. xinference/thirdparty/indextts/utils/arch_util.py +120 -0
  183. xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
  184. xinference/thirdparty/indextts/utils/common.py +121 -0
  185. xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
  186. xinference/thirdparty/indextts/utils/front.py +536 -0
  187. xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
  188. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
  189. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
  190. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
  191. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
  192. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
  193. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
  194. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
  195. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
  196. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
  197. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
  198. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
  199. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
  200. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
  201. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
  202. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
  203. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
  204. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
  205. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
  206. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
  207. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
  208. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
  209. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
  210. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
  211. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
  212. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
  213. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
  214. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
  215. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
  216. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
  217. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
  218. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
  219. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
  220. xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
  221. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
  222. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
  223. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
  224. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
  225. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
  226. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
  227. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
  228. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
  229. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
  230. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
  231. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
  232. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
  233. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
  234. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
  235. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
  236. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
  237. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
  238. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
  239. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
  240. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
  241. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
  242. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
  243. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
  244. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
  245. xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
  246. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
  247. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
  248. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
  249. xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
  250. xinference/thirdparty/indextts/utils/text_utils.py +41 -0
  251. xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
  252. xinference/thirdparty/indextts/utils/utils.py +93 -0
  253. xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
  254. xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
  255. xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
  256. xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
  257. xinference/thirdparty/melo/text/chinese_mix.py +2 -2
  258. xinference/types.py +9 -0
  259. xinference/ui/gradio/media_interface.py +66 -8
  260. xinference/ui/web/ui/build/asset-manifest.json +6 -6
  261. xinference/ui/web/ui/build/index.html +1 -1
  262. xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
  263. xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
  264. xinference/ui/web/ui/build/static/js/main.45e78536.js +3 -0
  265. xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.45e78536.js.LICENSE.txt} +0 -7
  266. xinference/ui/web/ui/build/static/js/main.45e78536.js.map +1 -0
  267. xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
  268. xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
  269. xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
  270. xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
  271. xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
  272. xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
  273. xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
  274. xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
  275. xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
  276. xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
  277. xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
  278. xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
  279. xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
  280. xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
  281. xinference/ui/web/ui/node_modules/.cache/babel-loader/ea2a26361204e70cf1018d6990fb6354bed82b3ac69690391e0f100385e7abb7.json +1 -0
  282. xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
  283. xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
  284. xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
  285. xinference/ui/web/ui/package-lock.json +0 -34
  286. xinference/ui/web/ui/package.json +0 -1
  287. xinference/ui/web/ui/src/locales/en.json +9 -3
  288. xinference/ui/web/ui/src/locales/ja.json +9 -3
  289. xinference/ui/web/ui/src/locales/ko.json +9 -3
  290. xinference/ui/web/ui/src/locales/zh.json +9 -3
  291. {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/METADATA +24 -6
  292. {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/RECORD +296 -77
  293. xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
  294. xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
  295. xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
  296. xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
  297. xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
  298. xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
  299. xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
  300. xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
  301. xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
  302. xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
  303. xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
  304. xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
  305. xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
  306. xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
  307. xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
  308. xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
  309. xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
  310. xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
  311. xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
  312. xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
  313. xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
  314. xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
  315. xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
  316. xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
  317. xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
  318. xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
  319. xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
  320. xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
  321. xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
  322. xinference/ui/web/ui/node_modules/select/bower.json +0 -13
  323. xinference/ui/web/ui/node_modules/select/package.json +0 -29
  324. xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
  325. {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/WHEEL +0 -0
  326. {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/entry_points.txt +0 -0
  327. {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/licenses/LICENSE +0 -0
  328. {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/top_level.txt +0 -0
xinference/model/llm/vllm/core.py CHANGED
@@ -131,7 +131,7 @@ except ImportError:
     VLLM_INSTALLED = False
     VLLM_VERSION = None
 
-VLLM_SUPPORTED_VISION_MODEL_LIST: List[str] = []
+VLLM_SUPPORTED_MULTI_MODEL_LIST: List[str] = []
 VLLM_SUPPORTED_MODELS = [
     "llama-2",
     "llama-3",
@@ -229,34 +229,37 @@ if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.5.3"):
     VLLM_SUPPORTED_CHAT_MODELS.append("HuatuoGPT-o1-LLaMA-3.1")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.6.1"):
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("internvl2")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("InternVL2.5")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("InternVL2.5-MPO")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("InternVL3")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("internvl2")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("InternVL2.5")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("InternVL2.5-MPO")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("InternVL3")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.6.2"):
     VLLM_SUPPORTED_CHAT_MODELS.append("minicpm3-4b")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.6.3"):
     VLLM_SUPPORTED_MODELS.append("llama-3.2-vision")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("llama-3.2-vision-instruct")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("qwen2-vl-instruct")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("QvQ-72B-Preview")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("llama-3.2-vision-instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2-vl-instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("QvQ-72B-Preview")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2-audio")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.7.0"):
     VLLM_SUPPORTED_CHAT_MODELS.append("internlm3-instruct")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.7.2"):
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("qwen2.5-vl-instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2.5-vl-instruct")
     VLLM_SUPPORTED_CHAT_MODELS.append("moonlight-16b-a3b-instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2-audio-instruct")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.7.3"):
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen2.5-instruct-1m")
     VLLM_SUPPORTED_CHAT_MODELS.append("qwenLong-l1")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2.5-omni")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.0"):
     VLLM_SUPPORTED_CHAT_MODELS.append("gemma-3-1b-it")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("gemma-3-it")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("gemma-3-it")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.4"):
     VLLM_SUPPORTED_CHAT_MODELS.append("glm4-0414")
@@ -264,12 +267,15 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.4"):
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.5"):
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen3")
 
+if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.0"):
+    VLLM_SUPPORTED_CHAT_MODELS.append("Baichuan-M2")
+
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.1"):
     VLLM_SUPPORTED_CHAT_MODELS.append("minicpm4")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.2"):
     VLLM_SUPPORTED_CHAT_MODELS.append("Ernie4.5")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("glm-4.1v-thinking")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("glm-4.1v-thinking")
     VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Instruct")
     VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Thinking")
     VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Coder")
@@ -277,15 +283,22 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.2"):
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.0"):
     VLLM_SUPPORTED_CHAT_MODELS.append("glm-4.5")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("glm-4.5v")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("glm-4.5v")
     VLLM_SUPPORTED_CHAT_MODELS.append("KAT-V1")
 
 if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.10.0"):
     VLLM_SUPPORTED_CHAT_MODELS.append("gpt-oss")
-    VLLM_SUPPORTED_CHAT_MODELS.append("seed-oss")
 
-if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.10.1.1"):
+if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.2"):
     VLLM_SUPPORTED_CHAT_MODELS.append("seed-oss")
+    VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Instruct")
+    VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Thinking")
+
+if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.11.0"):
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-VL-Thinking")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-VL-Instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-Omni-Thinking")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-Omni-Instruct")
 
 
 class VLLMModel(LLM):
@@ -537,7 +550,7 @@ class VLLMModel(LLM):
             # patch vllm Executor.get_class
             Executor.get_class = lambda vllm_config: executor_cls
             self._engine = AsyncLLMEngine.from_engine_args(engine_args)
-        except:
+        except:  # noqa: E722
             logger.exception("Creating vllm engine failed")
             self._loading_error = sys.exc_info()
 
@@ -706,7 +719,7 @@ class VLLMModel(LLM):
                 logger.info("Detecting vLLM is not health, prepare to quit the process")
                 try:
                     self.stop()
-                except:
+                except:  # noqa: E722
                     # ignore error when stop
                     pass
                 # Just kill the process and let xinference auto-recover the model
@@ -849,7 +862,7 @@ class VLLMModel(LLM):
         if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8", "bnb"]:
             return False
         if llm_spec.model_format == "pytorch":
-            if quantization != "none" and not (quantization is None):
+            if quantization != "none" and quantization is not None:
                 return False
         if llm_spec.model_format == "awq":
             # Currently, only 4-bit weight quantization is supported for AWQ, but got 8 bits.
@@ -934,9 +947,21 @@ class VLLMModel(LLM):
 
     async def _get_tokenizer(self, lora_request: Any) -> Any:
         try:
-            return await self._engine.get_tokenizer(lora_request)  # type: ignore
+            # vLLM 0.11.0+ get_tokenizer doesn't accept lora_request parameter
+            if (
+                VLLM_VERSION >= version.parse("0.11.0")
+                or VLLM_VERSION.base_version >= "0.11.0"
+            ):
+                return await self._engine.get_tokenizer()  # type: ignore
+            else:
+                return await self._engine.get_tokenizer(lora_request)  # type: ignore
         except AttributeError:
-            return await self._engine.get_tokenizer_async(lora_request)  # type: ignore
+            # Fallback to get_tokenizer_async for older versions
+            try:
+                return await self._engine.get_tokenizer_async(lora_request)  # type: ignore
+            except (AttributeError, TypeError):
+                # If all else fails, try without parameters
+                return await self._engine.get_tokenizer()  # type: ignore
 
     def _tokenize(self, tokenizer: Any, prompt: str, config: dict) -> List[int]:
         truncate_prompt_tokens = config.get("truncate_prompt_tokens")
@@ -968,7 +993,10 @@ class VLLMModel(LLM):
         from vllm import TokensPrompt
 
         token_ids = await asyncio.to_thread(
-            self._tokenize, tokenizer, prompt, config  # type: ignore
+            self._tokenize,
+            tokenizer,
+            prompt,  # type: ignore
+            config,
         )
         return TokensPrompt(prompt_token_ids=token_ids)
 
@@ -1017,23 +1045,90 @@ class VLLMModel(LLM):
             # guided decoding only available for vllm >= 0.6.3
             from vllm.sampling_params import GuidedDecodingParams
 
-            guided_options = GuidedDecodingParams.from_optional(
-                json=sanitized_generate_config.pop("guided_json", None),
-                regex=sanitized_generate_config.pop("guided_regex", None),
-                choice=sanitized_generate_config.pop("guided_choice", None),
-                grammar=sanitized_generate_config.pop("guided_grammar", None),
-                json_object=sanitized_generate_config.pop("guided_json_object", None),
-                backend=sanitized_generate_config.pop("guided_decoding_backend", None),
-                whitespace_pattern=sanitized_generate_config.pop(
-                    "guided_whitespace_pattern", None
-                ),
+            # Extract guided decoding parameters
+            guided_params: dict[str, Any] = {}
+            guided_json = sanitized_generate_config.pop("guided_json", None)
+            if guided_json:
+                guided_params["json"] = guided_json
+
+            guided_regex = sanitized_generate_config.pop("guided_regex", None)
+            if guided_regex:
+                guided_params["regex"] = guided_regex
+
+            guided_choice = sanitized_generate_config.pop("guided_choice", None)
+            if guided_choice:
+                guided_params["choice"] = guided_choice
+
+            guided_grammar = sanitized_generate_config.pop("guided_grammar", None)
+            if guided_grammar:
+                guided_params["grammar"] = guided_grammar
+
+            guided_json_object = sanitized_generate_config.pop(
+                "guided_json_object", None
+            )
+            if guided_json_object:
+                guided_params["json_object"] = guided_json_object
+
+            guided_backend = sanitized_generate_config.pop(
+                "guided_decoding_backend", None
             )
+            if guided_backend:
+                guided_params["_backend"] = guided_backend
 
-            sampling_params = SamplingParams(
-                guided_decoding=guided_options, **sanitized_generate_config
+            guided_whitespace_pattern = sanitized_generate_config.pop(
+                "guided_whitespace_pattern", None
             )
+            if guided_whitespace_pattern:
+                guided_params["whitespace_pattern"] = guided_whitespace_pattern
+
+            # Create GuidedDecodingParams if we have any guided parameters
+            guided_options = None
+            if guided_params:
+                try:
+                    guided_options = GuidedDecodingParams(**guided_params)
+                except Exception as e:
+                    logger.warning(f"Failed to create GuidedDecodingParams: {e}")
+                    guided_options = None
+
+            try:
+                import inspect
+
+                sp_sig = inspect.signature(SamplingParams)
+                # For v0.9.2 and similar versions, prioritize guided_decoding over structured_outputs
+                # structured_outputs was introduced later (around v0.11.0) and may not accept
+                # GuidedDecodingParams in earlier versions even if the parameter exists
+                if "guided_decoding" in sp_sig.parameters:
+                    sampling_params = SamplingParams(
+                        guided_decoding=guided_options, **sanitized_generate_config
+                    )
+                elif "structured_outputs" in sp_sig.parameters:
+                    try:
+                        sampling_params = SamplingParams(
+                            structured_outputs=guided_options,
+                            **sanitized_generate_config,
+                        )
+                    except TypeError as e:
+                        if "structured_outputs" in str(e):
+                            # structured_outputs parameter exists but doesn't accept GuidedDecodingParams
+                            # Fall back to no guided decoding
+                            logger.warning(
+                                f"structured_outputs parameter failed: {e}. "
+                                "Falling back to no guided decoding for vLLM version compatibility."
+                            )
+                            sampling_params = SamplingParams(
+                                **sanitized_generate_config
+                            )
+                        else:
+                            raise
+                else:
+                    sampling_params = SamplingParams(**sanitized_generate_config)
+            except Exception as e:
+                logger.warning(
+                    f"Failed to create SamplingParams with guided decoding: {e}"
+                )
+                sampling_params = SamplingParams(**sanitized_generate_config)
         else:
-            # ignore generate configs
+            # ignore generate configs for older versions
             sanitized_generate_config.pop("guided_json", None)
             sanitized_generate_config.pop("guided_regex", None)
             sanitized_generate_config.pop("guided_choice", None)
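
Note: the rewritten branch above feature-detects the SamplingParams constructor instead of assuming one vLLM API. A minimal standalone sketch of the same pattern, assuming vLLM >= 0.6.3 is installed (the schema and max_tokens values are placeholders):

    import inspect

    from vllm.sampling_params import GuidedDecodingParams, SamplingParams

    # Hypothetical schema, standing in for a user-supplied guided_json value.
    schema = {"type": "object", "properties": {"name": {"type": "string"}}}
    guided = GuidedDecodingParams(json=schema)

    # Mirror the inspect.signature() check above: older vLLM exposes
    # guided_decoding; newer releases moved to structured_outputs.
    sp_params = inspect.signature(SamplingParams).parameters
    if "guided_decoding" in sp_params:
        sampling = SamplingParams(max_tokens=128, guided_decoding=guided)
    elif "structured_outputs" in sp_params:
        sampling = SamplingParams(max_tokens=128, structured_outputs=guided)
    else:
        sampling = SamplingParams(max_tokens=128)  # no guided decoding available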
@@ -1049,7 +1144,9 @@ class VLLMModel(LLM):
             # this requires tokenizing
             tokenizer = await self._get_tokenizer(lora_request)
             prompt_or_token_ids = await self._gen_tokens_prompt(
-                tokenizer, prompt, sanitized_generate_config  # type: ignore
+                tokenizer,
+                prompt,
+                sanitized_generate_config,  # type: ignore
             )
             sampling_params.max_tokens = max_tokens = self._context_length - len(  # type: ignore
                 prompt_or_token_ids["prompt_token_ids"]  # type: ignore
@@ -1204,11 +1301,10 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         ]:
             return False
         if llm_spec.model_format == "pytorch":
-            if quantization != "none" and not (quantization is None):
+            if quantization != "none" and quantization is not None:
                 return False
         if llm_spec.model_format == "awq":
-            # Currently, only 4-bit weight quantization is supported for AWQ, but got 8 bits.
-            if "4" not in quantization:
+            if not any(q in quantization for q in ("4", "8")):
                 return False
         if llm_spec.model_format == "gptq":
             if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.3.3"):
@@ -1236,6 +1332,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
     ) -> Dict:
         if not generate_config:
             generate_config = {}
+
         if "reasoning" in getattr(self.model_family, "model_ability", []):
             generate_config.pop("stop", None)
             generate_config.pop("stop_token_ids", None)
@@ -1249,6 +1346,19 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
             generate_config["stop_token_ids"] = (
                 self.model_family.stop_token_ids.copy()
             )
+
+        # if response_format exists, generate guided_json
+        if "response_format" in generate_config:
+            resp_format = generate_config["response_format"]
+            if (
+                isinstance(resp_format, dict)
+                and resp_format.get("type") == "json_schema"
+                and "json_schema" in resp_format
+            ):
+                schema = resp_format["json_schema"].get("schema_")
+                if schema:
+                    generate_config["guided_json"] = schema
+
         return generate_config
 
     @staticmethod
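
Note: with the response_format handling above, an OpenAI-style structured-output request is rewritten into guided_json before SamplingParams is built. A hypothetical client call that would exercise this path (endpoint, model uid, and schema are placeholders; xinference serves an OpenAI-compatible API):

    import openai

    client = openai.OpenAI(base_url="http://localhost:9997/v1", api_key="not-needed")
    completion = client.chat.completions.create(
        model="my-model-uid",  # placeholder uid of a launched vLLM model
        messages=[{"role": "user", "content": "Return a user object as JSON."}],
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "user",
                # arrives server-side as the parsed field "schema_" checked above
                "schema": {"type": "object", "properties": {"name": {"type": "string"}}},
            },
        },
    )
    print(completion.choices[0].message.content)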
@@ -1354,7 +1464,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
             return self._to_chat_completion(c, self.reasoning_parser)
 
 
-class VLLMVisionModel(VLLMModel, ChatModelMixin):
+class VLLMMultiModel(VLLMModel, ChatModelMixin):
     @classmethod
     def match_json(
         cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
@@ -1366,11 +1476,10 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
         if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8", "bnb"]:
             return False
         if llm_spec.model_format == "pytorch":
-            if quantization != "none" and not (quantization is None):
+            if quantization != "none" and quantization is not None:
                 return False
         if llm_spec.model_format == "awq":
-            # Currently, only 4-bit weight quantization is supported for AWQ, but got 8 bits.
-            if "4" not in quantization:
+            if not any(q in quantization for q in ("4", "8")):
                 return False
         if llm_spec.model_format == "gptq":
             if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.3.3"):
@@ -1380,12 +1489,16 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
             if "4" not in quantization:
                 return False
         if isinstance(llm_family, CustomLLMFamilyV2):
-            if llm_family.model_family not in VLLM_SUPPORTED_VISION_MODEL_LIST:
+            if llm_family.model_family not in VLLM_SUPPORTED_MULTI_MODEL_LIST:
                 return False
         else:
-            if llm_family.model_name not in VLLM_SUPPORTED_VISION_MODEL_LIST:
+            if llm_family.model_name not in VLLM_SUPPORTED_MULTI_MODEL_LIST:
                 return False
-        if "vision" not in llm_family.model_ability:
+        if (
+            "vision" not in llm_family.model_ability
+            and "audio" not in llm_family.model_ability
+            and "omni" not in llm_family.model_ability
+        ):
             return False
         return VLLM_INSTALLED
 
@@ -1394,13 +1507,21 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
     ) -> VLLMModelConfig:
         model_config = super()._sanitize_model_config(model_config)
         if VLLM_VERSION >= version.parse("0.5.5"):
-            model_config["limit_mm_per_prompt"] = (
-                json.loads(model_config.get("limit_mm_per_prompt"))  # type: ignore
-                if model_config.get("limit_mm_per_prompt")
-                else {
-                    "image": 2,  # default 2 images all chat
-                }
-            )
+            if model_config.get("limit_mm_per_prompt"):
+                model_config["limit_mm_per_prompt"] = json.loads(
+                    model_config.get("limit_mm_per_prompt")  # type: ignore
+                )
+            else:
+                if "omni" in self.model_family.model_ability:
+                    model_config["limit_mm_per_prompt"] = {
+                        "image": 2,
+                        "video": 2,
+                        "audio": 2,
+                    }
+                elif "vision" in self.model_family.model_ability:
+                    model_config["limit_mm_per_prompt"] = {"image": 2, "video": 2}
+                elif "audio" in self.model_family.model_ability:
+                    model_config["limit_mm_per_prompt"] = {"audio": 2}
         return model_config
 
     def _sanitize_chat_config(
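
Note: limit_mm_per_prompt is json.loads-ed when supplied, so it travels as a JSON string; the ability-based defaults above only apply when it is omitted. A hedged launch-time override (the keyword pass-through into the vLLM model config is an assumption based on the code above):

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://localhost:9997")
    model_uid = client.launch_model(
        model_name="qwen2.5-vl-instruct",
        model_engine="vllm",
        # parsed by json.loads above; overrides the {"image": 2, "video": 2} default
        limit_mm_per_prompt='{"image": 4, "video": 1}',
    )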
@@ -1434,7 +1555,10 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
         multi_modal_data = prompt.get("multi_modal_data")
 
         token_ids = await asyncio.to_thread(
-            self._tokenize, tokenizer, prompt_str, config  # type: ignore
+            self._tokenize,
+            tokenizer,
+            prompt_str,
+            config,  # type: ignore
         )
         return TokensPrompt(
             prompt_token_ids=token_ids, multi_modal_data=multi_modal_data
@@ -1450,9 +1574,13 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
         tools = generate_config.pop("tools", []) if generate_config else None
 
         model_family = self.model_family.model_family or self.model_family.model_name
-
+        audios, images, videos = None, None, None
         if "internvl" not in model_family.lower():
-            from qwen_vl_utils import process_vision_info
+            from qwen_omni_utils import (
+                process_audio_info,
+                process_mm_info,
+                process_vision_info,
+            )
 
             messages = self._transform_messages(messages)
 
@@ -1467,29 +1595,36 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
             if tools and model_family in QWEN_TOOL_CALL_FAMILY:
                 full_context_kwargs["tools"] = tools
             assert self.model_family.chat_template is not None
+            if "omni" in self.model_family.model_ability:
+                audios, images, videos = process_mm_info(
+                    messages, use_audio_in_video=True
+                )
+            elif "audio" in self.model_family.model_ability:
+                audios = process_audio_info(messages, use_audio_in_video=False)
+            elif "vision" in self.model_family.model_ability:
+                images, videos = process_vision_info(  # type: ignore
+                    messages, return_video_kwargs=False
+                )
+
             prompt = self.get_full_context(
                 messages, self.model_family.chat_template, **full_context_kwargs
             )
-            images, video_inputs = process_vision_info(messages)
-            if video_inputs:
-                raise ValueError("Not support video input now.")
-        else:
-            prompt, images = self.get_specific_prompt(model_family, messages)
 
-        if not images:
-            inputs = {
-                "prompt": prompt,
-            }
-        elif len(images) == 1:
-            inputs = {
-                "prompt": prompt,
-                "multi_modal_data": {"image": images[-1]},  # type: ignore
-            }
         else:
-            inputs = {
-                "prompt": prompt,
-                "multi_modal_data": {"image": images},  # type: ignore
-            }
+            prompt, images = self.get_specific_prompt(model_family, messages)
+        inputs = {"prompt": prompt, "multi_modal_data": {}, "mm_processor_kwargs": {}}
+        if images:
+            inputs["multi_modal_data"]["image"] = images
+        if videos:
+            inputs["multi_modal_data"]["video"] = videos
+        if audios:
+            inputs["multi_modal_data"]["audio"] = audios
+        if "omni" in self.model_family.model_ability:
+            inputs["mm_processor_kwargs"]["use_audio_in_video"] = True
+        if inputs["multi_modal_data"] == {}:
+            inputs.pop("multi_modal_data")
+        if inputs["mm_processor_kwargs"] == {}:
+            inputs.pop("mm_processor_kwargs")
         generate_config = self._sanitize_chat_config(generate_config)
 
         stream = generate_config.get("stream", None)
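
Note: after this rework the prompt object only carries the modalities that are actually present; empty multi_modal_data / mm_processor_kwargs sections are popped. For an omni-ability model given one image and one audio clip, the assembled inputs would look roughly like this (illustrative placeholder values, not real decoded media):

    inputs = {
        "prompt": "<chat-template-formatted prompt>",
        "multi_modal_data": {
            "image": ["<PIL.Image.Image>"],
            "audio": ["<decoded waveform>"],
        },
        "mm_processor_kwargs": {"use_audio_in_video": True},  # omni models only
    }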
xinference/model/utils.py CHANGED
@@ -315,6 +315,11 @@ def set_all_random_seed(seed: int):
 
 
 class CancellableDownloader:
+    _global_lock = threading.Lock()
+    _active_instances = 0
+    _original_update = None  # Class-level original update method
+    _patch_lock = threading.Lock()  # Additional lock for patching operations
+
     def __init__(
         self,
         cancel_error_cls: Type[BaseException] = asyncio.CancelledError,
@@ -325,23 +330,23 @@ class CancellableDownloader:
         self._cancelled = threading.Event()
         self._done_event = threading.Event()
         self._cancel_error_cls = cancel_error_cls
-        self._original_update = None
         # progress for tqdm that is main
         self._main_progresses: Set[tqdm] = set()
         # progress for file downloader
         # mainly when tqdm unit is set
         self._download_progresses: Set[tqdm] = set()
-        # tqdm original update
-        self._original_tqdm_update = None
+        # Instance-specific tqdm tracking
+        self._patched_instances: Set[int] = set()
 
     def reset(self):
         self._main_progresses.clear()
         self._download_progresses.clear()
 
     def get_progress(self) -> float:
-        if self.cancelled or self.done:
-            # directly return 1.0 when cancelled or finished
+        if self.done:
+            # directly return 1.0 when finished
             return 1.0
+        # Don't return 1.0 when cancelled, calculate actual progress
 
         tasks = finished_tasks = 0
         for main_progress in self._main_progresses:
376
381
 
377
382
  def cancel(self):
378
383
  self._cancelled.set()
384
+ self._done_event.set()
379
385
 
380
386
  @property
381
387
  def cancelled(self):
@@ -392,39 +398,76 @@ class CancellableDownloader:
             raise self._cancel_error_cls(error_msg)
 
     def patch_tqdm(self):
-        # patch tqdm
-        # raise error if cancelled
-        self._original_update = original_update = tqdm.update
-        downloader = self
-
-        def patched_update(self, n):
-            if downloader.cancelled:
-                downloader.raise_error()
-            if not self.disable:
-                progresses = (
-                    downloader._main_progresses
-                    if getattr(self, "unit", "it") == "it"
-                    else downloader._download_progresses
-                )
-                progresses.add(self)
-            return original_update(self, n)
-
-        tqdm.update = patched_update
+        # Use class-level patching to avoid conflicts
+        with self._patch_lock:
+            if self._original_update is None:
+                self._original_update = original_update = tqdm.update
+
+                # Thread-safe patched update
+                def patched_update(tqdm_instance, n):
+                    import gc
+
+                    # Get all CancellableDownloader instances and check for cancellation
+                    downloaders = [
+                        obj
+                        for obj in gc.get_objects()
+                        if isinstance(obj, CancellableDownloader)
+                    ]
+
+                    for downloader in downloaders:
+                        # if download cancelled, throw error
+                        if getattr(downloader, "cancelled", False):
+                            downloader.raise_error()
+
+                        progresses = None
+                        if not getattr(tqdm_instance, "disable", False):
+                            unit = getattr(tqdm_instance, "unit", "it")
+                            if unit == "it":
+                                progresses = getattr(
+                                    downloader, "_main_progresses", None
+                                )
+                            else:
+                                progresses = getattr(
+                                    downloader, "_download_progresses", None
+                                )
+
+                        if progresses is not None:
+                            progresses.add(tqdm_instance)
+                        else:
+                            logger.debug(
+                                f"No progresses found for downloader {downloader}"
+                            )
+
+                    # Call original update with safety check
+                    return original_update(tqdm_instance, n)
+
+                tqdm.update = patched_update
 
     def unpatch_tqdm(self):
-        from tqdm.auto import tqdm
-
-        if self._original_update:
-            tqdm.update = self._original_update
+        with self._patch_lock:
+            if self._original_update is not None and self._active_instances == 0:
+                tqdm.update = self._original_update
+                self._original_update = None
 
     def __enter__(self):
-        self.patch_tqdm()
+        # Use global lock to prevent concurrent patching
+        with self._global_lock:
+            if self._active_instances == 0:
+                self.patch_tqdm()
+            self._active_instances += 1
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-        self.unpatch_tqdm()
-        self._done_event.set()
-        self.reset()
+        # Use global lock to prevent concurrent unpatching
+        with self._global_lock:
+            self._active_instances -= 1
+            if self._active_instances == 0:
+                self.unpatch_tqdm()
+        try:
+            self._done_event.set()
+            self.reset()
+        except Exception as e:
+            logger.debug(f"Error during CancellableDownloader cleanup: {e}")
 
 
 def get_engine_params_by_name(
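
Note: the downloader now patches tqdm.update once per process (class-level lock plus instance counting), so nested or concurrent downloads share a single patch and the last __exit__ restores the original. A minimal usage sketch, assuming any tqdm-driven loop stands in for a real download:

    import threading
    import time

    from tqdm.auto import tqdm

    from xinference.model.utils import CancellableDownloader

    downloader = CancellableDownloader(cancel_error_cls=RuntimeError)

    def work():
        try:
            with downloader:
                for _ in tqdm(range(100)):  # stands in for a real file download
                    time.sleep(0.1)
        except RuntimeError:
            print("download cancelled")

    t = threading.Thread(target=work)
    t.start()
    time.sleep(1.0)
    downloader.cancel()  # the patched tqdm.update raises RuntimeError in the worker
    t.join()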
xinference/thirdparty/audiotools/__init__.py ADDED
@@ -0,0 +1,10 @@
+__version__ = "0.7.4"
+from .core import AudioSignal
+from .core import STFTParams
+from .core import Meter
+from .core import util
+from . import metrics
+from . import data
+from . import ml
+from .data import datasets
+from .data import transforms
xinference/thirdparty/audiotools/core/__init__.py ADDED
@@ -0,0 +1,4 @@
+from . import util
+from .audio_signal import AudioSignal
+from .audio_signal import STFTParams
+from .loudness import Meter
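
Note: the two __init__.py files above expose the vendored copy of Descript's audiotools (pinned at 0.7.4) that backs the new IndexTTS-2 audio pipeline. A quick hedged sanity check of the import surface, assuming the vendored copy keeps audiotools' public API and that sample.wav is a placeholder file on disk:

    from xinference.thirdparty.audiotools import AudioSignal, STFTParams

    signal = AudioSignal("sample.wav")                 # load a waveform from disk
    print(signal.sample_rate, signal.signal_duration)  # basic metadata
    stft_params = STFTParams(window_length=2048, hop_length=512)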