xinference 1.9.1__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (334) hide show
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +400 -3
  3. xinference/client/restful/async_restful_client.py +20 -3
  4. xinference/client/restful/restful_client.py +20 -3
  5. xinference/constants.py +2 -0
  6. xinference/core/supervisor.py +111 -49
  7. xinference/core/worker.py +10 -0
  8. xinference/deploy/cmdline.py +15 -0
  9. xinference/model/audio/core.py +26 -6
  10. xinference/model/audio/indextts2.py +166 -0
  11. xinference/model/audio/kokoro.py +1 -1
  12. xinference/model/audio/kokoro_zh.py +124 -0
  13. xinference/model/audio/model_spec.json +58 -1
  14. xinference/model/embedding/sentence_transformers/core.py +4 -4
  15. xinference/model/embedding/vllm/core.py +7 -1
  16. xinference/model/image/model_spec.json +71 -3
  17. xinference/model/image/stable_diffusion/core.py +13 -4
  18. xinference/model/llm/__init__.py +4 -0
  19. xinference/model/llm/core.py +10 -0
  20. xinference/model/llm/llama_cpp/core.py +1 -0
  21. xinference/model/llm/llm_family.json +503 -21
  22. xinference/model/llm/llm_family.py +1 -0
  23. xinference/model/llm/mlx/core.py +52 -33
  24. xinference/model/llm/sglang/core.py +32 -55
  25. xinference/model/llm/tool_parsers/__init__.py +58 -0
  26. xinference/model/llm/tool_parsers/abstract_tool_parser.py +33 -0
  27. xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +190 -0
  28. xinference/model/llm/tool_parsers/deepseek_v3_tool_parser.py +145 -0
  29. xinference/model/llm/tool_parsers/glm4_tool_parser.py +123 -0
  30. xinference/model/llm/tool_parsers/llama3_tool_parser.py +77 -0
  31. xinference/model/llm/tool_parsers/qwen_tool_parser.py +320 -0
  32. xinference/model/llm/transformers/core.py +1 -1
  33. xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
  34. xinference/model/llm/utils.py +138 -53
  35. xinference/model/llm/vllm/core.py +95 -78
  36. xinference/thirdparty/audiotools/__init__.py +10 -0
  37. xinference/thirdparty/audiotools/core/__init__.py +4 -0
  38. xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
  39. xinference/thirdparty/audiotools/core/display.py +194 -0
  40. xinference/thirdparty/audiotools/core/dsp.py +390 -0
  41. xinference/thirdparty/audiotools/core/effects.py +647 -0
  42. xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
  43. xinference/thirdparty/audiotools/core/loudness.py +320 -0
  44. xinference/thirdparty/audiotools/core/playback.py +252 -0
  45. xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
  46. xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
  47. xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
  48. xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
  49. xinference/thirdparty/audiotools/core/util.py +671 -0
  50. xinference/thirdparty/audiotools/core/whisper.py +97 -0
  51. xinference/thirdparty/audiotools/data/__init__.py +3 -0
  52. xinference/thirdparty/audiotools/data/datasets.py +517 -0
  53. xinference/thirdparty/audiotools/data/preprocess.py +81 -0
  54. xinference/thirdparty/audiotools/data/transforms.py +1592 -0
  55. xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
  56. xinference/thirdparty/audiotools/metrics/distance.py +131 -0
  57. xinference/thirdparty/audiotools/metrics/quality.py +159 -0
  58. xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
  59. xinference/thirdparty/audiotools/ml/__init__.py +5 -0
  60. xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
  61. xinference/thirdparty/audiotools/ml/decorators.py +440 -0
  62. xinference/thirdparty/audiotools/ml/experiment.py +90 -0
  63. xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
  64. xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
  65. xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
  66. xinference/thirdparty/audiotools/post.py +140 -0
  67. xinference/thirdparty/audiotools/preference.py +600 -0
  68. xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
  69. xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
  70. xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
  71. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
  72. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
  73. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
  74. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
  75. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  76. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
  77. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
  78. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
  79. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
  80. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
  81. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
  82. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
  83. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
  84. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
  85. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
  86. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
  87. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
  88. xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
  89. xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
  90. xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
  91. xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
  92. xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
  93. xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
  94. xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
  95. xinference/thirdparty/indextts/__init__.py +0 -0
  96. xinference/thirdparty/indextts/cli.py +65 -0
  97. xinference/thirdparty/indextts/gpt/__init__.py +0 -0
  98. xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
  99. xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
  100. xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
  101. xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
  102. xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
  103. xinference/thirdparty/indextts/gpt/model.py +713 -0
  104. xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
  105. xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
  106. xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
  107. xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
  108. xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
  109. xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
  110. xinference/thirdparty/indextts/infer.py +690 -0
  111. xinference/thirdparty/indextts/infer_v2.py +739 -0
  112. xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
  113. xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
  114. xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
  115. xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
  116. xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
  117. xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
  118. xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
  119. xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
  120. xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
  121. xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
  122. xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
  123. xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
  124. xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
  125. xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
  126. xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
  127. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
  128. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
  129. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
  130. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
  131. xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
  132. xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
  133. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
  134. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
  135. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  136. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
  137. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
  138. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
  139. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
  140. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
  141. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
  142. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
  143. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
  144. xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
  145. xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
  146. xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
  147. xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
  148. xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
  149. xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
  150. xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
  151. xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
  152. xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
  153. xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
  154. xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
  155. xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
  156. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
  157. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
  158. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
  159. xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
  160. xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
  161. xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
  162. xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
  163. xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
  164. xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
  165. xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
  166. xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
  167. xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
  168. xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
  169. xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
  170. xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
  171. xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
  172. xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
  173. xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
  174. xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
  175. xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
  176. xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
  177. xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
  178. xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
  179. xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
  180. xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
  181. xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
  182. xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
  183. xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
  184. xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
  185. xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
  186. xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
  187. xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
  188. xinference/thirdparty/indextts/utils/__init__.py +0 -0
  189. xinference/thirdparty/indextts/utils/arch_util.py +120 -0
  190. xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
  191. xinference/thirdparty/indextts/utils/common.py +121 -0
  192. xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
  193. xinference/thirdparty/indextts/utils/front.py +536 -0
  194. xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
  195. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
  196. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
  197. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
  198. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
  199. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
  200. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
  201. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
  202. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
  203. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
  204. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
  205. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
  206. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
  207. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
  208. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
  209. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
  210. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
  211. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
  212. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
  213. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
  214. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
  215. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
  216. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
  217. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
  218. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
  219. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
  220. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
  221. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
  222. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
  223. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
  224. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
  225. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
  226. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
  227. xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
  228. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
  229. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
  230. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
  231. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
  232. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
  233. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
  234. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
  235. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
  236. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
  237. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
  238. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
  239. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
  240. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
  241. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
  242. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
  243. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
  244. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
  245. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
  246. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
  247. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
  248. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
  249. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
  250. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
  251. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
  252. xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
  253. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
  254. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
  255. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
  256. xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
  257. xinference/thirdparty/indextts/utils/text_utils.py +41 -0
  258. xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
  259. xinference/thirdparty/indextts/utils/utils.py +93 -0
  260. xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
  261. xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
  262. xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
  263. xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
  264. xinference/types.py +105 -2
  265. xinference/ui/gradio/media_interface.py +66 -8
  266. xinference/ui/web/ui/build/asset-manifest.json +6 -6
  267. xinference/ui/web/ui/build/index.html +1 -1
  268. xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
  269. xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
  270. xinference/ui/web/ui/build/static/js/main.d192c4f3.js +3 -0
  271. xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.d192c4f3.js.LICENSE.txt} +0 -7
  272. xinference/ui/web/ui/build/static/js/main.d192c4f3.js.map +1 -0
  273. xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
  274. xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
  275. xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
  276. xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
  277. xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
  278. xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
  279. xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
  280. xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
  281. xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
  282. xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
  283. xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
  284. xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
  285. xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
  286. xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
  287. xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
  288. xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
  289. xinference/ui/web/ui/node_modules/.cache/babel-loader/f995a2425dfb0822fd07127f66ffe9b026883bc156b402eb8bd0b83d52460a93.json +1 -0
  290. xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
  291. xinference/ui/web/ui/package-lock.json +0 -34
  292. xinference/ui/web/ui/package.json +0 -1
  293. xinference/ui/web/ui/src/locales/en.json +9 -3
  294. xinference/ui/web/ui/src/locales/ja.json +9 -3
  295. xinference/ui/web/ui/src/locales/ko.json +9 -3
  296. xinference/ui/web/ui/src/locales/zh.json +9 -3
  297. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/METADATA +24 -4
  298. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/RECORD +302 -76
  299. xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
  300. xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
  301. xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
  302. xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
  303. xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
  304. xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
  305. xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
  306. xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
  307. xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
  308. xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
  309. xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
  310. xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
  311. xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
  312. xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
  313. xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
  314. xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
  315. xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
  316. xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
  317. xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
  318. xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
  319. xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
  320. xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
  321. xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
  322. xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
  323. xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
  324. xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
  325. xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
  326. xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
  327. xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
  328. xinference/ui/web/ui/node_modules/select/bower.json +0 -13
  329. xinference/ui/web/ui/node_modules/select/package.json +0 -29
  330. xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
  331. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/WHEEL +0 -0
  332. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/entry_points.txt +0 -0
  333. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/licenses/LICENSE +0 -0
  334. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/top_level.txt +0 -0
@@ -154,6 +154,7 @@ class LLMFamilyV2(BaseModel, ModelInstanceInfoMixin):
154
154
  reasoning_end_tag: Optional[str]
155
155
  cache_config: Optional[dict]
156
156
  virtualenv: Optional[VirtualEnvSettings]
157
+ tool_parser: Optional[str]
157
158
 
158
159
  class Config:
159
160
  extra = "allow"
@@ -148,6 +148,16 @@ class MLXModel(LLM):
148
148
  # to call async method with asyncio.run_coroutine_threadsafe
149
149
  self._loop = loop # type: ignore
150
150
 
151
+ def _cleanup_memory(self):
152
+ import gc
153
+
154
+ import mlx.core as mx
155
+
156
+ # mandatory recycling
157
+ gc.collect()
158
+ # clear the MLX cache
159
+ mx.clear_cache()
160
+
151
161
  @property
152
162
  def driver_info(self) -> Optional[dict]:
153
163
  return self._driver_info
@@ -333,6 +343,7 @@ class MLXModel(LLM):
333
343
  self.prepare_parse_reasoning_content(
334
344
  reasoning_content, enable_thinking=enable_thinking
335
345
  )
346
+ self.prepare_parse_tool_calls()
336
347
 
337
348
  kwargs = {}
338
349
  kwargs["revision"] = self._model_config.get(
@@ -458,14 +469,18 @@ class MLXModel(LLM):
458
469
  repetition_penalty=kwargs.pop("repetition_penalty"),
459
470
  repetition_context_size=kwargs.pop("repetition_context_size"),
460
471
  )
461
- yield from stream_generate(
462
- self._model,
463
- self._tokenizer,
464
- prompt_token_ids,
465
- sampler=sampler,
466
- logits_processors=logits_processors,
467
- **kwargs,
468
- )
472
+ try:
473
+ yield from stream_generate(
474
+ self._model,
475
+ self._tokenizer,
476
+ prompt_token_ids,
477
+ sampler=sampler,
478
+ logits_processors=logits_processors,
479
+ **kwargs,
480
+ )
481
+ finally:
482
+ # after completing the inference, clear the memory.
483
+ self._cleanup_memory()
469
484
 
470
485
  def _prepare_inputs(
471
486
  self, prompt: Union[str, Dict[str, Any]], kwargs
@@ -755,7 +770,7 @@ class MLXChatModel(MLXModel, ChatModelMixin):
755
770
  assert not isinstance(c, Iterator)
756
771
  if tools:
757
772
  return self._post_process_completion(
758
- self.model_family, self.model_uid, c, self.reasoning_parser
773
+ self.model_family, self.model_uid, c
759
774
  )
760
775
  return self._to_chat_completion(c, self.reasoning_parser)
761
776
 
@@ -831,18 +846,32 @@ class MLXVisionModel(MLXModel, ChatModelMixin):
831
846
 
832
847
  detokenizer.reset()
833
848
  tic = time.perf_counter()
834
- for n, (token, logprobs) in enumerate(
835
- generate_step(input_ids, self._model, pixel_values, mask, **kwargs),
836
- ):
837
- if n == 0:
838
- prompt_time = time.perf_counter() - tic
839
- prompt_tps = len(input_ids) / prompt_time
840
- tic = time.perf_counter()
841
- if token == tokenizer.eos_token_id:
842
- break
843
- detokenizer.add_token(token)
849
+ try:
850
+ for n, (token, logprobs) in enumerate(
851
+ generate_step(input_ids, self._model, pixel_values, mask, **kwargs),
852
+ ):
853
+ if n == 0:
854
+ prompt_time = time.perf_counter() - tic
855
+ prompt_tps = len(input_ids) / prompt_time
856
+ tic = time.perf_counter()
857
+ if token == tokenizer.eos_token_id:
858
+ break
859
+ detokenizer.add_token(token)
860
+
861
+ # Yield the last segment if streaming
862
+ yield GenerationResponse(
863
+ text=detokenizer.last_segment,
864
+ token=token,
865
+ logprobs=logprobs,
866
+ from_draft=False,
867
+ prompt_tokens=len(input_ids),
868
+ prompt_tps=prompt_tps,
869
+ generation_tokens=n + 1,
870
+ generation_tps=(n + 1) / (time.perf_counter() - tic),
871
+ peak_memory=mx.metal.get_peak_memory() / 1e9,
872
+ )
844
873
 
845
- # Yield the last segment if streaming
874
+ detokenizer.finalize()
846
875
  yield GenerationResponse(
847
876
  text=detokenizer.last_segment,
848
877
  token=token,
@@ -854,19 +883,9 @@ class MLXVisionModel(MLXModel, ChatModelMixin):
854
883
  generation_tps=(n + 1) / (time.perf_counter() - tic),
855
884
  peak_memory=mx.metal.get_peak_memory() / 1e9,
856
885
  )
857
-
858
- detokenizer.finalize()
859
- yield GenerationResponse(
860
- text=detokenizer.last_segment,
861
- token=token,
862
- logprobs=logprobs,
863
- from_draft=False,
864
- prompt_tokens=len(input_ids),
865
- prompt_tps=prompt_tps,
866
- generation_tokens=n + 1,
867
- generation_tps=(n + 1) / (time.perf_counter() - tic),
868
- peak_memory=mx.metal.get_peak_memory() / 1e9,
869
- )
886
+ finally:
887
+ # after completing the inference, clear the memory
888
+ self._cleanup_memory()
870
889
 
871
890
  def _prepare_inputs(
872
891
  self, prompt: Union[str, Dict[str, Any]], kwargs
@@ -73,6 +73,7 @@ class SGLANGGenerateConfig(TypedDict, total=False):
73
73
  stream: bool
74
74
  stream_options: Optional[Union[dict, None]]
75
75
  json_schema: Optional[dict]
76
+ response_format: dict
76
77
 
77
78
 
78
79
  try:
@@ -175,6 +176,7 @@ class SGLANGModel(LLM):
175
176
  self.prepare_parse_reasoning_content(
176
177
  reasoning_content, enable_thinking=enable_thinking
177
178
  )
179
+ self.prepare_parse_tool_calls()
178
180
 
179
181
  # Fix: GH#2169
180
182
  if sgl.__version__ >= "0.2.14":
@@ -316,13 +318,16 @@ class SGLANGModel(LLM):
316
318
  stream_options = generate_config.get("stream_options")
317
319
  generate_config.setdefault("stream_options", stream_options)
318
320
  generate_config.setdefault("ignore_eos", False)
319
- json_schema = (
320
- generate_config.pop("response_format", {}) # type: ignore
321
- .pop("json_schema", {})
322
- .pop("schema", {})
323
- )
324
- if json_schema:
325
- generate_config.setdefault("json_schema", json.dumps(json_schema)) # type: ignore
321
+ response_format = generate_config.pop("response_format", None)
322
+ if response_format:
323
+ json_schema_config = response_format.pop("json_schema", None)
324
+ json_schema = None
325
+ if "schema_" in json_schema_config:
326
+ json_schema = json_schema_config.pop("schema_")
327
+ elif "schema" in json_schema_config:
328
+ json_schema = json_schema_config.pop("schema")
329
+ if json_schema:
330
+ generate_config.setdefault("json_schema", json.dumps(json_schema)) # type: ignore
326
331
 
327
332
  return generate_config
328
333
 
@@ -355,22 +360,31 @@ class SGLANGModel(LLM):
355
360
 
356
361
  @staticmethod
357
362
  def _convert_state_to_completion_chunk(
358
- request_id: str, model: str, output_text: str
363
+ request_id: str, model: str, output_text: str, meta_info: Dict
359
364
  ) -> CompletionChunk:
365
+ finish_reason = meta_info.get("finish_reason", None)
366
+ if isinstance(finish_reason, dict) and "type" in finish_reason:
367
+ finish_reason = finish_reason["type"]
360
368
  choices: List[CompletionChoice] = [
361
369
  CompletionChoice(
362
370
  text=output_text,
363
371
  index=0,
364
372
  logprobs=None,
365
- finish_reason=None,
373
+ finish_reason=finish_reason,
366
374
  )
367
375
  ]
376
+ usage = CompletionUsage(
377
+ prompt_tokens=meta_info["prompt_tokens"],
378
+ completion_tokens=meta_info["completion_tokens"],
379
+ total_tokens=meta_info["prompt_tokens"] + meta_info["completion_tokens"],
380
+ )
368
381
  chunk = CompletionChunk(
369
382
  id=request_id,
370
383
  object="text_completion",
371
384
  created=int(time.time()),
372
385
  model=model,
373
386
  choices=choices,
387
+ usage=usage,
374
388
  )
375
389
  return chunk
376
390
 
@@ -378,12 +392,15 @@ class SGLANGModel(LLM):
378
392
  def _convert_state_to_completion(
379
393
  request_id: str, model: str, output_text: str, meta_info: Dict
380
394
  ) -> Completion:
395
+ finish_reason = meta_info.get("finish_reason", None)
396
+ if isinstance(finish_reason, dict) and "type" in finish_reason:
397
+ finish_reason = finish_reason["type"]
381
398
  choices = [
382
399
  CompletionChoice(
383
400
  text=output_text,
384
401
  index=0,
385
402
  logprobs=None,
386
- finish_reason=None,
403
+ finish_reason=finish_reason,
387
404
  )
388
405
  ]
389
406
 
@@ -512,7 +529,10 @@ class SGLANGModel(LLM):
512
529
  prompt, image_data, **sanitized_generate_config
513
530
  ):
514
531
  chunk = self._convert_state_to_completion_chunk(
515
- request_id, self.model_uid, output_text=out
532
+ request_id,
533
+ self.model_uid,
534
+ output_text=out,
535
+ meta_info=meta_info,
516
536
  )
517
537
  complete_response += out
518
538
  finish_reason = meta_info["finish_reason"]
@@ -646,49 +666,6 @@ class SGLANGChatModel(SGLANGModel, ChatModelMixin):
646
666
  def is_tool_call_chunk_end(chunk):
647
667
  return chunk["choices"][0]["text"].endswith(QWEN_TOOL_CALL_SYMBOLS[1])
648
668
 
649
- async def _async_to_tool_completion_chunks(
650
- self,
651
- chunks: AsyncGenerator[CompletionChunk, None],
652
- ) -> AsyncGenerator[ChatCompletionChunk, None]:
653
- i = 0
654
- previous_texts = [""]
655
- tool_call = False
656
- tool_call_texts = [""]
657
- if self.reasoning_parser:
658
- chunks = self.reasoning_parser.prepare_reasoning_content_streaming(chunks)
659
- async for chunk in chunks:
660
- if i == 0:
661
- for first_chunk in self._get_first_chat_completion_chunk(
662
- chunk, self.reasoning_parser
663
- ):
664
- yield first_chunk
665
- # usage
666
- choices = chunk.get("choices")
667
- if not choices:
668
- yield self._get_final_chat_completion_chunk(chunk)
669
- else:
670
- if self.is_tool_call_chunk_start(chunk):
671
- tool_call = True
672
- if tool_call:
673
- tool_call_text = tool_call_texts[-1]
674
- tool_call_text += chunk["choices"][0]["text"]
675
- tool_call_texts.append(tool_call_text)
676
- if self.is_tool_call_chunk_end(chunk):
677
- yield self._post_process_completion_chunk(
678
- self.model_family,
679
- self.model_uid,
680
- chunk,
681
- reasoning_parser=self.reasoning_parser,
682
- tool_call_text=tool_call_text,
683
- )
684
- tool_call = False
685
- tool_call_texts = [""]
686
- else:
687
- yield self._to_chat_completion_chunk(
688
- chunk, self.reasoning_parser, previous_texts
689
- )
690
- i += 1
691
-
692
669
  async def async_chat(
693
670
  self,
694
671
  messages: List[Dict],
@@ -731,7 +708,7 @@ class SGLANGChatModel(SGLANGModel, ChatModelMixin):
731
708
  assert not isinstance(c, AsyncGenerator)
732
709
  if tools:
733
710
  return self._post_process_completion(
734
- self.model_family, self.model_uid, c, self.reasoning_parser
711
+ self.model_family, self.model_uid, c
735
712
  )
736
713
  return self._to_chat_completion(c, self.reasoning_parser)
737
714
 
@@ -0,0 +1,58 @@
1
+ from functools import wraps
2
+ from typing import Any, Callable, Dict, Type
3
+
4
# Global registry for tool parsers, mapping parser names to their classes
TOOL_PARSERS: Dict[str, Type[Any]] = {}


def register_tool_parser(name: str):
    """
    Build a class decorator that records a tool parser in ``TOOL_PARSERS``.

    Applying the returned decorator to a class stores that class in the
    module-level ``TOOL_PARSERS`` dictionary under ``name`` and hands the
    class back untouched, so parsers become available for lookup by name as
    soon as their defining module is imported.

    Args:
        name (str): Key under which the parser class is stored.

    Returns:
        Callable: A decorator taking the parser class and returning it.

    Example:
        @register_tool_parser("qwen")
        class QwenToolParser(ToolParser):
            def extract_tool_calls(self, model_output: str):
                ...

    Note:
        Registering a second class under an existing name replaces the
        earlier entry.
    """

    def _register(parser_cls: Type[Any]) -> Type[Any]:
        """Store ``parser_cls`` under ``name`` and return it unchanged."""
        TOOL_PARSERS[name] = parser_cls
        return parser_cls

    return _register
48
+
49
+
50
+ # Import all tool parser modules to trigger decorator registration
51
+ # This ensures all tool parsers are automatically registered when this module is imported
52
+ from . import (
53
+ deepseek_r1_tool_parser,
54
+ deepseek_v3_tool_parser,
55
+ glm4_tool_parser,
56
+ llama3_tool_parser,
57
+ qwen_tool_parser,
58
+ )
@@ -0,0 +1,33 @@
1
class ToolParser:
    """
    Base class for model-specific tool-call parsers.

    It is not meant to be used directly: both methods below only raise
    ``NotImplementedError`` and are expected to be overridden by derived
    classes.
    """

    def extract_tool_calls(self, model_output: str):
        """
        Extract tool calls from a complete model-generated string.

        Intended for non-streaming responses, where the entire model output
        is available before anything is sent to the client.

        Args:
            model_output (str): The full text produced by the model.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        message = "AbstractToolParser.extract_tool_calls has not been implemented!"
        raise NotImplementedError(message)

    def extract_tool_calls_streaming(
        self, previous_text, current_text: str, delta_text: str
    ):
        """
        Extract tool calls from an incomplete (streaming) response.

        Implementations receive the text seen so far together with the
        newest delta, and may keep per-instance state about what has already
        been parsed and emitted.

        Args:
            previous_text: Text produced before the current chunk.
            current_text (str): Text accumulated up to and including the
                current chunk.
            delta_text (str): The newly produced text of the current chunk.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        message = (
            "AbstractToolParser.extract_tool_calls_streaming has not been implemented!"
        )
        raise NotImplementedError(message)
@@ -0,0 +1,190 @@
1
+ import json
2
+ import logging
3
+ import re
4
+ from typing import Any, List, Optional, Tuple
5
+
6
+ from . import register_tool_parser
7
+ from .abstract_tool_parser import ToolParser
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
@register_tool_parser("deepseek-r1")
class DeepseekR1ToolParser(ToolParser):
    """
    Tool parser implementation for DeepSeek R1 model.

    This parser handles the specific format used by DeepSeek R1 for tool calls,
    which includes special Unicode tokens and JSON-formatted function arguments.
    """

    def __init__(self):
        """
        Initialize the DeepSeek R1 tool parser.
        """
        super().__init__()

        # Sentinel tokens for streaming mode
        self.think_start_token: str = "<think>"
        self.think_end_token: str = "</think>"
        self.tool_call_start_token: str = "<|tool▁call▁begin|>"
        self.tool_call_end_token: str = "<|tool▁call▁end|>"

        # Regex pattern to match one DeepSeek R1 tool call. Every "|" that is
        # part of a sentinel token must be escaped: an unescaped "|" is regex
        # alternation and would split the pattern into unrelated branches
        # that match with empty capture groups.
        self.tool_calls_regex = (
            r"<\|tool▁call▁begin\|>function<\|tool▁sep\|>([^\n]+)\n"
            r"```json\n(.*?)\n```<\|tool▁call▁end\|>"
        )

        # Regex pattern to match the entire tool-calls wrapper block.
        # We intentionally do NOT match <think> blocks here so that the
        # "text before" chunk will include both the think block and any
        # narrative text up to the tool calls wrapper, yielding exactly two
        # blocks when there is a single tool calls section:
        # [before_text_including_think, tool_calls_wrapper_block]
        self.content_regex = r"(<\|tool▁calls▁begin\|>.*?<\|tool▁calls▁end\|>)"

    def extract_tool_calls(
        self, model_output: str
    ) -> List[Tuple[Optional[str], Optional[str], Optional[dict]]]:
        r"""
        Extract tool calls from complete model output.

        Splits the output into plain-text blocks and tool-call blocks, then
        extracts the function name and JSON arguments of every tool call
        found. Calls whose arguments cannot be parsed into a JSON object are
        returned as raw content, and identical tool calls are reported once.

        Args:
            model_output (str): The complete output string from the model.

        Returns:
            List[Tuple[Optional[str], Optional[str], Optional[dict]]]:
                A list of tuples where each tuple contains:
                - content (str or None): Raw content if this entry is plain
                  text or parsing failed, None for a parsed tool call
                - function_name (str or None): Name of the function to call
                - arguments (dict or None): Parsed function arguments

        Example:
            >>> parser = DeepseekR1ToolParser()
            >>> output = '<|tool▁call▁begin|>function<|tool▁sep|>get_current_weather\n```json\n{"location": "上海", "unit": "celsius"}\n```<|tool▁call▁end|>'
            >>> parser.extract_tool_calls(output)
            [(None, 'get_current_weather', {'location': '上海', 'unit': 'celsius'})]
        """
        # If no tool call tokens, return original output as content
        if self.tool_call_start_token not in model_output:
            return [(model_output, None, None)]

        # Keys of tool calls already emitted, used for deduplication
        seen_keys = set()
        results: List[Tuple[Optional[str], Optional[str], Optional[dict]]] = []

        for content_block in self._get_function_calls(model_output):
            is_tool_block = (
                self.tool_call_start_token in content_block
                and self.tool_call_end_token in content_block
            )
            if not is_tool_block:
                # Regular content (text or thinking block); skip blank blocks
                if content_block.strip():
                    results.append((content_block, None, None))
                continue

            matches = re.findall(self.tool_calls_regex, content_block, re.DOTALL)
            if not matches:
                # Malformed tool call, treat as regular content
                results.append((content_block, None, None))
                continue

            # One block may carry several tool calls; handle each of them
            # instead of silently dropping everything after the first.
            for func_name, raw_json in matches:
                arguments = None
                try:
                    parsed = json.loads(raw_json)
                except ValueError as e:  # includes json.JSONDecodeError
                    logger.warning(
                        "Failed to parse tool call JSON: %s, error: %s", raw_json, e
                    )
                else:
                    if isinstance(parsed, dict):
                        arguments = parsed
                    else:
                        logger.warning(
                            "Failed to parse tool call JSON: %s, error: %s",
                            raw_json,
                            "arguments are not a JSON object",
                        )

                if arguments is None:
                    tool_call_tuple = (raw_json, None, None)
                    dedup_key = raw_json
                else:
                    tool_call_tuple = (None, func_name, arguments)
                    # A canonical JSON dump is hashable even when argument
                    # values are nested lists/dicts (a frozenset of the items
                    # is not), and is independent of key order.
                    dedup_key = (func_name, json.dumps(arguments, sort_keys=True))

                # Add to results if not already seen
                if dedup_key not in seen_keys:
                    seen_keys.add(dedup_key)
                    results.append(tool_call_tuple)

        return results

    def _get_function_calls(self, model_output: str) -> List[str]:
        """
        Split model output into tool-calls wrapper blocks and surrounding text.

        Every span matched by ``self.content_regex`` becomes its own entry;
        the text before, between and after those spans is kept as separate
        entries in original order.

        Args:
            model_output (str): The complete model output to parse.

        Returns:
            List[str]: Content blocks in order of appearance.
        """
        functions_calls = []
        last_end = 0
        for m in re.finditer(self.content_regex, model_output, re.DOTALL):
            # Add any text before the current match
            if m.start() > last_end:
                functions_calls.append(model_output[last_end : m.start()])
            # Add the matched tool-calls wrapper block
            functions_calls.append(m.group(0))
            last_end = m.end()
        # Add any remaining text after the last match
        if last_end < len(model_output):
            functions_calls.append(model_output[last_end:])
        return functions_calls

    def extract_tool_calls_streaming(
        self, previous_text: List[str], current_text: str, delta_text: str
    ) -> Optional[Any]:
        """
        Extract tool calls from streaming output.

        Currently not supported for DeepSeek R1 model. This method always
        raises NotImplementedError.

        Args:
            previous_text (List[str]): Previous text chunks from the stream.
            current_text (str): Current accumulated text.
            delta_text (str): New text delta in this chunk.

        Raises:
            NotImplementedError: Always raised as streaming is not supported.

        Note:
            Use extract_tool_calls() with complete output instead.
        """
        raise NotImplementedError(
            "Streaming support for tool calls is available only when using "
            "Qwen models with vLLM backend or GLM4-chat models without vLLM backend. "
            "DeepSeek R1 does not support streaming tool call extraction."
        )