xinference-1.9.1-py3-none-any.whl → xinference-1.10.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (334)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +400 -3
  3. xinference/client/restful/async_restful_client.py +20 -3
  4. xinference/client/restful/restful_client.py +20 -3
  5. xinference/constants.py +2 -0
  6. xinference/core/supervisor.py +111 -49
  7. xinference/core/worker.py +10 -0
  8. xinference/deploy/cmdline.py +15 -0
  9. xinference/model/audio/core.py +26 -6
  10. xinference/model/audio/indextts2.py +166 -0
  11. xinference/model/audio/kokoro.py +1 -1
  12. xinference/model/audio/kokoro_zh.py +124 -0
  13. xinference/model/audio/model_spec.json +58 -1
  14. xinference/model/embedding/sentence_transformers/core.py +4 -4
  15. xinference/model/embedding/vllm/core.py +7 -1
  16. xinference/model/image/model_spec.json +71 -3
  17. xinference/model/image/stable_diffusion/core.py +13 -4
  18. xinference/model/llm/__init__.py +4 -0
  19. xinference/model/llm/core.py +10 -0
  20. xinference/model/llm/llama_cpp/core.py +1 -0
  21. xinference/model/llm/llm_family.json +503 -21
  22. xinference/model/llm/llm_family.py +1 -0
  23. xinference/model/llm/mlx/core.py +52 -33
  24. xinference/model/llm/sglang/core.py +32 -55
  25. xinference/model/llm/tool_parsers/__init__.py +58 -0
  26. xinference/model/llm/tool_parsers/abstract_tool_parser.py +33 -0
  27. xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +190 -0
  28. xinference/model/llm/tool_parsers/deepseek_v3_tool_parser.py +145 -0
  29. xinference/model/llm/tool_parsers/glm4_tool_parser.py +123 -0
  30. xinference/model/llm/tool_parsers/llama3_tool_parser.py +77 -0
  31. xinference/model/llm/tool_parsers/qwen_tool_parser.py +320 -0
  32. xinference/model/llm/transformers/core.py +1 -1
  33. xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
  34. xinference/model/llm/utils.py +138 -53
  35. xinference/model/llm/vllm/core.py +95 -78
  36. xinference/thirdparty/audiotools/__init__.py +10 -0
  37. xinference/thirdparty/audiotools/core/__init__.py +4 -0
  38. xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
  39. xinference/thirdparty/audiotools/core/display.py +194 -0
  40. xinference/thirdparty/audiotools/core/dsp.py +390 -0
  41. xinference/thirdparty/audiotools/core/effects.py +647 -0
  42. xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
  43. xinference/thirdparty/audiotools/core/loudness.py +320 -0
  44. xinference/thirdparty/audiotools/core/playback.py +252 -0
  45. xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
  46. xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
  47. xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
  48. xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
  49. xinference/thirdparty/audiotools/core/util.py +671 -0
  50. xinference/thirdparty/audiotools/core/whisper.py +97 -0
  51. xinference/thirdparty/audiotools/data/__init__.py +3 -0
  52. xinference/thirdparty/audiotools/data/datasets.py +517 -0
  53. xinference/thirdparty/audiotools/data/preprocess.py +81 -0
  54. xinference/thirdparty/audiotools/data/transforms.py +1592 -0
  55. xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
  56. xinference/thirdparty/audiotools/metrics/distance.py +131 -0
  57. xinference/thirdparty/audiotools/metrics/quality.py +159 -0
  58. xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
  59. xinference/thirdparty/audiotools/ml/__init__.py +5 -0
  60. xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
  61. xinference/thirdparty/audiotools/ml/decorators.py +440 -0
  62. xinference/thirdparty/audiotools/ml/experiment.py +90 -0
  63. xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
  64. xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
  65. xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
  66. xinference/thirdparty/audiotools/post.py +140 -0
  67. xinference/thirdparty/audiotools/preference.py +600 -0
  68. xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
  69. xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
  70. xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
  71. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
  72. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
  73. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
  74. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
  75. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  76. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
  77. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
  78. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
  79. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
  80. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
  81. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
  82. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
  83. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
  84. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
  85. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
  86. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
  87. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
  88. xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
  89. xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
  90. xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
  91. xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
  92. xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
  93. xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
  94. xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
  95. xinference/thirdparty/indextts/__init__.py +0 -0
  96. xinference/thirdparty/indextts/cli.py +65 -0
  97. xinference/thirdparty/indextts/gpt/__init__.py +0 -0
  98. xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
  99. xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
  100. xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
  101. xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
  102. xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
  103. xinference/thirdparty/indextts/gpt/model.py +713 -0
  104. xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
  105. xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
  106. xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
  107. xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
  108. xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
  109. xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
  110. xinference/thirdparty/indextts/infer.py +690 -0
  111. xinference/thirdparty/indextts/infer_v2.py +739 -0
  112. xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
  113. xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
  114. xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
  115. xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
  116. xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
  117. xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
  118. xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
  119. xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
  120. xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
  121. xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
  122. xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
  123. xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
  124. xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
  125. xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
  126. xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
  127. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
  128. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
  129. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
  130. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
  131. xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
  132. xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
  133. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
  134. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
  135. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  136. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
  137. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
  138. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
  139. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
  140. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
  141. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
  142. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
  143. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
  144. xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
  145. xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
  146. xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
  147. xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
  148. xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
  149. xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
  150. xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
  151. xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
  152. xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
  153. xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
  154. xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
  155. xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
  156. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
  157. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
  158. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
  159. xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
  160. xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
  161. xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
  162. xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
  163. xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
  164. xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
  165. xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
  166. xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
  167. xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
  168. xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
  169. xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
  170. xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
  171. xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
  172. xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
  173. xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
  174. xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
  175. xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
  176. xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
  177. xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
  178. xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
  179. xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
  180. xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
  181. xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
  182. xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
  183. xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
  184. xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
  185. xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
  186. xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
  187. xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
  188. xinference/thirdparty/indextts/utils/__init__.py +0 -0
  189. xinference/thirdparty/indextts/utils/arch_util.py +120 -0
  190. xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
  191. xinference/thirdparty/indextts/utils/common.py +121 -0
  192. xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
  193. xinference/thirdparty/indextts/utils/front.py +536 -0
  194. xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
  195. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
  196. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
  197. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
  198. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
  199. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
  200. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
  201. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
  202. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
  203. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
  204. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
  205. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
  206. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
  207. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
  208. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
  209. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
  210. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
  211. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
  212. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
  213. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
  214. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
  215. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
  216. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
  217. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
  218. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
  219. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
  220. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
  221. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
  222. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
  223. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
  224. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
  225. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
  226. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
  227. xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
  228. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
  229. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
  230. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
  231. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
  232. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
  233. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
  234. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
  235. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
  236. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
  237. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
  238. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
  239. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
  240. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
  241. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
  242. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
  243. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
  244. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
  245. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
  246. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
  247. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
  248. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
  249. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
  250. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
  251. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
  252. xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
  253. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
  254. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
  255. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
  256. xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
  257. xinference/thirdparty/indextts/utils/text_utils.py +41 -0
  258. xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
  259. xinference/thirdparty/indextts/utils/utils.py +93 -0
  260. xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
  261. xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
  262. xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
  263. xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
  264. xinference/types.py +105 -2
  265. xinference/ui/gradio/media_interface.py +66 -8
  266. xinference/ui/web/ui/build/asset-manifest.json +6 -6
  267. xinference/ui/web/ui/build/index.html +1 -1
  268. xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
  269. xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
  270. xinference/ui/web/ui/build/static/js/main.d192c4f3.js +3 -0
  271. xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.d192c4f3.js.LICENSE.txt} +0 -7
  272. xinference/ui/web/ui/build/static/js/main.d192c4f3.js.map +1 -0
  273. xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
  274. xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
  275. xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
  276. xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
  277. xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
  278. xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
  279. xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
  280. xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
  281. xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
  282. xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
  283. xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
  284. xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
  285. xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
  286. xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
  287. xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
  288. xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
  289. xinference/ui/web/ui/node_modules/.cache/babel-loader/f995a2425dfb0822fd07127f66ffe9b026883bc156b402eb8bd0b83d52460a93.json +1 -0
  290. xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
  291. xinference/ui/web/ui/package-lock.json +0 -34
  292. xinference/ui/web/ui/package.json +0 -1
  293. xinference/ui/web/ui/src/locales/en.json +9 -3
  294. xinference/ui/web/ui/src/locales/ja.json +9 -3
  295. xinference/ui/web/ui/src/locales/ko.json +9 -3
  296. xinference/ui/web/ui/src/locales/zh.json +9 -3
  297. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/METADATA +24 -4
  298. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/RECORD +302 -76
  299. xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
  300. xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
  301. xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
  302. xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
  303. xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
  304. xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
  305. xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
  306. xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
  307. xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
  308. xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
  309. xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
  310. xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
  311. xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
  312. xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
  313. xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
  314. xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
  315. xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
  316. xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
  317. xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
  318. xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
  319. xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
  320. xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
  321. xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
  322. xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
  323. xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
  324. xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
  325. xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
  326. xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
  327. xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
  328. xinference/ui/web/ui/node_modules/select/bower.json +0 -13
  329. xinference/ui/web/ui/node_modules/select/package.json +0 -29
  330. xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
  331. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/WHEEL +0 -0
  332. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/entry_points.txt +0 -0
  333. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/licenses/LICENSE +0 -0
  334. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,124 @@
1
+ # Copyright 2022-2023 XProbe Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import logging
15
+ from io import BytesIO
16
+ from typing import TYPE_CHECKING, Optional
17
+
18
+ import numpy as np
19
+
20
+ from ...device_utils import get_available_device, is_device_available
21
+
22
+ if TYPE_CHECKING:
23
+ from .core import AudioModelFamilyV2
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+ REPO_ID = "hexgrad/Kokoro-82M-v1.1-zh"
28
+
29
+
30
class KokoroZHModel:
    """Text-to-speech model wrapper for the Kokoro v1.1 Chinese checkpoint.

    The instance is cheap to construct; the heavy ``kokoro`` pipelines are only
    created by :meth:`load`. :meth:`speech` then synthesizes a whole utterance
    and returns it as encoded audio bytes.
    """

    # Voice used when the caller passes an empty/None ``voice``.
    _DEFAULT_VOICE = "zf_001"
    # Sample rate (Hz) and channel count handed to ``soundfile.SoundFile``.
    _SAMPLE_RATE = 24000
    _CHANNELS = 1

    def __init__(
        self,
        model_uid: str,
        model_path: str,
        model_spec: "AudioModelFamilyV2",
        device: Optional[str] = None,
        **kwargs,
    ):
        """Store the configuration; nothing is loaded until :meth:`load`.

        Args:
            model_uid: Identifier of this model instance.
            model_path: Directory that contains ``config.json`` and
                ``kokoro-v1_1-zh.pth``.
            model_spec: Model family description; exposed both as
                ``model_family`` and through :attr:`model_ability`.
            device: Device name to run on, or ``None`` to pick one in
                :meth:`load`.
            **kwargs: Extra options; ``lang_code`` is read in :meth:`load`.
        """
        self.model_family = model_spec
        self._model_uid = model_uid
        self._model_path = model_path
        self._model_spec = model_spec
        self._device = device
        self._model = None
        self._kwargs = kwargs
        self._en_pipeline = None

    def _en_callable(self, text: str) -> str:
        """
        Fixing the issue of English words being skipped in the Chinese model.
        from https://hf-mirror.com/hexgrad/Kokoro-82M-v1.1-zh/blob/main/samples/make_zh.py

        Two words have hard-coded phonemes; everything else is delegated to
        the English pipeline created in :meth:`load`, using the phonemes of
        its first result.
        """
        if text == "Kokoro":
            return "kˈOkəɹO"
        elif text == "Sol":
            return "sˈOl"
        # NOTE(review): relies on load() having set self._en_pipeline.
        return next(self._en_pipeline(text)).phonemes

    @property
    def model_ability(self):
        """Abilities declared by the model spec."""
        return self._model_spec.model_ability

    def load(self) -> None:
        """Resolve the device and build the English and main pipelines.

        Raises:
            ValueError: If a device was requested explicitly and it is not
                available.
        """
        if self._device is None:
            self._device = get_available_device()
        else:
            if not is_device_available(self._device):
                raise ValueError(f"Device {self._device} is not available!")

        import os

        from kokoro import KModel, KPipeline

        # Model-less English pipeline, only used by _en_callable for phonemes.
        self._en_pipeline = KPipeline(lang_code="a", repo_id=REPO_ID, model=False)

        config_path = os.path.join(self._model_path, "config.json")
        model_path = os.path.join(self._model_path, "kokoro-v1_1-zh.pth")
        lang_code = self._kwargs.get("lang_code", "z")
        logger.info("Launching Kokoro model with language code: %s", lang_code)

        self._model = KPipeline(
            lang_code=lang_code,
            model=KModel(config=config_path, model=model_path).to(self._device),
            repo_id=REPO_ID,
            en_callable=self._en_callable,
            device=self._device,
        )

    @staticmethod
    def _collect_audio(results) -> "np.ndarray":
        """Concatenate the audio chunk (item at index 2) of every result.

        Results whose chunk is ``None`` are skipped.

        Raises:
            ValueError: If no result carries audio, e.g. because the input
                text was empty. Without this check ``np.concatenate`` would
                fail with a message that does not mention the cause.
        """
        chunks = [result[2] for result in results]
        chunks = [chunk for chunk in chunks if chunk is not None]
        if not chunks:
            raise ValueError(
                "Kokoro produced no audio; check that the input text is not empty."
            )
        return np.concatenate(chunks)

    def speech(
        self,
        input: str,
        voice: str,
        response_format: str = "mp3",
        speed: float = 1.0,
        stream: bool = False,
        **kwargs,
    ) -> bytes:
        """Synthesize ``input`` and return the encoded audio.

        Args:
            input: Text to speak.
            voice: Voice name or a path ending in ``.pt``; when empty the
                default voice is used.
            response_format: Container/format name; upper-cased and passed to
                ``soundfile`` as ``format``.
            speed: Speed factor forwarded to the pipeline.
            stream: Must be false; streaming is not supported.
            **kwargs: Forwarded unchanged to the pipeline call.

        Returns:
            The encoded audio as bytes.

        Raises:
            Exception: If ``stream`` is true.
            ValueError: If the pipeline yields no audio.
        """
        import soundfile

        if stream:
            raise Exception("Kokoro does not support stream mode.")
        assert self._model is not None, "load() must be called before speech()"
        if not voice:
            voice = self._DEFAULT_VOICE
            logger.info("Auto select speaker: %s", voice)
        elif voice.endswith(".pt"):
            logger.info("Using custom voice pt: %s", voice)
        else:
            logger.info("Using voice: %s", voice)
        logger.info("Speech kwargs: %s", kwargs)
        generator = self._model(text=input, voice=voice, speed=speed, **kwargs)
        audio = self._collect_audio(generator)
        # Save the generated audio
        with BytesIO() as out:
            with soundfile.SoundFile(
                out,
                "w",
                self._SAMPLE_RATE,
                self._CHANNELS,
                format=response_format.upper(),
            ) as f:
                f.write(audio)
            return out.getvalue()
@@ -685,7 +685,7 @@
685
685
  "model_revision": "4dcc16f297f2ff98a17b3726b16f5de5a5e45672"
686
686
  },
687
687
  "modelscope": {
688
- "model_id": "SWivid/F5-TTS_Emilia-ZH-EN",
688
+ "model_id": "AI-ModelScope/F5-TTS",
689
689
  "model_revision": "master"
690
690
  }
691
691
  }
@@ -862,6 +862,26 @@
862
862
  "model_revision": "master"
863
863
  }
864
864
  }
865
+ },
866
+ {
867
+ "version": 2,
868
+ "model_name": "Kokoro-82M-v1.1-zh",
869
+ "model_family": "Kokoro-zh",
870
+ "model_ability": [
871
+ "text2audio",
872
+ "text2audio_zero_shot"
873
+ ],
874
+ "multilingual": false,
875
+ "model_src": {
876
+ "huggingface": {
877
+ "model_id": "hexgrad/Kokoro-82M-v1.1-zh",
878
+ "model_revision": "01e7505bd6a7a2ac4975463114c3a7650a9f7218"
879
+ },
880
+ "modelscope": {
881
+ "model_id": "AI-ModelScope/Kokoro-82M-v1.1-zh",
882
+ "model_revision": "master"
883
+ }
884
+ }
865
885
  },
866
886
  {
867
887
  "version": 2,
@@ -911,5 +931,42 @@
911
931
  "model_revision": "master"
912
932
  }
913
933
  }
934
+ },
935
+ {
936
+ "version": 2,
937
+ "model_name": "IndexTTS2",
938
+ "model_family": "IndexTTS2",
939
+ "model_ability": [
940
+ "text2audio",
941
+ "text2audio_zero_shot",
942
+ "text2audio_voice_cloning",
943
+ "text2audio_emotion_control"
944
+ ],
945
+ "multilingual": true,
946
+ "virtualenv": {
947
+ "packages": [
948
+ "transformers==4.52.1",
949
+ "#system_torch#",
950
+ "#system_numpy#",
951
+ "json5",
952
+ "munch",
953
+ "matplotlib",
954
+ "flatten_dict",
955
+ "julius",
956
+ "tensorboard",
957
+ "randomname",
958
+ "argbind"
959
+ ]
960
+ },
961
+ "model_src": {
962
+ "huggingface": {
963
+ "model_id": "IndexTeam/IndexTTS-2",
964
+ "model_revision": "main"
965
+ },
966
+ "modelscope": {
967
+ "model_id": "IndexTeam/IndexTTS-2",
968
+ "model_revision": "master"
969
+ }
970
+ }
914
971
  }
915
972
  ]
@@ -265,10 +265,10 @@ class SentenceTransformerEmbeddingModel(EmbeddingModel):
265
265
  "clip" in self.model_family.model_name.lower()
266
266
  or "jina-embeddings-v4" in self.model_family.model_name.lower()
267
267
  ):
268
- if "input_ids" in features and hasattr(
269
- features["input_ids"], "numel"
270
- ):
271
- all_token_nums += features["input_ids"].numel()
268
+ # support input_ids and text_input_ids
269
+ for key in ["input_ids", "text_input_ids"]:
270
+ if key in features and hasattr(features[key], "numel"):
271
+ all_token_nums += features[key].numel()
272
272
  if "pixel_values" in features and hasattr(
273
273
  features["pixel_values"], "numel"
274
274
  ):
@@ -13,6 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import importlib.util
16
+ import json
16
17
  import logging
17
18
  from typing import List, Union
18
19
 
@@ -54,13 +55,18 @@ class VLLMEmbeddingModel(EmbeddingModel):
54
55
  self._kwargs["hf_overrides"].update(
55
56
  is_matryoshka=True,
56
57
  )
58
+ elif isinstance(self._kwargs["hf_overrides"], str):
59
+ self._kwargs["hf_overrides"] = json.loads(self._kwargs["hf_overrides"])
60
+ self._kwargs["hf_overrides"].update(
61
+ is_matryoshka=True,
62
+ )
57
63
 
58
64
  self._model = LLM(model=self._model_path, task="embed", **self._kwargs)
59
65
  self._tokenizer = self._model.get_tokenizer()
60
66
 
61
67
  @staticmethod
62
68
  def _get_detailed_instruct(task_description: str, query: str) -> str:
63
- return f"Instruct: {task_description}\nQuery:{query}"
69
+ return f"Instruct: {task_description}\nQuery:{query}" # noqa: E231
64
70
 
65
71
  @cache_clean
66
72
  def create_embedding(
@@ -352,6 +352,75 @@
352
352
  "no_build_isolation": true
353
353
  }
354
354
  },
355
+ {
356
+ "version": 2,
357
+ "model_name": "Qwen-Image-Edit-2509",
358
+ "model_family": "stable_diffusion",
359
+ "model_ability": [
360
+ "image2image"
361
+ ],
362
+ "model_src": {
363
+ "huggingface": {
364
+ "model_id": "Qwen/Qwen-Image-Edit-2509",
365
+ "model_revision": "d3968ef930e841f4c73640fb8afa3b306a78167e",
366
+ "gguf_model_id": "QuantStack/Qwen-Image-Edit-2509-GGUF",
367
+ "gguf_quantizations": [
368
+ "Q2_K",
369
+ "Q3_K_M",
370
+ "Q3_K_S",
371
+ "Q4_0",
372
+ "Q4_1",
373
+ "Q4_K_M",
374
+ "Q4_K_S",
375
+ "Q5_0",
376
+ "Q5_1",
377
+ "Q5_K_M",
378
+ "Q5_K_S",
379
+ "Q6_K",
380
+ "Q8_0"
381
+ ],
382
+ "gguf_model_file_name_template": "Qwen-Image-Edit-2509-{quantization}.gguf"
383
+ },
384
+ "modelscope": {
385
+ "model_id": "Qwen/Qwen-Image-Edit-2509",
386
+ "model_revision": "master",
387
+ "gguf_model_id": "QuantStack/Qwen-Image-Edit-2509-GGUF",
388
+ "gguf_quantizations": [
389
+ "Q2_K",
390
+ "Q3_K_M",
391
+ "Q3_K_S",
392
+ "Q4_0",
393
+ "Q4_1",
394
+ "Q4_K_M",
395
+ "Q4_K_S",
396
+ "Q5_0",
397
+ "Q5_1",
398
+ "Q5_K_M",
399
+ "Q5_K_S",
400
+ "Q6_K",
401
+ "Q8_0"
402
+ ],
403
+ "gguf_model_file_name_template": "Qwen-Image-Edit-2509-{quantization}.gguf"
404
+ }
405
+ },
406
+ "default_model_config": {
407
+ "quantize": true,
408
+ "quantize_text_encoder": "text_encoder",
409
+ "torch_dtype": "bfloat16"
410
+ },
411
+ "default_generate_config": {
412
+ "true_cfg_scale": 4.0
413
+ },
414
+ "virtualenv": {
415
+ "packages": [
416
+ "git+https://github.com/huggingface/diffusers",
417
+ "peft>=0.17.0",
418
+ "#system_torch#",
419
+ "#system_numpy#"
420
+ ],
421
+ "no_build_isolation": true
422
+ }
423
+ },
355
424
  {
356
425
  "version": 2,
357
426
  "model_name": "sd3-medium",
@@ -824,13 +893,12 @@
824
893
  "deepspeed==0.12.3",
825
894
  "peft==0.4.0",
826
895
  "tiktoken==0.6.0",
827
- "bitsandbytes==0.41.0",
828
- "scikit-learn==1.2.2",
829
896
  "sentencepiece==0.1.99",
830
897
  "einops==0.6.1",
831
898
  "einops-exts==0.0.4",
832
899
  "timm==0.6.13",
833
- "numpy==1.26.4"
900
+ "#system_numpy#",
901
+ "#system_torch#"
834
902
  ]
835
903
  },
836
904
  "model_src": {
@@ -836,7 +836,7 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
836
836
 
837
837
  def image_to_image(
838
838
  self,
839
- image: PIL.Image,
839
+ image: Union[PIL.Image.Image, List[PIL.Image.Image]],
840
840
  prompt: Optional[Union[str, List[str]]] = None,
841
841
  n: int = 1,
842
842
  size: Optional[str] = None,
@@ -856,7 +856,10 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
856
856
  if padding_image_to_multiple := kwargs.pop("padding_image_to_multiple", None):
857
857
  # Model like SD3 image to image requires image's height and width is times of 16
858
858
  # padding the image if specified
859
- origin_x, origin_y = image.size
859
+ if isinstance(image, list):
860
+ origin_x, origin_y = image[0].size
861
+ else:
862
+ origin_x, origin_y = image.size
860
863
  kwargs["origin_size"] = (origin_x, origin_y)
861
864
  kwargs["is_padded"] = True
862
865
  image = self.pad_to_multiple(image, multiple=int(padding_image_to_multiple))
@@ -864,14 +867,20 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
864
867
  if size:
865
868
  width, height = map(int, re.split(r"[^\d]+", size))
866
869
  if padding_image_to_multiple:
867
- width, height = image.size
870
+ if isinstance(image, list):
871
+ width, height = image[0].size
872
+ else:
873
+ width, height = image.size
868
874
  kwargs["width"] = width
869
875
  kwargs["height"] = height
870
876
  else:
871
877
  # SD3 image2image cannot accept width and height
872
878
  allow_width_height = model_accept_param(["width", "height"], model)
873
879
  if allow_width_height:
874
- kwargs["width"], kwargs["height"] = image.size
880
+ if isinstance(image, list):
881
+ kwargs["width"], kwargs["height"] = image[0].size
882
+ else:
883
+ kwargs["width"], kwargs["height"] = image.size
875
884
 
876
885
  # generate config for lightning
877
886
  self._gen_config_for_lightning(kwargs)
@@ -159,6 +159,10 @@ def load_model_family_from_json(json_filename, target_families):
159
159
  BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name][
160
160
  "reasoning_end_tag"
161
161
  ] = model_spec.reasoning_end_tag
162
+ if model_spec.tool_parser:
163
+ BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name][
164
+ "tool_parser"
165
+ ] = model_spec.tool_parser
162
166
 
163
167
  # register model family
164
168
  if "chat" in model_spec.model_ability:
@@ -27,6 +27,7 @@ from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Union
27
27
  from ...core.utils import parse_replica_model_uid
28
28
  from ...types import PeftModelConfig
29
29
  from .reasoning_parser import ReasoningParser
30
+ from .tool_parsers import TOOL_PARSERS
30
31
 
31
32
  if TYPE_CHECKING:
32
33
  from .llm_family import LLMFamilyV2, LLMSpecV1
@@ -59,6 +60,7 @@ class LLM(abc.ABC):
59
60
  self.quantization = model_family.model_specs[0].quantization
60
61
  self.model_path = model_path
61
62
  self.reasoning_parser = None
63
+ self.tool_parser = None
62
64
  if args:
63
65
  raise ValueError(f"Unrecognized positional arguments: {args}")
64
66
  if kwargs:
@@ -171,6 +173,14 @@ class LLM(abc.ABC):
171
173
  enable_thinking=enable_thinking,
172
174
  )
173
175
 
176
def prepare_parse_tool_calls(self):
    """Instantiate the tool parser named by the model family, if any.

    Reads ``self.model_family.tool_parser``. When it is ``None`` nothing
    happens. When it names an entry of ``TOOL_PARSERS``, that entry is
    called with no arguments and the result is stored on
    ``self.tool_parser``. When the name is not registered,
    ``self.tool_parser`` is left unchanged and a warning is logged, so a
    misspelled parser name in a model family definition is visible.
    """
    parser_name = self.model_family.tool_parser
    if parser_name is None:
        return

    # Single lookup; a missing key means the family references a parser that
    # is not registered.
    try:
        parser_factory = TOOL_PARSERS[parser_name]
    except KeyError:
        import logging

        logging.getLogger(__name__).warning(
            "Tool parser %r is not registered in TOOL_PARSERS; "
            "leaving tool_parser unset.",
            parser_name,
        )
        return

    self.tool_parser = parser_factory()
183
+
174
184
 
175
185
  # Context variable for passing per-request chat context (e.g., chat_template_kwargs).
176
186
  # This variable should be set at the beginning of each chat or stream_chat call.
@@ -122,6 +122,7 @@ class XllamaCppModel(LLM, ChatModelMixin):
122
122
  self.prepare_parse_reasoning_content(
123
123
  reasoning_content, enable_thinking=enable_thinking
124
124
  )
125
+ self.prepare_parse_tool_calls()
125
126
 
126
127
  if os.path.isfile(self.model_path):
127
128
  # mostly passed from --model_path