xinference 1.9.1__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (334)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +400 -3
  3. xinference/client/restful/async_restful_client.py +20 -3
  4. xinference/client/restful/restful_client.py +20 -3
  5. xinference/constants.py +2 -0
  6. xinference/core/supervisor.py +111 -49
  7. xinference/core/worker.py +10 -0
  8. xinference/deploy/cmdline.py +15 -0
  9. xinference/model/audio/core.py +26 -6
  10. xinference/model/audio/indextts2.py +166 -0
  11. xinference/model/audio/kokoro.py +1 -1
  12. xinference/model/audio/kokoro_zh.py +124 -0
  13. xinference/model/audio/model_spec.json +58 -1
  14. xinference/model/embedding/sentence_transformers/core.py +4 -4
  15. xinference/model/embedding/vllm/core.py +7 -1
  16. xinference/model/image/model_spec.json +71 -3
  17. xinference/model/image/stable_diffusion/core.py +13 -4
  18. xinference/model/llm/__init__.py +4 -0
  19. xinference/model/llm/core.py +10 -0
  20. xinference/model/llm/llama_cpp/core.py +1 -0
  21. xinference/model/llm/llm_family.json +503 -21
  22. xinference/model/llm/llm_family.py +1 -0
  23. xinference/model/llm/mlx/core.py +52 -33
  24. xinference/model/llm/sglang/core.py +32 -55
  25. xinference/model/llm/tool_parsers/__init__.py +58 -0
  26. xinference/model/llm/tool_parsers/abstract_tool_parser.py +33 -0
  27. xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +190 -0
  28. xinference/model/llm/tool_parsers/deepseek_v3_tool_parser.py +145 -0
  29. xinference/model/llm/tool_parsers/glm4_tool_parser.py +123 -0
  30. xinference/model/llm/tool_parsers/llama3_tool_parser.py +77 -0
  31. xinference/model/llm/tool_parsers/qwen_tool_parser.py +320 -0
  32. xinference/model/llm/transformers/core.py +1 -1
  33. xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
  34. xinference/model/llm/utils.py +138 -53
  35. xinference/model/llm/vllm/core.py +95 -78
  36. xinference/thirdparty/audiotools/__init__.py +10 -0
  37. xinference/thirdparty/audiotools/core/__init__.py +4 -0
  38. xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
  39. xinference/thirdparty/audiotools/core/display.py +194 -0
  40. xinference/thirdparty/audiotools/core/dsp.py +390 -0
  41. xinference/thirdparty/audiotools/core/effects.py +647 -0
  42. xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
  43. xinference/thirdparty/audiotools/core/loudness.py +320 -0
  44. xinference/thirdparty/audiotools/core/playback.py +252 -0
  45. xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
  46. xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
  47. xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
  48. xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
  49. xinference/thirdparty/audiotools/core/util.py +671 -0
  50. xinference/thirdparty/audiotools/core/whisper.py +97 -0
  51. xinference/thirdparty/audiotools/data/__init__.py +3 -0
  52. xinference/thirdparty/audiotools/data/datasets.py +517 -0
  53. xinference/thirdparty/audiotools/data/preprocess.py +81 -0
  54. xinference/thirdparty/audiotools/data/transforms.py +1592 -0
  55. xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
  56. xinference/thirdparty/audiotools/metrics/distance.py +131 -0
  57. xinference/thirdparty/audiotools/metrics/quality.py +159 -0
  58. xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
  59. xinference/thirdparty/audiotools/ml/__init__.py +5 -0
  60. xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
  61. xinference/thirdparty/audiotools/ml/decorators.py +440 -0
  62. xinference/thirdparty/audiotools/ml/experiment.py +90 -0
  63. xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
  64. xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
  65. xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
  66. xinference/thirdparty/audiotools/post.py +140 -0
  67. xinference/thirdparty/audiotools/preference.py +600 -0
  68. xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
  69. xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
  70. xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
  71. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
  72. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
  73. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
  74. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
  75. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  76. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
  77. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
  78. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
  79. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
  80. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
  81. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
  82. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
  83. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
  84. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
  85. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
  86. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
  87. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
  88. xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
  89. xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
  90. xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
  91. xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
  92. xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
  93. xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
  94. xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
  95. xinference/thirdparty/indextts/__init__.py +0 -0
  96. xinference/thirdparty/indextts/cli.py +65 -0
  97. xinference/thirdparty/indextts/gpt/__init__.py +0 -0
  98. xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
  99. xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
  100. xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
  101. xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
  102. xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
  103. xinference/thirdparty/indextts/gpt/model.py +713 -0
  104. xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
  105. xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
  106. xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
  107. xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
  108. xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
  109. xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
  110. xinference/thirdparty/indextts/infer.py +690 -0
  111. xinference/thirdparty/indextts/infer_v2.py +739 -0
  112. xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
  113. xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
  114. xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
  115. xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
  116. xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
  117. xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
  118. xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
  119. xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
  120. xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
  121. xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
  122. xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
  123. xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
  124. xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
  125. xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
  126. xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
  127. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
  128. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
  129. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
  130. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
  131. xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
  132. xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
  133. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
  134. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
  135. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  136. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
  137. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
  138. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
  139. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
  140. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
  141. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
  142. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
  143. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
  144. xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
  145. xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
  146. xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
  147. xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
  148. xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
  149. xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
  150. xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
  151. xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
  152. xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
  153. xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
  154. xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
  155. xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
  156. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
  157. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
  158. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
  159. xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
  160. xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
  161. xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
  162. xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
  163. xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
  164. xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
  165. xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
  166. xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
  167. xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
  168. xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
  169. xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
  170. xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
  171. xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
  172. xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
  173. xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
  174. xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
  175. xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
  176. xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
  177. xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
  178. xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
  179. xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
  180. xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
  181. xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
  182. xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
  183. xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
  184. xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
  185. xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
  186. xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
  187. xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
  188. xinference/thirdparty/indextts/utils/__init__.py +0 -0
  189. xinference/thirdparty/indextts/utils/arch_util.py +120 -0
  190. xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
  191. xinference/thirdparty/indextts/utils/common.py +121 -0
  192. xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
  193. xinference/thirdparty/indextts/utils/front.py +536 -0
  194. xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
  195. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
  196. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
  197. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
  198. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
  199. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
  200. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
  201. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
  202. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
  203. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
  204. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
  205. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
  206. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
  207. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
  208. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
  209. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
  210. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
  211. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
  212. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
  213. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
  214. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
  215. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
  216. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
  217. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
  218. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
  219. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
  220. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
  221. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
  222. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
  223. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
  224. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
  225. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
  226. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
  227. xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
  228. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
  229. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
  230. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
  231. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
  232. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
  233. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
  234. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
  235. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
  236. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
  237. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
  238. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
  239. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
  240. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
  241. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
  242. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
  243. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
  244. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
  245. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
  246. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
  247. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
  248. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
  249. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
  250. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
  251. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
  252. xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
  253. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
  254. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
  255. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
  256. xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
  257. xinference/thirdparty/indextts/utils/text_utils.py +41 -0
  258. xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
  259. xinference/thirdparty/indextts/utils/utils.py +93 -0
  260. xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
  261. xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
  262. xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
  263. xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
  264. xinference/types.py +105 -2
  265. xinference/ui/gradio/media_interface.py +66 -8
  266. xinference/ui/web/ui/build/asset-manifest.json +6 -6
  267. xinference/ui/web/ui/build/index.html +1 -1
  268. xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
  269. xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
  270. xinference/ui/web/ui/build/static/js/main.d192c4f3.js +3 -0
  271. xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.d192c4f3.js.LICENSE.txt} +0 -7
  272. xinference/ui/web/ui/build/static/js/main.d192c4f3.js.map +1 -0
  273. xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
  274. xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
  275. xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
  276. xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
  277. xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
  278. xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
  279. xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
  280. xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
  281. xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
  282. xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
  283. xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
  284. xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
  285. xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
  286. xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
  287. xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
  288. xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
  289. xinference/ui/web/ui/node_modules/.cache/babel-loader/f995a2425dfb0822fd07127f66ffe9b026883bc156b402eb8bd0b83d52460a93.json +1 -0
  290. xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
  291. xinference/ui/web/ui/package-lock.json +0 -34
  292. xinference/ui/web/ui/package.json +0 -1
  293. xinference/ui/web/ui/src/locales/en.json +9 -3
  294. xinference/ui/web/ui/src/locales/ja.json +9 -3
  295. xinference/ui/web/ui/src/locales/ko.json +9 -3
  296. xinference/ui/web/ui/src/locales/zh.json +9 -3
  297. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/METADATA +24 -4
  298. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/RECORD +302 -76
  299. xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
  300. xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
  301. xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
  302. xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
  303. xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
  304. xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
  305. xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
  306. xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
  307. xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
  308. xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
  309. xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
  310. xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
  311. xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
  312. xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
  313. xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
  314. xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
  315. xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
  316. xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
  317. xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
  318. xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
  319. xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
  320. xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
  321. xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
  322. xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
  323. xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
  324. xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
  325. xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
  326. xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
  327. xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
  328. xinference/ui/web/ui/node_modules/select/bower.json +0 -13
  329. xinference/ui/web/ui/node_modules/select/package.json +0 -29
  330. xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
  331. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/WHEEL +0 -0
  332. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/entry_points.txt +0 -0
  333. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/licenses/LICENSE +0 -0
  334. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/top_level.txt +0 -0
@@ -31,6 +31,7 @@ from typing import (
31
31
  Literal,
32
32
  Optional,
33
33
  Tuple,
34
+ Type,
34
35
  Union,
35
36
  )
36
37
 
@@ -406,6 +407,26 @@ class SupervisorActor(xo.StatelessActor):
406
407
  "workers": self._worker_status,
407
408
  }
408
409
 
410
+ def _get_spec_dicts(
411
+ self, model_family: Any, cache_manager_cls: Type
412
+ ) -> Tuple[List[dict], List[str]]:
413
+ specs = []
414
+ download_hubs: Dict[str, None] = dict()
415
+ for spec in model_family.model_specs:
416
+ model_hub = spec.model_hub
417
+ if model_hub not in download_hubs:
418
+ download_hubs[model_hub] = None
419
+ if model_hub != "huggingface":
420
+ # since we only need to know all specs
421
+ # thus filter huggingface specs only
422
+ continue
423
+ model_family.model_specs = [spec]
424
+ cache_manager = cache_manager_cls(model_family)
425
+ specs.append(
426
+ {**spec.dict(), "cache_status": cache_manager.get_cache_status()}
427
+ )
428
+ return specs, list(download_hubs)
429
+
409
430
  async def _to_llm_reg(
410
431
  self, llm_family: "LLMFamilyV2", is_builtin: bool
411
432
  ) -> Dict[str, Any]:
@@ -415,20 +436,15 @@ class SupervisorActor(xo.StatelessActor):
415
436
  version_cnt = await self.get_model_version_count(llm_family.model_name)
416
437
 
417
438
  if self.is_local_deployment():
418
- specs = []
419
439
  # TODO: does not work when the supervisor and worker are running on separate nodes.
420
440
  _llm_family = llm_family.copy()
421
- for spec in [
422
- _spec
423
- for _spec in llm_family.model_specs
424
- if _spec.model_hub == "huggingface"
425
- ]:
426
- _llm_family.model_specs = [spec]
427
- cache_manager = LLMCacheManager(_llm_family)
428
- specs.append(
429
- {**spec.dict(), "cache_status": cache_manager.get_cache_status()}
430
- )
431
- res = {**llm_family.dict(), "is_builtin": is_builtin, "model_specs": specs}
441
+ specs, download_hubs = self._get_spec_dicts(_llm_family, LLMCacheManager)
442
+ res = {
443
+ **llm_family.dict(),
444
+ "is_builtin": is_builtin,
445
+ "model_specs": specs,
446
+ "download_hubs": download_hubs,
447
+ }
432
448
  else:
433
449
  res = {**llm_family.dict(), "is_builtin": is_builtin}
434
450
  res["model_version_count"] = version_cnt
@@ -445,24 +461,13 @@ class SupervisorActor(xo.StatelessActor):
445
461
 
446
462
  if self.is_local_deployment():
447
463
  _family = model_family.copy()
448
- specs = []
449
464
  # TODO: does not work when the supervisor and worker are running on separate nodes.
450
- for spec in [
451
- x for x in model_family.model_specs if x.model_hub == "huggingface"
452
- ]:
453
- _family.model_specs = [spec]
454
- specs.append(
455
- {
456
- **spec.dict(),
457
- "cache_status": EmbeddingCacheManager(
458
- _family
459
- ).get_cache_status(),
460
- }
461
- )
465
+ specs, download_hubs = self._get_spec_dicts(_family, EmbeddingCacheManager)
462
466
  res = {
463
467
  **model_family.dict(),
464
468
  "is_builtin": is_builtin,
465
469
  "model_specs": specs,
470
+ "download_hubs": download_hubs,
466
471
  }
467
472
  else:
468
473
  res = {
@@ -474,25 +479,26 @@ class SupervisorActor(xo.StatelessActor):
474
479
  return res
475
480
 
476
481
  async def _to_rerank_model_reg(
477
- self, model_spec: "RerankModelFamilyV2", is_builtin: bool
482
+ self, model_family: "RerankModelFamilyV2", is_builtin: bool
478
483
  ) -> Dict[str, Any]:
479
- from ..model.rerank.cache_manager import RerankCacheManager as CacheManager
484
+ from ..model.rerank.cache_manager import RerankCacheManager
480
485
 
481
- instance_cnt = await self.get_instance_count(model_spec.model_name)
482
- version_cnt = await self.get_model_version_count(model_spec.model_name)
483
- cache_manager = CacheManager(model_spec)
486
+ instance_cnt = await self.get_instance_count(model_family.model_name)
487
+ version_cnt = await self.get_model_version_count(model_family.model_name)
484
488
 
485
489
  if self.is_local_deployment():
490
+ _family = model_family.copy()
486
491
  # TODO: does not work when the supervisor and worker are running on separate nodes.
487
- cache_status = cache_manager.get_cache_status()
492
+ specs, download_hubs = self._get_spec_dicts(_family, RerankCacheManager)
488
493
  res = {
489
- **model_spec.dict(),
490
- "cache_status": cache_status,
494
+ **model_family.dict(),
491
495
  "is_builtin": is_builtin,
496
+ "model_specs": specs,
497
+ "download_hubs": download_hubs,
492
498
  }
493
499
  else:
494
500
  res = {
495
- **model_spec.dict(),
501
+ **model_family.dict(),
496
502
  "is_builtin": is_builtin,
497
503
  }
498
504
  res["model_version_count"] = version_cnt
@@ -657,7 +663,9 @@ class SupervisorActor(xo.StatelessActor):
657
663
  for model_name, families in BUILTIN_IMAGE_MODELS.items():
658
664
  if detailed:
659
665
  family = [x for x in families if x.model_hub == "huggingface"][0]
660
- ret.append(await self._to_image_model_reg(family, is_builtin=True))
666
+ info = await self._to_image_model_reg(family, is_builtin=True)
667
+ info["download_hubs"] = [x.model_hub for x in families]
668
+ ret.append(info)
661
669
  else:
662
670
  ret.append({"model_name": model_name, "is_builtin": True})
663
671
 
@@ -680,7 +688,9 @@ class SupervisorActor(xo.StatelessActor):
680
688
  for model_name, families in BUILTIN_AUDIO_MODELS.items():
681
689
  if detailed:
682
690
  family = [x for x in families if x.model_hub == "huggingface"][0]
683
- ret.append(await self._to_audio_model_reg(family, is_builtin=True))
691
+ info = await self._to_audio_model_reg(family, is_builtin=True)
692
+ info["download_hubs"] = [x.model_hub for x in families]
693
+ ret.append(info)
684
694
  else:
685
695
  ret.append({"model_name": model_name, "is_builtin": True})
686
696
 
@@ -702,7 +712,9 @@ class SupervisorActor(xo.StatelessActor):
702
712
  for model_name, families in BUILTIN_VIDEO_MODELS.items():
703
713
  if detailed:
704
714
  family = [x for x in families if x.model_hub == "huggingface"][0]
705
- ret.append(await self._to_video_model_reg(family, is_builtin=True))
715
+ info = await self._to_video_model_reg(family, is_builtin=True)
716
+ info["download_hubs"] = [x.model_hub for x in families]
717
+ ret.append(info)
706
718
  else:
707
719
  ret.append({"model_name": model_name, "is_builtin": True})
708
720
 
@@ -812,16 +824,9 @@ class SupervisorActor(xo.StatelessActor):
812
824
  from ..model.rerank import BUILTIN_RERANK_MODELS
813
825
  from ..model.rerank.custom import get_user_defined_reranks
814
826
 
815
- if model_name in BUILTIN_RERANK_MODELS:
816
- return [
817
- x
818
- for x in BUILTIN_RERANK_MODELS[model_name]
819
- if x.model_hub == "huggingface"
820
- ][0]
821
- else:
822
- for f in get_user_defined_reranks():
823
- if f.model_name == model_name:
824
- return f
827
+ for f in list(BUILTIN_RERANK_MODELS.values()) + get_user_defined_reranks():
828
+ if f.model_name == model_name:
829
+ return f
825
830
  raise ValueError(f"Model {model_name} not found")
826
831
  elif model_type == "flexible":
827
832
  from ..model.flexible import get_flexible_models
@@ -830,6 +835,16 @@ class SupervisorActor(xo.StatelessActor):
830
835
  if f.model_name == model_name:
831
836
  return f
832
837
  raise ValueError(f"Model {model_name} not found")
838
+ elif model_type == "video":
839
+ from ..model.video import BUILTIN_VIDEO_MODELS
840
+
841
+ if model_name in BUILTIN_VIDEO_MODELS:
842
+ return [
843
+ x
844
+ for x in BUILTIN_VIDEO_MODELS[model_name]
845
+ if x.model_hub == "huggingface"
846
+ ][0]
847
+ raise ValueError(f"Model {model_name} not found")
833
848
  else:
834
849
  raise ValueError(f"Unsupported model type: {model_type}")
835
850
 
@@ -864,6 +879,26 @@ class SupervisorActor(xo.StatelessActor):
864
879
  generate_fn,
865
880
  ) = self._custom_register_type_to_cls[model_type]
866
881
 
882
+ model_spec = model_spec_cls.parse_raw(model)
883
+
884
+ # check if model already registered
885
+ try:
886
+ model = await self.get_model_registration(
887
+ model_type, model_spec.model_name
888
+ )
889
+ if model is not None:
890
+ raise ValueError(
891
+ f"Model {model_spec.model_name} already registered"
892
+ )
893
+ except ValueError as e:
894
+ if "not found" in str(e):
895
+ pass
896
+ else:
897
+ raise e
898
+ except Exception:
899
+ logger.error("Get model registration failed.", exc_info=True)
900
+ raise
901
+
867
902
  target_ip_worker_ref = (
868
903
  self._get_worker_ref_by_ip(worker_ip) if worker_ip is not None else None
869
904
  )
@@ -880,12 +915,15 @@ class SupervisorActor(xo.StatelessActor):
880
915
  await target_ip_worker_ref.register_model(model_type, model, persist)
881
916
  return
882
917
 
883
- model_spec = model_spec_cls.parse_raw(model)
884
918
  try:
885
919
  register_fn(model_spec, persist)
886
920
  await self._cache_tracker_ref.record_model_version(
887
921
  generate_fn(model_spec), self.address
888
922
  )
923
+ await self._sync_register_model(
924
+ model_type, model, persist, model_spec.model_name
925
+ )
926
+
889
927
  except ValueError as e:
890
928
  raise e
891
929
  except Exception as e:
@@ -894,6 +932,30 @@ class SupervisorActor(xo.StatelessActor):
894
932
  else:
895
933
  raise ValueError(f"Unsupported model type: {model_type}")
896
934
 
935
async def _sync_register_model(
    self, model_type: str, model: str, persist: bool, model_name: str
):
    """Push a newly registered model definition to every known worker.

    Workers are visited in the iteration order of
    ``self._worker_address_to_worker``. The worker whose address equals
    ``self.address`` is skipped during the sync phase because it shares a
    node with the supervisor.

    If any worker fails to register the model, ``unregister_model`` is
    called on every worker as a best-effort rollback and the original
    exception is re-raised.

    Args:
        model_type: Model category forwarded to the workers.
        model: Serialized model definition forwarded to the workers.
        persist: Forwarded to each worker's ``register_model``.
        model_name: Name used for logging and for the rollback.

    Raises:
        Exception: Whatever ``worker.register_model`` raised; rollback
            failures are logged and never replace it.
    """
    logger.info(f"begin sync model: {model_name} to worker")
    try:
        # Sync model to all workers.
        for name, worker in self._worker_address_to_worker.items():
            logger.info(f"sync model: {model_name} to {name}")
            if name == self.address:
                # Ignore: when worker and supervisor at the same node.
                logger.info(
                    f"ignore sync model: {model_name} to {name} for same node"
                )
                continue
            await worker.register_model(model_type, model, persist)
            logger.info(f"success sync model: {model_name} to {name}")
    except Exception:
        # If sync fails, unregister the model in all workers. Each worker is
        # rolled back independently: a worker that never received the model
        # may reject the call, and that must neither stop the remaining
        # rollbacks nor hide the exception that triggered them.
        for name, worker in self._worker_address_to_worker.items():
            logger.warning(f"ready to unregister model for {name}")
            try:
                await worker.unregister_model(model_type, model_name)
            except Exception:
                logger.warning(
                    f"failed to unregister model: {model_name} for {name}",
                    exc_info=True,
                )
            else:
                logger.warning(
                    f"finish unregister model: {model_name} for {name}"
                )
        raise
958
+
897
959
  @log_async(logger=logger)
898
960
  async def unregister_model(self, model_type: str, model_name: str):
899
961
  if model_type in self._custom_register_type_to_cls:
@@ -1014,7 +1076,7 @@ class SupervisorActor(xo.StatelessActor):
1014
1076
  )
1015
1077
 
1016
1078
  # search in worker first
1017
- if not self.is_local_deployment():
1079
+ if not self.is_local_deployment() and worker_ip is None:
1018
1080
  workers = list(self._worker_address_to_worker.values())
1019
1081
  for worker in workers:
1020
1082
  res = await worker.get_model_registration(model_type, model_name)
xinference/core/worker.py CHANGED
@@ -710,6 +710,16 @@ class WorkerActor(xo.StatelessActor):
710
710
  for model_spec in get_user_defined_reranks():
711
711
  ret.append({"model_name": model_spec.model_name, "is_builtin": False})
712
712
 
713
+ ret.sort(key=sort_helper)
714
+ return ret
715
+ elif model_type == "flexible":
716
+ from ..model.flexible.custom import get_flexible_models
717
+
718
+ ret = []
719
+
720
+ for model_spec in get_flexible_models():
721
+ ret.append({"model_name": model_spec.model_name, "is_builtin": False})
722
+
713
723
  ret.sort(key=sort_helper)
714
724
  return ret
715
725
  else:
@@ -576,6 +576,21 @@ def list_model_registrations(
576
576
  ),
577
577
  file=sys.stderr,
578
578
  )
579
+ elif model_type == "flexible":
580
+ for registration in registrations:
581
+ model_name = registration["model_name"]
582
+ model_family = client.get_model_registration(model_type, model_name)
583
+ table.append(
584
+ [
585
+ model_type,
586
+ model_family["model_name"],
587
+ registration["is_builtin"],
588
+ ]
589
+ )
590
+ print(
591
+ tabulate(table, headers=["Type", "Name", "Is-built-in"]),
592
+ file=sys.stderr,
593
+ )
579
594
  else:
580
595
  raise NotImplementedError(f"List {model_type} is not implemented.")
581
596
 
@@ -23,8 +23,10 @@ from .f5tts import F5TTSModel
23
23
  from .f5tts_mlx import F5TTSMLXModel
24
24
  from .fish_speech import FishSpeechModel
25
25
  from .funasr import FunASRModel
26
+ from .indextts2 import Indextts2
26
27
  from .kokoro import KokoroModel
27
28
  from .kokoro_mlx import KokoroMLXModel
29
+ from .kokoro_zh import KokoroZHModel
28
30
  from .megatts import MegaTTSModel
29
31
  from .melotts import MeloTTSModel
30
32
  from .whisper import WhisperModel
@@ -106,13 +108,23 @@ def match_audio(
106
108
 
107
109
  if model_name in BUILTIN_AUDIO_MODELS:
108
110
  model_families = BUILTIN_AUDIO_MODELS[model_name]
109
- if download_hub == "modelscope" or download_from_modelscope():
110
- return (
111
- [x for x in model_families if x.model_hub == "modelscope"]
112
- + [x for x in model_families if x.model_hub == "huggingface"]
113
- )[0]
111
+ if download_hub is not None:
112
+ if download_hub == "modelscope":
113
+ return (
114
+ [x for x in model_families if x.model_hub == "modelscope"]
115
+ + [x for x in model_families if x.model_hub == "huggingface"]
116
+ )[0]
117
+ else:
118
+ return [x for x in model_families if x.model_hub == download_hub][0]
114
119
  else:
115
- return [x for x in model_families if x.model_hub == "huggingface"][0]
120
+ if download_from_modelscope():
121
+ return (
122
+ [x for x in model_families if x.model_hub == "modelscope"]
123
+ + [x for x in model_families if x.model_hub == "huggingface"]
124
+ )[0]
125
+ else:
126
+ return [x for x in model_families if x.model_hub == "huggingface"][0]
127
+
116
128
  else:
117
129
  raise ValueError(
118
130
  f"Audio model {model_name} not found, available"
@@ -140,7 +152,9 @@ def create_audio_model_instance(
140
152
  MeloTTSModel,
141
153
  KokoroModel,
142
154
  KokoroMLXModel,
155
+ KokoroZHModel,
143
156
  MegaTTSModel,
157
+ Indextts2,
144
158
  ]:
145
159
  from ..cache_manager import CacheManager
146
160
 
@@ -160,7 +174,9 @@ def create_audio_model_instance(
160
174
  MeloTTSModel,
161
175
  KokoroModel,
162
176
  KokoroMLXModel,
177
+ KokoroZHModel,
163
178
  MegaTTSModel,
179
+ Indextts2,
164
180
  ]
165
181
  if model_spec.model_family == "whisper":
166
182
  if not model_spec.engine:
@@ -183,10 +199,14 @@ def create_audio_model_instance(
183
199
  model = MeloTTSModel(model_uid, model_path, model_spec, **kwargs)
184
200
  elif model_spec.model_family == "Kokoro":
185
201
  model = KokoroModel(model_uid, model_path, model_spec, **kwargs)
202
+ elif model_spec.model_family == "Kokoro-zh":
203
+ model = KokoroZHModel(model_uid, model_path, model_spec, **kwargs)
186
204
  elif model_spec.model_family == "Kokoro-MLX":
187
205
  model = KokoroMLXModel(model_uid, model_path, model_spec, **kwargs)
188
206
  elif model_spec.model_family == "MegaTTS":
189
207
  model = MegaTTSModel(model_uid, model_path, model_spec, **kwargs)
208
+ elif model_spec.model_family == "IndexTTS2":
209
+ model = Indextts2(model_uid, model_path, model_spec, **kwargs)
190
210
  else:
191
211
  raise Exception(f"Unsupported audio model family: {model_spec.model_family}")
192
212
  return model
@@ -0,0 +1,166 @@
1
+ # Copyright 2022-2025 XProbe Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import logging
15
+ import os
16
+ import sys
17
+ from typing import TYPE_CHECKING, Optional
18
+
19
+ from ..utils import set_all_random_seed
20
+
21
+ if TYPE_CHECKING:
22
+ from .core import AudioModelFamilyV2
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
class Indextts2:
    """Audio model wrapper that drives ``indextts.infer_v2.IndexTTS2``.

    The instance only stores its configuration on construction; the
    underlying model is created by :meth:`load` and used by :meth:`speech`.
    """

    def __init__(
        self,
        model_uid: str,
        model_path: str,
        model_spec: "AudioModelFamilyV2",
        device: Optional[str] = None,
        **kwargs,
    ):
        """Store the model configuration without loading any weights.

        Args:
            model_uid: Identifier of this model instance.
            model_path: Directory holding the model files and ``config.yaml``.
            model_spec: Model family description; also exposed as
                ``model_family``.
            device: Device passed through to ``IndexTTS2``.
            **kwargs: Extra options; ``use_fp16`` and ``use_deepspeed`` are
                read by :meth:`load`.
        """
        self.model_family = model_spec
        self._model_uid = model_uid
        self._model_path = model_path
        self._model_spec = model_spec
        self._device = device
        self._model = None
        self._kwargs = kwargs

    @property
    def model_ability(self):
        """Return the ``model_ability`` declared by the model spec."""
        return self._model_spec.model_ability

    def load(self):
        """Instantiate the IndexTTS2 model from ``self._model_path``."""
        # The yaml config loaded from model has hard-coded the import paths
        thirdparty_dir = os.path.join(os.path.dirname(__file__), "../../thirdparty")
        sys.path.insert(0, thirdparty_dir)

        from indextts.infer_v2 import IndexTTS2

        config_path = os.path.join(self._model_path, "config.yaml")
        use_fp16 = self._kwargs.get("use_fp16", False)
        use_deepspeed = self._kwargs.get("use_deepspeed", False)

        logger.info("Loading IndexTTS2 model...")
        self._model = IndexTTS2(
            cfg_path=config_path,
            model_dir=self._model_path,
            use_fp16=use_fp16,
            device=self._device,
            use_deepspeed=use_deepspeed,
        )

    @staticmethod
    def _cleanup_temp_files(paths):
        """Remove each temporary file in ``paths``, one attempt per file.

        Falsy entries are skipped and a file that is already gone is
        ignored. Any other ``OSError`` is logged, so a single failure never
        prevents the remaining files from being removed.
        """
        for path in paths:
            if not path:
                continue
            try:
                os.unlink(path)
            except FileNotFoundError:
                # Nothing left to clean up for this entry.
                pass
            except OSError:
                logger.warning(
                    "Failed to remove temporary file %s", path, exc_info=True
                )

    def speech(
        self,
        input: str,
        voice: str,
        response_format: str = "mp3",
        speed: float = 1.0,
        stream: bool = False,
        **kwargs,
    ):
        """Synthesize ``input`` with the voice of a reference recording.

        Args:
            input: Text to synthesize.
            voice: Accepted for interface compatibility; not used here.
            response_format: Audio container of the returned bytes; it is
                upper-cased and handed to ``soundfile``.
            speed: Accepted for interface compatibility; not used here.
            stream: Must be false; streaming is not supported.
            **kwargs: Recognised keys are ``prompt_speech`` (required
                reference audio bytes), ``emo_prompt_speech``, ``emo_alpha``,
                ``emo_text``, ``use_random``, ``emo_vector``, ``seed`` and
                ``use_emo_text``.

        Returns:
            The generated audio encoded as ``response_format``, as bytes.

        Raises:
            Exception: If ``stream`` is true.
            ValueError: If ``prompt_speech`` is missing.
        """
        if stream:
            raise Exception("IndexTTS2 does not support stream generation.")

        prompt_speech: Optional[bytes] = kwargs.pop("prompt_speech", None)
        emo_prompt_speech: Optional[bytes] = kwargs.pop("emo_prompt_speech", None)
        emo_alpha: float = kwargs.pop("emo_alpha", 1.0)
        emo_text: Optional[str] = kwargs.pop("emo_text", None)
        use_random: bool = kwargs.pop("use_random", False)
        emo_vector: Optional[list] = kwargs.pop("emo_vector", None)
        seed: Optional[int] = kwargs.pop("seed", 0)
        use_emo_text: bool = kwargs.pop("use_emo_text", False)

        if prompt_speech is None:
            # IndexTTS2 requires reference audio for voice cloning
            # We'll provide a helpful error message with usage examples
            raise ValueError(
                "IndexTTS2 requires a reference audio for voice cloning.\n"
                "Please provide a short audio sample (3-10 seconds) as 'prompt_speech' parameter.\n"
                "Example usage:\n"
                "  with open('reference.wav', 'rb') as f:\n"
                "      prompt_speech = f.read()\n"
                "  audio_bytes = model.speech(\n"
                "      input='Hello, world!',\n"
                "      voice='default',\n"
                "      prompt_speech=prompt_speech\n"
                "  )\n\n"
                "For emotion control, you can also add:\n"
                "  emo_prompt_speech=emotion_audio_bytes  # Optional: emotion reference\n"
                "  emo_text='happy and cheerful'  # Optional: emotion description\n"
                "  emo_alpha=1.5  # Optional: emotion intensity"
            )

        assert self._model is not None

        # Imported only after validation so that argument errors are reported
        # even when the optional audio dependency is not installed.
        import tempfile
        from io import BytesIO

        import soundfile

        set_all_random_seed(seed)

        # Every temp file is recorded as soon as it exists, so the finally
        # clause removes it no matter which later step fails.
        temp_paths = []
        try:
            # Save prompt speech to temp file
            with tempfile.NamedTemporaryFile(
                suffix=".wav", delete=False
            ) as temp_prompt:
                temp_prompt_path = temp_prompt.name
                temp_paths.append(temp_prompt_path)
                temp_prompt.write(prompt_speech)

            emo_prompt_path = None
            if emo_prompt_speech is not None:
                with tempfile.NamedTemporaryFile(
                    suffix=".wav", delete=False
                ) as temp_emo:
                    emo_prompt_path = temp_emo.name
                    temp_paths.append(emo_prompt_path)
                    temp_emo.write(emo_prompt_speech)

            # Reserve a path for the model to write its output to.
            with tempfile.NamedTemporaryFile(
                suffix=".wav", delete=False
            ) as temp_output:
                output_path = temp_output.name
                temp_paths.append(output_path)

            # Generate audio
            self._model.infer(
                spk_audio_prompt=temp_prompt_path,
                text=input,
                output_path=output_path,
                emo_audio_prompt=emo_prompt_path,
                emo_alpha=emo_alpha,
                emo_text=emo_text,
                use_random=use_random,
                emo_vector=emo_vector,
                use_emo_text=use_emo_text,
            )

            # Read generated audio and convert to requested format
            audio, sample_rate = soundfile.read(output_path)

            with BytesIO() as out:
                with soundfile.SoundFile(
                    out, "w", sample_rate, 1, format=response_format.upper()
                ) as f:
                    f.write(audio)
                return out.getvalue()
        finally:
            # Clean up temp files
            self._cleanup_temp_files(temp_paths)
@@ -81,7 +81,7 @@ class KokoroModel:
81
81
  logger.info("Launching Kokoro model with language code: %s", lang_code)
82
82
  self._model = KPipeline(
83
83
  lang_code=lang_code,
84
- model=KModel(config=config_path, model=model_path),
84
+ model=KModel(config=config_path, model=model_path).to(self._device),
85
85
  device=self._device,
86
86
  )
87
87