xinference 1.10.0__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (328)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +473 -31
  3. xinference/client/restful/async_restful_client.py +178 -8
  4. xinference/client/restful/restful_client.py +151 -3
  5. xinference/core/supervisor.py +99 -53
  6. xinference/core/worker.py +10 -0
  7. xinference/deploy/cmdline.py +15 -0
  8. xinference/model/audio/core.py +21 -6
  9. xinference/model/audio/indextts2.py +166 -0
  10. xinference/model/audio/model_spec.json +58 -21
  11. xinference/model/image/model_spec.json +159 -90
  12. xinference/model/image/stable_diffusion/core.py +13 -4
  13. xinference/model/llm/__init__.py +6 -2
  14. xinference/model/llm/llm_family.json +1299 -174
  15. xinference/model/llm/mlx/distributed_models/core.py +41 -0
  16. xinference/model/llm/mlx/distributed_models/qwen2.py +1 -2
  17. xinference/model/llm/sglang/core.py +44 -11
  18. xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +94 -32
  19. xinference/model/llm/tool_parsers/qwen_tool_parser.py +29 -4
  20. xinference/model/llm/transformers/chatglm.py +3 -0
  21. xinference/model/llm/transformers/core.py +129 -36
  22. xinference/model/llm/transformers/multimodal/minicpmv45.py +340 -0
  23. xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
  24. xinference/model/llm/transformers/utils.py +23 -0
  25. xinference/model/llm/utils.py +48 -32
  26. xinference/model/llm/vllm/core.py +207 -72
  27. xinference/model/utils.py +74 -31
  28. xinference/thirdparty/audiotools/__init__.py +10 -0
  29. xinference/thirdparty/audiotools/core/__init__.py +4 -0
  30. xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
  31. xinference/thirdparty/audiotools/core/display.py +194 -0
  32. xinference/thirdparty/audiotools/core/dsp.py +390 -0
  33. xinference/thirdparty/audiotools/core/effects.py +647 -0
  34. xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
  35. xinference/thirdparty/audiotools/core/loudness.py +320 -0
  36. xinference/thirdparty/audiotools/core/playback.py +252 -0
  37. xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
  38. xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
  39. xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
  40. xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
  41. xinference/thirdparty/audiotools/core/util.py +671 -0
  42. xinference/thirdparty/audiotools/core/whisper.py +97 -0
  43. xinference/thirdparty/audiotools/data/__init__.py +3 -0
  44. xinference/thirdparty/audiotools/data/datasets.py +517 -0
  45. xinference/thirdparty/audiotools/data/preprocess.py +81 -0
  46. xinference/thirdparty/audiotools/data/transforms.py +1592 -0
  47. xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
  48. xinference/thirdparty/audiotools/metrics/distance.py +131 -0
  49. xinference/thirdparty/audiotools/metrics/quality.py +159 -0
  50. xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
  51. xinference/thirdparty/audiotools/ml/__init__.py +5 -0
  52. xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
  53. xinference/thirdparty/audiotools/ml/decorators.py +440 -0
  54. xinference/thirdparty/audiotools/ml/experiment.py +90 -0
  55. xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
  56. xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
  57. xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
  58. xinference/thirdparty/audiotools/post.py +140 -0
  59. xinference/thirdparty/audiotools/preference.py +600 -0
  60. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +1 -1
  61. xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
  62. xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
  63. xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
  64. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
  65. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
  66. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
  67. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
  68. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  69. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
  70. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
  71. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
  72. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
  73. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
  74. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
  75. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
  76. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
  77. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
  78. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
  79. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
  80. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
  81. xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
  82. xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
  83. xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
  84. xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
  85. xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
  86. xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
  87. xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
  88. xinference/thirdparty/indextts/__init__.py +0 -0
  89. xinference/thirdparty/indextts/cli.py +65 -0
  90. xinference/thirdparty/indextts/gpt/__init__.py +0 -0
  91. xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
  92. xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
  93. xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
  94. xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
  95. xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
  96. xinference/thirdparty/indextts/gpt/model.py +713 -0
  97. xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
  98. xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
  99. xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
  100. xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
  101. xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
  102. xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
  103. xinference/thirdparty/indextts/infer.py +690 -0
  104. xinference/thirdparty/indextts/infer_v2.py +739 -0
  105. xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
  106. xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
  107. xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
  108. xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
  109. xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
  110. xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
  111. xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
  112. xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
  113. xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
  114. xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
  115. xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
  116. xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
  117. xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
  118. xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
  119. xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
  120. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
  121. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
  122. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
  123. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
  124. xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
  125. xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
  126. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
  127. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
  128. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  129. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
  130. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
  131. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
  132. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
  133. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
  134. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
  135. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
  136. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
  137. xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
  138. xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
  139. xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
  140. xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
  141. xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
  142. xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
  143. xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
  144. xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
  145. xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
  146. xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
  147. xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
  148. xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
  149. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
  150. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
  151. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
  152. xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
  153. xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
  154. xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
  155. xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
  156. xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
  157. xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
  158. xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
  159. xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
  160. xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
  161. xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
  162. xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
  163. xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
  164. xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
  165. xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
  166. xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
  167. xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
  168. xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
  169. xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
  170. xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
  171. xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
  172. xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
  173. xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
  174. xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
  175. xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
  176. xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
  177. xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
  178. xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
  179. xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
  180. xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
  181. xinference/thirdparty/indextts/utils/__init__.py +0 -0
  182. xinference/thirdparty/indextts/utils/arch_util.py +120 -0
  183. xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
  184. xinference/thirdparty/indextts/utils/common.py +121 -0
  185. xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
  186. xinference/thirdparty/indextts/utils/front.py +536 -0
  187. xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
  188. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
  189. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
  190. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
  191. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
  192. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
  193. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
  194. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
  195. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
  196. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
  197. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
  198. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
  199. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
  200. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
  201. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
  202. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
  203. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
  204. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
  205. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
  206. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
  207. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
  208. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
  209. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
  210. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
  211. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
  212. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
  213. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
  214. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
  215. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
  216. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
  217. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
  218. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
  219. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
  220. xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
  221. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
  222. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
  223. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
  224. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
  225. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
  226. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
  227. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
  228. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
  229. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
  230. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
  231. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
  232. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
  233. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
  234. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
  235. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
  236. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
  237. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
  238. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
  239. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
  240. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
  241. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
  242. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
  243. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
  244. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
  245. xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
  246. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
  247. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
  248. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
  249. xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
  250. xinference/thirdparty/indextts/utils/text_utils.py +41 -0
  251. xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
  252. xinference/thirdparty/indextts/utils/utils.py +93 -0
  253. xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
  254. xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
  255. xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
  256. xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
  257. xinference/thirdparty/melo/text/chinese_mix.py +2 -2
  258. xinference/types.py +9 -0
  259. xinference/ui/gradio/media_interface.py +66 -8
  260. xinference/ui/web/ui/build/asset-manifest.json +6 -6
  261. xinference/ui/web/ui/build/index.html +1 -1
  262. xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
  263. xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
  264. xinference/ui/web/ui/build/static/js/main.45e78536.js +3 -0
  265. xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.45e78536.js.LICENSE.txt} +0 -7
  266. xinference/ui/web/ui/build/static/js/main.45e78536.js.map +1 -0
  267. xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
  268. xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
  269. xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
  270. xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
  271. xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
  272. xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
  273. xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
  274. xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
  275. xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
  276. xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
  277. xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
  278. xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
  279. xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
  280. xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
  281. xinference/ui/web/ui/node_modules/.cache/babel-loader/ea2a26361204e70cf1018d6990fb6354bed82b3ac69690391e0f100385e7abb7.json +1 -0
  282. xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
  283. xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
  284. xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
  285. xinference/ui/web/ui/package-lock.json +0 -34
  286. xinference/ui/web/ui/package.json +0 -1
  287. xinference/ui/web/ui/src/locales/en.json +9 -3
  288. xinference/ui/web/ui/src/locales/ja.json +9 -3
  289. xinference/ui/web/ui/src/locales/ko.json +9 -3
  290. xinference/ui/web/ui/src/locales/zh.json +9 -3
  291. {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/METADATA +24 -6
  292. {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/RECORD +296 -77
  293. xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
  294. xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
  295. xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
  296. xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
  297. xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
  298. xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
  299. xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
  300. xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
  301. xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
  302. xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
  303. xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
  304. xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
  305. xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
  306. xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
  307. xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
  308. xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
  309. xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
  310. xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
  311. xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
  312. xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
  313. xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
  314. xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
  315. xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
  316. xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
  317. xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
  318. xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
  319. xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
  320. xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
  321. xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
  322. xinference/ui/web/ui/node_modules/select/bower.json +0 -13
  323. xinference/ui/web/ui/node_modules/select/package.json +0 -29
  324. xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
  325. {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/WHEEL +0 -0
  326. {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/entry_points.txt +0 -0
  327. {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/licenses/LICENSE +0 -0
  328. {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/top_level.txt +0 -0
@@ -31,6 +31,7 @@ from typing import (
31
31
  Literal,
32
32
  Optional,
33
33
  Tuple,
34
+ Type,
34
35
  Union,
35
36
  )
36
37
 
@@ -406,6 +407,26 @@ class SupervisorActor(xo.StatelessActor):
406
407
  "workers": self._worker_status,
407
408
  }
408
409
 
410
+ def _get_spec_dicts(
411
+ self, model_family: Any, cache_manager_cls: Type
412
+ ) -> Tuple[List[dict], List[str]]:
413
+ specs = []
414
+ download_hubs: Dict[str, None] = dict()
415
+ for spec in model_family.model_specs:
416
+ model_hub = spec.model_hub
417
+ if model_hub not in download_hubs:
418
+ download_hubs[model_hub] = None
419
+ if model_hub != "huggingface":
420
+ # since we only need to know all specs
421
+ # thus filter huggingface specs only
422
+ continue
423
+ model_family.model_specs = [spec]
424
+ cache_manager = cache_manager_cls(model_family)
425
+ specs.append(
426
+ {**spec.dict(), "cache_status": cache_manager.get_cache_status()}
427
+ )
428
+ return specs, list(download_hubs)
429
+
409
430
  async def _to_llm_reg(
410
431
  self, llm_family: "LLMFamilyV2", is_builtin: bool
411
432
  ) -> Dict[str, Any]:
@@ -415,20 +436,15 @@ class SupervisorActor(xo.StatelessActor):
415
436
  version_cnt = await self.get_model_version_count(llm_family.model_name)
416
437
 
417
438
  if self.is_local_deployment():
418
- specs = []
419
439
  # TODO: does not work when the supervisor and worker are running on separate nodes.
420
440
  _llm_family = llm_family.copy()
421
- for spec in [
422
- _spec
423
- for _spec in llm_family.model_specs
424
- if _spec.model_hub == "huggingface"
425
- ]:
426
- _llm_family.model_specs = [spec]
427
- cache_manager = LLMCacheManager(_llm_family)
428
- specs.append(
429
- {**spec.dict(), "cache_status": cache_manager.get_cache_status()}
430
- )
431
- res = {**llm_family.dict(), "is_builtin": is_builtin, "model_specs": specs}
441
+ specs, download_hubs = self._get_spec_dicts(_llm_family, LLMCacheManager)
442
+ res = {
443
+ **llm_family.dict(),
444
+ "is_builtin": is_builtin,
445
+ "model_specs": specs,
446
+ "download_hubs": download_hubs,
447
+ }
432
448
  else:
433
449
  res = {**llm_family.dict(), "is_builtin": is_builtin}
434
450
  res["model_version_count"] = version_cnt
@@ -445,24 +461,13 @@ class SupervisorActor(xo.StatelessActor):
445
461
 
446
462
  if self.is_local_deployment():
447
463
  _family = model_family.copy()
448
- specs = []
449
464
  # TODO: does not work when the supervisor and worker are running on separate nodes.
450
- for spec in [
451
- x for x in model_family.model_specs if x.model_hub == "huggingface"
452
- ]:
453
- _family.model_specs = [spec]
454
- specs.append(
455
- {
456
- **spec.dict(),
457
- "cache_status": EmbeddingCacheManager(
458
- _family
459
- ).get_cache_status(),
460
- }
461
- )
465
+ specs, download_hubs = self._get_spec_dicts(_family, EmbeddingCacheManager)
462
466
  res = {
463
467
  **model_family.dict(),
464
468
  "is_builtin": is_builtin,
465
469
  "model_specs": specs,
470
+ "download_hubs": download_hubs,
466
471
  }
467
472
  else:
468
473
  res = {
@@ -474,25 +479,26 @@ class SupervisorActor(xo.StatelessActor):
474
479
  return res
475
480
 
476
481
  async def _to_rerank_model_reg(
477
- self, model_spec: "RerankModelFamilyV2", is_builtin: bool
482
+ self, model_family: "RerankModelFamilyV2", is_builtin: bool
478
483
  ) -> Dict[str, Any]:
479
- from ..model.rerank.cache_manager import RerankCacheManager as CacheManager
484
+ from ..model.rerank.cache_manager import RerankCacheManager
480
485
 
481
- instance_cnt = await self.get_instance_count(model_spec.model_name)
482
- version_cnt = await self.get_model_version_count(model_spec.model_name)
483
- cache_manager = CacheManager(model_spec)
486
+ instance_cnt = await self.get_instance_count(model_family.model_name)
487
+ version_cnt = await self.get_model_version_count(model_family.model_name)
484
488
 
485
489
  if self.is_local_deployment():
490
+ _family = model_family.copy()
486
491
  # TODO: does not work when the supervisor and worker are running on separate nodes.
487
- cache_status = cache_manager.get_cache_status()
492
+ specs, download_hubs = self._get_spec_dicts(_family, RerankCacheManager)
488
493
  res = {
489
- **model_spec.dict(),
490
- "cache_status": cache_status,
494
+ **model_family.dict(),
491
495
  "is_builtin": is_builtin,
496
+ "model_specs": specs,
497
+ "download_hubs": download_hubs,
492
498
  }
493
499
  else:
494
500
  res = {
495
- **model_spec.dict(),
501
+ **model_family.dict(),
496
502
  "is_builtin": is_builtin,
497
503
  }
498
504
  res["model_version_count"] = version_cnt
@@ -657,7 +663,9 @@ class SupervisorActor(xo.StatelessActor):
657
663
  for model_name, families in BUILTIN_IMAGE_MODELS.items():
658
664
  if detailed:
659
665
  family = [x for x in families if x.model_hub == "huggingface"][0]
660
- ret.append(await self._to_image_model_reg(family, is_builtin=True))
666
+ info = await self._to_image_model_reg(family, is_builtin=True)
667
+ info["download_hubs"] = [x.model_hub for x in families]
668
+ ret.append(info)
661
669
  else:
662
670
  ret.append({"model_name": model_name, "is_builtin": True})
663
671
 
@@ -680,7 +688,9 @@ class SupervisorActor(xo.StatelessActor):
680
688
  for model_name, families in BUILTIN_AUDIO_MODELS.items():
681
689
  if detailed:
682
690
  family = [x for x in families if x.model_hub == "huggingface"][0]
683
- ret.append(await self._to_audio_model_reg(family, is_builtin=True))
691
+ info = await self._to_audio_model_reg(family, is_builtin=True)
692
+ info["download_hubs"] = [x.model_hub for x in families]
693
+ ret.append(info)
684
694
  else:
685
695
  ret.append({"model_name": model_name, "is_builtin": True})
686
696
 
@@ -702,7 +712,9 @@ class SupervisorActor(xo.StatelessActor):
702
712
  for model_name, families in BUILTIN_VIDEO_MODELS.items():
703
713
  if detailed:
704
714
  family = [x for x in families if x.model_hub == "huggingface"][0]
705
- ret.append(await self._to_video_model_reg(family, is_builtin=True))
715
+ info = await self._to_video_model_reg(family, is_builtin=True)
716
+ info["download_hubs"] = [x.model_hub for x in families]
717
+ ret.append(info)
706
718
  else:
707
719
  ret.append({"model_name": model_name, "is_builtin": True})
708
720
 
@@ -812,16 +824,9 @@ class SupervisorActor(xo.StatelessActor):
812
824
  from ..model.rerank import BUILTIN_RERANK_MODELS
813
825
  from ..model.rerank.custom import get_user_defined_reranks
814
826
 
815
- if model_name in BUILTIN_RERANK_MODELS:
816
- return [
817
- x
818
- for x in BUILTIN_RERANK_MODELS[model_name]
819
- if x.model_hub == "huggingface"
820
- ][0]
821
- else:
822
- for f in get_user_defined_reranks():
823
- if f.model_name == model_name:
824
- return f
827
+ for f in list(BUILTIN_RERANK_MODELS.values()) + get_user_defined_reranks():
828
+ if f.model_name == model_name:
829
+ return f
825
830
  raise ValueError(f"Model {model_name} not found")
826
831
  elif model_type == "flexible":
827
832
  from ..model.flexible import get_flexible_models
@@ -830,6 +835,16 @@ class SupervisorActor(xo.StatelessActor):
830
835
  if f.model_name == model_name:
831
836
  return f
832
837
  raise ValueError(f"Model {model_name} not found")
838
+ elif model_type == "video":
839
+ from ..model.video import BUILTIN_VIDEO_MODELS
840
+
841
+ if model_name in BUILTIN_VIDEO_MODELS:
842
+ return [
843
+ x
844
+ for x in BUILTIN_VIDEO_MODELS[model_name]
845
+ if x.model_hub == "huggingface"
846
+ ][0]
847
+ raise ValueError(f"Model {model_name} not found")
833
848
  else:
834
849
  raise ValueError(f"Unsupported model type: {model_type}")
835
850
 
@@ -864,6 +879,26 @@ class SupervisorActor(xo.StatelessActor):
864
879
  generate_fn,
865
880
  ) = self._custom_register_type_to_cls[model_type]
866
881
 
882
+ model_spec = model_spec_cls.parse_raw(model)
883
+
884
+ # check if model already registered
885
+ try:
886
+ model = await self.get_model_registration(
887
+ model_type, model_spec.model_name
888
+ )
889
+ if model is not None:
890
+ raise ValueError(
891
+ f"Model {model_spec.model_name} already registered"
892
+ )
893
+ except ValueError as e:
894
+ if "not found" in str(e):
895
+ pass
896
+ else:
897
+ raise e
898
+ except Exception:
899
+ logger.error("Get model registration failed.", exc_info=True)
900
+ raise
901
+
867
902
  target_ip_worker_ref = (
868
903
  self._get_worker_ref_by_ip(worker_ip) if worker_ip is not None else None
869
904
  )
@@ -880,7 +915,6 @@ class SupervisorActor(xo.StatelessActor):
880
915
  await target_ip_worker_ref.register_model(model_type, model, persist)
881
916
  return
882
917
 
883
- model_spec = model_spec_cls.parse_raw(model)
884
918
  try:
885
919
  register_fn(model_spec, persist)
886
920
  await self._cache_tracker_ref.record_model_version(
@@ -901,25 +935,25 @@ class SupervisorActor(xo.StatelessActor):
901
935
  async def _sync_register_model(
902
936
  self, model_type: str, model: str, persist: bool, model_name: str
903
937
  ):
904
- logger.info(f"begin sync model:{model_name} to worker")
938
+ logger.info(f"begin sync model: {model_name} to worker")
905
939
  try:
906
940
  # Sync model to all workers.
907
941
  for name, worker in self._worker_address_to_worker.items():
908
- logger.info(f"sync model:{model_name} to {name}")
942
+ logger.info(f"sync model: {model_name} to {name}")
909
943
  if name == self.address:
910
944
  # Ignore: when worker and supervisor at the same node.
911
945
  logger.info(
912
- f"ignore sync model:{model_name} to {name} for same node"
946
+ f"ignore sync model: {model_name} to {name} for same node"
913
947
  )
914
948
  else:
915
949
  await worker.register_model(model_type, model, persist)
916
- logger.info(f"success sync model:{model_name} to {name}")
950
+ logger.info(f"success sync model: {model_name} to {name}")
917
951
  except Exception as e:
918
952
  # If sync fails, unregister the model in all workers.
919
953
  for name, worker in self._worker_address_to_worker.items():
920
954
  logger.warning(f"ready to unregister model for {name}")
921
955
  await worker.unregister_model(model_type, model_name)
922
- logger.warning(f"finish unregister model:{model} for {name}")
956
+ logger.warning(f"finish unregister model: {model} for {name}")
923
957
  raise e
924
958
 
925
959
  @log_async(logger=logger)
@@ -1627,6 +1661,9 @@ class SupervisorActor(xo.StatelessActor):
1627
1661
  if isinstance(worker_ref, list):
1628
1662
  # get first worker to fetch information if model across workers
1629
1663
  worker_ref = worker_ref[0]
1664
+ assert not isinstance(
1665
+ worker_ref, (list, tuple)
1666
+ ), "worker_ref must be a single worker"
1630
1667
  return await worker_ref.get_model(model_uid=replica_model_uid)
1631
1668
 
1632
1669
  @log_async(logger=logger)
@@ -1639,6 +1676,9 @@ class SupervisorActor(xo.StatelessActor):
1639
1676
  if isinstance(worker_ref, list):
1640
1677
  # get status from first shard if model has multiple shards across workers
1641
1678
  worker_ref = worker_ref[0]
1679
+ assert not isinstance(
1680
+ worker_ref, (list, tuple)
1681
+ ), "worker_ref must be a single worker"
1642
1682
  return await worker_ref.get_model_status(replica_model_uid)
1643
1683
 
1644
1684
  @log_async(logger=logger)
@@ -1657,6 +1697,9 @@ class SupervisorActor(xo.StatelessActor):
1657
1697
  if isinstance(worker_ref, list):
1658
1698
  # get status from first shard if model has multiple shards across workers
1659
1699
  worker_ref = worker_ref[0]
1700
+ assert not isinstance(
1701
+ worker_ref, (list, tuple)
1702
+ ), "worker_ref must be a single worker"
1660
1703
  info = await worker_ref.describe_model(model_uid=replica_model_uid)
1661
1704
  info["replica"] = replica_info.replica
1662
1705
  return info
@@ -1732,6 +1775,9 @@ class SupervisorActor(xo.StatelessActor):
1732
1775
  if isinstance(worker_ref, list):
1733
1776
  # get status from first shard if model has multiple shards across workers
1734
1777
  worker_ref = worker_ref[0]
1778
+ assert not isinstance(
1779
+ worker_ref, (list, tuple)
1780
+ ), "worker_ref must be a single worker"
1735
1781
  model_ref = await worker_ref.get_model(model_uid=rep_mid)
1736
1782
  result_info = await model_ref.abort_request(request_id, block_duration)
1737
1783
  res["msg"] = result_info
xinference/core/worker.py CHANGED
@@ -710,6 +710,16 @@ class WorkerActor(xo.StatelessActor):
710
710
  for model_spec in get_user_defined_reranks():
711
711
  ret.append({"model_name": model_spec.model_name, "is_builtin": False})
712
712
 
713
+ ret.sort(key=sort_helper)
714
+ return ret
715
+ elif model_type == "flexible":
716
+ from ..model.flexible.custom import get_flexible_models
717
+
718
+ ret = []
719
+
720
+ for model_spec in get_flexible_models():
721
+ ret.append({"model_name": model_spec.model_name, "is_builtin": False})
722
+
713
723
  ret.sort(key=sort_helper)
714
724
  return ret
715
725
  else:
@@ -576,6 +576,21 @@ def list_model_registrations(
576
576
  ),
577
577
  file=sys.stderr,
578
578
  )
579
+ elif model_type == "flexible":
580
+ for registration in registrations:
581
+ model_name = registration["model_name"]
582
+ model_family = client.get_model_registration(model_type, model_name)
583
+ table.append(
584
+ [
585
+ model_type,
586
+ model_family["model_name"],
587
+ registration["is_builtin"],
588
+ ]
589
+ )
590
+ print(
591
+ tabulate(table, headers=["Type", "Name", "Is-built-in"]),
592
+ file=sys.stderr,
593
+ )
579
594
  else:
580
595
  raise NotImplementedError(f"List {model_type} is not implemented.")
581
596
 
@@ -23,6 +23,7 @@ from .f5tts import F5TTSModel
23
23
  from .f5tts_mlx import F5TTSMLXModel
24
24
  from .fish_speech import FishSpeechModel
25
25
  from .funasr import FunASRModel
26
+ from .indextts2 import Indextts2
26
27
  from .kokoro import KokoroModel
27
28
  from .kokoro_mlx import KokoroMLXModel
28
29
  from .kokoro_zh import KokoroZHModel
@@ -107,13 +108,23 @@ def match_audio(
107
108
 
108
109
  if model_name in BUILTIN_AUDIO_MODELS:
109
110
  model_families = BUILTIN_AUDIO_MODELS[model_name]
110
- if download_hub == "modelscope" or download_from_modelscope():
111
- return (
112
- [x for x in model_families if x.model_hub == "modelscope"]
113
- + [x for x in model_families if x.model_hub == "huggingface"]
114
- )[0]
111
+ if download_hub is not None:
112
+ if download_hub == "modelscope":
113
+ return (
114
+ [x for x in model_families if x.model_hub == "modelscope"]
115
+ + [x for x in model_families if x.model_hub == "huggingface"]
116
+ )[0]
117
+ else:
118
+ return [x for x in model_families if x.model_hub == download_hub][0]
115
119
  else:
116
- return [x for x in model_families if x.model_hub == "huggingface"][0]
120
+ if download_from_modelscope():
121
+ return (
122
+ [x for x in model_families if x.model_hub == "modelscope"]
123
+ + [x for x in model_families if x.model_hub == "huggingface"]
124
+ )[0]
125
+ else:
126
+ return [x for x in model_families if x.model_hub == "huggingface"][0]
127
+
117
128
  else:
118
129
  raise ValueError(
119
130
  f"Audio model {model_name} not found, available"
@@ -143,6 +154,7 @@ def create_audio_model_instance(
143
154
  KokoroMLXModel,
144
155
  KokoroZHModel,
145
156
  MegaTTSModel,
157
+ Indextts2,
146
158
  ]:
147
159
  from ..cache_manager import CacheManager
148
160
 
@@ -164,6 +176,7 @@ def create_audio_model_instance(
164
176
  KokoroMLXModel,
165
177
  KokoroZHModel,
166
178
  MegaTTSModel,
179
+ Indextts2,
167
180
  ]
168
181
  if model_spec.model_family == "whisper":
169
182
  if not model_spec.engine:
@@ -192,6 +205,8 @@ def create_audio_model_instance(
192
205
  model = KokoroMLXModel(model_uid, model_path, model_spec, **kwargs)
193
206
  elif model_spec.model_family == "MegaTTS":
194
207
  model = MegaTTSModel(model_uid, model_path, model_spec, **kwargs)
208
+ elif model_spec.model_family == "IndexTTS2":
209
+ model = Indextts2(model_uid, model_path, model_spec, **kwargs)
195
210
  else:
196
211
  raise Exception(f"Unsupported audio model family: {model_spec.model_family}")
197
212
  return model
@@ -0,0 +1,166 @@
1
+ # Copyright 2022-2025 XProbe Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import logging
15
+ import os
16
+ import sys
17
+ from typing import TYPE_CHECKING, Optional
18
+
19
+ from ..utils import set_all_random_seed
20
+
21
+ if TYPE_CHECKING:
22
+ from .core import AudioModelFamilyV2
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
class Indextts2:
    """Text-to-speech wrapper around the bundled ``indextts`` inference code.

    ``load`` builds the underlying ``IndexTTS2`` object and ``speech`` runs one
    synthesis request against it, returning the encoded audio as bytes.
    """

    def __init__(
        self,
        model_uid: str,
        model_path: str,
        model_spec: "AudioModelFamilyV2",
        device: Optional[str] = None,
        **kwargs,
    ):
        """Store the configuration; the model itself is created by ``load``.

        Args:
            model_uid: Identifier of this model instance.
            model_path: Model directory. ``load`` reads ``config.yaml`` from it
                and passes it to IndexTTS2 as ``model_dir``.
            model_spec: Model specification; also exposed as ``model_family``.
            device: Device value forwarded unchanged to IndexTTS2.
            **kwargs: Extra options. ``load`` reads ``use_fp16`` and
                ``use_deepspeed`` (both default to ``False``).
        """
        self.model_family = model_spec
        self._model_uid = model_uid
        self._model_path = model_path
        self._model_spec = model_spec
        self._device = device
        self._model = None
        self._kwargs = kwargs

    @property
    def model_ability(self):
        """Return the ``model_ability`` declared by the model specification."""
        return self._model_spec.model_ability

    def load(self):
        """Create the IndexTTS2 model from the files under ``model_path``."""
        # The yaml config loaded from model has hard-coded the import paths,
        # so the bundled thirdparty directory has to be importable. The guard
        # keeps repeated loads from adding the same entry again and again.
        thirdparty_dir = os.path.join(os.path.dirname(__file__), "../../thirdparty")
        if thirdparty_dir not in sys.path:
            sys.path.insert(0, thirdparty_dir)

        from indextts.infer_v2 import IndexTTS2

        config_path = os.path.join(self._model_path, "config.yaml")
        use_fp16 = self._kwargs.get("use_fp16", False)
        use_deepspeed = self._kwargs.get("use_deepspeed", False)

        logger.info("Loading IndexTTS2 model...")
        self._model = IndexTTS2(
            cfg_path=config_path,
            model_dir=self._model_path,
            use_fp16=use_fp16,
            device=self._device,
            use_deepspeed=use_deepspeed,
        )

    @staticmethod
    def _write_temp_wav(data: bytes, created: list) -> str:
        """Write ``data`` to a new ``.wav`` temp file and return its path.

        The path is appended to ``created`` before the write happens, so the
        caller's cleanup still removes the file when the write itself fails.
        """
        import tempfile

        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            created.append(tmp.name)
            tmp.write(data)
        return tmp.name

    @staticmethod
    def _remove_quietly(path: str) -> None:
        """Delete ``path``; log instead of raising when removal fails."""
        try:
            os.unlink(path)
        except OSError:
            logger.warning("Failed to remove temporary file %s", path, exc_info=True)

    def speech(
        self,
        input: str,
        voice: str,
        response_format: str = "mp3",
        speed: float = 1.0,
        stream: bool = False,
        **kwargs,
    ):
        """Synthesize ``input`` using a caller-supplied reference recording.

        Args:
            input: Text to synthesize.
            voice: Accepted for interface compatibility; not used here.
            response_format: Output container; upper-cased and handed to
                ``soundfile`` as the format name.
            speed: Accepted for interface compatibility; not used here.
            stream: Must be falsy; streaming is not supported.
            **kwargs: Recognised keys, all forwarded to the model's ``infer``
                except ``seed``: ``prompt_speech`` (required reference audio
                bytes), ``emo_prompt_speech`` (optional emotion reference
                audio bytes), ``emo_alpha`` (default ``1.0``), ``emo_text``,
                ``use_random`` (default ``False``), ``emo_vector``,
                ``use_emo_text`` (default ``False``) and ``seed`` (default
                ``0``, passed to ``set_all_random_seed``).

        Returns:
            The generated audio encoded in ``response_format``, as bytes.

        Raises:
            Exception: If ``stream`` is truthy.
            ValueError: If ``prompt_speech`` is missing.
        """
        # Validate the request first so that bad calls fail with a clear
        # message before any heavy import or temporary file is involved.
        if stream:
            raise Exception("IndexTTS2 does not support stream generation.")

        prompt_speech: Optional[bytes] = kwargs.pop("prompt_speech", None)
        emo_prompt_speech: Optional[bytes] = kwargs.pop("emo_prompt_speech", None)
        emo_alpha: float = kwargs.pop("emo_alpha", 1.0)
        emo_text: Optional[str] = kwargs.pop("emo_text", None)
        use_random: bool = kwargs.pop("use_random", False)
        emo_vector: Optional[list] = kwargs.pop("emo_vector", None)
        seed: Optional[int] = kwargs.pop("seed", 0)
        use_emo_text: bool = kwargs.pop("use_emo_text", False)

        if prompt_speech is None:
            # IndexTTS2 requires reference audio for voice cloning
            # We'll provide a helpful error message with usage examples
            raise ValueError(
                "IndexTTS2 requires a reference audio for voice cloning.\n"
                "Please provide a short audio sample (3-10 seconds) as 'prompt_speech' parameter.\n"
                "Example usage:\n"
                " with open('reference.wav', 'rb') as f:\n"
                " prompt_speech = f.read()\n"
                " audio_bytes = model.speech(\n"
                " input='Hello, world!',\n"
                " voice='default',\n"
                " prompt_speech=prompt_speech\n"
                " )\n\n"
                "For emotion control, you can also add:\n"
                " emo_prompt_speech=emotion_audio_bytes # Optional: emotion reference\n"
                " emo_text='happy and cheerful' # Optional: emotion description\n"
                " emo_alpha=1.5 # Optional: emotion intensity"
            )

        assert self._model is not None

        from io import BytesIO

        import soundfile

        set_all_random_seed(seed)

        # Every temp file is tracked here and removed in ``finally``, no
        # matter which step fails.
        temp_paths: list = []
        try:
            # The model API works on file paths, so the reference audio is
            # spilled to disk first.
            temp_prompt_path = self._write_temp_wav(prompt_speech, temp_paths)

            emo_prompt_path = None
            if emo_prompt_speech is not None:
                emo_prompt_path = self._write_temp_wav(emo_prompt_speech, temp_paths)

            # Reserve an empty file for the model to write its result into.
            output_path = self._write_temp_wav(b"", temp_paths)

            # Generate audio
            self._model.infer(
                spk_audio_prompt=temp_prompt_path,
                text=input,
                output_path=output_path,
                emo_audio_prompt=emo_prompt_path,
                emo_alpha=emo_alpha,
                emo_text=emo_text,
                use_random=use_random,
                emo_vector=emo_vector,
                use_emo_text=use_emo_text,
            )

            # Read generated audio and convert to requested format
            audio, sample_rate = soundfile.read(output_path)
            # soundfile returns a 1-D array for mono and (frames, channels)
            # otherwise; use the real channel count instead of assuming mono.
            channels = 1 if audio.ndim == 1 else audio.shape[1]

            with BytesIO() as out:
                with soundfile.SoundFile(
                    out, "w", sample_rate, channels, format=response_format.upper()
                ) as f:
                    f.write(audio)
                return out.getvalue()
        finally:
            # Clean up temp files; each removal is attempted independently.
            for path in temp_paths:
                self._remove_quietly(path)
@@ -685,7 +685,7 @@
685
685
  "model_revision": "4dcc16f297f2ff98a17b3726b16f5de5a5e45672"
686
686
  },
687
687
  "modelscope": {
688
- "model_id": "SWivid/F5-TTS_Emilia-ZH-EN",
688
+ "model_id": "AI-ModelScope/F5-TTS",
689
689
  "model_revision": "master"
690
690
  }
691
691
  }
@@ -862,26 +862,6 @@
862
862
  "model_revision": "master"
863
863
  }
864
864
  }
865
- },
866
- {
867
- "version": 2,
868
- "model_name": "Kokoro-82M-v1.1-zh",
869
- "model_family": "Kokoro-zh",
870
- "model_ability": [
871
- "text2audio",
872
- "text2audio_zero_shot"
873
- ],
874
- "multilingual": false,
875
- "model_src": {
876
- "huggingface": {
877
- "model_id": "hexgrad/Kokoro-82M-v1.1-zh",
878
- "model_revision": "01e7505bd6a7a2ac4975463114c3a7650a9f7218"
879
- },
880
- "modelscope": {
881
- "model_id": "AI-ModelScope/Kokoro-82M-v1.1-zh",
882
- "model_revision": "master"
883
- }
884
- }
885
865
  },
886
866
  {
887
867
  "version": 2,
@@ -931,5 +911,62 @@
931
911
  "model_revision": "master"
932
912
  }
933
913
  }
914
+ },
915
+ {
916
+ "version": 2,
917
+ "model_name": "Kokoro-82M-v1.1-zh",
918
+ "model_family": "Kokoro-zh",
919
+ "model_ability": [
920
+ "text2audio",
921
+ "text2audio_zero_shot"
922
+ ],
923
+ "multilingual": false,
924
+ "model_src": {
925
+ "huggingface": {
926
+ "model_id": "hexgrad/Kokoro-82M-v1.1-zh",
927
+ "model_revision": "01e7505bd6a7a2ac4975463114c3a7650a9f7218"
928
+ },
929
+ "modelscope": {
930
+ "model_id": "AI-ModelScope/Kokoro-82M-v1.1-zh",
931
+ "model_revision": "master"
932
+ }
933
+ }
934
+ },
935
+ {
936
+ "version": 2,
937
+ "model_name": "IndexTTS2",
938
+ "model_family": "IndexTTS2",
939
+ "model_ability": [
940
+ "text2audio",
941
+ "text2audio_zero_shot",
942
+ "text2audio_voice_cloning",
943
+ "text2audio_emotion_control"
944
+ ],
945
+ "multilingual": true,
946
+ "virtualenv": {
947
+ "packages": [
948
+ "transformers==4.52.1",
949
+ "#system_torch#",
950
+ "#system_numpy#",
951
+ "json5",
952
+ "munch",
953
+ "matplotlib",
954
+ "flatten_dict",
955
+ "julius",
956
+ "tensorboard",
957
+ "randomname",
958
+ "argbind"
959
+ ]
960
+ },
961
+ "model_src": {
962
+ "huggingface": {
963
+ "model_id": "IndexTeam/IndexTTS-2",
964
+ "model_revision": "main"
965
+ },
966
+ "modelscope": {
967
+ "model_id": "IndexTeam/IndexTTS-2",
968
+ "model_revision": "master"
969
+ }
970
+ }
934
971
  }
935
972
  ]