xinference 1.10.0__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (328)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +473 -31
  3. xinference/client/restful/async_restful_client.py +178 -8
  4. xinference/client/restful/restful_client.py +151 -3
  5. xinference/core/supervisor.py +99 -53
  6. xinference/core/worker.py +10 -0
  7. xinference/deploy/cmdline.py +15 -0
  8. xinference/model/audio/core.py +21 -6
  9. xinference/model/audio/indextts2.py +166 -0
  10. xinference/model/audio/model_spec.json +58 -21
  11. xinference/model/image/model_spec.json +159 -90
  12. xinference/model/image/stable_diffusion/core.py +13 -4
  13. xinference/model/llm/__init__.py +6 -2
  14. xinference/model/llm/llm_family.json +1299 -174
  15. xinference/model/llm/mlx/distributed_models/core.py +41 -0
  16. xinference/model/llm/mlx/distributed_models/qwen2.py +1 -2
  17. xinference/model/llm/sglang/core.py +44 -11
  18. xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +94 -32
  19. xinference/model/llm/tool_parsers/qwen_tool_parser.py +29 -4
  20. xinference/model/llm/transformers/chatglm.py +3 -0
  21. xinference/model/llm/transformers/core.py +129 -36
  22. xinference/model/llm/transformers/multimodal/minicpmv45.py +340 -0
  23. xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
  24. xinference/model/llm/transformers/utils.py +23 -0
  25. xinference/model/llm/utils.py +48 -32
  26. xinference/model/llm/vllm/core.py +207 -72
  27. xinference/model/utils.py +74 -31
  28. xinference/thirdparty/audiotools/__init__.py +10 -0
  29. xinference/thirdparty/audiotools/core/__init__.py +4 -0
  30. xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
  31. xinference/thirdparty/audiotools/core/display.py +194 -0
  32. xinference/thirdparty/audiotools/core/dsp.py +390 -0
  33. xinference/thirdparty/audiotools/core/effects.py +647 -0
  34. xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
  35. xinference/thirdparty/audiotools/core/loudness.py +320 -0
  36. xinference/thirdparty/audiotools/core/playback.py +252 -0
  37. xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
  38. xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
  39. xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
  40. xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
  41. xinference/thirdparty/audiotools/core/util.py +671 -0
  42. xinference/thirdparty/audiotools/core/whisper.py +97 -0
  43. xinference/thirdparty/audiotools/data/__init__.py +3 -0
  44. xinference/thirdparty/audiotools/data/datasets.py +517 -0
  45. xinference/thirdparty/audiotools/data/preprocess.py +81 -0
  46. xinference/thirdparty/audiotools/data/transforms.py +1592 -0
  47. xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
  48. xinference/thirdparty/audiotools/metrics/distance.py +131 -0
  49. xinference/thirdparty/audiotools/metrics/quality.py +159 -0
  50. xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
  51. xinference/thirdparty/audiotools/ml/__init__.py +5 -0
  52. xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
  53. xinference/thirdparty/audiotools/ml/decorators.py +440 -0
  54. xinference/thirdparty/audiotools/ml/experiment.py +90 -0
  55. xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
  56. xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
  57. xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
  58. xinference/thirdparty/audiotools/post.py +140 -0
  59. xinference/thirdparty/audiotools/preference.py +600 -0
  60. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +1 -1
  61. xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
  62. xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
  63. xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
  64. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
  65. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
  66. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
  67. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
  68. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  69. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
  70. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
  71. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
  72. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
  73. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
  74. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
  75. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
  76. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
  77. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
  78. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
  79. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
  80. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
  81. xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
  82. xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
  83. xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
  84. xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
  85. xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
  86. xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
  87. xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
  88. xinference/thirdparty/indextts/__init__.py +0 -0
  89. xinference/thirdparty/indextts/cli.py +65 -0
  90. xinference/thirdparty/indextts/gpt/__init__.py +0 -0
  91. xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
  92. xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
  93. xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
  94. xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
  95. xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
  96. xinference/thirdparty/indextts/gpt/model.py +713 -0
  97. xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
  98. xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
  99. xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
  100. xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
  101. xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
  102. xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
  103. xinference/thirdparty/indextts/infer.py +690 -0
  104. xinference/thirdparty/indextts/infer_v2.py +739 -0
  105. xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
  106. xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
  107. xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
  108. xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
  109. xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
  110. xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
  111. xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
  112. xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
  113. xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
  114. xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
  115. xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
  116. xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
  117. xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
  118. xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
  119. xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
  120. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
  121. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
  122. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
  123. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
  124. xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
  125. xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
  126. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
  127. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
  128. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  129. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
  130. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
  131. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
  132. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
  133. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
  134. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
  135. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
  136. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
  137. xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
  138. xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
  139. xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
  140. xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
  141. xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
  142. xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
  143. xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
  144. xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
  145. xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
  146. xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
  147. xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
  148. xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
  149. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
  150. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
  151. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
  152. xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
  153. xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
  154. xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
  155. xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
  156. xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
  157. xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
  158. xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
  159. xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
  160. xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
  161. xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
  162. xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
  163. xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
  164. xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
  165. xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
  166. xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
  167. xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
  168. xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
  169. xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
  170. xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
  171. xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
  172. xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
  173. xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
  174. xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
  175. xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
  176. xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
  177. xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
  178. xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
  179. xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
  180. xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
  181. xinference/thirdparty/indextts/utils/__init__.py +0 -0
  182. xinference/thirdparty/indextts/utils/arch_util.py +120 -0
  183. xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
  184. xinference/thirdparty/indextts/utils/common.py +121 -0
  185. xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
  186. xinference/thirdparty/indextts/utils/front.py +536 -0
  187. xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
  188. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
  189. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
  190. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
  191. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
  192. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
  193. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
  194. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
  195. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
  196. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
  197. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
  198. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
  199. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
  200. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
  201. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
  202. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
  203. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
  204. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
  205. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
  206. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
  207. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
  208. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
  209. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
  210. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
  211. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
  212. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
  213. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
  214. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
  215. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
  216. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
  217. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
  218. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
  219. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
  220. xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
  221. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
  222. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
  223. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
  224. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
  225. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
  226. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
  227. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
  228. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
  229. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
  230. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
  231. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
  232. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
  233. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
  234. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
  235. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
  236. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
  237. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
  238. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
  239. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
  240. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
  241. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
  242. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
  243. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
  244. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
  245. xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
  246. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
  247. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
  248. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
  249. xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
  250. xinference/thirdparty/indextts/utils/text_utils.py +41 -0
  251. xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
  252. xinference/thirdparty/indextts/utils/utils.py +93 -0
  253. xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
  254. xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
  255. xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
  256. xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
  257. xinference/thirdparty/melo/text/chinese_mix.py +2 -2
  258. xinference/types.py +9 -0
  259. xinference/ui/gradio/media_interface.py +66 -8
  260. xinference/ui/web/ui/build/asset-manifest.json +6 -6
  261. xinference/ui/web/ui/build/index.html +1 -1
  262. xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
  263. xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
  264. xinference/ui/web/ui/build/static/js/main.45e78536.js +3 -0
  265. xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.45e78536.js.LICENSE.txt} +0 -7
  266. xinference/ui/web/ui/build/static/js/main.45e78536.js.map +1 -0
  267. xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
  268. xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
  269. xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
  270. xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
  271. xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
  272. xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
  273. xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
  274. xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
  275. xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
  276. xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
  277. xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
  278. xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
  279. xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
  280. xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
  281. xinference/ui/web/ui/node_modules/.cache/babel-loader/ea2a26361204e70cf1018d6990fb6354bed82b3ac69690391e0f100385e7abb7.json +1 -0
  282. xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
  283. xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
  284. xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
  285. xinference/ui/web/ui/package-lock.json +0 -34
  286. xinference/ui/web/ui/package.json +0 -1
  287. xinference/ui/web/ui/src/locales/en.json +9 -3
  288. xinference/ui/web/ui/src/locales/ja.json +9 -3
  289. xinference/ui/web/ui/src/locales/ko.json +9 -3
  290. xinference/ui/web/ui/src/locales/zh.json +9 -3
  291. {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/METADATA +24 -6
  292. {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/RECORD +296 -77
  293. xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
  294. xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
  295. xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
  296. xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
  297. xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
  298. xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
  299. xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
  300. xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
  301. xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
  302. xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
  303. xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
  304. xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
  305. xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
  306. xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
  307. xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
  308. xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
  309. xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
  310. xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
  311. xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
  312. xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
  313. xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
  314. xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
  315. xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
  316. xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
  317. xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
  318. xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
  319. xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
  320. xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
  321. xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
  322. xinference/ui/web/ui/node_modules/select/bower.json +0 -13
  323. xinference/ui/web/ui/node_modules/select/package.json +0 -29
  324. xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
  325. {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/WHEEL +0 -0
  326. {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/entry_points.txt +0 -0
  327. {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/licenses/LICENSE +0 -0
  328. {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/top_level.txt +0 -0
xinference/thirdparty/indextts/vqvae/xtts_dvae.py ADDED
@@ -0,0 +1,395 @@
+ import functools
+ from math import sqrt
+
+ import torch
+ import torch.distributed as distributed
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import torchaudio
+ from einops import rearrange
+
+
+ def default(val, d):
+     return val if val is not None else d
+
+
+ def eval_decorator(fn):
+     def inner(model, *args, **kwargs):
+         was_training = model.training
+         model.eval()
+         out = fn(model, *args, **kwargs)
+         model.train(was_training)
+         return out
+
+     return inner
+
+
+ def dvae_wav_to_mel(
+     wav, mel_norms_file="../experiments/clips_mel_norms.pth", mel_norms=None, device=torch.device("cpu")
+ ):
+     mel_stft = torchaudio.transforms.MelSpectrogram(
+         n_fft=1024,
+         hop_length=256,
+         win_length=1024,
+         power=2,
+         normalized=False,
+         sample_rate=22050,
+         f_min=0,
+         f_max=8000,
+         n_mels=80,
+         norm="slaney",
+     ).to(device)
+     wav = wav.to(device)
+     mel = mel_stft(wav)
+     mel = torch.log(torch.clamp(mel, min=1e-5))
+     if mel_norms is None:
+         mel_norms = torch.load(mel_norms_file, map_location=device)
+     mel = mel / mel_norms.unsqueeze(0).unsqueeze(-1)
+     return mel
+
+
+ class Quantize(nn.Module):
+     def __init__(self, dim, n_embed, decay=0.99, eps=1e-5, balancing_heuristic=False, new_return_order=False):
+         super().__init__()
+
+         self.dim = dim
+         self.n_embed = n_embed
+         self.decay = decay
+         self.eps = eps
+
+         self.balancing_heuristic = balancing_heuristic
+         self.codes = None
+         self.max_codes = 64000
+         self.codes_full = False
+         self.new_return_order = new_return_order
+
+         embed = torch.randn(dim, n_embed)
+         self.register_buffer("embed", embed)
+         self.register_buffer("cluster_size", torch.zeros(n_embed))
+         self.register_buffer("embed_avg", embed.clone())
+
+     def forward(self, input, return_soft_codes=False):
+         if self.balancing_heuristic and self.codes_full:
+             h = torch.histc(self.codes, bins=self.n_embed, min=0, max=self.n_embed) / len(self.codes)
+             mask = torch.logical_or(h > 0.9, h < 0.01).unsqueeze(1)
+             ep = self.embed.permute(1, 0)
+             ea = self.embed_avg.permute(1, 0)
+             rand_embed = torch.randn_like(ep) * mask
+             self.embed = (ep * ~mask + rand_embed).permute(1, 0)
+             self.embed_avg = (ea * ~mask + rand_embed).permute(1, 0)
+             self.cluster_size = self.cluster_size * ~mask.squeeze()
+             if torch.any(mask):
+                 print(f"Reset {torch.sum(mask)} embedding codes.")
+                 self.codes = None
+                 self.codes_full = False
+
+         flatten = input.reshape(-1, self.dim)
+         dist = flatten.pow(2).sum(1, keepdim=True) - 2 * flatten @ self.embed + self.embed.pow(2).sum(0, keepdim=True)
+         soft_codes = -dist
+         _, embed_ind = soft_codes.max(1)
+         embed_onehot = F.one_hot(embed_ind, self.n_embed).type(flatten.dtype)
+         embed_ind = embed_ind.view(*input.shape[:-1])
+         quantize = self.embed_code(embed_ind)
+
+         if self.balancing_heuristic:
+             if self.codes is None:
+                 self.codes = embed_ind.flatten()
+             else:
+                 self.codes = torch.cat([self.codes, embed_ind.flatten()])
+                 if len(self.codes) > self.max_codes:
+                     self.codes = self.codes[-self.max_codes :]
+                     self.codes_full = True
+
+         if self.training:
+             embed_onehot_sum = embed_onehot.sum(0)
+             embed_sum = flatten.transpose(0, 1) @ embed_onehot
+
+             if distributed.is_initialized() and distributed.get_world_size() > 1:
+                 distributed.all_reduce(embed_onehot_sum)
+                 distributed.all_reduce(embed_sum)
+
+             self.cluster_size.data.mul_(self.decay).add_(embed_onehot_sum, alpha=1 - self.decay)
+             self.embed_avg.data.mul_(self.decay).add_(embed_sum, alpha=1 - self.decay)
+             n = self.cluster_size.sum()
+             cluster_size = (self.cluster_size + self.eps) / (n + self.n_embed * self.eps) * n
+             embed_normalized = self.embed_avg / cluster_size.unsqueeze(0)
+             self.embed.data.copy_(embed_normalized)
+
+         diff = (quantize.detach() - input).pow(2).mean()
+         quantize = input + (quantize - input).detach()
+
+         if return_soft_codes:
+             return quantize, diff, embed_ind, soft_codes.view(input.shape[:-1] + (-1,))
+         elif self.new_return_order:
+             return quantize, embed_ind, diff
+         else:
+             return quantize, diff, embed_ind
+
+     def embed_code(self, embed_id):
+         return F.embedding(embed_id, self.embed.transpose(0, 1))
+
+
+ # Fits a soft-discretized input to a normal-PDF across the specified dimension.
+ # In other words, attempts to force the discretization function to have a mean equal utilization across all discrete
+ # values with the specified expected variance.
+ class DiscretizationLoss(nn.Module):
+     def __init__(self, discrete_bins, dim, expected_variance, store_past=0):
+         super().__init__()
+         self.discrete_bins = discrete_bins
+         self.dim = dim
+         self.dist = torch.distributions.Normal(0, scale=expected_variance)
+         if store_past > 0:
+             self.record_past = True
+             self.register_buffer("accumulator_index", torch.zeros(1, dtype=torch.long, device="cpu"))
+             self.register_buffer("accumulator_filled", torch.zeros(1, dtype=torch.long, device="cpu"))
+             self.register_buffer("accumulator", torch.zeros(store_past, discrete_bins))
+         else:
+             self.record_past = False
+
+     def forward(self, x):
+         other_dims = set(range(len(x.shape))) - set([self.dim])
+         averaged = x.sum(dim=tuple(other_dims)) / x.sum()
+         averaged = averaged - averaged.mean()
+
+         if self.record_past:
+             acc_count = self.accumulator.shape[0]
+             avg = averaged.detach().clone()
+             if self.accumulator_filled > 0:
+                 averaged = torch.mean(self.accumulator, dim=0) * (acc_count - 1) / acc_count + averaged / acc_count
+
+             # Also push averaged into the accumulator.
+             self.accumulator[self.accumulator_index] = avg
+             self.accumulator_index += 1
+             if self.accumulator_index >= acc_count:
+                 self.accumulator_index *= 0
+                 if self.accumulator_filled <= 0:
+                     self.accumulator_filled += 1
+
+         return torch.sum(-self.dist.log_prob(averaged))
+
+
+ class ResBlock(nn.Module):
+     def __init__(self, chan, conv, activation):
+         super().__init__()
+         self.net = nn.Sequential(
+             conv(chan, chan, 3, padding=1),
+             activation(),
+             conv(chan, chan, 3, padding=1),
+             activation(),
+             conv(chan, chan, 1),
+         )
+
+     def forward(self, x):
+         return self.net(x) + x
+
+
+ class UpsampledConv(nn.Module):
+     def __init__(self, conv, *args, **kwargs):
+         super().__init__()
+         assert "stride" in kwargs.keys()
+         self.stride = kwargs["stride"]
+         del kwargs["stride"]
+         self.conv = conv(*args, **kwargs)
+
+     def forward(self, x):
+         up = nn.functional.interpolate(x, scale_factor=self.stride, mode="nearest")
+         return self.conv(up)
+
+
+ # DiscreteVAE partially derived from lucidrains DALLE implementation
+ # Credit: https://github.com/lucidrains/DALLE-pytorch
+ class DiscreteVAE(nn.Module):
+     def __init__(
+         self,
+         positional_dims=2,
+         num_tokens=512,
+         codebook_dim=512,
+         num_layers=3,
+         num_resnet_blocks=0,
+         hidden_dim=64,
+         channels=3,
+         stride=2,
+         kernel_size=4,
+         use_transposed_convs=True,
+         encoder_norm=False,
+         activation="relu",
+         smooth_l1_loss=False,
+         straight_through=False,
+         normalization=None,  # ((0.5,) * 3, (0.5,) * 3),
+         record_codes=False,
+         discretization_loss_averaging_steps=100,
+         lr_quantizer_args={},
+     ):
+         super().__init__()
+         has_resblocks = num_resnet_blocks > 0
+
+         self.num_tokens = num_tokens
+         self.num_layers = num_layers
+         self.straight_through = straight_through
+         self.positional_dims = positional_dims
+         self.discrete_loss = DiscretizationLoss(
+             num_tokens, 2, 1 / (num_tokens * 2), discretization_loss_averaging_steps
+         )
+
+         assert positional_dims > 0 and positional_dims < 3  # This VAE only supports 1d and 2d inputs for now.
+         if positional_dims == 2:
+             conv = nn.Conv2d
+             conv_transpose = nn.ConvTranspose2d
+         else:
+             conv = nn.Conv1d
+             conv_transpose = nn.ConvTranspose1d
+         if not use_transposed_convs:
+             conv_transpose = functools.partial(UpsampledConv, conv)
+
+         if activation == "relu":
+             act = nn.ReLU
+         elif activation == "silu":
+             act = nn.SiLU
+         else:
+             assert NotImplementedError()
+
+         enc_layers = []
+         dec_layers = []
+
+         if num_layers > 0:
+             enc_chans = [hidden_dim * 2**i for i in range(num_layers)]
+             dec_chans = list(reversed(enc_chans))
+
+             enc_chans = [channels, *enc_chans]
+
+             dec_init_chan = codebook_dim if not has_resblocks else dec_chans[0]
+             dec_chans = [dec_init_chan, *dec_chans]
+
+             enc_chans_io, dec_chans_io = map(lambda t: list(zip(t[:-1], t[1:])), (enc_chans, dec_chans))
+
+             pad = (kernel_size - 1) // 2
+             for (enc_in, enc_out), (dec_in, dec_out) in zip(enc_chans_io, dec_chans_io):
+                 enc_layers.append(nn.Sequential(conv(enc_in, enc_out, kernel_size, stride=stride, padding=pad), act()))
+                 if encoder_norm:
+                     enc_layers.append(nn.GroupNorm(8, enc_out))
+                 dec_layers.append(
+                     nn.Sequential(conv_transpose(dec_in, dec_out, kernel_size, stride=stride, padding=pad), act())
+                 )
+             dec_out_chans = dec_chans[-1]
+             innermost_dim = dec_chans[0]
+         else:
+             enc_layers.append(nn.Sequential(conv(channels, hidden_dim, 1), act()))
+             dec_out_chans = hidden_dim
+             innermost_dim = hidden_dim
+
+         for _ in range(num_resnet_blocks):
+             dec_layers.insert(0, ResBlock(innermost_dim, conv, act))
+             enc_layers.append(ResBlock(innermost_dim, conv, act))
+
+         if num_resnet_blocks > 0:
+             dec_layers.insert(0, conv(codebook_dim, innermost_dim, 1))
+
+         enc_layers.append(conv(innermost_dim, codebook_dim, 1))
+         dec_layers.append(conv(dec_out_chans, channels, 1))
+
+         self.encoder = nn.Sequential(*enc_layers)
+         self.decoder = nn.Sequential(*dec_layers)
+
+         self.loss_fn = F.smooth_l1_loss if smooth_l1_loss else F.mse_loss
+         self.codebook = Quantize(codebook_dim, num_tokens, new_return_order=True)
+
+         # take care of normalization within class
+         self.normalization = normalization
+         self.record_codes = record_codes
+         if record_codes:
+             self.codes = torch.zeros((1228800,), dtype=torch.long)
+             self.code_ind = 0
+             self.total_codes = 0
+         self.internal_step = 0
+
+     def norm(self, images):
+         if not self.normalization is not None:
+             return images
+
+         means, stds = map(lambda t: torch.as_tensor(t).to(images), self.normalization)
+         arrange = "c -> () c () ()" if self.positional_dims == 2 else "c -> () c ()"
+         means, stds = map(lambda t: rearrange(t, arrange), (means, stds))
+         images = images.clone()
+         images.sub_(means).div_(stds)
+         return images
+
+     def get_debug_values(self, step, __):
+         if self.record_codes and self.total_codes > 0:
+             # Report annealing schedule
+             return {"histogram_codes": self.codes[: self.total_codes]}
+         else:
+             return {}
+
+     @torch.no_grad()
+     @eval_decorator
+     def get_codebook_indices(self, images):
+         img = self.norm(images)
+         logits = self.encoder(img).permute((0, 2, 3, 1) if len(img.shape) == 4 else (0, 2, 1))
+         sampled, codes, _ = self.codebook(logits)
+         self.log_codes(codes)
+         return codes
+
+     def decode(self, img_seq):
+         self.log_codes(img_seq)
+         if hasattr(self.codebook, "embed_code"):
+             image_embeds = self.codebook.embed_code(img_seq)
+         else:
+             image_embeds = F.embedding(img_seq, self.codebook.codebook)
+         b, n, d = image_embeds.shape
+
+         kwargs = {}
+         if self.positional_dims == 1:
+             arrange = "b n d -> b d n"
+         else:
+             h = w = int(sqrt(n))
+             arrange = "b (h w) d -> b d h w"
+             kwargs = {"h": h, "w": w}
+         image_embeds = rearrange(image_embeds, arrange, **kwargs)
+         images = [image_embeds]
+         for layer in self.decoder:
+             images.append(layer(images[-1]))
+         return images[-1], images[-2]
+
+     def infer(self, img):
+         img = self.norm(img)
+         logits = self.encoder(img).permute((0, 2, 3, 1) if len(img.shape) == 4 else (0, 2, 1))
+         sampled, codes, commitment_loss = self.codebook(logits)
+         return self.decode(codes)
+
+     # Note: This module is not meant to be run in forward() except while training. It has special logic which performs
+     # evaluation using quantized values when it detects that it is being run in eval() mode, which will be substantially
+     # more lossy (but useful for determining network performance).
+     def forward(self, img):
+         img = self.norm(img)
+         logits = self.encoder(img).permute((0, 2, 3, 1) if len(img.shape) == 4 else (0, 2, 1))
+         sampled, codes, commitment_loss = self.codebook(logits)
+         sampled = sampled.permute((0, 3, 1, 2) if len(img.shape) == 4 else (0, 2, 1))
+
+         if self.training:
+             out = sampled
+             for d in self.decoder:
+                 out = d(out)
+             self.log_codes(codes)
+         else:
+             # This is non-differentiable, but gives a better idea of how the network is actually performing.
+             out, _ = self.decode(codes)
+
+         # reconstruction loss
+         out = out[..., :img.shape[-1]]
+         recon_loss = self.loss_fn(img, out, reduction="mean")
+         ssim_loss = torch.zeros(size=(1,)).cuda()
+
+         return recon_loss, ssim_loss, commitment_loss, out
+
+     def log_codes(self, codes):
+         # This is so we can debug the distribution of codes being learned.
+         if self.record_codes and self.internal_step % 10 == 0:
+             codes = codes.flatten()
+             l = codes.shape[0]
+             i = self.code_ind if (self.codes.shape[0] - self.code_ind) > l else self.codes.shape[0] - l
+             self.codes[i : i + l] = codes.cpu()
+             self.code_ind = self.code_ind + l
+             if self.code_ind >= self.codes.shape[0]:
+                 self.code_ind = 0
+             self.total_codes += 1
+         self.internal_step += 1
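
For orientation, here is a minimal sketch of how this vendored DVAE tokenizes and reconstructs a mel spectrogram. The constructor values are illustrative only, not the configuration the shipped IndexTTS checkpoints use:

import torch

from xinference.thirdparty.indextts.vqvae.xtts_dvae import DiscreteVAE

# Example hyperparameters only; real checkpoints define their own.
dvae = DiscreteVAE(
    positional_dims=1,  # 1d input: (batch, n_mels, frames)
    num_tokens=1024,    # codebook size
    codebook_dim=512,
    num_layers=2,       # two stride-2 convs -> 4x temporal downsampling
    channels=80,        # mel bands
)
dvae.eval()

mel = torch.randn(1, 80, 256)           # stand-in for dvae_wav_to_mel output
codes = dvae.get_codebook_indices(mel)  # discrete token ids, shape (1, 64)
recon, _ = dvae.decode(codes)           # reconstructed mel, shape (1, 80, 256)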
xinference/thirdparty/melo/text/chinese_mix.py CHANGED
@@ -209,13 +209,13 @@ def _g2p_v2(segments):
      for text in segments:
          assert spliter not in text
          # replace all english words
-         text = re.sub('([a-zA-Z\s]+)', lambda x: f'{spliter}{x.group(1)}{spliter}', text)
+         text = re.sub(r'([a-zA-Z\s]+)', lambda x: f'{spliter}{x.group(1)}{spliter}', text)
          texts = text.split(spliter)
          texts = [t for t in texts if len(t) > 0]


          for text in texts:
-             if re.match('[a-zA-Z\s]+', text):
+             if re.match(r'[a-zA-Z\s]+', text):
                  # english
                  tokenized_en = tokenizer.tokenize(text)
                  phones_en, tones_en, word2ph_en = g2p_en(text=None, pad_start_end=False, tokenized=tokenized_en)
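
This two-line change only adds the r prefix: Python leaves unknown escapes such as \s in the string, so the compiled pattern is byte-for-byte the same, but non-raw literals with invalid escapes have emitted DeprecationWarning since Python 3.6 and SyntaxWarning since 3.12. A quick illustration, with | standing in for the spliter marker:

import re

# Raw string: \s reaches the regex engine untouched, and no warning is emitted.
pattern = r'([a-zA-Z\s]+)'
print(re.sub(pattern, lambda m: f'|{m.group(1)}|', '你好 hello world 再见'))
# prints: 你好| hello world |再见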
xinference/types.py CHANGED
@@ -47,6 +47,15 @@ class ImageList(TypedDict):
      data: List[Image]


+ class ImageEditRequest(TypedDict, total=False):
+     image: Union[Union[str, bytes], List[Union[str, bytes]]]
+     mask: Optional[Union[str, bytes]]
+     prompt: str
+     n: int
+     size: Optional[str]
+     response_format: str
+
+
  class SDAPIResult(TypedDict):
      images: List[str]
      parameters: dict
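
Because the TypedDict is declared with total=False, every key is optional, and image accepts either a single value or a list. A hypothetical payload (paths and prompt invented for illustration):

from xinference.types import ImageEditRequest

req: ImageEditRequest = {
    "image": ["/tmp/a.png", "/tmp/b.png"],  # one str/bytes value or a list
    "prompt": "replace the background with a beach",
    "n": 1,
    "size": "1024x1024",
    "response_format": "b64_json",
}  # "mask" is omitted; total=False makes that legal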
xinference/ui/gradio/media_interface.py CHANGED
@@ -217,7 +217,7 @@ class MediaInterface:
          def image_generate_image(
              prompt: str,
              negative_prompt: str,
-             image: PIL.Image.Image,
+             images: Optional[List[PIL.Image.Image]],
              n: int,
              size_width: int,
              size_height: int,
@@ -250,8 +250,21 @@
                  kwargs["strength"] = strength
              sampler_name = None if sampler_name == "default" else sampler_name

-             bio = io.BytesIO()
-             image.save(bio, format="png")
+             # Handle single image or multiple images
+             if images is None:
+                 raise ValueError("Please upload at least one image")
+
+             # Process uploaded files to get PIL images
+             processed_images = process_uploaded_files(images)
+             if processed_images is None:
+                 raise ValueError("Please upload at least one image")
+
+             # Convert all images to bytes
+             image_bytes_list = []
+             for img in processed_images:
+                 bio = io.BytesIO()
+                 img.save(bio, format="png")
+                 image_bytes_list.append(bio.getvalue())

              response = None
              exc = None
@@ -265,7 +278,7 @@
                      prompt=prompt,
                      negative_prompt=negative_prompt,
                      n=n,
-                     image=bio.getvalue(),
+                     image=image_bytes_list,
                      size=size,
                      response_format="b64_json",
                      num_inference_steps=num_inference_steps,
@@ -300,7 +313,7 @@

              return images

-         with gr.Blocks() as image2image_inteface:
+         with gr.Blocks() as image2image_interface:
              with gr.Column():
                  with gr.Row():
                      with gr.Column(scale=10):
@@ -341,16 +354,61 @@

                  with gr.Row():
                      with gr.Column(scale=1):
-                         uploaded_image = gr.Image(type="pil", label="Upload Image")
+                         gr.Markdown("### Upload Images")
+                         gr.Markdown(
+                             "*Multiple images supported for image-to-image generation*"
+                         )
+                         uploaded_images = gr.File(
+                             file_count="multiple",
+                             file_types=["image"],
+                             label="Upload Images",
+                         )
+                         image_preview = gr.Gallery(label="Image Preview", height=300)
                      with gr.Column(scale=1):
                          output_gallery = gr.Gallery()

+             # Function to handle file uploads and convert to PIL images
+             def process_uploaded_files(files):
+                 if files is None:
+                     return None
+
+                 images = []
+                 for file_info in files:
+                     if isinstance(file_info, dict) and "name" in file_info:
+                         # Handle file info format from gradio
+                         file_path = file_info["name"]
+                         try:
+                             img = PIL.Image.open(file_path)
+                             images.append(img)
+                         except Exception as e:
+                             logger.warning(f"Failed to load image {file_path}: {e}")
+                     elif hasattr(file_info, "name"):
+                         # Handle file object
+                         try:
+                             img = PIL.Image.open(file_info.name)
+                             images.append(img)
+                         except Exception as e:
+                             logger.warning(
+                                 f"Failed to load image {file_info.name}: {e}"
+                             )
+
+                 return images if images else None
+
+             # Update gallery when files are uploaded
+             def update_gallery(files):
+                 images = process_uploaded_files(files)
+                 return images if images else []
+
+             uploaded_images.change(
+                 update_gallery, inputs=[uploaded_images], outputs=[image_preview]
+             )
+
              generate_button.click(
                  image_generate_image,
                  inputs=[
                      prompt,
                      negative_prompt,
-                     uploaded_image,
+                     uploaded_images,
                      n,
                      size_width,
                      size_height,
@@ -362,7 +420,7 @@
                  ],
                  outputs=output_gallery,
              )
-         return image2image_inteface
+         return image2image_interface

      def inpainting_interface(self) -> "gr.Blocks":
          from ...model.image.stable_diffusion.core import SAMPLING_METHODS
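
The rewritten handler serializes every accepted upload to PNG bytes before calling the model. A self-contained sketch of just that conversion step; the helper name is ours for illustration, not a public xinference API:

import io
from typing import List

import PIL.Image


def images_to_png_bytes(images: List[PIL.Image.Image]) -> List[bytes]:
    # Mirrors the loop in image_generate_image: one PNG blob per image.
    payload = []
    for img in images:
        bio = io.BytesIO()
        img.save(bio, format="png")
        payload.append(bio.getvalue())
    return payload


imgs = [PIL.Image.new("RGB", (64, 64), color) for color in ("red", "blue")]
print([len(b) for b in images_to_png_bytes(imgs)])  # two small PNG payloads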
xinference/ui/web/ui/build/asset-manifest.json CHANGED
@@ -1,14 +1,14 @@
  {
    "files": {
-     "main.css": "./static/css/main.013f296b.css",
-     "main.js": "./static/js/main.1086c759.js",
+     "main.css": "./static/css/main.5ea97072.css",
+     "main.js": "./static/js/main.45e78536.js",
      "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
      "index.html": "./index.html",
-     "main.013f296b.css.map": "./static/css/main.013f296b.css.map",
-     "main.1086c759.js.map": "./static/js/main.1086c759.js.map"
+     "main.5ea97072.css.map": "./static/css/main.5ea97072.css.map",
+     "main.45e78536.js.map": "./static/js/main.45e78536.js.map"
    },
    "entrypoints": [
-     "static/css/main.013f296b.css",
-     "static/js/main.1086c759.js"
+     "static/css/main.5ea97072.css",
+     "static/js/main.45e78536.js"
    ]
  }
xinference/ui/web/ui/build/index.html CHANGED
@@ -1 +1 @@
- <!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.1086c759.js"></script><link href="./static/css/main.013f296b.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+ <!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.45e78536.js"></script><link href="./static/css/main.5ea97072.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
xinference/ui/web/ui/build/static/css/main.5ea97072.css ADDED
@@ -0,0 +1,2 @@
+ .container{border-radius:20px!important;cursor:pointer;display:block}.container,.descriptionCard{height:300px;position:relative;width:300px}.descriptionCard{border-radius:20px;flex-direction:column;left:-1px;padding:20px;top:-1px}.cardTitle,.descriptionCard{display:flex;justify-content:space-between}.iconButtonBox{align-items:center;display:flex}.drawerCard{display:flex;flex-direction:column;min-height:100%;min-width:350px;overflow-y:scroll;padding:20px 80px 100px;position:relative;width:60vw}.pasteText{color:#1976d2;cursor:pointer;font-size:18px!important;margin-inline:10px}.pasteText:hover{color:#1976d2b3}.copyToCommandLine{color:#1976d2;cursor:pointer;font-size:16px!important}.copyToCommandLine:hover{color:#1976d2b3}.css-1be5mm1-MuiLinearProgress-root-MuiMobileStepper-progress,.css-r5rjnf-MuiLinearProgress-root-MuiMobileStepper-progress{width:100%!important}.pathBox{cursor:pointer;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;width:160px}.pathBox2{width:300px}.empty{color:#555;font-size:20px;left:50%;position:absolute;top:30%;-webkit-transform:translate(-50%);transform:translate(-50%)}.deleteDialog{align-items:center;display:flex}.warningIcon{color:#ed6c02;margin-right:10px}.textHighlight .MuiInputBase-input,.textHighlight .MuiSelect-select{color:#1976d2}.formBox{max-height:80vh;max-width:50vw;min-width:50vw;overflow:auto;padding:40px 20px 0 0;position:relative;transition:all .4s ease-in-out}.broaden{max-width:100%;min-width:100%;padding-right:0}.show-json{align-items:center;display:flex;right:60px;top:90px}.icon,.show-json{position:absolute}.icon{cursor:pointer;margin-left:20px;right:-40px}.icon:hover{color:#1976d2}.arrow{font-size:24px!important}.jsonBox{min-height:80vh;position:relative;transition:all .4s ease-in-out;width:100%}.hide{overflow:hidden;-webkit-transform:translate(30vw);transform:translate(30vw);width:0}.checkboxWrapper{align-items:center;display:flex;flex-wrap:wrap;width:100%}.jsonBox-header{align-items:center;display:flex;justify-content:space-between}.jsonBox-title{font-weight:700;line-height:40px}.textarea{background-color:initial;border:1px solid #ddd;border-radius:5px;color:#666;height:calc(100% - 40px);padding:5px 10px;resize:none;width:100%}.addBtn{margin-left:20px!important}.item{border:1px solid #ddd;border-radius:10px;margin:10px 50px 0;overflow:hidden;padding:20px;position:relative}.item:hover .deleteBtn{-webkit-transform:translateX(-50px);transform:translateX(-50px)}.deleteBtn{background-color:#1976d2;border-radius:25px;height:50px;line-height:70px;position:absolute;right:20px;text-align:center;top:calc(50% - 25px);-webkit-transform:translateX(80px);transform:translateX(80px);transition:all .3s ease-in-out;width:50px}.deleteBtn:hover{box-shadow:0 0 10px #aaa;cursor:pointer}.deleteIcon{color:#fff;font-size:28px!important}.chat_template_box{align-items:start;display:flex;gap:10px}.chat_template_test{width:30%}.chat_template_test_mainBox{border:1px solid #ccc;border-radius:4px;height:137px;overflow:scroll;padding:10px}.chat_template_test_tip{color:rgba(0,0,0,.6);font-size:10px;margin:4px 14px 0}.test_res_box{border:1px solid #ddd;border-radius:4px;margin-top:5px;min-height:55px;padding:10px}.css-19qh8xo-MuiInputBase-input-MuiOutlinedInput-input.Mui-disabled{-webkit-text-fill-color:#000!important}
+ /*# sourceMappingURL=main.5ea97072.css.map*/
xinference/ui/web/ui/build/static/css/main.5ea97072.css.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"static/css/main.5ea97072.css","mappings":"AAAA,WAME,4BAA8B,CAD9B,cAAe,CAJf,aAMF,CACA,4BAJE,YAAa,CAFb,iBAAkB,CAClB,WAgBF,CAXA,iBAUE,kBAAmB,CARnB,qBAAsB,CAItB,SAAU,CAGV,YAAa,CAJb,QAMF,CACA,4BAXE,YAAa,CAEb,6BAYF,CACA,eAEE,kBAAmB,CADnB,YAEF,CACA,YAEE,YAAa,CACb,qBAAsB,CAEtB,eAAgB,CAEhB,eAAgB,CAChB,iBAAkB,CAJlB,uBAAwB,CAHxB,iBAAkB,CAKlB,UAGF,CACA,WAEE,aAAc,CACd,cAAe,CAFf,wBAA0B,CAG1B,kBACF,CACA,iBACE,eACF,CACA,mBAEE,aAAc,CACd,cAAe,CAFf,wBAGF,CACA,yBACE,eACF,CAIA,2HACE,oBACF,CACA,SAEE,cAAe,CACf,eAAgB,CAEhB,sBAAuB,CADvB,kBAAmB,CAHnB,WAKF,CACA,UACE,WACF,CACA,OAKE,UAAW,CADX,cAAe,CAFf,QAAS,CADT,iBAAkB,CAElB,OAAQ,CAGR,iCAA6B,CAA7B,yBACF,CACA,cAEE,kBAAmB,CADnB,YAEF,CACA,aAEE,aAAuB,CADvB,iBAEF,CAIA,oEACE,aACF,CC5FA,SAIE,eAAgB,CAFhB,cAAe,CACf,cAAe,CAEf,aAAc,CACd,qBAAsB,CALtB,iBAAkB,CAMlB,8BACF,CAEA,SACE,cAAe,CACf,cAAe,CACf,eACF,CAEA,WAEE,kBAAmB,CADnB,YAAa,CAIb,UAAW,CADX,QAEF,CAEA,iBALE,iBAUF,CALA,MAGE,cAAe,CACf,gBAAiB,CAFjB,WAGF,CAEA,YACE,aACF,CAEA,OACE,wBACF,CAEA,SAEE,eAAgB,CADhB,iBAAkB,CAGlB,8BAAgC,CADhC,UAEF,CAEA,MAGE,eAAgB,CADhB,iCAA6B,CAA7B,yBAA6B,CAD7B,OAGF,CAEA,iBAGE,kBAAmB,CAFnB,YAAa,CACb,cAAe,CAEf,UACF,CAEA,gBAGE,kBAAmB,CAFnB,YAAa,CACb,6BAEF,CAEA,eAEE,eAAgB,CADhB,gBAEF,CAEA,UAQE,wBAA6B,CAJ7B,qBAAsB,CACtB,iBAAkB,CAElB,UAAW,CALX,wBAAyB,CACzB,gBAAiB,CAGjB,WAAY,CALZ,UAQF,CAEA,QACE,0BACF,CAEA,MAEE,qBAAsB,CAGtB,kBAAmB,CAFnB,kBAAmB,CAGnB,eAAgB,CAFhB,YAAa,CAHb,iBAMF,CAEA,uBACE,mCAA4B,CAA5B,2BACF,CAEA,WAUE,wBAAyB,CADzB,kBAAmB,CAJnB,WAAY,CAGZ,gBAAiB,CAPjB,iBAAkB,CAClB,UAAW,CAKX,iBAAkB,CAJlB,oBAAqB,CAGrB,kCAA2B,CAA3B,0BAA2B,CAK3B,8BAAgC,CAPhC,UAQF,CAEA,iBAEE,wBAAyB,CADzB,cAEF,CAEA,YAEE,UAAW,CADX,wBAEF,CAEA,mBAEE,iBAAkB,CADlB,YAAa,CAEb,QACF,CAEA,oBACE,SACF,CAEA,4BAGE,qBAAsB,CACtB,iBAAkB,CAHlB,YAAa,CAIb,eAAgB,CAHhB,YAIF,CAEA,wBAGE,oBAAyB,CAFzB,cAAe,CACf,iBAEF,CAEA,cACE,qBAAsB,CAItB,iBAAkB,CADlB,cAAe,CAFf,eAAgB,CAChB,YAGF,CAEA,oEACE,sCACF","sources":["scenes/launch_model/styles/modelCardStyle.css","scenes/register_model/styles/registerModelStyle.css"],"sourcesContent":[".container {\n display: block;\n position: relative;\n width: 300px;\n height: 300px;\n cursor: pointer;\n border-radius: 20px !important;\n}\n.descriptionCard {\n display: flex;\n flex-direction: column;\n justify-content: space-between;\n position: relative;\n top: -1px;\n left: -1px;\n width: 300px;\n height: 300px;\n padding: 20px;\n border-radius: 20px;\n}\n.cardTitle {\n display: flex;\n justify-content: space-between;\n}\n.iconButtonBox {\n display: flex;\n align-items: center;\n}\n.drawerCard {\n position: relative;\n display: flex;\n flex-direction: column;\n padding: 20px 80px 100px;\n min-height: 100%;\n width: 60vw;\n min-width: 350px;\n overflow-y: scroll;\n}\n.pasteText {\n font-size: 18px !important;\n color: #1976d2;\n cursor: pointer;\n margin-inline: 10px;\n}\n.pasteText:hover {\n color: #1976d2b3;\n}\n.copyToCommandLine {\n font-size: 16px !important;\n color: #1976d2;\n cursor: pointer;\n}\n.copyToCommandLine:hover {\n color: #1976d2b3;\n}\n.css-1be5mm1-MuiLinearProgress-root-MuiMobileStepper-progress {\n width: 100% !important;\n}\n.css-r5rjnf-MuiLinearProgress-root-MuiMobileStepper-progress {\n width: 100% !important;\n}\n.pathBox {\n width: 160px;\n cursor: pointer;\n overflow: hidden;\n white-space: nowrap;\n text-overflow: ellipsis;\n}\n.pathBox2 {\n width: 300px;\n}\n.empty {\n position: absolute;\n left: 50%;\n top: 30%;\n font-size: 20px;\n color: #555;\n transform: translate(-50%, 0);\n}\n.deleteDialog {\n display: flex;\n align-items: center;\n}\n.warningIcon {\n margin-right: 10px;\n color: 
rgb(237, 108, 2);\n}\n.textHighlight .MuiSelect-select {\n color: #1976d2;\n}\n.textHighlight .MuiInputBase-input {\n color: #1976d2;\n}\n",".formBox {\n position: relative;\n max-width: 50vw;\n min-width: 50vw;\n max-height: 80vh;\n overflow: auto;\n padding: 40px 20px 0 0;\n transition: all 0.4s ease-in-out;\n}\n\n.broaden {\n max-width: 100%;\n min-width: 100%;\n padding-right: 0;\n}\n\n.show-json {\n display: flex;\n align-items: center;\n position: absolute;\n top: 90px;\n right: 60px;\n}\n\n.icon {\n position: absolute;\n right: -40px;\n cursor: pointer;\n margin-left: 20px;\n}\n\n.icon:hover {\n color: #1976d2;\n}\n\n.arrow {\n font-size: 24px !important;\n}\n\n.jsonBox {\n position: relative;\n min-height: 80vh;\n width: 100%;\n transition: all 0.4s ease-in-out;\n}\n\n.hide {\n width: 0;\n transform: translate(30vw, 0);\n overflow: hidden;\n}\n\n.checkboxWrapper {\n display: flex;\n flex-wrap: wrap;\n align-items: center;\n width: 100%;\n}\n\n.jsonBox-header {\n display: flex;\n justify-content: space-between;\n align-items: center;\n}\n\n.jsonBox-title {\n line-height: 40px;\n font-weight: 700;\n}\n\n.textarea {\n width: 100%;\n height: calc(100% - 40px);\n padding: 5px 10px;\n border: 1px solid #ddd;\n border-radius: 5px;\n resize: none;\n color: #666;\n background-color: transparent;\n}\n\n.addBtn {\n margin-left: 20px !important;\n}\n\n.item {\n position: relative;\n border: 1px solid #ddd;\n margin: 10px 50px 0;\n padding: 20px;\n border-radius: 10px;\n overflow: hidden;\n}\n\n.item:hover .deleteBtn {\n transform: translateX(-50px);\n}\n\n.deleteBtn {\n position: absolute;\n right: 20px;\n top: calc(50% - 25px);\n width: 50px;\n height: 50px;\n transform: translateX(80px);\n text-align: center;\n line-height: 70px;\n border-radius: 25px;\n background-color: #1976d2;\n transition: all 0.3s ease-in-out;\n}\n\n.deleteBtn:hover {\n cursor: pointer;\n box-shadow: 0 0 10px #aaa;\n}\n\n.deleteIcon {\n font-size: 28px !important;\n color: #fff;\n}\n\n.chat_template_box {\n display: flex;\n align-items: start;\n gap: 10px;\n}\n\n.chat_template_test {\n width: 30%;\n}\n\n.chat_template_test_mainBox {\n height: 137px;\n padding: 10px;\n border: 1px solid #ccc;\n border-radius: 4px;\n overflow: scroll;\n}\n\n.chat_template_test_tip {\n font-size: 10px;\n margin: 4px 14px 0;\n color: rgba(0, 0, 0, 0.6);\n}\n\n.test_res_box {\n border: 1px solid #ddd;\n min-height: 55px;\n padding: 10px;\n margin-top: 5px;\n border-radius: 4px;\n}\n\n.css-19qh8xo-MuiInputBase-input-MuiOutlinedInput-input.Mui-disabled {\n -webkit-text-fill-color: #000 !important;\n}\n"],"names":[],"sourceRoot":""}