xinference 1.9.1__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (334)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +400 -3
  3. xinference/client/restful/async_restful_client.py +20 -3
  4. xinference/client/restful/restful_client.py +20 -3
  5. xinference/constants.py +2 -0
  6. xinference/core/supervisor.py +111 -49
  7. xinference/core/worker.py +10 -0
  8. xinference/deploy/cmdline.py +15 -0
  9. xinference/model/audio/core.py +26 -6
  10. xinference/model/audio/indextts2.py +166 -0
  11. xinference/model/audio/kokoro.py +1 -1
  12. xinference/model/audio/kokoro_zh.py +124 -0
  13. xinference/model/audio/model_spec.json +58 -1
  14. xinference/model/embedding/sentence_transformers/core.py +4 -4
  15. xinference/model/embedding/vllm/core.py +7 -1
  16. xinference/model/image/model_spec.json +71 -3
  17. xinference/model/image/stable_diffusion/core.py +13 -4
  18. xinference/model/llm/__init__.py +4 -0
  19. xinference/model/llm/core.py +10 -0
  20. xinference/model/llm/llama_cpp/core.py +1 -0
  21. xinference/model/llm/llm_family.json +503 -21
  22. xinference/model/llm/llm_family.py +1 -0
  23. xinference/model/llm/mlx/core.py +52 -33
  24. xinference/model/llm/sglang/core.py +32 -55
  25. xinference/model/llm/tool_parsers/__init__.py +58 -0
  26. xinference/model/llm/tool_parsers/abstract_tool_parser.py +33 -0
  27. xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +190 -0
  28. xinference/model/llm/tool_parsers/deepseek_v3_tool_parser.py +145 -0
  29. xinference/model/llm/tool_parsers/glm4_tool_parser.py +123 -0
  30. xinference/model/llm/tool_parsers/llama3_tool_parser.py +77 -0
  31. xinference/model/llm/tool_parsers/qwen_tool_parser.py +320 -0
  32. xinference/model/llm/transformers/core.py +1 -1
  33. xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
  34. xinference/model/llm/utils.py +138 -53
  35. xinference/model/llm/vllm/core.py +95 -78
  36. xinference/thirdparty/audiotools/__init__.py +10 -0
  37. xinference/thirdparty/audiotools/core/__init__.py +4 -0
  38. xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
  39. xinference/thirdparty/audiotools/core/display.py +194 -0
  40. xinference/thirdparty/audiotools/core/dsp.py +390 -0
  41. xinference/thirdparty/audiotools/core/effects.py +647 -0
  42. xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
  43. xinference/thirdparty/audiotools/core/loudness.py +320 -0
  44. xinference/thirdparty/audiotools/core/playback.py +252 -0
  45. xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
  46. xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
  47. xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
  48. xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
  49. xinference/thirdparty/audiotools/core/util.py +671 -0
  50. xinference/thirdparty/audiotools/core/whisper.py +97 -0
  51. xinference/thirdparty/audiotools/data/__init__.py +3 -0
  52. xinference/thirdparty/audiotools/data/datasets.py +517 -0
  53. xinference/thirdparty/audiotools/data/preprocess.py +81 -0
  54. xinference/thirdparty/audiotools/data/transforms.py +1592 -0
  55. xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
  56. xinference/thirdparty/audiotools/metrics/distance.py +131 -0
  57. xinference/thirdparty/audiotools/metrics/quality.py +159 -0
  58. xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
  59. xinference/thirdparty/audiotools/ml/__init__.py +5 -0
  60. xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
  61. xinference/thirdparty/audiotools/ml/decorators.py +440 -0
  62. xinference/thirdparty/audiotools/ml/experiment.py +90 -0
  63. xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
  64. xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
  65. xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
  66. xinference/thirdparty/audiotools/post.py +140 -0
  67. xinference/thirdparty/audiotools/preference.py +600 -0
  68. xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
  69. xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
  70. xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
  71. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
  72. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
  73. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
  74. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
  75. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  76. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
  77. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
  78. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
  79. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
  80. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
  81. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
  82. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
  83. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
  84. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
  85. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
  86. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
  87. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
  88. xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
  89. xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
  90. xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
  91. xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
  92. xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
  93. xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
  94. xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
  95. xinference/thirdparty/indextts/__init__.py +0 -0
  96. xinference/thirdparty/indextts/cli.py +65 -0
  97. xinference/thirdparty/indextts/gpt/__init__.py +0 -0
  98. xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
  99. xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
  100. xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
  101. xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
  102. xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
  103. xinference/thirdparty/indextts/gpt/model.py +713 -0
  104. xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
  105. xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
  106. xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
  107. xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
  108. xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
  109. xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
  110. xinference/thirdparty/indextts/infer.py +690 -0
  111. xinference/thirdparty/indextts/infer_v2.py +739 -0
  112. xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
  113. xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
  114. xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
  115. xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
  116. xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
  117. xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
  118. xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
  119. xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
  120. xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
  121. xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
  122. xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
  123. xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
  124. xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
  125. xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
  126. xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
  127. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
  128. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
  129. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
  130. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
  131. xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
  132. xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
  133. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
  134. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
  135. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  136. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
  137. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
  138. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
  139. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
  140. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
  141. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
  142. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
  143. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
  144. xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
  145. xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
  146. xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
  147. xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
  148. xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
  149. xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
  150. xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
  151. xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
  152. xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
  153. xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
  154. xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
  155. xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
  156. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
  157. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
  158. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
  159. xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
  160. xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
  161. xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
  162. xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
  163. xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
  164. xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
  165. xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
  166. xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
  167. xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
  168. xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
  169. xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
  170. xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
  171. xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
  172. xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
  173. xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
  174. xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
  175. xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
  176. xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
  177. xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
  178. xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
  179. xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
  180. xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
  181. xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
  182. xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
  183. xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
  184. xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
  185. xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
  186. xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
  187. xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
  188. xinference/thirdparty/indextts/utils/__init__.py +0 -0
  189. xinference/thirdparty/indextts/utils/arch_util.py +120 -0
  190. xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
  191. xinference/thirdparty/indextts/utils/common.py +121 -0
  192. xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
  193. xinference/thirdparty/indextts/utils/front.py +536 -0
  194. xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
  195. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
  196. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
  197. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
  198. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
  199. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
  200. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
  201. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
  202. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
  203. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
  204. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
  205. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
  206. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
  207. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
  208. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
  209. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
  210. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
  211. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
  212. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
  213. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
  214. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
  215. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
  216. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
  217. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
  218. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
  219. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
  220. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
  221. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
  222. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
  223. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
  224. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
  225. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
  226. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
  227. xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
  228. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
  229. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
  230. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
  231. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
  232. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
  233. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
  234. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
  235. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
  236. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
  237. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
  238. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
  239. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
  240. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
  241. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
  242. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
  243. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
  244. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
  245. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
  246. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
  247. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
  248. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
  249. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
  250. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
  251. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
  252. xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
  253. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
  254. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
  255. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
  256. xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
  257. xinference/thirdparty/indextts/utils/text_utils.py +41 -0
  258. xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
  259. xinference/thirdparty/indextts/utils/utils.py +93 -0
  260. xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
  261. xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
  262. xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
  263. xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
  264. xinference/types.py +105 -2
  265. xinference/ui/gradio/media_interface.py +66 -8
  266. xinference/ui/web/ui/build/asset-manifest.json +6 -6
  267. xinference/ui/web/ui/build/index.html +1 -1
  268. xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
  269. xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
  270. xinference/ui/web/ui/build/static/js/main.d192c4f3.js +3 -0
  271. xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.d192c4f3.js.LICENSE.txt} +0 -7
  272. xinference/ui/web/ui/build/static/js/main.d192c4f3.js.map +1 -0
  273. xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
  274. xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
  275. xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
  276. xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
  277. xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
  278. xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
  279. xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
  280. xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
  281. xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
  282. xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
  283. xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
  284. xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
  285. xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
  286. xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
  287. xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
  288. xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
  289. xinference/ui/web/ui/node_modules/.cache/babel-loader/f995a2425dfb0822fd07127f66ffe9b026883bc156b402eb8bd0b83d52460a93.json +1 -0
  290. xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
  291. xinference/ui/web/ui/package-lock.json +0 -34
  292. xinference/ui/web/ui/package.json +0 -1
  293. xinference/ui/web/ui/src/locales/en.json +9 -3
  294. xinference/ui/web/ui/src/locales/ja.json +9 -3
  295. xinference/ui/web/ui/src/locales/ko.json +9 -3
  296. xinference/ui/web/ui/src/locales/zh.json +9 -3
  297. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/METADATA +24 -4
  298. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/RECORD +302 -76
  299. xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
  300. xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
  301. xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
  302. xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
  303. xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
  304. xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
  305. xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
  306. xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
  307. xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
  308. xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
  309. xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
  310. xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
  311. xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
  312. xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
  313. xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
  314. xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
  315. xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
  316. xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
  317. xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
  318. xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
  319. xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
  320. xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
  321. xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
  322. xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
  323. xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
  324. xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
  325. xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
  326. xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
  327. xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
  328. xinference/ui/web/ui/node_modules/select/bower.json +0 -13
  329. xinference/ui/web/ui/node_modules/select/package.json +0 -29
  330. xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
  331. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/WHEEL +0 -0
  332. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/entry_points.txt +0 -0
  333. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/licenses/LICENSE +0 -0
  334. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/top_level.txt +0 -0
xinference/thirdparty/indextts/vqvae/xtts_dvae.py ADDED
@@ -0,0 +1,395 @@
+import functools
+from math import sqrt
+
+import torch
+import torch.distributed as distributed
+import torch.nn as nn
+import torch.nn.functional as F
+import torchaudio
+from einops import rearrange
+
+
+def default(val, d):
+    return val if val is not None else d
+
+
+def eval_decorator(fn):
+    def inner(model, *args, **kwargs):
+        was_training = model.training
+        model.eval()
+        out = fn(model, *args, **kwargs)
+        model.train(was_training)
+        return out
+
+    return inner
+
+
+def dvae_wav_to_mel(
+    wav, mel_norms_file="../experiments/clips_mel_norms.pth", mel_norms=None, device=torch.device("cpu")
+):
+    mel_stft = torchaudio.transforms.MelSpectrogram(
+        n_fft=1024,
+        hop_length=256,
+        win_length=1024,
+        power=2,
+        normalized=False,
+        sample_rate=22050,
+        f_min=0,
+        f_max=8000,
+        n_mels=80,
+        norm="slaney",
+    ).to(device)
+    wav = wav.to(device)
+    mel = mel_stft(wav)
+    mel = torch.log(torch.clamp(mel, min=1e-5))
+    if mel_norms is None:
+        mel_norms = torch.load(mel_norms_file, map_location=device)
+    mel = mel / mel_norms.unsqueeze(0).unsqueeze(-1)
+    return mel
+
+
+class Quantize(nn.Module):
+    def __init__(self, dim, n_embed, decay=0.99, eps=1e-5, balancing_heuristic=False, new_return_order=False):
+        super().__init__()
+
+        self.dim = dim
+        self.n_embed = n_embed
+        self.decay = decay
+        self.eps = eps
+
+        self.balancing_heuristic = balancing_heuristic
+        self.codes = None
+        self.max_codes = 64000
+        self.codes_full = False
+        self.new_return_order = new_return_order
+
+        embed = torch.randn(dim, n_embed)
+        self.register_buffer("embed", embed)
+        self.register_buffer("cluster_size", torch.zeros(n_embed))
+        self.register_buffer("embed_avg", embed.clone())
+
+    def forward(self, input, return_soft_codes=False):
+        if self.balancing_heuristic and self.codes_full:
+            h = torch.histc(self.codes, bins=self.n_embed, min=0, max=self.n_embed) / len(self.codes)
+            mask = torch.logical_or(h > 0.9, h < 0.01).unsqueeze(1)
+            ep = self.embed.permute(1, 0)
+            ea = self.embed_avg.permute(1, 0)
+            rand_embed = torch.randn_like(ep) * mask
+            self.embed = (ep * ~mask + rand_embed).permute(1, 0)
+            self.embed_avg = (ea * ~mask + rand_embed).permute(1, 0)
+            self.cluster_size = self.cluster_size * ~mask.squeeze()
+            if torch.any(mask):
+                print(f"Reset {torch.sum(mask)} embedding codes.")
+            self.codes = None
+            self.codes_full = False
+
+        flatten = input.reshape(-1, self.dim)
+        dist = flatten.pow(2).sum(1, keepdim=True) - 2 * flatten @ self.embed + self.embed.pow(2).sum(0, keepdim=True)
+        soft_codes = -dist
+        _, embed_ind = soft_codes.max(1)
+        embed_onehot = F.one_hot(embed_ind, self.n_embed).type(flatten.dtype)
+        embed_ind = embed_ind.view(*input.shape[:-1])
+        quantize = self.embed_code(embed_ind)
+
+        if self.balancing_heuristic:
+            if self.codes is None:
+                self.codes = embed_ind.flatten()
+            else:
+                self.codes = torch.cat([self.codes, embed_ind.flatten()])
+                if len(self.codes) > self.max_codes:
+                    self.codes = self.codes[-self.max_codes :]
+                    self.codes_full = True
+
+        if self.training:
+            embed_onehot_sum = embed_onehot.sum(0)
+            embed_sum = flatten.transpose(0, 1) @ embed_onehot
+
+            if distributed.is_initialized() and distributed.get_world_size() > 1:
+                distributed.all_reduce(embed_onehot_sum)
+                distributed.all_reduce(embed_sum)
+
+            self.cluster_size.data.mul_(self.decay).add_(embed_onehot_sum, alpha=1 - self.decay)
+            self.embed_avg.data.mul_(self.decay).add_(embed_sum, alpha=1 - self.decay)
+            n = self.cluster_size.sum()
+            cluster_size = (self.cluster_size + self.eps) / (n + self.n_embed * self.eps) * n
+            embed_normalized = self.embed_avg / cluster_size.unsqueeze(0)
+            self.embed.data.copy_(embed_normalized)
+
+        diff = (quantize.detach() - input).pow(2).mean()
+        quantize = input + (quantize - input).detach()
+
+        if return_soft_codes:
+            return quantize, diff, embed_ind, soft_codes.view(input.shape[:-1] + (-1,))
+        elif self.new_return_order:
+            return quantize, embed_ind, diff
+        else:
+            return quantize, diff, embed_ind
+
+    def embed_code(self, embed_id):
+        return F.embedding(embed_id, self.embed.transpose(0, 1))
+
+
+# Fits a soft-discretized input to a normal-PDF across the specified dimension.
+# In other words, attempts to force the discretization function to have a mean equal utilization across all discrete
+# values with the specified expected variance.
+class DiscretizationLoss(nn.Module):
+    def __init__(self, discrete_bins, dim, expected_variance, store_past=0):
+        super().__init__()
+        self.discrete_bins = discrete_bins
+        self.dim = dim
+        self.dist = torch.distributions.Normal(0, scale=expected_variance)
+        if store_past > 0:
+            self.record_past = True
+            self.register_buffer("accumulator_index", torch.zeros(1, dtype=torch.long, device="cpu"))
+            self.register_buffer("accumulator_filled", torch.zeros(1, dtype=torch.long, device="cpu"))
+            self.register_buffer("accumulator", torch.zeros(store_past, discrete_bins))
+        else:
+            self.record_past = False
+
+    def forward(self, x):
+        other_dims = set(range(len(x.shape))) - set([self.dim])
+        averaged = x.sum(dim=tuple(other_dims)) / x.sum()
+        averaged = averaged - averaged.mean()
+
+        if self.record_past:
+            acc_count = self.accumulator.shape[0]
+            avg = averaged.detach().clone()
+            if self.accumulator_filled > 0:
+                averaged = torch.mean(self.accumulator, dim=0) * (acc_count - 1) / acc_count + averaged / acc_count
+
+            # Also push averaged into the accumulator.
+            self.accumulator[self.accumulator_index] = avg
+            self.accumulator_index += 1
+            if self.accumulator_index >= acc_count:
+                self.accumulator_index *= 0
+                if self.accumulator_filled <= 0:
+                    self.accumulator_filled += 1
+
+        return torch.sum(-self.dist.log_prob(averaged))
+
+
+class ResBlock(nn.Module):
+    def __init__(self, chan, conv, activation):
+        super().__init__()
+        self.net = nn.Sequential(
+            conv(chan, chan, 3, padding=1),
+            activation(),
+            conv(chan, chan, 3, padding=1),
+            activation(),
+            conv(chan, chan, 1),
+        )
+
+    def forward(self, x):
+        return self.net(x) + x
+
+
+class UpsampledConv(nn.Module):
+    def __init__(self, conv, *args, **kwargs):
+        super().__init__()
+        assert "stride" in kwargs.keys()
+        self.stride = kwargs["stride"]
+        del kwargs["stride"]
+        self.conv = conv(*args, **kwargs)
+
+    def forward(self, x):
+        up = nn.functional.interpolate(x, scale_factor=self.stride, mode="nearest")
+        return self.conv(up)
+
+
+# DiscreteVAE partially derived from lucidrains DALLE implementation
+# Credit: https://github.com/lucidrains/DALLE-pytorch
+class DiscreteVAE(nn.Module):
+    def __init__(
+        self,
+        positional_dims=2,
+        num_tokens=512,
+        codebook_dim=512,
+        num_layers=3,
+        num_resnet_blocks=0,
+        hidden_dim=64,
+        channels=3,
+        stride=2,
+        kernel_size=4,
+        use_transposed_convs=True,
+        encoder_norm=False,
+        activation="relu",
+        smooth_l1_loss=False,
+        straight_through=False,
+        normalization=None,  # ((0.5,) * 3, (0.5,) * 3),
+        record_codes=False,
+        discretization_loss_averaging_steps=100,
+        lr_quantizer_args={},
+    ):
+        super().__init__()
+        has_resblocks = num_resnet_blocks > 0
+
+        self.num_tokens = num_tokens
+        self.num_layers = num_layers
+        self.straight_through = straight_through
+        self.positional_dims = positional_dims
+        self.discrete_loss = DiscretizationLoss(
+            num_tokens, 2, 1 / (num_tokens * 2), discretization_loss_averaging_steps
+        )
+
+        assert positional_dims > 0 and positional_dims < 3  # This VAE only supports 1d and 2d inputs for now.
+        if positional_dims == 2:
+            conv = nn.Conv2d
+            conv_transpose = nn.ConvTranspose2d
+        else:
+            conv = nn.Conv1d
+            conv_transpose = nn.ConvTranspose1d
+        if not use_transposed_convs:
+            conv_transpose = functools.partial(UpsampledConv, conv)
+
+        if activation == "relu":
+            act = nn.ReLU
+        elif activation == "silu":
+            act = nn.SiLU
+        else:
+            assert NotImplementedError()
+
+        enc_layers = []
+        dec_layers = []
+
+        if num_layers > 0:
+            enc_chans = [hidden_dim * 2**i for i in range(num_layers)]
+            dec_chans = list(reversed(enc_chans))
+
+            enc_chans = [channels, *enc_chans]
+
+            dec_init_chan = codebook_dim if not has_resblocks else dec_chans[0]
+            dec_chans = [dec_init_chan, *dec_chans]
+
+            enc_chans_io, dec_chans_io = map(lambda t: list(zip(t[:-1], t[1:])), (enc_chans, dec_chans))
+
+            pad = (kernel_size - 1) // 2
+            for (enc_in, enc_out), (dec_in, dec_out) in zip(enc_chans_io, dec_chans_io):
+                enc_layers.append(nn.Sequential(conv(enc_in, enc_out, kernel_size, stride=stride, padding=pad), act()))
+                if encoder_norm:
+                    enc_layers.append(nn.GroupNorm(8, enc_out))
+                dec_layers.append(
+                    nn.Sequential(conv_transpose(dec_in, dec_out, kernel_size, stride=stride, padding=pad), act())
+                )
+            dec_out_chans = dec_chans[-1]
+            innermost_dim = dec_chans[0]
+        else:
+            enc_layers.append(nn.Sequential(conv(channels, hidden_dim, 1), act()))
+            dec_out_chans = hidden_dim
+            innermost_dim = hidden_dim
+
+        for _ in range(num_resnet_blocks):
+            dec_layers.insert(0, ResBlock(innermost_dim, conv, act))
+            enc_layers.append(ResBlock(innermost_dim, conv, act))
+
+        if num_resnet_blocks > 0:
+            dec_layers.insert(0, conv(codebook_dim, innermost_dim, 1))
+
+        enc_layers.append(conv(innermost_dim, codebook_dim, 1))
+        dec_layers.append(conv(dec_out_chans, channels, 1))
+
+        self.encoder = nn.Sequential(*enc_layers)
+        self.decoder = nn.Sequential(*dec_layers)
+
+        self.loss_fn = F.smooth_l1_loss if smooth_l1_loss else F.mse_loss
+        self.codebook = Quantize(codebook_dim, num_tokens, new_return_order=True)
+
+        # take care of normalization within class
+        self.normalization = normalization
+        self.record_codes = record_codes
+        if record_codes:
+            self.codes = torch.zeros((1228800,), dtype=torch.long)
+            self.code_ind = 0
+            self.total_codes = 0
+        self.internal_step = 0
+
+    def norm(self, images):
+        if not self.normalization is not None:
+            return images
+
+        means, stds = map(lambda t: torch.as_tensor(t).to(images), self.normalization)
+        arrange = "c -> () c () ()" if self.positional_dims == 2 else "c -> () c ()"
+        means, stds = map(lambda t: rearrange(t, arrange), (means, stds))
+        images = images.clone()
+        images.sub_(means).div_(stds)
+        return images
+
+    def get_debug_values(self, step, __):
+        if self.record_codes and self.total_codes > 0:
+            # Report annealing schedule
+            return {"histogram_codes": self.codes[: self.total_codes]}
+        else:
+            return {}
+
+    @torch.no_grad()
+    @eval_decorator
+    def get_codebook_indices(self, images):
+        img = self.norm(images)
+        logits = self.encoder(img).permute((0, 2, 3, 1) if len(img.shape) == 4 else (0, 2, 1))
+        sampled, codes, _ = self.codebook(logits)
+        self.log_codes(codes)
+        return codes
+
+    def decode(self, img_seq):
+        self.log_codes(img_seq)
+        if hasattr(self.codebook, "embed_code"):
+            image_embeds = self.codebook.embed_code(img_seq)
+        else:
+            image_embeds = F.embedding(img_seq, self.codebook.codebook)
+        b, n, d = image_embeds.shape
+
+        kwargs = {}
+        if self.positional_dims == 1:
+            arrange = "b n d -> b d n"
+        else:
+            h = w = int(sqrt(n))
+            arrange = "b (h w) d -> b d h w"
+            kwargs = {"h": h, "w": w}
+        image_embeds = rearrange(image_embeds, arrange, **kwargs)
+        images = [image_embeds]
+        for layer in self.decoder:
+            images.append(layer(images[-1]))
+        return images[-1], images[-2]
+
+    def infer(self, img):
+        img = self.norm(img)
+        logits = self.encoder(img).permute((0, 2, 3, 1) if len(img.shape) == 4 else (0, 2, 1))
+        sampled, codes, commitment_loss = self.codebook(logits)
+        return self.decode(codes)
+
+    # Note: This module is not meant to be run in forward() except while training. It has special logic which performs
+    # evaluation using quantized values when it detects that it is being run in eval() mode, which will be substantially
+    # more lossy (but useful for determining network performance).
+    def forward(self, img):
+        img = self.norm(img)
+        logits = self.encoder(img).permute((0, 2, 3, 1) if len(img.shape) == 4 else (0, 2, 1))
+        sampled, codes, commitment_loss = self.codebook(logits)
+        sampled = sampled.permute((0, 3, 1, 2) if len(img.shape) == 4 else (0, 2, 1))
+
+        if self.training:
+            out = sampled
+            for d in self.decoder:
+                out = d(out)
+            self.log_codes(codes)
+        else:
+            # This is non-differentiable, but gives a better idea of how the network is actually performing.
+            out, _ = self.decode(codes)
+
+        # reconstruction loss
+        out = out[..., :img.shape[-1]]
+        recon_loss = self.loss_fn(img, out, reduction="mean")
+        ssim_loss = torch.zeros(size=(1,)).cuda()
+
+        return recon_loss, ssim_loss, commitment_loss, out
+
+    def log_codes(self, codes):
+        # This is so we can debug the distribution of codes being learned.
+        if self.record_codes and self.internal_step % 10 == 0:
+            codes = codes.flatten()
+            l = codes.shape[0]
+            i = self.code_ind if (self.codes.shape[0] - self.code_ind) > l else self.codes.shape[0] - l
+            self.codes[i : i + l] = codes.cpu()
+            self.code_ind = self.code_ind + l
+            if self.code_ind >= self.codes.shape[0]:
+                self.code_ind = 0
+            self.total_codes += 1
+        self.internal_step += 1
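
The vendored `xtts_dvae.py` above encodes a mel spectrogram into discrete codebook indices and decodes them back. Below is a minimal, untested sketch of that round trip; the constructor arguments are illustrative assumptions (not the checkpoint configuration IndexTTS actually ships), and it assumes the vendored module is importable from the installed wheel.

```python
import torch

from xinference.thirdparty.indextts.vqvae.xtts_dvae import DiscreteVAE

# Hypothetical configuration: 1d positional dims for mel frames, 80 mel bins
# to match dvae_wav_to_mel above. The real IndexTTS checkpoint may differ.
dvae = DiscreteVAE(
    positional_dims=1,
    num_tokens=512,
    codebook_dim=512,
    num_layers=2,   # two stride-2 convs: 4x temporal downsampling
    channels=80,    # mel bins
)
dvae.eval()

mel = torch.randn(1, 80, 256)            # (batch, n_mels, frames)
codes = dvae.get_codebook_indices(mel)   # (1, 64) discrete token ids
recon, _ = dvae.decode(codes)            # (1, 80, 256) mel reconstruction
print(codes.shape, recon.shape)
```
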
xinference/types.py CHANGED
@@ -351,6 +351,11 @@ class ModelAndPrompt(BaseModel):
     prompt: str
 
 
+class ModelAndMessages(BaseModel):
+    model: str
+    messages: List[Dict[str, Any]]
+
+
 class CreateCompletionTorch(BaseModel):
     echo: bool = echo_field
     max_tokens: Optional[int] = max_tokens_field
@@ -371,7 +376,6 @@ class CreateCompletionTorch(BaseModel):
 # This type is for openai API compatibility
 CreateCompletionOpenAI: BaseModel
 
-
 from openai.types.completion_create_params import CompletionCreateParamsNonStreaming
 
 CreateCompletionOpenAI = create_model_from_typeddict(
@@ -395,7 +399,6 @@ class CreateChatModel(BaseModel):
 # Currently, chat calls generates, so the params share the same one.
 CreateChatCompletionTorch = CreateCompletionTorch
 
-
 from ._compat import CreateChatCompletionOpenAI
 
 
@@ -462,3 +465,103 @@ class PeftModelConfig:
             image_lora_load_kwargs=data.get("image_lora_load_kwargs"),
             image_lora_fuse_kwargs=data.get("image_lora_fuse_kwargs"),
         )
+
+
+# This type is for Anthropic API compatibility
+ANTHROPIC_AVAILABLE = False
+
+try:
+    from anthropic.types import ContentBlock, Usage
+
+    ANTHROPIC_AVAILABLE = True
+except ImportError:
+    ContentBlock = None
+    Usage = None
+
+# Use TYPE_CHECKING to avoid runtime issues with mypy
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    # For type checking, define the types as if Anthropic is available
+    from anthropic.types import ContentBlock as ContentBlock_
+    from anthropic.types import Usage as Usage_
+
+    class AnthropicMessage(TypedDict):
+        id: str
+        type: str
+        role: str
+        content: List[ContentBlock_]
+        model: str
+        stop_reason: str
+        stop_sequence: str
+        usage: Usage_
+        container: Dict[str, Any]
+
+    class MessageCreateParams(TypedDict):
+        model: str
+        messages: List[Dict[str, Any]]
+        max_tokens: int
+        stream: NotRequired[bool]
+        temperature: NotRequired[float]
+        top_p: NotRequired[float]
+        top_k: NotRequired[int]
+        stop_sequences: NotRequired[List[str]]
+        metadata: NotRequired[Dict[str, Any]]
+        tools: NotRequired[List[Dict[str, Any]]]
+        tool_choice: NotRequired[Union[str, Dict[str, Any]]]
+
+    CreateMessageAnthropic: BaseModel
+
+    class CreateMessage(
+        ModelAndMessages,
+    ):
+        pass
+
+else:
+    # Runtime definitions
+    if ANTHROPIC_AVAILABLE:
+
+        class AnthropicMessage(TypedDict):
+            id: str
+            type: str
+            role: str
+            content: List[ContentBlock]
+            model: str
+            stop_reason: str
+            stop_sequence: str
+            usage: Usage
+            container: Dict[str, Any]
+
+        class MessageCreateParams(TypedDict):
+            model: str
+            messages: List[Dict[str, Any]]
+            max_tokens: int
+            stream: NotRequired[bool]
+            temperature: NotRequired[float]
+            top_p: NotRequired[float]
+            top_k: NotRequired[int]
+            stop_sequences: NotRequired[List[str]]
+            metadata: NotRequired[Dict[str, Any]]
+            tools: NotRequired[List[Dict[str, Any]]]
+            tool_choice: NotRequired[Union[str, Dict[str, Any]]]
+
+        CreateMessageAnthropic: BaseModel = create_model_from_typeddict(
+            MessageCreateParams,
+        )
+        CreateMessageAnthropic = fix_forward_ref(CreateMessageAnthropic)
+
+        class CreateMessage(CreateMessageAnthropic):
+            pass
+
+    else:
+        # Define dummy types when Anthropic is not available
+        class AnthropicMessage:
+            pass
+
+        class MessageCreateParams:
+            pass
+
+        CreateMessageAnthropic = None
+
+        class CreateMessage:
+            pass
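
A hedged sketch of how the new Anthropic-compatibility model might be exercised to validate a `/v1/messages`-style payload (the payload values are invented; the import names come from the diff above, and `CreateMessageAnthropic` is only a real pydantic model when the `anthropic` package is installed):

```python
from xinference.types import ANTHROPIC_AVAILABLE, CreateMessageAnthropic

# Hypothetical request body in Anthropic's Messages shape.
payload = {
    "model": "my-model",
    "messages": [{"role": "user", "content": "Hello"}],
    "max_tokens": 128,
    "temperature": 0.7,
}

if ANTHROPIC_AVAILABLE:
    req = CreateMessageAnthropic(**payload)  # raises on malformed input
    print(req.model, req.max_tokens)
else:
    # Per the fallback branch above, CreateMessageAnthropic is None here.
    print("anthropic not installed; Anthropic-compat types are disabled")
```
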
xinference/ui/gradio/media_interface.py CHANGED
@@ -217,7 +217,7 @@ class MediaInterface:
         def image_generate_image(
             prompt: str,
             negative_prompt: str,
-            image: PIL.Image.Image,
+            images: Optional[List[PIL.Image.Image]],
             n: int,
             size_width: int,
             size_height: int,
@@ -250,8 +250,21 @@ class MediaInterface:
                 kwargs["strength"] = strength
             sampler_name = None if sampler_name == "default" else sampler_name
 
-            bio = io.BytesIO()
-            image.save(bio, format="png")
+            # Handle single image or multiple images
+            if images is None:
+                raise ValueError("Please upload at least one image")
+
+            # Process uploaded files to get PIL images
+            processed_images = process_uploaded_files(images)
+            if processed_images is None:
+                raise ValueError("Please upload at least one image")
+
+            # Convert all images to bytes
+            image_bytes_list = []
+            for img in processed_images:
+                bio = io.BytesIO()
+                img.save(bio, format="png")
+                image_bytes_list.append(bio.getvalue())
 
             response = None
             exc = None
@@ -265,7 +278,7 @@ class MediaInterface:
                 prompt=prompt,
                 negative_prompt=negative_prompt,
                 n=n,
-                image=bio.getvalue(),
+                image=image_bytes_list,
                 size=size,
                 response_format="b64_json",
                 num_inference_steps=num_inference_steps,
@@ -300,7 +313,7 @@ class MediaInterface:
 
             return images
 
-        with gr.Blocks() as image2image_inteface:
+        with gr.Blocks() as image2image_interface:
             with gr.Column():
                 with gr.Row():
                     with gr.Column(scale=10):
@@ -341,16 +354,61 @@ class MediaInterface:
 
             with gr.Row():
                 with gr.Column(scale=1):
-                    uploaded_image = gr.Image(type="pil", label="Upload Image")
+                    gr.Markdown("### Upload Images")
+                    gr.Markdown(
+                        "*Multiple images supported for image-to-image generation*"
+                    )
+                    uploaded_images = gr.File(
+                        file_count="multiple",
+                        file_types=["image"],
+                        label="Upload Images",
+                    )
+                    image_preview = gr.Gallery(label="Image Preview", height=300)
                 with gr.Column(scale=1):
                     output_gallery = gr.Gallery()
 
+            # Function to handle file uploads and convert to PIL images
+            def process_uploaded_files(files):
+                if files is None:
+                    return None
+
+                images = []
+                for file_info in files:
+                    if isinstance(file_info, dict) and "name" in file_info:
+                        # Handle file info format from gradio
+                        file_path = file_info["name"]
+                        try:
+                            img = PIL.Image.open(file_path)
+                            images.append(img)
+                        except Exception as e:
+                            logger.warning(f"Failed to load image {file_path}: {e}")
+                    elif hasattr(file_info, "name"):
+                        # Handle file object
+                        try:
+                            img = PIL.Image.open(file_info.name)
+                            images.append(img)
+                        except Exception as e:
+                            logger.warning(
+                                f"Failed to load image {file_info.name}: {e}"
+                            )
+
+                return images if images else None
+
+            # Update gallery when files are uploaded
+            def update_gallery(files):
+                images = process_uploaded_files(files)
+                return images if images else []
+
+            uploaded_images.change(
+                update_gallery, inputs=[uploaded_images], outputs=[image_preview]
+            )
+
             generate_button.click(
                 image_generate_image,
                 inputs=[
                     prompt,
                     negative_prompt,
-                    uploaded_image,
+                    uploaded_images,
                     n,
                     size_width,
                     size_height,
@@ -362,7 +420,7 @@ class MediaInterface:
                 ],
                 outputs=output_gallery,
             )
-        return image2image_inteface
+        return image2image_interface
 
     def inpainting_interface(self) -> "gr.Blocks":
         from ...model.image.stable_diffusion.core import SAMPLING_METHODS
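
The diff above replaces the single `gr.Image` input with a multi-file upload previewed in a gallery. A standalone sketch of that wiring pattern, stripped of the xinference-specific pieces (assumes a recent Gradio release; the `to_pil` helper is hypothetical and mirrors the dual path-or-file-object handling in `process_uploaded_files`):

```python
import gradio as gr
import PIL.Image


def to_pil(files):
    # gr.File with file_count="multiple" yields a list of paths or
    # file-like objects depending on the Gradio version; handle both.
    if not files:
        return []
    return [
        PIL.Image.open(f.name if hasattr(f, "name") else f) for f in files
    ]


with gr.Blocks() as demo:
    uploads = gr.File(
        file_count="multiple", file_types=["image"], label="Upload Images"
    )
    preview = gr.Gallery(label="Image Preview", height=300)
    # Refresh the preview gallery whenever the upload set changes.
    uploads.change(to_pil, inputs=[uploads], outputs=[preview])

if __name__ == "__main__":
    demo.launch()
```
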
xinference/ui/web/ui/build/asset-manifest.json CHANGED
@@ -1,14 +1,14 @@
 {
   "files": {
-    "main.css": "./static/css/main.013f296b.css",
-    "main.js": "./static/js/main.1086c759.js",
+    "main.css": "./static/css/main.5ea97072.css",
+    "main.js": "./static/js/main.d192c4f3.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
-    "main.013f296b.css.map": "./static/css/main.013f296b.css.map",
-    "main.1086c759.js.map": "./static/js/main.1086c759.js.map"
+    "main.5ea97072.css.map": "./static/css/main.5ea97072.css.map",
+    "main.d192c4f3.js.map": "./static/js/main.d192c4f3.js.map"
   },
   "entrypoints": [
-    "static/css/main.013f296b.css",
-    "static/js/main.1086c759.js"
+    "static/css/main.5ea97072.css",
+    "static/js/main.d192c4f3.js"
   ]
 }
xinference/ui/web/ui/build/index.html CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.1086c759.js"></script><link href="./static/css/main.013f296b.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.d192c4f3.js"></script><link href="./static/css/main.5ea97072.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
xinference/ui/web/ui/build/static/css/main.5ea97072.css ADDED
@@ -0,0 +1,2 @@
+.container{border-radius:20px!important;cursor:pointer;display:block}.container,.descriptionCard{height:300px;position:relative;width:300px}.descriptionCard{border-radius:20px;flex-direction:column;left:-1px;padding:20px;top:-1px}.cardTitle,.descriptionCard{display:flex;justify-content:space-between}.iconButtonBox{align-items:center;display:flex}.drawerCard{display:flex;flex-direction:column;min-height:100%;min-width:350px;overflow-y:scroll;padding:20px 80px 100px;position:relative;width:60vw}.pasteText{color:#1976d2;cursor:pointer;font-size:18px!important;margin-inline:10px}.pasteText:hover{color:#1976d2b3}.copyToCommandLine{color:#1976d2;cursor:pointer;font-size:16px!important}.copyToCommandLine:hover{color:#1976d2b3}.css-1be5mm1-MuiLinearProgress-root-MuiMobileStepper-progress,.css-r5rjnf-MuiLinearProgress-root-MuiMobileStepper-progress{width:100%!important}.pathBox{cursor:pointer;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;width:160px}.pathBox2{width:300px}.empty{color:#555;font-size:20px;left:50%;position:absolute;top:30%;-webkit-transform:translate(-50%);transform:translate(-50%)}.deleteDialog{align-items:center;display:flex}.warningIcon{color:#ed6c02;margin-right:10px}.textHighlight .MuiInputBase-input,.textHighlight .MuiSelect-select{color:#1976d2}.formBox{max-height:80vh;max-width:50vw;min-width:50vw;overflow:auto;padding:40px 20px 0 0;position:relative;transition:all .4s ease-in-out}.broaden{max-width:100%;min-width:100%;padding-right:0}.show-json{align-items:center;display:flex;right:60px;top:90px}.icon,.show-json{position:absolute}.icon{cursor:pointer;margin-left:20px;right:-40px}.icon:hover{color:#1976d2}.arrow{font-size:24px!important}.jsonBox{min-height:80vh;position:relative;transition:all .4s ease-in-out;width:100%}.hide{overflow:hidden;-webkit-transform:translate(30vw);transform:translate(30vw);width:0}.checkboxWrapper{align-items:center;display:flex;flex-wrap:wrap;width:100%}.jsonBox-header{align-items:center;display:flex;justify-content:space-between}.jsonBox-title{font-weight:700;line-height:40px}.textarea{background-color:initial;border:1px solid #ddd;border-radius:5px;color:#666;height:calc(100% - 40px);padding:5px 10px;resize:none;width:100%}.addBtn{margin-left:20px!important}.item{border:1px solid #ddd;border-radius:10px;margin:10px 50px 0;overflow:hidden;padding:20px;position:relative}.item:hover .deleteBtn{-webkit-transform:translateX(-50px);transform:translateX(-50px)}.deleteBtn{background-color:#1976d2;border-radius:25px;height:50px;line-height:70px;position:absolute;right:20px;text-align:center;top:calc(50% - 25px);-webkit-transform:translateX(80px);transform:translateX(80px);transition:all .3s ease-in-out;width:50px}.deleteBtn:hover{box-shadow:0 0 10px #aaa;cursor:pointer}.deleteIcon{color:#fff;font-size:28px!important}.chat_template_box{align-items:start;display:flex;gap:10px}.chat_template_test{width:30%}.chat_template_test_mainBox{border:1px solid #ccc;border-radius:4px;height:137px;overflow:scroll;padding:10px}.chat_template_test_tip{color:rgba(0,0,0,.6);font-size:10px;margin:4px 14px 0}.test_res_box{border:1px solid #ddd;border-radius:4px;margin-top:5px;min-height:55px;padding:10px}.css-19qh8xo-MuiInputBase-input-MuiOutlinedInput-input.Mui-disabled{-webkit-text-fill-color:#000!important}
+/*# sourceMappingURL=main.5ea97072.css.map*/