xinference-1.9.1-py3-none-any.whl → xinference-1.10.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic (the original page links to more details).

Files changed (334)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +400 -3
  3. xinference/client/restful/async_restful_client.py +20 -3
  4. xinference/client/restful/restful_client.py +20 -3
  5. xinference/constants.py +2 -0
  6. xinference/core/supervisor.py +111 -49
  7. xinference/core/worker.py +10 -0
  8. xinference/deploy/cmdline.py +15 -0
  9. xinference/model/audio/core.py +26 -6
  10. xinference/model/audio/indextts2.py +166 -0
  11. xinference/model/audio/kokoro.py +1 -1
  12. xinference/model/audio/kokoro_zh.py +124 -0
  13. xinference/model/audio/model_spec.json +58 -1
  14. xinference/model/embedding/sentence_transformers/core.py +4 -4
  15. xinference/model/embedding/vllm/core.py +7 -1
  16. xinference/model/image/model_spec.json +71 -3
  17. xinference/model/image/stable_diffusion/core.py +13 -4
  18. xinference/model/llm/__init__.py +4 -0
  19. xinference/model/llm/core.py +10 -0
  20. xinference/model/llm/llama_cpp/core.py +1 -0
  21. xinference/model/llm/llm_family.json +503 -21
  22. xinference/model/llm/llm_family.py +1 -0
  23. xinference/model/llm/mlx/core.py +52 -33
  24. xinference/model/llm/sglang/core.py +32 -55
  25. xinference/model/llm/tool_parsers/__init__.py +58 -0
  26. xinference/model/llm/tool_parsers/abstract_tool_parser.py +33 -0
  27. xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +190 -0
  28. xinference/model/llm/tool_parsers/deepseek_v3_tool_parser.py +145 -0
  29. xinference/model/llm/tool_parsers/glm4_tool_parser.py +123 -0
  30. xinference/model/llm/tool_parsers/llama3_tool_parser.py +77 -0
  31. xinference/model/llm/tool_parsers/qwen_tool_parser.py +320 -0
  32. xinference/model/llm/transformers/core.py +1 -1
  33. xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
  34. xinference/model/llm/utils.py +138 -53
  35. xinference/model/llm/vllm/core.py +95 -78
  36. xinference/thirdparty/audiotools/__init__.py +10 -0
  37. xinference/thirdparty/audiotools/core/__init__.py +4 -0
  38. xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
  39. xinference/thirdparty/audiotools/core/display.py +194 -0
  40. xinference/thirdparty/audiotools/core/dsp.py +390 -0
  41. xinference/thirdparty/audiotools/core/effects.py +647 -0
  42. xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
  43. xinference/thirdparty/audiotools/core/loudness.py +320 -0
  44. xinference/thirdparty/audiotools/core/playback.py +252 -0
  45. xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
  46. xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
  47. xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
  48. xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
  49. xinference/thirdparty/audiotools/core/util.py +671 -0
  50. xinference/thirdparty/audiotools/core/whisper.py +97 -0
  51. xinference/thirdparty/audiotools/data/__init__.py +3 -0
  52. xinference/thirdparty/audiotools/data/datasets.py +517 -0
  53. xinference/thirdparty/audiotools/data/preprocess.py +81 -0
  54. xinference/thirdparty/audiotools/data/transforms.py +1592 -0
  55. xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
  56. xinference/thirdparty/audiotools/metrics/distance.py +131 -0
  57. xinference/thirdparty/audiotools/metrics/quality.py +159 -0
  58. xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
  59. xinference/thirdparty/audiotools/ml/__init__.py +5 -0
  60. xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
  61. xinference/thirdparty/audiotools/ml/decorators.py +440 -0
  62. xinference/thirdparty/audiotools/ml/experiment.py +90 -0
  63. xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
  64. xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
  65. xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
  66. xinference/thirdparty/audiotools/post.py +140 -0
  67. xinference/thirdparty/audiotools/preference.py +600 -0
  68. xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
  69. xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
  70. xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
  71. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
  72. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
  73. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
  74. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
  75. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  76. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
  77. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
  78. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
  79. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
  80. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
  81. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
  82. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
  83. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
  84. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
  85. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
  86. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
  87. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
  88. xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
  89. xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
  90. xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
  91. xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
  92. xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
  93. xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
  94. xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
  95. xinference/thirdparty/indextts/__init__.py +0 -0
  96. xinference/thirdparty/indextts/cli.py +65 -0
  97. xinference/thirdparty/indextts/gpt/__init__.py +0 -0
  98. xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
  99. xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
  100. xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
  101. xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
  102. xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
  103. xinference/thirdparty/indextts/gpt/model.py +713 -0
  104. xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
  105. xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
  106. xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
  107. xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
  108. xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
  109. xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
  110. xinference/thirdparty/indextts/infer.py +690 -0
  111. xinference/thirdparty/indextts/infer_v2.py +739 -0
  112. xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
  113. xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
  114. xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
  115. xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
  116. xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
  117. xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
  118. xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
  119. xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
  120. xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
  121. xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
  122. xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
  123. xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
  124. xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
  125. xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
  126. xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
  127. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
  128. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
  129. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
  130. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
  131. xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
  132. xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
  133. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
  134. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
  135. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  136. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
  137. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
  138. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
  139. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
  140. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
  141. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
  142. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
  143. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
  144. xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
  145. xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
  146. xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
  147. xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
  148. xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
  149. xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
  150. xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
  151. xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
  152. xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
  153. xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
  154. xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
  155. xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
  156. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
  157. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
  158. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
  159. xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
  160. xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
  161. xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
  162. xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
  163. xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
  164. xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
  165. xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
  166. xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
  167. xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
  168. xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
  169. xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
  170. xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
  171. xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
  172. xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
  173. xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
  174. xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
  175. xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
  176. xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
  177. xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
  178. xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
  179. xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
  180. xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
  181. xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
  182. xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
  183. xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
  184. xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
  185. xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
  186. xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
  187. xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
  188. xinference/thirdparty/indextts/utils/__init__.py +0 -0
  189. xinference/thirdparty/indextts/utils/arch_util.py +120 -0
  190. xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
  191. xinference/thirdparty/indextts/utils/common.py +121 -0
  192. xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
  193. xinference/thirdparty/indextts/utils/front.py +536 -0
  194. xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
  195. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
  196. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
  197. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
  198. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
  199. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
  200. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
  201. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
  202. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
  203. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
  204. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
  205. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
  206. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
  207. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
  208. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
  209. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
  210. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
  211. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
  212. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
  213. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
  214. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
  215. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
  216. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
  217. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
  218. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
  219. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
  220. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
  221. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
  222. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
  223. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
  224. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
  225. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
  226. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
  227. xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
  228. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
  229. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
  230. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
  231. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
  232. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
  233. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
  234. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
  235. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
  236. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
  237. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
  238. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
  239. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
  240. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
  241. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
  242. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
  243. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
  244. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
  245. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
  246. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
  247. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
  248. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
  249. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
  250. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
  251. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
  252. xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
  253. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
  254. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
  255. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
  256. xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
  257. xinference/thirdparty/indextts/utils/text_utils.py +41 -0
  258. xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
  259. xinference/thirdparty/indextts/utils/utils.py +93 -0
  260. xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
  261. xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
  262. xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
  263. xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
  264. xinference/types.py +105 -2
  265. xinference/ui/gradio/media_interface.py +66 -8
  266. xinference/ui/web/ui/build/asset-manifest.json +6 -6
  267. xinference/ui/web/ui/build/index.html +1 -1
  268. xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
  269. xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
  270. xinference/ui/web/ui/build/static/js/main.d192c4f3.js +3 -0
  271. xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.d192c4f3.js.LICENSE.txt} +0 -7
  272. xinference/ui/web/ui/build/static/js/main.d192c4f3.js.map +1 -0
  273. xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
  274. xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
  275. xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
  276. xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
  277. xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
  278. xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
  279. xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
  280. xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
  281. xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
  282. xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
  283. xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
  284. xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
  285. xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
  286. xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
  287. xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
  288. xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
  289. xinference/ui/web/ui/node_modules/.cache/babel-loader/f995a2425dfb0822fd07127f66ffe9b026883bc156b402eb8bd0b83d52460a93.json +1 -0
  290. xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
  291. xinference/ui/web/ui/package-lock.json +0 -34
  292. xinference/ui/web/ui/package.json +0 -1
  293. xinference/ui/web/ui/src/locales/en.json +9 -3
  294. xinference/ui/web/ui/src/locales/ja.json +9 -3
  295. xinference/ui/web/ui/src/locales/ko.json +9 -3
  296. xinference/ui/web/ui/src/locales/zh.json +9 -3
  297. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/METADATA +24 -4
  298. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/RECORD +302 -76
  299. xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
  300. xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
  301. xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
  302. xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
  303. xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
  304. xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
  305. xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
  306. xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
  307. xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
  308. xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
  309. xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
  310. xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
  311. xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
  312. xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
  313. xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
  314. xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
  315. xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
  316. xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
  317. xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
  318. xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
  319. xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
  320. xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
  321. xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
  322. xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
  323. xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
  324. xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
  325. xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
  326. xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
  327. xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
  328. xinference/ui/web/ui/node_modules/select/bower.json +0 -13
  329. xinference/ui/web/ui/node_modules/select/package.json +0 -29
  330. xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
  331. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/WHEEL +0 -0
  332. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/entry_points.txt +0 -0
  333. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/licenses/LICENSE +0 -0
  334. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/top_level.txt +0 -0
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-08-30T03:57:39+0800",
11
+ "date": "2025-09-30T23:23:16+0800",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "b2d793d0b4a0af632932eb63dbeb1bc91b5b3d74",
15
- "version": "1.9.1"
14
+ "full-revisionid": "98a3c8ad4a6debd97ef29cc05aad3514f4ba488c",
15
+ "version": "1.10.1"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -14,6 +14,7 @@
14
14
 
15
15
  import asyncio
16
16
  import inspect
17
+ import ipaddress
17
18
  import json
18
19
  import logging
19
20
  import multiprocessing
@@ -21,6 +22,7 @@ import os
21
22
  import pprint
22
23
  import sys
23
24
  import time
25
+ import uuid
24
26
  import warnings
25
27
  from typing import Any, Dict, List, Optional, Union
26
28
 
@@ -53,6 +55,7 @@ from xoscar.utils import get_next_port
53
55
  from .._compat import BaseModel, Field
54
56
  from .._version import get_versions
55
57
  from ..constants import (
58
+ XINFERENCE_ALLOWED_IPS,
56
59
  XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
57
60
  XINFERENCE_DEFAULT_ENDPOINT_PORT,
58
61
  XINFERENCE_DISABLE_METRICS,
@@ -61,11 +64,16 @@ from ..constants import (
61
64
  from ..core.event import Event, EventCollectorActor, EventType
62
65
  from ..core.supervisor import SupervisorActor
63
66
  from ..core.utils import CancelMixin, json_dumps
67
+
68
+ # Import Anthropic-related types and availability flag
64
69
  from ..types import (
70
+ ANTHROPIC_AVAILABLE,
71
+ AnthropicMessage,
65
72
  ChatCompletion,
66
73
  Completion,
67
74
  CreateChatCompletion,
68
75
  CreateCompletion,
76
+ CreateMessage,
69
77
  ImageList,
70
78
  PeftModelConfig,
71
79
  SDAPIResult,
@@ -213,6 +221,9 @@ class BuildGradioMediaInterfaceRequest(BaseModel):
213
221
 
214
222
 
215
223
  class RESTfulAPI(CancelMixin):
224
+ # Add new class attributes
225
+ _allowed_ip_list: Optional[List[ipaddress.IPv4Network]] = None
226
+
216
227
  def __init__(
217
228
  self,
218
229
  supervisor_address: str,
@@ -229,6 +240,45 @@ class RESTfulAPI(CancelMixin):
229
240
  self._auth_service = AuthService(auth_config_file)
230
241
  self._router = APIRouter()
231
242
  self._app = FastAPI()
243
+ # Initialize allowed IP list once
244
+ self._init_allowed_ip_list()
245
+
246
+ def _init_allowed_ip_list(self):
247
+ """Initialize the allowed IP list from environment variable."""
248
+ if RESTfulAPI._allowed_ip_list is None:
249
+ # ie: export XINFERENCE_ALLOWED_IPS=192.168.1.0/24
250
+ allowed_ips = XINFERENCE_ALLOWED_IPS
251
+ if allowed_ips:
252
+ RESTfulAPI._allowed_ip_list = []
253
+ for ip in allowed_ips.split(","):
254
+ ip = ip.strip()
255
+ try:
256
+ # Try parsing as network/CIDR
257
+ if "/" in ip:
258
+ RESTfulAPI._allowed_ip_list.append(ipaddress.ip_network(ip))
259
+ else:
260
+ # Parse as single IP
261
+ RESTfulAPI._allowed_ip_list.append(
262
+ ipaddress.ip_network(f"{ip}/32")
263
+ )
264
+ except ValueError:
265
+ logger.error(
266
+ f"Invalid IP address or network: {ip}", exc_info=True
267
+ )
268
+ continue
269
+
270
+ def _is_ip_allowed(self, ip: str) -> bool:
271
+ """Check if an IP is allowed based on configured rules."""
272
+ if not RESTfulAPI._allowed_ip_list:
273
+ return True
274
+
275
+ try:
276
+ client_ip = ipaddress.ip_address(ip)
277
+ return any(
278
+ client_ip in allowed_net for allowed_net in RESTfulAPI._allowed_ip_list
279
+ )
280
+ except ValueError:
281
+ return False
232
282
 
233
283
  def is_authenticated(self):
234
284
  return False if self._auth_service.config is None else True
@@ -287,6 +337,16 @@ class RESTfulAPI(CancelMixin):
287
337
  allow_headers=["*"],
288
338
  )
289
339
 
340
@self._app.middleware("http")
async def ip_restriction_middleware(request: Request, call_next):
    """Answer 403 for peers rejected by ``self._is_ip_allowed``."""
    # request.client may be None when no peer address is available; use a
    # placeholder that is not a valid IP instead of failing on ``.host``.
    client_ip = request.client.host if request.client else "unknown"
    if not self._is_ip_allowed(client_ip):
        return PlainTextResponse(
            status_code=403, content=f"Access denied for IP: {client_ip}\n"
        )
    return await call_next(request)
349
+
290
350
  @self._app.exception_handler(500)
291
351
  async def internal_exception_handler(request: Request, exc: Exception):
292
352
  logger.exception("Handling request %s failed: %s", request.url, exc)
@@ -532,6 +592,40 @@ class RESTfulAPI(CancelMixin):
532
592
  else None
533
593
  ),
534
594
  )
595
+ # Register messages endpoint only if Anthropic is available
596
+ if ANTHROPIC_AVAILABLE:
597
+ self._router.add_api_route(
598
+ "/anthropic/v1/messages",
599
+ self.create_message,
600
+ methods=["POST"],
601
+ response_model=AnthropicMessage,
602
+ dependencies=(
603
+ [Security(self._auth_service, scopes=["models:read"])]
604
+ if self.is_authenticated()
605
+ else None
606
+ ),
607
+ )
608
+ # Register Anthropic models endpoints
609
+ self._router.add_api_route(
610
+ "/anthropic/v1/models",
611
+ self.anthropic_list_models,
612
+ methods=["GET"],
613
+ dependencies=(
614
+ [Security(self._auth_service, scopes=["models:list"])]
615
+ if self.is_authenticated()
616
+ else None
617
+ ),
618
+ )
619
+ self._router.add_api_route(
620
+ "/anthropic/v1/models/{model_id}",
621
+ self.anthropic_get_model,
622
+ methods=["GET"],
623
+ dependencies=(
624
+ [Security(self._auth_service, scopes=["models:list"])]
625
+ if self.is_authenticated()
626
+ else None
627
+ ),
628
+ )
535
629
  self._router.add_api_route(
536
630
  "/v1/embeddings",
537
631
  self.create_embedding,
@@ -994,6 +1088,58 @@ class RESTfulAPI(CancelMixin):
994
1088
  logger.error(e, exc_info=True)
995
1089
  raise HTTPException(status_code=500, detail=str(e))
996
1090
 
1091
async def anthropic_list_models(self) -> JSONResponse:
    """List the running models as Anthropic-style model entries.

    Returns a JSON array with one object per model reported by the
    supervisor's ``list_models()``. Any failure is logged and surfaced as an
    HTTP 500.
    """
    try:
        supervisor_ref = await self._get_supervisor_ref()
        running = await supervisor_ref.list_models()

        # Only running models are returned (kept for compatibility with tests).
        payload = [
            {
                "id": uid,
                "object": "model",
                "created": 0,
                "display_name": info.get("model_name", uid),
                "type": info.get("model_type", "model"),
                "max_tokens": info.get("context_length", 4096),
            }
            for uid, info in running.items()
        ]
        return JSONResponse(content=payload)
    except Exception as e:
        logger.error(e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
1117
+
1118
async def anthropic_get_model(self, model_id: str) -> JSONResponse:
    """Return one running model as an Anthropic-style model object.

    The response contains the supervisor's description of the model plus the
    Anthropic-style fields ``id``, ``object``, ``created``, ``display_name``,
    ``type`` and ``max_tokens``.

    Raises:
        HTTPException: 404 when ``model_id`` is not among the running models,
            500 for any other failure.
    """
    try:
        models = await (await self._get_supervisor_ref()).list_models()

        if model_id not in models:
            # Report an unknown model as 404 rather than letting the KeyError
            # fall through to the generic 500 handler below.
            raise HTTPException(
                status_code=404, detail=f"Model '{model_id}' not found"
            )
        model_info = models[model_id]

        # Spread the raw description first so the Anthropic-style fields,
        # in particular the requested ID, cannot be overridden by it.
        anthropic_model = {
            **model_info,
            "id": model_id,
            "object": "model",
            "created": 0,
            "display_name": model_info.get("model_name", model_id),
            "type": model_info.get("model_type", "model"),
            "max_tokens": model_info.get("context_length", 4096),
        }

        return JSONResponse(content=anthropic_model)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
1142
+
997
1143
  async def describe_model(self, model_uid: str) -> JSONResponse:
998
1144
  try:
999
1145
  data = await (await self._get_supervisor_ref()).describe_model(model_uid)
@@ -1417,6 +1563,151 @@ class RESTfulAPI(CancelMixin):
1417
1563
  self.handle_request_limit_error(e)
1418
1564
  raise HTTPException(status_code=500, detail=str(e))
1419
1565
 
1566
async def create_message(self, request: Request) -> Response:
    """Serve the Anthropic-compatible messages endpoint.

    Parses the JSON body as ``CreateMessage``, resolves the target model among
    the running models and forwards the conversation to ``model.chat``.
    With ``stream`` set, the chat output is relayed as server-sent events;
    otherwise the JSON returned by ``model.chat`` is converted with
    ``_convert_openai_to_anthropic`` and returned as ``application/json``.

    Raises:
        HTTPException: 400 for a missing or invalid prompt, no running models,
            or an unavailable model; 500 for other failures.
    """
    raw_body = await request.json()
    body = CreateMessage.parse_obj(raw_body)

    # Fields handled explicitly below; everything else is passed through.
    exclude = {
        "model",
        "messages",
        "stream",
        "stop_sequences",
        "metadata",
        "tool_choice",
        "tools",
    }
    raw_kwargs = {k: v for k, v in raw_body.items() if k not in exclude}
    kwargs = body.dict(exclude_unset=True, exclude=exclude)

    # guided_decoding params
    kwargs.update(self.extract_guided_params(raw_body=raw_body))

    # TODO: Decide if this default value override is necessary #1061
    if body.max_tokens is None:
        kwargs["max_tokens"] = max_tokens_field.default

    messages = body.messages and list(body.messages)

    if not messages or messages[-1].get("role") not in ["user", "assistant"]:
        raise HTTPException(
            status_code=400, detail="Invalid input. Please specify the prompt."
        )

    # Handle tools parameter
    if hasattr(body, "tools") and body.tools:
        kwargs["tools"] = body.tools

    # Handle tool_choice parameter
    if hasattr(body, "tool_choice") and body.tool_choice:
        kwargs["tool_choice"] = body.tool_choice

    # Get model mapping
    try:
        running_models = await (await self._get_supervisor_ref()).list_models()
    except Exception as e:
        logger.error(f"Failed to get model mapping: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to get model mapping")

    if not running_models:
        raise HTTPException(
            status_code=400,
            detail="No running models available. Please start a model in xinference first.",
        )

    # A requested name containing "claude" is served by the first running model.
    requested_model_id = body.model
    if "claude" in requested_model_id:
        requested_model_id = list(running_models.keys())[0]

    if requested_model_id not in running_models:
        raise HTTPException(
            status_code=400,
            detail=f"Model '{requested_model_id}' is not available. Available models: {list(running_models.keys())}",
        )
    model_uid = requested_model_id

    try:
        model = await (await self._get_supervisor_ref()).get_model(model_uid)
    except ValueError as ve:
        logger.error(str(ve), exc_info=True)
        await self._report_error_event(model_uid, str(ve))
        raise HTTPException(status_code=400, detail=str(ve))
    except Exception as e:
        logger.error(e, exc_info=True)
        await self._report_error_event(model_uid, str(e))
        raise HTTPException(status_code=500, detail=str(e))

    if body.stream:

        async def stream_results():
            iterator = None
            try:
                try:
                    iterator = await model.chat(
                        messages, kwargs, raw_params=raw_kwargs
                    )
                except RuntimeError as re:
                    # handle_request_limit_error may return normally (the
                    # non-streaming branch below relies on that). Re-raise so
                    # the failure is reported by the error handler below
                    # instead of streaming "None" as if it were content.
                    self.handle_request_limit_error(re)
                    raise

                # Check if iterator is actually an async iterator
                if hasattr(iterator, "__aiter__"):
                    async for item in iterator:
                        yield item
                elif isinstance(iterator, (str, bytes)):
                    # Handle case where chat returns bytes/string instead of iterator
                    if isinstance(iterator, bytes):
                        try:
                            content = iterator.decode("utf-8")
                        except UnicodeDecodeError:
                            content = str(iterator)
                    else:
                        content = iterator
                    yield dict(data=json.dumps({"content": content}))
                else:
                    # Fallback: try to iterate normally
                    try:
                        for item in iterator:
                            yield item
                    except TypeError:
                        # If not iterable, yield as single result
                        yield dict(data=json.dumps({"content": str(iterator)}))

                yield "[DONE]"
            except asyncio.CancelledError:
                logger.info(
                    f"Disconnected from client (via refresh/close) {request.client} during chat."
                )
                return
            except Exception as ex:
                ex = await self._get_model_last_error(model.uid, ex)
                logger.exception("Message stream got an error: %s", ex)
                await self._report_error_event(model_uid, str(ex))
                yield dict(data=json.dumps({"error": str(ex)}))
                return
            finally:
                await model.decrease_serve_count()

        return EventSourceResponse(
            stream_results(), ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS
        )
    else:
        try:
            data = await model.chat(messages, kwargs, raw_params=raw_kwargs)
            # Convert OpenAI format to Anthropic format
            openai_response = json.loads(data)
            anthropic_response = self._convert_openai_to_anthropic(
                openai_response, body.model
            )
            return Response(
                json.dumps(anthropic_response), media_type="application/json"
            )
        except Exception as e:
            e = await self._get_model_last_error(model.uid, e)
            logger.error(e, exc_info=True)
            await self._report_error_event(model_uid, str(e))
            self.handle_request_limit_error(e)
            raise HTTPException(status_code=500, detail=str(e))
1710
+
1420
1711
  async def create_embedding(self, request: Request) -> Response:
1421
1712
  payload = await request.json()
1422
1713
  body = CreateEmbeddingRequest.parse_obj(payload)
@@ -1845,7 +2136,7 @@ class RESTfulAPI(CancelMixin):
1845
2136
  async def create_variations(
1846
2137
  self,
1847
2138
  model: str = Form(...),
1848
- image: UploadFile = File(media_type="application/octet-stream"),
2139
+ image: List[UploadFile] = File(media_type="application/octet-stream"),
1849
2140
  prompt: Optional[Union[str, List[str]]] = Form(None),
1850
2141
  negative_prompt: Optional[Union[str, List[str]]] = Form(None),
1851
2142
  n: Optional[int] = Form(1),
@@ -1873,8 +2164,17 @@ class RESTfulAPI(CancelMixin):
1873
2164
  parsed_kwargs = {}
1874
2165
  request_id = parsed_kwargs.get("request_id")
1875
2166
  self._add_running_task(request_id)
2167
+
2168
+ # Handle single image or multiple images
2169
+ if len(image) == 1:
2170
+ # Single image
2171
+ image_data = Image.open(image[0].file)
2172
+ else:
2173
+ # Multiple images - convert to list of PIL Images
2174
+ image_data = [Image.open(img.file) for img in image]
2175
+
1876
2176
  image_list = await model_ref.image_to_image(
1877
- image=Image.open(image.file),
2177
+ image=image_data,
1878
2178
  prompt=prompt,
1879
2179
  negative_prompt=negative_prompt,
1880
2180
  n=n,
@@ -2371,7 +2671,14 @@ class RESTfulAPI(CancelMixin):
2371
2671
  data = await (await self._get_supervisor_ref()).list_model_registrations(
2372
2672
  model_type, detailed=detailed
2373
2673
  )
2374
- return JSONResponse(content=data)
2674
+ # Remove duplicate model names.
2675
+ model_names = set()
2676
+ final_data = []
2677
+ for item in data:
2678
+ if item["model_name"] not in model_names:
2679
+ model_names.add(item["model_name"])
2680
+ final_data.append(item)
2681
+ return JSONResponse(content=final_data)
2375
2682
  except ValueError as re:
2376
2683
  logger.error(re, exc_info=True)
2377
2684
  raise HTTPException(status_code=400, detail=str(re))
@@ -2603,6 +2910,96 @@ class RESTfulAPI(CancelMixin):
2603
2910
 
2604
2911
  return kwargs
2605
2912
 
2913
+ def _convert_openai_to_anthropic(self, openai_response: dict, model: str) -> dict:
2914
+ """
2915
+ Convert OpenAI response format to Anthropic response format.
2916
+
2917
+ Args:
2918
+ openai_response: OpenAI format response
2919
+ model: Model name
2920
+
2921
+ Returns:
2922
+ Anthropic format response
2923
+ """
2924
+
2925
+ # Extract content and tool calls from OpenAI response
2926
+ content_blocks = []
2927
+ stop_reason = "stop"
2928
+
2929
+ if "choices" in openai_response and len(openai_response["choices"]) > 0:
2930
+ choice = openai_response["choices"][0]
2931
+ message = choice.get("message", {})
2932
+
2933
+ # Handle content text
2934
+ content = message.get("content", "")
2935
+ if content:
2936
+ if isinstance(content, str):
2937
+ # If content is a string, use it directly
2938
+ content_blocks.append({"type": "text", "text": content})
2939
+ elif isinstance(content, list):
2940
+ # If content is a list, extract text from each content block
2941
+ for content_block in content:
2942
+ if isinstance(content_block, dict):
2943
+ if content_block.get("type") == "text":
2944
+ text = content_block.get("text", "")
2945
+ if text:
2946
+ content_blocks.append(
2947
+ {"type": "text", "text": text}
2948
+ )
2949
+ elif "text" in content_block:
2950
+ # Handle different content block format
2951
+ text = content_block.get("text", "")
2952
+ if text:
2953
+ content_blocks.append(
2954
+ {"type": "text", "text": text}
2955
+ )
2956
+
2957
+ # Handle tool calls
2958
+ tool_calls = message.get("tool_calls", [])
2959
+ for tool_call in tool_calls:
2960
+ function = tool_call.get("function", {})
2961
+ arguments = function.get("arguments", "{}")
2962
+ try:
2963
+ input_data = json.loads(arguments)
2964
+ except json.JSONDecodeError:
2965
+ input_data = {}
2966
+ tool_use_block = {
2967
+ "type": "tool_use",
2968
+ "cache_control": {"type": "ephemeral"},
2969
+ "id": tool_call.get("id", str(uuid.uuid4())),
2970
+ "name": function.get("name", ""),
2971
+ "input": input_data,
2972
+ }
2973
+ content_blocks.append(tool_use_block)
2974
+
2975
+ # Set stop reason based on finish reason
2976
+ finish_reason = choice.get("finish_reason", "stop")
2977
+ if finish_reason == "tool_calls":
2978
+ stop_reason = "tool_use"
2979
+
2980
+ # Build Anthropic response
2981
+ anthropic_response = {
2982
+ "id": str(uuid.uuid4()),
2983
+ "type": "message",
2984
+ "role": "assistant",
2985
+ "content": content_blocks,
2986
+ "model": model,
2987
+ "stop_reason": stop_reason,
2988
+ "stop_sequence": None,
2989
+ "usage": {
2990
+ "input_tokens": openai_response.get("usage", {}).get(
2991
+ "prompt_tokens", 0
2992
+ ),
2993
+ "output_tokens": openai_response.get("usage", {}).get(
2994
+ "completion_tokens", 0
2995
+ ),
2996
+ "cache_creation_input_tokens": 0,
2997
+ "cache_read_input_tokens": 0,
2998
+ },
2999
+ }
3000
+
3001
+ return anthropic_response
3002
+
2606
3003
 
2607
3004
  def run(
2608
3005
  supervisor_address: str,
@@ -285,7 +285,7 @@ class AsyncRESTfulImageModelHandle(AsyncRESTfulModelHandle):
285
285
 
286
286
  async def image_to_image(
287
287
  self,
288
- image: Union[str, bytes],
288
+ image: Union[str, bytes, List[Union[str, bytes]]],
289
289
  prompt: str,
290
290
  negative_prompt: Optional[str] = None,
291
291
  n: int = 1,
@@ -298,7 +298,7 @@ class AsyncRESTfulImageModelHandle(AsyncRESTfulModelHandle):
298
298
 
299
299
  Parameters
300
300
  ----------
301
- image: `Union[str, bytes]`
301
+ image: `Union[str, bytes, List[Union[str, bytes]]]`
302
302
  The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
303
303
  specified as `torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be
304
304
  accepted as an image. The dimensions of the output image defaults to `image`'s dimensions. If height
@@ -338,7 +338,24 @@ class AsyncRESTfulImageModelHandle(AsyncRESTfulModelHandle):
338
338
  files: List[Any] = []
339
339
  for key, value in params.items():
340
340
  files.append((key, (None, value)))
341
- files.append(("image", ("image", image, "application/octet-stream")))
341
+
342
+ # Handle both single image and list of images
343
+ if isinstance(image, list):
344
+ if len(image) == 0:
345
+ raise ValueError("Image list cannot be empty")
346
+ elif len(image) == 1:
347
+ # Single image in list, use it directly
348
+ files.append(("image", ("image", image[0], "application/octet-stream")))
349
+ else:
350
+ # Multiple images - send all images with same field name
351
+ # FastAPI will collect them into a list
352
+ for img_data in image:
353
+ files.append(
354
+ ("image", ("image", img_data, "application/octet-stream"))
355
+ )
356
+ else:
357
+ # Single image
358
+ files.append(("image", ("image", image, "application/octet-stream")))
342
359
  response = await self.session.post(url, files=files, headers=self.auth_headers)
343
360
  if response.status != 200:
344
361
  raise RuntimeError(
@@ -250,7 +250,7 @@ class RESTfulImageModelHandle(RESTfulModelHandle):
250
250
 
251
251
  def image_to_image(
252
252
  self,
253
- image: Union[str, bytes],
253
+ image: Union[str, bytes, List[Union[str, bytes]]],
254
254
  prompt: str,
255
255
  negative_prompt: Optional[str] = None,
256
256
  n: int = 1,
@@ -263,7 +263,7 @@ class RESTfulImageModelHandle(RESTfulModelHandle):
263
263
 
264
264
  Parameters
265
265
  ----------
266
- image: `Union[str, bytes]`
266
+ image: `Union[str, bytes, List[Union[str, bytes]]]`
267
267
  The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
268
268
  specified as `torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be
269
269
  accepted as an image. The dimensions of the output image defaults to `image`'s dimensions. If height
@@ -302,7 +302,24 @@ class RESTfulImageModelHandle(RESTfulModelHandle):
302
302
  files: List[Any] = []
303
303
  for key, value in params.items():
304
304
  files.append((key, (None, value)))
305
- files.append(("image", ("image", image, "application/octet-stream")))
305
+
306
+ # Handle both single image and list of images
307
+ if isinstance(image, list):
308
+ if len(image) == 0:
309
+ raise ValueError("Image list cannot be empty")
310
+ elif len(image) == 1:
311
+ # Single image in list, use it directly
312
+ files.append(("image", ("image", image[0], "application/octet-stream")))
313
+ else:
314
+ # Multiple images - send all images with same field name
315
+ # FastAPI will collect them into a list
316
+ for img_data in image:
317
+ files.append(
318
+ ("image", ("image", img_data, "application/octet-stream"))
319
+ )
320
+ else:
321
+ # Single image
322
+ files.append(("image", ("image", image, "application/octet-stream")))
306
323
  response = self.session.post(url, files=files, headers=self.auth_headers)
307
324
  if response.status_code != 200:
308
325
  raise RuntimeError(
xinference/constants.py CHANGED
@@ -33,6 +33,7 @@ XINFERENCE_ENV_VIRTUAL_ENV = "XINFERENCE_ENABLE_VIRTUAL_ENV"
33
33
  XINFERENCE_ENV_VIRTUAL_ENV_SKIP_INSTALLED = "XINFERENCE_VIRTUAL_ENV_SKIP_INSTALLED"
34
34
  XINFERENCE_ENV_SSE_PING_ATTEMPTS_SECONDS = "XINFERENCE_SSE_PING_ATTEMPTS_SECONDS"
35
35
  XINFERENCE_ENV_MAX_TOKENS = "XINFERENCE_MAX_TOKENS"
36
+ XINFERENCE_ENV_ALLOWED_IPS = "XINFERENCE_ALLOWED_IPS"
36
37
 
37
38
 
38
39
  def get_xinference_home() -> str:
@@ -110,3 +111,4 @@ XINFERENCE_VIRTUAL_ENV_SKIP_INSTALLED = (
110
111
  if os.getenv(XINFERENCE_ENV_VIRTUAL_ENV_SKIP_INSTALLED)
111
112
  else None
112
113
  )
114
+ XINFERENCE_ALLOWED_IPS = os.getenv(XINFERENCE_ENV_ALLOWED_IPS)