xinference 1.0.1__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (343)
  1. xinference/_compat.py +2 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +77 -71
  4. xinference/core/chat_interface.py +6 -1
  5. xinference/core/model.py +79 -19
  6. xinference/core/supervisor.py +172 -10
  7. xinference/core/utils.py +12 -8
  8. xinference/core/worker.py +102 -4
  9. xinference/deploy/cmdline.py +3 -1
  10. xinference/deploy/test/test_cmdline.py +56 -0
  11. xinference/isolation.py +24 -0
  12. xinference/model/audio/core.py +16 -0
  13. xinference/model/audio/cosyvoice.py +39 -6
  14. xinference/model/audio/f5tts.py +200 -0
  15. xinference/model/audio/f5tts_mlx.py +260 -0
  16. xinference/model/audio/fish_speech.py +36 -111
  17. xinference/model/audio/melotts.py +110 -0
  18. xinference/model/audio/model_spec.json +99 -3
  19. xinference/model/audio/model_spec_modelscope.json +27 -0
  20. xinference/model/audio/utils.py +32 -0
  21. xinference/model/audio/whisper.py +35 -10
  22. xinference/model/embedding/core.py +203 -142
  23. xinference/model/embedding/model_spec.json +7 -0
  24. xinference/model/embedding/model_spec_modelscope.json +8 -0
  25. xinference/model/image/core.py +69 -1
  26. xinference/model/image/model_spec.json +145 -4
  27. xinference/model/image/model_spec_modelscope.json +150 -4
  28. xinference/model/image/stable_diffusion/core.py +45 -13
  29. xinference/model/llm/__init__.py +4 -2
  30. xinference/model/llm/llm_family.json +536 -53
  31. xinference/model/llm/llm_family.py +15 -36
  32. xinference/model/llm/llm_family_modelscope.json +454 -20
  33. xinference/model/llm/memory.py +1 -1
  34. xinference/model/llm/mlx/core.py +248 -52
  35. xinference/model/llm/sglang/core.py +1 -0
  36. xinference/model/llm/transformers/chatglm.py +9 -5
  37. xinference/model/llm/transformers/cogagent.py +272 -0
  38. xinference/model/llm/transformers/core.py +2 -0
  39. xinference/model/llm/transformers/qwen2_vl.py +12 -1
  40. xinference/model/llm/transformers/utils.py +16 -8
  41. xinference/model/llm/utils.py +36 -4
  42. xinference/model/llm/vllm/core.py +53 -10
  43. xinference/model/llm/vllm/xavier/__init__.py +13 -0
  44. xinference/model/llm/vllm/xavier/allocator.py +74 -0
  45. xinference/model/llm/vllm/xavier/block.py +111 -0
  46. xinference/model/llm/vllm/xavier/block_manager.py +71 -0
  47. xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
  48. xinference/model/llm/vllm/xavier/collective.py +74 -0
  49. xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
  50. xinference/model/llm/vllm/xavier/engine.py +247 -0
  51. xinference/model/llm/vllm/xavier/executor.py +134 -0
  52. xinference/model/llm/vllm/xavier/scheduler.py +438 -0
  53. xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
  54. xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
  55. xinference/model/llm/vllm/xavier/transfer.py +319 -0
  56. xinference/model/video/diffusers.py +14 -0
  57. xinference/model/video/model_spec.json +15 -0
  58. xinference/model/video/model_spec_modelscope.json +16 -0
  59. xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
  60. xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
  61. xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
  62. xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
  63. xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
  64. xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
  65. xinference/thirdparty/cosyvoice/bin/train.py +42 -8
  66. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
  67. xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
  68. xinference/thirdparty/cosyvoice/cli/model.py +330 -80
  69. xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
  70. xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
  71. xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
  72. xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
  73. xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
  74. xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
  75. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
  76. xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
  77. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
  78. xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
  79. xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
  80. xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
  81. xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
  82. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
  83. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
  84. xinference/thirdparty/cosyvoice/utils/common.py +28 -1
  85. xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
  86. xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
  87. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
  88. xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
  89. xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
  90. xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
  91. xinference/thirdparty/f5_tts/api.py +166 -0
  92. xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
  93. xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
  94. xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
  95. xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
  96. xinference/thirdparty/f5_tts/eval/README.md +49 -0
  97. xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
  98. xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
  99. xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
  100. xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
  101. xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
  102. xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
  103. xinference/thirdparty/f5_tts/infer/README.md +191 -0
  104. xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
  105. xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
  106. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
  107. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
  108. xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
  109. xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
  110. xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
  111. xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
  112. xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
  113. xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
  114. xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
  115. xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
  116. xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
  117. xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
  118. xinference/thirdparty/f5_tts/model/__init__.py +10 -0
  119. xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
  120. xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
  121. xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
  122. xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
  123. xinference/thirdparty/f5_tts/model/cfm.py +285 -0
  124. xinference/thirdparty/f5_tts/model/dataset.py +319 -0
  125. xinference/thirdparty/f5_tts/model/modules.py +658 -0
  126. xinference/thirdparty/f5_tts/model/trainer.py +366 -0
  127. xinference/thirdparty/f5_tts/model/utils.py +185 -0
  128. xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
  129. xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
  130. xinference/thirdparty/f5_tts/socket_server.py +159 -0
  131. xinference/thirdparty/f5_tts/train/README.md +77 -0
  132. xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
  133. xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
  134. xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
  135. xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
  136. xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
  137. xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
  138. xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
  139. xinference/thirdparty/f5_tts/train/train.py +75 -0
  140. xinference/thirdparty/fish_speech/fish_speech/conversation.py +94 -83
  141. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +63 -20
  142. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +1 -26
  143. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
  144. xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
  145. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
  146. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1 -1
  147. xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +7 -13
  148. xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
  149. xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
  150. xinference/thirdparty/fish_speech/tools/fish_e2e.py +2 -2
  151. xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
  152. xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
  153. xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
  154. xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
  155. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
  156. xinference/thirdparty/fish_speech/tools/llama/generate.py +117 -89
  157. xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
  158. xinference/thirdparty/fish_speech/tools/schema.py +11 -28
  159. xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
  160. xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
  161. xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
  162. xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
  163. xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
  164. xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
  165. xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
  166. xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
  167. xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
  168. xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
  169. xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
  170. xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
  171. xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
  172. xinference/thirdparty/matcha/utils/utils.py +2 -2
  173. xinference/thirdparty/melo/api.py +135 -0
  174. xinference/thirdparty/melo/app.py +61 -0
  175. xinference/thirdparty/melo/attentions.py +459 -0
  176. xinference/thirdparty/melo/commons.py +160 -0
  177. xinference/thirdparty/melo/configs/config.json +94 -0
  178. xinference/thirdparty/melo/data/example/metadata.list +20 -0
  179. xinference/thirdparty/melo/data_utils.py +413 -0
  180. xinference/thirdparty/melo/download_utils.py +67 -0
  181. xinference/thirdparty/melo/infer.py +25 -0
  182. xinference/thirdparty/melo/init_downloads.py +14 -0
  183. xinference/thirdparty/melo/losses.py +58 -0
  184. xinference/thirdparty/melo/main.py +36 -0
  185. xinference/thirdparty/melo/mel_processing.py +174 -0
  186. xinference/thirdparty/melo/models.py +1030 -0
  187. xinference/thirdparty/melo/modules.py +598 -0
  188. xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
  189. xinference/thirdparty/melo/monotonic_align/core.py +46 -0
  190. xinference/thirdparty/melo/preprocess_text.py +135 -0
  191. xinference/thirdparty/melo/split_utils.py +174 -0
  192. xinference/thirdparty/melo/text/__init__.py +35 -0
  193. xinference/thirdparty/melo/text/chinese.py +199 -0
  194. xinference/thirdparty/melo/text/chinese_bert.py +107 -0
  195. xinference/thirdparty/melo/text/chinese_mix.py +253 -0
  196. xinference/thirdparty/melo/text/cleaner.py +36 -0
  197. xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
  198. xinference/thirdparty/melo/text/cmudict.rep +129530 -0
  199. xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
  200. xinference/thirdparty/melo/text/english.py +284 -0
  201. xinference/thirdparty/melo/text/english_bert.py +39 -0
  202. xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
  203. xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
  204. xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
  205. xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
  206. xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
  207. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
  208. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
  209. xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
  210. xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
  211. xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
  212. xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
  213. xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
  214. xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
  215. xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
  216. xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
  217. xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
  218. xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
  219. xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
  220. xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
  221. xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
  222. xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
  223. xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
  224. xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
  225. xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
  226. xinference/thirdparty/melo/text/french.py +94 -0
  227. xinference/thirdparty/melo/text/french_bert.py +39 -0
  228. xinference/thirdparty/melo/text/japanese.py +647 -0
  229. xinference/thirdparty/melo/text/japanese_bert.py +49 -0
  230. xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
  231. xinference/thirdparty/melo/text/korean.py +192 -0
  232. xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
  233. xinference/thirdparty/melo/text/spanish.py +122 -0
  234. xinference/thirdparty/melo/text/spanish_bert.py +39 -0
  235. xinference/thirdparty/melo/text/symbols.py +290 -0
  236. xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
  237. xinference/thirdparty/melo/train.py +635 -0
  238. xinference/thirdparty/melo/train.sh +19 -0
  239. xinference/thirdparty/melo/transforms.py +209 -0
  240. xinference/thirdparty/melo/utils.py +424 -0
  241. xinference/types.py +15 -0
  242. xinference/web/ui/build/asset-manifest.json +6 -6
  243. xinference/web/ui/build/index.html +1 -1
  244. xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
  245. xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
  246. xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
  247. xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
  248. xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
  249. xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
  250. xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
  251. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
  252. xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
  253. xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
  254. xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
  255. xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
  256. xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
  257. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
  258. xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
  259. xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
  260. xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
  261. xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
  262. xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
  263. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
  264. xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
  265. xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
  266. xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
  267. xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
  268. xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
  269. xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
  270. xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
  271. xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
  272. xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
  273. xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
  274. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
  275. xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
  276. xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
  277. xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
  278. xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
  279. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
  280. xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
  281. xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
  282. xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
  283. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
  284. xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
  285. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
  286. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
  287. xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
  288. xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
  289. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
  290. xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
  291. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
  292. xinference/web/ui/node_modules/.package-lock.json +67 -3
  293. xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
  294. xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
  295. xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
  296. xinference/web/ui/node_modules/i18next/package.json +129 -0
  297. xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
  298. xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
  299. xinference/web/ui/node_modules/react-i18next/package.json +162 -0
  300. xinference/web/ui/node_modules/void-elements/package.json +34 -0
  301. xinference/web/ui/package-lock.json +69 -3
  302. xinference/web/ui/package.json +2 -0
  303. xinference/web/ui/src/locales/en.json +186 -0
  304. xinference/web/ui/src/locales/zh.json +186 -0
  305. {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/METADATA +68 -32
  306. {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/RECORD +316 -122
  307. xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
  308. xinference/thirdparty/fish_speech/tools/api.py +0 -943
  309. xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -95
  310. xinference/thirdparty/fish_speech/tools/webui.py +0 -548
  311. xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
  312. xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
  313. xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
  314. xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
  315. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
  316. xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
  317. xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
  318. xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
  319. xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
  320. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
  321. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
  322. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
  323. xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
  324. xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
  325. xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
  326. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
  327. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
  328. xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
  329. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
  330. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
  331. xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
  332. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
  333. xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
  334. /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
  335. /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
  336. /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
  337. /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
  338. /xinference/thirdparty/{fish_speech/tools → melo/text/fr_phonemizer}/__init__.py +0 -0
  339. /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
  340. {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
  341. {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/WHEEL +0 -0
  342. {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
  343. {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
xinference/_compat.py CHANGED
@@ -72,6 +72,7 @@ OpenAIChatCompletionToolParam = create_model_from_typeddict(ChatCompletionToolPa
72
72
  OpenAIChatCompletionNamedToolChoiceParam = create_model_from_typeddict(
73
73
  ChatCompletionNamedToolChoiceParam
74
74
  )
75
+ from openai._types import Body
75
76
 
76
77
 
77
78
  class JSONSchema(BaseModel):
@@ -120,4 +121,5 @@ class CreateChatCompletionOpenAI(BaseModel):
120
121
  tools: Optional[Iterable[OpenAIChatCompletionToolParam]] # type: ignore
121
122
  top_logprobs: Optional[int]
122
123
  top_p: Optional[float]
124
+ extra_body: Optional[Body]
123
125
  user: Optional[str]
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2024-11-29T16:57:04+0800",
11
+ "date": "2025-01-24T16:52:57+0800",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "eb8ddd431f5c5fcb2216e25e0d43745f8455d9b9",
15
- "version": "1.0.1"
14
+ "full-revisionid": "a57b99b07b40d1082f69a8fc5b968d56bc3636bc",
15
+ "version": "1.2.1"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
xinference/api/restful_api.py CHANGED
@@ -94,9 +94,9 @@ class CreateCompletionRequest(CreateCompletion):
94
94
 
95
95
  class CreateEmbeddingRequest(BaseModel):
96
96
  model: str
97
- input: Union[str, List[str], List[int], List[List[int]]] = Field(
98
- description="The input to embed."
99
- )
97
+ input: Union[
98
+ str, List[str], List[int], List[List[int]], Dict[str, str], List[Dict[str, str]]
99
+ ] = Field(description="The input to embed.")
100
100
  user: Optional[str] = None
101
101
 
102
102
  class Config:
@@ -1214,6 +1214,19 @@ class RESTfulAPI(CancelMixin):
1214
1214
  async def get_address(self) -> JSONResponse:
1215
1215
  return JSONResponse(content=self._supervisor_address)
1216
1216
 
1217
+ async def _get_model_last_error(self, replica_model_uid: bytes, e: Exception):
1218
+ if not isinstance(e, xo.ServerClosed):
1219
+ return e
1220
+ try:
1221
+ model_status = await (await self._get_supervisor_ref()).get_model_status(
1222
+ replica_model_uid.decode("utf-8")
1223
+ )
1224
+ if model_status is not None and model_status.last_error:
1225
+ return Exception(model_status.last_error)
1226
+ except Exception as ex:
1227
+ return ex
1228
+ return e
1229
+
1217
1230
  async def create_completion(self, request: Request) -> Response:
1218
1231
  raw_body = await request.json()
1219
1232
  body = CreateCompletionRequest.parse_obj(raw_body)
@@ -1272,6 +1285,7 @@ class RESTfulAPI(CancelMixin):
1272
1285
  )
1273
1286
  return
1274
1287
  except Exception as ex:
1288
+ ex = await self._get_model_last_error(model.uid, ex)
1275
1289
  logger.exception("Completion stream got an error: %s", ex)
1276
1290
  await self._report_error_event(model_uid, str(ex))
1277
1291
  # https://github.com/openai/openai-python/blob/e0aafc6c1a45334ac889fe3e54957d309c3af93f/src/openai/_streaming.py#L107
@@ -1286,6 +1300,7 @@ class RESTfulAPI(CancelMixin):
1286
1300
  data = await model.generate(body.prompt, kwargs, raw_params=raw_kwargs)
1287
1301
  return Response(data, media_type="application/json")
1288
1302
  except Exception as e:
1303
+ e = await self._get_model_last_error(model.uid, e)
1289
1304
  logger.error(e, exc_info=True)
1290
1305
  await self._report_error_event(model_uid, str(e))
1291
1306
  self.handle_request_limit_error(e)
@@ -1317,14 +1332,11 @@ class RESTfulAPI(CancelMixin):
1317
1332
  try:
1318
1333
  embedding = await model.create_embedding(body.input, **kwargs)
1319
1334
  return Response(embedding, media_type="application/json")
1320
- except RuntimeError as re:
1321
- logger.error(re, exc_info=True)
1322
- await self._report_error_event(model_uid, str(re))
1323
- self.handle_request_limit_error(re)
1324
- raise HTTPException(status_code=400, detail=str(re))
1325
1335
  except Exception as e:
1336
+ e = await self._get_model_last_error(model.uid, e)
1326
1337
  logger.error(e, exc_info=True)
1327
1338
  await self._report_error_event(model_uid, str(e))
1339
+ self.handle_request_limit_error(e)
1328
1340
  raise HTTPException(status_code=500, detail=str(e))
1329
1341
 
1330
1342
  async def convert_ids_to_tokens(self, request: Request) -> Response:
@@ -1352,14 +1364,11 @@ class RESTfulAPI(CancelMixin):
1352
1364
  try:
1353
1365
  decoded_texts = await model.convert_ids_to_tokens(body.input, **kwargs)
1354
1366
  return Response(decoded_texts, media_type="application/json")
1355
- except RuntimeError as re:
1356
- logger.error(re, exc_info=True)
1357
- await self._report_error_event(model_uid, str(re))
1358
- self.handle_request_limit_error(re)
1359
- raise HTTPException(status_code=400, detail=str(re))
1360
1367
  except Exception as e:
1368
+ e = await self._get_model_last_error(model.uid, e)
1361
1369
  logger.error(e, exc_info=True)
1362
1370
  await self._report_error_event(model_uid, str(e))
1371
+ self.handle_request_limit_error(e)
1363
1372
  raise HTTPException(status_code=500, detail=str(e))
1364
1373
 
1365
1374
  async def rerank(self, request: Request) -> Response:
@@ -1393,14 +1402,11 @@ class RESTfulAPI(CancelMixin):
1393
1402
  **parsed_kwargs,
1394
1403
  )
1395
1404
  return Response(scores, media_type="application/json")
1396
- except RuntimeError as re:
1397
- logger.error(re, exc_info=True)
1398
- await self._report_error_event(model_uid, str(re))
1399
- self.handle_request_limit_error(re)
1400
- raise HTTPException(status_code=400, detail=str(re))
1401
1405
  except Exception as e:
1406
+ e = await self._get_model_last_error(model.uid, e)
1402
1407
  logger.error(e, exc_info=True)
1403
1408
  await self._report_error_event(model_uid, str(e))
1409
+ self.handle_request_limit_error(e)
1404
1410
  raise HTTPException(status_code=500, detail=str(e))
1405
1411
 
1406
1412
  async def create_transcriptions(
@@ -1445,13 +1451,11 @@ class RESTfulAPI(CancelMixin):
1445
1451
  **parsed_kwargs,
1446
1452
  )
1447
1453
  return Response(content=transcription, media_type="application/json")
1448
- except RuntimeError as re:
1449
- logger.error(re, exc_info=True)
1450
- await self._report_error_event(model_uid, str(re))
1451
- raise HTTPException(status_code=400, detail=str(re))
1452
1454
  except Exception as e:
1455
+ e = await self._get_model_last_error(model_ref.uid, e)
1453
1456
  logger.error(e, exc_info=True)
1454
1457
  await self._report_error_event(model_uid, str(e))
1458
+ self.handle_request_limit_error(e)
1455
1459
  raise HTTPException(status_code=500, detail=str(e))
1456
1460
 
1457
1461
  async def create_translations(
@@ -1496,13 +1500,11 @@ class RESTfulAPI(CancelMixin):
1496
1500
  **parsed_kwargs,
1497
1501
  )
1498
1502
  return Response(content=translation, media_type="application/json")
1499
- except RuntimeError as re:
1500
- logger.error(re, exc_info=True)
1501
- await self._report_error_event(model_uid, str(re))
1502
- raise HTTPException(status_code=400, detail=str(re))
1503
1503
  except Exception as e:
1504
+ e = await self._get_model_last_error(model_ref.uid, e)
1504
1505
  logger.error(e, exc_info=True)
1505
1506
  await self._report_error_event(model_uid, str(e))
1507
+ self.handle_request_limit_error(e)
1506
1508
  raise HTTPException(status_code=500, detail=str(e))
1507
1509
 
1508
1510
  async def create_speech(
@@ -1558,14 +1560,11 @@ class RESTfulAPI(CancelMixin):
1558
1560
  )
1559
1561
  else:
1560
1562
  return Response(media_type="application/octet-stream", content=out)
1561
- except RuntimeError as re:
1562
- logger.error(re, exc_info=True)
1563
- await self._report_error_event(model_uid, str(re))
1564
- self.handle_request_limit_error(re)
1565
- raise HTTPException(status_code=400, detail=str(re))
1566
1563
  except Exception as e:
1564
+ e = await self._get_model_last_error(model.uid, e)
1567
1565
  logger.error(e, exc_info=True)
1568
1566
  await self._report_error_event(model_uid, str(e))
1567
+ self.handle_request_limit_error(e)
1569
1568
  raise HTTPException(status_code=500, detail=str(e))
1570
1569
 
1571
1570
  async def get_progress(self, request_id: str) -> JSONResponse:
@@ -1611,14 +1610,11 @@ class RESTfulAPI(CancelMixin):
1611
1610
  logger.error(err_str)
1612
1611
  await self._report_error_event(model_uid, err_str)
1613
1612
  raise HTTPException(status_code=409, detail=err_str)
1614
- except RuntimeError as re:
1615
- logger.error(re, exc_info=True)
1616
- await self._report_error_event(model_uid, str(re))
1617
- self.handle_request_limit_error(re)
1618
- raise HTTPException(status_code=400, detail=str(re))
1619
1613
  except Exception as e:
1614
+ e = await self._get_model_last_error(model.uid, e)
1620
1615
  logger.error(e, exc_info=True)
1621
1616
  await self._report_error_event(model_uid, str(e))
1617
+ self.handle_request_limit_error(e)
1622
1618
  raise HTTPException(status_code=500, detail=str(e))
1623
1619
 
1624
1620
  async def sdapi_options(self, request: Request) -> Response:
@@ -1689,14 +1685,11 @@ class RESTfulAPI(CancelMixin):
1689
1685
  **kwargs,
1690
1686
  )
1691
1687
  return Response(content=image_list, media_type="application/json")
1692
- except RuntimeError as re:
1693
- logger.error(re, exc_info=True)
1694
- await self._report_error_event(model_uid, str(re))
1695
- self.handle_request_limit_error(re)
1696
- raise HTTPException(status_code=400, detail=str(re))
1697
1688
  except Exception as e:
1689
+ e = await self._get_model_last_error(model.uid, e)
1698
1690
  logger.error(e, exc_info=True)
1699
1691
  await self._report_error_event(model_uid, str(e))
1692
+ self.handle_request_limit_error(e)
1700
1693
  raise HTTPException(status_code=500, detail=str(e))
1701
1694
 
1702
1695
  async def sdapi_img2img(self, request: Request) -> Response:
@@ -1723,14 +1716,11 @@ class RESTfulAPI(CancelMixin):
1723
1716
  **kwargs,
1724
1717
  )
1725
1718
  return Response(content=image_list, media_type="application/json")
1726
- except RuntimeError as re:
1727
- logger.error(re, exc_info=True)
1728
- await self._report_error_event(model_uid, str(re))
1729
- self.handle_request_limit_error(re)
1730
- raise HTTPException(status_code=400, detail=str(re))
1731
1719
  except Exception as e:
1720
+ e = await self._get_model_last_error(model.uid, e)
1732
1721
  logger.error(e, exc_info=True)
1733
1722
  await self._report_error_event(model_uid, str(e))
1723
+ self.handle_request_limit_error(e)
1734
1724
  raise HTTPException(status_code=500, detail=str(e))
1735
1725
 
1736
1726
  async def create_variations(
@@ -1779,13 +1769,11 @@ class RESTfulAPI(CancelMixin):
1779
1769
  logger.error(err_str)
1780
1770
  await self._report_error_event(model_uid, err_str)
1781
1771
  raise HTTPException(status_code=409, detail=err_str)
1782
- except RuntimeError as re:
1783
- logger.error(re, exc_info=True)
1784
- await self._report_error_event(model_uid, str(re))
1785
- raise HTTPException(status_code=400, detail=str(re))
1786
1772
  except Exception as e:
1773
+ e = await self._get_model_last_error(model_ref.uid, e)
1787
1774
  logger.error(e, exc_info=True)
1788
1775
  await self._report_error_event(model_uid, str(e))
1776
+ self.handle_request_limit_error(e)
1789
1777
  raise HTTPException(status_code=500, detail=str(e))
1790
1778
 
1791
1779
  async def create_inpainting(
@@ -1841,13 +1829,11 @@ class RESTfulAPI(CancelMixin):
1841
1829
  logger.error(err_str)
1842
1830
  await self._report_error_event(model_uid, err_str)
1843
1831
  raise HTTPException(status_code=409, detail=err_str)
1844
- except RuntimeError as re:
1845
- logger.error(re, exc_info=True)
1846
- await self._report_error_event(model_uid, str(re))
1847
- raise HTTPException(status_code=400, detail=str(re))
1848
1832
  except Exception as e:
1833
+ e = await self._get_model_last_error(model_ref.uid, e)
1849
1834
  logger.error(e, exc_info=True)
1850
1835
  await self._report_error_event(model_uid, str(e))
1836
+ self.handle_request_limit_error(e)
1851
1837
  raise HTTPException(status_code=500, detail=str(e))
1852
1838
 
1853
1839
  async def create_ocr(
@@ -1887,13 +1873,11 @@ class RESTfulAPI(CancelMixin):
1887
1873
  logger.error(err_str)
1888
1874
  await self._report_error_event(model_uid, err_str)
1889
1875
  raise HTTPException(status_code=409, detail=err_str)
1890
- except RuntimeError as re:
1891
- logger.error(re, exc_info=True)
1892
- await self._report_error_event(model_uid, str(re))
1893
- raise HTTPException(status_code=400, detail=str(re))
1894
1876
  except Exception as e:
1877
+ e = await self._get_model_last_error(model_ref.uid, e)
1895
1878
  logger.error(e, exc_info=True)
1896
1879
  await self._report_error_event(model_uid, str(e))
1880
+ self.handle_request_limit_error(e)
1897
1881
  raise HTTPException(status_code=500, detail=str(e))
1898
1882
 
1899
1883
  async def create_flexible_infer(self, request: Request) -> Response:
@@ -1920,14 +1904,11 @@ class RESTfulAPI(CancelMixin):
1920
1904
  try:
1921
1905
  result = await model.infer(**kwargs)
1922
1906
  return Response(result, media_type="application/json")
1923
- except RuntimeError as re:
1924
- logger.error(re, exc_info=True)
1925
- await self._report_error_event(model_uid, str(re))
1926
- self.handle_request_limit_error(re)
1927
- raise HTTPException(status_code=400, detail=str(re))
1928
1907
  except Exception as e:
1908
+ e = await self._get_model_last_error(model.uid, e)
1929
1909
  logger.error(e, exc_info=True)
1930
1910
  await self._report_error_event(model_uid, str(e))
1911
+ self.handle_request_limit_error(e)
1931
1912
  raise HTTPException(status_code=500, detail=str(e))
1932
1913
 
1933
1914
  async def create_videos(self, request: Request) -> Response:
@@ -1952,14 +1933,11 @@ class RESTfulAPI(CancelMixin):
1952
1933
  **kwargs,
1953
1934
  )
1954
1935
  return Response(content=video_list, media_type="application/json")
1955
- except RuntimeError as re:
1956
- logger.error(re, exc_info=True)
1957
- await self._report_error_event(model_uid, str(re))
1958
- self.handle_request_limit_error(re)
1959
- raise HTTPException(status_code=400, detail=str(re))
1960
1936
  except Exception as e:
1937
+ e = await self._get_model_last_error(model.uid, e)
1961
1938
  logger.error(e, exc_info=True)
1962
1939
  await self._report_error_event(model_uid, str(e))
1940
+ self.handle_request_limit_error(e)
1963
1941
  raise HTTPException(status_code=500, detail=str(e))
1964
1942
 
1965
1943
  async def create_chat_completion(self, request: Request) -> Response:
@@ -2044,7 +2022,6 @@ class RESTfulAPI(CancelMixin):
2044
2022
  )
2045
2023
  if body.tools and body.stream:
2046
2024
  is_vllm = await model.is_vllm_backend()
2047
-
2048
2025
  if not (
2049
2026
  (is_vllm and model_family in QWEN_TOOL_CALL_FAMILY)
2050
2027
  or (not is_vllm and model_family in GLM4_TOOL_CALL_FAMILY)
@@ -2054,7 +2031,8 @@ class RESTfulAPI(CancelMixin):
2054
2031
  detail="Streaming support for tool calls is available only when using "
2055
2032
  "Qwen models with vLLM backend or GLM4-chat models without vLLM backend.",
2056
2033
  )
2057
-
2034
+ if "skip_special_tokens" in raw_kwargs and await model.is_vllm_backend():
2035
+ kwargs["skip_special_tokens"] = raw_kwargs["skip_special_tokens"]
2058
2036
  if body.stream:
2059
2037
 
2060
2038
  async def stream_results():
@@ -2084,6 +2062,7 @@ class RESTfulAPI(CancelMixin):
2084
2062
  # TODO: Cannot yield here. Yielding here would lead to an error for the next streaming request.
2085
2063
  return
2086
2064
  except Exception as ex:
2065
+ ex = await self._get_model_last_error(model.uid, ex)
2087
2066
  logger.exception("Chat completion stream got an error: %s", ex)
2088
2067
  await self._report_error_event(model_uid, str(ex))
2089
2068
  # https://github.com/openai/openai-python/blob/e0aafc6c1a45334ac889fe3e54957d309c3af93f/src/openai/_streaming.py#L107
@@ -2102,6 +2081,7 @@ class RESTfulAPI(CancelMixin):
2102
2081
  )
2103
2082
  return Response(content=data, media_type="application/json")
2104
2083
  except Exception as e:
2084
+ e = await self._get_model_last_error(model.uid, e)
2105
2085
  logger.error(e, exc_info=True)
2106
2086
  await self._report_error_event(model_uid, str(e))
2107
2087
  self.handle_request_limit_error(e)
@@ -2346,7 +2326,8 @@ class RESTfulAPI(CancelMixin):
2346
2326
  @staticmethod
2347
2327
  def extract_guided_params(raw_body: dict) -> dict:
2348
2328
  kwargs = {}
2349
- if raw_body.get("guided_json") is not None:
2329
+ raw_extra_body: dict = raw_body.get("extra_body") # type: ignore
2330
+ if raw_body.get("guided_json"):
2350
2331
  kwargs["guided_json"] = raw_body.get("guided_json")
2351
2332
  if raw_body.get("guided_regex") is not None:
2352
2333
  kwargs["guided_regex"] = raw_body.get("guided_regex")
@@ -2362,6 +2343,31 @@ class RESTfulAPI(CancelMixin):
2362
2343
  kwargs["guided_whitespace_pattern"] = raw_body.get(
2363
2344
  "guided_whitespace_pattern"
2364
2345
  )
2346
+ # Parse OpenAI extra_body
2347
+ if raw_extra_body is not None:
2348
+ if raw_extra_body.get("guided_json"):
2349
+ kwargs["guided_json"] = raw_extra_body.get("guided_json")
2350
+ if raw_extra_body.get("guided_regex") is not None:
2351
+ kwargs["guided_regex"] = raw_extra_body.get("guided_regex")
2352
+ if raw_extra_body.get("guided_choice") is not None:
2353
+ kwargs["guided_choice"] = raw_extra_body.get("guided_choice")
2354
+ if raw_extra_body.get("guided_grammar") is not None:
2355
+ kwargs["guided_grammar"] = raw_extra_body.get("guided_grammar")
2356
+ if raw_extra_body.get("guided_json_object") is not None:
2357
+ kwargs["guided_json_object"] = raw_extra_body.get("guided_json_object")
2358
+ if raw_extra_body.get("guided_decoding_backend") is not None:
2359
+ kwargs["guided_decoding_backend"] = raw_extra_body.get(
2360
+ "guided_decoding_backend"
2361
+ )
2362
+ if raw_extra_body.get("guided_whitespace_pattern") is not None:
2363
+ kwargs["guided_whitespace_pattern"] = raw_extra_body.get(
2364
+ "guided_whitespace_pattern"
2365
+ )
2366
+ if raw_extra_body.get("platform") is not None:
2367
+ kwargs["platform"] = raw_extra_body.get("platform")
2368
+ if raw_extra_body.get("format") is not None:
2369
+ kwargs["format"] = raw_extra_body.get("format")
2370
+
2365
2371
  return kwargs
2366
2372
 
2367
2373
 
@@ -13,6 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import base64
16
+ import html
16
17
  import logging
17
18
  import os
18
19
  from io import BytesIO
@@ -137,7 +138,11 @@ class GradioInterface:
137
138
  if "content" not in delta:
138
139
  continue
139
140
  else:
140
- response_content += delta["content"]
141
+ # Some models, such as deepseek-r1-distill-qwen,
142
+ # generate <think>...</think> blocks in their output.
143
+ # In Gradio, such output would not be rendered,
144
+ # so escape HTML tags in the content in advance.
145
+ response_content += html.escape(delta["content"])
141
146
  yield response_content
142
147
 
143
148
  yield response_content
xinference/core/model.py CHANGED
@@ -35,6 +35,7 @@ from typing import (
35
35
  List,
36
36
  Optional,
37
37
  Union,
38
+ no_type_check,
38
39
  )
39
40
 
40
41
  import sse_starlette.sse
@@ -78,6 +79,9 @@ XINFERENCE_BATCHING_ALLOWED_VISION_MODELS = [
78
79
  ]
79
80
 
80
81
  XINFERENCE_TEXT_TO_IMAGE_BATCHING_ALLOWED_MODELS = ["FLUX.1-dev", "FLUX.1-schnell"]
82
# Test-only switch: when enabled, the OOM-handling code paths raise a
# synthetic OutOfMemoryError instead of running the wrapped call.
# The value is parsed explicitly because bool() of any non-empty string is
# True, so "0" or "false" would otherwise turn the switch on.
XINFERENCE_TEST_OUT_OF_MEMORY_ERROR = os.getenv(
    "XINFERENCE_TEST_OUT_OF_MEMORY_ERROR", ""
).strip().lower() not in ("", "0", "false", "no", "off")
81
85
 
82
86
 
83
87
  def request_limit(fn):
@@ -118,20 +122,25 @@ def request_limit(fn):
118
122
 
119
123
  def oom_check(fn):
120
124
  @functools.wraps(fn)
121
- def _wrapper(*args, **kwargs):
125
+ def _wrapper(self, *args, **kwargs):
122
126
  try:
123
- return fn(*args, **kwargs)
124
- except OutOfMemoryError:
125
- logger.exception("Model actor is out of memory.")
126
- os._exit(1)
127
+ if XINFERENCE_TEST_OUT_OF_MEMORY_ERROR:
128
+ raise OutOfMemoryError("Test Out of Memory Error")
129
+ return fn(self, *args, **kwargs)
130
+ except OutOfMemoryError as ex:
131
+ assert self._loop is not None
132
+ asyncio.run_coroutine_threadsafe(
133
+ self._handle_oom_error(ex), loop=self._loop
134
+ )
127
135
 
128
136
  @functools.wraps(fn)
129
- async def _async_wrapper(*args, **kwargs):
137
+ async def _async_wrapper(self, *args, **kwargs):
130
138
  try:
131
- return await fn(*args, **kwargs)
132
- except OutOfMemoryError:
133
- logger.exception("Model actor is out of memory.")
134
- os._exit(1)
139
+ if XINFERENCE_TEST_OUT_OF_MEMORY_ERROR:
140
+ raise OutOfMemoryError("Test Out of Memory Error")
141
+ return await fn(self, *args, **kwargs)
142
+ except OutOfMemoryError as ex:
143
+ await self._handle_oom_error(ex)
135
144
 
136
145
  assert not inspect.isasyncgen(fn)
137
146
  assert not inspect.isgenerator(fn)
@@ -178,6 +187,16 @@ class ModelActor(xo.StatelessActor, CancelMixin):
178
187
  if hasattr(self._model, "stop") and callable(self._model.stop):
179
188
  self._model.stop()
180
189
 
190
+ if isinstance(self._model, LLMVLLMModel):
191
+ if self._transfer_ref is not None:
192
+ try:
193
+ await xo.destroy_actor(self._transfer_ref)
194
+ del self._transfer_ref
195
+ except Exception as e:
196
+ logger.debug(
197
+ f"Destroy transfer actor failed, address: {self.address}, error: {e}"
198
+ )
199
+
181
200
  if (
182
201
  isinstance(self._model, (LLMPytorchModel, LLMVLLMModel, SGLANGModel))
183
202
  and self._model.model_spec.model_format == "pytorch"
@@ -206,6 +225,7 @@ class ModelActor(xo.StatelessActor, CancelMixin):
206
225
  replica_model_uid: str,
207
226
  model_description: Optional["ModelDescription"] = None,
208
227
  request_limits: Optional[int] = None,
228
+ xavier_config: Optional[Dict] = None,
209
229
  ):
210
230
  super().__init__()
211
231
  from ..model.llm.lmdeploy.core import LMDeployModel
@@ -247,6 +267,11 @@ class ModelActor(xo.StatelessActor, CancelMixin):
247
267
  self._scheduler_ref = None
248
268
  self._text_to_image_scheduler_ref = None
249
269
 
270
+ if isinstance(self._model, VLLMModel):
271
+ self._xavier_config = xavier_config
272
+ self._model.set_xavier_config(xavier_config)
273
+ self._transfer_ref = None
274
+
250
275
  async def __post_create__(self):
251
276
  self._loop = asyncio.get_running_loop()
252
277
 
@@ -278,6 +303,29 @@ class ModelActor(xo.StatelessActor, CancelMixin):
278
303
  def decrease_serve_count(self):
279
304
  self._serve_count -= 1
280
305
 
306
@no_type_check
async def start_transfer_for_vllm(self, rank_addresses: List[str]):
    """Create this replica's transfer actor and initialise xavier on the model.

    The actor is created at this actor's own address with a uid derived
    from the configured rank; the remaining settings are read from the
    stored xavier config. Afterwards the model's ``init_xavier`` is awaited.

    Args:
        rank_addresses: Passed to the transfer actor as ``world_addresses``.
    """
    from ..model.llm.vllm.core import VLLMModel
    from ..model.llm.vllm.xavier.transfer import TransferActor

    assert isinstance(self._model, VLLMModel)

    xavier = self._xavier_config
    rank = xavier.get("rank")
    # Collect the actor's constructor arguments first so the create call
    # itself stays short.
    transfer_kwargs = dict(
        address=self.address,
        uid=f"{TransferActor.default_uid()}-{rank}",
        rank=rank,
        world_size=xavier.get("world_size"),
        rank_address=xavier.get("rank_address"),
        store_address=xavier.get("store_address"),
        store_port=xavier.get("store_port"),
        world_addresses=rank_addresses,
    )
    self._transfer_ref = await xo.create_actor(TransferActor, **transfer_kwargs)

    await self._model.init_xavier()
    logger.debug(
        f"Init transfer actor: {self._transfer_ref.address}, rank: {rank} done for vllm."
    )
328
+
281
329
  async def _record_completion_metrics(
282
330
  self, duration, completion_tokens, prompt_tokens
283
331
  ):
@@ -440,11 +488,24 @@ class ModelActor(xo.StatelessActor, CancelMixin):
440
488
  )
441
489
  )
442
490
 
491
async def _handle_oom_error(self, ex):
    """Report an out-of-memory failure to the worker, then kill the process.

    The error is logged, stored as the replica's ``last_error`` through the
    worker's ``update_model_status``, and the process is then terminated
    with ``os._exit(1)``. Termination happens even if reporting fails.

    Args:
        ex: The out-of-memory exception caught by the caller.
    """
    error_message = (
        f"Model actor is out of memory, model id: {self.model_uid()}, error: {ex}"
    )
    # Pass the exception explicitly: when this coroutine is scheduled with
    # run_coroutine_threadsafe it runs outside any ``except`` block, so
    # logger.exception() would find no active exception to attach.
    logger.error(error_message, exc_info=ex)
    try:
        worker_ref = await self._get_worker_ref()
        await worker_ref.update_model_status(
            self._replica_model_uid, last_error=error_message
        )
    except Exception:
        # Reporting is best effort; it must not keep the process alive.
        logger.exception(
            "Failed to report out of memory error for model id: %s",
            self.model_uid(),
        )
    finally:
        # Always exit, so an out-of-memory actor never keeps serving.
        os._exit(1)
501
+
443
502
  def _to_generator(self, output_type: str, gen: types.GeneratorType):
444
503
  start_time = time.time()
445
504
  time_to_first_token = None
446
505
  final_usage = None
447
506
  try:
507
+ if XINFERENCE_TEST_OUT_OF_MEMORY_ERROR:
508
+ raise OutOfMemoryError("Test Out of Memory Error")
448
509
  for v in gen:
449
510
  if time_to_first_token is None:
450
511
  time_to_first_token = (time.time() - start_time) * 1000
@@ -456,11 +517,11 @@ class ModelActor(xo.StatelessActor, CancelMixin):
456
517
  output_type == "binary"
457
518
  ), f"Unknown output type '{output_type}'"
458
519
  yield sse_starlette.sse.ensure_bytes(v, None)
459
- except OutOfMemoryError:
460
- logger.exception(
461
- "Model actor is out of memory, model id: %s", self.model_uid()
520
+ except OutOfMemoryError as ex:
521
+ assert self._loop is not None
522
+ asyncio.run_coroutine_threadsafe(
523
+ self._handle_oom_error(ex), loop=self._loop
462
524
  )
463
- os._exit(1)
464
525
  finally:
465
526
  if self._loop is not None and time_to_first_token is not None:
466
527
  coro = self.record_metrics(
@@ -482,6 +543,8 @@ class ModelActor(xo.StatelessActor, CancelMixin):
482
543
  time_to_first_token = None
483
544
  final_usage = None
484
545
  try:
546
+ if XINFERENCE_TEST_OUT_OF_MEMORY_ERROR:
547
+ raise OutOfMemoryError("Test Out of Memory Error")
485
548
  async for v in gen:
486
549
  if time_to_first_token is None:
487
550
  time_to_first_token = (time.time() - start_time) * 1000
@@ -494,11 +557,8 @@ class ModelActor(xo.StatelessActor, CancelMixin):
494
557
  output_type == "binary"
495
558
  ), f"Unknown output type '{output_type}'"
496
559
  yield await asyncio.to_thread(sse_starlette.sse.ensure_bytes, v, None)
497
- except OutOfMemoryError:
498
- logger.exception(
499
- "Model actor is out of memory, model id: %s", self.model_uid()
500
- )
501
- os._exit(1)
560
+ except OutOfMemoryError as ex:
561
+ await self._handle_oom_error(ex)
502
562
  finally:
503
563
  coros = []
504
564
  if time_to_first_token is not None: