xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of xinference has been flagged as potentially problematic.
Files changed (373)
  1. xinference/_compat.py +24 -2
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +219 -77
  4. xinference/client/restful/restful_client.py +47 -2
  5. xinference/constants.py +1 -0
  6. xinference/core/chat_interface.py +6 -1
  7. xinference/core/model.py +124 -34
  8. xinference/core/supervisor.py +180 -12
  9. xinference/core/utils.py +73 -4
  10. xinference/core/worker.py +102 -4
  11. xinference/deploy/cmdline.py +3 -1
  12. xinference/deploy/test/test_cmdline.py +56 -0
  13. xinference/isolation.py +24 -0
  14. xinference/model/audio/__init__.py +12 -0
  15. xinference/model/audio/core.py +37 -4
  16. xinference/model/audio/cosyvoice.py +39 -6
  17. xinference/model/audio/f5tts.py +200 -0
  18. xinference/model/audio/f5tts_mlx.py +260 -0
  19. xinference/model/audio/fish_speech.py +70 -110
  20. xinference/model/audio/melotts.py +110 -0
  21. xinference/model/audio/model_spec.json +179 -3
  22. xinference/model/audio/model_spec_modelscope.json +27 -0
  23. xinference/model/audio/utils.py +32 -0
  24. xinference/model/audio/whisper.py +35 -10
  25. xinference/model/audio/whisper_mlx.py +208 -0
  26. xinference/model/embedding/core.py +322 -6
  27. xinference/model/embedding/model_spec.json +8 -1
  28. xinference/model/embedding/model_spec_modelscope.json +9 -1
  29. xinference/model/image/core.py +69 -1
  30. xinference/model/image/model_spec.json +145 -4
  31. xinference/model/image/model_spec_modelscope.json +150 -4
  32. xinference/model/image/stable_diffusion/core.py +50 -15
  33. xinference/model/llm/__init__.py +6 -2
  34. xinference/model/llm/llm_family.json +1055 -93
  35. xinference/model/llm/llm_family.py +15 -36
  36. xinference/model/llm/llm_family_modelscope.json +1031 -78
  37. xinference/model/llm/memory.py +1 -1
  38. xinference/model/llm/mlx/core.py +285 -47
  39. xinference/model/llm/sglang/core.py +2 -0
  40. xinference/model/llm/transformers/chatglm.py +9 -5
  41. xinference/model/llm/transformers/cogagent.py +272 -0
  42. xinference/model/llm/transformers/core.py +3 -0
  43. xinference/model/llm/transformers/glm_edge_v.py +230 -0
  44. xinference/model/llm/transformers/qwen2_vl.py +12 -1
  45. xinference/model/llm/transformers/utils.py +16 -8
  46. xinference/model/llm/utils.py +55 -4
  47. xinference/model/llm/vllm/core.py +137 -12
  48. xinference/model/llm/vllm/xavier/__init__.py +13 -0
  49. xinference/model/llm/vllm/xavier/allocator.py +74 -0
  50. xinference/model/llm/vllm/xavier/block.py +111 -0
  51. xinference/model/llm/vllm/xavier/block_manager.py +71 -0
  52. xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
  53. xinference/model/llm/vllm/xavier/collective.py +74 -0
  54. xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
  55. xinference/model/llm/vllm/xavier/engine.py +247 -0
  56. xinference/model/llm/vllm/xavier/executor.py +134 -0
  57. xinference/model/llm/vllm/xavier/scheduler.py +438 -0
  58. xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
  59. xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
  60. xinference/model/llm/vllm/xavier/transfer.py +319 -0
  61. xinference/model/rerank/core.py +11 -4
  62. xinference/model/video/diffusers.py +14 -0
  63. xinference/model/video/model_spec.json +15 -0
  64. xinference/model/video/model_spec_modelscope.json +16 -0
  65. xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
  66. xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
  67. xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
  68. xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
  69. xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
  70. xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
  71. xinference/thirdparty/cosyvoice/bin/train.py +42 -8
  72. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
  73. xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
  74. xinference/thirdparty/cosyvoice/cli/model.py +330 -80
  75. xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
  76. xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
  77. xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
  78. xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
  79. xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
  80. xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
  81. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
  82. xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
  83. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
  84. xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
  85. xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
  86. xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
  87. xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
  88. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
  89. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
  90. xinference/thirdparty/cosyvoice/utils/common.py +28 -1
  91. xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
  92. xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
  93. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
  94. xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
  95. xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
  96. xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
  97. xinference/thirdparty/f5_tts/api.py +166 -0
  98. xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
  99. xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
  100. xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
  101. xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
  102. xinference/thirdparty/f5_tts/eval/README.md +49 -0
  103. xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
  104. xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
  105. xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
  106. xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
  107. xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
  108. xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
  109. xinference/thirdparty/f5_tts/infer/README.md +191 -0
  110. xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
  111. xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
  112. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
  113. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
  114. xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
  115. xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
  116. xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
  117. xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
  118. xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
  119. xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
  120. xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
  121. xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
  122. xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
  123. xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
  124. xinference/thirdparty/f5_tts/model/__init__.py +10 -0
  125. xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
  126. xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
  127. xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
  128. xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
  129. xinference/thirdparty/f5_tts/model/cfm.py +285 -0
  130. xinference/thirdparty/f5_tts/model/dataset.py +319 -0
  131. xinference/thirdparty/f5_tts/model/modules.py +658 -0
  132. xinference/thirdparty/f5_tts/model/trainer.py +366 -0
  133. xinference/thirdparty/f5_tts/model/utils.py +185 -0
  134. xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
  135. xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
  136. xinference/thirdparty/f5_tts/socket_server.py +159 -0
  137. xinference/thirdparty/f5_tts/train/README.md +77 -0
  138. xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
  139. xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
  140. xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
  141. xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
  142. xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
  143. xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
  144. xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
  145. xinference/thirdparty/f5_tts/train/train.py +75 -0
  146. xinference/thirdparty/fish_speech/fish_speech/conversation.py +266 -1
  147. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
  148. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
  149. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
  150. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
  151. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
  152. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +137 -29
  153. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
  154. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
  155. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +17 -11
  156. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
  157. xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
  158. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
  159. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
  160. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
  161. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
  162. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +2 -2
  163. xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +34 -18
  164. xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
  165. xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
  166. xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
  167. xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
  168. xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
  169. xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
  170. xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
  171. xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
  172. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
  173. xinference/thirdparty/fish_speech/tools/llama/generate.py +484 -72
  174. xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
  175. xinference/thirdparty/fish_speech/tools/schema.py +170 -0
  176. xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
  177. xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
  178. xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
  179. xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
  180. xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
  181. xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
  182. xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
  183. xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
  184. xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
  185. xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
  186. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
  187. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
  188. xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
  189. xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
  190. xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
  191. xinference/thirdparty/matcha/utils/utils.py +2 -2
  192. xinference/thirdparty/melo/api.py +135 -0
  193. xinference/thirdparty/melo/app.py +61 -0
  194. xinference/thirdparty/melo/attentions.py +459 -0
  195. xinference/thirdparty/melo/commons.py +160 -0
  196. xinference/thirdparty/melo/configs/config.json +94 -0
  197. xinference/thirdparty/melo/data/example/metadata.list +20 -0
  198. xinference/thirdparty/melo/data_utils.py +413 -0
  199. xinference/thirdparty/melo/download_utils.py +67 -0
  200. xinference/thirdparty/melo/infer.py +25 -0
  201. xinference/thirdparty/melo/init_downloads.py +14 -0
  202. xinference/thirdparty/melo/losses.py +58 -0
  203. xinference/thirdparty/melo/main.py +36 -0
  204. xinference/thirdparty/melo/mel_processing.py +174 -0
  205. xinference/thirdparty/melo/models.py +1030 -0
  206. xinference/thirdparty/melo/modules.py +598 -0
  207. xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
  208. xinference/thirdparty/melo/monotonic_align/core.py +46 -0
  209. xinference/thirdparty/melo/preprocess_text.py +135 -0
  210. xinference/thirdparty/melo/split_utils.py +174 -0
  211. xinference/thirdparty/melo/text/__init__.py +35 -0
  212. xinference/thirdparty/melo/text/chinese.py +199 -0
  213. xinference/thirdparty/melo/text/chinese_bert.py +107 -0
  214. xinference/thirdparty/melo/text/chinese_mix.py +253 -0
  215. xinference/thirdparty/melo/text/cleaner.py +36 -0
  216. xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
  217. xinference/thirdparty/melo/text/cmudict.rep +129530 -0
  218. xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
  219. xinference/thirdparty/melo/text/english.py +284 -0
  220. xinference/thirdparty/melo/text/english_bert.py +39 -0
  221. xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
  222. xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
  223. xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
  224. xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
  225. xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
  226. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
  227. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
  228. xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
  229. xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
  230. xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
  231. xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
  232. xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
  233. xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
  234. xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
  235. xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
  236. xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
  237. xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
  238. xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
  239. xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
  240. xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
  241. xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
  242. xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
  243. xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
  244. xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
  245. xinference/thirdparty/melo/text/french.py +94 -0
  246. xinference/thirdparty/melo/text/french_bert.py +39 -0
  247. xinference/thirdparty/melo/text/japanese.py +647 -0
  248. xinference/thirdparty/melo/text/japanese_bert.py +49 -0
  249. xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
  250. xinference/thirdparty/melo/text/korean.py +192 -0
  251. xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
  252. xinference/thirdparty/melo/text/spanish.py +122 -0
  253. xinference/thirdparty/melo/text/spanish_bert.py +39 -0
  254. xinference/thirdparty/melo/text/symbols.py +290 -0
  255. xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
  256. xinference/thirdparty/melo/train.py +635 -0
  257. xinference/thirdparty/melo/train.sh +19 -0
  258. xinference/thirdparty/melo/transforms.py +209 -0
  259. xinference/thirdparty/melo/utils.py +424 -0
  260. xinference/types.py +17 -1
  261. xinference/web/ui/build/asset-manifest.json +6 -6
  262. xinference/web/ui/build/index.html +1 -1
  263. xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
  264. xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
  265. xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
  266. xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
  267. xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
  268. xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
  269. xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
  270. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
  271. xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
  272. xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
  273. xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
  274. xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
  275. xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
  276. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
  277. xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
  278. xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
  279. xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
  280. xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
  281. xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
  282. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
  283. xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
  284. xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
  285. xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
  286. xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
  287. xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
  288. xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
  289. xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
  290. xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
  291. xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
  292. xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
  293. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
  294. xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
  295. xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
  296. xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
  297. xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
  298. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
  299. xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
  300. xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
  301. xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
  302. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
  303. xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
  304. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
  305. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
  306. xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
  307. xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
  308. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
  309. xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
  310. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
  311. xinference/web/ui/node_modules/.package-lock.json +67 -3
  312. xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
  313. xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
  314. xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
  315. xinference/web/ui/node_modules/i18next/package.json +129 -0
  316. xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
  317. xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
  318. xinference/web/ui/node_modules/react-i18next/package.json +162 -0
  319. xinference/web/ui/node_modules/void-elements/package.json +34 -0
  320. xinference/web/ui/package-lock.json +69 -3
  321. xinference/web/ui/package.json +2 -0
  322. xinference/web/ui/src/locales/en.json +186 -0
  323. xinference/web/ui/src/locales/zh.json +186 -0
  324. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/METADATA +96 -36
  325. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/RECORD +335 -146
  326. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/WHEEL +1 -1
  327. xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
  328. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  329. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  330. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  331. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  332. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  333. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  334. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  335. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  336. xinference/thirdparty/fish_speech/tools/api.py +0 -440
  337. xinference/thirdparty/fish_speech/tools/commons.py +0 -35
  338. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  339. xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -34
  340. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  341. xinference/thirdparty/fish_speech/tools/webui.py +0 -485
  342. xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
  343. xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
  344. xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
  345. xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
  346. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
  347. xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
  348. xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
  349. xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
  350. xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
  351. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
  352. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
  353. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
  354. xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
  355. xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
  356. xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
  357. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
  358. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
  359. xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
  360. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
  361. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
  362. xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
  363. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
  364. xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
  365. /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
  366. /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
  367. /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
  368. /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
  369. /xinference/thirdparty/{fish_speech/fish_speech/configs → melo/text/fr_phonemizer}/__init__.py +0 -0
  370. /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
  371. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
  372. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
  373. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/vllm/xavier/test/test_xavier.py
@@ -0,0 +1,147 @@
+ # Copyright 2022-2025 XProbe Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ import os
+ import sys
+
+ import pytest
+ import xoscar as xo
+
+ from ..block_tracker import VLLMBlockTracker
+
+
+ class ExtendedBlockTracker(VLLMBlockTracker):
+     def get_hash_to_rank_and_block_id(self):
+         return self._hash_to_rank_and_block_id
+
+     def get_rank_to_hash_and_block_id(self):
+         return self._rank_to_hash_and_block_id
+
+
+ @pytest.fixture
+ async def actor_pool_context():
+     start_method = (
+         os.environ.get("POOL_START_METHOD", "forkserver")
+         if sys.platform != "win32"
+         else None
+     )
+     pool = await xo.create_actor_pool(
+         "127.0.0.1", n_process=2, subprocess_start_method=start_method
+     )
+     async with pool:
+         yield pool
+
+
+ @pytest.mark.asyncio
+ async def test_block_tracker(actor_pool_context):
+     actor_pool = actor_pool_context
+     addr = actor_pool.external_address
+     tracker_ref: xo.ActorRefType[ExtendedBlockTracker] = await xo.create_actor(
+         ExtendedBlockTracker,
+         address=addr,
+         uid=VLLMBlockTracker.default_uid(),
+     )
+
+     virtual_engine = 0
+     rank = 0
+     block_infos = [(123, 0), (456, 1), (789, 2)]
+
+     # register blocks
+     await tracker_ref.register_blocks(virtual_engine, block_infos, rank)
+
+     # query blocks
+     res = await tracker_ref.query_blocks(virtual_engine, [(123, 4), (789, 5)])
+     assert len(res) == 1
+     assert rank in res
+     assert len(res[rank]) == 2
+     assert {x[0] for x in res[rank]} == {123, 789}
+     assert {x[1] for x in res[rank]} == {0, 2}
+     assert {x[2] for x in res[rank]} == {4, 5}
+
+     # query with extra info
+     res = await tracker_ref.query_blocks(virtual_engine, [(123, 4), (789, 5), (110, 6)])
+     assert len(res) == 1
+     assert rank in res
+     assert len(res[rank]) == 2
+     assert {x[0] for x in res[rank]} == {123, 789}
+     assert {x[1] for x in res[rank]} == {0, 2}
+     assert {x[2] for x in res[rank]} == {4, 5}
+
+     # unregister block
+     await tracker_ref.unregister_block(virtual_engine, rank, 1)
+     res = await tracker_ref.query_blocks(virtual_engine, [(123, 4), (456, 7)])
+     assert len(res) == 1
+     assert rank in res
+     assert len(res[rank]) == 1
+     assert {x[0] for x in res[rank]} == {123}
+     assert {x[1] for x in res[rank]} == {
+         0,
+     }
+     assert {x[2] for x in res[rank]} == {
+         4,
+     }
+     # nothing happens
+     await tracker_ref.unregister_block(virtual_engine, rank, 3)
+     res = await tracker_ref.query_blocks(virtual_engine, [(123, 4), (456, 7)])
+     assert len(res) == 1
+     assert rank in res
+     assert len(res[rank]) == 1
+     assert {x[0] for x in res[rank]} == {123}
+     assert {x[1] for x in res[rank]} == {
+         0,
+     }
+     assert {x[2] for x in res[rank]} == {
+         4,
+     }
+     # query returns empty
+     res = await tracker_ref.query_blocks(virtual_engine, [(456, 8)])
+     assert res == {}
+
+     # check internal data
+     hash_to_rank_and_block_id = await tracker_ref.get_hash_to_rank_and_block_id()
+     assert virtual_engine in hash_to_rank_and_block_id
+     assert hash_to_rank_and_block_id[virtual_engine] == {
+         123: {
+             (rank, 0),
+         },
+         456: set(),
+         789: {(rank, 2)},
+     }
+
+     rank_to_hash_and_block_id = await tracker_ref.get_rank_to_hash_and_block_id()
+     assert virtual_engine in rank_to_hash_and_block_id
+     assert rank_to_hash_and_block_id[virtual_engine] == {rank: {(123, 0), (789, 2)}}
+
+     # register blocks
+     new_rank = 1
+     block_infos = [(111, 7), (222, 8), (333, 9), (123, 10)]
+     await tracker_ref.register_blocks(virtual_engine, block_infos, new_rank)
+
+     # test unregister rank
+     await tracker_ref.unregister_rank(0)
+     res = await tracker_ref.query_blocks(virtual_engine, [(789, 5)])
+     assert len(res) == 0
+     res = await tracker_ref.query_blocks(virtual_engine, [(123, 6)])
+     assert len(res) == 1
+     assert new_rank in res
+
+     # check internal data
+     rank_to_hash_and_block_id = await tracker_ref.get_rank_to_hash_and_block_id()
+     assert rank in rank_to_hash_and_block_id[virtual_engine]
+     assert new_rank in rank_to_hash_and_block_id[virtual_engine]
+
+     # test register rank
+     await tracker_ref.register_rank(0)
+     rank_to_hash_and_block_id = await tracker_ref.get_rank_to_hash_and_block_id()
+     assert rank not in rank_to_hash_and_block_id[virtual_engine]
+     assert new_rank in rank_to_hash_and_block_id[virtual_engine]
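
The assertions above pin down the two mappings VLLMBlockTracker maintains per virtual engine. As a rough orientation only, here is a minimal, hypothetical sketch of that bookkeeping inferred from the test (the real implementation lives in xinference/model/llm/vllm/xavier/block_tracker.py, whose body this page does not show):

from collections import defaultdict
from typing import Dict, List, Set, Tuple

class TinyBlockTracker:
    """Hypothetical stand-in, only to illustrate the data layout the test checks."""

    def __init__(self):
        # virtual_engine -> content hash -> {(rank, block_id)}
        self._hash_to_rank_and_block_id: Dict[int, Dict[int, Set[Tuple[int, int]]]] = (
            defaultdict(lambda: defaultdict(set))
        )
        # virtual_engine -> rank -> {(content hash, block_id)}
        self._rank_to_hash_and_block_id: Dict[int, Dict[int, Set[Tuple[int, int]]]] = (
            defaultdict(lambda: defaultdict(set))
        )

    def register_blocks(self, ve: int, block_infos: List[Tuple[int, int]], rank: int):
        # block_infos: (content hash, block id) pairs held by `rank`
        for content_hash, block_id in block_infos:
            self._hash_to_rank_and_block_id[ve][content_hash].add((rank, block_id))
            self._rank_to_hash_and_block_id[ve][rank].add((content_hash, block_id))

    def query_blocks(self, ve: int, wanted: List[Tuple[int, int]]):
        # wanted: (content hash, local block id); returns
        # rank -> {(content hash, remote block id, local block id)} for hashes some rank holds
        res: Dict[int, Set[Tuple[int, int, int]]] = defaultdict(set)
        for content_hash, local_id in wanted:
            for rank, remote_id in self._hash_to_rank_and_block_id[ve].get(content_hash, set()):
                res[rank].add((content_hash, remote_id, local_id))
        return dict(res)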
xinference/model/llm/vllm/xavier/transfer.py
@@ -0,0 +1,319 @@
+ # Copyright 2022-2025 XProbe Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ import asyncio
+ import logging
+ from functools import lru_cache
+ from queue import Queue
+ from typing import Dict, List, Optional, no_type_check
+
+ import torch
+ import xoscar as xo
+ from vllm.core.scheduler import Scheduler
+ from vllm.utils import TORCH_DTYPE_TO_NUMPY_DTYPE, Device
+ from vllm.worker.cache_engine import CacheEngine
+
+ from .collective import CollectiveRank
+
+ logger = logging.getLogger(__name__)
+
+
+ class BufferTransferMixin:
+     def __init__(self):
+         self.num_buffer: int = 0
+         self.buffers: List[torch.Tensor] = []
+         self.buffer_queue: Optional[Queue] = None
+         self.transfer_block_num = 0
+         self.num_attn_layers = 0
+
+     def init_buffer(
+         self, num_buffer: int, buffer_shape, buffer_dtype, buffer_device, pin_memory
+     ):
+         # (transfer_block_num, num_attn_layers, 2, *kv_cache_shape[2:])
+
+         if buffer_dtype is torch.bfloat16:
+             buffer_dtype = torch.float16
+
+         self.num_buffer = num_buffer
+         self.transfer_block_num = buffer_shape[0]
+         self.num_attn_layers = buffer_shape[1]
+
+         self.buffers = [
+             torch.zeros(
+                 size=buffer_shape,
+                 dtype=buffer_dtype,
+                 device=buffer_device,
+                 pin_memory=pin_memory,
+             )
+             for _ in range(self.num_buffer)
+         ]
+
+         self.buffer_queue = Queue()
+         for i in range(self.num_buffer):
+             self.buffer_queue.put_nowait(i)
+         logger.debug(
+             f"Init buffer done. "
+             f"transfer_block_num: {self.transfer_block_num}, "
+             f"num_buffer: {self.num_buffer}, "
+             f"buffer_dtype: {buffer_dtype}, "
+             f"buffer_shape: {buffer_shape}"
+         )
+
+     @no_type_check
+     def get_buffer_index(self) -> int:
+         return self.buffer_queue.get()
+
+     @no_type_check
+     def free_buffer_index(self, index: int) -> None:
+         self.buffer_queue.put_nowait(index)
+
+     def get_swap_buffer(self, index: int, num_blocks: int) -> torch.Tensor:
+         buf = self.buffers[index]
+         buffer = buf[:num_blocks].view(
+             self.num_attn_layers, 2, num_blocks, *buf.shape[3:]
+         )
+         return buffer
+
+     @lru_cache(maxsize=None)
+     def get_gloo_dtype(self, input_dtype: torch.dtype):
+         from xoscar.collective.common import TypeMappingGloo
+
+         return TypeMappingGloo[TORCH_DTYPE_TO_NUMPY_DTYPE[input_dtype]]
+
+
+ class TransferActor(xo.StatelessActor, BufferTransferMixin, CollectiveRank):
+     @classmethod
+     def default_uid(cls):
+         return f"vllm-transfer-actor"
+
+     def __init__(
+         self,
+         rank: int,
+         world_size: int,
+         rank_address: str,
+         store_address: str,
+         store_port: int,
+         world_addresses: List[str],
+     ):
+         super().__init__()
+         CollectiveRank.__init__(
+             self,
+             rank,
+             world_size,
+             rank_address,
+             store_address,
+             store_port,
+             world_addresses,
+         )
+         self._cache_engine: Optional[List[CacheEngine]] = None
+         self._scheduler: Optional[List[Scheduler]] = None
+         self._swap_stream = torch.cuda.Stream()
+
+     async def __post_create__(self):
+         self.init_rank()
+
+     def setup(
+         self,
+         cache_engine: List[CacheEngine],
+         scheduler: List[Scheduler],
+         num_buffer: int,
+         buffer_shape,
+         buffer_dtype,
+         buffer_device,
+         pin_memory: bool,
+     ):
+         self._cache_engine = cache_engine
+         self._scheduler = scheduler
+         self.init_buffer(
+             num_buffer, buffer_shape, buffer_dtype, buffer_device, pin_memory
+         )
+
+     async def __pre_destroy__(self):
+         self._context.closeConnections()
+
+     def _get_cache_engine(self, virtual_engine: int) -> CacheEngine:
+         return self._cache_engine[virtual_engine]  # type: ignore
+
+     @staticmethod
+     def _get_swap_block_ids(src_to_dst: Dict[int, int], is_sender: bool) -> List[int]:
+         return list(sorted([r if is_sender else l for r, l in src_to_dst.items()]))
+
+     def _swap_out_to_buffer(
+         self, cache_engine: CacheEngine, cpu_buf_index: int, block_ids: List[int]
+     ) -> torch.Tensor:
+         num_blocks = len(block_ids)
+         src_to_dst = torch.tensor(
+             [(block_num, idx) for idx, block_num in enumerate(block_ids)],
+             device="cpu",
+             dtype=torch.int64,
+         ).view(-1, 2)
+         cpu_buf = self.get_swap_buffer(cpu_buf_index, num_blocks)
+         with torch.cuda.stream(self._swap_stream):
+             for i in range(self.num_attn_layers):
+                 cache_engine.attn_backend.swap_blocks(
+                     cache_engine.gpu_cache[i], cpu_buf[i], src_to_dst
+                 )
+         torch.cuda.Stream.synchronize(self._swap_stream)
+         return cpu_buf
+
+     def _swap_in_from_buffer(
+         self, cache_engine: CacheEngine, cpu_buf: torch.Tensor, block_ids: List[int]
+     ) -> None:
+         src_to_dst = torch.tensor(
+             [(idx, block_num) for idx, block_num in enumerate(block_ids)],
+             device="cpu",
+             dtype=torch.int64,
+         ).view(-1, 2)
+         with torch.cuda.stream(self._swap_stream):
+             for i in range(self.num_attn_layers):
+                 cache_engine.attn_backend.swap_blocks(
+                     cpu_buf[i], cache_engine.gpu_cache[i], src_to_dst
+                 )
+         torch.cuda.Stream.synchronize(self._swap_stream)
+
+     def _incr_count_for_block_id(self, virtual_engine: int, block_ids: List[int]):
+         """
+         The reference count of the `block_id` involved in the transfer is incremented by 1
+         to ensure it is not reclaimed.
+         """
+         scheduler = self._scheduler[virtual_engine]  # type: ignore
+         gpu_allocator = scheduler.block_manager.block_allocator._allocators[Device.GPU]
+
+         for _id in block_ids:
+             gpu_allocator._refcounter.incr(_id)
+
+     def _decr_count_for_block_id(self, virtual_engine: int, block_ids: List[int]):
+         """
+         After the transfer, the reference count is decremented by 1.
+         """
+         scheduler = self._scheduler[virtual_engine]  # type: ignore
+         gpu_allocator = scheduler.block_manager.block_allocator._allocators[Device.GPU]
+
+         for _id in block_ids:
+             gpu_allocator._refcounter.decr(_id)
+
+     async def do_send(
+         self, virtual_engine: int, to_rank: int, src_to_dst: Dict[int, int]
+     ):
+         """
+         Sending logic: GPU -> Buffer -> Gloo send.
+         GPU -> Buffer is directly handled using the internal `swap_out` interface of vllm.
+         """
+         from xoscar.collective import xoscar_pygloo as xp
+
+         cache_engine = self._get_cache_engine(virtual_engine)
+
+         block_ids = self._get_swap_block_ids(src_to_dst, is_sender=True)
+         self._incr_count_for_block_id(virtual_engine, block_ids)
+         cpu_buf_index = self.get_buffer_index()
+         total_blocks: int = len(block_ids)
+
+         try:
+             for start_idx in range(0, total_blocks, self.transfer_block_num):
+                 offset = min(self.transfer_block_num, total_blocks - start_idx)
+                 send_block_ids = block_ids[start_idx : start_idx + offset]
+                 sendbuf = self._swap_out_to_buffer(
+                     cache_engine, cpu_buf_index, send_block_ids
+                 )
+                 assert sendbuf.is_contiguous()
+                 sendptr = sendbuf.numpy().ctypes.data
+                 data_size = sendbuf.numel()
+                 datatype = self.get_gloo_dtype(sendbuf.dtype)
+                 peer = to_rank
+                 xp.send(self._context, sendptr, data_size, datatype, peer)
+         finally:
+             self._decr_count_for_block_id(virtual_engine, block_ids)
+             self.free_buffer_index(cpu_buf_index)
+
+     async def do_recv(
+         self, virtual_engine: int, from_rank: int, src_to_dst: Dict[int, int]
+     ):
+         """
+         Receiving logic: Gloo recv -> Buffer -> GPU.
+         Buffer -> GPU is directly handled using the internal `swap_in` interface of vllm.
+         """
+         from xoscar.collective import xoscar_pygloo as xp
+
+         cache_engine = self._get_cache_engine(virtual_engine)
+
+         block_ids = self._get_swap_block_ids(src_to_dst, is_sender=False)
+         self._incr_count_for_block_id(virtual_engine, block_ids)
+         total_blocks = len(block_ids)
+         cpu_buf_index = self.get_buffer_index()
+
+         try:
+             for start_idx in range(0, total_blocks, self.transfer_block_num):
+                 offset = min(self.transfer_block_num, total_blocks - start_idx)
+                 recv_block_ids = block_ids[start_idx : start_idx + offset]
+                 recvbuf = self.get_swap_buffer(cpu_buf_index, len(recv_block_ids))
+                 assert recvbuf.is_contiguous()
+                 recvptr = recvbuf.numpy().ctypes.data
+                 data_size = recvbuf.numel()
+                 datatype = self.get_gloo_dtype(recvbuf.dtype)
+                 peer = from_rank
+                 xp.recv(self._context, recvptr, data_size, datatype, peer)
+
+                 self._swap_in_from_buffer(cache_engine, recvbuf, recv_block_ids)
+         finally:
+             self._decr_count_for_block_id(virtual_engine, block_ids)
+             self.free_buffer_index(cpu_buf_index)
+
+     async def recv(
+         self, virtual_engine: int, from_rank: int, src_to_dst: Dict[int, int]
+     ):
+         """
+         This is the external entry point for the call.
+         The transfer logic is as follows:
+         the receiver requests the sender to send the data directly to itself in a point-to-point manner.
+         """
+         from_address = self._world_addresses[from_rank]
+         sender_ref = await xo.actor_ref(
+             address=from_address, uid=f"{TransferActor.default_uid()}-{from_rank}"
+         )
+         await asyncio.gather(
+             sender_ref.do_send(virtual_engine, self._rank, src_to_dst),
+             self.do_recv(virtual_engine, from_rank, src_to_dst),
+         )
+
+
+ class Rank0TransferActor(xo.StatelessActor, CollectiveRank):
+     """
+     The Rank 0 transfer actor is only used for constructing the collective communication world,
+     so it only needs to inherit the `CollectiveWorld` class.
+     """
+
+     @classmethod
+     def default_uid(cls):
+         return f"vllm-transfer-actor"
+
+     def __init__(
+         self,
+         rank: int,
+         world_size: int,
+         rank_address: str,
+         store_address: str,
+         store_port: int,
+         world_addresses: List[str],
+     ):
+         CollectiveRank.__init__(
+             self,
+             rank,
+             world_size,
+             rank_address,
+             store_address,
+             store_port,
+             world_addresses,
+         )
+
+     async def __post_create__(self):
+         self.init_rank()
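
Two details of TransferActor are worth spelling out: _get_swap_block_ids takes the sender's block ids from the keys of src_to_dst and the receiver's from its values, and each Gloo message carries at most transfer_block_num blocks because everything is staged through one fixed-size pinned CPU buffer. A small illustration (the values are made up, not taken from the diff):

# Illustration only: splitting a src->dst block map and chunking it by buffer size.
src_to_dst = {4: 1, 7: 3, 12: 9}               # sender block id -> receiver block id
sender_blocks = sorted(src_to_dst.keys())       # [4, 7, 12]: swapped out of the sender's GPU cache
receiver_blocks = sorted(src_to_dst.values())   # [1, 3, 9]: swapped into the receiver's GPU cache

transfer_block_num = 2                          # capacity of one staging buffer (first dim of buffer_shape)
chunks = [
    sender_blocks[i : i + transfer_block_num]
    for i in range(0, len(sender_blocks), transfer_block_num)
]
# chunks == [[4, 7], [12]]: each chunk is one swap_blocks() pass plus one xp.send()/xp.recv() round trip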
xinference/model/rerank/core.py
@@ -179,6 +179,7 @@ class RerankModel:
          return rerank_type

      def load(self):
+         logger.info("Loading rerank model: %s", self._model_path)
          flash_attn_installed = importlib.util.find_spec("flash_attn") is not None
          if (
              self._auto_detect_type(self._model_path) != "normal"
@@ -189,6 +190,7 @@ class RerankModel:
                  "will force set `use_fp16` to True"
              )
              self._use_fp16 = True
+
          if self._model_spec.type == "normal":
              try:
                  import sentence_transformers
@@ -250,22 +252,27 @@ class RerankModel:
          **kwargs,
      ) -> Rerank:
          assert self._model is not None
-         if kwargs:
-             raise ValueError("rerank hasn't support extra parameter.")
          if max_chunks_per_doc is not None:
              raise ValueError("rerank hasn't support `max_chunks_per_doc` parameter.")
+         logger.info("Rerank with kwargs: %s, model: %s", kwargs, self._model)
          sentence_combinations = [[query, doc] for doc in documents]
          # reset n tokens
          self._model.model.n_tokens = 0
          if self._model_spec.type == "normal":
              similarity_scores = self._model.predict(
-                 sentence_combinations, convert_to_numpy=False, convert_to_tensor=True
+                 sentence_combinations,
+                 convert_to_numpy=False,
+                 convert_to_tensor=True,
+                 **kwargs,
              ).cpu()
              if similarity_scores.dtype == torch.bfloat16:
                  similarity_scores = similarity_scores.float()
          else:
              # Related issue: https://github.com/xorbitsai/inference/issues/1775
-             similarity_scores = self._model.compute_score(sentence_combinations)
+             similarity_scores = self._model.compute_score(
+                 sentence_combinations, **kwargs
+             )
+
          if not isinstance(similarity_scores, Sequence):
              similarity_scores = [similarity_scores]
          elif (
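
With the old `if kwargs: raise ValueError(...)` guard removed, extra keyword arguments now flow through to the underlying cross-encoder (`predict` for sentence-transformers models, `compute_score` otherwise). A hedged client-side sketch, assuming a local server and an already-launched rerank model; the endpoint, the model uid, and the batch_size kwarg are placeholders, and the handle's signature may differ between versions:

from xinference.client import Client

client = Client("http://127.0.0.1:9997")          # assumed local endpoint
model = client.get_model("bge-reranker-v2-m3")    # placeholder model uid

# Extra kwargs are now forwarded to the underlying model instead of raising
# "rerank hasn't support extra parameter."
result = model.rerank(
    documents=["doc one", "doc two"],
    query="which document is relevant?",
    top_n=1,
    batch_size=16,    # hypothetical pass-through kwarg
)
print(result["results"])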
xinference/model/video/diffusers.py
@@ -91,6 +91,20 @@ class DiffUsersVideoModel:
              pipeline = self._model = CogVideoXPipeline.from_pretrained(
                  self._model_path, **kwargs
              )
+         elif self._model_spec.model_family == "HunyuanVideo":
+             from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
+
+             transformer_torch_dtype = kwargs.pop("transformer_torch_dtype")
+             if isinstance(transformer_torch_dtype, str):
+                 transformer_torch_dtype = getattr(torch, transformer_torch_dtype)
+             transformer = HunyuanVideoTransformer3DModel.from_pretrained(
+                 self._model_path,
+                 subfolder="transformer",
+                 torch_dtype=transformer_torch_dtype,
+             )
+             pipeline = self._model = HunyuanVideoPipeline.from_pretrained(
+                 self._model_path, transformer=transformer, **kwargs
+             )
          else:
              raise Exception(
                  f"Unsupported model family: {self._model_spec.model_family}"
xinference/model/video/model_spec.json
@@ -30,5 +30,20 @@
          "default_generate_config": {
              "guidance_scale": 7
          }
+     },
+     {
+         "model_name": "HunyuanVideo",
+         "model_family": "HunyuanVideo",
+         "model_id": "hunyuanvideo-community/HunyuanVideo",
+         "model_revision": "e8c2aaa66fe3742a32c11a6766aecbf07c56e773",
+         "model_ability": [
+             "text2video"
+         ],
+         "default_model_config": {
+             "transformer_torch_dtype": "bfloat16",
+             "torch_dtype": "float16"
+         },
+         "default_generate_config": {
+         }
      }
  ]
xinference/model/video/model_spec_modelscope.json
@@ -32,5 +32,21 @@
          "default_generate_config": {
              "guidance_scale": 7
          }
+     },
+     {
+         "model_name": "HunyuanVideo",
+         "model_family": "HunyuanVideo",
+         "model_hub": "modelscope",
+         "model_id": "Xorbits/HunyuanVideo",
+         "model_revision": "master",
+         "model_ability": [
+             "text2video"
+         ],
+         "default_model_config": {
+             "transformer_torch_dtype": "bfloat16",
+             "torch_dtype": "float16"
+         },
+         "default_generate_config": {
+         }
      }
  ]
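
Together with the HunyuanVideoPipeline branch added in diffusers.py above, these two spec entries register HunyuanVideo as a text2video model on both hubs, keeping the transformer in bfloat16 and the rest of the pipeline in float16 by default. A hedged sketch of launching and using it through the client (the endpoint and prompt are placeholders; the method name follows the existing "text2video" ability and may differ between client versions):

from xinference.client import Client

client = Client("http://127.0.0.1:9997")   # assumed local endpoint
model_uid = client.launch_model(
    model_name="HunyuanVideo",
    model_type="video",
    # dtypes default to the default_model_config entries above
)
model = client.get_model(model_uid)
video = model.text_to_video(prompt="a cat walking on the beach")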
xinference/thirdparty/cosyvoice/bin/average_model.py
@@ -0,0 +1,92 @@
+ # Copyright (c) 2020 Mobvoi Inc (Di Wu)
+ # Copyright (c) 2024 Alibaba Inc (authors: Xiang Lyu)
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import os
+ import argparse
+ import glob
+
+ import yaml
+ import torch
+
+
+ def get_args():
+     parser = argparse.ArgumentParser(description='average model')
+     parser.add_argument('--dst_model', required=True, help='averaged model')
+     parser.add_argument('--src_path',
+                         required=True,
+                         help='src model path for average')
+     parser.add_argument('--val_best',
+                         action="store_true",
+                         help='averaged model')
+     parser.add_argument('--num',
+                         default=5,
+                         type=int,
+                         help='nums for averaged model')
+
+     args = parser.parse_args()
+     print(args)
+     return args
+
+
+ def main():
+     args = get_args()
+     val_scores = []
+     if args.val_best:
+         yamls = glob.glob('{}/*.yaml'.format(args.src_path))
+         yamls = [
+             f for f in yamls
+             if not (os.path.basename(f).startswith('train')
+                     or os.path.basename(f).startswith('init'))
+         ]
+         for y in yamls:
+             with open(y, 'r') as f:
+                 dic_yaml = yaml.load(f, Loader=yaml.BaseLoader)
+                 loss = float(dic_yaml['loss_dict']['loss'])
+                 epoch = int(dic_yaml['epoch'])
+                 step = int(dic_yaml['step'])
+                 tag = dic_yaml['tag']
+                 val_scores += [[epoch, step, loss, tag]]
+         sorted_val_scores = sorted(val_scores,
+                                    key=lambda x: x[2],
+                                    reverse=False)
+         print("best val (epoch, step, loss, tag) = " +
+               str(sorted_val_scores[:args.num]))
+         path_list = [
+             args.src_path + '/epoch_{}_whole.pt'.format(score[0])
+             for score in sorted_val_scores[:args.num]
+         ]
+         print(path_list)
+     avg = {}
+     num = args.num
+     assert num == len(path_list)
+     for path in path_list:
+         print('Processing {}'.format(path))
+         states = torch.load(path, map_location=torch.device('cpu'))
+         for k in states.keys():
+             if k not in avg.keys():
+                 avg[k] = states[k].clone()
+             else:
+                 avg[k] += states[k]
+     # average
+     for k in avg.keys():
+         if avg[k] is not None:
+             # pytorch 1.6 use true_divide instead of /=
+             avg[k] = torch.true_divide(avg[k], num)
+     print('Saving to {}'.format(args.dst_model))
+     torch.save(avg, args.dst_model)
+
+
+ if __name__ == '__main__':
+     main()
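
average_model.py is argparse-driven; note that path_list is only built under --val_best, so that flag is effectively required. A hypothetical invocation inferred from the flags above (paths are placeholders):

# Hypothetical invocation inferred from the argparse definitions above
# (checkpoint directory and output path are placeholders):
#
#   python -m xinference.thirdparty.cosyvoice.bin.average_model \
#       --src_path exp/cosyvoice/llm \
#       --dst_model exp/cosyvoice/llm/avg_5.pt \
#       --val_best --num 5
#
# With --val_best, the script picks the --num checkpoints with the lowest recorded
# loss from the per-checkpoint YAML files and writes their element-wise average
# to --dst_model.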