xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (373)
  1. xinference/_compat.py +24 -2
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +219 -77
  4. xinference/client/restful/restful_client.py +47 -2
  5. xinference/constants.py +1 -0
  6. xinference/core/chat_interface.py +6 -1
  7. xinference/core/model.py +124 -34
  8. xinference/core/supervisor.py +180 -12
  9. xinference/core/utils.py +73 -4
  10. xinference/core/worker.py +102 -4
  11. xinference/deploy/cmdline.py +3 -1
  12. xinference/deploy/test/test_cmdline.py +56 -0
  13. xinference/isolation.py +24 -0
  14. xinference/model/audio/__init__.py +12 -0
  15. xinference/model/audio/core.py +37 -4
  16. xinference/model/audio/cosyvoice.py +39 -6
  17. xinference/model/audio/f5tts.py +200 -0
  18. xinference/model/audio/f5tts_mlx.py +260 -0
  19. xinference/model/audio/fish_speech.py +70 -110
  20. xinference/model/audio/melotts.py +110 -0
  21. xinference/model/audio/model_spec.json +179 -3
  22. xinference/model/audio/model_spec_modelscope.json +27 -0
  23. xinference/model/audio/utils.py +32 -0
  24. xinference/model/audio/whisper.py +35 -10
  25. xinference/model/audio/whisper_mlx.py +208 -0
  26. xinference/model/embedding/core.py +322 -6
  27. xinference/model/embedding/model_spec.json +8 -1
  28. xinference/model/embedding/model_spec_modelscope.json +9 -1
  29. xinference/model/image/core.py +69 -1
  30. xinference/model/image/model_spec.json +145 -4
  31. xinference/model/image/model_spec_modelscope.json +150 -4
  32. xinference/model/image/stable_diffusion/core.py +50 -15
  33. xinference/model/llm/__init__.py +6 -2
  34. xinference/model/llm/llm_family.json +1055 -93
  35. xinference/model/llm/llm_family.py +15 -36
  36. xinference/model/llm/llm_family_modelscope.json +1031 -78
  37. xinference/model/llm/memory.py +1 -1
  38. xinference/model/llm/mlx/core.py +285 -47
  39. xinference/model/llm/sglang/core.py +2 -0
  40. xinference/model/llm/transformers/chatglm.py +9 -5
  41. xinference/model/llm/transformers/cogagent.py +272 -0
  42. xinference/model/llm/transformers/core.py +3 -0
  43. xinference/model/llm/transformers/glm_edge_v.py +230 -0
  44. xinference/model/llm/transformers/qwen2_vl.py +12 -1
  45. xinference/model/llm/transformers/utils.py +16 -8
  46. xinference/model/llm/utils.py +55 -4
  47. xinference/model/llm/vllm/core.py +137 -12
  48. xinference/model/llm/vllm/xavier/__init__.py +13 -0
  49. xinference/model/llm/vllm/xavier/allocator.py +74 -0
  50. xinference/model/llm/vllm/xavier/block.py +111 -0
  51. xinference/model/llm/vllm/xavier/block_manager.py +71 -0
  52. xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
  53. xinference/model/llm/vllm/xavier/collective.py +74 -0
  54. xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
  55. xinference/model/llm/vllm/xavier/engine.py +247 -0
  56. xinference/model/llm/vllm/xavier/executor.py +134 -0
  57. xinference/model/llm/vllm/xavier/scheduler.py +438 -0
  58. xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
  59. xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
  60. xinference/model/llm/vllm/xavier/transfer.py +319 -0
  61. xinference/model/rerank/core.py +11 -4
  62. xinference/model/video/diffusers.py +14 -0
  63. xinference/model/video/model_spec.json +15 -0
  64. xinference/model/video/model_spec_modelscope.json +16 -0
  65. xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
  66. xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
  67. xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
  68. xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
  69. xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
  70. xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
  71. xinference/thirdparty/cosyvoice/bin/train.py +42 -8
  72. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
  73. xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
  74. xinference/thirdparty/cosyvoice/cli/model.py +330 -80
  75. xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
  76. xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
  77. xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
  78. xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
  79. xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
  80. xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
  81. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
  82. xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
  83. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
  84. xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
  85. xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
  86. xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
  87. xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
  88. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
  89. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
  90. xinference/thirdparty/cosyvoice/utils/common.py +28 -1
  91. xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
  92. xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
  93. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
  94. xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
  95. xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
  96. xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
  97. xinference/thirdparty/f5_tts/api.py +166 -0
  98. xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
  99. xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
  100. xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
  101. xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
  102. xinference/thirdparty/f5_tts/eval/README.md +49 -0
  103. xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
  104. xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
  105. xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
  106. xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
  107. xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
  108. xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
  109. xinference/thirdparty/f5_tts/infer/README.md +191 -0
  110. xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
  111. xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
  112. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
  113. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
  114. xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
  115. xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
  116. xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
  117. xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
  118. xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
  119. xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
  120. xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
  121. xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
  122. xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
  123. xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
  124. xinference/thirdparty/f5_tts/model/__init__.py +10 -0
  125. xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
  126. xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
  127. xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
  128. xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
  129. xinference/thirdparty/f5_tts/model/cfm.py +285 -0
  130. xinference/thirdparty/f5_tts/model/dataset.py +319 -0
  131. xinference/thirdparty/f5_tts/model/modules.py +658 -0
  132. xinference/thirdparty/f5_tts/model/trainer.py +366 -0
  133. xinference/thirdparty/f5_tts/model/utils.py +185 -0
  134. xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
  135. xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
  136. xinference/thirdparty/f5_tts/socket_server.py +159 -0
  137. xinference/thirdparty/f5_tts/train/README.md +77 -0
  138. xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
  139. xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
  140. xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
  141. xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
  142. xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
  143. xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
  144. xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
  145. xinference/thirdparty/f5_tts/train/train.py +75 -0
  146. xinference/thirdparty/fish_speech/fish_speech/conversation.py +266 -1
  147. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
  148. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
  149. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
  150. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
  151. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
  152. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +137 -29
  153. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
  154. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
  155. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +17 -11
  156. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
  157. xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
  158. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
  159. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
  160. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
  161. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
  162. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +2 -2
  163. xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +34 -18
  164. xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
  165. xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
  166. xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
  167. xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
  168. xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
  169. xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
  170. xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
  171. xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
  172. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
  173. xinference/thirdparty/fish_speech/tools/llama/generate.py +484 -72
  174. xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
  175. xinference/thirdparty/fish_speech/tools/schema.py +170 -0
  176. xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
  177. xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
  178. xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
  179. xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
  180. xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
  181. xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
  182. xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
  183. xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
  184. xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
  185. xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
  186. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
  187. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
  188. xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
  189. xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
  190. xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
  191. xinference/thirdparty/matcha/utils/utils.py +2 -2
  192. xinference/thirdparty/melo/api.py +135 -0
  193. xinference/thirdparty/melo/app.py +61 -0
  194. xinference/thirdparty/melo/attentions.py +459 -0
  195. xinference/thirdparty/melo/commons.py +160 -0
  196. xinference/thirdparty/melo/configs/config.json +94 -0
  197. xinference/thirdparty/melo/data/example/metadata.list +20 -0
  198. xinference/thirdparty/melo/data_utils.py +413 -0
  199. xinference/thirdparty/melo/download_utils.py +67 -0
  200. xinference/thirdparty/melo/infer.py +25 -0
  201. xinference/thirdparty/melo/init_downloads.py +14 -0
  202. xinference/thirdparty/melo/losses.py +58 -0
  203. xinference/thirdparty/melo/main.py +36 -0
  204. xinference/thirdparty/melo/mel_processing.py +174 -0
  205. xinference/thirdparty/melo/models.py +1030 -0
  206. xinference/thirdparty/melo/modules.py +598 -0
  207. xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
  208. xinference/thirdparty/melo/monotonic_align/core.py +46 -0
  209. xinference/thirdparty/melo/preprocess_text.py +135 -0
  210. xinference/thirdparty/melo/split_utils.py +174 -0
  211. xinference/thirdparty/melo/text/__init__.py +35 -0
  212. xinference/thirdparty/melo/text/chinese.py +199 -0
  213. xinference/thirdparty/melo/text/chinese_bert.py +107 -0
  214. xinference/thirdparty/melo/text/chinese_mix.py +253 -0
  215. xinference/thirdparty/melo/text/cleaner.py +36 -0
  216. xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
  217. xinference/thirdparty/melo/text/cmudict.rep +129530 -0
  218. xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
  219. xinference/thirdparty/melo/text/english.py +284 -0
  220. xinference/thirdparty/melo/text/english_bert.py +39 -0
  221. xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
  222. xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
  223. xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
  224. xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
  225. xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
  226. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
  227. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
  228. xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
  229. xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
  230. xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
  231. xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
  232. xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
  233. xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
  234. xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
  235. xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
  236. xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
  237. xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
  238. xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
  239. xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
  240. xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
  241. xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
  242. xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
  243. xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
  244. xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
  245. xinference/thirdparty/melo/text/french.py +94 -0
  246. xinference/thirdparty/melo/text/french_bert.py +39 -0
  247. xinference/thirdparty/melo/text/japanese.py +647 -0
  248. xinference/thirdparty/melo/text/japanese_bert.py +49 -0
  249. xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
  250. xinference/thirdparty/melo/text/korean.py +192 -0
  251. xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
  252. xinference/thirdparty/melo/text/spanish.py +122 -0
  253. xinference/thirdparty/melo/text/spanish_bert.py +39 -0
  254. xinference/thirdparty/melo/text/symbols.py +290 -0
  255. xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
  256. xinference/thirdparty/melo/train.py +635 -0
  257. xinference/thirdparty/melo/train.sh +19 -0
  258. xinference/thirdparty/melo/transforms.py +209 -0
  259. xinference/thirdparty/melo/utils.py +424 -0
  260. xinference/types.py +17 -1
  261. xinference/web/ui/build/asset-manifest.json +6 -6
  262. xinference/web/ui/build/index.html +1 -1
  263. xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
  264. xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
  265. xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
  266. xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
  267. xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
  268. xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
  269. xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
  270. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
  271. xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
  272. xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
  273. xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
  274. xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
  275. xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
  276. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
  277. xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
  278. xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
  279. xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
  280. xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
  281. xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
  282. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
  283. xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
  284. xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
  285. xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
  286. xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
  287. xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
  288. xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
  289. xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
  290. xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
  291. xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
  292. xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
  293. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
  294. xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
  295. xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
  296. xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
  297. xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
  298. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
  299. xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
  300. xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
  301. xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
  302. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
  303. xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
  304. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
  305. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
  306. xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
  307. xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
  308. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
  309. xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
  310. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
  311. xinference/web/ui/node_modules/.package-lock.json +67 -3
  312. xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
  313. xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
  314. xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
  315. xinference/web/ui/node_modules/i18next/package.json +129 -0
  316. xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
  317. xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
  318. xinference/web/ui/node_modules/react-i18next/package.json +162 -0
  319. xinference/web/ui/node_modules/void-elements/package.json +34 -0
  320. xinference/web/ui/package-lock.json +69 -3
  321. xinference/web/ui/package.json +2 -0
  322. xinference/web/ui/src/locales/en.json +186 -0
  323. xinference/web/ui/src/locales/zh.json +186 -0
  324. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/METADATA +96 -36
  325. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/RECORD +335 -146
  326. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/WHEEL +1 -1
  327. xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
  328. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  329. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  330. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  331. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  332. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  333. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  334. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  335. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  336. xinference/thirdparty/fish_speech/tools/api.py +0 -440
  337. xinference/thirdparty/fish_speech/tools/commons.py +0 -35
  338. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  339. xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -34
  340. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  341. xinference/thirdparty/fish_speech/tools/webui.py +0 -485
  342. xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
  343. xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
  344. xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
  345. xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
  346. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
  347. xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
  348. xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
  349. xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
  350. xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
  351. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
  352. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
  353. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
  354. xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
  355. xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
  356. xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
  357. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
  358. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
  359. xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
  360. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
  361. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
  362. xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
  363. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
  364. xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
  365. /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
  366. /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
  367. /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
  368. /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
  369. /xinference/thirdparty/{fish_speech/fish_speech/configs → melo/text/fr_phonemizer}/__init__.py +0 -0
  370. /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
  371. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
  372. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
  373. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
@@ -21,6 +21,7 @@ from typing import Dict, List, Literal, Optional, Tuple, Union, no_type_check
21
21
  import numpy as np
22
22
  import torch
23
23
 
24
+ from ..._compat import ROOT_KEY, ErrorWrapper, ValidationError
24
25
  from ...device_utils import empty_cache
25
26
  from ...types import Embedding, EmbeddingData, EmbeddingUsage
26
27
  from ..core import CacheableModelSpec, ModelDescription
@@ -193,6 +194,29 @@ class EmbeddingModel:
193
194
  device=self._device,
194
195
  model_kwargs=model_kwargs,
195
196
  )
197
+ elif (
198
+ self._kwargs.get("hybrid_mode")
199
+ and "m3" in self._model_spec.model_name.lower()
200
+ ):
201
+ try:
202
+ from FlagEmbedding import BGEM3FlagModel
203
+ except ImportError:
204
+ error_message = "Failed to import module 'BGEM3FlagModel'"
205
+ installation_guide = [
206
+ "Please make sure 'FlagEmbedding' is installed. ",
207
+ "You can install it by `pip install FlagEmbedding`\n",
208
+ ]
209
+ raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
210
+
211
+ if torch_dtype and torch_dtype == torch.float16:
212
+ model_kwargs = {"use_fp16": True}
213
+ else:
214
+ model_kwargs = {}
215
+ self._model = BGEM3FlagModel(
216
+ self._model_path,
217
+ device=self._device,
218
+ **model_kwargs,
219
+ )
196
220
  else:
197
221
  model_kwargs = {"torch_dtype": torch_dtype} if torch_dtype else None
198
222
  self._model = SentenceTransformer(
@@ -202,11 +226,209 @@ class EmbeddingModel:
202
226
  trust_remote_code=True,
203
227
  )
204
228
 
205
- def create_embedding(self, sentences: Union[str, List[str]], **kwargs):
229
+ def _fix_langchain_openai_inputs(
230
+ self, sentences: Union[str, List[str], Dict[str, str], List[Dict[str, str]]]
231
+ ):
232
+ # Check if sentences is a two-dimensional list of integers
233
+ if (
234
+ isinstance(sentences, list)
235
+ and len(sentences) > 0
236
+ and isinstance(sentences[0], list)
237
+ and len(sentences[0]) > 0
238
+ and isinstance(sentences[0][0], int)
239
+ ):
240
+ # List[List[int]] stands for encoded inputs
241
+ import tiktoken
242
+
243
+ enc = tiktoken.get_encoding("cl100k_base")
244
+ lines_decoded = []
245
+
246
+ for line in sentences:
247
+ try:
248
+ # Decode each token into bytes, then join them into a complete string
249
+ output = b"".join(
250
+ enc.decode_single_token_bytes(token) for token in line
251
+ )
252
+ # Convert the byte sequence into a UTF-8 encoded string
253
+ decoded_line = output.decode("utf-8")
254
+ lines_decoded.append(decoded_line)
255
+ except (ValueError, TypeError, UnicodeDecodeError) as e:
256
+ raise ValidationError([ErrorWrapper(e, loc=ROOT_KEY)], self)
257
+
258
+ # Update sentences to be the list of decoded strings
259
+ if len(lines_decoded) == 1:
260
+ sentences = lines_decoded[0]
261
+ else:
262
+ sentences = lines_decoded
263
+ return sentences
264
+
265
+ def create_embedding(
266
+ self,
267
+ sentences: Union[str, List[str]],
268
+ **kwargs,
269
+ ):
270
+ sentences = self._fix_langchain_openai_inputs(sentences)
271
+
206
272
  from sentence_transformers import SentenceTransformer
207
273
 
208
274
  kwargs.setdefault("normalize_embeddings", True)
209
275
 
276
+ try:
277
+ from FlagEmbedding import BGEM3FlagModel
278
+
279
+ @no_type_check
280
+ def _encode_bgem3(
281
+ model: Union[SentenceTransformer, BGEM3FlagModel],
282
+ sentences: Union[str, List[str]],
283
+ batch_size: int = 32,
284
+ show_progress_bar: bool = None,
285
+ output_value: str = "sparse_embedding",
286
+ convert_to_numpy: bool = True,
287
+ convert_to_tensor: bool = False,
288
+ device: str = None,
289
+ normalize_embeddings: bool = False,
290
+ **kwargs,
291
+ ):
292
+ """
293
+ Computes sentence embeddings with bge-m3 model
294
+ Nothing special here, just replace sentence-transformer with FlagEmbedding
295
+ TODO: think about how to solve the redundant code of encode method in the future
296
+
297
+ :param sentences: the sentences to embed
298
+ :param batch_size: the batch size used for the computation
299
+ :param show_progress_bar: Output a progress bar when encode sentences
300
+ :param output_value: Default sentence_embedding, to get sentence embeddings. Can be set to token_embeddings to get wordpiece token embeddings. Set to None, to get all output values
301
+ :param convert_to_numpy: If true, the output is a list of numpy vectors. Else, it is a list of pytorch tensors.
302
+ :param convert_to_tensor: If true, you get one large tensor as return. Overwrites any setting from convert_to_numpy
303
+ :param device: Which torch.device to use for the computation
304
+ :param normalize_embeddings: If set to true, returned vectors will have length 1. In that case, the faster dot-product (util.dot_score) instead of cosine similarity can be used.
305
+
306
+ :return:
307
+ By default, a list of tensors is returned. If convert_to_tensor, a stacked tensor is returned. If convert_to_numpy, a numpy matrix is returned.
308
+ """
309
+ import torch
310
+ from tqdm.autonotebook import trange
311
+
312
+ if show_progress_bar is None:
313
+ show_progress_bar = (
314
+ logger.getEffectiveLevel() == logging.INFO
315
+ or logger.getEffectiveLevel() == logging.DEBUG
316
+ )
317
+
318
+ if convert_to_tensor:
319
+ convert_to_numpy = False
320
+
321
+ if output_value != "sparse_embedding":
322
+ convert_to_tensor = False
323
+ convert_to_numpy = False
324
+
325
+ input_was_string = False
326
+ if isinstance(sentences, str) or not hasattr(
327
+ sentences, "__len__"
328
+ ): # Cast an individual sentence to a list with length 1
329
+ sentences = [sentences]
330
+ input_was_string = True
331
+
332
+ if device is None:
333
+ # Same as SentenceTransformer.py
334
+ from sentence_transformers.util import get_device_name
335
+
336
+ device = get_device_name()
337
+ logger.info(f"Use pytorch device_name: {device}")
338
+
339
+ all_embeddings = []
340
+ all_token_nums = 0
341
+
342
+ # The original code does not support other inference engines
343
+ def _text_length(text):
344
+ if isinstance(text, dict): # {key: value} case
345
+ return len(next(iter(text.values())))
346
+ elif not hasattr(text, "__len__"): # Object has no len() method
347
+ return 1
348
+ elif len(text) == 0 or isinstance(
349
+ text[0], int
350
+ ): # Empty string or list of ints
351
+ return len(text)
352
+ else:
353
+ return sum(
354
+ [len(t) for t in text]
355
+ ) # Sum of length of individual strings
356
+
357
+ length_sorted_idx = np.argsort(
358
+ [-_text_length(sen) for sen in sentences]
359
+ )
360
+ sentences_sorted = [sentences[idx] for idx in length_sorted_idx]
361
+
362
+ for start_index in trange(
363
+ 0,
364
+ len(sentences),
365
+ batch_size,
366
+ desc="Batches",
367
+ disable=not show_progress_bar,
368
+ ):
369
+ sentences_batch = sentences_sorted[
370
+ start_index : start_index + batch_size
371
+ ]
372
+
373
+ with torch.no_grad():
374
+ out_features = model.encode(sentences_batch, **kwargs)
375
+
376
+ if output_value == "token_embeddings":
377
+ embeddings = []
378
+ for token_emb, attention in zip(
379
+ out_features[output_value],
380
+ out_features["attention_mask"],
381
+ ):
382
+ last_mask_id = len(attention) - 1
383
+ while (
384
+ last_mask_id > 0
385
+ and attention[last_mask_id].item() == 0
386
+ ):
387
+ last_mask_id -= 1
388
+
389
+ embeddings.append(token_emb[0 : last_mask_id + 1])
390
+ elif output_value is None: # Return all outputs
391
+ embeddings = []
392
+ for sent_idx in range(
393
+ len(out_features["sentence_embedding"])
394
+ ):
395
+ row = {
396
+ name: out_features[name][sent_idx]
397
+ for name in out_features
398
+ }
399
+ embeddings.append(row)
400
+ # for sparse embedding
401
+ else:
402
+ if kwargs.get("return_sparse"):
403
+ embeddings = out_features["lexical_weights"]
404
+ else:
405
+ embeddings = out_features["dense_vecs"]
406
+
407
+ if convert_to_numpy:
408
+ embeddings = embeddings.cpu()
409
+
410
+ all_embeddings.extend(embeddings)
411
+
412
+ all_embeddings = [
413
+ all_embeddings[idx] for idx in np.argsort(length_sorted_idx)
414
+ ]
415
+
416
+ if convert_to_tensor:
417
+ if len(all_embeddings):
418
+ all_embeddings = torch.stack(all_embeddings)
419
+ else:
420
+ all_embeddings = torch.Tensor()
421
+ elif convert_to_numpy:
422
+ all_embeddings = np.asarray([emb.numpy() for emb in all_embeddings])
423
+
424
+ if input_was_string:
425
+ all_embeddings = all_embeddings[0]
426
+
427
+ return all_embeddings, all_token_nums
428
+
429
+ except ImportError:
430
+ _encode_bgem3 = None
431
+
210
432
  # copied from sentence-transformers, and modify it to return tokens num
211
433
  @no_type_check
212
434
  def encode(
@@ -323,7 +545,11 @@ class EmbeddingModel:
323
545
  features.update(extra_features)
324
546
  # when batching, the attention mask 1 means there is a token
325
547
  # thus we just sum up it to get the total number of tokens
326
- all_token_nums += features["attention_mask"].sum().item()
548
+ if "clip" in self._model_spec.model_name.lower():
549
+ all_token_nums += features["input_ids"].numel()
550
+ all_token_nums += features["pixel_values"].numel()
551
+ else:
552
+ all_token_nums += features["attention_mask"].sum().item()
327
553
 
328
554
  with torch.no_grad():
329
555
  out_features = model.forward(features, **kwargs)
@@ -379,6 +605,10 @@ class EmbeddingModel:
379
605
 
380
606
  return all_embeddings, all_token_nums
381
607
 
608
+ is_bge_m3_flag_model = (
609
+ self._kwargs.get("hybrid_mode")
610
+ and "m3" in self._model_spec.model_name.lower()
611
+ )
382
612
  if (
383
613
  "gte" in self._model_spec.model_name.lower()
384
614
  and "qwen2" in self._model_spec.model_name.lower()
@@ -390,6 +620,45 @@ class EmbeddingModel:
390
620
  convert_to_numpy=False,
391
621
  **kwargs,
392
622
  )
623
+ elif is_bge_m3_flag_model:
624
+ assert _encode_bgem3 is not None
625
+ all_embeddings, all_token_nums = _encode_bgem3(
626
+ self._model, sentences, convert_to_numpy=False, **kwargs
627
+ )
628
+ elif "clip" in self._model_spec.model_name.lower():
629
+ import base64
630
+ import re
631
+ from io import BytesIO
632
+
633
+ from PIL import Image
634
+
635
+ def base64_to_image(base64_str: str) -> Image.Image:
636
+ # base64_data = re.sub("^data:image/.+;base64,", "", base64_str)
637
+ base64_data = base64_str.split(",", 1)[1]
638
+ byte_data = base64.b64decode(base64_data)
639
+ image_data = BytesIO(byte_data)
640
+ img = Image.open(image_data)
641
+ return img
642
+
643
+ objs: list[dict[str, str]] = []
644
+ for item in sentences:
645
+ if isinstance(item, dict):
646
+ if item.get("text") is not None:
647
+ objs.append(item["text"])
648
+ elif item.get("image") is not None:
649
+ if re.match(r"^data:image/.+;base64,", item["image"]):
650
+ image = base64_to_image(item["image"])
651
+ objs.append(image)
652
+ else:
653
+ objs.append(item["image"])
654
+ else:
655
+ logger.error("Please check the input data.")
656
+ all_embeddings, all_token_nums = encode(
657
+ self._model,
658
+ objs,
659
+ convert_to_numpy=False,
660
+ **self._kwargs,
661
+ )
393
662
  else:
394
663
  all_embeddings, all_token_nums = encode(
395
664
  self._model,
@@ -401,14 +670,29 @@ class EmbeddingModel:
401
670
  all_embeddings = [all_embeddings]
402
671
  embedding_list = []
403
672
  for index, data in enumerate(all_embeddings):
404
- embedding_list.append(
405
- EmbeddingData(index=index, object="embedding", embedding=data.tolist())
406
- )
673
+ if kwargs.get("return_sparse") and is_bge_m3_flag_model:
674
+ embedding_list.append(
675
+ EmbeddingData(
676
+ index=index,
677
+ object="embedding",
678
+ embedding={k: float(v) for k, v in data.items()},
679
+ )
680
+ )
681
+ else:
682
+ embedding_list.append(
683
+ EmbeddingData(
684
+ index=index, object="embedding", embedding=data.tolist()
685
+ )
686
+ )
407
687
  usage = EmbeddingUsage(
408
688
  prompt_tokens=all_token_nums, total_tokens=all_token_nums
409
689
  )
410
690
  result = Embedding(
411
- object="list",
691
+ object=(
692
+ "list" # type: ignore
693
+ if not is_bge_m3_flag_model and not kwargs.get("return_sparse")
694
+ else "dict"
695
+ ),
412
696
  model=self._model_uid,
413
697
  data=embedding_list,
414
698
  usage=usage,
@@ -430,6 +714,38 @@ class EmbeddingModel:
430
714
 
431
715
  return result
432
716
 
717
+ def convert_ids_to_tokens(
718
+ self,
719
+ batch_token_ids: Union[List[Union[int, str]], List[List[Union[int, str]]]],
720
+ **kwargs,
721
+ ) -> Union[List[str]]:
722
+ batch_decoded_texts: List[str] = []
723
+
724
+ assert self._model is not None
725
+
726
+ if isinstance(batch_token_ids, (int, str)):
727
+ return self._model.tokenizer.convert_ids_to_tokens(
728
+ [int(str(batch_token_ids))]
729
+ )[0]
730
+
731
+ # check if it's a nested list
732
+ if (
733
+ isinstance(batch_token_ids, list)
734
+ and batch_token_ids
735
+ and isinstance(batch_token_ids[0], list)
736
+ ):
737
+ for token_ids in batch_token_ids:
738
+ token_ids = [int(token_id) for token_id in token_ids]
739
+ batch_decoded_texts.append(
740
+ self._model.tokenizer.convert_ids_to_tokens(token_ids)
741
+ )
742
+ else:
743
+ batch_token_ids = [int(token_id) for token_id in batch_token_ids]
744
+ batch_decoded_texts = self._model.tokenizer.convert_ids_to_tokens(
745
+ batch_token_ids
746
+ )
747
+ return batch_decoded_texts
748
+
433
749
 
434
750
  def match_embedding(
435
751
  model_name: str,
@@ -233,7 +233,7 @@
233
233
  },
234
234
  {
235
235
  "model_name": "gte-Qwen2",
236
- "dimensions": 4096,
236
+ "dimensions": 3584,
237
237
  "max_tokens": 32000,
238
238
  "language": ["zh", "en"],
239
239
  "model_id": "Alibaba-NLP/gte-Qwen2-7B-instruct",
@@ -245,5 +245,12 @@
245
245
  "max_tokens": 8192,
246
246
  "language": ["zh", "en"],
247
247
  "model_id": "jinaai/jina-embeddings-v3"
248
+ },
249
+ {
250
+ "model_name": "jina-clip-v2",
251
+ "dimensions": 1024,
252
+ "max_tokens": 8192,
253
+ "language": ["89 languages supported"],
254
+ "model_id": "jinaai/jina-clip-v2"
248
255
  }
249
256
  ]
@@ -235,7 +235,7 @@
235
235
  },
236
236
  {
237
237
  "model_name": "gte-Qwen2",
238
- "dimensions": 4096,
238
+ "dimensions": 3584,
239
239
  "max_tokens": 32000,
240
240
  "language": ["zh", "en"],
241
241
  "model_id": "iic/gte_Qwen2-7B-instruct",
@@ -248,5 +248,13 @@
248
248
  "language": ["zh", "en"],
249
249
  "model_id": "jinaai/jina-embeddings-v3",
250
250
  "model_hub": "modelscope"
251
+ },
252
+ {
253
+ "model_name": "jina-clip-v2",
254
+ "dimensions": 1024,
255
+ "max_tokens": 8192,
256
+ "language": ["89 languages supported"],
257
+ "model_id": "jinaai/jina-clip-v2",
258
+ "model_hub": "modelscope"
251
259
  }
252
260
  ]
@@ -22,7 +22,12 @@ from typing import Dict, List, Literal, Optional, Tuple, Union
22
22
  from ...constants import XINFERENCE_CACHE_DIR
23
23
  from ...types import PeftModelConfig
24
24
  from ..core import CacheableModelSpec, ModelDescription
25
- from ..utils import valid_model_revision
25
+ from ..utils import (
26
+ IS_NEW_HUGGINGFACE_HUB,
27
+ retry_download,
28
+ symlink_local_file,
29
+ valid_model_revision,
30
+ )
26
31
  from .ocr.got_ocr2 import GotOCR2Model
27
32
  from .stable_diffusion.core import DiffusionModel
28
33
  from .stable_diffusion.mlx import MLXDiffusionModel
@@ -51,6 +56,9 @@ class ImageModelFamilyV1(CacheableModelSpec):
51
56
  controlnet: Optional[List["ImageModelFamilyV1"]]
52
57
  default_model_config: Optional[dict] = {}
53
58
  default_generate_config: Optional[dict] = {}
59
+ gguf_model_id: Optional[str]
60
+ gguf_quantizations: Optional[List[str]]
61
+ gguf_model_file_name_template: Optional[str]
54
62
 
55
63
 
56
64
  class ImageModelDescription(ModelDescription):
@@ -187,6 +195,61 @@ def get_cache_status(
187
195
  return valid_model_revision(meta_path, model_spec.model_revision)
188
196
 
189
197
 
198
+ def cache_gguf(spec: ImageModelFamilyV1, quantization: Optional[str] = None):
199
+ if not quantization:
200
+ return
201
+
202
+ cache_dir = os.path.realpath(os.path.join(XINFERENCE_CACHE_DIR, spec.model_name))
203
+ if not os.path.exists(cache_dir):
204
+ os.makedirs(cache_dir, exist_ok=True)
205
+
206
+ if not spec.gguf_model_file_name_template:
207
+ raise NotImplementedError(
208
+ f"{spec.model_name} does not support GGUF quantization"
209
+ )
210
+ if quantization not in (spec.gguf_quantizations or []):
211
+ raise ValueError(
212
+ f"Cannot support quantization {quantization}, "
213
+ f"available quantizations: {spec.gguf_quantizations}"
214
+ )
215
+
216
+ filename = spec.gguf_model_file_name_template.format(quantization=quantization) # type: ignore
217
+ full_path = os.path.join(cache_dir, filename)
218
+
219
+ if spec.model_hub == "huggingface":
220
+ import huggingface_hub
221
+
222
+ use_symlinks = {}
223
+ if not IS_NEW_HUGGINGFACE_HUB:
224
+ use_symlinks = {"local_dir_use_symlinks": True, "local_dir": cache_dir}
225
+ download_file_path = retry_download(
226
+ huggingface_hub.hf_hub_download,
227
+ spec.model_name,
228
+ None,
229
+ spec.gguf_model_id,
230
+ filename=filename,
231
+ **use_symlinks,
232
+ )
233
+ if IS_NEW_HUGGINGFACE_HUB:
234
+ symlink_local_file(download_file_path, cache_dir, filename)
235
+ elif spec.model_hub == "modelscope":
236
+ from modelscope.hub.file_download import model_file_download
237
+
238
+ download_file_path = retry_download(
239
+ model_file_download,
240
+ spec.model_name,
241
+ None,
242
+ spec.gguf_model_id,
243
+ filename,
244
+ revision=spec.model_revision,
245
+ )
246
+ symlink_local_file(download_file_path, cache_dir, filename)
247
+ else:
248
+ raise NotImplementedError
249
+
250
+ return full_path
251
+
252
+
190
253
  def create_ocr_model_instance(
191
254
  subpool_addr: str,
192
255
  devices: List[str],
@@ -219,6 +282,8 @@ def create_image_model_instance(
219
282
  Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
220
283
  ] = None,
221
284
  model_path: Optional[str] = None,
285
+ gguf_quantization: Optional[str] = None,
286
+ gguf_model_path: Optional[str] = None,
222
287
  **kwargs,
223
288
  ) -> Tuple[
224
289
  Union[DiffusionModel, MLXDiffusionModel, GotOCR2Model], ImageModelDescription
@@ -272,6 +337,8 @@ def create_image_model_instance(
272
337
  ]
273
338
  if not model_path:
274
339
  model_path = cache(model_spec)
340
+ if not gguf_model_path and gguf_quantization:
341
+ gguf_model_path = cache_gguf(model_spec, gguf_quantization)
275
342
  if peft_model_config is not None:
276
343
  lora_model = peft_model_config.peft_model
277
344
  lora_load_kwargs = peft_model_config.image_lora_load_kwargs
@@ -298,6 +365,7 @@ def create_image_model_instance(
298
365
  lora_load_kwargs=lora_load_kwargs,
299
366
  lora_fuse_kwargs=lora_fuse_kwargs,
300
367
  model_spec=model_spec,
368
+ gguf_model_path=gguf_model_path,
301
369
  **kwargs,
302
370
  )
303
371
  model_description = ImageModelDescription(