xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Warning: this version of xinference has been flagged as potentially problematic.

Files changed (373)
  1. xinference/_compat.py +24 -2
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +219 -77
  4. xinference/client/restful/restful_client.py +47 -2
  5. xinference/constants.py +1 -0
  6. xinference/core/chat_interface.py +6 -1
  7. xinference/core/model.py +124 -34
  8. xinference/core/supervisor.py +180 -12
  9. xinference/core/utils.py +73 -4
  10. xinference/core/worker.py +102 -4
  11. xinference/deploy/cmdline.py +3 -1
  12. xinference/deploy/test/test_cmdline.py +56 -0
  13. xinference/isolation.py +24 -0
  14. xinference/model/audio/__init__.py +12 -0
  15. xinference/model/audio/core.py +37 -4
  16. xinference/model/audio/cosyvoice.py +39 -6
  17. xinference/model/audio/f5tts.py +200 -0
  18. xinference/model/audio/f5tts_mlx.py +260 -0
  19. xinference/model/audio/fish_speech.py +70 -110
  20. xinference/model/audio/melotts.py +110 -0
  21. xinference/model/audio/model_spec.json +179 -3
  22. xinference/model/audio/model_spec_modelscope.json +27 -0
  23. xinference/model/audio/utils.py +32 -0
  24. xinference/model/audio/whisper.py +35 -10
  25. xinference/model/audio/whisper_mlx.py +208 -0
  26. xinference/model/embedding/core.py +322 -6
  27. xinference/model/embedding/model_spec.json +8 -1
  28. xinference/model/embedding/model_spec_modelscope.json +9 -1
  29. xinference/model/image/core.py +69 -1
  30. xinference/model/image/model_spec.json +145 -4
  31. xinference/model/image/model_spec_modelscope.json +150 -4
  32. xinference/model/image/stable_diffusion/core.py +50 -15
  33. xinference/model/llm/__init__.py +6 -2
  34. xinference/model/llm/llm_family.json +1055 -93
  35. xinference/model/llm/llm_family.py +15 -36
  36. xinference/model/llm/llm_family_modelscope.json +1031 -78
  37. xinference/model/llm/memory.py +1 -1
  38. xinference/model/llm/mlx/core.py +285 -47
  39. xinference/model/llm/sglang/core.py +2 -0
  40. xinference/model/llm/transformers/chatglm.py +9 -5
  41. xinference/model/llm/transformers/cogagent.py +272 -0
  42. xinference/model/llm/transformers/core.py +3 -0
  43. xinference/model/llm/transformers/glm_edge_v.py +230 -0
  44. xinference/model/llm/transformers/qwen2_vl.py +12 -1
  45. xinference/model/llm/transformers/utils.py +16 -8
  46. xinference/model/llm/utils.py +55 -4
  47. xinference/model/llm/vllm/core.py +137 -12
  48. xinference/model/llm/vllm/xavier/__init__.py +13 -0
  49. xinference/model/llm/vllm/xavier/allocator.py +74 -0
  50. xinference/model/llm/vllm/xavier/block.py +111 -0
  51. xinference/model/llm/vllm/xavier/block_manager.py +71 -0
  52. xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
  53. xinference/model/llm/vllm/xavier/collective.py +74 -0
  54. xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
  55. xinference/model/llm/vllm/xavier/engine.py +247 -0
  56. xinference/model/llm/vllm/xavier/executor.py +134 -0
  57. xinference/model/llm/vllm/xavier/scheduler.py +438 -0
  58. xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
  59. xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
  60. xinference/model/llm/vllm/xavier/transfer.py +319 -0
  61. xinference/model/rerank/core.py +11 -4
  62. xinference/model/video/diffusers.py +14 -0
  63. xinference/model/video/model_spec.json +15 -0
  64. xinference/model/video/model_spec_modelscope.json +16 -0
  65. xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
  66. xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
  67. xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
  68. xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
  69. xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
  70. xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
  71. xinference/thirdparty/cosyvoice/bin/train.py +42 -8
  72. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
  73. xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
  74. xinference/thirdparty/cosyvoice/cli/model.py +330 -80
  75. xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
  76. xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
  77. xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
  78. xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
  79. xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
  80. xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
  81. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
  82. xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
  83. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
  84. xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
  85. xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
  86. xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
  87. xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
  88. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
  89. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
  90. xinference/thirdparty/cosyvoice/utils/common.py +28 -1
  91. xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
  92. xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
  93. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
  94. xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
  95. xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
  96. xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
  97. xinference/thirdparty/f5_tts/api.py +166 -0
  98. xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
  99. xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
  100. xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
  101. xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
  102. xinference/thirdparty/f5_tts/eval/README.md +49 -0
  103. xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
  104. xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
  105. xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
  106. xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
  107. xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
  108. xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
  109. xinference/thirdparty/f5_tts/infer/README.md +191 -0
  110. xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
  111. xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
  112. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
  113. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
  114. xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
  115. xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
  116. xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
  117. xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
  118. xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
  119. xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
  120. xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
  121. xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
  122. xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
  123. xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
  124. xinference/thirdparty/f5_tts/model/__init__.py +10 -0
  125. xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
  126. xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
  127. xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
  128. xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
  129. xinference/thirdparty/f5_tts/model/cfm.py +285 -0
  130. xinference/thirdparty/f5_tts/model/dataset.py +319 -0
  131. xinference/thirdparty/f5_tts/model/modules.py +658 -0
  132. xinference/thirdparty/f5_tts/model/trainer.py +366 -0
  133. xinference/thirdparty/f5_tts/model/utils.py +185 -0
  134. xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
  135. xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
  136. xinference/thirdparty/f5_tts/socket_server.py +159 -0
  137. xinference/thirdparty/f5_tts/train/README.md +77 -0
  138. xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
  139. xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
  140. xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
  141. xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
  142. xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
  143. xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
  144. xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
  145. xinference/thirdparty/f5_tts/train/train.py +75 -0
  146. xinference/thirdparty/fish_speech/fish_speech/conversation.py +266 -1
  147. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
  148. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
  149. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
  150. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
  151. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
  152. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +137 -29
  153. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
  154. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
  155. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +17 -11
  156. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
  157. xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
  158. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
  159. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
  160. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
  161. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
  162. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +2 -2
  163. xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +34 -18
  164. xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
  165. xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
  166. xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
  167. xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
  168. xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
  169. xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
  170. xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
  171. xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
  172. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
  173. xinference/thirdparty/fish_speech/tools/llama/generate.py +484 -72
  174. xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
  175. xinference/thirdparty/fish_speech/tools/schema.py +170 -0
  176. xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
  177. xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
  178. xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
  179. xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
  180. xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
  181. xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
  182. xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
  183. xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
  184. xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
  185. xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
  186. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
  187. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
  188. xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
  189. xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
  190. xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
  191. xinference/thirdparty/matcha/utils/utils.py +2 -2
  192. xinference/thirdparty/melo/api.py +135 -0
  193. xinference/thirdparty/melo/app.py +61 -0
  194. xinference/thirdparty/melo/attentions.py +459 -0
  195. xinference/thirdparty/melo/commons.py +160 -0
  196. xinference/thirdparty/melo/configs/config.json +94 -0
  197. xinference/thirdparty/melo/data/example/metadata.list +20 -0
  198. xinference/thirdparty/melo/data_utils.py +413 -0
  199. xinference/thirdparty/melo/download_utils.py +67 -0
  200. xinference/thirdparty/melo/infer.py +25 -0
  201. xinference/thirdparty/melo/init_downloads.py +14 -0
  202. xinference/thirdparty/melo/losses.py +58 -0
  203. xinference/thirdparty/melo/main.py +36 -0
  204. xinference/thirdparty/melo/mel_processing.py +174 -0
  205. xinference/thirdparty/melo/models.py +1030 -0
  206. xinference/thirdparty/melo/modules.py +598 -0
  207. xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
  208. xinference/thirdparty/melo/monotonic_align/core.py +46 -0
  209. xinference/thirdparty/melo/preprocess_text.py +135 -0
  210. xinference/thirdparty/melo/split_utils.py +174 -0
  211. xinference/thirdparty/melo/text/__init__.py +35 -0
  212. xinference/thirdparty/melo/text/chinese.py +199 -0
  213. xinference/thirdparty/melo/text/chinese_bert.py +107 -0
  214. xinference/thirdparty/melo/text/chinese_mix.py +253 -0
  215. xinference/thirdparty/melo/text/cleaner.py +36 -0
  216. xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
  217. xinference/thirdparty/melo/text/cmudict.rep +129530 -0
  218. xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
  219. xinference/thirdparty/melo/text/english.py +284 -0
  220. xinference/thirdparty/melo/text/english_bert.py +39 -0
  221. xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
  222. xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
  223. xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
  224. xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
  225. xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
  226. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
  227. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
  228. xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
  229. xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
  230. xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
  231. xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
  232. xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
  233. xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
  234. xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
  235. xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
  236. xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
  237. xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
  238. xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
  239. xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
  240. xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
  241. xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
  242. xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
  243. xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
  244. xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
  245. xinference/thirdparty/melo/text/french.py +94 -0
  246. xinference/thirdparty/melo/text/french_bert.py +39 -0
  247. xinference/thirdparty/melo/text/japanese.py +647 -0
  248. xinference/thirdparty/melo/text/japanese_bert.py +49 -0
  249. xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
  250. xinference/thirdparty/melo/text/korean.py +192 -0
  251. xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
  252. xinference/thirdparty/melo/text/spanish.py +122 -0
  253. xinference/thirdparty/melo/text/spanish_bert.py +39 -0
  254. xinference/thirdparty/melo/text/symbols.py +290 -0
  255. xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
  256. xinference/thirdparty/melo/train.py +635 -0
  257. xinference/thirdparty/melo/train.sh +19 -0
  258. xinference/thirdparty/melo/transforms.py +209 -0
  259. xinference/thirdparty/melo/utils.py +424 -0
  260. xinference/types.py +17 -1
  261. xinference/web/ui/build/asset-manifest.json +6 -6
  262. xinference/web/ui/build/index.html +1 -1
  263. xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
  264. xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
  265. xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
  266. xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
  267. xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
  268. xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
  269. xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
  270. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
  271. xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
  272. xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
  273. xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
  274. xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
  275. xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
  276. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
  277. xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
  278. xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
  279. xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
  280. xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
  281. xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
  282. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
  283. xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
  284. xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
  285. xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
  286. xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
  287. xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
  288. xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
  289. xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
  290. xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
  291. xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
  292. xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
  293. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
  294. xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
  295. xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
  296. xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
  297. xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
  298. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
  299. xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
  300. xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
  301. xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
  302. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
  303. xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
  304. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
  305. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
  306. xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
  307. xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
  308. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
  309. xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
  310. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
  311. xinference/web/ui/node_modules/.package-lock.json +67 -3
  312. xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
  313. xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
  314. xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
  315. xinference/web/ui/node_modules/i18next/package.json +129 -0
  316. xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
  317. xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
  318. xinference/web/ui/node_modules/react-i18next/package.json +162 -0
  319. xinference/web/ui/node_modules/void-elements/package.json +34 -0
  320. xinference/web/ui/package-lock.json +69 -3
  321. xinference/web/ui/package.json +2 -0
  322. xinference/web/ui/src/locales/en.json +186 -0
  323. xinference/web/ui/src/locales/zh.json +186 -0
  324. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/METADATA +96 -36
  325. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/RECORD +335 -146
  326. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/WHEEL +1 -1
  327. xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
  328. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  329. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  330. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  331. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  332. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  333. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  334. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  335. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  336. xinference/thirdparty/fish_speech/tools/api.py +0 -440
  337. xinference/thirdparty/fish_speech/tools/commons.py +0 -35
  338. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  339. xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -34
  340. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  341. xinference/thirdparty/fish_speech/tools/webui.py +0 -485
  342. xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
  343. xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
  344. xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
  345. xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
  346. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
  347. xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
  348. xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
  349. xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
  350. xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
  351. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
  352. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
  353. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
  354. xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
  355. xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
  356. xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
  357. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
  358. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
  359. xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
  360. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
  361. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
  362. xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
  363. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
  364. xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
  365. /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
  366. /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
  367. /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
  368. /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
  369. /xinference/thirdparty/{fish_speech/fish_speech/configs → melo/text/fr_phonemizer}/__init__.py +0 -0
  370. /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
  371. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
  372. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
  373. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0

xinference/model/llm/transformers/cogagent.py (new file)
@@ -0,0 +1,272 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+import re
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+from typing import Dict, Iterator, List, Literal, Optional, Union
+
+import torch
+
+from ....model.utils import select_device
+from ....types import (
+    ChatCompletion,
+    ChatCompletionChunk,
+    CogagentGenerateConfig,
+    CompletionChunk,
+)
+from ..llm_family import LLMFamilyV1, LLMSpecV1
+from ..utils import (
+    _decode_image,
+    generate_chat_completion,
+    generate_completion_chunk,
+    parse_messages,
+)
+from .core import PytorchChatModel
+from .utils import cache_clean
+
+logger = logging.getLogger(__name__)
+
+
+class CogAgentChatModel(PytorchChatModel):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._torch_type = None
+        self._device = None
+        self._tokenizer = None
+        self._model = None
+        self._platform: Literal["Mac", "WIN", "Mobile"] | None = "Mac"
+        self._format: Literal[
+            "(Answer in Action-Operation-Sensitive format.)",
+            "(Answer in Status-Plan-Action-Operation format.)",
+            "(Answer in Status-Action-Operation-Sensitive format.)",
+            "(Answer in Status-Action-Operation format.)",
+            "(Answer in Action-Operation format.)",
+        ] | None = "(Answer in Action-Operation-Sensitive format.)"
+
+    @classmethod
+    def match(
+        cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
+    ) -> bool:
+        family = model_family.model_family or model_family.model_name
+        if "cogagent" in family.lower():
+            return True
+        return False
+
+    def load(self, **kwargs):
+        from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+
+        device = self._pytorch_model_config.get("device", "auto")
+        self._device = select_device(device)
+
+        self._tokenizer = AutoTokenizer.from_pretrained(
+            self.model_path, trust_remote_code=True
+        )
+        if self.quantization == "4-bit":
+            quantization_config = BitsAndBytesConfig(load_in_4bit=True)
+        elif self.quantization == "8-bit":
+            quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+        else:
+            quantization_config = None
+
+        self._model = AutoModelForCausalLM.from_pretrained(
+            self.model_path,
+            torch_dtype=torch.bfloat16,
+            trust_remote_code=True,
+            device_map=self._device,
+            quantization_config=quantization_config,
+        ).eval()
+
+    def _message_content_to_cogagent(self, content):
+        assert isinstance(content, list)
+        texts = []
+        image_urls = []
+        for c in content:
+            c_type = c.get("type")
+            if c_type == "text":
+                texts.append(c["text"])
+            elif c_type == "image_url":
+                image_urls.append(c["image_url"]["url"])
+        image_futures = []
+        with ThreadPoolExecutor() as executor:
+            for image_url in image_urls:
+                fut = executor.submit(_decode_image, image_url)
+                image_futures.append(fut)
+        images = [fut.result() for fut in image_futures]
+        text = " ".join(texts)
+        if len(images) == 0:
+            raise RuntimeError(
+                "CogAgent requires image input to perform GUI Agent tasks. Pure text-based interaction cannot execute such tasks."
+            )
+        elif len(images) == 1:
+            return text, images[-1]
+        else:
+            logger.warning(
+                "There are multiple images in the prompt; CogAgent will automatically use the most recently provided image as the input."
+            )
+            return text, images[-1]
+
+    def _history_content_to_cogagent(self, chat_history: List[Dict]):
+        grounded_pattern = r"Grounded Operation:\s*(.*)"
+        action_pattern = r"Action:\s*(.*)"
+
+        def extract_operations(_content: str):
+            """Extract the grounded operation and the action operation."""
+            _history_step = []
+            _history_action = []
+
+            matches_history = re.search(grounded_pattern, _content)
+            matches_actions = re.search(action_pattern, _content)
+
+            if matches_history:
+                grounded_operation = matches_history.group(1)
+                _history_step.append(grounded_operation)
+            if matches_actions:
+                action_operation = matches_actions.group(1)
+                _history_action.append(action_operation)
+
+            return _history_step, _history_action
+
+        history_step = []
+        history_action = []
+
+        for i in range(0, len(chat_history) - 1, 2):
+            content = chat_history[i + 1].get("content")
+            if isinstance(content, str):  # content is a plain string
+                steps, actions = extract_operations(content)
+                history_step.extend(steps)
+                history_action.extend(actions)
+
+            elif isinstance(content, list):  # content is a list of parts
+                for c in content:
+                    c_content = c.get("content")
+                    if isinstance(c_content, str):  # only handle string parts
+                        steps, actions = extract_operations(c_content)
+                        history_step.extend(steps)
+                        history_action.extend(actions)
+
+        return history_step, history_action
+
+    def get_query_and_history(
+        self,
+        prompt: Union[str, List[Dict]],
+        chat_history: Optional[List[Dict]] = None,
+    ):
+        task, image = self._message_content_to_cogagent(prompt)
+
+        history_step, history_action = [], []
+
+        if chat_history:
+            history_step, history_action = self._history_content_to_cogagent(
+                chat_history
+            )
+
+        # Verify history lengths match
+        if len(history_step) != len(history_action):
+            raise ValueError("Mismatch in lengths of history_step and history_action.")
+
+        # Format history steps for output
+        history_str = "\nHistory steps: "
+        for index, (step, action) in enumerate(zip(history_step, history_action)):
+            history_str += f"\n{index}. {step}\t{action}"
+
+        # Compose the query with task, platform, and selected format instructions
+        query = f"Task: {task}{history_str}\n{self._platform}{self._format}"
+        logger.info(f"query:{query}")
+        return query, image
+
+    @cache_clean
+    def chat(
+        self,
+        messages: List[Dict],
+        generate_config: Optional[CogagentGenerateConfig] = None,
+    ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
+        if generate_config is not None:
+            self._platform = generate_config.pop("platform", self._platform)
+            self._format = generate_config.pop("format", self._format)
+
+        sanitize_generate_config = self._sanitize_generate_config(generate_config)
+        stream = sanitize_generate_config.get("stream")
+        sanitized_config = {
+            "max_length": sanitize_generate_config.get("max_tokens", 512),
+            "top_k": sanitize_generate_config.get("top_k", 1),
+            "do_sample": True,
+        }
+        prompt, _, chat_history = parse_messages(messages)
+
+        query, image = self.get_query_and_history(prompt, chat_history)
+
+        full_context_kwargs = {
+            "return_tensors": "pt",
+            "return_dict": True,
+        }
+        assert self.model_family.chat_template is not None
+        inputs = self.get_full_context(
+            [{"role": "user", "image": image, "content": query}],
+            self.model_family.chat_template,
+            self._tokenizer,
+            tokenize=True,
+            **full_context_kwargs,
+        )
+        inputs.to(self._model.device)
+
+        if stream:
+            it = self._streaming_chat_response(inputs, sanitized_config)
+            return self._to_chat_completion_chunks(it)
+        else:
+            # Generate response
+            with torch.no_grad():
+                outputs = self._model.generate(**inputs, **sanitized_config)
+                outputs = outputs[:, inputs["input_ids"].shape[1] :]
+                response = self._tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+            return generate_chat_completion(self.model_uid, response)
+
+    def _streaming_chat_response(
+        self, inputs: Dict, config: Dict
+    ) -> Iterator[CompletionChunk]:
+        from threading import Thread
+
+        from transformers import TextIteratorStreamer
+
+        streamer = TextIteratorStreamer(
+            self._tokenizer, skip_prompt=True, skip_special_tokens=True
+        )
+        # The streamer must be passed to generate(), otherwise it never receives text.
+        generation_kwargs = {**inputs, **config, "streamer": streamer}
+
+        thread = Thread(target=self._model.generate, kwargs=generation_kwargs)
+        thread.start()
+
+        completion_id = str(uuid.uuid1())
+        for new_text in streamer:
+            yield generate_completion_chunk(
+                chunk_text=new_text,
+                finish_reason=None,
+                chunk_id=completion_id,
+                model_uid=self.model_uid,
+                prompt_tokens=-1,
+                completion_tokens=-1,
+                total_tokens=-1,
+            )
+        yield generate_completion_chunk(
+            chunk_text=None,
+            finish_reason="stop",
+            chunk_id=completion_id,
+            model_uid=self.model_uid,
+            prompt_tokens=-1,
+            completion_tokens=-1,
+            total_tokens=-1,
+            has_choice=True,
+            has_content=False,
+        )
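
Example (not part of the diff): a minimal client-side sketch of calling the new CogAgent model once it is launched. It sends an OpenAI-style message with a single image and passes the `platform` and `format` keys that `CogAgentChatModel.chat` pops from `generate_config` above. The endpoint, model UID, and image URL are placeholders.

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")  # placeholder endpoint
    model = client.get_model("cogagent")  # placeholder model UID

    completion = model.chat(
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Open the browser and search for xinference."},
                    # CogAgent requires an image; with several images, only the
                    # most recent one is used (see the warning above).
                    {"type": "image_url", "image_url": {"url": "https://example.com/screen.png"}},
                ],
            }
        ],
        generate_config={
            "platform": "WIN",  # consumed by CogAgentChatModel.chat
            "format": "(Answer in Action-Operation format.)",
        },
    )
    print(completion["choices"][0]["message"]["content"])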

xinference/model/llm/__init__.py
@@ -68,6 +68,9 @@ NON_DEFAULT_MODEL_LIST: List[str] = [
     "deepseek-v2-chat",
     "deepseek-v2.5",
     "deepseek-v2-chat-0628",
+    "glm-edge-v",
+    "QvQ-72B-Preview",
+    "cogagent",
 ]

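The three names above join NON_DEFAULT_MODEL_LIST so that the generic transformers classes decline them and the dedicated implementations in this release (CogAgentChatModel above, GlmEdgeVModel below) can claim them. Example (not part of the diff): a sketch of launching one of the new entries through the RESTful client; the endpoint is a placeholder and the engine value is illustrative.

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")  # placeholder endpoint
    # "glm-edge-v" is one of the names added above.
    uid = client.launch_model(
        model_name="glm-edge-v",
        model_type="LLM",
        model_engine="transformers",
    )
    model = client.get_model(uid)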

xinference/model/llm/transformers/glm_edge_v.py (new file)
@@ -0,0 +1,230 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+from threading import Thread
+from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
+
+import torch
+
+from ....types import ChatCompletion, ChatCompletionChunk, CompletionChunk
+from ...utils import select_device
+from ..llm_family import LLMFamilyV1, LLMSpecV1
+from ..utils import (
+    _decode_image_without_rgb,
+    generate_chat_completion,
+    generate_completion_chunk,
+)
+from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean
+
+logger = logging.getLogger(__name__)
+
+
+class GlmEdgeVModel(PytorchChatModel):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._device = None
+        self._tokenizer = None
+        self._model = None
+        self._processor = None
+
+    @classmethod
+    def match(
+        cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
+    ) -> bool:
+        family = model_family.model_family or model_family.model_name
+        if "glm-edge-v" in family.lower():
+            return True
+        return False
+
+    def load(self):
+        from transformers import AutoImageProcessor, AutoModelForCausalLM, AutoTokenizer
+
+        device = self._pytorch_model_config.get("device", "auto")
+        self._device = select_device(device)
+
+        kwargs = {"device_map": self._device}
+        quantization = self.quantization
+
+        # referenced from PytorchModel.load
+        if quantization != "none":
+            if self._device == "cuda" and self._is_linux():
+                kwargs["device_map"] = "auto"
+                if quantization == "4-bit":
+                    kwargs["load_in_4bit"] = True
+                elif quantization == "8-bit":
+                    kwargs["load_in_8bit"] = True
+                else:
+                    raise ValueError(
+                        f"Quantization {quantization} is not supported at the moment"
+                    )
+            else:
+                if quantization != "8-bit":
+                    raise ValueError(
+                        "Only 8-bit quantization is supported on non-Linux systems or non-CUDA devices"
+                    )
+
+        processor = AutoImageProcessor.from_pretrained(
+            self.model_path, trust_remote_code=True
+        )
+        self._processor = processor
+
+        model = AutoModelForCausalLM.from_pretrained(
+            self.model_path,
+            trust_remote_code=True,
+            torch_dtype=torch.bfloat16,
+            device_map="auto",
+        )
+
+        self._model = model
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            self.model_path, trust_remote_code=True
+        )
+        self._tokenizer = tokenizer
+
+    @staticmethod
+    def _get_processed_msgs(
+        messages: List[Dict],
+    ) -> Tuple[List[Dict[str, Any]], List[Any]]:
+        res = []
+        img = []
+        for message in messages:
+            role = message["role"]
+            content = message["content"]
+            if isinstance(content, str):
+                res.append({"role": role, "content": content})
+            else:
+                texts = []
+                image_urls = []
+                for c in content:
+                    c_type = c.get("type")
+                    if c_type == "text":
+                        texts.append(c["text"])
+                    else:
+                        assert (
+                            c_type == "image_url"
+                        ), "Please follow the image input of the OpenAI API."
+                        image_urls.append(c["image_url"]["url"])
+                if len(image_urls) > 1:
+                    raise RuntimeError("Only one image per message is supported")
+                image_futures = []
+                with ThreadPoolExecutor() as executor:
+                    for image_url in image_urls:
+                        fut = executor.submit(_decode_image_without_rgb, image_url)
+                        image_futures.append(fut)
+                images = [fut.result() for fut in image_futures]
+                assert len(images) <= 1
+                text = " ".join(texts)
+                img.extend(images)
+                if images:
+                    res.append(
+                        {
+                            "role": role,
+                            "content": [
+                                {"type": "image"},
+                                {"type": "text", "text": text},
+                            ],
+                        }
+                    )
+                else:
+                    res.append({"role": role, "content": text})
+        return res, img
+
+    @cache_clean
+    def chat(
+        self,
+        messages: List[Dict],
+        generate_config: Optional[PytorchGenerateConfig] = None,
+    ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
+        from transformers import TextIteratorStreamer
+
+        if not generate_config:
+            generate_config = {}
+
+        stream = generate_config.get("stream", False)
+        msgs, imgs = self._get_processed_msgs(messages)
+
+        inputs = self._tokenizer.apply_chat_template(
+            msgs,
+            add_generation_prompt=True,
+            tokenize=True,
+            return_tensors="pt",
+            return_dict=True,
+        )  # chat mode
+        inputs = inputs.to(self._model.device)
+
+        generate_kwargs = {
+            **inputs,
+        }
+        if len(imgs) > 0:
+            generate_kwargs["pixel_values"] = torch.tensor(
+                self._processor(imgs[-1]).pixel_values
+            ).to(self._model.device)
+        stop_str = "<|endoftext|>"
+
+        if stream:
+            streamer = TextIteratorStreamer(
+                tokenizer=self._tokenizer,
+                timeout=60,
+                skip_prompt=True,
+                skip_special_tokens=True,
+            )
+            generate_kwargs = {
+                **generate_kwargs,
+                "streamer": streamer,
+            }
+            t = Thread(target=self._model.generate, kwargs=generate_kwargs)
+            t.start()
+
+            it = self.chat_stream(streamer, stop_str)
+            return self._to_chat_completion_chunks(it)
+        else:
+            with torch.no_grad():
+                outputs = self._model.generate(**generate_kwargs)
+                outputs = outputs[0][len(inputs["input_ids"][0]) :]
+                response = self._tokenizer.decode(outputs)
+            if response.endswith(stop_str):
+                response = response[: -len(stop_str)]
+            return generate_chat_completion(self.model_uid, response)
+
+    def chat_stream(self, streamer, stop_str) -> Iterator[CompletionChunk]:
+        completion_id = str(uuid.uuid1())
+        for new_text in streamer:
+            if not new_text.endswith(stop_str):
+                yield generate_completion_chunk(
+                    chunk_text=new_text,
+                    finish_reason=None,
+                    chunk_id=completion_id,
+                    model_uid=self.model_uid,
+                    prompt_tokens=-1,
+                    completion_tokens=-1,
+                    total_tokens=-1,
+                    has_choice=True,
+                    has_content=True,
+                )
+
+        yield generate_completion_chunk(
+            chunk_text=None,
+            finish_reason="stop",
+            chunk_id=completion_id,
+            model_uid=self.model_uid,
+            prompt_tokens=-1,
+            completion_tokens=-1,
+            total_tokens=-1,
+            has_choice=True,
+            has_content=False,
+        )
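
Example (not part of the diff): the message contract that `_get_processed_msgs` enforces above, made concrete. Input is OpenAI-style with at most one image per message; the URL is a placeholder.

    # Input accepted by GlmEdgeVModel._get_processed_msgs:
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is shown in this picture?"},
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
            ],
        }
    ]
    # It returns (res, img): the text parts are joined, and the image becomes a
    # {"type": "image"} placeholder that apply_chat_template can render:
    # res == [{"role": "user",
    #          "content": [{"type": "image"},
    #                      {"type": "text", "text": "What is shown in this picture?"}]}]
    # img == [<decoded image>]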

xinference/model/llm/transformers/qwen2_vl.py
@@ -17,6 +17,7 @@ import sys
 import uuid
 from typing import Iterator, List, Optional, Union

+from ....device_utils import is_npu_available
 from ....model.utils import select_device
 from ....types import (
     ChatCompletion,
@@ -47,6 +48,8 @@ class Qwen2VLChatModel(PytorchChatModel):
         llm_family = model_family.model_family or model_family.model_name
         if "qwen2-vl-instruct".lower() in llm_family.lower():
             return True
+        if "qvq-72b-preview".lower() in llm_family.lower():
+            return True
         return False

     def load(self):
@@ -71,6 +74,14 @@ class Qwen2VLChatModel(PytorchChatModel):
                 attn_implementation="flash_attention_2",
                 trust_remote_code=True,
             ).eval()
+        elif is_npu_available():
+            # Ascend does not support bf16
+            self._model = Qwen2VLForConditionalGeneration.from_pretrained(
+                self.model_path,
+                device_map="auto",
+                trust_remote_code=True,
+                torch_dtype="float16",
+            ).eval()
         else:
             self._model = Qwen2VLForConditionalGeneration.from_pretrained(
                 self.model_path, device_map=device, trust_remote_code=True
@@ -112,7 +123,7 @@ class Qwen2VLChatModel(PytorchChatModel):
             padding=True,
             return_tensors="pt",
         )
-        inputs = inputs.to("cuda")
+        inputs = inputs.to(self._device)

         # Inference: Generation of the output
         generated_ids = self._model.generate(
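
The hunks above replace the hard-coded `inputs.to("cuda")` with the selected device and add an NPU branch that loads in float16, since Ascend devices lack bf16. Example (not part of the diff): a standalone sketch of that preference order with a hypothetical `pick_device` helper; it is not the package's `select_device`/`is_npu_available` implementation.

    import torch

    def pick_device() -> str:
        # CUDA first, then Ascend NPU (fp16 only), then CPU.
        if torch.cuda.is_available():
            return "cuda"
        # torch_npu registers the torch.npu namespace when installed.
        if hasattr(torch, "npu") and torch.npu.is_available():
            return "npu"
        return "cpu"

    device = pick_device()
    dtype = torch.float16 if device == "npu" else torch.bfloat16  # Ascend: no bf16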

xinference/model/llm/transformers/utils.py
@@ -156,6 +156,7 @@ def _get_completion(
     finish_reason: Optional[str],
     model_uid: str,
     r: InferenceRequest,
+    completion_tokens: int,
 ):
     completion_choice = CompletionChoice(
         text=output, index=0, logprobs=None, finish_reason=finish_reason
@@ -170,8 +171,8 @@ def _get_completion(
     )
     completion_usage = CompletionUsage(
         prompt_tokens=len(r.prompt_tokens),
-        completion_tokens=len(r.new_tokens),
-        total_tokens=len(r.prompt_tokens) + len(r.new_tokens),
+        completion_tokens=completion_tokens,
+        total_tokens=len(r.prompt_tokens) + completion_tokens,
     )
     completion = Completion(
         id=completion_chunk["id"],
@@ -371,7 +372,7 @@ def _batch_inference_one_step_internal(
            r.stopped = stopped
            r.finish_reason = finish_reason

-       if r.stopped and r not in stop_token_mapping and r not in output_mapping:
+       if r.stopped and r not in stop_token_mapping:
            stop_token_mapping[r] = _i + 1

        if r.stream:
@@ -446,12 +447,14 @@ def _batch_inference_one_step_internal(
            else:
                # last round, handle non-stream result
                if r.stopped and _i == decode_round - 1:
-                   invalid_token_num = decode_round - stop_token_mapping[r]
+                   invalid_token_num = (
+                       (decode_round - stop_token_mapping[r] + 1)
+                       if r.finish_reason == "stop"
+                       else (decode_round - stop_token_mapping[r])
+                   )
                    outputs = (
                        tokenizer.decode(
-                           r.new_tokens[: -(invalid_token_num + 1)]
-                           if r.finish_reason == "stop"
-                           else r.new_tokens[:-invalid_token_num],
+                           r.new_tokens[:-invalid_token_num],
                            skip_special_tokens=True,
                            spaces_between_special_tokens=False,
                            clean_up_tokenization_spaces=True,
@@ -460,7 +463,12 @@ def _batch_inference_one_step_internal(
                        else output_mapping[r]
                    )
                    completion = _get_completion(
-                       outputs, r.chunk_id, r.finish_reason, model_uid, r
+                       outputs,
+                       r.chunk_id,
+                       r.finish_reason,
+                       model_uid,
+                       r,
+                       len(r.new_tokens) - invalid_token_num,
                    )
                    r.completion = [completion]
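
A worked example (not part of the diff) of the corrected bookkeeping: `stop_token_mapping[r]` records the decode step at which a request stopped, and the tokens generated after that step (plus the stop token itself when `finish_reason == "stop"`) are invalid. The same `invalid_token_num` now drives both the decoded text and `completion_tokens`, which previously counted the entire `r.new_tokens` including the invalid tail.

    # Illustrative numbers only; none of these values appear in the diff.
    decode_round = 16             # decode steps executed in this batch
    stop_step = 10                # stop_token_mapping[r]: step where the request stopped
    new_tokens = list(range(40))  # stand-in for r.new_tokens
    finish_reason = "stop"

    invalid_token_num = (
        (decode_round - stop_step + 1)
        if finish_reason == "stop"
        else (decode_round - stop_step)
    )  # 7 here: 6 tokens generated after the stop step, plus the stop token

    kept = new_tokens[:-invalid_token_num]
    completion_tokens = len(new_tokens) - invalid_token_num
    assert len(kept) == completion_tokens == 33  # usage matches the decoded text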