xinference 1.0.1__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.
Files changed (343)
  1. xinference/_compat.py +2 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +77 -71
  4. xinference/core/chat_interface.py +6 -1
  5. xinference/core/model.py +79 -19
  6. xinference/core/supervisor.py +172 -10
  7. xinference/core/utils.py +12 -8
  8. xinference/core/worker.py +102 -4
  9. xinference/deploy/cmdline.py +3 -1
  10. xinference/deploy/test/test_cmdline.py +56 -0
  11. xinference/isolation.py +24 -0
  12. xinference/model/audio/core.py +16 -0
  13. xinference/model/audio/cosyvoice.py +39 -6
  14. xinference/model/audio/f5tts.py +200 -0
  15. xinference/model/audio/f5tts_mlx.py +260 -0
  16. xinference/model/audio/fish_speech.py +36 -111
  17. xinference/model/audio/melotts.py +110 -0
  18. xinference/model/audio/model_spec.json +99 -3
  19. xinference/model/audio/model_spec_modelscope.json +27 -0
  20. xinference/model/audio/utils.py +32 -0
  21. xinference/model/audio/whisper.py +35 -10
  22. xinference/model/embedding/core.py +203 -142
  23. xinference/model/embedding/model_spec.json +7 -0
  24. xinference/model/embedding/model_spec_modelscope.json +8 -0
  25. xinference/model/image/core.py +69 -1
  26. xinference/model/image/model_spec.json +145 -4
  27. xinference/model/image/model_spec_modelscope.json +150 -4
  28. xinference/model/image/stable_diffusion/core.py +45 -13
  29. xinference/model/llm/__init__.py +4 -2
  30. xinference/model/llm/llm_family.json +536 -53
  31. xinference/model/llm/llm_family.py +15 -36
  32. xinference/model/llm/llm_family_modelscope.json +454 -20
  33. xinference/model/llm/memory.py +1 -1
  34. xinference/model/llm/mlx/core.py +248 -52
  35. xinference/model/llm/sglang/core.py +1 -0
  36. xinference/model/llm/transformers/chatglm.py +9 -5
  37. xinference/model/llm/transformers/cogagent.py +272 -0
  38. xinference/model/llm/transformers/core.py +2 -0
  39. xinference/model/llm/transformers/qwen2_vl.py +12 -1
  40. xinference/model/llm/transformers/utils.py +16 -8
  41. xinference/model/llm/utils.py +36 -4
  42. xinference/model/llm/vllm/core.py +53 -10
  43. xinference/model/llm/vllm/xavier/__init__.py +13 -0
  44. xinference/model/llm/vllm/xavier/allocator.py +74 -0
  45. xinference/model/llm/vllm/xavier/block.py +111 -0
  46. xinference/model/llm/vllm/xavier/block_manager.py +71 -0
  47. xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
  48. xinference/model/llm/vllm/xavier/collective.py +74 -0
  49. xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
  50. xinference/model/llm/vllm/xavier/engine.py +247 -0
  51. xinference/model/llm/vllm/xavier/executor.py +134 -0
  52. xinference/model/llm/vllm/xavier/scheduler.py +438 -0
  53. xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
  54. xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
  55. xinference/model/llm/vllm/xavier/transfer.py +319 -0
  56. xinference/model/video/diffusers.py +14 -0
  57. xinference/model/video/model_spec.json +15 -0
  58. xinference/model/video/model_spec_modelscope.json +16 -0
  59. xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
  60. xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
  61. xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
  62. xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
  63. xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
  64. xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
  65. xinference/thirdparty/cosyvoice/bin/train.py +42 -8
  66. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
  67. xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
  68. xinference/thirdparty/cosyvoice/cli/model.py +330 -80
  69. xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
  70. xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
  71. xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
  72. xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
  73. xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
  74. xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
  75. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
  76. xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
  77. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
  78. xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
  79. xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
  80. xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
  81. xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
  82. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
  83. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
  84. xinference/thirdparty/cosyvoice/utils/common.py +28 -1
  85. xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
  86. xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
  87. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
  88. xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
  89. xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
  90. xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
  91. xinference/thirdparty/f5_tts/api.py +166 -0
  92. xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
  93. xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
  94. xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
  95. xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
  96. xinference/thirdparty/f5_tts/eval/README.md +49 -0
  97. xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
  98. xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
  99. xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
  100. xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
  101. xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
  102. xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
  103. xinference/thirdparty/f5_tts/infer/README.md +191 -0
  104. xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
  105. xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
  106. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
  107. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
  108. xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
  109. xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
  110. xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
  111. xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
  112. xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
  113. xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
  114. xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
  115. xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
  116. xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
  117. xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
  118. xinference/thirdparty/f5_tts/model/__init__.py +10 -0
  119. xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
  120. xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
  121. xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
  122. xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
  123. xinference/thirdparty/f5_tts/model/cfm.py +285 -0
  124. xinference/thirdparty/f5_tts/model/dataset.py +319 -0
  125. xinference/thirdparty/f5_tts/model/modules.py +658 -0
  126. xinference/thirdparty/f5_tts/model/trainer.py +366 -0
  127. xinference/thirdparty/f5_tts/model/utils.py +185 -0
  128. xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
  129. xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
  130. xinference/thirdparty/f5_tts/socket_server.py +159 -0
  131. xinference/thirdparty/f5_tts/train/README.md +77 -0
  132. xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
  133. xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
  134. xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
  135. xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
  136. xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
  137. xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
  138. xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
  139. xinference/thirdparty/f5_tts/train/train.py +75 -0
  140. xinference/thirdparty/fish_speech/fish_speech/conversation.py +94 -83
  141. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +63 -20
  142. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +1 -26
  143. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
  144. xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
  145. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
  146. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1 -1
  147. xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +7 -13
  148. xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
  149. xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
  150. xinference/thirdparty/fish_speech/tools/fish_e2e.py +2 -2
  151. xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
  152. xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
  153. xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
  154. xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
  155. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
  156. xinference/thirdparty/fish_speech/tools/llama/generate.py +117 -89
  157. xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
  158. xinference/thirdparty/fish_speech/tools/schema.py +11 -28
  159. xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
  160. xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
  161. xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
  162. xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
  163. xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
  164. xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
  165. xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
  166. xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
  167. xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
  168. xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
  169. xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
  170. xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
  171. xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
  172. xinference/thirdparty/matcha/utils/utils.py +2 -2
  173. xinference/thirdparty/melo/api.py +135 -0
  174. xinference/thirdparty/melo/app.py +61 -0
  175. xinference/thirdparty/melo/attentions.py +459 -0
  176. xinference/thirdparty/melo/commons.py +160 -0
  177. xinference/thirdparty/melo/configs/config.json +94 -0
  178. xinference/thirdparty/melo/data/example/metadata.list +20 -0
  179. xinference/thirdparty/melo/data_utils.py +413 -0
  180. xinference/thirdparty/melo/download_utils.py +67 -0
  181. xinference/thirdparty/melo/infer.py +25 -0
  182. xinference/thirdparty/melo/init_downloads.py +14 -0
  183. xinference/thirdparty/melo/losses.py +58 -0
  184. xinference/thirdparty/melo/main.py +36 -0
  185. xinference/thirdparty/melo/mel_processing.py +174 -0
  186. xinference/thirdparty/melo/models.py +1030 -0
  187. xinference/thirdparty/melo/modules.py +598 -0
  188. xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
  189. xinference/thirdparty/melo/monotonic_align/core.py +46 -0
  190. xinference/thirdparty/melo/preprocess_text.py +135 -0
  191. xinference/thirdparty/melo/split_utils.py +174 -0
  192. xinference/thirdparty/melo/text/__init__.py +35 -0
  193. xinference/thirdparty/melo/text/chinese.py +199 -0
  194. xinference/thirdparty/melo/text/chinese_bert.py +107 -0
  195. xinference/thirdparty/melo/text/chinese_mix.py +253 -0
  196. xinference/thirdparty/melo/text/cleaner.py +36 -0
  197. xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
  198. xinference/thirdparty/melo/text/cmudict.rep +129530 -0
  199. xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
  200. xinference/thirdparty/melo/text/english.py +284 -0
  201. xinference/thirdparty/melo/text/english_bert.py +39 -0
  202. xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
  203. xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
  204. xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
  205. xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
  206. xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
  207. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
  208. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
  209. xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
  210. xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
  211. xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
  212. xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
  213. xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
  214. xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
  215. xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
  216. xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
  217. xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
  218. xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
  219. xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
  220. xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
  221. xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
  222. xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
  223. xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
  224. xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
  225. xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
  226. xinference/thirdparty/melo/text/french.py +94 -0
  227. xinference/thirdparty/melo/text/french_bert.py +39 -0
  228. xinference/thirdparty/melo/text/japanese.py +647 -0
  229. xinference/thirdparty/melo/text/japanese_bert.py +49 -0
  230. xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
  231. xinference/thirdparty/melo/text/korean.py +192 -0
  232. xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
  233. xinference/thirdparty/melo/text/spanish.py +122 -0
  234. xinference/thirdparty/melo/text/spanish_bert.py +39 -0
  235. xinference/thirdparty/melo/text/symbols.py +290 -0
  236. xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
  237. xinference/thirdparty/melo/train.py +635 -0
  238. xinference/thirdparty/melo/train.sh +19 -0
  239. xinference/thirdparty/melo/transforms.py +209 -0
  240. xinference/thirdparty/melo/utils.py +424 -0
  241. xinference/types.py +15 -0
  242. xinference/web/ui/build/asset-manifest.json +6 -6
  243. xinference/web/ui/build/index.html +1 -1
  244. xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
  245. xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
  246. xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
  247. xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
  248. xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
  249. xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
  250. xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
  251. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
  252. xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
  253. xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
  254. xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
  255. xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
  256. xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
  257. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
  258. xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
  259. xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
  260. xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
  261. xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
  262. xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
  263. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
  264. xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
  265. xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
  266. xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
  267. xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
  268. xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
  269. xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
  270. xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
  271. xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
  272. xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
  273. xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
  274. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
  275. xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
  276. xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
  277. xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
  278. xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
  279. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
  280. xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
  281. xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
  282. xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
  283. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
  284. xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
  285. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
  286. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
  287. xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
  288. xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
  289. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
  290. xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
  291. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
  292. xinference/web/ui/node_modules/.package-lock.json +67 -3
  293. xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
  294. xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
  295. xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
  296. xinference/web/ui/node_modules/i18next/package.json +129 -0
  297. xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
  298. xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
  299. xinference/web/ui/node_modules/react-i18next/package.json +162 -0
  300. xinference/web/ui/node_modules/void-elements/package.json +34 -0
  301. xinference/web/ui/package-lock.json +69 -3
  302. xinference/web/ui/package.json +2 -0
  303. xinference/web/ui/src/locales/en.json +186 -0
  304. xinference/web/ui/src/locales/zh.json +186 -0
  305. {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/METADATA +68 -32
  306. {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/RECORD +316 -122
  307. xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
  308. xinference/thirdparty/fish_speech/tools/api.py +0 -943
  309. xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -95
  310. xinference/thirdparty/fish_speech/tools/webui.py +0 -548
  311. xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
  312. xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
  313. xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
  314. xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
  315. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
  316. xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
  317. xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
  318. xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
  319. xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
  320. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
  321. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
  322. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
  323. xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
  324. xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
  325. xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
  326. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
  327. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
  328. xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
  329. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
  330. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
  331. xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
  332. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
  333. xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
  334. /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
  335. /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
  336. /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
  337. /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
  338. /xinference/thirdparty/{fish_speech/tools → melo/text/fr_phonemizer}/__init__.py +0 -0
  339. /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
  340. {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
  341. {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/WHEEL +0 -0
  342. {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
  343. {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/mlx/core.py

@@ -103,10 +103,10 @@ class MLXModel(LLM):
         # default config is adapted from
         # https://github.com/ml-explore/mlx-examples/blob/f212b770d8b5143e23102eda20400ae43340f844/llms/mlx_lm/utils.py#L129
         generate_config.setdefault("temperature", 0.0)
+        generate_config.setdefault("logit_bias", None)
         generate_config.setdefault("repetition_penalty", None)
         generate_config.setdefault("repetition_context_size", 20)
         generate_config.setdefault("top_p", 1.0)
-        generate_config.setdefault("logit_bias", None)
         return generate_config
 
     def _load_model(self, **kwargs):
@@ -168,9 +168,14 @@ class MLXModel(LLM):
             return False
         if "generate" not in llm_family.model_ability:
             return False
+        if "chat" in llm_family.model_ability or "vision" in llm_family.model_ability:
+            # do not process chat or vision
+            return False
         return True
 
-    def _get_prompt_cache(self, prompt, lora_name: Optional[str] = None):
+    def _get_prompt_cache(
+        self, prompt, lora_name: Optional[str] = None, model: Any = None
+    ):
         from mlx_lm.models.cache import make_prompt_cache
 
         assert self._prompt_cache is not None
@@ -182,7 +187,9 @@ class MLXModel(LLM):
             or self._prompt_cache.tokens != prompt[:cache_len]
         ):
             self._prompt_cache.model_key = model_key
-            self._prompt_cache.cache = make_prompt_cache(self._model, self._max_kv_size)
+            self._prompt_cache.cache = make_prompt_cache(
+                model or self._model, self._max_kv_size
+            )
             self._prompt_cache.tokens = []
             logger.debug("Making new prompt cache for %s", self.model_uid)
         else:
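The hunk above threads an optional `model` into the prompt-cache rebuild so that `MLXVisionModel` (added later in this diff) can reuse the same cache machinery with its own underlying model. A minimal sketch of the reuse-or-rebuild decision, assuming a `PromptCache` holding `model_key`, `cache`, and `tokens` as the surrounding code does (the helper name is ours):

```python
# Sketch of the prefix-cache logic this hunk modifies; PromptCache fields
# (model_key, cache, tokens) follow the surrounding code, the helper name is ours.
from mlx_lm.models.cache import make_prompt_cache

def reuse_or_rebuild(prompt_cache, model_key, prompt, model, max_kv_size=None):
    cache_len = len(prompt_cache.tokens)
    if (
        prompt_cache.model_key != model_key
        or cache_len >= len(prompt)
        or prompt_cache.tokens != prompt[:cache_len]
    ):
        # Miss: rebuild the KV cache. "model or self._model" in the diff lets
        # a subclass pass a different model without touching this logic.
        prompt_cache.model_key = model_key
        prompt_cache.cache = make_prompt_cache(model, max_kv_size)
        prompt_cache.tokens = []
        return prompt  # the whole prompt must be prefilled
    # Hit: only the un-cached suffix needs prefilling.
    return prompt[cache_len:]
```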
@@ -191,18 +198,45 @@ class MLXModel(LLM):
         self._prompt_cache.tokens.extend(prompt)
         return prompt
 
-    def _generate_stream(self, prompt: str, kwargs: MLXGenerateConfig):
-        import mlx.core as mx
-        from mlx_lm.utils import generate_step
+    def _generate_stream_inner(self, **kwargs):
+        from mlx_lm.utils import make_logits_processors, make_sampler, stream_generate
 
-        model = self._model
+        sampler = make_sampler(
+            temp=kwargs.pop("temperature"), top_p=kwargs.pop("top_p")
+        )
+        prompt_token_ids = kwargs.pop("prompt_token_ids")
+        logits_processors = make_logits_processors(
+            logit_bias=kwargs.pop("logits_bias", None),
+            repetition_penalty=kwargs.pop("repetition_penalty"),
+            repetition_context_size=kwargs.pop("repetition_context_size"),
+        )
+        yield from stream_generate(
+            self._model,
+            self._tokenizer,
+            prompt_token_ids,
+            sampler=sampler,
+            logits_processors=logits_processors,
+            **kwargs,
+        )
+
+    def _prepare_inputs(
+        self, prompt: Union[str, Dict[str, Any]], kwargs
+    ) -> Tuple[Any, int]:
+        prompt_token_ids = self._tokenizer.encode(prompt)
+        prompt_token_ids = self._get_prompt_cache(
+            prompt_token_ids, kwargs.get("lora_name")
+        )
+        return prompt_token_ids, len(prompt_token_ids)
+
+    def _generate_stream(
+        self, prompt: Union[str, Dict[str, Any]], kwargs: MLXGenerateConfig
+    ):
         model_uid = self.model_uid
         tokenizer = self._tokenizer
         max_tokens = kwargs["max_tokens"]
         chunk_id = str(uuid.uuid4())
         stop_token_ids = kwargs.get("stop_token_ids", [])
         stream = kwargs.get("stream", False)
-        lora_name = kwargs.get("lora_name")
         stream_options = kwargs.pop("stream_options", None)
         include_usage = (
             stream_options["include_usage"]
@@ -210,39 +244,28 @@ class MLXModel(LLM):
             else False
         )
 
-        prompt_token_ids = tokenizer.encode(prompt)
-        prompt_token_ids = self._get_prompt_cache(prompt_token_ids, lora_name)
-        prompt_tokens = mx.array(prompt_token_ids)
-        input_echo_len = len(prompt_tokens)
+        prompt_token_ids, input_echo_len = self._prepare_inputs(prompt, kwargs)
 
         i = 0
         start = time.time()
         output = ""
         tokens = []
-        for (token, _), i in zip(
-            generate_step(
-                prompt_tokens,
-                model,
-                temp=kwargs["temperature"],
+        for chunk_resp, i in zip(
+            self._generate_stream_inner(
+                prompt_token_ids=prompt_token_ids,
+                max_tokens=max_tokens,
+                temperature=kwargs["temperature"],
+                top_p=kwargs["top_p"],
                 repetition_penalty=kwargs["repetition_penalty"],
                 repetition_context_size=kwargs["repetition_context_size"],
-                top_p=kwargs["top_p"],
-                prompt_cache=self._prompt_cache.cache,  # type: ignore
+                prompt_cache=self._prompt_cache.cache if self._prompt_cache else None,  # type: ignore
             ),
             range(max_tokens),
         ):
+            token = chunk_resp.token
             tokens.append(token)
-            if token == tokenizer.eos_token_id or token in stop_token_ids:  # type: ignore
-                break
-
-            # Yield the last segment if streaming
-            out = tokenizer.decode(
-                token,
-                skip_special_tokens=True,
-                spaces_between_special_tokens=False,
-                clean_up_tokenization_spaces=True,
-            )
 
+            out = chunk_resp.text
             if stream:
                 # this special character is mainly for qwen
                 out = out.strip("�")
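The rewrite above replaces the hand-rolled `generate_step` loop with `mlx_lm`'s higher-level `stream_generate`, which yields response objects carrying already-decoded text. A runnable sketch of that path, mirroring the imports used in this diff (it assumes an `mlx_lm` version that exposes these helpers via `mlx_lm.utils`, as the diff's own import does; the model path is a placeholder):

```python
# Streaming with mlx_lm's stream_generate, as the new _generate_stream_inner does.
from mlx_lm.utils import load, make_logits_processors, make_sampler, stream_generate

model, tokenizer = load("/path/to/mlx-model")  # illustrative local path

sampler = make_sampler(temp=0.0, top_p=1.0)
logits_processors = make_logits_processors(
    logit_bias=None, repetition_penalty=None, repetition_context_size=20
)

# Each response exposes .text and .token, which is why the loop above reads
# chunk_resp.text instead of calling tokenizer.decode on every token.
for resp in stream_generate(
    model,
    tokenizer,
    tokenizer.encode("Hello"),
    max_tokens=64,
    sampler=sampler,
    logits_processors=logits_processors,
):
    print(resp.text, end="", flush=True)
```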
@@ -266,11 +289,15 @@ class MLXModel(LLM):
                     total_tokens=(input_echo_len + i),
                 ), completion_usage
 
+            if token == tokenizer.eos_token_id or token in stop_token_ids:  # type: ignore
+                break
+
         logger.info(
             f"Average generation speed: {i / (time.time() - start):.2f} tokens/s."
         )
 
-        self._prompt_cache.tokens.extend(tokens)  # type: ignore
+        if self._prompt_cache:
+            self._prompt_cache.tokens.extend(tokens)  # type: ignore
 
         if i == max_tokens - 1:
             finish_reason = "length"
@@ -314,10 +341,12 @@ class MLXModel(LLM):
             yield completion_chunk, completion_usage
 
     def generate(
-        self, prompt: str, generate_config: Optional[MLXGenerateConfig] = None
+        self,
+        prompt: Union[str, Dict[str, Any]],
+        generate_config: Optional[MLXGenerateConfig] = None,
     ) -> Union[Completion, Iterator[CompletionChunk]]:
         def generator_wrapper(
-            prompt: str, generate_config: MLXGenerateConfig
+            prompt: Union[str, Dict[str, Any]], generate_config: MLXGenerateConfig
         ) -> Iterator[CompletionChunk]:
             for completion_chunk, completion_usage in self._generate_stream(
                 prompt,
@@ -356,26 +385,6 @@ class MLXModel(LLM):
 
 
 class MLXChatModel(MLXModel, ChatModelMixin):
-    def __init__(
-        self,
-        model_uid: str,
-        model_family: "LLMFamilyV1",
-        model_spec: "LLMSpecV1",
-        quantization: str,
-        model_path: str,
-        model_config: Optional[MLXModelConfig] = None,
-        peft_model: Optional[List[LoRA]] = None,
-    ):
-        super().__init__(
-            model_uid,
-            model_family,
-            model_spec,
-            quantization,
-            model_path,
-            model_config,
-            peft_model,
-        )
-
     def _sanitize_generate_config(
         self,
         generate_config: Optional[MLXGenerateConfig],
@@ -402,6 +411,9 @@ class MLXChatModel(MLXModel, ChatModelMixin):
             return False
         if "chat" not in llm_family.model_ability:
             return False
+        if "vision" in llm_family.model_ability:
+            # do not process vision
+            return False
         return True
 
     def chat(
@@ -432,3 +444,187 @@ class MLXChatModel(MLXModel, ChatModelMixin):
         if tools:
             return self._tool_calls_completion(self.model_family, self.model_uid, c)
         return self._to_chat_completion(c)
+
+
+class MLXVisionModel(MLXModel, ChatModelMixin):
+    @classmethod
+    def match(
+        cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
+    ) -> bool:
+        if llm_spec.model_format not in ["mlx"]:
+            return False
+        if sys.platform != "darwin" or platform.processor() != "arm":
+            # only work for Mac M chips
+            return False
+        if "vision" not in llm_family.model_ability:
+            return False
+        return True
+
+    def _load_model(self, **kwargs):
+        try:
+            from mlx_vlm import load
+        except ImportError:
+            error_message = "Failed to import module 'mlx_vlm'"
+            installation_guide = [
+                "Please make sure 'mlx_vlm' is installed. ",
+                "You can install it by `pip install mlx_vlm`\n",
+            ]
+
+            raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+
+        self._prompt_cache = PromptCache()
+
+        return load(self.model_path)
+
+    def load(self):
+        kwargs = {}
+        kwargs["revision"] = self._model_config.get(
+            "revision", self.model_spec.model_revision
+        )
+        kwargs["trust_remote_code"] = self._model_config.get("trust_remote_code")
+        kwargs["cache_limit_gb"] = self._model_config.pop("cache_limit_gb", None)
+
+        self._model, self._processor = self._load_model(**kwargs)
+        self._tokenizer = self._processor.tokenizer
+
+    def _generate_stream_inner(self, **kwargs):
+        import mlx.core as mx
+        from mlx_lm.utils import GenerationResponse
+        from mlx_vlm.utils import generate_step
+
+        inputs = kwargs["prompt_token_ids"]
+
+        max_tokens = kwargs.pop("max_tokens")
+        input_ids, pixel_values, mask, kwargs = inputs
+
+        tokenizer = self._processor.tokenizer
+        detokenizer = self._processor.detokenizer
+
+        detokenizer.reset()
+        tic = time.perf_counter()
+        for (token, logprobs), n in zip(
+            generate_step(input_ids, self._model, pixel_values, mask, **kwargs),
+            range(max_tokens),
+        ):
+            if n == 0:
+                prompt_time = time.perf_counter() - tic
+                prompt_tps = len(input_ids) / prompt_time
+                tic = time.perf_counter()
+            if token == tokenizer.eos_token_id:
+                break
+            detokenizer.add_token(token)
+
+            # Yield the last segment if streaming
+            yield GenerationResponse(
+                text=detokenizer.last_segment,
+                token=token,
+                logprobs=logprobs,
+                prompt_tokens=len(input_ids),
+                prompt_tps=prompt_tps,
+                generation_tokens=n + 1,
+                generation_tps=(n + 1) / (time.perf_counter() - tic),
+                peak_memory=mx.metal.get_peak_memory() / 1e9,
+            )
+
+        detokenizer.finalize()
+        yield GenerationResponse(
+            text=detokenizer.last_segment,
+            token=token,
+            logprobs=logprobs,
+            prompt_tokens=len(input_ids),
+            prompt_tps=prompt_tps,
+            generation_tokens=n + 1,
+            generation_tps=(n + 1) / (time.perf_counter() - tic),
+            peak_memory=mx.metal.get_peak_memory() / 1e9,
+        )
+
+    def _prepare_inputs(
+        self, prompt: Union[str, Dict[str, Any]], kwargs
+    ) -> Tuple[Any, int]:
+        import mlx.core as mx
+        from mlx_vlm import prepare_inputs
+
+        prompt_str = prompt.get("prompt")  # type: ignore
+        images = prompt.get("multi_modal_data", {}).get("image")  # type: ignore
+        if images and not isinstance(images, list):
+            images = [images]
+        resize_shape = kwargs.pop("resize_shape", None)
+        image_token_index = getattr(self._model.config, "image_token_index", None)
+
+        processor = self._processor
+        tokenizer = processor if hasattr(processor, "encode") else processor.tokenizer
+        prompt_tokens = mx.array(tokenizer.encode(prompt_str))
+
+        if not images:
+            input_ids = prompt_tokens[None, :]
+            pixel_values = mask = None
+            kwargs = {}
+            input_token_len = input_ids.size
+        else:
+            inputs = prepare_inputs(
+                processor, images, prompt_str, image_token_index, resize_shape
+            )
+            input_ids = inputs["input_ids"]
+            pixel_values = inputs["pixel_values"]
+            mask = inputs["attention_mask"]
+            kwargs = {
+                k: v
+                for k, v in inputs.items()
+                if k not in ["input_ids", "pixel_values", "attention_mask"]
+            }
+            input_token_len = int(mask.sum())
+        return (input_ids, pixel_values, mask, kwargs), input_token_len
+
+    def chat(
+        self,
+        messages: List[Dict],
+        generate_config: Optional[MLXGenerateConfig] = None,
+    ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
+        messages = self._transform_messages(messages)  # type: ignore
+        tools = generate_config.pop("tools", []) if generate_config else None
+
+        model_family = self.model_family.model_family or self.model_family.model_name
+
+        if "internvl2" not in model_family.lower():
+            from qwen_vl_utils import process_vision_info
+
+            full_context_kwargs = {}
+            if tools and model_family in QWEN_TOOL_CALL_FAMILY:
+                full_context_kwargs["tools"] = tools
+            assert self.model_family.chat_template is not None
+            prompt = self.get_full_context(
+                messages, self.model_family.chat_template, **full_context_kwargs
+            )
+            images, video_inputs = process_vision_info(messages)
+            if video_inputs:
+                raise ValueError("Not support video input now.")
+        else:
+            prompt, images = self.get_specific_prompt(model_family, messages)  # type: ignore
+
+        if not images:
+            inputs = {
+                "prompt": prompt,
+            }
+        elif len(images) == 1:
+            inputs = {
+                "prompt": prompt,
+                "multi_modal_data": {"image": images[-1]},  # type: ignore
+            }
+        else:
+            inputs = {
+                "prompt": prompt,
+                "multi_modal_data": {"image": images},  # type: ignore
+            }
+        generate_config = self._sanitize_generate_config(generate_config)
+
+        stream = generate_config.get("stream", False)
+        if stream:
+            it = self.generate(inputs, generate_config)
+            assert isinstance(it, Iterator)
+            return self._to_chat_completion_chunks(it)
+        else:
+            c = self.generate(inputs, generate_config)
+            assert not isinstance(c, Iterator)
+            if tools:
+                return self._tool_calls_completion(self.model_family, self.model_uid, c)
+            return self._to_chat_completion(c)
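The new `MLXVisionModel` consumes vLLM-style inputs (`prompt` plus `multi_modal_data`) internally, but end users reach it through the ordinary chat interface. A hypothetical client-side sketch (endpoint, model uid, and image URL are illustrative, and the model is assumed launched with `model_format="mlx"`):

```python
# Hypothetical usage via the xinference client; all names here are placeholders.
from xinference.client import Client

client = Client("http://localhost:9997")
model = client.get_model("qwen2-vl-instruct")  # uid of a launched vision model
completion = model.chat(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image."},
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
            ],
        }
    ],
)
print(completion["choices"][0]["message"]["content"])
```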
xinference/model/llm/sglang/core.py

@@ -75,6 +75,7 @@ SGLANG_SUPPORTED_CHAT_MODELS = [
     "llama-2-chat",
     "llama-3-instruct",
     "llama-3.1-instruct",
+    "llama-3.3-instruct",
     "qwen-chat",
     "qwen1.5-chat",
     "qwen2-instruct",
xinference/model/llm/transformers/chatglm.py

@@ -61,7 +61,7 @@ class ChatglmPytorchChatModel(PytorchChatModel):
 
     def _load_model(self, **kwargs):
         try:
-            from transformers import AutoModel, AutoTokenizer
+            from transformers import AutoModelForCausalLM, AutoTokenizer
         except ImportError:
             error_message = "Failed to import module 'transformers'"
             installation_guide = [
@@ -77,7 +77,7 @@ class ChatglmPytorchChatModel(PytorchChatModel):
             encode_special_tokens=True,
             revision=kwargs["revision"],
         )
-        model = AutoModel.from_pretrained(
+        model = AutoModelForCausalLM.from_pretrained(
             self.model_path,
             **kwargs,
         )
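The switch from `AutoModel` to `AutoModelForCausalLM` presumably ensures the generation-capable class is resolved for GLM checkpoints. A minimal sketch of the changed loading call (the checkpoint path is a placeholder; `trust_remote_code` reflects how ChatGLM checkpoints are typically loaded):

```python
# Sketch of the changed loading path; the checkpoint path is illustrative.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "/path/to/glm-4-9b-chat"
tokenizer = AutoTokenizer.from_pretrained(
    model_path, trust_remote_code=True, encode_special_tokens=True
)
model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
```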
@@ -232,9 +232,11 @@ class ChatglmPytorchChatModel(PytorchChatModel):
                 content = {
                     "name": function_name,
                     "arguments": json.dumps(
-                        arguments_json
-                        if isinstance(arguments_json, dict)
-                        else arguments,
+                        (
+                            arguments_json
+                            if isinstance(arguments_json, dict)
+                            else arguments
+                        ),
                         ensure_ascii=False,
                     ),
                 }
@@ -331,6 +333,8 @@ class ChatglmPytorchChatModel(PytorchChatModel):
         max_new_tokens = generate_config.get("max_tokens")
         if max_new_tokens is not None:
             kwargs["max_new_tokens"] = int(max_new_tokens)
+        else:
+            kwargs["max_new_tokens"] = 1024
         do_sample = generate_config.get("do_sample")
         if do_sample is not None:
             kwargs["do_sample"] = bool(do_sample)