xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (373)
  1. xinference/_compat.py +24 -2
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +219 -77
  4. xinference/client/restful/restful_client.py +47 -2
  5. xinference/constants.py +1 -0
  6. xinference/core/chat_interface.py +6 -1
  7. xinference/core/model.py +124 -34
  8. xinference/core/supervisor.py +180 -12
  9. xinference/core/utils.py +73 -4
  10. xinference/core/worker.py +102 -4
  11. xinference/deploy/cmdline.py +3 -1
  12. xinference/deploy/test/test_cmdline.py +56 -0
  13. xinference/isolation.py +24 -0
  14. xinference/model/audio/__init__.py +12 -0
  15. xinference/model/audio/core.py +37 -4
  16. xinference/model/audio/cosyvoice.py +39 -6
  17. xinference/model/audio/f5tts.py +200 -0
  18. xinference/model/audio/f5tts_mlx.py +260 -0
  19. xinference/model/audio/fish_speech.py +70 -110
  20. xinference/model/audio/melotts.py +110 -0
  21. xinference/model/audio/model_spec.json +179 -3
  22. xinference/model/audio/model_spec_modelscope.json +27 -0
  23. xinference/model/audio/utils.py +32 -0
  24. xinference/model/audio/whisper.py +35 -10
  25. xinference/model/audio/whisper_mlx.py +208 -0
  26. xinference/model/embedding/core.py +322 -6
  27. xinference/model/embedding/model_spec.json +8 -1
  28. xinference/model/embedding/model_spec_modelscope.json +9 -1
  29. xinference/model/image/core.py +69 -1
  30. xinference/model/image/model_spec.json +145 -4
  31. xinference/model/image/model_spec_modelscope.json +150 -4
  32. xinference/model/image/stable_diffusion/core.py +50 -15
  33. xinference/model/llm/__init__.py +6 -2
  34. xinference/model/llm/llm_family.json +1055 -93
  35. xinference/model/llm/llm_family.py +15 -36
  36. xinference/model/llm/llm_family_modelscope.json +1031 -78
  37. xinference/model/llm/memory.py +1 -1
  38. xinference/model/llm/mlx/core.py +285 -47
  39. xinference/model/llm/sglang/core.py +2 -0
  40. xinference/model/llm/transformers/chatglm.py +9 -5
  41. xinference/model/llm/transformers/cogagent.py +272 -0
  42. xinference/model/llm/transformers/core.py +3 -0
  43. xinference/model/llm/transformers/glm_edge_v.py +230 -0
  44. xinference/model/llm/transformers/qwen2_vl.py +12 -1
  45. xinference/model/llm/transformers/utils.py +16 -8
  46. xinference/model/llm/utils.py +55 -4
  47. xinference/model/llm/vllm/core.py +137 -12
  48. xinference/model/llm/vllm/xavier/__init__.py +13 -0
  49. xinference/model/llm/vllm/xavier/allocator.py +74 -0
  50. xinference/model/llm/vllm/xavier/block.py +111 -0
  51. xinference/model/llm/vllm/xavier/block_manager.py +71 -0
  52. xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
  53. xinference/model/llm/vllm/xavier/collective.py +74 -0
  54. xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
  55. xinference/model/llm/vllm/xavier/engine.py +247 -0
  56. xinference/model/llm/vllm/xavier/executor.py +134 -0
  57. xinference/model/llm/vllm/xavier/scheduler.py +438 -0
  58. xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
  59. xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
  60. xinference/model/llm/vllm/xavier/transfer.py +319 -0
  61. xinference/model/rerank/core.py +11 -4
  62. xinference/model/video/diffusers.py +14 -0
  63. xinference/model/video/model_spec.json +15 -0
  64. xinference/model/video/model_spec_modelscope.json +16 -0
  65. xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
  66. xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
  67. xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
  68. xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
  69. xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
  70. xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
  71. xinference/thirdparty/cosyvoice/bin/train.py +42 -8
  72. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
  73. xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
  74. xinference/thirdparty/cosyvoice/cli/model.py +330 -80
  75. xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
  76. xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
  77. xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
  78. xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
  79. xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
  80. xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
  81. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
  82. xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
  83. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
  84. xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
  85. xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
  86. xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
  87. xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
  88. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
  89. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
  90. xinference/thirdparty/cosyvoice/utils/common.py +28 -1
  91. xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
  92. xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
  93. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
  94. xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
  95. xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
  96. xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
  97. xinference/thirdparty/f5_tts/api.py +166 -0
  98. xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
  99. xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
  100. xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
  101. xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
  102. xinference/thirdparty/f5_tts/eval/README.md +49 -0
  103. xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
  104. xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
  105. xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
  106. xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
  107. xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
  108. xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
  109. xinference/thirdparty/f5_tts/infer/README.md +191 -0
  110. xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
  111. xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
  112. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
  113. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
  114. xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
  115. xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
  116. xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
  117. xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
  118. xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
  119. xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
  120. xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
  121. xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
  122. xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
  123. xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
  124. xinference/thirdparty/f5_tts/model/__init__.py +10 -0
  125. xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
  126. xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
  127. xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
  128. xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
  129. xinference/thirdparty/f5_tts/model/cfm.py +285 -0
  130. xinference/thirdparty/f5_tts/model/dataset.py +319 -0
  131. xinference/thirdparty/f5_tts/model/modules.py +658 -0
  132. xinference/thirdparty/f5_tts/model/trainer.py +366 -0
  133. xinference/thirdparty/f5_tts/model/utils.py +185 -0
  134. xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
  135. xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
  136. xinference/thirdparty/f5_tts/socket_server.py +159 -0
  137. xinference/thirdparty/f5_tts/train/README.md +77 -0
  138. xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
  139. xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
  140. xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
  141. xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
  142. xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
  143. xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
  144. xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
  145. xinference/thirdparty/f5_tts/train/train.py +75 -0
  146. xinference/thirdparty/fish_speech/fish_speech/conversation.py +266 -1
  147. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
  148. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
  149. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
  150. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
  151. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
  152. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +137 -29
  153. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
  154. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
  155. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +17 -11
  156. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
  157. xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
  158. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
  159. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
  160. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
  161. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
  162. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +2 -2
  163. xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +34 -18
  164. xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
  165. xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
  166. xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
  167. xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
  168. xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
  169. xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
  170. xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
  171. xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
  172. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
  173. xinference/thirdparty/fish_speech/tools/llama/generate.py +484 -72
  174. xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
  175. xinference/thirdparty/fish_speech/tools/schema.py +170 -0
  176. xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
  177. xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
  178. xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
  179. xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
  180. xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
  181. xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
  182. xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
  183. xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
  184. xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
  185. xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
  186. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
  187. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
  188. xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
  189. xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
  190. xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
  191. xinference/thirdparty/matcha/utils/utils.py +2 -2
  192. xinference/thirdparty/melo/api.py +135 -0
  193. xinference/thirdparty/melo/app.py +61 -0
  194. xinference/thirdparty/melo/attentions.py +459 -0
  195. xinference/thirdparty/melo/commons.py +160 -0
  196. xinference/thirdparty/melo/configs/config.json +94 -0
  197. xinference/thirdparty/melo/data/example/metadata.list +20 -0
  198. xinference/thirdparty/melo/data_utils.py +413 -0
  199. xinference/thirdparty/melo/download_utils.py +67 -0
  200. xinference/thirdparty/melo/infer.py +25 -0
  201. xinference/thirdparty/melo/init_downloads.py +14 -0
  202. xinference/thirdparty/melo/losses.py +58 -0
  203. xinference/thirdparty/melo/main.py +36 -0
  204. xinference/thirdparty/melo/mel_processing.py +174 -0
  205. xinference/thirdparty/melo/models.py +1030 -0
  206. xinference/thirdparty/melo/modules.py +598 -0
  207. xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
  208. xinference/thirdparty/melo/monotonic_align/core.py +46 -0
  209. xinference/thirdparty/melo/preprocess_text.py +135 -0
  210. xinference/thirdparty/melo/split_utils.py +174 -0
  211. xinference/thirdparty/melo/text/__init__.py +35 -0
  212. xinference/thirdparty/melo/text/chinese.py +199 -0
  213. xinference/thirdparty/melo/text/chinese_bert.py +107 -0
  214. xinference/thirdparty/melo/text/chinese_mix.py +253 -0
  215. xinference/thirdparty/melo/text/cleaner.py +36 -0
  216. xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
  217. xinference/thirdparty/melo/text/cmudict.rep +129530 -0
  218. xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
  219. xinference/thirdparty/melo/text/english.py +284 -0
  220. xinference/thirdparty/melo/text/english_bert.py +39 -0
  221. xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
  222. xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
  223. xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
  224. xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
  225. xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
  226. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
  227. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
  228. xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
  229. xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
  230. xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
  231. xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
  232. xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
  233. xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
  234. xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
  235. xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
  236. xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
  237. xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
  238. xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
  239. xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
  240. xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
  241. xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
  242. xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
  243. xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
  244. xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
  245. xinference/thirdparty/melo/text/french.py +94 -0
  246. xinference/thirdparty/melo/text/french_bert.py +39 -0
  247. xinference/thirdparty/melo/text/japanese.py +647 -0
  248. xinference/thirdparty/melo/text/japanese_bert.py +49 -0
  249. xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
  250. xinference/thirdparty/melo/text/korean.py +192 -0
  251. xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
  252. xinference/thirdparty/melo/text/spanish.py +122 -0
  253. xinference/thirdparty/melo/text/spanish_bert.py +39 -0
  254. xinference/thirdparty/melo/text/symbols.py +290 -0
  255. xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
  256. xinference/thirdparty/melo/train.py +635 -0
  257. xinference/thirdparty/melo/train.sh +19 -0
  258. xinference/thirdparty/melo/transforms.py +209 -0
  259. xinference/thirdparty/melo/utils.py +424 -0
  260. xinference/types.py +17 -1
  261. xinference/web/ui/build/asset-manifest.json +6 -6
  262. xinference/web/ui/build/index.html +1 -1
  263. xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
  264. xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
  265. xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
  266. xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
  267. xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
  268. xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
  269. xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
  270. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
  271. xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
  272. xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
  273. xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
  274. xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
  275. xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
  276. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
  277. xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
  278. xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
  279. xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
  280. xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
  281. xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
  282. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
  283. xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
  284. xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
  285. xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
  286. xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
  287. xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
  288. xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
  289. xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
  290. xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
  291. xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
  292. xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
  293. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
  294. xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
  295. xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
  296. xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
  297. xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
  298. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
  299. xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
  300. xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
  301. xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
  302. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
  303. xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
  304. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
  305. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
  306. xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
  307. xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
  308. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
  309. xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
  310. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
  311. xinference/web/ui/node_modules/.package-lock.json +67 -3
  312. xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
  313. xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
  314. xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
  315. xinference/web/ui/node_modules/i18next/package.json +129 -0
  316. xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
  317. xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
  318. xinference/web/ui/node_modules/react-i18next/package.json +162 -0
  319. xinference/web/ui/node_modules/void-elements/package.json +34 -0
  320. xinference/web/ui/package-lock.json +69 -3
  321. xinference/web/ui/package.json +2 -0
  322. xinference/web/ui/src/locales/en.json +186 -0
  323. xinference/web/ui/src/locales/zh.json +186 -0
  324. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/METADATA +96 -36
  325. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/RECORD +335 -146
  326. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/WHEEL +1 -1
  327. xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
  328. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  329. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  330. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  331. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  332. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  333. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  334. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  335. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  336. xinference/thirdparty/fish_speech/tools/api.py +0 -440
  337. xinference/thirdparty/fish_speech/tools/commons.py +0 -35
  338. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  339. xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -34
  340. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  341. xinference/thirdparty/fish_speech/tools/webui.py +0 -485
  342. xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
  343. xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
  344. xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
  345. xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
  346. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
  347. xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
  348. xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
  349. xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
  350. xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
  351. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
  352. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
  353. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
  354. xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
  355. xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
  356. xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
  357. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
  358. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
  359. xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
  360. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
  361. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
  362. xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
  363. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
  364. xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
  365. /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
  366. /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
  367. /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
  368. /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
  369. /xinference/thirdparty/{fish_speech/fish_speech/configs → melo/text/fr_phonemizer}/__init__.py +0 -0
  370. /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
  371. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
  372. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
  373. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
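
The remainder of this diff is the hunk view for the largest changed file, which from its content is xinference/model/llm/llm_family.json (file 34 above). A recurring edit there renames MLX quantization labels from "4-bit"/"8-bit" to "4bit"/"8bit", so callers launching MLX models against 1.2.1 must pass the new strings. A minimal sketch for spotting such renames, assuming you have extracted llm_family.json from each wheel to the hypothetical paths below:

    import json

    def mlx_quantizations(path):
        # llm_family.json is a JSON array of model families; each family has
        # "model_specs" entries carrying "model_format" and "quantizations".
        with open(path, encoding="utf-8") as f:
            families = json.load(f)
        labels = {}
        for family in families:
            for spec in family.get("model_specs", []):
                if spec.get("model_format") == "mlx":
                    labels.setdefault(family["model_name"], set()).update(
                        spec.get("quantizations", [])
                    )
        return labels

    # Hypothetical extraction paths; adjust to wherever the wheels were unpacked.
    old = mlx_quantizations("xinference-0.16.3/xinference/model/llm/llm_family.json")
    new = mlx_quantizations("xinference-1.2.1/xinference/model/llm/llm_family.json")
    for name in sorted(old.keys() & new.keys()):
        if old[name] != new[name]:
            print(name, sorted(old[name]), "->", sorted(new[name]))

xinference/model/llm/llm_family.json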
@@ -205,8 +205,8 @@
205
205
  "8-bit",
206
206
  "none"
207
207
  ],
208
- "model_id": "THUDM/glm-4-9b-chat",
209
- "model_revision": "eb55a443d66541f30869f6caac5ad0d2e95bcbaa"
208
+ "model_id": "THUDM/glm-4-9b-chat-hf",
209
+ "model_revision": "c7f73fd9e0f378c87f3c8f2c25aec6ad705043cd"
210
210
  },
211
211
  {
212
212
  "model_format": "ggufv2",
@@ -269,8 +269,8 @@
269
269
  "8-bit",
270
270
  "none"
271
271
  ],
272
- "model_id": "THUDM/glm-4-9b-chat-1m",
273
- "model_revision": "0aa722c7e0745dd21453427dd44c257dd253304f"
272
+ "model_id": "THUDM/glm-4-9b-chat-1m-hf",
273
+ "model_revision": "0588cb62942f0f0a5545c695e5c1b019d64eabdc"
274
274
  },
275
275
  {
276
276
  "model_format": "ggufv2",
@@ -952,7 +952,7 @@
952
952
  "model_format": "mlx",
953
953
  "model_size_in_billions": 8,
954
954
  "quantizations": [
955
- "4-bit"
955
+ "4bit"
956
956
  ],
957
957
  "model_id": "mlx-community/Meta-Llama-3-8B-Instruct-4bit"
958
958
  },
@@ -960,7 +960,7 @@
960
960
  "model_format": "mlx",
961
961
  "model_size_in_billions": 8,
962
962
  "quantizations": [
963
- "8-bit"
963
+ "8bit"
964
964
  ],
965
965
  "model_id": "mlx-community/Meta-Llama-3-8B-Instruct-8bit"
966
966
  },
@@ -976,7 +976,7 @@
976
976
  "model_format": "mlx",
977
977
  "model_size_in_billions": 70,
978
978
  "quantizations": [
979
- "4-bit"
979
+ "4bit"
980
980
  ],
981
981
  "model_id": "mlx-community/Meta-Llama-3-70B-Instruct-4bit-mlx"
982
982
  },
@@ -984,7 +984,7 @@
984
984
  "model_format": "mlx",
985
985
  "model_size_in_billions": 70,
986
986
  "quantizations": [
987
- "8-bit"
987
+ "8bit"
988
988
  ],
989
989
  "model_id": "mlx-community/Meta-Llama-3-70B-Instruct-8bit"
990
990
  },
@@ -1229,7 +1229,7 @@
1229
1229
  "model_format": "mlx",
1230
1230
  "model_size_in_billions": 8,
1231
1231
  "quantizations": [
1232
- "4-bit"
1232
+ "4bit"
1233
1233
  ],
1234
1234
  "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
1235
1235
  },
@@ -1237,7 +1237,7 @@
1237
1237
  "model_format": "mlx",
1238
1238
  "model_size_in_billions": 8,
1239
1239
  "quantizations": [
1240
- "8-bit"
1240
+ "8bit"
1241
1241
  ],
1242
1242
  "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
1243
1243
  },
@@ -1253,7 +1253,7 @@
1253
1253
  "model_format": "mlx",
1254
1254
  "model_size_in_billions": 70,
1255
1255
  "quantizations": [
1256
- "4-bit"
1256
+ "4bit"
1257
1257
  ],
1258
1258
  "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
1259
1259
  },
@@ -1261,7 +1261,7 @@
1261
1261
  "model_format": "mlx",
1262
1262
  "model_size_in_billions": 70,
1263
1263
  "quantizations": [
1264
- "8-bit"
1264
+ "8bit"
1265
1265
  ],
1266
1266
  "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-8bit"
1267
1267
  },
@@ -1399,6 +1399,98 @@
1399
1399
  }
1400
1400
  ]
1401
1401
  },
1402
+ {
1403
+ "version": 1,
1404
+ "context_length": 131072,
1405
+ "model_name": "llama-3.3-instruct",
1406
+ "model_lang": [
1407
+ "en",
1408
+ "de",
1409
+ "fr",
1410
+ "it",
1411
+ "pt",
1412
+ "hi",
1413
+ "es",
1414
+ "th"
1415
+ ],
1416
+ "model_ability": [
1417
+ "chat",
1418
+ "tools"
1419
+ ],
1420
+ "model_description": "The Llama 3.3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks..",
1421
+ "model_specs": [
1422
+ {
1423
+ "model_format": "pytorch",
1424
+ "model_size_in_billions": 70,
1425
+ "quantizations": [
1426
+ "none"
1427
+ ],
1428
+ "model_id": "meta-llama/Llama-3.3-70B-Instruct"
1429
+ },
1430
+ {
1431
+ "model_format": "gptq",
1432
+ "model_size_in_billions": 70,
1433
+ "quantizations": [
1434
+ "Int4"
1435
+ ],
1436
+ "model_id": "shuyuej/Llama-3.3-70B-Instruct-GPTQ"
1437
+ },
1438
+ {
1439
+ "model_format": "awq",
1440
+ "model_size_in_billions": 70,
1441
+ "quantizations": [
1442
+ "Int4"
1443
+ ],
1444
+ "model_id": "casperhansen/llama-3.3-70b-instruct-awq"
1445
+ },
1446
+ {
1447
+ "model_format": "mlx",
1448
+ "model_size_in_billions": 70,
1449
+ "quantizations": [
1450
+ "3bit",
1451
+ "4bit",
1452
+ "6bit",
1453
+ "8bit",
1454
+ "fp16"
1455
+ ],
1456
+ "model_id": "mlx-community/Llama-3.3-70B-Instruct-{quantization}"
1457
+ },
1458
+ {
1459
+ "model_format": "ggufv2",
1460
+ "model_size_in_billions": 70,
1461
+ "quantizations": [
1462
+ "Q3_K_L",
1463
+ "Q4_K_M",
1464
+ "Q6_K",
1465
+ "Q8_0"
1466
+ ],
1467
+ "quantization_parts": {
1468
+ "Q6_K": [
1469
+ "00001-of-00002",
1470
+ "00002-of-00002"
1471
+ ],
1472
+ "Q8_0": [
1473
+ "00001-of-00002",
1474
+ "00002-of-00002"
1475
+ ]
1476
+ },
1477
+ "model_id": "lmstudio-community/Llama-3.3-70B-Instruct-GGUF",
1478
+ "model_file_name_template": "Llama-3.3-70B-Instruct-{quantization}.gguf",
1479
+ "model_file_name_split_template": "Llama-3.3-70B-Instruct-{quantization}-{part}.gguf"
1480
+ }
1481
+ ],
1482
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
1483
+ "stop_token_ids": [
1484
+ 128001,
1485
+ 128008,
1486
+ 128009
1487
+ ],
1488
+ "stop": [
1489
+ "<|end_of_text|>",
1490
+ "<|eot_id|>",
1491
+ "<|eom_id|>"
1492
+ ]
1493
+ },
1402
1494
  {
1403
1495
  "version": 1,
1404
1496
  "context_length": 2048,
@@ -2199,7 +2291,7 @@
2199
2291
  "model_format": "mlx",
2200
2292
  "model_size_in_billions": "0_5",
2201
2293
  "quantizations": [
2202
- "4-bit"
2294
+ "4bit"
2203
2295
  ],
2204
2296
  "model_id": "Qwen/Qwen2-0.5B-Instruct-MLX"
2205
2297
  },
@@ -2207,7 +2299,7 @@
2207
2299
  "model_format": "mlx",
2208
2300
  "model_size_in_billions": "1_5",
2209
2301
  "quantizations": [
2210
- "4-bit"
2302
+ "4bit"
2211
2303
  ],
2212
2304
  "model_id": "Qwen/Qwen2-1.5B-Instruct-MLX"
2213
2305
  },
@@ -2215,7 +2307,7 @@
2215
2307
  "model_format": "mlx",
2216
2308
  "model_size_in_billions": 7,
2217
2309
  "quantizations": [
2218
- "4-bit"
2310
+ "4bit"
2219
2311
  ],
2220
2312
  "model_id": "Qwen/Qwen2-7B-Instruct-MLX"
2221
2313
  },
@@ -2223,7 +2315,7 @@
2223
2315
  "model_format": "mlx",
2224
2316
  "model_size_in_billions": 72,
2225
2317
  "quantizations": [
2226
- "4-bit"
2318
+ "4bit"
2227
2319
  ],
2228
2320
  "model_id": "mlx-community/Qwen2-72B-Instruct-4bit"
2229
2321
  },
@@ -3222,7 +3314,7 @@
3222
3314
  "model_format": "mlx",
3223
3315
  "model_size_in_billions": 12,
3224
3316
  "quantizations": [
3225
- "4-bit"
3317
+ "4bit"
3226
3318
  ],
3227
3319
  "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-4bit"
3228
3320
  },
@@ -3230,7 +3322,7 @@
3230
3322
  "model_format": "mlx",
3231
3323
  "model_size_in_billions": 12,
3232
3324
  "quantizations": [
3233
- "8-bit"
3325
+ "8bit"
3234
3326
  ],
3235
3327
  "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-8bit"
3236
3328
  }
@@ -3370,7 +3462,7 @@
3370
3462
  "model_format": "mlx",
3371
3463
  "model_size_in_billions": 123,
3372
3464
  "quantizations": [
3373
- "4-bit"
3465
+ "4bit"
3374
3466
  ],
3375
3467
  "model_id": "mlx-community/Mistral-Large-Instruct-2407-4bit"
3376
3468
  },
@@ -3378,7 +3470,7 @@
3378
3470
  "model_format": "mlx",
3379
3471
  "model_size_in_billions": 123,
3380
3472
  "quantizations": [
3381
- "8-bit"
3473
+ "8bit"
3382
3474
  ],
3383
3475
  "model_id": "mlx-community/Mistral-Large-Instruct-2407-8bit"
3384
3476
  }
@@ -3411,8 +3503,8 @@
3411
3503
  "8-bit",
3412
3504
  "none"
3413
3505
  ],
3414
- "model_id": "mistralai/Mistral-7B-Instruct-v0.2",
3415
- "model_revision": "9552e7b1d9b2d5bbd87a5aa7221817285dbb6366"
3506
+ "model_id": "mistralai/Codestral-22B-v0.1",
3507
+ "model_revision": "8f5fe23af91885222a1563283c87416745a5e212"
3416
3508
  },
3417
3509
  {
3418
3510
  "model_format": "ggufv2",
@@ -3436,7 +3528,7 @@
3436
3528
  "model_format": "mlx",
3437
3529
  "model_size_in_billions": 22,
3438
3530
  "quantizations": [
3439
- "4-bit"
3531
+ "4bit"
3440
3532
  ],
3441
3533
  "model_id": "mlx-community/Codestral-22B-v0.1-4bit",
3442
3534
  "model_revision": "544626b38eb1c9524f0fa570ec7b29550c26b78d"
@@ -3445,7 +3537,7 @@
3445
3537
  "model_format": "mlx",
3446
3538
  "model_size_in_billions": 22,
3447
3539
  "quantizations": [
3448
- "8-bit"
3540
+ "8bit"
3449
3541
  ],
3450
3542
  "model_id": "mlx-community/Codestral-22B-v0.1-8bit",
3451
3543
  "model_revision": "0399a53970663950d57010e61a2796af524a1588"
@@ -4170,7 +4262,7 @@
4170
4262
  "model_format": "mlx",
4171
4263
  "model_size_in_billions": 6,
4172
4264
  "quantizations": [
4173
- "4-bit"
4265
+ "4bit"
4174
4266
  ],
4175
4267
  "model_id": "mlx-community/Yi-1.5-6B-Chat-4bit",
4176
4268
  "model_revision": "0177c9a12b869d6bc73f772b5a1981a7c966adb6"
@@ -4179,7 +4271,7 @@
4179
4271
  "model_format": "mlx",
4180
4272
  "model_size_in_billions": 6,
4181
4273
  "quantizations": [
4182
- "8-bit"
4274
+ "8bit"
4183
4275
  ],
4184
4276
  "model_id": "mlx-community/Yi-1.5-6B-Chat-8bit",
4185
4277
  "model_revision": "7756e65d1bf1e2e6e97aef6bc9484307225f536b"
@@ -4188,7 +4280,7 @@
4188
4280
  "model_format": "mlx",
4189
4281
  "model_size_in_billions": 9,
4190
4282
  "quantizations": [
4191
- "4-bit"
4283
+ "4bit"
4192
4284
  ],
4193
4285
  "model_id": "mlx-community/Yi-1.5-9B-Chat-4bit",
4194
4286
  "model_revision": "e15f886479c44e7d90f0ac13ace69b2319b71c2f"
@@ -4197,7 +4289,7 @@
4197
4289
  "model_format": "mlx",
4198
4290
  "model_size_in_billions": 9,
4199
4291
  "quantizations": [
4200
- "8-bit"
4292
+ "8bit"
4201
4293
  ],
4202
4294
  "model_id": "mlx-community/Yi-1.5-9B-Chat-8bit",
4203
4295
  "model_revision": "c1f742fcf3683edbe2d2c2fd1ad7ac2bb6c5ca36"
@@ -4206,7 +4298,7 @@
4206
4298
  "model_format": "mlx",
4207
4299
  "model_size_in_billions": 34,
4208
4300
  "quantizations": [
4209
- "4-bit"
4301
+ "4bit"
4210
4302
  ],
4211
4303
  "model_id": "mlx-community/Yi-1.5-34B-Chat-4bit",
4212
4304
  "model_revision": "945e3b306ef37c46ab444fdc857d1f3ea7247374"
@@ -4215,7 +4307,7 @@
4215
4307
  "model_format": "mlx",
4216
4308
  "model_size_in_billions": 34,
4217
4309
  "quantizations": [
4218
- "8-bit"
4310
+ "8bit"
4219
4311
  ],
4220
4312
  "model_id": "mlx-community/Yi-1.5-34B-Chat-8bit",
4221
4313
  "model_revision": "3c12761a2c6663f216caab6dff84b0dd29b472ac"
@@ -5266,7 +5358,7 @@
5266
5358
  "model_format": "mlx",
5267
5359
  "model_size_in_billions": 7,
5268
5360
  "quantizations": [
5269
- "4-bit"
5361
+ "4bit"
5270
5362
  ],
5271
5363
  "model_id": "mlx-community/internlm2_5-7b-chat-4bit",
5272
5364
  "model_revision": "d12097a867721978142a6048399f470a3d18beee"
@@ -5275,7 +5367,7 @@
5275
5367
  "model_format": "mlx",
5276
5368
  "model_size_in_billions": 7,
5277
5369
  "quantizations": [
5278
- "8-bit"
5370
+ "8bit"
5279
5371
  ],
5280
5372
  "model_id": "mlx-community/internlm2_5-7b-chat-8bit",
5281
5373
  "model_revision": "0ec94d61d30ab161b49c69f9bf92ec2b9986d234"
@@ -5803,7 +5895,7 @@
5803
5895
  "model_format": "mlx",
5804
5896
  "model_size_in_billions": 2,
5805
5897
  "quantizations": [
5806
- "4-bit"
5898
+ "4bit"
5807
5899
  ],
5808
5900
  "model_id": "mlx-community/gemma-2-2b-it-4bit"
5809
5901
  },
@@ -5811,7 +5903,7 @@
5811
5903
  "model_format": "mlx",
5812
5904
  "model_size_in_billions": 2,
5813
5905
  "quantizations": [
5814
- "8-bit"
5906
+ "8bit"
5815
5907
  ],
5816
5908
  "model_id": "mlx-community/gemma-2-2b-it-8bit"
5817
5909
  },
@@ -5827,7 +5919,7 @@
5827
5919
  "model_format": "mlx",
5828
5920
  "model_size_in_billions": 9,
5829
5921
  "quantizations": [
5830
- "4-bit"
5922
+ "4bit"
5831
5923
  ],
5832
5924
  "model_id": "mlx-community/gemma-2-9b-it-4bit"
5833
5925
  },
@@ -5835,7 +5927,7 @@
5835
5927
  "model_format": "mlx",
5836
5928
  "model_size_in_billions": 9,
5837
5929
  "quantizations": [
5838
- "8-bit"
5930
+ "8bit"
5839
5931
  ],
5840
5932
  "model_id": "mlx-community/gemma-2-9b-it-8bit"
5841
5933
  },
@@ -5851,7 +5943,7 @@
5851
5943
  "model_format": "mlx",
5852
5944
  "model_size_in_billions": 27,
5853
5945
  "quantizations": [
5854
- "4-bit"
5946
+ "4bit"
5855
5947
  ],
5856
5948
  "model_id": "mlx-community/gemma-2-27b-it-4bit"
5857
5949
  },
@@ -5859,7 +5951,7 @@
5859
5951
  "model_format": "mlx",
5860
5952
  "model_size_in_billions": 27,
5861
5953
  "quantizations": [
5862
- "8-bit"
5954
+ "8bit"
5863
5955
  ],
5864
5956
  "model_id": "mlx-community/gemma-2-27b-it-8bit"
5865
5957
  },
@@ -6925,7 +7017,7 @@
6925
7017
  "model_id":"Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4",
6926
7018
  "model_revision":"800d396518c82960ce6d231adecd07bbc474f0a9"
6927
7019
  },
6928
- {
7020
+ {
6929
7021
  "model_format":"awq",
6930
7022
  "model_size_in_billions":2,
6931
7023
  "quantizations":[
@@ -6934,6 +7026,15 @@
6934
7026
  "model_id":"Qwen/Qwen2-VL-2B-Instruct-AWQ",
6935
7027
  "model_revision":"ea8c5854c0044e28626719292de0d9b1a671f6fc"
6936
7028
  },
7029
+ {
7030
+ "model_format":"mlx",
7031
+ "model_size_in_billions":2,
7032
+ "quantizations":[
7033
+ "4bit",
7034
+ "8bit"
7035
+ ],
7036
+ "model_id":"mlx-community/Qwen2-VL-2B-Instruct-{quantization}"
7037
+ },
6937
7038
  {
6938
7039
  "model_format":"pytorch",
6939
7040
  "model_size_in_billions":7,
@@ -6970,6 +7071,15 @@
6970
7071
  "model_id":"Qwen/Qwen2-VL-7B-Instruct-AWQ",
6971
7072
  "model_revision":"f94216e8b513933bccd567bcd9b7350199f32538"
6972
7073
  },
7074
+ {
7075
+ "model_format":"mlx",
7076
+ "model_size_in_billions":7,
7077
+ "quantizations":[
7078
+ "4bit",
7079
+ "8bit"
7080
+ ],
7081
+ "model_id":"mlx-community/Qwen2-VL-7B-Instruct-{quantization}"
7082
+ },
6973
7083
  {
6974
7084
  "model_format":"pytorch",
6975
7085
  "model_size_in_billions":72,
@@ -6994,6 +7104,15 @@
6994
7104
  "Int8"
6995
7105
  ],
6996
7106
  "model_id":"Qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}"
7107
+ },
7108
+ {
7109
+ "model_format":"mlx",
7110
+ "model_size_in_billions":72,
7111
+ "quantizations":[
7112
+ "4bit",
7113
+ "8bit"
7114
+ ],
7115
+ "model_id":"mlx-community/Qwen2-VL-72B-Instruct-{quantization}"
6997
7116
  }
6998
7117
  ],
6999
7118
  "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
@@ -8015,7 +8134,7 @@
8015
8134
  "model_format": "mlx",
8016
8135
  "model_size_in_billions": "0_5",
8017
8136
  "quantizations": [
8018
- "4-bit"
8137
+ "4bit"
8019
8138
  ],
8020
8139
  "model_id": "mlx-community/Qwen2.5-0.5B-Instruct-4bit"
8021
8140
  },
@@ -8023,7 +8142,7 @@
8023
8142
  "model_format": "mlx",
8024
8143
  "model_size_in_billions": "0_5",
8025
8144
  "quantizations": [
8026
- "8-bit"
8145
+ "8bit"
8027
8146
  ],
8028
8147
  "model_id": "mlx-community/Qwen2.5-0.5B-Instruct-8bit"
8029
8148
  },
@@ -8039,7 +8158,7 @@
8039
8158
  "model_format": "mlx",
8040
8159
  "model_size_in_billions": "1_5",
8041
8160
  "quantizations": [
8042
- "4-bit"
8161
+ "4bit"
8043
8162
  ],
8044
8163
  "model_id": "mlx-community/Qwen2.5-1.5B-Instruct-4bit"
8045
8164
  },
@@ -8047,7 +8166,7 @@
8047
8166
  "model_format": "mlx",
8048
8167
  "model_size_in_billions": "1_5",
8049
8168
  "quantizations": [
8050
- "8-bit"
8169
+ "8bit"
8051
8170
  ],
8052
8171
  "model_id": "mlx-community/Qwen2.5-1.5B-Instruct-8bit"
8053
8172
  },
@@ -8063,7 +8182,7 @@
8063
8182
  "model_format": "mlx",
8064
8183
  "model_size_in_billions": 3,
8065
8184
  "quantizations": [
8066
- "4-bit"
8185
+ "4bit"
8067
8186
  ],
8068
8187
  "model_id": "mlx-community/Qwen2.5-3B-Instruct-4bit"
8069
8188
  },
@@ -8071,7 +8190,7 @@
8071
8190
  "model_format": "mlx",
8072
8191
  "model_size_in_billions": 3,
8073
8192
  "quantizations": [
8074
- "8-bit"
8193
+ "8bit"
8075
8194
  ],
8076
8195
  "model_id": "mlx-community/Qwen2.5-3B-Instruct-8bit"
8077
8196
  },
@@ -8087,7 +8206,7 @@
8087
8206
  "model_format": "mlx",
8088
8207
  "model_size_in_billions": 7,
8089
8208
  "quantizations": [
8090
- "4-bit"
8209
+ "4bit"
8091
8210
  ],
8092
8211
  "model_id": "mlx-community/Qwen2.5-7B-Instruct-4bit"
8093
8212
  },
@@ -8095,7 +8214,7 @@
8095
8214
  "model_format": "mlx",
8096
8215
  "model_size_in_billions": 7,
8097
8216
  "quantizations": [
8098
- "8-bit"
8217
+ "8bit"
8099
8218
  ],
8100
8219
  "model_id": "mlx-community/Qwen2.5-7B-Instruct-8bit"
8101
8220
  },
@@ -8111,7 +8230,7 @@
8111
8230
  "model_format": "mlx",
8112
8231
  "model_size_in_billions": 14,
8113
8232
  "quantizations": [
8114
- "4-bit"
8233
+ "4bit"
8115
8234
  ],
8116
8235
  "model_id": "mlx-community/Qwen2.5-14B-Instruct-4bit"
8117
8236
  },
@@ -8119,7 +8238,7 @@
8119
8238
  "model_format": "mlx",
8120
8239
  "model_size_in_billions": 14,
8121
8240
  "quantizations": [
8122
- "8-bit"
8241
+ "8bit"
8123
8242
  ],
8124
8243
  "model_id": "mlx-community/Qwen2.5-14B-Instruct-8bit"
8125
8244
  },
@@ -8135,7 +8254,7 @@
8135
8254
  "model_format": "mlx",
8136
8255
  "model_size_in_billions": 32,
8137
8256
  "quantizations": [
8138
- "4-bit"
8257
+ "4bit"
8139
8258
  ],
8140
8259
  "model_id": "mlx-community/Qwen2.5-32B-Instruct-4bit"
8141
8260
  },
@@ -8143,7 +8262,7 @@
8143
8262
  "model_format": "mlx",
8144
8263
  "model_size_in_billions": 32,
8145
8264
  "quantizations": [
8146
- "8-bit"
8265
+ "8bit"
8147
8266
  ],
8148
8267
  "model_id": "mlx-community/Qwen2.5-32B-Instruct-8bit"
8149
8268
  },
@@ -8159,7 +8278,7 @@
8159
8278
  "model_format": "mlx",
8160
8279
  "model_size_in_billions": 72,
8161
8280
  "quantizations": [
8162
- "4-bit"
8281
+ "4bit"
8163
8282
  ],
8164
8283
  "model_id": "mlx-community/Qwen2.5-72B-Instruct-4bit"
8165
8284
  },
@@ -8167,7 +8286,7 @@
8167
8286
  "model_format": "mlx",
8168
8287
  "model_size_in_billions": 72,
8169
8288
  "quantizations": [
8170
- "8-bit"
8289
+ "8bit"
8171
8290
  ],
8172
8291
  "model_id": "mlx-community/Qwen2.5-72B-Instruct-8bit"
8173
8292
  },
@@ -8205,6 +8324,16 @@
8205
8324
  ],
8206
8325
  "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
8207
8326
  "model_specs": [
8327
+ {
8328
+ "model_format": "pytorch",
8329
+ "model_size_in_billions": "0_5",
8330
+ "quantizations": [
8331
+ "4-bit",
8332
+ "8-bit",
8333
+ "none"
8334
+ ],
8335
+ "model_id": "Qwen/Qwen2.5-Coder-0.5B"
8336
+ },
8208
8337
  {
8209
8338
  "model_format": "pytorch",
8210
8339
  "model_size_in_billions": "1_5",
@@ -8213,8 +8342,17 @@
8213
8342
  "8-bit",
8214
8343
  "none"
8215
8344
  ],
8216
- "model_id": "Qwen/Qwen2.5-Coder-1.5B",
8217
- "model_revision": "d3586cfe793730945f8e4d7ef31032a3ee50247d"
8345
+ "model_id": "Qwen/Qwen2.5-Coder-1.5B"
8346
+ },
8347
+ {
8348
+ "model_format": "pytorch",
8349
+ "model_size_in_billions": "3",
8350
+ "quantizations": [
8351
+ "4-bit",
8352
+ "8-bit",
8353
+ "none"
8354
+ ],
8355
+ "model_id": "Qwen/Qwen2.5-Coder-3B"
8218
8356
  },
8219
8357
  {
8220
8358
  "model_format": "pytorch",
@@ -8224,8 +8362,27 @@
8224
8362
  "8-bit",
8225
8363
  "none"
8226
8364
  ],
8227
- "model_id": "Qwen/Qwen2.5-Coder-7B",
8228
- "model_revision": "30b6a7e874a78d46b80fa1db3194ea427dd41b08"
8365
+ "model_id": "Qwen/Qwen2.5-Coder-7B"
8366
+ },
8367
+ {
8368
+ "model_format": "pytorch",
8369
+ "model_size_in_billions": 14,
8370
+ "quantizations": [
8371
+ "4-bit",
8372
+ "8-bit",
8373
+ "none"
8374
+ ],
8375
+ "model_id": "Qwen/Qwen2.5-Coder-14B"
8376
+ },
8377
+ {
8378
+ "model_format": "pytorch",
8379
+ "model_size_in_billions": 32,
8380
+ "quantizations": [
8381
+ "4-bit",
8382
+ "8-bit",
8383
+ "none"
8384
+ ],
8385
+ "model_id": "Qwen/Qwen2.5-Coder-32B"
8229
8386
  }
8230
8387
  ]
8231
8388
  },
@@ -8243,6 +8400,16 @@
8243
8400
  ],
8244
8401
  "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
8245
8402
  "model_specs": [
8403
+ {
8404
+ "model_format": "pytorch",
8405
+ "model_size_in_billions": "0_5",
8406
+ "quantizations": [
8407
+ "4-bit",
8408
+ "8-bit",
8409
+ "none"
8410
+ ],
8411
+ "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct"
8412
+ },
8246
8413
  {
8247
8414
  "model_format": "pytorch",
8248
8415
  "model_size_in_billions": "1_5",
@@ -8253,6 +8420,16 @@
8253
8420
  ],
8254
8421
  "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct"
8255
8422
  },
8423
+ {
8424
+ "model_format": "pytorch",
8425
+ "model_size_in_billions": "3",
8426
+ "quantizations": [
8427
+ "4-bit",
8428
+ "8-bit",
8429
+ "none"
8430
+ ],
8431
+ "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct"
8432
+ },
8256
8433
  {
8257
8434
  "model_format": "pytorch",
8258
8435
  "model_size_in_billions": 7,
@@ -8263,57 +8440,171 @@
  ],
  "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct"
  },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 14,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct"
+ },
  {
  "model_format": "gptq",
- "model_size_in_billions": "7",
+ "model_size_in_billions": "0_5",
  "quantizations": [
  "Int4",
  "Int8"
  ],
- "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}"
+ "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-{quantization}"
  },
  {
- "model_format": "ggufv2",
+ "model_format": "gptq",
  "model_size_in_billions": "1_5",
  "quantizations": [
- "q2_k",
- "q3_k_m",
- "q4_0",
- "q4_k_m",
- "q5_0",
- "q5_k_m",
- "q6_k",
- "q8_0"
+ "Int4",
+ "Int8"
  ],
- "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
- "model_file_name_template": "qwen2.5-coder-1.5b-instruct-{quantization}.gguf"
+ "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-{quantization}"
  },
  {
- "model_format": "ggufv2",
- "model_size_in_billions": 7,
+ "model_format": "gptq",
+ "model_size_in_billions": "3",
  "quantizations": [
- "q2_k",
- "q3_k_m",
- "q4_0",
- "q4_k_m",
- "q5_0",
- "q5_k_m",
- "q6_k",
- "q8_0"
+ "Int4",
+ "Int8"
  ],
- "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
- "model_file_name_template": "qwen2.5-coder-7b-instruct-{quantization}.gguf",
- "model_file_name_split_template": "qwen2.5-coder-7b-instruct-{quantization}-{part}.gguf",
- "quantization_parts": {
- "q4_0": [
- "00001-of-00002",
- "00002-of-00002"
- ],
- "q4_k_m": [
- "00001-of-00002",
- "00002-of-00002"
- ],
- "q5_0": [
+ "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-{quantization}"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": "7",
+ "quantizations": [
+ "Int4",
+ "Int8"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": "14",
+ "quantizations": [
+ "Int4",
+ "Int8"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-{quantization}"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": "32",
+ "quantizations": [
+ "Int4",
+ "Int8"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-{quantization}"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": "0_5",
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-AWQ"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-AWQ"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": "3",
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-AWQ"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": "7",
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-AWQ"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": "14",
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-AWQ"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": "32",
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ"
+ },
+
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
+ "model_file_name_template": "qwen2.5-coder-1.5b-instruct-{quantization}.gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
+ "model_file_name_template": "qwen2.5-coder-7b-instruct-{quantization}.gguf",
+ "model_file_name_split_template": "qwen2.5-coder-7b-instruct-{quantization}-{part}.gguf",
+ "quantization_parts": {
+ "q4_0": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ],
+ "q4_k_m": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ],
+ "q5_0": [
  "00001-of-00002",
  "00002-of-00002"
  ],
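The GGUF specs above resolve concrete file names by plain string substitution: "model_file_name_template" is formatted with the selected quantization, and for quantizations listed under "quantization_parts" the "model_file_name_split_template" is formatted once per part. A minimal sketch of that expansion (hypothetical helper; the actual resolver lives in xinference's model caching code):

    def gguf_file_names(spec: dict, quantization: str) -> list[str]:
        # multi-part quantizations (e.g. q4_0 of the 7B spec) map to several files
        parts = spec.get("quantization_parts", {}).get(quantization)
        if parts is None:
            return [spec["model_file_name_template"].format(quantization=quantization)]
        return [
            spec["model_file_name_split_template"].format(quantization=quantization, part=p)
            for p in parts
        ]

    # gguf_file_names(spec, "q4_0") ->
    # ["qwen2.5-coder-7b-instruct-q4_0-00001-of-00002.gguf",
    #  "qwen2.5-coder-7b-instruct-q4_0-00002-of-00002.gguf"]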
@@ -8344,5 +8635,676 @@
  "<|im_start|>",
  "<|im_end|>"
  ]
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "QwQ-32B-Preview",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "Qwen/QwQ-32B-Preview"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "KirillR/QwQ-32B-Preview-AWQ"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "Q3_K_L",
+ "Q4_K_M",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "lmstudio-community/QwQ-32B-Preview-GGUF",
+ "model_file_name_template": "QwQ-32B-Preview-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "4bit"
+ ],
+ "model_id": "mlx-community/Qwen_QwQ-32B-Preview_MLX-4bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "8bit"
+ ],
+ "model_id": "mlx-community/Qwen_QwQ-32B-Preview_MLX-8bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/QwQ-32B-Preview-bf16"
+ }
+ ],
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+ "stop_token_ids": [
+ 151643,
+ 151644,
+ 151645
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|im_start|>",
+ "<|im_end|>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "deepseek-r1-distill-qwen",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "deepseek-r1-distill-qwen is distilled from DeepSeek-R1 based on Qwen",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "casperhansen/deepseek-r1-distill-qwen-1.5b-awq"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "jakiAJK/DeepSeek-R1-Distill-Qwen-1.5B_GPTQ-int4"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_M",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF",
+ "model_file_name_template": "DeepSeek-R1-Distill-Qwen-1.5B-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "3bit",
+ "4bit",
+ "6bit",
+ "8bit",
+ "bf16"
+ ],
+ "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-1.5B-{quantization}"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "jakiAJK/DeepSeek-R1-Distill-Qwen-7B_AWQ"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "jakiAJK/DeepSeek-R1-Distill-Qwen-7B_GPTQ-int4"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_M",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0",
+ "F16"
+ ],
+ "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF",
+ "model_file_name_template": "DeepSeek-R1-Distill-Qwen-7B-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "3bit",
+ "4bit",
+ "6bit",
+ "8bit",
+ "bf16"
+ ],
+ "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-7B-{quantization}"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 14,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 14,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "casperhansen/deepseek-r1-distill-qwen-14b-awq"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 14,
+ "quantizations": [
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_M",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0",
+ "F16"
+ ],
+ "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF",
+ "model_file_name_template": "DeepSeek-R1-Distill-Qwen-14B-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 14,
+ "quantizations": [
+ "3bit",
+ "4bit",
+ "6bit",
+ "8bit",
+ "bf16"
+ ],
+ "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-14B-{quantization}"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "casperhansen/deepseek-r1-distill-qwen-32b-awq"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_M",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0",
+ "F16"
+ ],
+ "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF",
+ "model_file_name_template": "DeepSeek-R1-Distill-Qwen-32B-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "3bit",
+ "4bit",
+ "6bit",
+ "8bit",
+ "bf16"
+ ],
+ "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-32B-{quantization}"
+ }
+ ],
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "stop_token_ids": [
+ 151643
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ]
+ },
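Once a family such as deepseek-r1-distill-qwen is registered, any spec above can be started through the RESTful client; a minimal sketch, assuming a local supervisor on the default port and the transformers engine (argument names follow the public launch_model API, but verify against your installed client version):

    from xinference.client import Client

    client = Client("http://localhost:9997")  # assumption: default supervisor endpoint
    uid = client.launch_model(
        model_name="deepseek-r1-distill-qwen",  # "model_name" of the entry above
        model_engine="transformers",            # assumption: engine for the pytorch spec
        model_format="pytorch",
        model_size_in_billions=7,
        quantization="none",
    )
    model = client.get_model(uid)
    print(model.chat(messages=[{"role": "user", "content": "Why is the sky blue?"}]))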
+ {
+ "version": 1,
+ "context_length": 8192,
+ "model_name": "glm-edge-chat",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "The GLM-Edge series is our attempt to face the end-side real-life scenarios, which consists of two sizes of large-language dialogue models and multimodal comprehension models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, GLM-Edge-V-5B). Among them, the 1.5B / 2B model is mainly for platforms such as mobile phones and cars, and the 4B / 5B model is mainly for platforms such as PCs.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "THUDM/glm-edge-1.5b-chat"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": "4",
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "THUDM/glm-edge-4b-chat"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "Q4_0",
+ "Q4_1",
+ "Q4_K",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_file_name_template": "ggml-model-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-1.5b-chat-gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "F16"
+ ],
+ "model_file_name_template": "glm-edge-1.5B-chat-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-1.5b-chat-gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "4",
+ "quantizations": [
+ "Q4_0",
+ "Q4_1",
+ "Q4_K",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_file_name_template": "ggml-model-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-4b-chat-gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "4",
+ "quantizations": [
+ "F16"
+ ],
+ "model_file_name_template": "glm-edge-4B-chat-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-4b-chat-gguf"
+ }
+ ],
+ "chat_template": "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}",
+ "stop_token_ids": [
+ 59246,
+ 59253,
+ 59255
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|user|>",
+ "<|observation|>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 8192,
+ "model_name": "glm-edge-v",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "The GLM-Edge series is our attempt to face the end-side real-life scenarios, which consists of two sizes of large-language dialogue models and multimodal comprehension models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, GLM-Edge-V-5B). Among them, the 1.5B / 2B model is mainly for platforms such as mobile phones and cars, and the 4B / 5B model is mainly for platforms such as PCs.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": "2",
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "THUDM/glm-edge-v-2b"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": "5",
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "THUDM/glm-edge-v-5b"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "2",
+ "quantizations": [
+ "Q4_0",
+ "Q4_1",
+ "Q4_K",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_file_name_template": "ggml-model-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-v-2b-gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "2",
+ "quantizations": [
+ "F16"
+ ],
+ "model_file_name_template": "glm-edge-v-2B-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-v-2b-gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "2",
+ "quantizations": [
+ "f16"
+ ],
+ "model_file_name_template": "mmproj-model-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-v-2b-gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "5",
+ "quantizations": [
+ "Q4_0",
+ "Q4_1",
+ "Q4_K",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_file_name_template": "ggml-model-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-v-5b-gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "5",
+ "quantizations": [
+ "F16"
+ ],
+ "model_file_name_template": "glm-edge-v-5B-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-v-5b-gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "5",
+ "quantizations": [
+ "f16"
+ ],
+ "model_file_name_template": "mmproj-model-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-v-5b-gguf"
+ }
+ ],
+ "chat_template": "{% for item in messages %}{% if item['role'] != 'system' %}<|{{ item['role'] }}|>\n{% for content in item['content'] %}{% if content['type'] == 'image' %}{% for _ in range(578) %}<|begin_of_image|>{% endfor %}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}",
+ "stop_token_ids": [
+ 59246,
+ 59253,
+ 59255
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|user|>",
+ "<|observation|>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "QvQ-72B-Preview",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "QVQ-72B-Preview is an experimental research model developed by the Qwen team, focusing on enhancing visual reasoning capabilities.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 72,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "Qwen/QVQ-72B-Preview"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 72,
+ "quantizations": [
+ "3bit",
+ "4bit",
+ "6bit",
+ "8bit",
+ "bf16"
+ ],
+ "model_id": "mlx-community/QVQ-72B-Preview-{quantization}"
+ }
+ ],
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+ "stop_token_ids": [
+ 151645,
+ 151643
+ ],
+ "stop": [
+ "<|im_end|>",
+ "<|endoftext|>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "marco-o1",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "tools"
+ ],
+ "model_description": "Marco-o1: Towards Open Reasoning Models for Open-Ended Solutions",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "AIDC-AI/Marco-o1"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "QuantFactory/Marco-o1-GGUF",
+ "model_file_name_template": "Marco-o1.{quantization}.gguf"
+ }
+ ],
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n\n你是一个经过良好训练的AI助手,你的名字是Marco-o1.由阿里国际数字商业集团的AI Business创造.\n \n## 重要!!!!!\n当你回答问题时,你的思考应该在<Thought>内完成,<Output>内输出你的结果。\n<Thought>应该尽可能是英文,但是有2个特例,一个是对原文中的引用,另一个是是数学应该使用markdown格式,<Output>内的输出需要遵循用户输入的语言。\n <|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "stop_token_ids": [
+ 151643,
+ 151644,
+ 151645
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|im_start|>",
+ "<|im_end|>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 4096,
+ "model_name": "cogagent",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "The CogAgent-9B-20241220 model is based on GLM-4V-9B, a bilingual open-source VLM base model. Through data collection and optimization, multi-stage training, and strategy improvements, CogAgent-9B-20241220 achieves significant advancements in GUI perception, inference prediction accuracy, action space completeness, and task generalizability. ",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": "9",
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "THUDM/cogagent-9b-20241220"
+ }
+ ],
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "stop_token_ids": [
+ 151329,
+ 151336,
+ 151338
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|user|>",
+ "<|observation|>"
+ ]
  }
  ]
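A quick way to sanity-check any "chat_template" registered above is to render it directly with jinja2; a minimal sketch for the cogagent entry (assumes the jinja2 package is installed):

    from jinja2 import Template

    # same template string as in the cogagent entry; "\\n" mirrors the JSON escaping
    chat_template = (
        "{% for message in messages %}"
        "{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}"
        "{% endfor %}"
        "{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}"
    )
    prompt = Template(chat_template).render(
        messages=[{"role": "user", "content": "Describe this screenshot."}],
        add_generation_prompt=True,
    )
    print(prompt)
    # <|im_start|>user
    # Describe this screenshot.<|im_end|>
    # <|im_start|>assistant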