xinference 1.6.0.post1__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (124)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +79 -2
  3. xinference/client/restful/restful_client.py +65 -3
  4. xinference/conftest.py +0 -7
  5. xinference/core/media_interface.py +132 -8
  6. xinference/core/model.py +44 -6
  7. xinference/core/scheduler.py +1 -10
  8. xinference/core/supervisor.py +8 -17
  9. xinference/core/worker.py +5 -27
  10. xinference/deploy/cmdline.py +6 -2
  11. xinference/model/audio/chattts.py +24 -39
  12. xinference/model/audio/cosyvoice.py +18 -30
  13. xinference/model/audio/funasr.py +42 -0
  14. xinference/model/audio/model_spec.json +71 -1
  15. xinference/model/audio/model_spec_modelscope.json +76 -2
  16. xinference/model/audio/utils.py +75 -0
  17. xinference/model/core.py +1 -0
  18. xinference/model/embedding/__init__.py +74 -18
  19. xinference/model/embedding/core.py +98 -589
  20. xinference/model/embedding/embed_family.py +133 -0
  21. xinference/{thirdparty/omnilmm/train → model/embedding/flag}/__init__.py +1 -1
  22. xinference/model/embedding/flag/core.py +282 -0
  23. xinference/model/embedding/model_spec.json +24 -0
  24. xinference/model/embedding/model_spec_modelscope.json +24 -0
  25. xinference/model/embedding/sentence_transformers/__init__.py +13 -0
  26. xinference/model/embedding/sentence_transformers/core.py +399 -0
  27. xinference/model/embedding/vllm/core.py +95 -0
  28. xinference/model/image/model_spec.json +30 -3
  29. xinference/model/image/model_spec_modelscope.json +41 -2
  30. xinference/model/image/stable_diffusion/core.py +144 -53
  31. xinference/model/llm/__init__.py +6 -54
  32. xinference/model/llm/core.py +19 -5
  33. xinference/model/llm/llama_cpp/core.py +59 -3
  34. xinference/model/llm/llama_cpp/memory.py +457 -0
  35. xinference/model/llm/llm_family.json +247 -402
  36. xinference/model/llm/llm_family.py +88 -16
  37. xinference/model/llm/llm_family_modelscope.json +260 -421
  38. xinference/model/llm/llm_family_openmind_hub.json +0 -34
  39. xinference/model/llm/sglang/core.py +8 -0
  40. xinference/model/llm/transformers/__init__.py +27 -6
  41. xinference/model/llm/transformers/chatglm.py +4 -2
  42. xinference/model/llm/transformers/core.py +49 -28
  43. xinference/model/llm/transformers/deepseek_v2.py +6 -49
  44. xinference/model/llm/transformers/gemma3.py +119 -164
  45. xinference/model/llm/transformers/multimodal/__init__.py +13 -0
  46. xinference/model/llm/transformers/{cogagent.py → multimodal/cogagent.py} +58 -95
  47. xinference/model/llm/transformers/multimodal/core.py +205 -0
  48. xinference/model/llm/transformers/{deepseek_vl2.py → multimodal/deepseek_vl2.py} +59 -120
  49. xinference/model/llm/transformers/multimodal/gemma3.py +117 -0
  50. xinference/model/llm/transformers/{glm4v.py → multimodal/glm4v.py} +57 -93
  51. xinference/model/llm/transformers/multimodal/intern_vl.py +412 -0
  52. xinference/model/llm/transformers/{minicpmv26.py → multimodal/minicpmv26.py} +55 -102
  53. xinference/model/llm/transformers/{ovis2.py → multimodal/ovis2.py} +114 -175
  54. xinference/model/llm/transformers/{qwen-omni.py → multimodal/qwen-omni.py} +82 -167
  55. xinference/model/llm/transformers/multimodal/qwen2_audio.py +131 -0
  56. xinference/model/llm/transformers/{qwen2_vl.py → multimodal/qwen2_vl.py} +224 -256
  57. xinference/model/llm/transformers/opt.py +4 -2
  58. xinference/model/llm/transformers/utils.py +6 -37
  59. xinference/model/llm/utils.py +11 -0
  60. xinference/model/llm/vllm/core.py +7 -0
  61. xinference/model/rerank/core.py +91 -3
  62. xinference/model/rerank/model_spec.json +24 -0
  63. xinference/model/rerank/model_spec_modelscope.json +24 -0
  64. xinference/model/rerank/utils.py +20 -2
  65. xinference/model/utils.py +38 -1
  66. xinference/model/video/diffusers.py +65 -3
  67. xinference/model/video/model_spec.json +31 -4
  68. xinference/model/video/model_spec_modelscope.json +32 -4
  69. xinference/web/ui/build/asset-manifest.json +6 -6
  70. xinference/web/ui/build/index.html +1 -1
  71. xinference/web/ui/build/static/css/main.013f296b.css +2 -0
  72. xinference/web/ui/build/static/css/main.013f296b.css.map +1 -0
  73. xinference/web/ui/build/static/js/main.8a9e3ba0.js +3 -0
  74. xinference/web/ui/build/static/js/main.8a9e3ba0.js.map +1 -0
  75. xinference/web/ui/node_modules/.cache/babel-loader/34cfbfb7836e136ba3261cfd411cc554bf99ba24b35dcceebeaa4f008cb3c9dc.json +1 -0
  76. xinference/web/ui/node_modules/.cache/babel-loader/55b9fb40b57fa926e8f05f31c2f96467e76e5ad62f033dca97c03f9e8c4eb4fe.json +1 -0
  77. xinference/web/ui/node_modules/.cache/babel-loader/567e49df411efb24425d289bb484758cb57067ca54f8b5c67fe4505f698deb96.json +1 -0
  78. xinference/web/ui/node_modules/.cache/babel-loader/6595880facebca7ceace6f17cf21c3a5a9219a2f52fb0ba9f3cf1131eddbcf6b.json +1 -0
  79. xinference/web/ui/node_modules/.cache/babel-loader/aa998bc2d9c11853add6b8a2e08f50327f56d8824ccaaec92d6dde1b305f0d85.json +1 -0
  80. xinference/web/ui/node_modules/.cache/babel-loader/c748246b1d7bcebc16153be69f37e955bb2145526c47dd425aeeff70d3004dbc.json +1 -0
  81. xinference/web/ui/node_modules/.cache/babel-loader/e31234e95d60a5a7883fbcd70de2475dc1c88c90705df1a530abb68f86f80a51.json +1 -0
  82. xinference/web/ui/src/locales/en.json +21 -8
  83. xinference/web/ui/src/locales/ja.json +224 -0
  84. xinference/web/ui/src/locales/ko.json +224 -0
  85. xinference/web/ui/src/locales/zh.json +21 -8
  86. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/METADATA +14 -11
  87. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/RECORD +93 -100
  88. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/WHEEL +1 -1
  89. xinference/model/llm/transformers/cogvlm2.py +0 -442
  90. xinference/model/llm/transformers/cogvlm2_video.py +0 -333
  91. xinference/model/llm/transformers/deepseek_vl.py +0 -280
  92. xinference/model/llm/transformers/glm_edge_v.py +0 -213
  93. xinference/model/llm/transformers/intern_vl.py +0 -526
  94. xinference/model/llm/transformers/internlm2.py +0 -94
  95. xinference/model/llm/transformers/minicpmv25.py +0 -193
  96. xinference/model/llm/transformers/omnilmm.py +0 -132
  97. xinference/model/llm/transformers/qwen2_audio.py +0 -179
  98. xinference/model/llm/transformers/qwen_vl.py +0 -360
  99. xinference/thirdparty/omnilmm/LICENSE +0 -201
  100. xinference/thirdparty/omnilmm/chat.py +0 -218
  101. xinference/thirdparty/omnilmm/constants.py +0 -4
  102. xinference/thirdparty/omnilmm/conversation.py +0 -332
  103. xinference/thirdparty/omnilmm/model/__init__.py +0 -1
  104. xinference/thirdparty/omnilmm/model/omnilmm.py +0 -595
  105. xinference/thirdparty/omnilmm/model/resampler.py +0 -166
  106. xinference/thirdparty/omnilmm/model/utils.py +0 -578
  107. xinference/thirdparty/omnilmm/train/train_utils.py +0 -150
  108. xinference/thirdparty/omnilmm/utils.py +0 -134
  109. xinference/web/ui/build/static/css/main.337afe76.css +0 -2
  110. xinference/web/ui/build/static/css/main.337afe76.css.map +0 -1
  111. xinference/web/ui/build/static/js/main.ae579a97.js +0 -3
  112. xinference/web/ui/build/static/js/main.ae579a97.js.map +0 -1
  113. xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +0 -1
  114. xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +0 -1
  115. xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +0 -1
  116. xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +0 -1
  117. xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +0 -1
  118. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +0 -1
  119. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +0 -1
  120. /xinference/{thirdparty/omnilmm → model/embedding/vllm}/__init__.py +0 -0
  121. /xinference/web/ui/build/static/js/{main.ae579a97.js.LICENSE.txt → main.8a9e3ba0.js.LICENSE.txt} +0 -0
  122. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/entry_points.txt +0 -0
  123. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/licenses/LICENSE +0 -0
  124. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/top_level.txt +0 -0
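
The bulk of this release reshapes the built-in model registries: legacy vision entries (deepseek-vl-chat, OmniLMM, qwen-vl-chat, cogvlm2, glm-edge-v) are removed along with their thirdparty/omnilmm support code, new DeepSeek and MiniCPM4 entries are added, and the embedding stack is split into flag, sentence_transformers, and vllm backends. The hunks that follow are from the built-in LLM registry (llm_family.json). As a point of reference, below is a minimal sketch of launching one of the newly registered models through the RESTful client; the endpoint and the exact keyword arguments are assumptions based on the documented client API, and the spec values mirror the minicpm4 entry added further down.

# A minimal sketch, not verified against this exact release: launching one
# of the newly registered models through xinference's RESTful client.
# Assumes a server is already running on the default local endpoint.
from xinference.client import RESTfulClient

client = RESTfulClient("http://127.0.0.1:9997")
model_uid = client.launch_model(
    model_name="minicpm4",        # new registry entry in this release
    model_engine="transformers",  # assumption: engine matching the pytorch spec
    model_format="pytorch",
    model_size_in_billions=8,
)
model = client.get_model(model_uid)
reply = model.chat(messages=[{"role": "user", "content": "你好"}])
print(reply["choices"][0]["message"]["content"])
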
@@ -4392,47 +4392,6 @@
  "<|end▁of▁sentence|>"
  ]
  },
- {
- "version": 1,
- "context_length": 4096,
- "model_name": "deepseek-vl-chat",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": "1_3",
- "quantizations": [
- "none"
- ],
- "model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
- "model_revision": "8f13a8e00dbdc381d614a9d29d61b07e8fe91b3f"
- },
- {
- "model_format": "pytorch",
- "model_size_in_billions": 7,
- "quantizations": [
- "none"
- ],
- "model_id": "deepseek-ai/deepseek-vl-7b-chat",
- "model_revision": "6f16f00805f45b5249f709ce21820122eeb43556"
- }
- ],
- "chat_template": "",
- "stop_token_ids": [
- 100001
- ],
- "stop": [
- "<|end▁of▁sentence|>"
- ]
- },
  {
  "version": 1,
  "context_length": 4096,
@@ -5009,88 +4968,6 @@
  }
  ]
  },
- {
- "version": 1,
- "context_length": 2048,
- "model_name": "OmniLMM",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 3,
- "quantizations": [
- "none"
- ],
- "model_id": "openbmb/MiniCPM-V",
- "model_revision": "bec7d1cd1c9e804c064ec291163e40624825eaaa"
- },
- {
- "model_format": "pytorch",
- "model_size_in_billions": 12,
- "quantizations": [
- "none"
- ],
- "model_id": "openbmb/OmniLMM-12B",
- "model_revision": "ef62bae5af34be653b9801037cd613e05ab24fdc"
- }
- ],
- "chat_template": "",
- "stop_token_ids": [
- 2
- ],
- "stop": [
- "</s>"
- ]
- },
- {
- "version": 1,
- "context_length": 8192,
- "model_name": "MiniCPM-Llama3-V-2_5",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "MiniCPM-Llama3-V 2.5 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Llama3-8B-Instruct with a total of 8B parameters.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 8,
- "quantizations": [
- "none"
- ],
- "model_id": "openbmb/MiniCPM-Llama3-V-2_5",
- "model_revision": "285a637ba8a30a0660dfcccad16f9a864f75abfd"
- },
- {
- "model_format": "pytorch",
- "model_size_in_billions": 8,
- "quantizations": [
- "none"
- ],
- "model_id": "openbmb/MiniCPM-Llama3-V-2_5-{quantization}",
- "model_revision": "f92aff28552de35de3be204e8fe292dd4824e544"
- }
- ],
- "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = '<|begin_of_text|>' + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
- "stop_token_ids": [
- 128001
- ],
- "stop": [
- "<|end_of_text|>"
- ]
- },
  {
  "version": 1,
  "context_length": 32768,
@@ -5134,51 +5011,6 @@
  "<|endoftext|>"
  ]
  },
- {
- "version": 1,
- "context_length": 4096,
- "model_name": "qwen-vl-chat",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "Qwen-VL-Chat supports more flexible interaction, such as multiple image inputs, multi-round question answering, and creative capabilities.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 7,
- "quantizations": [
- "none"
- ],
- "model_id": "Qwen/Qwen-VL-Chat",
- "model_revision": "6665c780ade5ff3f08853b4262dcb9c8f9598d42"
- },
- {
- "model_format": "gptq",
- "model_size_in_billions": 7,
- "quantizations": [
- "Int4"
- ],
- "model_id": "Qwen/Qwen-VL-Chat-{quantization}",
- "model_revision": "5d3a5aa033ed2c502300d426c81cc5b13bcd1409"
- }
- ],
- "chat_template": "",
- "stop_token_ids": [
- 151643,
- 151644,
- 151645
- ],
- "stop": [
- "<|endoftext|>",
- "<|im_start|>",
- "<|im_end|>"
- ]
- },
  {
  "version": 1,
  "context_length": 4096,
@@ -5362,6 +5194,11 @@
  "Q8_0",
  "bf16"
  ],
+ "multimodal_projectors": [
+ "mmproj-google_gemma-3-4b-it-f16.gguf",
+ "mmproj-google_gemma-3-4b-it-f32.gguf",
+ "mmproj-google_gemma-3-4b-it-bf16.gguf"
+ ],
  "model_id": "bartowski/google_gemma-3-4b-it-GGUF",
  "model_file_name_template": "google_gemma-3-4b-it-{quantization}.gguf"
  },
@@ -5393,6 +5230,11 @@
  "Q8_0",
  "bf16"
  ],
+ "multimodal_projectors": [
+ "mmproj-google_gemma-3-12b-it-f16.gguf",
+ "mmproj-google_gemma-3-12b-it-f32.gguf",
+ "mmproj-google_gemma-3-12b-it-bf16.gguf"
+ ],
  "model_id": "bartowski/google_gemma-3-12b-it-GGUF",
  "model_file_name_template": "google_gemma-3-12b-it-{quantization}.gguf"
  },
@@ -5424,6 +5266,11 @@
  "Q8_0",
  "bf16"
  ],
+ "multimodal_projectors": [
+ "mmproj-google_gemma-3-27b-it-f16.gguf",
+ "mmproj-google_gemma-3-27b-it-f32.gguf",
+ "mmproj-google_gemma-3-27b-it-bf16.gguf"
+ ],
  "model_id": "bartowski/google_gemma-3-27b-it-GGUF",
  "model_file_name_template": "google_gemma-3-27b-it-{quantization}.gguf"
  },
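
The three gemma-3 GGUF hunks above add a multimodal_projectors list beside the existing model_file_name_template. An illustration of how the two fields combine, assuming the resolver simply substitutes a chosen quantization into the template and fetches one projector file alongside the weights:

# Illustration only: deriving a concrete GGUF file name from the
# "model_file_name_template" field, with a projector file from the new
# "multimodal_projectors" list sitting next to it.
spec = {
    "model_file_name_template": "google_gemma-3-4b-it-{quantization}.gguf",
    "multimodal_projectors": [
        "mmproj-google_gemma-3-4b-it-f16.gguf",
        "mmproj-google_gemma-3-4b-it-f32.gguf",
        "mmproj-google_gemma-3-4b-it-bf16.gguf",
    ],
}

quantization = "Q4_K_M"  # any entry from the spec's "quantizations" list
model_file = spec["model_file_name_template"].format(quantization=quantization)
print(model_file)                        # google_gemma-3-4b-it-Q4_K_M.gguf
print(spec["multimodal_projectors"][0])  # projector downloaded alongside it
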
@@ -5852,83 +5699,6 @@
  "<|im_end|>"
  ]
  },
- {
- "version": 1,
- "context_length": 8192,
- "model_name": "cogvlm2",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "CogVLM2 have achieved good results in many lists compared to the previous generation of CogVLM open source models. Its excellent performance can compete with some non-open source models.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 20,
- "quantizations": [
- "none"
- ],
- "model_id": "THUDM/cogvlm2-llama3-chinese-chat-19B",
- "model_revision": "d88b352bce5ee58a289b1ac8328553eb31efa2ef"
- },
- {
- "model_format": "pytorch",
- "model_size_in_billions": 20,
- "quantizations": [
- "none"
- ],
- "model_id": "THUDM/cogvlm2-llama3-chinese-chat-19B-{quantization}",
- "model_revision": "7863e362174f4718c2fe9cba4befd0b580a3194f"
- }
- ],
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = '<|begin_of_text|>' + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% else %}{{ '<|end_of_text|>' }}{% endif %}",
- "stop_token_ids": [
- 128001,
- 128009
- ],
- "stop": [
- "<|end_of_text|>",
- "<|eot_id|>"
- ]
- },
- {
- "version": 1,
- "context_length": 8192,
- "model_name": "cogvlm2-video-llama3-chat",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "CogVLM2-Video achieves state-of-the-art performance on multiple video question answering tasks.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 12,
- "quantizations": [
- "none"
- ],
- "model_id": "THUDM/cogvlm2-video-llama3-chat",
- "model_revision": "f375ead7d8202ebe2c3d09f1068abdddeb2929fa"
- }
- ],
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = '<|begin_of_text|>' + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% else %}{{ '<|end_of_text|>' }}{% endif %}",
- "stop_token_ids": [
- 128001,
- 128009
- ],
- "stop": [
- "<|end_of_text|>",
- "<|eot_id|>"
- ]
- },
  {
  "version": 1,
  "context_length": 8192,
@@ -6372,6 +6142,53 @@
  "</s>"
  ]
  },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "minicpm4",
+ "model_lang": [
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "MiniCPM4 series are highly efficient large language models (LLMs) designed explicitly for end-side devices, which achieves this efficiency through systematic innovation in four key dimensions: model architecture, training data, training algorithms, and inference systems.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": "0_5",
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "openbmb/MiniCPM4-0.5B"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "openbmb/MiniCPM4-8B"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4bit"
+ ],
+ "model_id": "mlx-community/MiniCPM4-8B-4bit"
+ }
+ ],
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "stop_token_ids": [
+ 2,
+ 73440
+ ],
+ "stop": [
+ "</s>",
+ "<|im_end|>"
+ ]
+ },
  {
  "version": 1,
  "context_length": 32768,
@@ -6440,44 +6257,11 @@
  "user",
  "assistant"
  ],
- "stop": [
- "<|im_end|>",
- "<|endoftext|>"
- ]
- }
- },
- {
- "version": 1,
- "context_length": 128000,
- "model_name": "deepseek-v2",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "generate"
- ],
- "model_description": "DeepSeek-V2, a strong Mixture-of-Experts (MoE) language model characterized by economical training and efficient inference. ",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 16,
- "quantizations": [
- "none"
- ],
- "model_id": "deepseek-ai/DeepSeek-V2-Lite",
- "model_revision": "604d5664dddd88a0433dbae533b7fe9472482de0"
- },
- {
- "model_format": "pytorch",
- "model_size_in_billions": 236,
- "quantizations": [
- "none"
- ],
- "model_id": "deepseek-ai/DeepSeek-V2",
- "model_revision": "4461458f186c35188585855f28f77af5661ad489"
- }
- ]
+ "stop": [
+ "<|im_end|>",
+ "<|endoftext|>"
+ ]
+ }
  },
  {
  "version": 1,
@@ -6725,6 +6509,44 @@
  "<|end▁of▁sentence|>"
  ]
  },
+ {
+ "version": 1,
+ "context_length": 163840,
+ "model_name": "deepseek-v3-0324",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. ",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 671,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-V3-0324"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 671,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "cognitivecomputations/DeepSeek-V3-0324-AWQ"
+ }
+ ],
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content'] + '<|Assistant|>'}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "stop_token_ids": [
+ 1
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ]
+ },
  {
  "version": 1,
  "context_length": 163840,
@@ -6941,6 +6763,148 @@
  "reasoning_start_tag": "<think>",
  "reasoning_end_tag": "</think>"
  },
+ {
+ "version": 1,
+ "context_length": 163840,
+ "model_name": "deepseek-r1-0528",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "reasoning"
+ ],
+ "model_description": "DeepSeek-R1, which incorporates cold-start data before RL. DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 671,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-R1-0528"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 671,
+ "quantizations": [
+ "Int4-Int8Mix-Lite",
+ "Int4-Int8Mix-Compact",
+ "Int4-Int8Mix-Medium"
+ ],
+ "model_id": "QuantTrio/DeepSeek-R1-0528-GPTQ-{quantization}"
+ }
+ ],
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' in message %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{'<|Assistant|>' + message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' not in message %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "stop_token_ids": [
+ 1
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ],
+ "reasoning_start_tag": "<think>",
+ "reasoning_end_tag": "</think>"
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "deepseek-r1-0528-qwen3",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "reasoning"
+ ],
+ "model_description": "The DeepSeek R1 model has undergone a minor version upgrade, with the current version being DeepSeek-R1-0528. In the latest update, DeepSeek R1 has significantly improved its depth of reasoning and inference capabilities by leveraging increased computational resources and introducing algorithmic optimization mechanisms during post-training. The model has demonstrated outstanding performance across various benchmark evaluations, including mathematics, programming, and general logic. Its overall performance is now approaching that of leading models, such as O3 and Gemini 2.5 Pro",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4-W4A16",
+ "Int8-W8A16"
+ ],
+ "model_id": "QuantTrio/DeepSeek-R1-0528-Qwen3-8B-{quantization}"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4-Int8Mix"
+ ],
+ "model_id": "QuantTrio/DeepSeek-R1-0528-Qwen3-8B-GPTQ-Int4-Int8Mix"
+ }
+ ],
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{% set content = message['content'] %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + content + '<|Assistant|>'}}{%- endif %}{%- if message['role'] == 'assistant' %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{% endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if content is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{content + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + content + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "stop_token_ids": [
+ 151645
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ],
+ "reasoning_start_tag": "<think>",
+ "reasoning_end_tag": "</think>"
+ },
+ {
+ "version": 1,
+ "context_length": 163840,
+ "model_name": "deepseek-prover-v2",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "reasoning"
+ ],
+ "model_description": "We introduce DeepSeek-Prover-V2, an open-source large language model designed for formal theorem proving in Lean 4, with initialization data collected through a recursive theorem proving pipeline powered by DeepSeek-V3. The cold-start training procedure begins by prompting DeepSeek-V3 to decompose complex problems into a series of subgoals. The proofs of resolved subgoals are synthesized into a chain-of-thought process, combined with DeepSeek-V3's step-by-step reasoning, to create an initial cold start for reinforcement learning. This process enables us to integrate both informal and formal mathematical reasoning into a unified model",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 671,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-Prover-V2-671B"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-Prover-V2-7B"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4bit"
+ ],
+ "model_id": "mlx-community/DeepSeek-Prover-V2-7B-4bit"
+ }
+ ],
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content'] + '<|Assistant|>'}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "stop_token_ids": [
+ 1
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ],
+ "reasoning_start_tag": "<think>",
+ "reasoning_end_tag": "</think>"
+ },
  {
  "version": 1,
  "context_length": 32768,
@@ -8968,125 +8932,6 @@
  "<|observation|>"
  ]
  },
- {
- "version": 1,
- "context_length": 8192,
- "model_name": "glm-edge-v",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "The GLM-Edge series is our attempt to face the end-side real-life scenarios, which consists of two sizes of large-language dialogue models and multimodal comprehension models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, GLM-Edge-V-5B). Among them, the 1.5B / 2B model is mainly for platforms such as mobile phones and cars, and the 4B / 5B model is mainly for platforms such as PCs.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": "2",
- "quantizations": [
- "none"
- ],
- "model_id": "THUDM/glm-edge-v-2b"
- },
- {
- "model_format": "pytorch",
- "model_size_in_billions": "5",
- "quantizations": [
- "none"
- ],
- "model_id": "THUDM/glm-edge-v-5b"
- },
- {
- "model_format": "ggufv2",
- "model_size_in_billions": "2",
- "quantizations": [
- "Q4_0",
- "Q4_1",
- "Q4_K",
- "Q4_K_M",
- "Q4_K_S",
- "Q5_0",
- "Q5_1",
- "Q5_K",
- "Q5_K_M",
- "Q5_K_S",
- "Q6_K",
- "Q8_0"
- ],
- "model_file_name_template": "ggml-model-{quantization}.gguf",
- "model_id": "THUDM/glm-edge-v-2b-gguf"
- },
- {
- "model_format": "ggufv2",
- "model_size_in_billions": "2",
- "quantizations": [
- "F16"
- ],
- "model_file_name_template": "glm-edge-v-2B-{quantization}.gguf",
- "model_id": "THUDM/glm-edge-v-2b-gguf"
- },
- {
- "model_format": "ggufv2",
- "model_size_in_billions": "2",
- "quantizations": [
- "f16"
- ],
- "model_file_name_template": "mmproj-model-{quantization}.gguf",
- "model_id": "THUDM/glm-edge-v-2b-gguf"
- },
- {
- "model_format": "ggufv2",
- "model_size_in_billions": "5",
- "quantizations": [
- "Q4_0",
- "Q4_1",
- "Q4_K",
- "Q4_K_M",
- "Q4_K_S",
- "Q5_0",
- "Q5_1",
- "Q5_K",
- "Q5_K_M",
- "Q5_K_S",
- "Q6_K",
- "Q8_0"
- ],
- "model_file_name_template": "ggml-model-{quantization}.gguf",
- "model_id": "THUDM/glm-edge-v-5b-gguf"
- },
- {
- "model_format": "ggufv2",
- "model_size_in_billions": "5",
- "quantizations": [
- "F16"
- ],
- "model_file_name_template": "glm-edge-v-5B-{quantization}.gguf",
- "model_id": "THUDM/glm-edge-v-5b-gguf"
- },
- {
- "model_format": "ggufv2",
- "model_size_in_billions": "5",
- "quantizations": [
- "f16"
- ],
- "model_file_name_template": "mmproj-model-{quantization}.gguf",
- "model_id": "THUDM/glm-edge-v-5b-gguf"
- }
- ],
- "chat_template": "{% for item in messages %}{% if item['role'] != 'system' %}<|{{ item['role'] }}|>\n{% for content in item['content'] %}{% if content['type'] == 'image' %}{% for _ in range(578) %}<|begin_of_image|>{% endfor %}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}",
- "stop_token_ids": [
- 59246,
- 59253,
- 59255
- ],
- "stop": [
- "<|endoftext|>",
- "<|user|>",
- "<|observation|>"
- ]
- },
  {
  "version": 1,
  "context_length": 32768,