xinference 1.5.0.post1__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +10 -3
- xinference/constants.py +5 -1
- xinference/core/supervisor.py +12 -3
- xinference/core/utils.py +1 -1
- xinference/core/worker.py +2 -2
- xinference/deploy/cmdline.py +17 -0
- xinference/model/audio/core.py +1 -1
- xinference/model/audio/model_spec.json +43 -43
- xinference/model/audio/model_spec_modelscope.json +13 -13
- xinference/model/llm/__init__.py +3 -5
- xinference/model/llm/core.py +14 -0
- xinference/model/llm/llama_cpp/core.py +15 -4
- xinference/model/llm/llm_family.json +3251 -4304
- xinference/model/llm/llm_family.py +62 -6
- xinference/model/llm/llm_family_csghub.json +0 -32
- xinference/model/llm/llm_family_modelscope.json +1161 -1789
- xinference/model/llm/llm_family_openmind_hub.json +19 -325
- xinference/model/llm/lmdeploy/core.py +7 -2
- xinference/model/llm/mlx/core.py +19 -6
- xinference/model/llm/sglang/core.py +25 -10
- xinference/model/llm/transformers/chatglm.py +8 -1
- xinference/model/llm/transformers/cogagent.py +10 -12
- xinference/model/llm/transformers/cogvlm2.py +6 -3
- xinference/model/llm/transformers/cogvlm2_video.py +3 -6
- xinference/model/llm/transformers/core.py +50 -58
- xinference/model/llm/transformers/deepseek_v2.py +4 -2
- xinference/model/llm/transformers/deepseek_vl.py +10 -4
- xinference/model/llm/transformers/deepseek_vl2.py +9 -4
- xinference/model/llm/transformers/gemma3.py +15 -7
- xinference/model/llm/transformers/glm4v.py +2 -20
- xinference/model/llm/transformers/glm_edge_v.py +3 -20
- xinference/model/llm/transformers/intern_vl.py +3 -6
- xinference/model/llm/transformers/internlm2.py +1 -1
- xinference/model/llm/transformers/minicpmv25.py +4 -2
- xinference/model/llm/transformers/minicpmv26.py +5 -3
- xinference/model/llm/transformers/omnilmm.py +1 -1
- xinference/model/llm/transformers/opt.py +1 -1
- xinference/model/llm/transformers/ovis2.py +302 -0
- xinference/model/llm/transformers/qwen-omni.py +2 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +5 -1
- xinference/model/llm/transformers/qwen_vl.py +5 -2
- xinference/model/llm/utils.py +28 -0
- xinference/model/llm/vllm/core.py +73 -9
- xinference/model/llm/vllm/distributed_executor.py +8 -7
- xinference/model/llm/vllm/xavier/allocator.py +1 -1
- xinference/model/llm/vllm/xavier/block_manager.py +1 -1
- xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
- xinference/model/llm/vllm/xavier/executor.py +1 -1
- xinference/model/llm/vllm/xavier/test/test_xavier.py +1 -1
- xinference/model/video/diffusers.py +30 -3
- xinference/model/video/model_spec.json +46 -0
- xinference/model/video/model_spec_modelscope.json +48 -0
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
- xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
- xinference/web/ui/build/static/js/main.91e77b5c.js +3 -0
- xinference/web/ui/build/static/js/main.91e77b5c.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5e6edb0fb87e3798f142e9abf8dd2dc46bab33a60d31dff525797c0c99887097.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6087820be1bd5c02c42dff797e7df365448ef35ab26dd5d6bd33e967e05cbfd4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
- xinference/web/ui/src/locales/en.json +1 -0
- xinference/web/ui/src/locales/zh.json +1 -0
- {xinference-1.5.0.post1.dist-info → xinference-1.5.1.dist-info}/METADATA +1 -1
- {xinference-1.5.0.post1.dist-info → xinference-1.5.1.dist-info}/RECORD +77 -78
- {xinference-1.5.0.post1.dist-info → xinference-1.5.1.dist-info}/WHEEL +1 -1
- xinference/model/llm/transformers/compression.py +0 -258
- xinference/model/llm/transformers/yi_vl.py +0 -239
- xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
- xinference/web/ui/build/static/js/main.58bd483c.js +0 -3
- xinference/web/ui/build/static/js/main.58bd483c.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/69081049f0c7447544b7cfd73dd13d8846c02fe5febe4d81587e95c89a412d5b.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
- /xinference/web/ui/build/static/js/{main.58bd483c.js.LICENSE.txt → main.91e77b5c.js.LICENSE.txt} +0 -0
- {xinference-1.5.0.post1.dist-info → xinference-1.5.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.5.0.post1.dist-info → xinference-1.5.1.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.5.0.post1.dist-info → xinference-1.5.1.dist-info}/top_level.txt +0 -0
|
@@ -37,8 +37,6 @@
|
|
|
37
37
|
"model_format": "pytorch",
|
|
38
38
|
"model_size_in_billions": 7,
|
|
39
39
|
"quantizations": [
|
|
40
|
-
"4-bit",
|
|
41
|
-
"8-bit",
|
|
42
40
|
"none"
|
|
43
41
|
],
|
|
44
42
|
"model_id": "modelscope/Llama-2-7b-chat-ms",
|
|
@@ -49,8 +47,6 @@
|
|
|
49
47
|
"model_format": "pytorch",
|
|
50
48
|
"model_size_in_billions": 13,
|
|
51
49
|
"quantizations": [
|
|
52
|
-
"4-bit",
|
|
53
|
-
"8-bit",
|
|
54
50
|
"none"
|
|
55
51
|
],
|
|
56
52
|
"model_id": "modelscope/Llama-2-13b-chat-ms",
|
|
@@ -61,8 +57,6 @@
|
|
|
61
57
|
"model_format": "pytorch",
|
|
62
58
|
"model_size_in_billions": 70,
|
|
63
59
|
"quantizations": [
|
|
64
|
-
"4-bit",
|
|
65
|
-
"8-bit",
|
|
66
60
|
"none"
|
|
67
61
|
],
|
|
68
62
|
"model_id": "modelscope/Llama-2-70b-chat-ms",
|
|
@@ -72,7 +66,7 @@
|
|
|
72
66
|
],
|
|
73
67
|
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = '<<SYS>>\n' + messages[0]['content'] | trim + '\n<</SYS>>\n\n' %}{% set messages = messages[1:] %}{% else %}{% set system_message = '' %}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 %}{% set content = system_message + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '<s>' + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + '</s>' }}{% endif %}{% endfor %}",
|
|
74
68
|
"stop_token_ids": [
|
|
75
|
-
|
|
69
|
+
2
|
|
76
70
|
],
|
|
77
71
|
"stop": []
|
|
78
72
|
},
|
|
@@ -92,8 +86,6 @@
|
|
|
92
86
|
"model_format": "pytorch",
|
|
93
87
|
"model_size_in_billions": 8,
|
|
94
88
|
"quantizations": [
|
|
95
|
-
"4-bit",
|
|
96
|
-
"8-bit",
|
|
97
89
|
"none"
|
|
98
90
|
],
|
|
99
91
|
"model_id": "LLM-Research/Meta-Llama-3-8B",
|
|
@@ -103,8 +95,6 @@
|
|
|
103
95
|
"model_format": "pytorch",
|
|
104
96
|
"model_size_in_billions": 70,
|
|
105
97
|
"quantizations": [
|
|
106
|
-
"4-bit",
|
|
107
|
-
"8-bit",
|
|
108
98
|
"none"
|
|
109
99
|
],
|
|
110
100
|
"model_id": "LLM-Research/Meta-Llama-3-70B",
|
|
@@ -128,8 +118,6 @@
|
|
|
128
118
|
"model_format": "pytorch",
|
|
129
119
|
"model_size_in_billions": 8,
|
|
130
120
|
"quantizations": [
|
|
131
|
-
"4-bit",
|
|
132
|
-
"8-bit",
|
|
133
121
|
"none"
|
|
134
122
|
],
|
|
135
123
|
"model_id": "LLM-Research/Meta-Llama-3-8B-Instruct",
|
|
@@ -139,8 +127,6 @@
|
|
|
139
127
|
"model_format": "pytorch",
|
|
140
128
|
"model_size_in_billions": 70,
|
|
141
129
|
"quantizations": [
|
|
142
|
-
"4-bit",
|
|
143
|
-
"8-bit",
|
|
144
130
|
"none"
|
|
145
131
|
],
|
|
146
132
|
"model_id": "LLM-Research/Meta-Llama-3-70B-Instruct",
|
|
@@ -200,8 +186,6 @@
|
|
|
200
186
|
"model_format": "pytorch",
|
|
201
187
|
"model_size_in_billions": 8,
|
|
202
188
|
"quantizations": [
|
|
203
|
-
"4-bit",
|
|
204
|
-
"8-bit",
|
|
205
189
|
"none"
|
|
206
190
|
],
|
|
207
191
|
"model_id": "LLM-Research/Meta-Llama-3.1-8B",
|
|
@@ -211,8 +195,6 @@
|
|
|
211
195
|
"model_format": "pytorch",
|
|
212
196
|
"model_size_in_billions": 70,
|
|
213
197
|
"quantizations": [
|
|
214
|
-
"4-bit",
|
|
215
|
-
"8-bit",
|
|
216
198
|
"none"
|
|
217
199
|
],
|
|
218
200
|
"model_id": "LLM-Research/Meta-Llama-3.1-70B",
|
|
@@ -222,8 +204,6 @@
|
|
|
222
204
|
"model_format": "pytorch",
|
|
223
205
|
"model_size_in_billions": 405,
|
|
224
206
|
"quantizations": [
|
|
225
|
-
"4-bit",
|
|
226
|
-
"8-bit",
|
|
227
207
|
"none"
|
|
228
208
|
],
|
|
229
209
|
"model_id": "LLM-Research/Meta-Llama-3.1-405B",
|
|
@@ -296,8 +276,6 @@
|
|
|
296
276
|
"model_format": "pytorch",
|
|
297
277
|
"model_size_in_billions": 70,
|
|
298
278
|
"quantizations": [
|
|
299
|
-
"4-bit",
|
|
300
|
-
"8-bit",
|
|
301
279
|
"none"
|
|
302
280
|
],
|
|
303
281
|
"model_id": "LLM-Research/Meta-Llama-3.1-70B-Instruct",
|
|
@@ -325,8 +303,6 @@
|
|
|
325
303
|
"model_format": "pytorch",
|
|
326
304
|
"model_size_in_billions": 405,
|
|
327
305
|
"quantizations": [
|
|
328
|
-
"4-bit",
|
|
329
|
-
"8-bit",
|
|
330
306
|
"none"
|
|
331
307
|
],
|
|
332
308
|
"model_id": "LLM-Research/Meta-Llama-3.1-405B-Instruct",
|
|
@@ -378,8 +354,8 @@
|
|
|
378
354
|
"th"
|
|
379
355
|
],
|
|
380
356
|
"model_ability": [
|
|
381
|
-
|
|
382
|
-
|
|
357
|
+
"chat",
|
|
358
|
+
"vision"
|
|
383
359
|
],
|
|
384
360
|
"model_description": "Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
|
|
385
361
|
"model_specs": [
|
|
@@ -404,14 +380,14 @@
|
|
|
404
380
|
],
|
|
405
381
|
"chat_template": "{% for message in messages %}{% if loop.index0 == 0 %}{{ bos_token }}{% endif %}{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<|image|>' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{ '<|eot_id|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
|
|
406
382
|
"stop_token_ids": [
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
383
|
+
128001,
|
|
384
|
+
128008,
|
|
385
|
+
128009
|
|
410
386
|
],
|
|
411
387
|
"stop": [
|
|
412
388
|
"<|end_of_text|>",
|
|
413
|
-
|
|
414
|
-
|
|
389
|
+
"<|eot_id|>",
|
|
390
|
+
"<|eom_id|>"
|
|
415
391
|
]
|
|
416
392
|
},
|
|
417
393
|
{
|
|
@@ -429,8 +405,8 @@
|
|
|
429
405
|
"th"
|
|
430
406
|
],
|
|
431
407
|
"model_ability": [
|
|
432
|
-
|
|
433
|
-
|
|
408
|
+
"generate",
|
|
409
|
+
"vision"
|
|
434
410
|
],
|
|
435
411
|
"model_description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
|
|
436
412
|
"model_specs": [
|
|
@@ -440,8 +416,8 @@
|
|
|
440
416
|
"quantizations": [
|
|
441
417
|
"none"
|
|
442
418
|
],
|
|
443
|
-
|
|
444
|
-
|
|
419
|
+
"model_id": "LLM-Research/Llama-3.2-11B-Vision",
|
|
420
|
+
"model_hub": "modelscope"
|
|
445
421
|
},
|
|
446
422
|
{
|
|
447
423
|
"model_format": "pytorch",
|
|
@@ -449,8 +425,8 @@
|
|
|
449
425
|
"quantizations": [
|
|
450
426
|
"none"
|
|
451
427
|
],
|
|
452
|
-
|
|
453
|
-
|
|
428
|
+
"model_id": "LLM-Research/Llama-3.2-90B-Vision",
|
|
429
|
+
"model_hub": "modelscope"
|
|
454
430
|
}
|
|
455
431
|
]
|
|
456
432
|
},
|
|
@@ -562,8 +538,6 @@
|
|
|
562
538
|
"model_format": "pytorch",
|
|
563
539
|
"model_size_in_billions": 7,
|
|
564
540
|
"quantizations": [
|
|
565
|
-
"4-bit",
|
|
566
|
-
"8-bit",
|
|
567
541
|
"none"
|
|
568
542
|
],
|
|
569
543
|
"model_id": "baichuan-inc/Baichuan2-7B-Chat",
|
|
@@ -574,8 +548,6 @@
|
|
|
574
548
|
"model_format": "pytorch",
|
|
575
549
|
"model_size_in_billions": 13,
|
|
576
550
|
"quantizations": [
|
|
577
|
-
"4-bit",
|
|
578
|
-
"8-bit",
|
|
579
551
|
"none"
|
|
580
552
|
],
|
|
581
553
|
"model_id": "baichuan-inc/Baichuan2-13B-Chat",
|
|
@@ -607,8 +579,6 @@
|
|
|
607
579
|
"model_format": "pytorch",
|
|
608
580
|
"model_size_in_billions": 7,
|
|
609
581
|
"quantizations": [
|
|
610
|
-
"4-bit",
|
|
611
|
-
"8-bit",
|
|
612
582
|
"none"
|
|
613
583
|
],
|
|
614
584
|
"model_id": "baichuan-inc/Baichuan2-7B-Base",
|
|
@@ -619,8 +589,6 @@
|
|
|
619
589
|
"model_format": "pytorch",
|
|
620
590
|
"model_size_in_billions": 13,
|
|
621
591
|
"quantizations": [
|
|
622
|
-
"4-bit",
|
|
623
|
-
"8-bit",
|
|
624
592
|
"none"
|
|
625
593
|
],
|
|
626
594
|
"model_id": "baichuan-inc/Baichuan2-13B-Base",
|
|
@@ -647,8 +615,6 @@
|
|
|
647
615
|
"model_format": "pytorch",
|
|
648
616
|
"model_size_in_billions": 9,
|
|
649
617
|
"quantizations": [
|
|
650
|
-
"4-bit",
|
|
651
|
-
"8-bit",
|
|
652
618
|
"none"
|
|
653
619
|
],
|
|
654
620
|
"model_hub": "modelscope",
|
|
@@ -713,8 +679,6 @@
|
|
|
713
679
|
"model_format": "pytorch",
|
|
714
680
|
"model_size_in_billions": 9,
|
|
715
681
|
"quantizations": [
|
|
716
|
-
"4-bit",
|
|
717
|
-
"8-bit",
|
|
718
682
|
"none"
|
|
719
683
|
],
|
|
720
684
|
"model_hub": "modelscope",
|
|
@@ -779,8 +743,6 @@
|
|
|
779
743
|
"model_format": "pytorch",
|
|
780
744
|
"model_size_in_billions": 9,
|
|
781
745
|
"quantizations": [
|
|
782
|
-
"4-bit",
|
|
783
|
-
"8-bit",
|
|
784
746
|
"none"
|
|
785
747
|
],
|
|
786
748
|
"model_hub": "modelscope",
|
|
@@ -817,8 +779,6 @@
|
|
|
817
779
|
"model_format": "pytorch",
|
|
818
780
|
"model_size_in_billions": 9,
|
|
819
781
|
"quantizations": [
|
|
820
|
-
"4-bit",
|
|
821
|
-
"8-bit",
|
|
822
782
|
"none"
|
|
823
783
|
],
|
|
824
784
|
"model_id": "ZhipuAI/codegeex4-all-9b",
|
|
@@ -870,8 +830,6 @@
|
|
|
870
830
|
"model_format": "pytorch",
|
|
871
831
|
"model_size_in_billions": 7,
|
|
872
832
|
"quantizations": [
|
|
873
|
-
"4-bit",
|
|
874
|
-
"8-bit",
|
|
875
833
|
"none"
|
|
876
834
|
],
|
|
877
835
|
"model_hub": "modelscope",
|
|
@@ -882,8 +840,6 @@
|
|
|
882
840
|
"model_format": "pytorch",
|
|
883
841
|
"model_size_in_billions": 13,
|
|
884
842
|
"quantizations": [
|
|
885
|
-
"4-bit",
|
|
886
|
-
"8-bit",
|
|
887
843
|
"none"
|
|
888
844
|
],
|
|
889
845
|
"model_hub": "modelscope",
|
|
@@ -916,8 +872,6 @@
|
|
|
916
872
|
"model_format": "pytorch",
|
|
917
873
|
"model_size_in_billions": 7,
|
|
918
874
|
"quantizations": [
|
|
919
|
-
"4-bit",
|
|
920
|
-
"8-bit",
|
|
921
875
|
"none"
|
|
922
876
|
],
|
|
923
877
|
"model_id": "xverse/XVERSE-7B",
|
|
@@ -928,8 +882,6 @@
|
|
|
928
882
|
"model_format": "pytorch",
|
|
929
883
|
"model_size_in_billions": 13,
|
|
930
884
|
"quantizations": [
|
|
931
|
-
"4-bit",
|
|
932
|
-
"8-bit",
|
|
933
885
|
"none"
|
|
934
886
|
],
|
|
935
887
|
"model_id": "xverse/XVERSE-13B",
|
|
@@ -940,8 +892,6 @@
|
|
|
940
892
|
"model_format": "pytorch",
|
|
941
893
|
"model_size_in_billions": 65,
|
|
942
894
|
"quantizations": [
|
|
943
|
-
"4-bit",
|
|
944
|
-
"8-bit",
|
|
945
895
|
"none"
|
|
946
896
|
],
|
|
947
897
|
"model_id": "xverse/XVERSE-65B",
|
|
@@ -950,108 +900,6 @@
|
|
|
950
900
|
}
|
|
951
901
|
]
|
|
952
902
|
},
|
|
953
|
-
{
|
|
954
|
-
"version": 1,
|
|
955
|
-
"context_length": 32768,
|
|
956
|
-
"model_name": "internlm2.5-chat",
|
|
957
|
-
"model_lang": [
|
|
958
|
-
"en",
|
|
959
|
-
"zh"
|
|
960
|
-
],
|
|
961
|
-
"model_ability": [
|
|
962
|
-
"chat"
|
|
963
|
-
],
|
|
964
|
-
"model_description": "InternLM2.5 series of the InternLM model.",
|
|
965
|
-
"model_specs": [
|
|
966
|
-
{
|
|
967
|
-
"model_format": "pytorch",
|
|
968
|
-
"model_size_in_billions": "1_8",
|
|
969
|
-
"quantizations": [
|
|
970
|
-
"none"
|
|
971
|
-
],
|
|
972
|
-
"model_id": "Shanghai_AI_Laboratory/internlm2_5-1_8b-chat",
|
|
973
|
-
"model_hub": "modelscope"
|
|
974
|
-
},
|
|
975
|
-
{
|
|
976
|
-
"model_format": "pytorch",
|
|
977
|
-
"model_size_in_billions": 7,
|
|
978
|
-
"quantizations": [
|
|
979
|
-
"none"
|
|
980
|
-
],
|
|
981
|
-
"model_id": "Shanghai_AI_Laboratory/internlm2_5-7b-chat",
|
|
982
|
-
"model_hub": "modelscope"
|
|
983
|
-
},
|
|
984
|
-
{
|
|
985
|
-
"model_format": "ggufv2",
|
|
986
|
-
"model_size_in_billions": 7,
|
|
987
|
-
"quantizations": [
|
|
988
|
-
"q2_k",
|
|
989
|
-
"q3_k_m",
|
|
990
|
-
"q4_0",
|
|
991
|
-
"q4_k_m",
|
|
992
|
-
"q5_0",
|
|
993
|
-
"q5_k_m",
|
|
994
|
-
"q6_k",
|
|
995
|
-
"q8_0",
|
|
996
|
-
"fp16"
|
|
997
|
-
],
|
|
998
|
-
"model_id": "Shanghai_AI_Laboratory/internlm2_5-7b-chat-gguf",
|
|
999
|
-
"model_file_name_template": "internlm2_5-7b-chat-{quantization}.gguf",
|
|
1000
|
-
"model_hub": "modelscope"
|
|
1001
|
-
},
|
|
1002
|
-
{
|
|
1003
|
-
"model_format": "pytorch",
|
|
1004
|
-
"model_size_in_billions": 20,
|
|
1005
|
-
"quantizations": [
|
|
1006
|
-
"none"
|
|
1007
|
-
],
|
|
1008
|
-
"model_id": "Shanghai_AI_Laboratory/internlm2_5-20b-chat",
|
|
1009
|
-
"model_hub": "modelscope"
|
|
1010
|
-
}
|
|
1011
|
-
],
|
|
1012
|
-
"chat_template": "{{ '<s>' }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
1013
|
-
"stop_token_ids": [
|
|
1014
|
-
2,
|
|
1015
|
-
92542
|
|
1016
|
-
],
|
|
1017
|
-
"stop": [
|
|
1018
|
-
"</s>",
|
|
1019
|
-
"<|im_end|>"
|
|
1020
|
-
]
|
|
1021
|
-
},
|
|
1022
|
-
{
|
|
1023
|
-
"version": 1,
|
|
1024
|
-
"context_length": 262144,
|
|
1025
|
-
"model_name": "internlm2.5-chat-1m",
|
|
1026
|
-
"model_lang": [
|
|
1027
|
-
"en",
|
|
1028
|
-
"zh"
|
|
1029
|
-
],
|
|
1030
|
-
"model_ability": [
|
|
1031
|
-
"chat"
|
|
1032
|
-
],
|
|
1033
|
-
"model_description": "InternLM2.5 series of the InternLM model supports 1M long-context",
|
|
1034
|
-
"model_specs": [
|
|
1035
|
-
{
|
|
1036
|
-
"model_format": "pytorch",
|
|
1037
|
-
"model_size_in_billions": 7,
|
|
1038
|
-
"quantizations": [
|
|
1039
|
-
"none"
|
|
1040
|
-
],
|
|
1041
|
-
"model_id": "Shanghai_AI_Laboratory/internlm2_5-7b-chat-1m",
|
|
1042
|
-
"model_hub": "modelscope"
|
|
1043
|
-
}
|
|
1044
|
-
],
|
|
1045
|
-
"chat_template": "{{ '<s>' }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
1046
|
-
"stop_token_ids": [
|
|
1047
|
-
2,
|
|
1048
|
-
92542
|
|
1049
|
-
],
|
|
1050
|
-
"stop": [
|
|
1051
|
-
"</s>",
|
|
1052
|
-
"<|im_end|>"
|
|
1053
|
-
]
|
|
1054
|
-
},
|
|
1055
903
|
{
|
|
1056
904
|
"version": 1,
|
|
1057
905
|
"context_length": 100000,
|
|
@@ -1067,8 +915,6 @@
|
|
|
1067
915
|
"model_format": "pytorch",
|
|
1068
916
|
"model_size_in_billions": 13,
|
|
1069
917
|
"quantizations": [
|
|
1070
|
-
"4-bit",
|
|
1071
|
-
"8-bit",
|
|
1072
918
|
"none"
|
|
1073
919
|
],
|
|
1074
920
|
"model_hub": "modelscope",
|
|
@@ -1079,8 +925,6 @@
|
|
|
1079
925
|
"model_format": "pytorch",
|
|
1080
926
|
"model_size_in_billions": 34,
|
|
1081
927
|
"quantizations": [
|
|
1082
|
-
"4-bit",
|
|
1083
|
-
"8-bit",
|
|
1084
928
|
"none"
|
|
1085
929
|
],
|
|
1086
930
|
"model_hub": "modelscope",
|
|
@@ -1112,8 +956,6 @@
|
|
|
1112
956
|
"model_format": "pytorch",
|
|
1113
957
|
"model_size_in_billions": 7,
|
|
1114
958
|
"quantizations": [
|
|
1115
|
-
"4-bit",
|
|
1116
|
-
"8-bit",
|
|
1117
959
|
"none"
|
|
1118
960
|
],
|
|
1119
961
|
"model_hub": "modelscope",
|
|
@@ -1124,8 +966,6 @@
|
|
|
1124
966
|
"model_format": "pytorch",
|
|
1125
967
|
"model_size_in_billions": 13,
|
|
1126
968
|
"quantizations": [
|
|
1127
|
-
"4-bit",
|
|
1128
|
-
"8-bit",
|
|
1129
969
|
"none"
|
|
1130
970
|
],
|
|
1131
971
|
"model_hub": "modelscope",
|
|
@@ -1136,8 +976,6 @@
|
|
|
1136
976
|
"model_format": "pytorch",
|
|
1137
977
|
"model_size_in_billions": 34,
|
|
1138
978
|
"quantizations": [
|
|
1139
|
-
"4-bit",
|
|
1140
|
-
"8-bit",
|
|
1141
979
|
"none"
|
|
1142
980
|
],
|
|
1143
981
|
"model_hub": "modelscope",
|
|
@@ -1221,8 +1059,6 @@
|
|
|
1221
1059
|
"model_format": "pytorch",
|
|
1222
1060
|
"model_size_in_billions": 7,
|
|
1223
1061
|
"quantizations": [
|
|
1224
|
-
"4-bit",
|
|
1225
|
-
"8-bit",
|
|
1226
1062
|
"none"
|
|
1227
1063
|
],
|
|
1228
1064
|
"model_hub": "modelscope",
|
|
@@ -1233,8 +1069,6 @@
|
|
|
1233
1069
|
"model_format": "pytorch",
|
|
1234
1070
|
"model_size_in_billions": 13,
|
|
1235
1071
|
"quantizations": [
|
|
1236
|
-
"4-bit",
|
|
1237
|
-
"8-bit",
|
|
1238
1072
|
"none"
|
|
1239
1073
|
],
|
|
1240
1074
|
"model_hub": "modelscope",
|
|
@@ -1245,8 +1079,6 @@
|
|
|
1245
1079
|
"model_format": "pytorch",
|
|
1246
1080
|
"model_size_in_billions": 34,
|
|
1247
1081
|
"quantizations": [
|
|
1248
|
-
"4-bit",
|
|
1249
|
-
"8-bit",
|
|
1250
1082
|
"none"
|
|
1251
1083
|
],
|
|
1252
1084
|
"model_hub": "modelscope",
|
|
@@ -1289,7 +1121,7 @@
|
|
|
1289
1121
|
],
|
|
1290
1122
|
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = '<<SYS>>\n' + messages[0]['content'] | trim + '\n<</SYS>>\n\n' %}{% set messages = messages[1:] %}{% else %}{% set system_message = '' %}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 %}{% set content = system_message + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '<s>' + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + '</s>' }}{% endif %}{% endfor %}",
|
|
1291
1123
|
"stop_token_ids": [
|
|
1292
|
-
|
|
1124
|
+
2
|
|
1293
1125
|
],
|
|
1294
1126
|
"stop": [
|
|
1295
1127
|
"</s>"
|
|
@@ -1311,8 +1143,6 @@
|
|
|
1311
1143
|
"model_format": "pytorch",
|
|
1312
1144
|
"model_size_in_billions": 7,
|
|
1313
1145
|
"quantizations": [
|
|
1314
|
-
"4-bit",
|
|
1315
|
-
"8-bit",
|
|
1316
1146
|
"none"
|
|
1317
1147
|
],
|
|
1318
1148
|
"model_hub": "modelscope",
|
|
@@ -1402,8 +1232,6 @@
|
|
|
1402
1232
|
"model_format": "pytorch",
|
|
1403
1233
|
"model_size_in_billions": 13,
|
|
1404
1234
|
"quantizations": [
|
|
1405
|
-
"4-bit",
|
|
1406
|
-
"8-bit",
|
|
1407
1235
|
"none"
|
|
1408
1236
|
],
|
|
1409
1237
|
"model_hub": "modelscope",
|
|
@@ -1414,8 +1242,6 @@
|
|
|
1414
1242
|
"model_format": "pytorch",
|
|
1415
1243
|
"model_size_in_billions": 7,
|
|
1416
1244
|
"quantizations": [
|
|
1417
|
-
"4-bit",
|
|
1418
|
-
"8-bit",
|
|
1419
1245
|
"none"
|
|
1420
1246
|
],
|
|
1421
1247
|
"model_hub": "modelscope",
|
|
@@ -1426,8 +1252,6 @@
|
|
|
1426
1252
|
"model_format": "pytorch",
|
|
1427
1253
|
"model_size_in_billions": 13,
|
|
1428
1254
|
"quantizations": [
|
|
1429
|
-
"4-bit",
|
|
1430
|
-
"8-bit",
|
|
1431
1255
|
"none"
|
|
1432
1256
|
],
|
|
1433
1257
|
"model_hub": "modelscope",
|
|
@@ -1456,8 +1280,6 @@
|
|
|
1456
1280
|
"model_format": "pytorch",
|
|
1457
1281
|
"model_size_in_billions": "46_7",
|
|
1458
1282
|
"quantizations": [
|
|
1459
|
-
"4-bit",
|
|
1460
|
-
"8-bit",
|
|
1461
1283
|
"none"
|
|
1462
1284
|
],
|
|
1463
1285
|
"model_hub": "modelscope",
|
|
@@ -1486,8 +1308,6 @@
|
|
|
1486
1308
|
"model_format": "pytorch",
|
|
1487
1309
|
"model_size_in_billions": "46_7",
|
|
1488
1310
|
"quantizations": [
|
|
1489
|
-
"4-bit",
|
|
1490
|
-
"8-bit",
|
|
1491
1311
|
"none"
|
|
1492
1312
|
],
|
|
1493
1313
|
"model_hub": "modelscope",
|
|
@@ -1520,8 +1340,6 @@
|
|
|
1520
1340
|
"model_format": "pytorch",
|
|
1521
1341
|
"model_size_in_billions": 6,
|
|
1522
1342
|
"quantizations": [
|
|
1523
|
-
"4-bit",
|
|
1524
|
-
"8-bit",
|
|
1525
1343
|
"none"
|
|
1526
1344
|
],
|
|
1527
1345
|
"model_hub": "modelscope",
|
|
@@ -1532,8 +1350,6 @@
|
|
|
1532
1350
|
"model_format": "pytorch",
|
|
1533
1351
|
"model_size_in_billions": 9,
|
|
1534
1352
|
"quantizations": [
|
|
1535
|
-
"4-bit",
|
|
1536
|
-
"8-bit",
|
|
1537
1353
|
"none"
|
|
1538
1354
|
],
|
|
1539
1355
|
"model_hub": "modelscope",
|
|
@@ -1544,8 +1360,6 @@
|
|
|
1544
1360
|
"model_format": "pytorch",
|
|
1545
1361
|
"model_size_in_billions": 34,
|
|
1546
1362
|
"quantizations": [
|
|
1547
|
-
"4-bit",
|
|
1548
|
-
"8-bit",
|
|
1549
1363
|
"none"
|
|
1550
1364
|
],
|
|
1551
1365
|
"model_hub": "modelscope",
|
|
@@ -1571,8 +1385,6 @@
|
|
|
1571
1385
|
"model_format": "pytorch",
|
|
1572
1386
|
"model_size_in_billions": 6,
|
|
1573
1387
|
"quantizations": [
|
|
1574
|
-
"4-bit",
|
|
1575
|
-
"8-bit",
|
|
1576
1388
|
"none"
|
|
1577
1389
|
],
|
|
1578
1390
|
"model_hub": "modelscope",
|
|
@@ -1583,8 +1395,6 @@
|
|
|
1583
1395
|
"model_format": "pytorch",
|
|
1584
1396
|
"model_size_in_billions": 34,
|
|
1585
1397
|
"quantizations": [
|
|
1586
|
-
"4-bit",
|
|
1587
|
-
"8-bit",
|
|
1588
1398
|
"none"
|
|
1589
1399
|
],
|
|
1590
1400
|
"model_hub": "modelscope",
|
|
@@ -1620,8 +1430,6 @@
|
|
|
1620
1430
|
"model_format": "pytorch",
|
|
1621
1431
|
"model_size_in_billions": 6,
|
|
1622
1432
|
"quantizations": [
|
|
1623
|
-
"4-bit",
|
|
1624
|
-
"8-bit",
|
|
1625
1433
|
"none"
|
|
1626
1434
|
],
|
|
1627
1435
|
"model_hub": "modelscope",
|
|
@@ -1632,8 +1440,6 @@
|
|
|
1632
1440
|
"model_format": "pytorch",
|
|
1633
1441
|
"model_size_in_billions": 34,
|
|
1634
1442
|
"quantizations": [
|
|
1635
|
-
"4-bit",
|
|
1636
|
-
"8-bit",
|
|
1637
1443
|
"none"
|
|
1638
1444
|
],
|
|
1639
1445
|
"model_hub": "modelscope",
|
|
@@ -1672,8 +1478,6 @@
|
|
|
1672
1478
|
"model_format": "pytorch",
|
|
1673
1479
|
"model_size_in_billions": 6,
|
|
1674
1480
|
"quantizations": [
|
|
1675
|
-
"4-bit",
|
|
1676
|
-
"8-bit",
|
|
1677
1481
|
"none"
|
|
1678
1482
|
],
|
|
1679
1483
|
"model_hub": "modelscope",
|
|
@@ -1684,8 +1488,6 @@
|
|
|
1684
1488
|
"model_format": "pytorch",
|
|
1685
1489
|
"model_size_in_billions": 9,
|
|
1686
1490
|
"quantizations": [
|
|
1687
|
-
"4-bit",
|
|
1688
|
-
"8-bit",
|
|
1689
1491
|
"none"
|
|
1690
1492
|
],
|
|
1691
1493
|
"model_hub": "modelscope",
|
|
@@ -1696,8 +1498,6 @@
|
|
|
1696
1498
|
"model_format": "pytorch",
|
|
1697
1499
|
"model_size_in_billions": 34,
|
|
1698
1500
|
"quantizations": [
|
|
1699
|
-
"4-bit",
|
|
1700
|
-
"8-bit",
|
|
1701
1501
|
"none"
|
|
1702
1502
|
],
|
|
1703
1503
|
"model_hub": "modelscope",
|
|
@@ -1723,8 +1523,6 @@
|
|
|
1723
1523
|
"model_format": "pytorch",
|
|
1724
1524
|
"model_size_in_billions": 6,
|
|
1725
1525
|
"quantizations": [
|
|
1726
|
-
"4-bit",
|
|
1727
|
-
"8-bit",
|
|
1728
1526
|
"none"
|
|
1729
1527
|
],
|
|
1730
1528
|
"model_hub": "modelscope",
|
|
@@ -1735,8 +1533,6 @@
|
|
|
1735
1533
|
"model_format": "pytorch",
|
|
1736
1534
|
"model_size_in_billions": 9,
|
|
1737
1535
|
"quantizations": [
|
|
1738
|
-
"4-bit",
|
|
1739
|
-
"8-bit",
|
|
1740
1536
|
"none"
|
|
1741
1537
|
],
|
|
1742
1538
|
"model_hub": "modelscope",
|
|
@@ -1747,8 +1543,6 @@
|
|
|
1747
1543
|
"model_format": "pytorch",
|
|
1748
1544
|
"model_size_in_billions": 34,
|
|
1749
1545
|
"quantizations": [
|
|
1750
|
-
"4-bit",
|
|
1751
|
-
"8-bit",
|
|
1752
1546
|
"none"
|
|
1753
1547
|
],
|
|
1754
1548
|
"model_hub": "modelscope",
|
|
@@ -1847,8 +1641,6 @@
|
|
|
1847
1641
|
"model_format": "pytorch",
|
|
1848
1642
|
"model_size_in_billions": 9,
|
|
1849
1643
|
"quantizations": [
|
|
1850
|
-
"4-bit",
|
|
1851
|
-
"8-bit",
|
|
1852
1644
|
"none"
|
|
1853
1645
|
],
|
|
1854
1646
|
"model_hub": "modelscope",
|
|
@@ -1859,8 +1651,6 @@
|
|
|
1859
1651
|
"model_format": "pytorch",
|
|
1860
1652
|
"model_size_in_billions": 34,
|
|
1861
1653
|
"quantizations": [
|
|
1862
|
-
"4-bit",
|
|
1863
|
-
"8-bit",
|
|
1864
1654
|
"none"
|
|
1865
1655
|
],
|
|
1866
1656
|
"model_hub": "modelscope",
|
|
@@ -1898,8 +1688,6 @@
|
|
|
1898
1688
|
"model_format": "pytorch",
|
|
1899
1689
|
"model_size_in_billions": 7,
|
|
1900
1690
|
"quantizations": [
|
|
1901
|
-
"4-bit",
|
|
1902
|
-
"8-bit",
|
|
1903
1691
|
"none"
|
|
1904
1692
|
],
|
|
1905
1693
|
"model_hub": "modelscope",
|
|
@@ -1931,8 +1719,6 @@
|
|
|
1931
1719
|
"model_format": "pytorch",
|
|
1932
1720
|
"model_size_in_billions": 7,
|
|
1933
1721
|
"quantizations": [
|
|
1934
|
-
"4-bit",
|
|
1935
|
-
"8-bit",
|
|
1936
1722
|
"none"
|
|
1937
1723
|
],
|
|
1938
1724
|
"model_hub": "modelscope",
|
|
@@ -1964,8 +1750,6 @@
|
|
|
1964
1750
|
"model_format": "pytorch",
|
|
1965
1751
|
"model_size_in_billions": 7,
|
|
1966
1752
|
"quantizations": [
|
|
1967
|
-
"4-bit",
|
|
1968
|
-
"8-bit",
|
|
1969
1753
|
"none"
|
|
1970
1754
|
],
|
|
1971
1755
|
"model_hub": "modelscope",
|
|
@@ -2071,7 +1855,7 @@
|
|
|
2071
1855
|
"model_format": "pytorch",
|
|
2072
1856
|
"model_size_in_billions": 123,
|
|
2073
1857
|
"quantizations": [
|
|
2074
|
-
"
|
|
1858
|
+
"none"
|
|
2075
1859
|
],
|
|
2076
1860
|
"model_id": "LLM-Research/Mistral-Large-Instruct-2407-bnb-4bit",
|
|
2077
1861
|
"model_hub": "modelscope"
|
|
@@ -2124,8 +1908,6 @@
|
|
|
2124
1908
|
"model_format": "pytorch",
|
|
2125
1909
|
"model_size_in_billions": "1_8",
|
|
2126
1910
|
"quantizations": [
|
|
2127
|
-
"4-bit",
|
|
2128
|
-
"8-bit",
|
|
2129
1911
|
"none"
|
|
2130
1912
|
],
|
|
2131
1913
|
"model_hub": "modelscope",
|
|
@@ -2136,8 +1918,6 @@
|
|
|
2136
1918
|
"model_format": "pytorch",
|
|
2137
1919
|
"model_size_in_billions": 7,
|
|
2138
1920
|
"quantizations": [
|
|
2139
|
-
"4-bit",
|
|
2140
|
-
"8-bit",
|
|
2141
1921
|
"none"
|
|
2142
1922
|
],
|
|
2143
1923
|
"model_hub": "modelscope",
|
|
@@ -2148,8 +1928,6 @@
|
|
|
2148
1928
|
"model_format": "pytorch",
|
|
2149
1929
|
"model_size_in_billions": 72,
|
|
2150
1930
|
"quantizations": [
|
|
2151
|
-
"4-bit",
|
|
2152
|
-
"8-bit",
|
|
2153
1931
|
"none"
|
|
2154
1932
|
],
|
|
2155
1933
|
"model_hub": "modelscope",
|
|
@@ -2160,8 +1938,6 @@
|
|
|
2160
1938
|
"model_format": "pytorch",
|
|
2161
1939
|
"model_size_in_billions": 14,
|
|
2162
1940
|
"quantizations": [
|
|
2163
|
-
"4-bit",
|
|
2164
|
-
"8-bit",
|
|
2165
1941
|
"none"
|
|
2166
1942
|
],
|
|
2167
1943
|
"model_id": "qwen/Qwen-14B-Chat",
|
|
@@ -2243,8 +2019,6 @@
|
|
|
2243
2019
|
"model_format": "pytorch",
|
|
2244
2020
|
"model_size_in_billions": "0_5",
|
|
2245
2021
|
"quantizations": [
|
|
2246
|
-
"4-bit",
|
|
2247
|
-
"8-bit",
|
|
2248
2022
|
"none"
|
|
2249
2023
|
],
|
|
2250
2024
|
"model_id": "qwen/Qwen1.5-0.5B-Chat",
|
|
@@ -2254,8 +2028,6 @@
|
|
|
2254
2028
|
"model_format": "pytorch",
|
|
2255
2029
|
"model_size_in_billions": "1_8",
|
|
2256
2030
|
"quantizations": [
|
|
2257
|
-
"4-bit",
|
|
2258
|
-
"8-bit",
|
|
2259
2031
|
"none"
|
|
2260
2032
|
],
|
|
2261
2033
|
"model_id": "qwen/Qwen1.5-1.8B-Chat",
|
|
@@ -2265,8 +2037,6 @@
|
|
|
2265
2037
|
"model_format": "pytorch",
|
|
2266
2038
|
"model_size_in_billions": 4,
|
|
2267
2039
|
"quantizations": [
|
|
2268
|
-
"4-bit",
|
|
2269
|
-
"8-bit",
|
|
2270
2040
|
"none"
|
|
2271
2041
|
],
|
|
2272
2042
|
"model_id": "qwen/Qwen1.5-4B-Chat",
|
|
@@ -2276,8 +2046,6 @@
|
|
|
2276
2046
|
"model_format": "pytorch",
|
|
2277
2047
|
"model_size_in_billions": 7,
|
|
2278
2048
|
"quantizations": [
|
|
2279
|
-
"4-bit",
|
|
2280
|
-
"8-bit",
|
|
2281
2049
|
"none"
|
|
2282
2050
|
],
|
|
2283
2051
|
"model_id": "qwen/Qwen1.5-7B-Chat",
|
|
@@ -2287,8 +2055,6 @@
|
|
|
2287
2055
|
"model_format": "pytorch",
|
|
2288
2056
|
"model_size_in_billions": 14,
|
|
2289
2057
|
"quantizations": [
|
|
2290
|
-
"4-bit",
|
|
2291
|
-
"8-bit",
|
|
2292
2058
|
"none"
|
|
2293
2059
|
],
|
|
2294
2060
|
"model_id": "qwen/Qwen1.5-14B-Chat",
|
|
@@ -2298,8 +2064,6 @@
|
|
|
2298
2064
|
"model_format": "pytorch",
|
|
2299
2065
|
"model_size_in_billions": 32,
|
|
2300
2066
|
"quantizations": [
|
|
2301
|
-
"4-bit",
|
|
2302
|
-
"8-bit",
|
|
2303
2067
|
"none"
|
|
2304
2068
|
],
|
|
2305
2069
|
"model_id": "qwen/Qwen1.5-32B-Chat",
|
|
@@ -2309,8 +2073,6 @@
|
|
|
2309
2073
|
"model_format": "pytorch",
|
|
2310
2074
|
"model_size_in_billions": 72,
|
|
2311
2075
|
"quantizations": [
|
|
2312
|
-
"4-bit",
|
|
2313
|
-
"8-bit",
|
|
2314
2076
|
"none"
|
|
2315
2077
|
],
|
|
2316
2078
|
"model_id": "qwen/Qwen1.5-72B-Chat",
|
|
@@ -2320,8 +2082,6 @@
|
|
|
2320
2082
|
"model_format": "pytorch",
|
|
2321
2083
|
"model_size_in_billions": 110,
|
|
2322
2084
|
"quantizations": [
|
|
2323
|
-
"4-bit",
|
|
2324
|
-
"8-bit",
|
|
2325
2085
|
"none"
|
|
2326
2086
|
],
|
|
2327
2087
|
"model_id": "qwen/Qwen1.5-110B-Chat",
|
|
@@ -2629,8 +2389,6 @@
|
|
|
2629
2389
|
"model_format": "pytorch",
|
|
2630
2390
|
"model_size_in_billions": "2_7",
|
|
2631
2391
|
"quantizations": [
|
|
2632
|
-
"4-bit",
|
|
2633
|
-
"8-bit",
|
|
2634
2392
|
"none"
|
|
2635
2393
|
],
|
|
2636
2394
|
"model_id": "qwen/Qwen1.5-MoE-A2.7B-Chat",
|
|
@@ -2675,8 +2433,6 @@
|
|
|
2675
2433
|
"model_format": "pytorch",
|
|
2676
2434
|
"model_size_in_billions": 7,
|
|
2677
2435
|
"quantizations": [
|
|
2678
|
-
"4-bit",
|
|
2679
|
-
"8-bit",
|
|
2680
2436
|
"none"
|
|
2681
2437
|
],
|
|
2682
2438
|
"model_id": "qwen/CodeQwen1.5-7B",
|
|
@@ -2718,8 +2474,6 @@
|
|
|
2718
2474
|
"model_format": "pytorch",
|
|
2719
2475
|
"model_size_in_billions": 7,
|
|
2720
2476
|
"quantizations": [
|
|
2721
|
-
"4-bit",
|
|
2722
|
-
"8-bit",
|
|
2723
2477
|
"none"
|
|
2724
2478
|
],
|
|
2725
2479
|
"model_id": "qwen/CodeQwen1.5-7B-Chat",
|
|
@@ -2765,8 +2519,6 @@
|
|
|
2765
2519
|
"model_format": "pytorch",
|
|
2766
2520
|
"model_size_in_billions": "0_5",
|
|
2767
2521
|
"quantizations": [
|
|
2768
|
-
"4-bit",
|
|
2769
|
-
"8-bit",
|
|
2770
2522
|
"none"
|
|
2771
2523
|
],
|
|
2772
2524
|
"model_id": "qwen/Qwen2-0.5B-Instruct",
|
|
@@ -2776,8 +2528,6 @@
|
|
|
2776
2528
|
"model_format": "pytorch",
|
|
2777
2529
|
"model_size_in_billions": "1_5",
|
|
2778
2530
|
"quantizations": [
|
|
2779
|
-
"4-bit",
|
|
2780
|
-
"8-bit",
|
|
2781
2531
|
"none"
|
|
2782
2532
|
],
|
|
2783
2533
|
"model_id": "qwen/Qwen2-1.5B-Instruct",
|
|
@@ -2787,8 +2537,6 @@
|
|
|
2787
2537
|
"model_format": "pytorch",
|
|
2788
2538
|
"model_size_in_billions": 7,
|
|
2789
2539
|
"quantizations": [
|
|
2790
|
-
"4-bit",
|
|
2791
|
-
"8-bit",
|
|
2792
2540
|
"none"
|
|
2793
2541
|
],
|
|
2794
2542
|
"model_id": "qwen/Qwen2-7B-Instruct",
|
|
@@ -2798,8 +2546,6 @@
|
|
|
2798
2546
|
"model_format": "pytorch",
|
|
2799
2547
|
"model_size_in_billions": 72,
|
|
2800
2548
|
"quantizations": [
|
|
2801
|
-
"4-bit",
|
|
2802
|
-
"8-bit",
|
|
2803
2549
|
"none"
|
|
2804
2550
|
],
|
|
2805
2551
|
"model_id": "qwen/Qwen2-72B-Instruct",
|
|
@@ -3054,8 +2800,6 @@
|
|
|
3054
2800
|
"model_format": "pytorch",
|
|
3055
2801
|
"model_size_in_billions": 14,
|
|
3056
2802
|
"quantizations": [
|
|
3057
|
-
"4-bit",
|
|
3058
|
-
"8-bit",
|
|
3059
2803
|
"none"
|
|
3060
2804
|
],
|
|
3061
2805
|
"model_id": "qwen/Qwen2-57B-A14B-Instruct",
|
|
@@ -3170,8 +2914,6 @@
|
|
|
3170
2914
|
"model_format": "pytorch",
|
|
3171
2915
|
"model_size_in_billions": 7,
|
|
3172
2916
|
"quantizations": [
|
|
3173
|
-
"4-bit",
|
|
3174
|
-
"8-bit",
|
|
3175
2917
|
"none"
|
|
3176
2918
|
],
|
|
3177
2919
|
"model_id": "deepseek-ai/deepseek-llm-7b-base",
|
|
@@ -3181,8 +2923,6 @@
|
|
|
3181
2923
|
"model_format": "pytorch",
|
|
3182
2924
|
"model_size_in_billions": 67,
|
|
3183
2925
|
"quantizations": [
|
|
3184
|
-
"4-bit",
|
|
3185
|
-
"8-bit",
|
|
3186
2926
|
"none"
|
|
3187
2927
|
],
|
|
3188
2928
|
"model_id": "deepseek-ai/deepseek-llm-67b-base",
|
|
@@ -3207,8 +2947,6 @@
|
|
|
3207
2947
|
"model_format": "pytorch",
|
|
3208
2948
|
"model_size_in_billions": 7,
|
|
3209
2949
|
"quantizations": [
|
|
3210
|
-
"4-bit",
|
|
3211
|
-
"8-bit",
|
|
3212
2950
|
"none"
|
|
3213
2951
|
],
|
|
3214
2952
|
"model_id": "deepseek-ai/deepseek-llm-7b-chat",
|
|
@@ -3218,8 +2956,6 @@
|
|
|
3218
2956
|
"model_format": "pytorch",
|
|
3219
2957
|
"model_size_in_billions": 67,
|
|
3220
2958
|
"quantizations": [
|
|
3221
|
-
"4-bit",
|
|
3222
|
-
"8-bit",
|
|
3223
2959
|
"none"
|
|
3224
2960
|
],
|
|
3225
2961
|
"model_id": "deepseek-ai/deepseek-llm-67b-chat",
|
|
@@ -3251,8 +2987,6 @@
|
|
|
3251
2987
|
"model_format": "pytorch",
|
|
3252
2988
|
"model_size_in_billions": "1_3",
|
|
3253
2989
|
"quantizations": [
|
|
3254
|
-
"4-bit",
|
|
3255
|
-
"8-bit",
|
|
3256
2990
|
"none"
|
|
3257
2991
|
],
|
|
3258
2992
|
"model_id": "deepseek-ai/deepseek-coder-1.3b-base",
|
|
@@ -3262,8 +2996,6 @@
|
|
|
3262
2996
|
"model_format": "pytorch",
|
|
3263
2997
|
"model_size_in_billions": "6_7",
|
|
3264
2998
|
"quantizations": [
|
|
3265
|
-
"4-bit",
|
|
3266
|
-
"8-bit",
|
|
3267
2999
|
"none"
|
|
3268
3000
|
],
|
|
3269
3001
|
"model_id": "deepseek-ai/deepseek-coder-6.7b-base",
|
|
@@ -3273,8 +3005,6 @@
|
|
|
3273
3005
|
"model_format": "pytorch",
|
|
3274
3006
|
"model_size_in_billions": 33,
|
|
3275
3007
|
"quantizations": [
|
|
3276
|
-
"4-bit",
|
|
3277
|
-
"8-bit",
|
|
3278
3008
|
"none"
|
|
3279
3009
|
],
|
|
3280
3010
|
"model_id": "deepseek-ai/deepseek-coder-33b-base",
|
|
@@ -3299,8 +3029,6 @@
|
|
|
3299
3029
|
"model_format": "pytorch",
|
|
3300
3030
|
"model_size_in_billions": "1_3",
|
|
3301
3031
|
"quantizations": [
|
|
3302
|
-
"4-bit",
|
|
3303
|
-
"8-bit",
|
|
3304
3032
|
"none"
|
|
3305
3033
|
],
|
|
3306
3034
|
"model_id": "deepseek-ai/deepseek-coder-1.3b-instruct",
|
|
@@ -3310,8 +3038,6 @@
|
|
|
3310
3038
|
"model_format": "pytorch",
|
|
3311
3039
|
"model_size_in_billions": "6_7",
|
|
3312
3040
|
"quantizations": [
|
|
3313
|
-
"4-bit",
|
|
3314
|
-
"8-bit",
|
|
3315
3041
|
"none"
|
|
3316
3042
|
],
|
|
3317
3043
|
"model_id": "deepseek-ai/deepseek-coder-6.7b-instruct",
|
|
@@ -3321,8 +3047,6 @@
|
|
|
3321
3047
|
"model_format": "pytorch",
|
|
3322
3048
|
"model_size_in_billions": 33,
|
|
3323
3049
|
"quantizations": [
|
|
3324
|
-
"4-bit",
|
|
3325
|
-
"8-bit",
|
|
3326
3050
|
"none"
|
|
3327
3051
|
],
|
|
3328
3052
|
"model_id": "deepseek-ai/deepseek-coder-33b-instruct",
|
|
@@ -3354,7 +3078,6 @@
|
|
|
3354
3078
|
"model_format": "pytorch",
|
|
3355
3079
|
"model_size_in_billions": 13,
|
|
3356
3080
|
"quantizations": [
|
|
3357
|
-
"8-bit",
|
|
3358
3081
|
"none"
|
|
3359
3082
|
],
|
|
3360
3083
|
"model_hub": "modelscope",
|
|
@@ -3380,7 +3103,6 @@
|
|
|
3380
3103
|
"model_format": "pytorch",
|
|
3381
3104
|
"model_size_in_billions": 13,
|
|
3382
3105
|
"quantizations": [
|
|
3383
|
-
"8-bit",
|
|
3384
3106
|
"none"
|
|
3385
3107
|
],
|
|
3386
3108
|
"model_hub": "modelscope",
|
|
@@ -3389,50 +3111,6 @@
|
|
|
3389
3111
|
}
|
|
3390
3112
|
]
|
|
3391
3113
|
},
|
|
3392
|
-
{
|
|
3393
|
-
"version": 1,
|
|
3394
|
-
"context_length": 204800,
|
|
3395
|
-
"model_name": "internlm2-chat",
|
|
3396
|
-
"model_lang": [
|
|
3397
|
-
"en",
|
|
3398
|
-
"zh"
|
|
3399
|
-
],
|
|
3400
|
-
"model_ability": [
|
|
3401
|
-
"chat"
|
|
3402
|
-
],
|
|
3403
|
-
"model_description": "The second generation of the InternLM model, InternLM2.",
|
|
3404
|
-
"model_specs": [
|
|
3405
|
-
{
|
|
3406
|
-
"model_format": "pytorch",
|
|
3407
|
-
"model_size_in_billions": 7,
|
|
3408
|
-
"quantizations": [
|
|
3409
|
-
"none"
|
|
3410
|
-
],
|
|
3411
|
-
"model_id": "Shanghai_AI_Laboratory/internlm2-chat-7b",
|
|
3412
|
-
"model_hub": "modelscope",
|
|
3413
|
-
"model_revision": "master"
|
|
3414
|
-
},
|
|
3415
|
-
{
|
|
3416
|
-
"model_format": "pytorch",
|
|
3417
|
-
"model_size_in_billions": 20,
|
|
3418
|
-
"quantizations": [
|
|
3419
|
-
"none"
|
|
3420
|
-
],
|
|
3421
|
-
"model_id": "Shanghai_AI_Laboratory/internlm2-chat-20b",
|
|
3422
|
-
"model_hub": "modelscope",
|
|
3423
|
-
"model_revision": "master"
|
|
3424
|
-
}
|
|
3425
|
-
],
|
|
3426
|
-
"chat_template": "{{ '<s>' }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
3427
|
-
"stop_token_ids": [
|
|
3428
|
-
2,
|
|
3429
|
-
92542
|
|
3430
|
-
],
|
|
3431
|
-
"stop": [
|
|
3432
|
-
"</s>",
|
|
3433
|
-
"<|im_end|>"
|
|
3434
|
-
]
|
|
3435
|
-
},
|
|
3436
3114
|
{
|
|
3437
3115
|
"version": 1,
|
|
3438
3116
|
"context_length": 4096,
|
|
@@ -3497,9 +3175,7 @@
|
|
|
3497
3175
|
"model_format": "pytorch",
|
|
3498
3176
|
"model_size_in_billions": 14,
|
|
3499
3177
|
"quantizations": [
|
|
3500
|
-
"none"
|
|
3501
|
-
"4-bit",
|
|
3502
|
-
"8-bit"
|
|
3178
|
+
"none"
|
|
3503
3179
|
],
|
|
3504
3180
|
"model_id": "OrionStarAI/Orion-14B-Chat",
|
|
3505
3181
|
"model_hub": "modelscope"
|
|
@@ -3528,336 +3204,116 @@
|
|
|
3528
3204
|
},
|
|
3529
3205
|
{
|
|
3530
3206
|
"version": 1,
|
|
3531
|
-
"context_length":
|
|
3532
|
-
"model_name": "
|
|
3207
|
+
"context_length": 32768,
|
|
3208
|
+
"model_name": "gemma-3-1b-it",
|
|
3533
3209
|
"model_lang": [
|
|
3534
|
-
"en"
|
|
3535
|
-
"zh"
|
|
3210
|
+
"en"
|
|
3536
3211
|
],
|
|
3537
3212
|
"model_ability": [
|
|
3538
3213
|
"chat"
|
|
3539
3214
|
],
|
|
3540
|
-
"model_description": "
|
|
3215
|
+
"model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
|
|
3541
3216
|
"model_specs": [
|
|
3542
3217
|
{
|
|
3543
3218
|
"model_format": "pytorch",
|
|
3544
|
-
"model_size_in_billions":
|
|
3219
|
+
"model_size_in_billions": 1,
|
|
3545
3220
|
"quantizations": [
|
|
3546
|
-
"none"
|
|
3547
|
-
"4-bit",
|
|
3548
|
-
"8-bit"
|
|
3221
|
+
"none"
|
|
3549
3222
|
],
|
|
3550
|
-
"
|
|
3551
|
-
"
|
|
3223
|
+
"model_id": "LLM-Research/gemma-3-1b-it",
|
|
3224
|
+
"model_hub": "modelscope"
|
|
3225
|
+
},
|
|
3226
|
+
{
|
|
3227
|
+
"model_format": "ggufv2",
|
|
3228
|
+
"model_size_in_billions": 1,
|
|
3229
|
+
"quantizations": [
|
|
3230
|
+
"Q2_K",
|
|
3231
|
+
"Q3_K_L",
|
|
3232
|
+
"Q3_K_M",
|
|
3233
|
+
"Q3_K_S",
|
|
3234
|
+
"Q4_K_L",
|
|
3235
|
+
"Q4_K_M",
|
|
3236
|
+
"Q4_K_S",
|
|
3237
|
+
"Q5_K_L",
|
|
3238
|
+
"Q5_K_M",
|
|
3239
|
+
"Q5_K_S",
|
|
3240
|
+
"Q6_K",
|
|
3241
|
+
"Q6_K_L",
|
|
3242
|
+
"Q8_0",
|
|
3243
|
+
"bf16"
|
|
3244
|
+
],
|
|
3245
|
+
"model_id": "bartowski/google_gemma-3-1b-it-GGUF",
|
|
3246
|
+
"model_file_name_template": "google_gemma-3-1b-it-{quantization}.gguf",
|
|
3247
|
+
"model_hub": "modelscope"
|
|
3248
|
+
},
|
|
3249
|
+
{
|
|
3250
|
+
"model_format": "mlx",
|
|
3251
|
+
"model_size_in_billions": 1,
|
|
3252
|
+
"quantizations": [
|
|
3253
|
+
"4bit",
|
|
3254
|
+
"6bit",
|
|
3255
|
+
"8bit",
|
|
3256
|
+
"fp16"
|
|
3257
|
+
],
|
|
3258
|
+
"model_id": "mlx-community/gemma-3-1b-it-{quantization}",
|
|
3259
|
+
"model_hub": "modelscope"
|
|
3552
3260
|
}
|
|
3553
3261
|
],
|
|
3554
|
-
"chat_template": "{
|
|
3262
|
+
"chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
|
|
3555
3263
|
"stop_token_ids": [
|
|
3556
3264
|
1,
|
|
3557
|
-
|
|
3558
|
-
|
|
3265
|
+
106,
|
|
3266
|
+
107
|
|
3559
3267
|
],
|
|
3560
3268
|
"stop": [
|
|
3561
|
-
"<
|
|
3562
|
-
"
|
|
3563
|
-
"<
|
|
3269
|
+
"<eos>",
|
|
3270
|
+
"<end_of_turn>",
|
|
3271
|
+
"<start_of_turn>"
|
|
3564
3272
|
]
|
|
3565
3273
|
},
|
|
3566
3274
|
{
|
|
3567
3275
|
"version": 1,
|
|
3568
|
-
"context_length":
|
|
3569
|
-
"model_name": "
|
|
3276
|
+
"context_length": 131072,
|
|
3277
|
+
"model_name": "gemma-3-it",
|
|
3570
3278
|
"model_lang": [
|
|
3571
|
-
"en"
|
|
3572
|
-
"zh"
|
|
3279
|
+
"en"
|
|
3573
3280
|
],
|
|
3574
3281
|
"model_ability": [
|
|
3575
3282
|
"chat",
|
|
3576
3283
|
"vision"
|
|
3577
3284
|
],
|
|
3578
|
-
"model_description": "
|
|
3285
|
+
"model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
|
|
3579
3286
|
"model_specs": [
|
|
3580
3287
|
{
|
|
3581
3288
|
"model_format": "pytorch",
|
|
3582
|
-
"model_size_in_billions":
|
|
3289
|
+
"model_size_in_billions": 4,
|
|
3583
3290
|
"quantizations": [
|
|
3584
3291
|
"none"
|
|
3585
3292
|
],
|
|
3586
|
-
"
|
|
3587
|
-
"
|
|
3293
|
+
"model_id": "LLM-Research/gemma-3-4b-it",
|
|
3294
|
+
"model_hub": "modelscope"
|
|
3588
3295
|
},
|
|
3589
3296
|
{
|
|
3590
3297
|
"model_format": "pytorch",
|
|
3591
|
-
"model_size_in_billions":
|
|
3298
|
+
"model_size_in_billions": 12,
|
|
3592
3299
|
"quantizations": [
|
|
3593
3300
|
"none"
|
|
3594
3301
|
],
|
|
3595
|
-
"
|
|
3596
|
-
"
|
|
3597
|
-
}
|
|
3598
|
-
],
|
|
3599
|
-
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
3600
|
-
"stop_token_ids": [
|
|
3601
|
-
2,
|
|
3602
|
-
6,
|
|
3603
|
-
7,
|
|
3604
|
-
8
|
|
3605
|
-
],
|
|
3606
|
-
"stop": [
|
|
3607
|
-
"<|endoftext|>",
|
|
3608
|
-
"<|im_start|>",
|
|
3609
|
-
"<|im_end|>",
|
|
3610
|
-
"<|im_sep|>"
|
|
3611
|
-
]
|
|
3612
|
-
},
|
|
3613
|
-
{
|
|
3614
|
-
"version": 1,
|
|
3615
|
-
"context_length": 8192,
|
|
3616
|
-
"model_name": "gemma-it",
|
|
3617
|
-
"model_lang": [
|
|
3618
|
-
"en"
|
|
3619
|
-
],
|
|
3620
|
-
"model_ability": [
|
|
3621
|
-
"chat"
|
|
3622
|
-
],
|
|
3623
|
-
"model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
|
|
3624
|
-
"model_specs": [
|
|
3302
|
+
"model_id": "LLM-Research/gemma-3-12b-it",
|
|
3303
|
+
"model_hub": "modelscope"
|
|
3304
|
+
},
|
|
3625
3305
|
{
|
|
3626
3306
|
"model_format": "pytorch",
|
|
3627
|
-
"model_size_in_billions":
|
|
3307
|
+
"model_size_in_billions": 27,
|
|
3628
3308
|
"quantizations": [
|
|
3629
|
-
"none"
|
|
3630
|
-
"4-bit",
|
|
3631
|
-
"8-bit"
|
|
3309
|
+
"none"
|
|
3632
3310
|
],
|
|
3633
|
-
"
|
|
3634
|
-
"
|
|
3311
|
+
"model_id": "LLM-Research/gemma-3-27b-it",
|
|
3312
|
+
"model_hub": "modelscope"
|
|
3635
3313
|
},
|
|
3636
3314
|
{
|
|
3637
|
-
"model_format": "
|
|
3638
|
-
"model_size_in_billions":
|
|
3639
|
-
"quantizations": [
|
|
3640
|
-
"none",
|
|
3641
|
-
"4-bit",
|
|
3642
|
-
"8-bit"
|
|
3643
|
-
],
|
|
3644
|
-
"model_hub": "modelscope",
|
|
3645
|
-
"model_id": "AI-ModelScope/gemma-7b-it"
|
|
3646
|
-
}
|
|
3647
|
-
],
|
|
3648
|
-
"chat_template": "{{ '<bos>' }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
|
|
3649
|
-
"stop_token_ids": [
|
|
3650
|
-
1,
|
|
3651
|
-
106,
|
|
3652
|
-
107
|
|
3653
|
-
],
|
|
3654
|
-
"stop": [
|
|
3655
|
-
"<eos>",
|
|
3656
|
-
"<end_of_turn>",
|
|
3657
|
-
"<start_of_turn>"
|
|
3658
|
-
]
|
|
3659
|
-
},
|
|
3660
|
-
{
|
|
3661
|
-
"version": 1,
|
|
3662
|
-
"context_length": 8192,
|
|
3663
|
-
"model_name": "gemma-2-it",
|
|
3664
|
-
"model_lang": [
|
|
3665
|
-
"en"
|
|
3666
|
-
],
|
|
3667
|
-
"model_ability": [
|
|
3668
|
-
"chat"
|
|
3669
|
-
],
|
|
3670
|
-
"model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
|
|
3671
|
-
"model_specs": [
|
|
3672
|
-
{
|
|
3673
|
-
"model_format": "pytorch",
|
|
3674
|
-
"model_size_in_billions": 2,
|
|
3675
|
-
"quantizations": [
|
|
3676
|
-
"none",
|
|
3677
|
-
"4-bit",
|
|
3678
|
-
"8-bit"
|
|
3679
|
-
],
|
|
3680
|
-
"model_id": "LLM-Research/gemma-2-2b-it",
|
|
3681
|
-
"model_hub": "modelscope"
|
|
3682
|
-
},
|
|
3683
|
-
{
|
|
3684
|
-
"model_format": "pytorch",
|
|
3685
|
-
"model_size_in_billions": 9,
|
|
3686
|
-
"quantizations": [
|
|
3687
|
-
"none",
|
|
3688
|
-
"4-bit",
|
|
3689
|
-
"8-bit"
|
|
3690
|
-
],
|
|
3691
|
-
"model_id": "AI-ModelScope/gemma-2-9b-it",
|
|
3692
|
-
"model_hub": "modelscope"
|
|
3693
|
-
},
|
|
3694
|
-
{
|
|
3695
|
-
"model_format": "pytorch",
|
|
3696
|
-
"model_size_in_billions": 27,
|
|
3697
|
-
"quantizations": [
|
|
3698
|
-
"none",
|
|
3699
|
-
"4-bit",
|
|
3700
|
-
"8-bit"
|
|
3701
|
-
],
|
|
3702
|
-
"model_id": "AI-ModelScope/gemma-2-27b-it",
|
|
3703
|
-
"model_hub": "modelscope"
|
|
3704
|
-
},
|
|
3705
|
-
{
|
|
3706
|
-
"model_format": "ggufv2",
|
|
3707
|
-
"model_size_in_billions": 9,
|
|
3708
|
-
"quantizations": [
|
|
3709
|
-
"Q2_K",
|
|
3710
|
-
"Q3_K_L",
|
|
3711
|
-
"Q3_K_M",
|
|
3712
|
-
"Q3_K_S",
|
|
3713
|
-
"Q4_K_L",
|
|
3714
|
-
"Q4_K_M",
|
|
3715
|
-
"Q4_K_S",
|
|
3716
|
-
"Q5_K_L",
|
|
3717
|
-
"Q5_K_M",
|
|
3718
|
-
"Q5_K_S",
|
|
3719
|
-
"Q6_K",
|
|
3720
|
-
"Q6_K_L",
|
|
3721
|
-
"Q8_0",
|
|
3722
|
-
"f32"
|
|
3723
|
-
],
|
|
3724
|
-
"model_id": "LLM-Research/gemma-2-9b-it-GGUF",
|
|
3725
|
-
"model_file_name_template": "gemma-2-9b-it-{quantization}.gguf",
|
|
3726
|
-
"model_hub": "modelscope"
|
|
3727
|
-
}
|
|
3728
|
-
],
|
|
3729
|
-
"chat_template": "{{ '<bos>' }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
|
|
3730
|
-
"stop_token_ids": [
|
|
3731
|
-
1,
|
|
3732
|
-
106,
|
|
3733
|
-
107
|
|
3734
|
-
],
|
|
3735
|
-
"stop": [
|
|
3736
|
-
"<eos>",
|
|
3737
|
-
"<end_of_turn>",
|
|
3738
|
-
"<start_of_turn>"
|
|
3739
|
-
]
|
|
3740
|
-
},
|
|
3741
|
-
{
|
|
3742
|
-
"version": 1,
|
|
3743
|
-
"context_length": 32768,
|
|
3744
|
-
"model_name": "gemma-3-1b-it",
|
|
3745
|
-
"model_lang": [
|
|
3746
|
-
"en"
|
|
3747
|
-
],
|
|
3748
|
-
"model_ability": [
|
|
3749
|
-
"chat"
|
|
3750
|
-
],
|
|
3751
|
-
"model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
|
|
3752
|
-
"model_specs": [
|
|
3753
|
-
{
|
|
3754
|
-
"model_format": "pytorch",
|
|
3755
|
-
"model_size_in_billions": 1,
|
|
3756
|
-
"quantizations": [
|
|
3757
|
-
"none",
|
|
3758
|
-
"4-bit",
|
|
3759
|
-
"8-bit"
|
|
3760
|
-
],
|
|
3761
|
-
"model_id": "LLM-Research/gemma-3-1b-it",
|
|
3762
|
-
"model_hub": "modelscope"
|
|
3763
|
-
},
|
|
3764
|
-
{
|
|
3765
|
-
"model_format": "ggufv2",
|
|
3766
|
-
"model_size_in_billions": 1,
|
|
3767
|
-
"quantizations": [
|
|
3768
|
-
"Q2_K",
|
|
3769
|
-
"Q3_K_L",
|
|
3770
|
-
"Q3_K_M",
|
|
3771
|
-
"Q3_K_S",
|
|
3772
|
-
"Q4_K_L",
|
|
3773
|
-
"Q4_K_M",
|
|
3774
|
-
"Q4_K_S",
|
|
3775
|
-
"Q5_K_L",
|
|
3776
|
-
"Q5_K_M",
|
|
3777
|
-
"Q5_K_S",
|
|
3778
|
-
"Q6_K",
|
|
3779
|
-
"Q6_K_L",
|
|
3780
|
-
"Q8_0",
|
|
3781
|
-
"bf16"
|
|
3782
|
-
],
|
|
3783
|
-
"model_id": "bartowski/google_gemma-3-1b-it-GGUF",
|
|
3784
|
-
"model_file_name_template": "google_gemma-3-1b-it-{quantization}.gguf",
|
|
3785
|
-
"model_hub": "modelscope"
|
|
3786
|
-
},
|
|
3787
|
-
{
|
|
3788
|
-
"model_format": "mlx",
|
|
3789
|
-
"model_size_in_billions": 1,
|
|
3790
|
-
"quantizations": [
|
|
3791
|
-
"4bit",
|
|
3792
|
-
"6bit",
|
|
3793
|
-
"8bit",
|
|
3794
|
-
"fp16"
|
|
3795
|
-
],
|
|
3796
|
-
"model_id": "mlx-community/gemma-3-1b-it-{quantization}",
|
|
3797
|
-
"model_hub": "modelscope"
|
|
3798
|
-
}
|
|
3799
|
-
],
|
|
3800
|
-
"chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
|
|
3801
|
-
"stop_token_ids": [
|
|
3802
|
-
1,
|
|
3803
|
-
106,
|
|
3804
|
-
107
|
|
3805
|
-
],
|
|
3806
|
-
"stop": [
|
|
3807
|
-
"<eos>",
|
|
3808
|
-
"<end_of_turn>",
|
|
3809
|
-
"<start_of_turn>"
|
|
3810
|
-
]
|
|
3811
|
-
},
|
|
3812
|
-
{
|
|
3813
|
-
"version": 1,
|
|
3814
|
-
"context_length": 131072,
|
|
3815
|
-
"model_name": "gemma-3-it",
|
|
3816
|
-
"model_lang": [
|
|
3817
|
-
"en"
|
|
3818
|
-
],
|
|
3819
|
-
"model_ability": [
|
|
3820
|
-
"chat",
|
|
3821
|
-
"vision"
|
|
3822
|
-
],
|
|
3823
|
-
"model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
|
|
3824
|
-
"model_specs": [
|
|
3825
|
-
{
|
|
3826
|
-
"model_format": "pytorch",
|
|
3827
|
-
"model_size_in_billions": 4,
|
|
3828
|
-
"quantizations": [
|
|
3829
|
-
"none",
|
|
3830
|
-
"4-bit",
|
|
3831
|
-
"8-bit"
|
|
3832
|
-
],
|
|
3833
|
-
"model_id": "LLM-Research/gemma-3-4b-it",
|
|
3834
|
-
"model_hub": "modelscope"
|
|
3835
|
-
},
|
|
3836
|
-
{
|
|
3837
|
-
"model_format": "pytorch",
|
|
3838
|
-
"model_size_in_billions": 12,
|
|
3839
|
-
"quantizations": [
|
|
3840
|
-
"none",
|
|
3841
|
-
"4-bit",
|
|
3842
|
-
"8-bit"
|
|
3843
|
-
],
|
|
3844
|
-
"model_id": "LLM-Research/gemma-3-12b-it",
|
|
3845
|
-
"model_hub": "modelscope"
|
|
3846
|
-
},
|
|
3847
|
-
{
|
|
3848
|
-
"model_format": "pytorch",
|
|
3849
|
-
"model_size_in_billions": 27,
|
|
3850
|
-
"quantizations": [
|
|
3851
|
-
"none",
|
|
3852
|
-
"4-bit",
|
|
3853
|
-
"8-bit"
|
|
3854
|
-
],
|
|
3855
|
-
"model_id": "LLM-Research/gemma-3-27b-it",
|
|
3856
|
-
"model_hub": "modelscope"
|
|
3857
|
-
},
|
|
3858
|
-
{
|
|
3859
|
-
"model_format": "ggufv2",
|
|
3860
|
-
"model_size_in_billions": 4,
|
|
3315
|
+
"model_format": "ggufv2",
|
|
3316
|
+
"model_size_in_billions": 4,
|
|
3861
3317
|
"quantizations": [
|
|
3862
3318
|
"Q2_K",
|
|
3863
3319
|
"Q3_K_L",
|
|
@@ -3974,38 +3430,38 @@
|
|
|
3974
3430
|
]
|
|
3975
3431
|
},
|
|
3976
3432
|
{
|
|
3977
|
-
"version":1,
|
|
3978
|
-
"context_length":2048,
|
|
3979
|
-
"model_name":"OmniLMM",
|
|
3980
|
-
"model_lang":[
|
|
3433
|
+
"version": 1,
|
|
3434
|
+
"context_length": 2048,
|
|
3435
|
+
"model_name": "OmniLMM",
|
|
3436
|
+
"model_lang": [
|
|
3981
3437
|
"en",
|
|
3982
3438
|
"zh"
|
|
3983
3439
|
],
|
|
3984
|
-
"model_ability":[
|
|
3440
|
+
"model_ability": [
|
|
3985
3441
|
"chat",
|
|
3986
3442
|
"vision"
|
|
3987
3443
|
],
|
|
3988
|
-
"model_description":"OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
|
|
3989
|
-
"model_specs":[
|
|
3444
|
+
"model_description": "OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
|
|
3445
|
+
"model_specs": [
|
|
3990
3446
|
{
|
|
3991
|
-
"model_format":"pytorch",
|
|
3992
|
-
"model_size_in_billions":3,
|
|
3993
|
-
"quantizations":[
|
|
3447
|
+
"model_format": "pytorch",
|
|
3448
|
+
"model_size_in_billions": 3,
|
|
3449
|
+
"quantizations": [
|
|
3994
3450
|
"none"
|
|
3995
3451
|
],
|
|
3996
|
-
"model_id":"OpenBMB/MiniCPM-V",
|
|
3997
|
-
"model_hub":"modelscope",
|
|
3998
|
-
"model_revision":"master"
|
|
3452
|
+
"model_id": "OpenBMB/MiniCPM-V",
|
|
3453
|
+
"model_hub": "modelscope",
|
|
3454
|
+
"model_revision": "master"
|
|
3999
3455
|
},
|
|
4000
3456
|
{
|
|
4001
|
-
"model_format":"pytorch",
|
|
4002
|
-
"model_size_in_billions":12,
|
|
4003
|
-
"quantizations":[
|
|
3457
|
+
"model_format": "pytorch",
|
|
3458
|
+
"model_size_in_billions": 12,
|
|
3459
|
+
"quantizations": [
|
|
4004
3460
|
"none"
|
|
4005
3461
|
],
|
|
4006
|
-
"model_id":"OpenBMB/OmniLMM-12B",
|
|
4007
|
-
"model_hub":"modelscope",
|
|
4008
|
-
"model_revision":"master"
|
|
3462
|
+
"model_id": "OpenBMB/OmniLMM-12B",
|
|
3463
|
+
"model_hub": "modelscope",
|
|
3464
|
+
"model_revision": "master"
|
|
4009
3465
|
}
|
|
4010
3466
|
],
|
|
4011
3467
|
"chat_template": "",
|
|
@@ -4182,38 +3638,38 @@
|
|
|
4182
3638
|
]
|
|
4183
3639
|
},
|
|
4184
3640
|
{
|
|
4185
|
-
"version":1,
|
|
4186
|
-
"context_length":8192,
|
|
4187
|
-
"model_name":"MiniCPM-Llama3-V-2_5",
|
|
4188
|
-
"model_lang":[
|
|
3641
|
+
"version": 1,
|
|
3642
|
+
"context_length": 8192,
|
|
3643
|
+
"model_name": "MiniCPM-Llama3-V-2_5",
|
|
3644
|
+
"model_lang": [
|
|
4189
3645
|
"en",
|
|
4190
3646
|
"zh"
|
|
4191
3647
|
],
|
|
4192
|
-
"model_ability":[
|
|
3648
|
+
"model_ability": [
|
|
4193
3649
|
"chat",
|
|
4194
3650
|
"vision"
|
|
4195
3651
|
],
|
|
4196
|
-
"model_description":"MiniCPM-Llama3-V 2.5 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Llama3-8B-Instruct with a total of 8B parameters.",
|
|
4197
|
-
"model_specs":[
|
|
3652
|
+
"model_description": "MiniCPM-Llama3-V 2.5 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Llama3-8B-Instruct with a total of 8B parameters.",
|
|
3653
|
+
"model_specs": [
|
|
4198
3654
|
{
|
|
4199
|
-
"model_format":"pytorch",
|
|
4200
|
-
"model_size_in_billions":8,
|
|
4201
|
-
"quantizations":[
|
|
3655
|
+
"model_format": "pytorch",
|
|
3656
|
+
"model_size_in_billions": 8,
|
|
3657
|
+
"quantizations": [
|
|
4202
3658
|
"none"
|
|
4203
3659
|
],
|
|
4204
3660
|
"model_hub": "modelscope",
|
|
4205
|
-
"model_id":"OpenBMB/MiniCPM-Llama3-V-2_5",
|
|
4206
|
-
"model_revision":"master"
|
|
3661
|
+
"model_id": "OpenBMB/MiniCPM-Llama3-V-2_5",
|
|
3662
|
+
"model_revision": "master"
|
|
4207
3663
|
},
|
|
4208
3664
|
{
|
|
4209
|
-
"model_format":"pytorch",
|
|
4210
|
-
"model_size_in_billions":8,
|
|
4211
|
-
"quantizations":[
|
|
4212
|
-
"
|
|
3665
|
+
"model_format": "pytorch",
|
|
3666
|
+
"model_size_in_billions": 8,
|
|
3667
|
+
"quantizations": [
|
|
3668
|
+
"none"
|
|
4213
3669
|
],
|
|
4214
3670
|
"model_hub": "modelscope",
|
|
4215
|
-
"model_id":"OpenBMB/MiniCPM-Llama3-V-2_5-{quantization}",
|
|
4216
|
-
"model_revision":"master"
|
|
3671
|
+
"model_id": "OpenBMB/MiniCPM-Llama3-V-2_5-{quantization}",
|
|
3672
|
+
"model_revision": "master"
|
|
4217
3673
|
}
|
|
4218
3674
|
],
|
|
4219
3675
|
"chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = '<|begin_of_text|>' + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
|
|
@@ -4225,244 +3681,48 @@
|
|
|
4225
3681
|
]
|
|
4226
3682
|
},
|
|
4227
3683
|
{
|
|
4228
|
-
"version":1,
|
|
4229
|
-
"context_length":32768,
|
|
4230
|
-
"model_name":"MiniCPM-V-2.6",
|
|
4231
|
-
"model_lang":[
|
|
3684
|
+
"version": 1,
|
|
3685
|
+
"context_length": 32768,
|
|
3686
|
+
"model_name": "MiniCPM-V-2.6",
|
|
3687
|
+
"model_lang": [
|
|
4232
3688
|
"en",
|
|
4233
3689
|
"zh"
|
|
4234
3690
|
],
|
|
4235
|
-
"model_ability":[
|
|
3691
|
+
"model_ability": [
|
|
4236
3692
|
"chat",
|
|
4237
3693
|
"vision"
|
|
4238
3694
|
],
|
|
4239
|
-
"model_description":"MiniCPM-V 2.6 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters.",
|
|
4240
|
-
"model_specs":[
|
|
3695
|
+
"model_description": "MiniCPM-V 2.6 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters.",
|
|
3696
|
+
"model_specs": [
|
|
4241
3697
|
{
|
|
4242
|
-
"model_format":"pytorch",
|
|
4243
|
-
"model_size_in_billions":8,
|
|
4244
|
-
"quantizations":[
|
|
3698
|
+
"model_format": "pytorch",
|
|
3699
|
+
"model_size_in_billions": 8,
|
|
3700
|
+
"quantizations": [
|
|
4245
3701
|
"none"
|
|
4246
3702
|
],
|
|
4247
3703
|
"model_hub": "modelscope",
|
|
4248
|
-
"model_id":"OpenBMB/MiniCPM-V-2_6",
|
|
4249
|
-
"model_revision":"master"
|
|
4250
|
-
},
|
|
4251
|
-
{
|
|
4252
|
-
"model_format":"pytorch",
|
|
4253
|
-
"model_size_in_billions":8,
|
|
4254
|
-
"quantizations":[
|
|
4255
|
-
"4-bit"
|
|
4256
|
-
],
|
|
4257
|
-
"model_hub": "modelscope",
|
|
4258
|
-
"model_id":"OpenBMB/MiniCPM-V-2_6-int4",
|
|
4259
|
-
"model_revision":"master"
|
|
4260
|
-
}
|
|
4261
|
-
],
|
|
4262
|
-
"chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
4263
|
-
"stop_token_ids": [
|
|
4264
|
-
151645,
|
|
4265
|
-
151643
|
|
4266
|
-
],
|
|
4267
|
-
"stop": [
|
|
4268
|
-
"<|im_end|>",
|
|
4269
|
-
"<|endoftext|>"
|
|
4270
|
-
]
|
|
4271
|
-
},
|
|
4272
|
-
{
|
|
4273
|
-
"version": 1,
|
|
4274
|
-
"context_length": 2048,
|
|
4275
|
-
"model_name": "aquila2",
|
|
4276
|
-
"model_lang": [
|
|
4277
|
-
"zh"
|
|
4278
|
-
],
|
|
4279
|
-
"model_ability": [
|
|
4280
|
-
"generate"
|
|
4281
|
-
],
|
|
4282
|
-
"model_description": "Aquila2 series models are the base language models",
|
|
4283
|
-
"model_specs": [
|
|
4284
|
-
{
|
|
4285
|
-
"model_format": "pytorch",
|
|
4286
|
-
"model_size_in_billions": 34,
|
|
4287
|
-
"quantizations": [
|
|
4288
|
-
"none"
|
|
4289
|
-
],
|
|
4290
|
-
"model_hub": "modelscope",
|
|
4291
|
-
"model_id": "BAAI/Aquila2-34B",
|
|
4292
|
-
"model_revision": "master"
|
|
4293
|
-
},
|
|
4294
|
-
{
|
|
4295
|
-
"model_format": "pytorch",
|
|
4296
|
-
"model_size_in_billions": 70,
|
|
4297
|
-
"quantizations": [
|
|
4298
|
-
"none"
|
|
4299
|
-
],
|
|
4300
|
-
"model_hub": "modelscope",
|
|
4301
|
-
"model_id": "BAAI/Aquila2-70B-Expr",
|
|
4302
|
-
"model_revision": "master"
|
|
4303
|
-
}
|
|
4304
|
-
]
|
|
4305
|
-
},
|
|
4306
|
-
{
|
|
4307
|
-
"version": 1,
|
|
4308
|
-
"context_length": 2048,
|
|
4309
|
-
"model_name": "aquila2-chat",
|
|
4310
|
-
"model_lang": [
|
|
4311
|
-
"zh"
|
|
4312
|
-
],
|
|
4313
|
-
"model_ability": [
|
|
4314
|
-
"chat"
|
|
4315
|
-
],
|
|
4316
|
-
"model_description": "Aquila2-chat series models are the chat models",
|
|
4317
|
-
"model_specs": [
|
|
4318
|
-
{
|
|
4319
|
-
"model_format": "pytorch",
|
|
4320
|
-
"model_size_in_billions": 34,
|
|
4321
|
-
"quantizations": [
|
|
4322
|
-
"none"
|
|
4323
|
-
],
|
|
4324
|
-
"model_hub": "modelscope",
|
|
4325
|
-
"model_id": "BAAI/AquilaChat2-34B",
|
|
4326
|
-
"model_revision": "master"
|
|
4327
|
-
},
|
|
4328
|
-
{
|
|
4329
|
-
"model_format": "gptq",
|
|
4330
|
-
"model_size_in_billions": 34,
|
|
4331
|
-
"quantizations": [
|
|
4332
|
-
"Int4"
|
|
4333
|
-
],
|
|
4334
|
-
"model_hub": "modelscope",
|
|
4335
|
-
"model_id": "BAAI/AquilaChat2-34B-Int4-GPTQ",
|
|
4336
|
-
"model_revision": "master"
|
|
4337
|
-
},
|
|
4338
|
-
{
|
|
4339
|
-
"model_format": "pytorch",
|
|
4340
|
-
"model_size_in_billions": 70,
|
|
4341
|
-
"quantizations": [
|
|
4342
|
-
"none"
|
|
4343
|
-
],
|
|
4344
|
-
"model_hub": "modelscope",
|
|
4345
|
-
"model_id": "BAAI/AquilaChat2-70B-Expr",
|
|
4346
|
-
"model_revision": "master"
|
|
4347
|
-
}
|
|
4348
|
-
],
|
|
4349
|
-
"chat_template": "{% for item in messages %}{% if loop.first and item['role'] == 'system' %}{{ item['content'] + '\n' }}{% endif %}{% if item['role'] == 'user' %}{{ 'USER: ' + item['content'] + '\n' }}{% elif item['role'] == 'assistant' %}{{ 'ASSISTANT: ' + item['content'] + '\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT: ' }}{% endif %}",
|
|
4350
|
-
"stop_token_ids": [
|
|
4351
|
-
100006,
|
|
4352
|
-
100007
|
|
4353
|
-
],
|
|
4354
|
-
"stop": [
|
|
4355
|
-
"[CLS]",
|
|
4356
|
-
"</s>"
|
|
4357
|
-
]
|
|
4358
|
-
},
|
|
4359
|
-
{
|
|
4360
|
-
"version": 1,
|
|
4361
|
-
"context_length": 16384,
|
|
4362
|
-
"model_name": "aquila2-chat-16k",
|
|
4363
|
-
"model_lang": [
|
|
4364
|
-
"zh"
|
|
4365
|
-
],
|
|
4366
|
-
"model_ability": [
|
|
4367
|
-
"generate"
|
|
4368
|
-
],
|
|
4369
|
-
"model_description": "AquilaChat2-16k series models are the long-text chat models",
|
|
4370
|
-
"model_specs": [
|
|
4371
|
-
{
|
|
4372
|
-
"model_format": "pytorch",
|
|
4373
|
-
"model_size_in_billions": 34,
|
|
4374
|
-
"quantizations": [
|
|
4375
|
-
"none"
|
|
4376
|
-
],
|
|
4377
|
-
"model_hub": "modelscope",
|
|
4378
|
-
"model_id": "BAAI/AquilaChat2-34B-16K",
|
|
4379
|
-
"model_revision": "master"
|
|
4380
|
-
}
|
|
4381
|
-
],
|
|
4382
|
-
"chat_template": "{% for item in messages %}{% if loop.first and item['role'] == 'system' %}{{ item['content'] + '\n' }}{% endif %}{% if item['role'] == 'user' %}{{ 'USER: ' + item['content'] + '\n' }}{% elif item['role'] == 'assistant' %}{{ 'ASSISTANT: ' + item['content'] + '\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT: ' }}{% endif %}",
|
|
4383
|
-
"stop_token_ids": [
|
|
4384
|
-
100006,
|
|
4385
|
-
100007
|
|
4386
|
-
],
|
|
4387
|
-
"stop": [
|
|
4388
|
-
"[CLS]",
|
|
4389
|
-
"</s>"
|
|
4390
|
-
]
|
|
4391
|
-
},
|
|
4392
|
-
{
|
|
4393
|
-
"version": 1,
|
|
4394
|
-
"context_length": 131072,
|
|
4395
|
-
"model_name": "c4ai-command-r-v01",
|
|
4396
|
-
"model_lang": [
|
|
4397
|
-
"en",
|
|
4398
|
-
"fr",
|
|
4399
|
-
"de",
|
|
4400
|
-
"es",
|
|
4401
|
-
"it",
|
|
4402
|
-
"pt",
|
|
4403
|
-
"ja",
|
|
4404
|
-
"ko",
|
|
4405
|
-
"zh",
|
|
4406
|
-
"ar"
|
|
4407
|
-
],
|
|
4408
|
-
"model_ability": [
|
|
4409
|
-
"chat"
|
|
4410
|
-
],
|
|
4411
|
-
"model_description": "C4AI Command-R is a research release of a 35 billion parameter highly performant generative model.",
|
|
4412
|
-
"model_specs": [
|
|
4413
|
-
{
|
|
4414
|
-
"model_format": "pytorch",
|
|
4415
|
-
"model_size_in_billions": 35,
|
|
4416
|
-
"quantizations": [
|
|
4417
|
-
"none"
|
|
4418
|
-
],
|
|
4419
|
-
"model_hub": "modelscope",
|
|
4420
|
-
"model_id": "AI-ModelScope/c4ai-command-r-v01",
|
|
4421
|
-
"model_revision": "master"
|
|
4422
|
-
},
|
|
4423
|
-
{
|
|
4424
|
-
"model_format": "pytorch",
|
|
4425
|
-
"model_size_in_billions": 35,
|
|
4426
|
-
"quantizations": [
|
|
4427
|
-
"4-bit"
|
|
4428
|
-
],
|
|
4429
|
-
"model_hub": "modelscope",
|
|
4430
|
-
"model_id": "mirror013/c4ai-command-r-v01-4bit",
|
|
4431
|
-
"model_revision": "master"
|
|
4432
|
-
},
|
|
4433
|
-
{
|
|
4434
|
-
"model_format": "ggufv2",
|
|
4435
|
-
"model_size_in_billions": 35,
|
|
4436
|
-
"quantizations": [
|
|
4437
|
-
"Q2_K",
|
|
4438
|
-
"Q3_K_M",
|
|
4439
|
-
"Q4_K_M",
|
|
4440
|
-
"Q5_K_M"
|
|
4441
|
-
],
|
|
4442
|
-
"model_id": "mirror013/C4AI-Command-R-v01-GGUF",
|
|
4443
|
-
"model_file_name_template": "c4ai-command-r-v01-{quantization}.gguf",
|
|
4444
|
-
"model_hub": "modelscope",
|
|
4445
|
-
"model_revision": "master"
|
|
3704
|
+
"model_id": "OpenBMB/MiniCPM-V-2_6",
|
|
3705
|
+
"model_revision": "master"
|
|
4446
3706
|
},
|
|
4447
3707
|
{
|
|
4448
3708
|
"model_format": "pytorch",
|
|
4449
|
-
"model_size_in_billions":
|
|
3709
|
+
"model_size_in_billions": 8,
|
|
4450
3710
|
"quantizations": [
|
|
4451
3711
|
"none"
|
|
4452
3712
|
],
|
|
4453
3713
|
"model_hub": "modelscope",
|
|
4454
|
-
"model_id": "
|
|
3714
|
+
"model_id": "OpenBMB/MiniCPM-V-2_6-int4",
|
|
4455
3715
|
"model_revision": "master"
|
|
4456
3716
|
}
|
|
4457
3717
|
],
|
|
4458
|
-
"chat_template": "{
|
|
3718
|
+
"chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
4459
3719
|
"stop_token_ids": [
|
|
4460
|
-
|
|
4461
|
-
|
|
3720
|
+
151645,
|
|
3721
|
+
151643
|
|
4462
3722
|
],
|
|
4463
3723
|
"stop": [
|
|
4464
|
-
"
|
|
4465
|
-
"<|
|
|
3724
|
+
"<|im_end|>",
|
|
3725
|
+
"<|endoftext|>"
|
|
4466
3726
|
]
|
|
4467
3727
|
},
|
|
4468
3728
|
{
|
|
@@ -4481,8 +3741,6 @@
|
|
|
4481
3741
|
"model_format": "pytorch",
|
|
4482
3742
|
"model_size_in_billions": 4,
|
|
4483
3743
|
"quantizations": [
|
|
4484
|
-
"4-bit",
|
|
4485
|
-
"8-bit",
|
|
4486
3744
|
"none"
|
|
4487
3745
|
],
|
|
4488
3746
|
"model_hub": "modelscope",
|
|
@@ -4491,7 +3749,7 @@
|
|
|
4491
3749
|
}
|
|
4492
3750
|
],
|
|
4493
3751
|
"chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ '<|endoftext|>' }}{% endif %}",
|
|
4494
|
-
"stop_token_ids":[
|
|
3752
|
+
"stop_token_ids": [
|
|
4495
3753
|
32000,
|
|
4496
3754
|
32001,
|
|
4497
3755
|
32007
|
|
@@ -4518,8 +3776,6 @@
|
|
|
4518
3776
|
"model_format": "pytorch",
|
|
4519
3777
|
"model_size_in_billions": 4,
|
|
4520
3778
|
"quantizations": [
|
|
4521
|
-
"4-bit",
|
|
4522
|
-
"8-bit",
|
|
4523
3779
|
"none"
|
|
4524
3780
|
],
|
|
4525
3781
|
"model_hub": "modelscope",
|
|
@@ -4528,7 +3784,7 @@
|
|
|
4528
3784
|
}
|
|
4529
3785
|
],
|
|
4530
3786
|
"chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ '<|endoftext|>' }}{% endif %}",
|
|
4531
|
-
"stop_token_ids":[
|
|
3787
|
+
"stop_token_ids": [
|
|
4532
3788
|
32000,
|
|
4533
3789
|
32001,
|
|
4534
3790
|
32007
|
|
@@ -4541,267 +3797,64 @@
|
|
|
4541
3797
|
},
|
|
4542
3798
|
{
|
|
4543
3799
|
"version": 1,
|
|
4544
|
-
"context_length":
|
|
4545
|
-
"model_name": "
|
|
4546
|
-
"model_lang": [
|
|
4547
|
-
"en",
|
|
4548
|
-
"zh"
|
|
4549
|
-
],
|
|
4550
|
-
"model_ability": [
|
|
4551
|
-
"chat",
|
|
4552
|
-
"vision"
|
|
4553
|
-
],
|
|
4554
|
-
"model_description": "InternVL 1.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
|
|
4555
|
-
"model_specs": [
|
|
4556
|
-
{
|
|
4557
|
-
"model_format": "pytorch",
|
|
4558
|
-
"model_size_in_billions": 26,
|
|
4559
|
-
"quantizations": [
|
|
4560
|
-
"4-bit",
|
|
4561
|
-
"8-bit",
|
|
4562
|
-
"none"
|
|
4563
|
-
],
|
|
4564
|
-
"model_hub": "modelscope",
|
|
4565
|
-
"model_id": "OpenGVLab/InternVL-Chat-V1-5",
|
|
4566
|
-
"model_revision": "master"
|
|
4567
|
-
}
|
|
4568
|
-
],
|
|
4569
|
-
"chat_template": "{{ '<s>' }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
4570
|
-
"stop_token_ids": [
|
|
4571
|
-
2,
|
|
4572
|
-
92542,
|
|
4573
|
-
92543
|
|
4574
|
-
],
|
|
4575
|
-
"stop": [
|
|
4576
|
-
"</s>",
|
|
4577
|
-
"<|im_end|>",
|
|
4578
|
-
"<|im_start|>"
|
|
4579
|
-
]
|
|
4580
|
-
},
|
|
4581
|
-
{
|
|
4582
|
-
"version": 1,
|
|
4583
|
-
"context_length": 32768,
|
|
4584
|
-
"model_name": "internvl2",
|
|
4585
|
-
"model_lang": [
|
|
4586
|
-
"en",
|
|
4587
|
-
"zh"
|
|
4588
|
-
],
|
|
4589
|
-
"model_ability": [
|
|
4590
|
-
"chat",
|
|
4591
|
-
"vision"
|
|
4592
|
-
],
|
|
4593
|
-
"model_description": "InternVL 2 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
|
|
4594
|
-
"model_specs": [
|
|
4595
|
-
|
|
4596
|
-
{
|
|
4597
|
-
"model_format": "pytorch",
|
|
4598
|
-
"model_size_in_billions": 1,
|
|
4599
|
-
"quantizations": [
|
|
4600
|
-
"4-bit",
|
|
4601
|
-
"8-bit",
|
|
4602
|
-
"none"
|
|
4603
|
-
],
|
|
4604
|
-
"model_hub": "modelscope",
|
|
4605
|
-
"model_id": "OpenGVLab/InternVL2-1B",
|
|
4606
|
-
"model_revision": "master"
|
|
4607
|
-
},
|
|
4608
|
-
{
|
|
4609
|
-
"model_format": "pytorch",
|
|
4610
|
-
"model_size_in_billions": 2,
|
|
4611
|
-
"quantizations": [
|
|
4612
|
-
"4-bit",
|
|
4613
|
-
"8-bit",
|
|
4614
|
-
"none"
|
|
4615
|
-
],
|
|
4616
|
-
"model_hub": "modelscope",
|
|
4617
|
-
"model_id": "OpenGVLab/InternVL2-2B",
|
|
4618
|
-
"model_revision": "master"
|
|
4619
|
-
},
|
|
4620
|
-
{
|
|
4621
|
-
"model_format": "awq",
|
|
4622
|
-
"model_size_in_billions": 2,
|
|
4623
|
-
"quantizations": [
|
|
4624
|
-
"Int4"
|
|
4625
|
-
],
|
|
4626
|
-
"model_hub": "modelscope",
|
|
4627
|
-
"model_id": "OpenGVLab/InternVL2-2B-AWQ",
|
|
4628
|
-
"model_revision": "master"
|
|
4629
|
-
},
|
|
4630
|
-
{
|
|
4631
|
-
"model_format": "pytorch",
|
|
4632
|
-
"model_size_in_billions": 4,
|
|
4633
|
-
"quantizations": [
|
|
4634
|
-
"4-bit",
|
|
4635
|
-
"8-bit",
|
|
4636
|
-
"none"
|
|
4637
|
-
],
|
|
4638
|
-
"model_hub": "modelscope",
|
|
4639
|
-
"model_id": "OpenGVLab/InternVL2-4B",
|
|
4640
|
-
"model_revision": "master"
|
|
4641
|
-
},
|
|
4642
|
-
{
|
|
4643
|
-
"model_format": "pytorch",
|
|
4644
|
-
"model_size_in_billions": 8,
|
|
4645
|
-
"quantizations": [
|
|
4646
|
-
"4-bit",
|
|
4647
|
-
"8-bit",
|
|
4648
|
-
"none"
|
|
4649
|
-
],
|
|
4650
|
-
"model_hub": "modelscope",
|
|
4651
|
-
"model_id": "OpenGVLab/InternVL2-8B",
|
|
4652
|
-
"model_revision": "master"
|
|
4653
|
-
},
|
|
4654
|
-
{
|
|
4655
|
-
"model_format": "awq",
|
|
4656
|
-
"model_size_in_billions": 8,
|
|
4657
|
-
"quantizations": [
|
|
4658
|
-
"Int4"
|
|
4659
|
-
],
|
|
4660
|
-
"model_hub": "modelscope",
|
|
4661
|
-
"model_id": "OpenGVLab/InternVL2-8B-AWQ",
|
|
4662
|
-
"model_revision": "master"
|
|
4663
|
-
},
|
|
4664
|
-
{
|
|
4665
|
-
"model_format": "pytorch",
|
|
4666
|
-
"model_size_in_billions": 26,
|
|
4667
|
-
"quantizations": [
|
|
4668
|
-
"4-bit",
|
|
4669
|
-
"8-bit",
|
|
4670
|
-
"none"
|
|
4671
|
-
],
|
|
4672
|
-
"model_hub": "modelscope",
|
|
4673
|
-
"model_id": "OpenGVLab/InternVL2-26B",
|
|
4674
|
-
"model_revision": "master"
|
|
4675
|
-
},
|
|
4676
|
-
{
|
|
4677
|
-
"model_format": "awq",
|
|
4678
|
-
"model_size_in_billions": 26,
|
|
4679
|
-
"quantizations": [
|
|
4680
|
-
"Int4"
|
|
4681
|
-
],
|
|
4682
|
-
"model_hub": "modelscope",
|
|
4683
|
-
"model_id": "OpenGVLab/InternVL2-26B-AWQ",
|
|
4684
|
-
"model_revision": "master"
|
|
4685
|
-
},
|
|
4686
|
-
{
|
|
4687
|
-
"model_format": "pytorch",
|
|
4688
|
-
"model_size_in_billions": 40,
|
|
4689
|
-
"quantizations": [
|
|
4690
|
-
"4-bit",
|
|
4691
|
-
"8-bit",
|
|
4692
|
-
"none"
|
|
4693
|
-
],
|
|
4694
|
-
"model_hub": "modelscope",
|
|
4695
|
-
"model_id": "OpenGVLab/InternVL2-40B",
|
|
4696
|
-
"model_revision": "master"
|
|
4697
|
-
},
|
|
4698
|
-
{
|
|
4699
|
-
"model_format": "awq",
|
|
4700
|
-
"model_size_in_billions": 40,
|
|
4701
|
-
"quantizations": [
|
|
4702
|
-
"Int4"
|
|
4703
|
-
],
|
|
4704
|
-
"model_hub": "modelscope",
|
|
4705
|
-
"model_id": "OpenGVLab/InternVL2-40B-AWQ",
|
|
4706
|
-
"model_revision": "master"
|
|
4707
|
-
},
|
|
4708
|
-
{
|
|
4709
|
-
"model_format": "pytorch",
|
|
4710
|
-
"model_size_in_billions": 76,
|
|
4711
|
-
"quantizations": [
|
|
4712
|
-
"4-bit",
|
|
4713
|
-
"8-bit",
|
|
4714
|
-
"none"
|
|
4715
|
-
],
|
|
4716
|
-
"model_hub": "modelscope",
|
|
4717
|
-
"model_id": "OpenGVLab/InternVL2-Llama3-76B",
|
|
4718
|
-
"model_revision": "master"
|
|
4719
|
-
},
|
|
4720
|
-
{
|
|
4721
|
-
"model_format": "awq",
|
|
4722
|
-
"model_size_in_billions": 76,
|
|
4723
|
-
"quantizations": [
|
|
4724
|
-
"Int4"
|
|
4725
|
-
],
|
|
4726
|
-
"model_hub": "modelscope",
|
|
4727
|
-
"model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ",
|
|
4728
|
-
"model_revision": "master"
|
|
4729
|
-
}
|
|
4730
|
-
],
|
|
4731
|
-
"chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
4732
|
-
"stop_token_ids": [],
|
|
4733
|
-
"stop": []
|
|
4734
|
-
},
|
|
4735
|
-
{
|
|
4736
|
-
"version": 1,
|
|
4737
|
-
"context_length": 16384,
|
|
4738
|
-
"model_name": "InternVL2.5",
|
|
3800
|
+
"context_length": 8192,
|
|
3801
|
+
"model_name": "InternVL3",
|
|
4739
3802
|
"model_lang": [
|
|
4740
|
-
|
|
4741
|
-
|
|
3803
|
+
"en",
|
|
3804
|
+
"zh"
|
|
4742
3805
|
],
|
|
4743
3806
|
"model_ability": [
|
|
4744
|
-
|
|
4745
|
-
|
|
3807
|
+
"chat",
|
|
3808
|
+
"vision"
|
|
4746
3809
|
],
|
|
4747
|
-
"model_description": "
|
|
3810
|
+
"model_description": "InternVL3, an advanced multimodal large language model (MLLM) series that demonstrates superior overall performance.",
|
|
4748
3811
|
"model_specs": [
|
|
4749
3812
|
{
|
|
4750
3813
|
"model_format": "pytorch",
|
|
4751
3814
|
"model_size_in_billions": 1,
|
|
4752
3815
|
"quantizations": [
|
|
4753
|
-
"4-bit",
|
|
4754
|
-
"8-bit",
|
|
4755
3816
|
"none"
|
|
4756
3817
|
],
|
|
3818
|
+
"model_id": "OpenGVLab/InternVL3-1B",
|
|
4757
3819
|
"model_hub": "modelscope",
|
|
4758
|
-
"model_id": "OpenGVLab/InternVL2_5-1B",
|
|
4759
3820
|
"model_revision": "master"
|
|
4760
3821
|
},
|
|
4761
3822
|
{
|
|
4762
|
-
"model_format": "
|
|
4763
|
-
"model_size_in_billions":
|
|
3823
|
+
"model_format": "awq",
|
|
3824
|
+
"model_size_in_billions": 1,
|
|
4764
3825
|
"quantizations": [
|
|
4765
|
-
"
|
|
4766
|
-
"8-bit",
|
|
4767
|
-
"none"
|
|
3826
|
+
"Int4"
|
|
4768
3827
|
],
|
|
4769
|
-
"
|
|
4770
|
-
"
|
|
4771
|
-
"model_revision": "master"
|
|
3828
|
+
"model_id": "OpenGVLab/InternVL3-1B-AWQ",
|
|
3829
|
+
"model_hub": "modelscope"
|
|
4772
3830
|
},
|
|
4773
3831
|
{
|
|
4774
3832
|
"model_format": "pytorch",
|
|
4775
|
-
"model_size_in_billions":
|
|
3833
|
+
"model_size_in_billions": 2,
|
|
4776
3834
|
"quantizations": [
|
|
4777
|
-
"4-bit",
|
|
4778
|
-
"8-bit",
|
|
4779
3835
|
"none"
|
|
4780
3836
|
],
|
|
3837
|
+
"model_id": "OpenGVLab/InternVL3-2B",
|
|
4781
3838
|
"model_hub": "modelscope",
|
|
4782
|
-
"model_id": "OpenGVLab/InternVL2_5-4B",
|
|
4783
3839
|
"model_revision": "master"
|
|
4784
3840
|
},
|
|
4785
3841
|
{
|
|
4786
3842
|
"model_format": "awq",
|
|
4787
|
-
"model_size_in_billions":
|
|
3843
|
+
"model_size_in_billions": 2,
|
|
4788
3844
|
"quantizations": [
|
|
4789
3845
|
"Int4"
|
|
4790
3846
|
],
|
|
4791
|
-
"
|
|
4792
|
-
"
|
|
4793
|
-
"model_revision": "master"
|
|
3847
|
+
"model_id": "OpenGVLab/InternVL3-2B-AWQ",
|
|
3848
|
+
"model_hub": "modelscope"
|
|
4794
3849
|
},
|
|
4795
3850
|
{
|
|
4796
3851
|
"model_format": "pytorch",
|
|
4797
3852
|
"model_size_in_billions": 8,
|
|
4798
3853
|
"quantizations": [
|
|
4799
|
-
"4-bit",
|
|
4800
|
-
"8-bit",
|
|
4801
3854
|
"none"
|
|
4802
3855
|
],
|
|
3856
|
+
"model_id": "OpenGVLab/InternVL3-8B",
|
|
4803
3857
|
"model_hub": "modelscope",
|
|
4804
|
-
"model_id": "OpenGVLab/InternVL2_5-8B",
|
|
4805
3858
|
"model_revision": "master"
|
|
4806
3859
|
},
|
|
4807
3860
|
{
|
|
@@ -4810,353 +3863,51 @@
|
|
|
4810
3863
|
"quantizations": [
|
|
4811
3864
|
"Int4"
|
|
4812
3865
|
],
|
|
4813
|
-
"
|
|
4814
|
-
"
|
|
4815
|
-
"model_revision": "master"
|
|
3866
|
+
"model_id": "OpenGVLab/InternVL3-8B-AWQ",
|
|
3867
|
+
"model_hub": "modelscope"
|
|
4816
3868
|
},
|
|
4817
3869
|
{
|
|
4818
3870
|
"model_format": "pytorch",
|
|
4819
|
-
"model_size_in_billions":
|
|
3871
|
+
"model_size_in_billions": 9,
|
|
4820
3872
|
"quantizations": [
|
|
4821
|
-
"4-bit",
|
|
4822
|
-
"8-bit",
|
|
4823
3873
|
"none"
|
|
4824
3874
|
],
|
|
3875
|
+
"model_id": "OpenGVLab/InternVL3-9B",
|
|
4825
3876
|
"model_hub": "modelscope",
|
|
4826
|
-
"model_id": "OpenGVLab/InternVL2_5-26B",
|
|
4827
3877
|
"model_revision": "master"
|
|
4828
3878
|
},
|
|
4829
3879
|
{
|
|
4830
3880
|
"model_format": "awq",
|
|
4831
|
-
"model_size_in_billions":
|
|
3881
|
+
"model_size_in_billions": 9,
|
|
4832
3882
|
"quantizations": [
|
|
4833
3883
|
"Int4"
|
|
4834
3884
|
],
|
|
4835
|
-
"
|
|
4836
|
-
"
|
|
4837
|
-
"model_revision": "master"
|
|
3885
|
+
"model_id": "OpenGVLab/InternVL3-9B-AWQ",
|
|
3886
|
+
"model_hub": "modelscope"
|
|
4838
3887
|
},
|
|
4839
3888
|
{
|
|
4840
3889
|
"model_format": "pytorch",
|
|
4841
|
-
"model_size_in_billions":
|
|
3890
|
+
"model_size_in_billions": 14,
|
|
4842
3891
|
"quantizations": [
|
|
4843
|
-
"4-bit",
|
|
4844
|
-
"8-bit",
|
|
4845
3892
|
"none"
|
|
4846
3893
|
],
|
|
3894
|
+
"model_id": "OpenGVLab/InternVL3-14B",
|
|
4847
3895
|
"model_hub": "modelscope",
|
|
4848
|
-
"model_id": "OpenGVLab/InternVL2_5-38B",
|
|
4849
3896
|
"model_revision": "master"
|
|
4850
3897
|
},
|
|
4851
3898
|
{
|
|
4852
3899
|
"model_format": "awq",
|
|
4853
|
-
"model_size_in_billions":
|
|
3900
|
+
"model_size_in_billions": 14,
|
|
4854
3901
|
"quantizations": [
|
|
4855
3902
|
"Int4"
|
|
4856
3903
|
],
|
|
4857
|
-
"
|
|
4858
|
-
"
|
|
4859
|
-
"model_revision": "master"
|
|
4860
|
-
},
|
|
4861
|
-
{
|
|
4862
|
-
"model_format": "pytorch",
|
|
4863
|
-
"model_size_in_billions": 78,
|
|
4864
|
-
"quantizations": [
|
|
4865
|
-
"4-bit",
|
|
4866
|
-
"8-bit",
|
|
4867
|
-
"none"
|
|
4868
|
-
],
|
|
4869
|
-
"model_hub": "modelscope",
|
|
4870
|
-
"model_id": "OpenGVLab/InternVL2_5-78B",
|
|
4871
|
-
"model_revision": "master"
|
|
4872
|
-
},
|
|
4873
|
-
{
|
|
4874
|
-
"model_format": "awq",
|
|
4875
|
-
"model_size_in_billions": 78,
|
|
4876
|
-
"quantizations": [
|
|
4877
|
-
"Int4"
|
|
4878
|
-
],
|
|
4879
|
-
"model_hub": "modelscope",
|
|
4880
|
-
"model_id": "OpenGVLab/InternVL2_5-78B-AWQ",
|
|
4881
|
-
"model_revision": "master"
|
|
4882
|
-
}
|
|
4883
|
-
],
|
|
4884
|
-
"chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
4885
|
-
"stop_token_ids": [],
|
|
4886
|
-
"stop": []
|
|
4887
|
-
},
|
|
4888
|
-
{
|
|
4889
|
-
"version": 1,
|
|
4890
|
-
"context_length": 16384,
|
|
4891
|
-
"model_name": "InternVL2.5-MPO",
|
|
4892
|
-
"model_lang": [
|
|
4893
|
-
"en",
|
|
4894
|
-
"zh"
|
|
4895
|
-
],
|
|
4896
|
-
"model_ability": [
|
|
4897
|
-
"chat",
|
|
4898
|
-
"vision"
|
|
4899
|
-
],
|
|
4900
|
-
"model_description": "InternVL 2.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
|
|
4901
|
-
"model_specs": [
|
|
4902
|
-
{
|
|
4903
|
-
"model_format": "pytorch",
|
|
4904
|
-
"model_size_in_billions": 1,
|
|
4905
|
-
"quantizations": [
|
|
4906
|
-
"4-bit",
|
|
4907
|
-
"8-bit",
|
|
4908
|
-
"none"
|
|
4909
|
-
],
|
|
4910
|
-
"model_hub": "modelscope",
|
|
4911
|
-
"model_id": "OpenGVLab/InternVL2_5-1B-MPO",
|
|
4912
|
-
"model_revision": "master"
|
|
4913
|
-
},
|
|
4914
|
-
{
|
|
4915
|
-
"model_format": "pytorch",
|
|
4916
|
-
"model_size_in_billions": 2,
|
|
4917
|
-
"quantizations": [
|
|
4918
|
-
"4-bit",
|
|
4919
|
-
"8-bit",
|
|
4920
|
-
"none"
|
|
4921
|
-
],
|
|
4922
|
-
"model_hub": "modelscope",
|
|
4923
|
-
"model_id": "OpenGVLab/InternVL2_5-2B-MPO",
|
|
4924
|
-
"model_revision": "master"
|
|
4925
|
-
},
|
|
4926
|
-
{
|
|
4927
|
-
"model_format": "pytorch",
|
|
4928
|
-
"model_size_in_billions": 4,
|
|
4929
|
-
"quantizations": [
|
|
4930
|
-
"4-bit",
|
|
4931
|
-
"8-bit",
|
|
4932
|
-
"none"
|
|
4933
|
-
],
|
|
4934
|
-
"model_hub": "modelscope",
|
|
4935
|
-
"model_id": "OpenGVLab/InternVL2_5-4B-MPO",
|
|
4936
|
-
"model_revision": "master"
|
|
4937
|
-
},
|
|
4938
|
-
{
|
|
4939
|
-
"model_format": "awq",
|
|
4940
|
-
"model_size_in_billions": 4,
|
|
4941
|
-
"quantizations": [
|
|
4942
|
-
"Int4"
|
|
4943
|
-
],
|
|
4944
|
-
"model_hub": "modelscope",
|
|
4945
|
-
"model_id": "OpenGVLab/InternVL2_5-4B-MPO-AWQ",
|
|
4946
|
-
"model_revision": "master"
|
|
4947
|
-
},
|
|
4948
|
-
{
|
|
4949
|
-
"model_format": "pytorch",
|
|
4950
|
-
"model_size_in_billions": 8,
|
|
4951
|
-
"quantizations": [
|
|
4952
|
-
"4-bit",
|
|
4953
|
-
"8-bit",
|
|
4954
|
-
"none"
|
|
4955
|
-
],
|
|
4956
|
-
"model_hub": "modelscope",
|
|
4957
|
-
"model_id": "OpenGVLab/InternVL2_5-8B-MPO",
|
|
4958
|
-
"model_revision": "master"
|
|
4959
|
-
},
|
|
4960
|
-
{
|
|
4961
|
-
"model_format": "awq",
|
|
4962
|
-
"model_size_in_billions": 8,
|
|
4963
|
-
"quantizations": [
|
|
4964
|
-
"Int4"
|
|
4965
|
-
],
|
|
4966
|
-
"model_hub": "modelscope",
|
|
4967
|
-
"model_id": "OpenGVLab/InternVL2_5-8B-MPO-AWQ",
|
|
4968
|
-
"model_revision": "master"
|
|
4969
|
-
},
|
|
4970
|
-
{
|
|
4971
|
-
"model_format": "pytorch",
|
|
4972
|
-
"model_size_in_billions": 26,
|
|
4973
|
-
"quantizations": [
|
|
4974
|
-
"4-bit",
|
|
4975
|
-
"8-bit",
|
|
4976
|
-
"none"
|
|
4977
|
-
],
|
|
4978
|
-
"model_hub": "modelscope",
|
|
4979
|
-
"model_id": "OpenGVLab/InternVL2_5-26B-MPO",
|
|
4980
|
-
"model_revision": "master"
|
|
4981
|
-
},
|
|
4982
|
-
{
|
|
4983
|
-
"model_format": "awq",
|
|
4984
|
-
"model_size_in_billions": 26,
|
|
4985
|
-
"quantizations": [
|
|
4986
|
-
"Int4"
|
|
4987
|
-
],
|
|
4988
|
-
"model_hub": "modelscope",
|
|
4989
|
-
"model_id": "OpenGVLab/InternVL2_5-26B-MPO-AWQ",
|
|
4990
|
-
"model_revision": "master"
|
|
4991
|
-
},
|
|
4992
|
-
{
|
|
4993
|
-
"model_format": "pytorch",
|
|
4994
|
-
"model_size_in_billions": 38,
|
|
4995
|
-
"quantizations": [
|
|
4996
|
-
"4-bit",
|
|
4997
|
-
"8-bit",
|
|
4998
|
-
"none"
|
|
4999
|
-
],
|
|
5000
|
-
"model_hub": "modelscope",
|
|
5001
|
-
"model_id": "OpenGVLab/InternVL2_5-38B-MPO",
|
|
5002
|
-
"model_revision": "master"
|
|
5003
|
-
},
|
|
5004
|
-
{
|
|
5005
|
-
"model_format": "awq",
|
|
5006
|
-
"model_size_in_billions": 38,
|
|
5007
|
-
"quantizations": [
|
|
5008
|
-
"Int4"
|
|
5009
|
-
],
|
|
5010
|
-
"model_hub": "modelscope",
|
|
5011
|
-
"model_id": "OpenGVLab/InternVL2_5-38B-MPO-AWQ",
|
|
5012
|
-
"model_revision": "master"
|
|
5013
|
-
},
|
|
5014
|
-
{
|
|
5015
|
-
"model_format": "pytorch",
|
|
5016
|
-
"model_size_in_billions": 78,
|
|
5017
|
-
"quantizations": [
|
|
5018
|
-
"4-bit",
|
|
5019
|
-
"8-bit",
|
|
5020
|
-
"none"
|
|
5021
|
-
],
|
|
5022
|
-
"model_hub": "modelscope",
|
|
5023
|
-
"model_id": "OpenGVLab/InternVL2_5-78B-MPO",
|
|
5024
|
-
"model_revision": "master"
|
|
5025
|
-
},
|
|
5026
|
-
{
|
|
5027
|
-
"model_format": "awq",
|
|
5028
|
-
"model_size_in_billions": 78,
|
|
5029
|
-
"quantizations": [
|
|
5030
|
-
"Int4"
|
|
5031
|
-
],
|
|
5032
|
-
"model_hub": "modelscope",
|
|
5033
|
-
"model_id": "OpenGVLab/InternVL2_5-78B-MPO-AWQ",
|
|
5034
|
-
"model_revision": "master"
|
|
5035
|
-
}
|
|
5036
|
-
],
|
|
5037
|
-
"chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
5038
|
-
"stop_token_ids": [],
|
|
5039
|
-
"stop": []
|
|
5040
|
-
},
|
|
5041
|
-
{
|
|
5042
|
-
"version": 1,
|
|
5043
|
-
"context_length": 8192,
|
|
5044
|
-
"model_name": "InternVL3",
|
|
5045
|
-
"model_lang": [
|
|
5046
|
-
"en",
|
|
5047
|
-
"zh"
|
|
5048
|
-
],
|
|
5049
|
-
"model_ability": [
|
|
5050
|
-
"chat",
|
|
5051
|
-
"vision"
|
|
5052
|
-
],
|
|
5053
|
-
"model_description": "InternVL3, an advanced multimodal large language model (MLLM) series that demonstrates superior overall performance.",
|
|
5054
|
-
"model_specs": [
|
|
5055
|
-
{
|
|
5056
|
-
"model_format": "pytorch",
|
|
5057
|
-
"model_size_in_billions": 1,
|
|
5058
|
-
"quantizations": [
|
|
5059
|
-
"8-bit",
|
|
5060
|
-
"none"
|
|
5061
|
-
],
|
|
5062
|
-
"model_id": "OpenGVLab/InternVL3-1B",
|
|
5063
|
-
"model_hub": "modelscope",
|
|
5064
|
-
"model_revision": "master"
|
|
5065
|
-
},
|
|
5066
|
-
{
|
|
5067
|
-
"model_format": "awq",
|
|
5068
|
-
"model_size_in_billions": 1,
|
|
5069
|
-
"quantizations": [
|
|
5070
|
-
"Int4"
|
|
5071
|
-
],
|
|
5072
|
-
"model_id": "OpenGVLab/InternVL3-1B-AWQ",
|
|
5073
|
-
"model_hub": "modelscope"
|
|
5074
|
-
},
|
|
5075
|
-
{
|
|
5076
|
-
"model_format": "pytorch",
|
|
5077
|
-
"model_size_in_billions": 2,
|
|
5078
|
-
"quantizations": [
|
|
5079
|
-
"8-bit",
|
|
5080
|
-
"none"
|
|
5081
|
-
],
|
|
5082
|
-
"model_id": "OpenGVLab/InternVL3-2B",
|
|
5083
|
-
"model_hub": "modelscope",
|
|
5084
|
-
"model_revision": "master"
|
|
5085
|
-
},
|
|
5086
|
-
{
|
|
5087
|
-
"model_format": "awq",
|
|
5088
|
-
"model_size_in_billions": 2,
|
|
5089
|
-
"quantizations": [
|
|
5090
|
-
"Int4"
|
|
5091
|
-
],
|
|
5092
|
-
"model_id": "OpenGVLab/InternVL3-2B-AWQ",
|
|
5093
|
-
"model_hub": "modelscope"
|
|
5094
|
-
},
|
|
5095
|
-
{
|
|
5096
|
-
"model_format": "pytorch",
|
|
5097
|
-
"model_size_in_billions": 8,
|
|
5098
|
-
"quantizations": [
|
|
5099
|
-
"8-bit",
|
|
5100
|
-
"none"
|
|
5101
|
-
],
|
|
5102
|
-
"model_id": "OpenGVLab/InternVL3-8B",
|
|
5103
|
-
"model_hub": "modelscope",
|
|
5104
|
-
"model_revision": "master"
|
|
5105
|
-
},
|
|
5106
|
-
{
|
|
5107
|
-
"model_format": "awq",
|
|
5108
|
-
"model_size_in_billions": 8,
|
|
5109
|
-
"quantizations": [
|
|
5110
|
-
"Int4"
|
|
5111
|
-
],
|
|
5112
|
-
"model_id": "OpenGVLab/InternVL3-8B-AWQ",
|
|
5113
|
-
"model_hub": "modelscope"
|
|
5114
|
-
},
|
|
5115
|
-
{
|
|
5116
|
-
"model_format": "pytorch",
|
|
5117
|
-
"model_size_in_billions": 9,
|
|
5118
|
-
"quantizations": [
|
|
5119
|
-
"8-bit",
|
|
5120
|
-
"none"
|
|
5121
|
-
],
|
|
5122
|
-
"model_id": "OpenGVLab/InternVL3-9B",
|
|
5123
|
-
"model_hub": "modelscope",
|
|
5124
|
-
"model_revision": "master"
|
|
5125
|
-
},
|
|
5126
|
-
{
|
|
5127
|
-
"model_format": "awq",
|
|
5128
|
-
"model_size_in_billions": 9,
|
|
5129
|
-
"quantizations": [
|
|
5130
|
-
"Int4"
|
|
5131
|
-
],
|
|
5132
|
-
"model_id": "OpenGVLab/InternVL3-9B-AWQ",
|
|
5133
|
-
"model_hub": "modelscope"
|
|
5134
|
-
},
|
|
5135
|
-
{
|
|
5136
|
-
"model_format": "pytorch",
|
|
5137
|
-
"model_size_in_billions": 14,
|
|
5138
|
-
"quantizations": [
|
|
5139
|
-
"8-bit",
|
|
5140
|
-
"none"
|
|
5141
|
-
],
|
|
5142
|
-
"model_id": "OpenGVLab/InternVL3-14B",
|
|
5143
|
-
"model_hub": "modelscope",
|
|
5144
|
-
"model_revision": "master"
|
|
5145
|
-
},
|
|
5146
|
-
{
|
|
5147
|
-
"model_format": "awq",
|
|
5148
|
-
"model_size_in_billions": 14,
|
|
5149
|
-
"quantizations": [
|
|
5150
|
-
"Int4"
|
|
5151
|
-
],
|
|
5152
|
-
"model_id": "OpenGVLab/InternVL3-14B-AWQ",
|
|
5153
|
-
"model_hub": "modelscope"
|
|
3904
|
+
"model_id": "OpenGVLab/InternVL3-14B-AWQ",
|
|
3905
|
+
"model_hub": "modelscope"
|
|
5154
3906
|
},
|
|
5155
3907
|
{
|
|
5156
3908
|
"model_format": "pytorch",
|
|
5157
3909
|
"model_size_in_billions": 38,
|
|
5158
3910
|
"quantizations": [
|
|
5159
|
-
"8-bit",
|
|
5160
3911
|
"none"
|
|
5161
3912
|
],
|
|
5162
3913
|
"model_id": "OpenGVLab/InternVL3-38B",
|
|
@@ -5176,7 +3927,6 @@
|
|
|
5176
3927
|
"model_format": "pytorch",
|
|
5177
3928
|
"model_size_in_billions": 78,
|
|
5178
3929
|
"quantizations": [
|
|
5179
|
-
"8-bit",
|
|
5180
3930
|
"none"
|
|
5181
3931
|
],
|
|
5182
3932
|
"model_id": "OpenGVLab/InternVL3-78B",
|
|
@@ -5206,12 +3956,12 @@
|
|
|
5206
3956
|
"context_length": 8192,
|
|
5207
3957
|
"model_name": "cogvlm2",
|
|
5208
3958
|
"model_lang": [
|
|
5209
|
-
|
|
5210
|
-
|
|
3959
|
+
"en",
|
|
3960
|
+
"zh"
|
|
5211
3961
|
],
|
|
5212
3962
|
"model_ability": [
|
|
5213
|
-
|
|
5214
|
-
|
|
3963
|
+
"chat",
|
|
3964
|
+
"vision"
|
|
5215
3965
|
],
|
|
5216
3966
|
"model_description": "CogVLM2 have achieved good results in many lists compared to the previous generation of CogVLM open source models. Its excellent performance can compete with some non-open source models.",
|
|
5217
3967
|
"model_specs": [
|
|
@@ -5229,7 +3979,7 @@
|
|
|
5229
3979
|
"model_format": "pytorch",
|
|
5230
3980
|
"model_size_in_billions": 20,
|
|
5231
3981
|
"quantizations": [
|
|
5232
|
-
"
|
|
3982
|
+
"none"
|
|
5233
3983
|
],
|
|
5234
3984
|
"model_hub": "modelscope",
|
|
5235
3985
|
"model_id": "ZhipuAI/cogvlm2-llama3-chinese-chat-19B-{quantization}",
|
|
@@ -5251,12 +4001,12 @@
|
|
|
5251
4001
|
"context_length": 8192,
|
|
5252
4002
|
"model_name": "cogvlm2-video-llama3-chat",
|
|
5253
4003
|
"model_lang": [
|
|
5254
|
-
|
|
5255
|
-
|
|
4004
|
+
"en",
|
|
4005
|
+
"zh"
|
|
5256
4006
|
],
|
|
5257
4007
|
"model_ability": [
|
|
5258
|
-
|
|
5259
|
-
|
|
4008
|
+
"chat",
|
|
4009
|
+
"vision"
|
|
5260
4010
|
],
|
|
5261
4011
|
"model_description": "CogVLM2-Video achieves state-of-the-art performance on multiple video question answering tasks.",
|
|
5262
4012
|
"model_specs": [
|
|
@@ -5264,8 +4014,6 @@
|
|
|
5264
4014
|
"model_format": "pytorch",
|
|
5265
4015
|
"model_size_in_billions": 12,
|
|
5266
4016
|
"quantizations": [
|
|
5267
|
-
"4-bit",
|
|
5268
|
-
"8-bit",
|
|
5269
4017
|
"none"
|
|
5270
4018
|
],
|
|
5271
4019
|
"model_hub": "modelscope",
|
|
@@ -5300,8 +4048,6 @@
|
|
|
5300
4048
|
"model_format": "pytorch",
|
|
5301
4049
|
"model_size_in_billions": 7,
|
|
5302
4050
|
"quantizations": [
|
|
5303
|
-
"4-bit",
|
|
5304
|
-
"8-bit",
|
|
5305
4051
|
"none"
|
|
5306
4052
|
],
|
|
5307
4053
|
"model_id": "TeleAI/telechat-7B",
|
|
@@ -5323,8 +4069,6 @@
|
|
|
5323
4069
|
"model_format": "pytorch",
|
|
5324
4070
|
"model_size_in_billions": 12,
|
|
5325
4071
|
"quantizations": [
|
|
5326
|
-
"4-bit",
|
|
5327
|
-
"8-bit",
|
|
5328
4072
|
"none"
|
|
5329
4073
|
],
|
|
5330
4074
|
"model_id": "TeleAI/TeleChat-12B",
|
|
@@ -5346,8 +4090,6 @@
|
|
|
5346
4090
|
"model_format": "pytorch",
|
|
5347
4091
|
"model_size_in_billions": 52,
|
|
5348
4092
|
"quantizations": [
|
|
5349
|
-
"4-bit",
|
|
5350
|
-
"8-bit",
|
|
5351
4093
|
"none"
|
|
5352
4094
|
],
|
|
5353
4095
|
"model_id": "TeleAI/TeleChat-52B",
|
|
@@ -5378,146 +4120,146 @@
|
|
|
5378
4120
|
"vision"
|
|
5379
4121
|
],
|
|
5380
4122
|
"model_description": "Qwen2-VL: To See the World More Clearly.Qwen2-VL is the latest version of the vision language models in the Qwen model familities.",
|
|
5381
|
-
"model_specs":[
|
|
4123
|
+
"model_specs": [
|
|
5382
4124
|
{
|
|
5383
|
-
"model_format":"pytorch",
|
|
5384
|
-
"model_size_in_billions":7,
|
|
5385
|
-
"quantizations":[
|
|
4125
|
+
"model_format": "pytorch",
|
|
4126
|
+
"model_size_in_billions": 7,
|
|
4127
|
+
"quantizations": [
|
|
5386
4128
|
"none"
|
|
5387
4129
|
],
|
|
5388
4130
|
"model_hub": "modelscope",
|
|
5389
|
-
"model_id":"qwen/Qwen2-VL-7B-Instruct",
|
|
5390
|
-
"model_revision":"master"
|
|
4131
|
+
"model_id": "qwen/Qwen2-VL-7B-Instruct",
|
|
4132
|
+
"model_revision": "master"
|
|
5391
4133
|
},
|
|
5392
4134
|
{
|
|
5393
|
-
"model_format":"gptq",
|
|
5394
|
-
"model_size_in_billions":7,
|
|
5395
|
-
"quantizations":[
|
|
4135
|
+
"model_format": "gptq",
|
|
4136
|
+
"model_size_in_billions": 7,
|
|
4137
|
+
"quantizations": [
|
|
5396
4138
|
"Int8"
|
|
5397
4139
|
],
|
|
5398
4140
|
"model_hub": "modelscope",
|
|
5399
|
-
"model_id":"qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8",
|
|
5400
|
-
"model_revision":"master"
|
|
4141
|
+
"model_id": "qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8",
|
|
4142
|
+
"model_revision": "master"
|
|
5401
4143
|
},
|
|
5402
4144
|
{
|
|
5403
|
-
"model_format":"gptq",
|
|
5404
|
-
"model_size_in_billions":7,
|
|
5405
|
-
"quantizations":[
|
|
4145
|
+
"model_format": "gptq",
|
|
4146
|
+
"model_size_in_billions": 7,
|
|
4147
|
+
"quantizations": [
|
|
5406
4148
|
"Int4"
|
|
5407
4149
|
],
|
|
5408
4150
|
"model_hub": "modelscope",
|
|
5409
|
-
"model_id":"qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4",
|
|
5410
|
-
"model_revision":"master"
|
|
4151
|
+
"model_id": "qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4",
|
|
4152
|
+
"model_revision": "master"
|
|
5411
4153
|
},
|
|
5412
4154
|
{
|
|
5413
|
-
"model_format":"awq",
|
|
5414
|
-
"model_size_in_billions":7,
|
|
5415
|
-
"quantizations":[
|
|
4155
|
+
"model_format": "awq",
|
|
4156
|
+
"model_size_in_billions": 7,
|
|
4157
|
+
"quantizations": [
|
|
5416
4158
|
"Int4"
|
|
5417
4159
|
],
|
|
5418
4160
|
"model_hub": "modelscope",
|
|
5419
|
-
"model_id":"qwen/Qwen2-VL-7B-Instruct-AWQ",
|
|
5420
|
-
"model_revision":"master"
|
|
4161
|
+
"model_id": "qwen/Qwen2-VL-7B-Instruct-AWQ",
|
|
4162
|
+
"model_revision": "master"
|
|
5421
4163
|
},
|
|
5422
4164
|
{
|
|
5423
|
-
"model_format":"mlx",
|
|
5424
|
-
"model_size_in_billions":7,
|
|
5425
|
-
"quantizations":[
|
|
4165
|
+
"model_format": "mlx",
|
|
4166
|
+
"model_size_in_billions": 7,
|
|
4167
|
+
"quantizations": [
|
|
5426
4168
|
"8bit"
|
|
5427
4169
|
],
|
|
5428
4170
|
"model_hub": "modelscope",
|
|
5429
|
-
"model_id":"okwinds/Qwen2-VL-7B-Instruct-MLX-8bit",
|
|
5430
|
-
"model_revision":"master"
|
|
4171
|
+
"model_id": "okwinds/Qwen2-VL-7B-Instruct-MLX-8bit",
|
|
4172
|
+
"model_revision": "master"
|
|
5431
4173
|
},
|
|
5432
4174
|
{
|
|
5433
|
-
"model_format":"pytorch",
|
|
5434
|
-
"model_size_in_billions":2,
|
|
5435
|
-
"quantizations":[
|
|
4175
|
+
"model_format": "pytorch",
|
|
4176
|
+
"model_size_in_billions": 2,
|
|
4177
|
+
"quantizations": [
|
|
5436
4178
|
"none"
|
|
5437
4179
|
],
|
|
5438
4180
|
"model_hub": "modelscope",
|
|
5439
|
-
"model_id":"qwen/Qwen2-VL-2B-Instruct",
|
|
5440
|
-
"model_revision":"master"
|
|
4181
|
+
"model_id": "qwen/Qwen2-VL-2B-Instruct",
|
|
4182
|
+
"model_revision": "master"
|
|
5441
4183
|
},
|
|
5442
4184
|
{
|
|
5443
|
-
"model_format":"gptq",
|
|
5444
|
-
"model_size_in_billions":2,
|
|
5445
|
-
"quantizations":[
|
|
4185
|
+
"model_format": "gptq",
|
|
4186
|
+
"model_size_in_billions": 2,
|
|
4187
|
+
"quantizations": [
|
|
5446
4188
|
"Int8"
|
|
5447
4189
|
],
|
|
5448
4190
|
"model_hub": "modelscope",
|
|
5449
|
-
"model_id":"qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8",
|
|
5450
|
-
"model_revision":"master"
|
|
4191
|
+
"model_id": "qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8",
|
|
4192
|
+
"model_revision": "master"
|
|
5451
4193
|
},
|
|
5452
4194
|
{
|
|
5453
|
-
"model_format":"gptq",
|
|
5454
|
-
"model_size_in_billions":2,
|
|
5455
|
-
"quantizations":[
|
|
4195
|
+
"model_format": "gptq",
|
|
4196
|
+
"model_size_in_billions": 2,
|
|
4197
|
+
"quantizations": [
|
|
5456
4198
|
"Int4"
|
|
5457
4199
|
],
|
|
5458
4200
|
"model_hub": "modelscope",
|
|
5459
|
-
"model_id":"qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4",
|
|
5460
|
-
"model_revision":"master"
|
|
4201
|
+
"model_id": "qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4",
|
|
4202
|
+
"model_revision": "master"
|
|
5461
4203
|
},
|
|
5462
4204
|
{
|
|
5463
|
-
"model_format":"awq",
|
|
5464
|
-
"model_size_in_billions":2,
|
|
5465
|
-
"quantizations":[
|
|
4205
|
+
"model_format": "awq",
|
|
4206
|
+
"model_size_in_billions": 2,
|
|
4207
|
+
"quantizations": [
|
|
5466
4208
|
"Int4"
|
|
5467
4209
|
],
|
|
5468
4210
|
"model_hub": "modelscope",
|
|
5469
|
-
"model_id":"qwen/Qwen2-VL-2B-Instruct-AWQ",
|
|
5470
|
-
"model_revision":"master"
|
|
4211
|
+
"model_id": "qwen/Qwen2-VL-2B-Instruct-AWQ",
|
|
4212
|
+
"model_revision": "master"
|
|
5471
4213
|
},
|
|
5472
4214
|
{
|
|
5473
|
-
"model_format":"mlx",
|
|
5474
|
-
"model_size_in_billions":2,
|
|
5475
|
-
"quantizations":[
|
|
4215
|
+
"model_format": "mlx",
|
|
4216
|
+
"model_size_in_billions": 2,
|
|
4217
|
+
"quantizations": [
|
|
5476
4218
|
"4bit",
|
|
5477
4219
|
"8bit"
|
|
5478
4220
|
],
|
|
5479
4221
|
"model_hub": "modelscope",
|
|
5480
|
-
"model_id":"mlx-community/Qwen2-VL-2B-Instruct-{quantization}",
|
|
5481
|
-
"model_revision":"master"
|
|
4222
|
+
"model_id": "mlx-community/Qwen2-VL-2B-Instruct-{quantization}",
|
|
4223
|
+
"model_revision": "master"
|
|
5482
4224
|
},
|
|
5483
4225
|
{
|
|
5484
|
-
"model_format":"pytorch",
|
|
5485
|
-
"model_size_in_billions":72,
|
|
5486
|
-
"quantizations":[
|
|
4226
|
+
"model_format": "pytorch",
|
|
4227
|
+
"model_size_in_billions": 72,
|
|
4228
|
+
"quantizations": [
|
|
5487
4229
|
"none"
|
|
5488
4230
|
],
|
|
5489
|
-
"model_id":"qwen/Qwen2-VL-72B-Instruct",
|
|
4231
|
+
"model_id": "qwen/Qwen2-VL-72B-Instruct",
|
|
5490
4232
|
"model_hub": "modelscope"
|
|
5491
4233
|
},
|
|
5492
4234
|
{
|
|
5493
|
-
"model_format":"awq",
|
|
5494
|
-
"model_size_in_billions":72,
|
|
5495
|
-
"quantizations":[
|
|
4235
|
+
"model_format": "awq",
|
|
4236
|
+
"model_size_in_billions": 72,
|
|
4237
|
+
"quantizations": [
|
|
5496
4238
|
"Int4"
|
|
5497
4239
|
],
|
|
5498
|
-
"model_id":"qwen/Qwen2-VL-72B-Instruct-AWQ",
|
|
4240
|
+
"model_id": "qwen/Qwen2-VL-72B-Instruct-AWQ",
|
|
5499
4241
|
"model_hub": "modelscope"
|
|
5500
4242
|
},
|
|
5501
4243
|
{
|
|
5502
|
-
"model_format":"gptq",
|
|
5503
|
-
"model_size_in_billions":72,
|
|
5504
|
-
"quantizations":[
|
|
4244
|
+
"model_format": "gptq",
|
|
4245
|
+
"model_size_in_billions": 72,
|
|
4246
|
+
"quantizations": [
|
|
5505
4247
|
"Int4",
|
|
5506
4248
|
"Int8"
|
|
5507
4249
|
],
|
|
5508
|
-
"model_id":"qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}",
|
|
4250
|
+
"model_id": "qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}",
|
|
5509
4251
|
"model_hub": "modelscope"
|
|
5510
4252
|
},
|
|
5511
4253
|
{
|
|
5512
|
-
"model_format":"mlx",
|
|
5513
|
-
"model_size_in_billions":72,
|
|
5514
|
-
"quantizations":[
|
|
4254
|
+
"model_format": "mlx",
|
|
4255
|
+
"model_size_in_billions": 72,
|
|
4256
|
+
"quantizations": [
|
|
5515
4257
|
"4bit",
|
|
5516
4258
|
"8bit"
|
|
5517
4259
|
],
|
|
5518
4260
|
"model_hub": "modelscope",
|
|
5519
|
-
"model_id":"okwinds/Qwen2-VL-72B-Instruct-MLX-{quantization}",
|
|
5520
|
-
"model_revision":"master"
|
|
4261
|
+
"model_id": "okwinds/Qwen2-VL-72B-Instruct-MLX-{quantization}",
|
|
4262
|
+
"model_revision": "master"
|
|
5521
4263
|
}
|
|
5522
4264
|
],
|
|
5523
4265
|
"chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
|
|
@@ -5531,95 +4273,95 @@
|
|
|
5531
4273
|
]
|
|
5532
4274
|
},
|
|
5533
4275
|
{
|
|
5534
|
-
"version":1,
|
|
5535
|
-
"context_length":128000,
|
|
5536
|
-
"model_name":"qwen2.5-vl-instruct",
|
|
5537
|
-
"model_lang":[
|
|
4276
|
+
"version": 1,
|
|
4277
|
+
"context_length": 128000,
|
|
4278
|
+
"model_name": "qwen2.5-vl-instruct",
|
|
4279
|
+
"model_lang": [
|
|
5538
4280
|
"en",
|
|
5539
4281
|
"zh"
|
|
5540
4282
|
],
|
|
5541
|
-
"model_ability":[
|
|
4283
|
+
"model_ability": [
|
|
5542
4284
|
"chat",
|
|
5543
4285
|
"vision"
|
|
5544
4286
|
],
|
|
5545
|
-
"model_description":"Qwen2.5-VL: Qwen2.5-VL is the latest version of the vision language models in the Qwen model familities.",
|
|
5546
|
-
"model_specs":[
|
|
4287
|
+
"model_description": "Qwen2.5-VL: Qwen2.5-VL is the latest version of the vision language models in the Qwen model familities.",
|
|
4288
|
+
"model_specs": [
|
|
5547
4289
|
{
|
|
5548
|
-
"model_format":"pytorch",
|
|
5549
|
-
"model_size_in_billions":3,
|
|
5550
|
-
"quantizations":[
|
|
4290
|
+
"model_format": "pytorch",
|
|
4291
|
+
"model_size_in_billions": 3,
|
|
4292
|
+
"quantizations": [
|
|
5551
4293
|
"none"
|
|
5552
4294
|
],
|
|
5553
4295
|
"model_hub": "modelscope",
|
|
5554
|
-
"model_id":"Qwen/Qwen2.5-VL-3B-Instruct"
|
|
4296
|
+
"model_id": "Qwen/Qwen2.5-VL-3B-Instruct"
|
|
5555
4297
|
},
|
|
5556
4298
|
{
|
|
5557
|
-
"model_format":"pytorch",
|
|
5558
|
-
"model_size_in_billions":7,
|
|
5559
|
-
"quantizations":[
|
|
4299
|
+
"model_format": "pytorch",
|
|
4300
|
+
"model_size_in_billions": 7,
|
|
4301
|
+
"quantizations": [
|
|
5560
4302
|
"none"
|
|
5561
4303
|
],
|
|
5562
4304
|
"model_hub": "modelscope",
|
|
5563
|
-
"model_id":"Qwen/Qwen2.5-VL-7B-Instruct"
|
|
4305
|
+
"model_id": "Qwen/Qwen2.5-VL-7B-Instruct"
|
|
5564
4306
|
},
|
|
5565
4307
|
{
|
|
5566
|
-
"model_format":"pytorch",
|
|
5567
|
-
"model_size_in_billions":32,
|
|
5568
|
-
"quantizations":[
|
|
4308
|
+
"model_format": "pytorch",
|
|
4309
|
+
"model_size_in_billions": 32,
|
|
4310
|
+
"quantizations": [
|
|
5569
4311
|
"none"
|
|
5570
4312
|
],
|
|
5571
4313
|
"model_hub": "modelscope",
|
|
5572
|
-
"model_id":"Qwen/Qwen2.5-VL-32B-Instruct"
|
|
4314
|
+
"model_id": "Qwen/Qwen2.5-VL-32B-Instruct"
|
|
5573
4315
|
},
|
|
5574
4316
|
{
|
|
5575
|
-
"model_format":"pytorch",
|
|
5576
|
-
"model_size_in_billions":72,
|
|
5577
|
-
"quantizations":[
|
|
4317
|
+
"model_format": "pytorch",
|
|
4318
|
+
"model_size_in_billions": 72,
|
|
4319
|
+
"quantizations": [
|
|
5578
4320
|
"none"
|
|
5579
4321
|
],
|
|
5580
4322
|
"model_hub": "modelscope",
|
|
5581
|
-
"model_id":"Qwen/Qwen2.5-VL-72B-Instruct"
|
|
4323
|
+
"model_id": "Qwen/Qwen2.5-VL-72B-Instruct"
|
|
5582
4324
|
},
|
|
5583
4325
|
{
|
|
5584
|
-
"model_format":"awq",
|
|
5585
|
-
"model_size_in_billions":3,
|
|
5586
|
-
"quantizations":[
|
|
4326
|
+
"model_format": "awq",
|
|
4327
|
+
"model_size_in_billions": 3,
|
|
4328
|
+
"quantizations": [
|
|
5587
4329
|
"Int4"
|
|
5588
4330
|
],
|
|
5589
4331
|
"model_hub": "modelscope",
|
|
5590
|
-
"model_id":"Qwen/Qwen2.5-VL-3B-Instruct-AWQ"
|
|
4332
|
+
"model_id": "Qwen/Qwen2.5-VL-3B-Instruct-AWQ"
|
|
5591
4333
|
},
|
|
5592
4334
|
{
|
|
5593
|
-
"model_format":"awq",
|
|
5594
|
-
"model_size_in_billions":7,
|
|
5595
|
-
"quantizations":[
|
|
4335
|
+
"model_format": "awq",
|
|
4336
|
+
"model_size_in_billions": 7,
|
|
4337
|
+
"quantizations": [
|
|
5596
4338
|
"Int4"
|
|
5597
4339
|
],
|
|
5598
4340
|
"model_hub": "modelscope",
|
|
5599
|
-
"model_id":"Qwen/Qwen2.5-VL-7B-Instruct-AWQ"
|
|
4341
|
+
"model_id": "Qwen/Qwen2.5-VL-7B-Instruct-AWQ"
|
|
5600
4342
|
},
|
|
5601
4343
|
{
|
|
5602
|
-
"model_format":"awq",
|
|
5603
|
-
"model_size_in_billions":32,
|
|
5604
|
-
"quantizations":[
|
|
4344
|
+
"model_format": "awq",
|
|
4345
|
+
"model_size_in_billions": 32,
|
|
4346
|
+
"quantizations": [
|
|
5605
4347
|
"Int4"
|
|
5606
4348
|
],
|
|
5607
4349
|
"model_hub": "modelscope",
|
|
5608
|
-
"model_id":"Qwen/Qwen2.5-VL-32B-Instruct-AWQ"
|
|
4350
|
+
"model_id": "Qwen/Qwen2.5-VL-32B-Instruct-AWQ"
|
|
5609
4351
|
},
|
|
5610
4352
|
{
|
|
5611
|
-
"model_format":"pytorch",
|
|
5612
|
-
"model_size_in_billions":72,
|
|
5613
|
-
"quantizations":[
|
|
5614
|
-
"
|
|
4353
|
+
"model_format": "pytorch",
|
|
4354
|
+
"model_size_in_billions": 72,
|
|
4355
|
+
"quantizations": [
|
|
4356
|
+
"none"
|
|
5615
4357
|
],
|
|
5616
4358
|
"model_hub": "modelscope",
|
|
5617
|
-
"model_id":"Qwen/Qwen2.5-VL-72B-Instruct-AWQ"
|
|
4359
|
+
"model_id": "Qwen/Qwen2.5-VL-72B-Instruct-AWQ"
|
|
5618
4360
|
},
|
|
5619
4361
|
{
|
|
5620
|
-
"model_format":"mlx",
|
|
5621
|
-
"model_size_in_billions":3,
|
|
5622
|
-
"quantizations":[
|
|
4362
|
+
"model_format": "mlx",
|
|
4363
|
+
"model_size_in_billions": 3,
|
|
4364
|
+
"quantizations": [
|
|
5623
4365
|
"3bit",
|
|
5624
4366
|
"4bit",
|
|
5625
4367
|
"6bit",
|
|
@@ -5627,12 +4369,12 @@
|
|
|
5627
4369
|
"bf16"
|
|
5628
4370
|
],
|
|
5629
4371
|
"model_hub": "modelscope",
|
|
5630
|
-
"model_id":"mlx-community/Qwen2.5-VL-3B-Instruct-{quantization}"
|
|
4372
|
+
"model_id": "mlx-community/Qwen2.5-VL-3B-Instruct-{quantization}"
|
|
5631
4373
|
},
|
|
5632
4374
|
{
|
|
5633
|
-
"model_format":"mlx",
|
|
5634
|
-
"model_size_in_billions":7,
|
|
5635
|
-
"quantizations":[
|
|
4375
|
+
"model_format": "mlx",
|
|
4376
|
+
"model_size_in_billions": 7,
|
|
4377
|
+
"quantizations": [
|
|
5636
4378
|
"3bit",
|
|
5637
4379
|
"4bit",
|
|
5638
4380
|
"6bit",
|
|
@@ -5640,12 +4382,12 @@
|
|
|
5640
4382
|
"bf16"
|
|
5641
4383
|
],
|
|
5642
4384
|
"model_hub": "modelscope",
|
|
5643
|
-
"model_id":"mlx-community/Qwen2.5-VL-7B-Instruct-{quantization}"
|
|
4385
|
+
"model_id": "mlx-community/Qwen2.5-VL-7B-Instruct-{quantization}"
|
|
5644
4386
|
},
|
|
5645
4387
|
{
|
|
5646
|
-
"model_format":"mlx",
|
|
5647
|
-
"model_size_in_billions":72,
|
|
5648
|
-
"quantizations":[
|
|
4388
|
+
"model_format": "mlx",
|
|
4389
|
+
"model_size_in_billions": 72,
|
|
4390
|
+
"quantizations": [
|
|
5649
4391
|
"3bit",
|
|
5650
4392
|
"4bit",
|
|
5651
4393
|
"6bit",
|
|
@@ -5653,7 +4395,7 @@
|
|
|
5653
4395
|
"bf16"
|
|
5654
4396
|
],
|
|
5655
4397
|
"model_hub": "modelscope",
|
|
5656
|
-
"model_id":"mlx-community/Qwen2.5-VL-72B-Instruct-{quantization}"
|
|
4398
|
+
"model_id": "mlx-community/Qwen2.5-VL-72B-Instruct-{quantization}"
|
|
5657
4399
|
}
|
|
5658
4400
|
],
|
|
5659
4401
|
"chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
|
|
@@ -5667,29 +4409,29 @@
|
|
|
5667
4409
|
]
|
|
5668
4410
|
},
|
|
5669
4411
|
{
|
|
5670
|
-
"version":1,
|
|
5671
|
-
"context_length":32768,
|
|
5672
|
-
"model_name":"qwen2.5-omni",
|
|
5673
|
-
"model_lang":[
|
|
4412
|
+
"version": 1,
|
|
4413
|
+
"context_length": 32768,
|
|
4414
|
+
"model_name": "qwen2.5-omni",
|
|
4415
|
+
"model_lang": [
|
|
5674
4416
|
"en",
|
|
5675
4417
|
"zh"
|
|
5676
4418
|
],
|
|
5677
|
-
"model_ability":[
|
|
4419
|
+
"model_ability": [
|
|
5678
4420
|
"chat",
|
|
5679
4421
|
"vision",
|
|
5680
4422
|
"audio",
|
|
5681
4423
|
"omni"
|
|
5682
4424
|
],
|
|
5683
|
-
"model_description":"Qwen2.5-Omni: the new flagship end-to-end multimodal model in the Qwen series.",
|
|
5684
|
-
"model_specs":[
|
|
4425
|
+
"model_description": "Qwen2.5-Omni: the new flagship end-to-end multimodal model in the Qwen series.",
|
|
4426
|
+
"model_specs": [
|
|
5685
4427
|
{
|
|
5686
|
-
"model_format":"pytorch",
|
|
5687
|
-
"model_size_in_billions":7,
|
|
5688
|
-
"quantizations":[
|
|
4428
|
+
"model_format": "pytorch",
|
|
4429
|
+
"model_size_in_billions": 7,
|
|
4430
|
+
"quantizations": [
|
|
5689
4431
|
"none"
|
|
5690
4432
|
],
|
|
5691
4433
|
"model_hub": "modelscope",
|
|
5692
|
-
"model_id":"Qwen/Qwen2.5-Omni-7B"
|
|
4434
|
+
"model_id": "Qwen/Qwen2.5-Omni-7B"
|
|
5693
4435
|
}
|
|
5694
4436
|
],
|
|
5695
4437
|
"chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
|
|
@@ -5838,8 +4580,6 @@
|
|
|
5838
4580
|
"model_format": "pytorch",
|
|
5839
4581
|
"model_size_in_billions": 16,
|
|
5840
4582
|
"quantizations": [
|
|
5841
|
-
"4-bit",
|
|
5842
|
-
"8-bit",
|
|
5843
4583
|
"none"
|
|
5844
4584
|
],
|
|
5845
4585
|
"model_id": "deepseek-ai/DeepSeek-V2-Lite",
|
|
@@ -5850,8 +4590,6 @@
|
|
|
5850
4590
|
"model_format": "pytorch",
|
|
5851
4591
|
"model_size_in_billions": 236,
|
|
5852
4592
|
"quantizations": [
|
|
5853
|
-
"4-bit",
|
|
5854
|
-
"8-bit",
|
|
5855
4593
|
"none"
|
|
5856
4594
|
],
|
|
5857
4595
|
"model_id": "deepseek-ai/DeepSeek-V2",
|
|
@@ -5877,8 +4615,6 @@
|
|
|
5877
4615
|
"model_format": "pytorch",
|
|
5878
4616
|
"model_size_in_billions": 16,
|
|
5879
4617
|
"quantizations": [
|
|
5880
|
-
"4-bit",
|
|
5881
|
-
"8-bit",
|
|
5882
4618
|
"none"
|
|
5883
4619
|
],
|
|
5884
4620
|
"model_id": "deepseek-ai/DeepSeek-V2-Lite-Chat",
|
|
@@ -5889,8 +4625,6 @@
|
|
|
5889
4625
|
"model_format": "pytorch",
|
|
5890
4626
|
"model_size_in_billions": 236,
|
|
5891
4627
|
"quantizations": [
|
|
5892
|
-
"4-bit",
|
|
5893
|
-
"8-bit",
|
|
5894
4628
|
"none"
|
|
5895
4629
|
],
|
|
5896
4630
|
"model_id": "deepseek-ai/DeepSeek-V2-Chat",
|
|
@@ -5923,8 +4657,6 @@
|
|
|
5923
4657
|
"model_format": "pytorch",
|
|
5924
4658
|
"model_size_in_billions": 236,
|
|
5925
4659
|
"quantizations": [
|
|
5926
|
-
"4-bit",
|
|
5927
|
-
"8-bit",
|
|
5928
4660
|
"none"
|
|
5929
4661
|
],
|
|
5930
4662
|
"model_id": "deepseek-ai/DeepSeek-V2-Chat-0628",
|
|
@@ -5957,8 +4689,6 @@
|
|
|
5957
4689
|
"model_format": "pytorch",
|
|
5958
4690
|
"model_size_in_billions": 236,
|
|
5959
4691
|
"quantizations": [
|
|
5960
|
-
"4-bit",
|
|
5961
|
-
"8-bit",
|
|
5962
4692
|
"none"
|
|
5963
4693
|
],
|
|
5964
4694
|
"model_id": "deepseek-ai/DeepSeek-V2.5",
|
|
@@ -5991,8 +4721,6 @@
|
|
|
5991
4721
|
"model_format": "pytorch",
|
|
5992
4722
|
"model_size_in_billions": 671,
|
|
5993
4723
|
"quantizations": [
|
|
5994
|
-
"4-bit",
|
|
5995
|
-
"8-bit",
|
|
5996
4724
|
"none"
|
|
5997
4725
|
],
|
|
5998
4726
|
"model_id": "deepseek-ai/DeepSeek-V3",
|
|
@@ -6143,8 +4871,6 @@
|
|
|
6143
4871
|
"model_format": "pytorch",
|
|
6144
4872
|
"model_size_in_billions": 671,
|
|
6145
4873
|
"quantizations": [
|
|
6146
|
-
"4-bit",
|
|
6147
|
-
"8-bit",
|
|
6148
4874
|
"none"
|
|
6149
4875
|
],
|
|
6150
4876
|
"model_id": "deepseek-ai/DeepSeek-R1",
|
|
@@ -6348,87 +5074,6 @@
|
|
|
6348
5074
|
"reasoning_start_tag": "<think>",
|
|
6349
5075
|
"reasoning_end_tag": "</think>"
|
|
6350
5076
|
},
|
|
6351
|
-
{
|
|
6352
|
-
"version": 1,
|
|
6353
|
-
"context_length": 131072,
|
|
6354
|
-
"model_name": "yi-coder-chat",
|
|
6355
|
-
"model_lang": [
|
|
6356
|
-
"en"
|
|
6357
|
-
],
|
|
6358
|
-
"model_ability": [
|
|
6359
|
-
"chat"
|
|
6360
|
-
],
|
|
6361
|
-
"model_description": "Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.Excelling in long-context understanding with a maximum context length of 128K tokens.Supporting 52 major programming languages, including popular ones such as Java, Python, JavaScript, and C++.",
|
|
6362
|
-
"model_specs": [
|
|
6363
|
-
{
|
|
6364
|
-
"model_format": "pytorch",
|
|
6365
|
-
"model_size_in_billions": 9,
|
|
6366
|
-
"quantizations": [
|
|
6367
|
-
"none"
|
|
6368
|
-
],
|
|
6369
|
-
"model_hub": "modelscope",
|
|
6370
|
-
"model_id": "01ai/Yi-Coder-9B-Chat",
|
|
6371
|
-
"model_revision": "master"
|
|
6372
|
-
},
|
|
6373
|
-
{
|
|
6374
|
-
"model_format": "pytorch",
|
|
6375
|
-
"model_size_in_billions": "1_5",
|
|
6376
|
-
"quantizations": [
|
|
6377
|
-
"none"
|
|
6378
|
-
],
|
|
6379
|
-
"model_hub": "modelscope",
|
|
6380
|
-
"model_id": "01ai/Yi-Coder-1.5B-Chat",
|
|
6381
|
-
"model_revision": "master"
|
|
6382
|
-
}
|
|
6383
|
-
],
|
|
6384
|
-
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}",
|
|
6385
|
-
"stop_token_ids": [
|
|
6386
|
-
1,
|
|
6387
|
-
2,
|
|
6388
|
-
6,
|
|
6389
|
-
7
|
|
6390
|
-
],
|
|
6391
|
-
"stop": [
|
|
6392
|
-
"<|startoftext|>",
|
|
6393
|
-
"<|endoftext|>",
|
|
6394
|
-
"<|im_start|>",
|
|
6395
|
-
"<|im_end|>"
|
|
6396
|
-
]
|
|
6397
|
-
},
|
|
6398
|
-
{
|
|
6399
|
-
"version": 1,
|
|
6400
|
-
"context_length": 131072,
|
|
6401
|
-
"model_name": "yi-coder",
|
|
6402
|
-
"model_lang": [
|
|
6403
|
-
"en"
|
|
6404
|
-
],
|
|
6405
|
-
"model_ability": [
|
|
6406
|
-
"generate"
|
|
6407
|
-
],
|
|
6408
|
-
"model_description": "Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.Excelling in long-context understanding with a maximum context length of 128K tokens.Supporting 52 major programming languages, including popular ones such as Java, Python, JavaScript, and C++.",
|
|
6409
|
-
"model_specs": [
|
|
6410
|
-
{
|
|
6411
|
-
"model_format": "pytorch",
|
|
6412
|
-
"model_size_in_billions": 9,
|
|
6413
|
-
"quantizations": [
|
|
6414
|
-
"none"
|
|
6415
|
-
],
|
|
6416
|
-
"model_hub": "modelscope",
|
|
6417
|
-
"model_id": "01ai/Yi-Coder-9B",
|
|
6418
|
-
"model_revision": "master"
|
|
6419
|
-
},
|
|
6420
|
-
{
|
|
6421
|
-
"model_format": "pytorch",
|
|
6422
|
-
"model_size_in_billions": "1_5",
|
|
6423
|
-
"quantizations": [
|
|
6424
|
-
"none"
|
|
6425
|
-
],
|
|
6426
|
-
"model_hub": "modelscope",
|
|
6427
|
-
"model_id": "01ai/Yi-Coder-1.5B",
|
|
6428
|
-
"model_revision": "master"
|
|
6429
|
-
}
|
|
6430
|
-
]
|
|
6431
|
-
},
|
|
6432
5077
|
{
|
|
6433
5078
|
"version": 1,
|
|
6434
5079
|
"context_length": 32768,
|
|
@@ -6446,8 +5091,6 @@
|
|
|
6446
5091
|
"model_format": "pytorch",
|
|
6447
5092
|
"model_size_in_billions": "0_5",
|
|
6448
5093
|
"quantizations": [
|
|
6449
|
-
"4-bit",
|
|
6450
|
-
"8-bit",
|
|
6451
5094
|
"none"
|
|
6452
5095
|
],
|
|
6453
5096
|
"model_id": "qwen/Qwen2.5-0.5B",
|
|
@@ -6458,8 +5101,6 @@
|
|
|
6458
5101
|
"model_format": "pytorch",
|
|
6459
5102
|
"model_size_in_billions": "1_5",
|
|
6460
5103
|
"quantizations": [
|
|
6461
|
-
"4-bit",
|
|
6462
|
-
"8-bit",
|
|
6463
5104
|
"none"
|
|
6464
5105
|
],
|
|
6465
5106
|
"model_id": "qwen/Qwen2.5-1.5B",
|
|
@@ -6470,8 +5111,6 @@
|
|
|
6470
5111
|
"model_format": "pytorch",
|
|
6471
5112
|
"model_size_in_billions": 3,
|
|
6472
5113
|
"quantizations": [
|
|
6473
|
-
"4-bit",
|
|
6474
|
-
"8-bit",
|
|
6475
5114
|
"none"
|
|
6476
5115
|
],
|
|
6477
5116
|
"model_id": "qwen/Qwen2.5-3B",
|
|
@@ -6482,8 +5121,6 @@
|
|
|
6482
5121
|
"model_format": "pytorch",
|
|
6483
5122
|
"model_size_in_billions": 7,
|
|
6484
5123
|
"quantizations": [
|
|
6485
|
-
"4-bit",
|
|
6486
|
-
"8-bit",
|
|
6487
5124
|
"none"
|
|
6488
5125
|
],
|
|
6489
5126
|
"model_id": "qwen/Qwen2.5-7B",
|
|
@@ -6494,8 +5131,6 @@
|
|
|
6494
5131
|
"model_format": "pytorch",
|
|
6495
5132
|
"model_size_in_billions": 14,
|
|
6496
5133
|
"quantizations": [
|
|
6497
|
-
"4-bit",
|
|
6498
|
-
"8-bit",
|
|
6499
5134
|
"none"
|
|
6500
5135
|
],
|
|
6501
5136
|
"model_id": "qwen/Qwen2.5-14B",
|
|
@@ -6506,8 +5141,6 @@
|
|
|
6506
5141
|
"model_format": "pytorch",
|
|
6507
5142
|
"model_size_in_billions": 32,
|
|
6508
5143
|
"quantizations": [
|
|
6509
|
-
"4-bit",
|
|
6510
|
-
"8-bit",
|
|
6511
5144
|
"none"
|
|
6512
5145
|
],
|
|
6513
5146
|
"model_id": "qwen/Qwen2.5-32B",
|
|
@@ -6518,8 +5151,6 @@
|
|
|
6518
5151
|
"model_format": "pytorch",
|
|
6519
5152
|
"model_size_in_billions": 72,
|
|
6520
5153
|
"quantizations": [
|
|
6521
|
-
"4-bit",
|
|
6522
|
-
"8-bit",
|
|
6523
5154
|
"none"
|
|
6524
5155
|
],
|
|
6525
5156
|
"model_id": "qwen/Qwen2.5-72B",
|
|
@@ -6546,8 +5177,6 @@
|
|
|
6546
5177
|
"model_format": "pytorch",
|
|
6547
5178
|
"model_size_in_billions": "0_5",
|
|
6548
5179
|
"quantizations": [
|
|
6549
|
-
"4-bit",
|
|
6550
|
-
"8-bit",
|
|
6551
5180
|
"none"
|
|
6552
5181
|
],
|
|
6553
5182
|
"model_id": "qwen/Qwen2.5-0.5B-Instruct",
|
|
@@ -6557,8 +5186,6 @@
|
|
|
6557
5186
|
"model_format": "pytorch",
|
|
6558
5187
|
"model_size_in_billions": "1_5",
|
|
6559
5188
|
"quantizations": [
|
|
6560
|
-
"4-bit",
|
|
6561
|
-
"8-bit",
|
|
6562
5189
|
"none"
|
|
6563
5190
|
],
|
|
6564
5191
|
"model_id": "qwen/Qwen2.5-1.5B-Instruct",
|
|
@@ -6568,8 +5195,6 @@
|
|
|
6568
5195
|
"model_format": "pytorch",
|
|
6569
5196
|
"model_size_in_billions": 3,
|
|
6570
5197
|
"quantizations": [
|
|
6571
|
-
"4-bit",
|
|
6572
|
-
"8-bit",
|
|
6573
5198
|
"none"
|
|
6574
5199
|
],
|
|
6575
5200
|
"model_id": "qwen/Qwen2.5-3B-Instruct",
|
|
@@ -6579,8 +5204,6 @@
|
|
|
6579
5204
|
"model_format": "pytorch",
|
|
6580
5205
|
"model_size_in_billions": 7,
|
|
6581
5206
|
"quantizations": [
|
|
6582
|
-
"4-bit",
|
|
6583
|
-
"8-bit",
|
|
6584
5207
|
"none"
|
|
6585
5208
|
],
|
|
6586
5209
|
"model_id": "qwen/Qwen2.5-7B-Instruct",
|
|
@@ -6590,8 +5213,6 @@
|
|
|
6590
5213
|
"model_format": "pytorch",
|
|
6591
5214
|
"model_size_in_billions": 14,
|
|
6592
5215
|
"quantizations": [
|
|
6593
|
-
"4-bit",
|
|
6594
|
-
"8-bit",
|
|
6595
5216
|
"none"
|
|
6596
5217
|
],
|
|
6597
5218
|
"model_id": "qwen/Qwen2.5-14B-Instruct",
|
|
@@ -6601,8 +5222,6 @@
|
|
|
6601
5222
|
"model_format": "pytorch",
|
|
6602
5223
|
"model_size_in_billions": 32,
|
|
6603
5224
|
"quantizations": [
|
|
6604
|
-
"4-bit",
|
|
6605
|
-
"8-bit",
|
|
6606
5225
|
"none"
|
|
6607
5226
|
],
|
|
6608
5227
|
"model_id": "qwen/Qwen2.5-32B-Instruct",
|
|
@@ -6612,8 +5231,6 @@
|
|
|
6612
5231
|
"model_format": "pytorch",
|
|
6613
5232
|
"model_size_in_billions": 72,
|
|
6614
5233
|
"quantizations": [
|
|
6615
|
-
"4-bit",
|
|
6616
|
-
"8-bit",
|
|
6617
5234
|
"none"
|
|
6618
5235
|
],
|
|
6619
5236
|
"model_id": "qwen/Qwen2.5-72B-Instruct",
|
|
@@ -6727,7 +5344,7 @@
|
|
|
6727
5344
|
},
|
|
6728
5345
|
{
|
|
6729
5346
|
"model_format": "awq",
|
|
6730
|
-
"model_size_in_billions":14,
|
|
5347
|
+
"model_size_in_billions": 14,
|
|
6731
5348
|
"quantizations": [
|
|
6732
5349
|
"Int4"
|
|
6733
5350
|
],
|
|
@@ -7008,7 +5625,7 @@
|
|
|
7008
5625
|
"model_file_name_template": "qwen2_5-72b-instruct-{quantization}.gguf",
|
|
7009
5626
|
"model_file_name_split_template": "qwen2.5-72b-instruct-{quantization}-{part}.gguf",
|
|
7010
5627
|
"quantization_parts": {
|
|
7011
|
-
|
|
5628
|
+
"q2_k": [
|
|
7012
5629
|
"00001-of-00007",
|
|
7013
5630
|
"00002-of-00007",
|
|
7014
5631
|
"00003-of-00007",
|
|
@@ -7267,8 +5884,6 @@
|
|
|
7267
5884
|
"model_format": "pytorch",
|
|
7268
5885
|
"model_size_in_billions": "0_5",
|
|
7269
5886
|
"quantizations": [
|
|
7270
|
-
"4-bit",
|
|
7271
|
-
"8-bit",
|
|
7272
5887
|
"none"
|
|
7273
5888
|
],
|
|
7274
5889
|
"model_id": "qwen/Qwen2.5-Coder-0.5B",
|
|
@@ -7279,8 +5894,6 @@
|
|
|
7279
5894
|
"model_format": "pytorch",
|
|
7280
5895
|
"model_size_in_billions": "1_5",
|
|
7281
5896
|
"quantizations": [
|
|
7282
|
-
"4-bit",
|
|
7283
|
-
"8-bit",
|
|
7284
5897
|
"none"
|
|
7285
5898
|
],
|
|
7286
5899
|
"model_id": "qwen/Qwen2.5-Coder-1.5B",
|
|
@@ -7291,8 +5904,6 @@
|
|
|
7291
5904
|
"model_format": "pytorch",
|
|
7292
5905
|
"model_size_in_billions": "3",
|
|
7293
5906
|
"quantizations": [
|
|
7294
|
-
"4-bit",
|
|
7295
|
-
"8-bit",
|
|
7296
5907
|
"none"
|
|
7297
5908
|
],
|
|
7298
5909
|
"model_id": "qwen/Qwen2.5-Coder-3B",
|
|
@@ -7303,8 +5914,6 @@
|
|
|
7303
5914
|
"model_format": "pytorch",
|
|
7304
5915
|
"model_size_in_billions": 7,
|
|
7305
5916
|
"quantizations": [
|
|
7306
|
-
"4-bit",
|
|
7307
|
-
"8-bit",
|
|
7308
5917
|
"none"
|
|
7309
5918
|
],
|
|
7310
5919
|
"model_id": "qwen/Qwen2.5-Coder-7B",
|
|
@@ -7315,8 +5924,6 @@
|
|
|
7315
5924
|
"model_format": "pytorch",
|
|
7316
5925
|
"model_size_in_billions": 14,
|
|
7317
5926
|
"quantizations": [
|
|
7318
|
-
"4-bit",
|
|
7319
|
-
"8-bit",
|
|
7320
5927
|
"none"
|
|
7321
5928
|
],
|
|
7322
5929
|
"model_id": "qwen/Qwen2.5-Coder-14B",
|
|
@@ -7327,8 +5934,6 @@
|
|
|
7327
5934
|
"model_format": "pytorch",
|
|
7328
5935
|
"model_size_in_billions": 32,
|
|
7329
5936
|
"quantizations": [
|
|
7330
|
-
"4-bit",
|
|
7331
|
-
"8-bit",
|
|
7332
5937
|
"none"
|
|
7333
5938
|
],
|
|
7334
5939
|
"model_id": "qwen/Qwen2.5-Coder-32B",
|
|
@@ -7355,8 +5960,6 @@
|
|
|
7355
5960
|
"model_format": "pytorch",
|
|
7356
5961
|
"model_size_in_billions": "0_5",
|
|
7357
5962
|
"quantizations": [
|
|
7358
|
-
"4-bit",
|
|
7359
|
-
"8-bit",
|
|
7360
5963
|
"none"
|
|
7361
5964
|
],
|
|
7362
5965
|
"model_id": "qwen/Qwen2.5-Coder-0.5B-Instruct",
|
|
@@ -7367,19 +5970,16 @@
|
|
|
7367
5970
|
"model_format": "pytorch",
|
|
7368
5971
|
"model_size_in_billions": "1_5",
|
|
7369
5972
|
"quantizations": [
|
|
7370
|
-
"4-bit",
|
|
7371
|
-
"8-bit",
|
|
7372
5973
|
"none"
|
|
7373
5974
|
],
|
|
7374
5975
|
"model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct",
|
|
7375
5976
|
"model_revision": "master",
|
|
7376
5977
|
"model_hub": "modelscope"
|
|
7377
|
-
},
|
|
5978
|
+
},
|
|
5979
|
+
{
|
|
7378
5980
|
"model_format": "pytorch",
|
|
7379
5981
|
"model_size_in_billions": "3",
|
|
7380
5982
|
"quantizations": [
|
|
7381
|
-
"4-bit",
|
|
7382
|
-
"8-bit",
|
|
7383
5983
|
"none"
|
|
7384
5984
|
],
|
|
7385
5985
|
"model_id": "qwen/Qwen2.5-Coder-3B-Instruct",
|
|
@@ -7390,8 +5990,6 @@
|
|
|
7390
5990
|
"model_format": "pytorch",
|
|
7391
5991
|
"model_size_in_billions": 7,
|
|
7392
5992
|
"quantizations": [
|
|
7393
|
-
"4-bit",
|
|
7394
|
-
"8-bit",
|
|
7395
5993
|
"none"
|
|
7396
5994
|
],
|
|
7397
5995
|
"model_id": "qwen/Qwen2.5-Coder-7B-Instruct",
|
|
@@ -7402,8 +6000,6 @@
|
|
|
7402
6000
|
"model_format": "pytorch",
|
|
7403
6001
|
"model_size_in_billions": 14,
|
|
7404
6002
|
"quantizations": [
|
|
7405
|
-
"4-bit",
|
|
7406
|
-
"8-bit",
|
|
7407
6003
|
"none"
|
|
7408
6004
|
],
|
|
7409
6005
|
"model_id": "qwen/Qwen2.5-Coder-14B-Instruct",
|
|
@@ -7414,8 +6010,6 @@
|
|
|
7414
6010
|
"model_format": "pytorch",
|
|
7415
6011
|
"model_size_in_billions": 32,
|
|
7416
6012
|
"quantizations": [
|
|
7417
|
-
"4-bit",
|
|
7418
|
-
"8-bit",
|
|
7419
6013
|
"none"
|
|
7420
6014
|
],
|
|
7421
6015
|
"model_id": "qwen/Qwen2.5-Coder-32B-Instruct",
|
|
@@ -7548,7 +6142,6 @@
|
|
|
7548
6142
|
"model_revision": "master",
|
|
7549
6143
|
"model_hub": "modelscope"
|
|
7550
6144
|
},
|
|
7551
|
-
|
|
7552
6145
|
{
|
|
7553
6146
|
"model_format": "ggufv2",
|
|
7554
6147
|
"model_size_in_billions": "1_5",
|
|
@@ -7641,8 +6234,6 @@
|
|
|
7641
6234
|
"model_format": "pytorch",
|
|
7642
6235
|
"model_size_in_billions": 32,
|
|
7643
6236
|
"quantizations": [
|
|
7644
|
-
"4-bit",
|
|
7645
|
-
"8-bit",
|
|
7646
6237
|
"none"
|
|
7647
6238
|
],
|
|
7648
6239
|
"model_id": "Qwen/QwQ-32B-Preview",
|
|
@@ -7692,7 +6283,7 @@
|
|
|
7692
6283
|
"<|im_end|>"
|
|
7693
6284
|
]
|
|
7694
6285
|
},
|
|
7695
|
-
{
|
|
6286
|
+
{
|
|
7696
6287
|
"version": 1,
|
|
7697
6288
|
"context_length": 131072,
|
|
7698
6289
|
"model_name": "QwQ-32B",
|
|
@@ -7702,7 +6293,8 @@
|
|
|
7702
6293
|
],
|
|
7703
6294
|
"model_ability": [
|
|
7704
6295
|
"chat",
|
|
7705
|
-
"reasoning"
|
|
6296
|
+
"reasoning",
|
|
6297
|
+
"tools"
|
|
7706
6298
|
],
|
|
7707
6299
|
"model_description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.",
|
|
7708
6300
|
"model_specs": [
|
|
@@ -7710,8 +6302,6 @@
|
|
|
7710
6302
|
"model_format": "pytorch",
|
|
7711
6303
|
"model_size_in_billions": 32,
|
|
7712
6304
|
"quantizations": [
|
|
7713
|
-
"4-bit",
|
|
7714
|
-
"8-bit",
|
|
7715
6305
|
"none"
|
|
7716
6306
|
],
|
|
7717
6307
|
"model_id": "Qwen/QwQ-32B",
|
|
@@ -7812,8 +6402,6 @@
|
|
|
7812
6402
|
"model_format": "pytorch",
|
|
7813
6403
|
"model_size_in_billions": "1_5",
|
|
7814
6404
|
"quantizations": [
|
|
7815
|
-
"4-bit",
|
|
7816
|
-
"8-bit",
|
|
7817
6405
|
"none"
|
|
7818
6406
|
],
|
|
7819
6407
|
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
|
|
@@ -7852,8 +6440,6 @@
|
|
|
7852
6440
|
"model_format": "pytorch",
|
|
7853
6441
|
"model_size_in_billions": 7,
|
|
7854
6442
|
"quantizations": [
|
|
7855
|
-
"4-bit",
|
|
7856
|
-
"8-bit",
|
|
7857
6443
|
"none"
|
|
7858
6444
|
],
|
|
7859
6445
|
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
|
|
@@ -7901,8 +6487,6 @@
|
|
|
7901
6487
|
"model_format": "pytorch",
|
|
7902
6488
|
"model_size_in_billions": 14,
|
|
7903
6489
|
"quantizations": [
|
|
7904
|
-
"4-bit",
|
|
7905
|
-
"8-bit",
|
|
7906
6490
|
"none"
|
|
7907
6491
|
],
|
|
7908
6492
|
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
|
|
@@ -7941,8 +6525,6 @@
|
|
|
7941
6525
|
"model_format": "pytorch",
|
|
7942
6526
|
"model_size_in_billions": 32,
|
|
7943
6527
|
"quantizations": [
|
|
7944
|
-
"4-bit",
|
|
7945
|
-
"8-bit",
|
|
7946
6528
|
"none"
|
|
7947
6529
|
],
|
|
7948
6530
|
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
|
|
@@ -8016,8 +6598,6 @@
|
|
|
8016
6598
|
"model_format": "pytorch",
|
|
8017
6599
|
"model_size_in_billions": 8,
|
|
8018
6600
|
"quantizations": [
|
|
8019
|
-
"4-bit",
|
|
8020
|
-
"8-bit",
|
|
8021
6601
|
"none"
|
|
8022
6602
|
],
|
|
8023
6603
|
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
|
@@ -8057,8 +6637,6 @@
|
|
|
8057
6637
|
"model_format": "pytorch",
|
|
8058
6638
|
"model_size_in_billions": 70,
|
|
8059
6639
|
"quantizations": [
|
|
8060
|
-
"4-bit",
|
|
8061
|
-
"8-bit",
|
|
8062
6640
|
"none"
|
|
8063
6641
|
],
|
|
8064
6642
|
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
|
|
@@ -8137,8 +6715,6 @@
|
|
|
8137
6715
|
"model_format": "pytorch",
|
|
8138
6716
|
"model_size_in_billions": "1_5",
|
|
8139
6717
|
"quantizations": [
|
|
8140
|
-
"4-bit",
|
|
8141
|
-
"8-bit",
|
|
8142
6718
|
"none"
|
|
8143
6719
|
],
|
|
8144
6720
|
"model_id": "ZhipuAI/glm-edge-1.5b-chat",
|
|
@@ -8148,8 +6724,6 @@
|
|
|
8148
6724
|
"model_format": "pytorch",
|
|
8149
6725
|
"model_size_in_billions": "4",
|
|
8150
6726
|
"quantizations": [
|
|
8151
|
-
"4-bit",
|
|
8152
|
-
"8-bit",
|
|
8153
6727
|
"none"
|
|
8154
6728
|
],
|
|
8155
6729
|
"model_id": "ZhipuAI/glm-edge-4b-chat",
|
|
@@ -8248,8 +6822,6 @@
|
|
|
8248
6822
|
"model_format": "pytorch",
|
|
8249
6823
|
"model_size_in_billions": "2",
|
|
8250
6824
|
"quantizations": [
|
|
8251
|
-
"4-bit",
|
|
8252
|
-
"8-bit",
|
|
8253
6825
|
"none"
|
|
8254
6826
|
],
|
|
8255
6827
|
"model_id": "ZhipuAI/glm-edge-v-2b",
|
|
@@ -8259,8 +6831,6 @@
|
|
|
8259
6831
|
"model_format": "pytorch",
|
|
8260
6832
|
"model_size_in_billions": "5",
|
|
8261
6833
|
"quantizations": [
|
|
8262
|
-
"4-bit",
|
|
8263
|
-
"8-bit",
|
|
8264
6834
|
"none"
|
|
8265
6835
|
],
|
|
8266
6836
|
"model_id": "ZhipuAI/glm-edge-v-5b",
|
|
@@ -8379,8 +6949,6 @@
|
|
|
8379
6949
|
"model_format": "pytorch",
|
|
8380
6950
|
"model_size_in_billions": 72,
|
|
8381
6951
|
"quantizations": [
|
|
8382
|
-
"4-bit",
|
|
8383
|
-
"8-bit",
|
|
8384
6952
|
"none"
|
|
8385
6953
|
],
|
|
8386
6954
|
"model_id": "Qwen/QVQ-72B-Preview",
|
|
@@ -8428,8 +6996,6 @@
|
|
|
8428
6996
|
"model_format": "pytorch",
|
|
8429
6997
|
"model_size_in_billions": 7,
|
|
8430
6998
|
"quantizations": [
|
|
8431
|
-
"4-bit",
|
|
8432
|
-
"8-bit",
|
|
8433
6999
|
"none"
|
|
8434
7000
|
],
|
|
8435
7001
|
"model_id": "AIDC-AI/Marco-o1",
|
|
@@ -8489,8 +7055,6 @@
|
|
|
8489
7055
|
"model_format": "pytorch",
|
|
8490
7056
|
"model_size_in_billions": "9",
|
|
8491
7057
|
"quantizations": [
|
|
8492
|
-
"4-bit",
|
|
8493
|
-
"8-bit",
|
|
8494
7058
|
"none"
|
|
8495
7059
|
],
|
|
8496
7060
|
"model_id": "ZhipuAI/cogagent-9b-20241220",
|
|
@@ -8527,8 +7091,6 @@
|
|
|
8527
7091
|
"model_format": "pytorch",
|
|
8528
7092
|
"model_size_in_billions": 8,
|
|
8529
7093
|
"quantizations": [
|
|
8530
|
-
"4-bit",
|
|
8531
|
-
"8-bit",
|
|
8532
7094
|
"none"
|
|
8533
7095
|
],
|
|
8534
7096
|
"model_id": "Shanghai_AI_Laboratory/internlm3-8b-instruct",
|
|
@@ -8570,13 +7132,13 @@
|
|
|
8570
7132
|
"model_hub": "modelscope"
|
|
8571
7133
|
},
|
|
8572
7134
|
{
|
|
8573
|
-
"model_format":"mlx",
|
|
8574
|
-
"model_size_in_billions":8,
|
|
8575
|
-
"quantizations":[
|
|
7135
|
+
"model_format": "mlx",
|
|
7136
|
+
"model_size_in_billions": 8,
|
|
7137
|
+
"quantizations": [
|
|
8576
7138
|
"4bit"
|
|
8577
7139
|
],
|
|
8578
7140
|
"model_hub": "modelscope",
|
|
8579
|
-
"model_id":"mlx-community/internlm3-8b-instruct-{quantization}"
|
|
7141
|
+
"model_id": "mlx-community/internlm3-8b-instruct-{quantization}"
|
|
8580
7142
|
}
|
|
8581
7143
|
],
|
|
8582
7144
|
"chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
@@ -8606,8 +7168,6 @@
|
|
|
8606
7168
|
"model_format": "pytorch",
|
|
8607
7169
|
"model_size_in_billions": 7,
|
|
8608
7170
|
"quantizations": [
|
|
8609
|
-
"4-bit",
|
|
8610
|
-
"8-bit",
|
|
8611
7171
|
"none"
|
|
8612
7172
|
],
|
|
8613
7173
|
"model_id": "Qwen/Qwen2.5-7B-Instruct-1M",
|
|
@@ -8617,8 +7177,6 @@
|
|
|
8617
7177
|
"model_format": "pytorch",
|
|
8618
7178
|
"model_size_in_billions": 14,
|
|
8619
7179
|
"quantizations": [
|
|
8620
|
-
"4-bit",
|
|
8621
|
-
"8-bit",
|
|
8622
7180
|
"none"
|
|
8623
7181
|
],
|
|
8624
7182
|
"model_id": "Qwen/Qwen2.5-14B-Instruct-1M",
|
|
@@ -8652,15 +7210,13 @@
|
|
|
8652
7210
|
"model_format": "pytorch",
|
|
8653
7211
|
"model_size_in_billions": 3,
|
|
8654
7212
|
"quantizations": [
|
|
8655
|
-
"4-bit",
|
|
8656
|
-
"8-bit",
|
|
8657
7213
|
"none"
|
|
8658
7214
|
],
|
|
8659
7215
|
"model_id": "moonshotai/Moonlight-16B-A3B-Instruct",
|
|
8660
7216
|
"model_hub": "modelscope"
|
|
8661
7217
|
}
|
|
8662
7218
|
],
|
|
8663
|
-
"chat_template":"{%- for message in messages -%}{%- if loop.first and messages[0]['role'] != 'system' -%}<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>{%- endif -%}{%- if message['role'] == 'system' -%}<|im_system|>{%- endif -%}{%- if message['role'] == 'user' -%}<|im_user|>{%- endif -%}{%- if message['role'] == 'assistant' -%}<|im_assistant|>{%- endif -%}{{ message['role'] }}<|im_middle|>{{message['content']}}<|im_end|>{%- endfor -%}{%- if add_generation_prompt -%}<|im_assistant|>assistant<|im_middle|>{%- endif -%}",
|
|
7219
|
+
"chat_template": "{%- for message in messages -%}{%- if loop.first and messages[0]['role'] != 'system' -%}<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>{%- endif -%}{%- if message['role'] == 'system' -%}<|im_system|>{%- endif -%}{%- if message['role'] == 'user' -%}<|im_user|>{%- endif -%}{%- if message['role'] == 'assistant' -%}<|im_assistant|>{%- endif -%}{{ message['role'] }}<|im_middle|>{{message['content']}}<|im_end|>{%- endfor -%}{%- if add_generation_prompt -%}<|im_assistant|>assistant<|im_middle|>{%- endif -%}",
|
|
8664
7220
|
"stop_token_ids": [
|
|
8665
7221
|
163586
|
|
8666
7222
|
],
|
|
@@ -8685,8 +7241,6 @@
|
|
|
8685
7241
|
"model_format": "pytorch",
|
|
8686
7242
|
"model_size_in_billions": 7,
|
|
8687
7243
|
"quantizations": [
|
|
8688
|
-
"4-bit",
|
|
8689
|
-
"8-bit",
|
|
8690
7244
|
"none"
|
|
8691
7245
|
],
|
|
8692
7246
|
"model_id": "AI-ModelScope/Fin-R1",
|
|
@@ -8858,25 +7412,246 @@
|
|
|
8858
7412
|
],
|
|
8859
7413
|
"model_id": "ZhipuAI/GLM-4-32B-0414",
|
|
8860
7414
|
"model_hub": "modelscope"
|
|
8861
|
-
}
|
|
8862
|
-
|
|
8863
|
-
|
|
8864
|
-
|
|
8865
|
-
|
|
8866
|
-
|
|
8867
|
-
|
|
8868
|
-
|
|
8869
|
-
|
|
8870
|
-
|
|
8871
|
-
|
|
8872
|
-
|
|
8873
|
-
|
|
8874
|
-
|
|
8875
|
-
|
|
8876
|
-
|
|
8877
|
-
|
|
8878
|
-
|
|
8879
|
-
|
|
7415
|
+
},
|
|
7416
|
+
{
|
|
7417
|
+
"model_format": "mlx",
|
|
7418
|
+
"model_size_in_billions": 9,
|
|
7419
|
+
"quantizations": [
|
|
7420
|
+
"4bit",
|
|
7421
|
+
"6bit",
|
|
7422
|
+
"8bit",
|
|
7423
|
+
"bf16"
|
|
7424
|
+
],
|
|
7425
|
+
"model_id": "mlx-community/GLM-4-9B-0414-{quantization}",
|
|
7426
|
+
"model_hub": "modelscope"
|
|
7427
|
+
},
|
|
7428
|
+
{
|
|
7429
|
+
"model_format": "mlx",
|
|
7430
|
+
"model_size_in_billions": 32,
|
|
7431
|
+
"quantizations": [
|
|
7432
|
+
"4bit",
|
|
7433
|
+
"8bit"
|
|
7434
|
+
],
|
|
7435
|
+
"model_id": "mlx-community/GLM-4-32B-0414-{quantization}",
|
|
7436
|
+
"model_hub": "modelscope"
|
|
7437
|
+
},
|
|
7438
|
+
{
|
|
7439
|
+
"model_format": "ggufv2",
|
|
7440
|
+
"model_size_in_billions": 9,
|
|
7441
|
+
"quantizations": [
|
|
7442
|
+
"IQ2_M",
|
|
7443
|
+
"IQ3_M",
|
|
7444
|
+
"IQ3_XS",
|
|
7445
|
+
"IQ3_XXS",
|
|
7446
|
+
"IQ4_NL",
|
|
7447
|
+
"IQ4_XS",
|
|
7448
|
+
"Q2_K",
|
|
7449
|
+
"Q2_K_L",
|
|
7450
|
+
"Q3_K_L",
|
|
7451
|
+
"Q3_K_M",
|
|
7452
|
+
"Q3_K_S",
|
|
7453
|
+
"Q3_K_XL",
|
|
7454
|
+
"Q4_0",
|
|
7455
|
+
"Q4_1",
|
|
7456
|
+
"Q4_K_L",
|
|
7457
|
+
"Q4_K_M",
|
|
7458
|
+
"Q4_K_S",
|
|
7459
|
+
"Q5_K_L",
|
|
7460
|
+
"Q5_K_M",
|
|
7461
|
+
"Q5_K_S",
|
|
7462
|
+
"Q6_K",
|
|
7463
|
+
"Q6_K_L",
|
|
7464
|
+
"Q8_0",
|
|
7465
|
+
"bf16"
|
|
7466
|
+
],
|
|
7467
|
+
"model_id": "bartowski/THUDM_GLM-4-9B-0414-GGUF",
|
|
7468
|
+
"model_file_name_template": "THUDM_GLM-4-9B-0414-{quantization}.gguf",
|
|
7469
|
+
"model_hub": "modelscope"
|
|
7470
|
+
},
|
|
7471
|
+
{
|
|
7472
|
+
"model_format": "ggufv2",
|
|
7473
|
+
"model_size_in_billions": 32,
|
|
7474
|
+
"quantizations": [
|
|
7475
|
+
"IQ2_M",
|
|
7476
|
+
"IQ2_S",
|
|
7477
|
+
"IQ2_XS",
|
|
7478
|
+
"IQ3_M",
|
|
7479
|
+
"IQ3_XS",
|
|
7480
|
+
"IQ3_XXS",
|
|
7481
|
+
"IQ4_NL",
|
|
7482
|
+
"IQ4_XS",
|
|
7483
|
+
"Q2_K",
|
|
7484
|
+
"Q2_K_L",
|
|
7485
|
+
"Q3_K_L",
|
|
7486
|
+
"Q3_K_M",
|
|
7487
|
+
"Q3_K_S",
|
|
7488
|
+
"Q3_K_XL",
|
|
7489
|
+
"Q4_0",
|
|
7490
|
+
"Q4_1",
|
|
7491
|
+
"Q4_K_L",
|
|
7492
|
+
"Q4_K_M",
|
|
7493
|
+
"Q4_K_S",
|
|
7494
|
+
"Q5_K_L",
|
|
7495
|
+
"Q5_K_M",
|
|
7496
|
+
"Q5_K_S",
|
|
7497
|
+
"Q6_K",
|
|
7498
|
+
"Q6_K_L",
|
|
7499
|
+
"Q8_0"
|
|
7500
|
+
],
|
|
7501
|
+
"model_id": "bartowski/THUDM_GLM-4-9B-0414-GGUF",
|
|
7502
|
+
"model_file_name_template": "THUDM_GLM-4-9B-0414-{quantization}.gguf",
|
|
7503
|
+
"model_hub": "modelscope"
|
|
7504
|
+
}
|
|
7505
|
+
],
|
|
7506
|
+
"chat_template": "[gMASK]<sop>{%- if tools -%}<|system|>\n# 可用工具\n{% for tool in tools %}{%- set function = tool.function if tool.get(\"function\") else tool %}\n\n## {{ function.name }}\n\n{{ function | tojson(indent=4, ensure_ascii=False) }}\n在调用上述函数时,请使用 Json 格式表示调用的参数。{%- endfor %}{%- endif -%}{%- for msg in messages %}{%- if msg.role == 'system' %}<|system|>\n{{ msg.content }}{%- endif %}{%- endfor %}{%- for message in messages if message.role != 'system' %}{%- set role = message['role'] %}{%- set content = message['content'] %}{%- set meta = message.get(\"metadata\", \"\") %}{%- if role == 'user' %}<|user|>\n{{ content }}{%- elif role == 'assistant' and not meta %}<|assistant|>\n{{ content }}{%- elif role == 'assistant' and meta %}<|assistant|>{{ meta }} \n{{ content }}{%- elif role == 'observation' %}<|observation|>\n{{ content }}{%- endif %}{%- endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
|
|
7507
|
+
"stop_token_ids": [
|
|
7508
|
+
151329,
|
|
7509
|
+
151336,
|
|
7510
|
+
151338
|
|
7511
|
+
],
|
|
7512
|
+
"stop": [
|
|
7513
|
+
"<|endoftext|>",
|
|
7514
|
+
"<|user|>",
|
|
7515
|
+
"<|observation|>"
|
|
7516
|
+
],
|
|
7517
|
+
"virtualenv": {
|
|
7518
|
+
"packages": [
|
|
7519
|
+
"transformers>=4.51.3",
|
|
7520
|
+
"mlx-lm>=0.23.1 ; sys_platform=='darwin'",
|
|
7521
|
+
"numpy==1.26.4"
|
|
7522
|
+
]
|
|
7523
|
+
}
|
|
7524
|
+
},
|
|
7525
|
+
{
|
|
7526
|
+
"version":1,
|
|
7527
|
+
"context_length":32768,
|
|
7528
|
+
"model_name":"Ovis2",
|
|
7529
|
+
"model_lang":[
|
|
7530
|
+
"en",
|
|
7531
|
+
"zh"
|
|
7532
|
+
],
|
|
7533
|
+
"model_ability":[
|
|
7534
|
+
"chat",
|
|
7535
|
+
"vision"
|
|
7536
|
+
],
|
|
7537
|
+
"model_description":"Ovis (Open VISion) is a novel Multimodal Large Language Model (MLLM) architecture, designed to structurally align visual and textual embeddings.",
|
|
7538
|
+
"model_specs":[
|
|
7539
|
+
{
|
|
7540
|
+
"model_format":"pytorch",
|
|
7541
|
+
"model_size_in_billions":1,
|
|
7542
|
+
"quantizations":[
|
|
7543
|
+
"none"
|
|
7544
|
+
],
|
|
7545
|
+
"model_id":"AIDC-AI/Ovis2-1B",
|
|
7546
|
+
"model_hub": "modelscope"
|
|
7547
|
+
},
|
|
7548
|
+
{
|
|
7549
|
+
"model_format":"pytorch",
|
|
7550
|
+
"model_size_in_billions":2,
|
|
7551
|
+
"quantizations":[
|
|
7552
|
+
"none"
|
|
7553
|
+
],
|
|
7554
|
+
"model_id":"AIDC-AI/Ovis2-2B",
|
|
7555
|
+
"model_hub": "modelscope"
|
|
7556
|
+
},
|
|
7557
|
+
{
|
|
7558
|
+
"model_format":"pytorch",
|
|
7559
|
+
"model_size_in_billions":4,
|
|
7560
|
+
"quantizations":[
|
|
7561
|
+
"none"
|
|
7562
|
+
],
|
|
7563
|
+
"model_id":"AIDC-AI/Ovis2-4B",
|
|
7564
|
+
"model_hub": "modelscope"
|
|
7565
|
+
},
|
|
7566
|
+
{
|
|
7567
|
+
"model_format":"pytorch",
|
|
7568
|
+
"model_size_in_billions":8,
|
|
7569
|
+
"quantizations":[
|
|
7570
|
+
"none"
|
|
7571
|
+
],
|
|
7572
|
+
"model_id":"AIDC-AI/Ovis2-8B",
|
|
7573
|
+
"model_hub": "modelscope"
|
|
7574
|
+
},
|
|
7575
|
+
{
|
|
7576
|
+
"model_format":"pytorch",
|
|
7577
|
+
"model_size_in_billions":16,
|
|
7578
|
+
"quantizations":[
|
|
7579
|
+
"none"
|
|
7580
|
+
],
|
|
7581
|
+
"model_id":"AIDC-AI/Ovis2-16B",
|
|
7582
|
+
"model_hub": "modelscope"
|
|
7583
|
+
},
|
|
7584
|
+
{
|
|
7585
|
+
"model_format":"pytorch",
|
|
7586
|
+
"model_size_in_billions":34,
|
|
7587
|
+
"quantizations":[
|
|
7588
|
+
"none"
|
|
7589
|
+
],
|
|
7590
|
+
"model_id":"AIDC-AI/Ovis2-34B",
|
|
7591
|
+
"model_hub": "modelscope"
|
|
7592
|
+
},
|
|
7593
|
+
{
|
|
7594
|
+
"model_format":"gptq",
|
|
7595
|
+
"model_size_in_billions":2,
|
|
7596
|
+
"quantizations":[
|
|
7597
|
+
"Int4"
|
|
7598
|
+
],
|
|
7599
|
+
"model_id":"AIDC-AI/Ovis2-2B-GPTQ-{quantization}",
|
|
7600
|
+
"model_hub": "modelscope"
|
|
7601
|
+
},
|
|
7602
|
+
{
|
|
7603
|
+
"model_format":"gptq",
|
|
7604
|
+
"model_size_in_billions":4,
|
|
7605
|
+
"quantizations":[
|
|
7606
|
+
"Int4"
|
|
7607
|
+
],
|
|
7608
|
+
"model_id":"AIDC-AI/Ovis2-4B-GPTQ-{quantization}",
|
|
7609
|
+
"model_hub": "modelscope"
|
|
7610
|
+
},
|
|
7611
|
+
{
|
|
7612
|
+
"model_format":"gptq",
|
|
7613
|
+
"model_size_in_billions":8,
|
|
7614
|
+
"quantizations":[
|
|
7615
|
+
"Int4"
|
|
7616
|
+
],
|
|
7617
|
+
"model_id":"AIDC-AI/Ovis2-8B-GPTQ-{quantization}",
|
|
7618
|
+
"model_hub": "modelscope"
|
|
7619
|
+
},
|
|
7620
|
+
{
|
|
7621
|
+
"model_format":"gptq",
|
|
7622
|
+
"model_size_in_billions":16,
|
|
7623
|
+
"quantizations":[
|
|
7624
|
+
"Int4"
|
|
7625
|
+
],
|
|
7626
|
+
"model_id":"AIDC-AI/Ovis2-16B-GPTQ-{quantization}",
|
|
7627
|
+
"model_hub": "modelscope"
|
|
7628
|
+
},
|
|
7629
|
+
{
|
|
7630
|
+
"model_format":"gptq",
|
|
7631
|
+
"model_size_in_billions":34,
|
|
7632
|
+
"quantizations":[
|
|
7633
|
+
"Int4",
|
|
7634
|
+
"Int8"
|
|
7635
|
+
],
|
|
7636
|
+
"model_id":"AIDC-AI/Ovis2-34B-GPTQ-{quantization}",
|
|
7637
|
+
"model_hub": "modelscope"
|
|
7638
|
+
}
|
|
7639
|
+
],
|
|
7640
|
+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
|
|
7641
|
+
"stop_token_ids": [
|
|
7642
|
+
151645,
|
|
7643
|
+
151643
|
|
7644
|
+
],
|
|
7645
|
+
"stop": [
|
|
7646
|
+
"<|im_end|>",
|
|
7647
|
+
"<|endoftext|>"
|
|
7648
|
+
]
|
|
7649
|
+
},
|
|
7650
|
+
{
|
|
7651
|
+
"version": 1,
|
|
7652
|
+
"context_length": 32768,
|
|
7653
|
+
"model_name": "skywork-or1-preview",
|
|
7654
|
+
"model_lang": [
|
|
8880
7655
|
"en",
|
|
8881
7656
|
"zh"
|
|
8882
7657
|
],
|
|
@@ -8993,5 +7768,602 @@
|
|
|
8993
7768
|
"<|im_start|>",
|
|
8994
7769
|
"<|im_end|>"
|
|
8995
7770
|
]
|
|
7771
|
+
},
|
|
7772
|
+
{
|
|
7773
|
+
"version": 1,
|
|
7774
|
+
"context_length": 40960,
|
|
7775
|
+
"model_name": "qwen3",
|
|
7776
|
+
"model_lang": [
|
|
7777
|
+
"en",
|
|
7778
|
+
"zh"
|
|
7779
|
+
],
|
|
7780
|
+
"model_ability": [
|
|
7781
|
+
"chat",
|
|
7782
|
+
"reasoning",
|
|
7783
|
+
"tools"
|
|
7784
|
+
],
|
|
7785
|
+
"model_description": "Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support",
|
|
7786
|
+
"model_specs": [
|
|
7787
|
+
{
|
|
7788
|
+
"model_format": "pytorch",
|
|
7789
|
+
"model_size_in_billions": "0_6",
|
|
7790
|
+
"quantizations": [
|
|
7791
|
+
"none"
|
|
7792
|
+
],
|
|
7793
|
+
"model_id": "Qwen/Qwen3-0.6B",
|
|
7794
|
+
"model_hub": "modelscope"
|
|
7795
|
+
},
|
|
7796
|
+
{
|
|
7797
|
+
"model_format": "fp8",
|
|
7798
|
+
"model_size_in_billions": "0_6",
|
|
7799
|
+
"quantizations": [
|
|
7800
|
+
"fp8"
|
|
7801
|
+
],
|
|
7802
|
+
"model_id": "Qwen/Qwen3-0.6B-FP8",
|
|
7803
|
+
"model_hub": "modelscope"
|
|
7804
|
+
},
|
|
7805
|
+
{
|
|
7806
|
+
"model_format": "gptq",
|
|
7807
|
+
"model_size_in_billions": "0_6",
|
|
7808
|
+
"quantizations": [
|
|
7809
|
+
"Int4",
|
|
7810
|
+
"Int8"
|
|
7811
|
+
],
|
|
7812
|
+
"model_id": "JunHowie/Qwen3-0.6B-GPTQ-{quantization}",
|
|
7813
|
+
"model_hub": "modelscope"
|
|
7814
|
+
},
|
|
7815
|
+
{
|
|
7816
|
+
"model_format": "ggufv2",
|
|
7817
|
+
"model_size_in_billions": "0_6",
|
|
7818
|
+
"quantizations": [
|
|
7819
|
+
"Q2_K",
|
|
7820
|
+
"Q2_K_L",
|
|
7821
|
+
"Q3_K_M",
|
|
7822
|
+
"Q3_K_S",
|
|
7823
|
+
"Q4_0",
|
|
7824
|
+
"Q4_1",
|
|
7825
|
+
"Q4_K_M",
|
|
7826
|
+
"Q5_K_M",
|
|
7827
|
+
"Q6_K",
|
|
7828
|
+
"Q8_0",
|
|
7829
|
+
"BF16",
|
|
7830
|
+
"UD-IQ1_M",
|
|
7831
|
+
"UD-IQ1_S",
|
|
7832
|
+
"UD-IQ2_M",
|
|
7833
|
+
"UD-IQ2_XXS",
|
|
7834
|
+
"UD-IQ3_XXS",
|
|
7835
|
+
"UD-Q2_K_XL",
|
|
7836
|
+
"UD-Q3_K_XL",
|
|
7837
|
+
"UD-Q4_K_XL",
|
|
7838
|
+
"UD-Q5_K_XL",
|
|
7839
|
+
"UD-Q6_K_XL",
|
|
7840
|
+
"UD-Q8_K_XL",
|
|
7841
|
+
"IQ4_NL",
|
|
7842
|
+
"IQ4_XS"
|
|
7843
|
+
],
|
|
7844
|
+
"model_id": "unsloth/Qwen3-0.6B-GGUF",
|
|
7845
|
+
"model_hub": "modelscope",
|
|
7846
|
+
"model_file_name_template": "Qwen3-0.6B-{quantization}.gguf"
|
|
7847
|
+
},
|
|
7848
|
+
{
|
|
7849
|
+
"model_format": "pytorch",
|
|
7850
|
+
"model_size_in_billions": "1_7",
|
|
7851
|
+
"quantizations": [
|
|
7852
|
+
"none"
|
|
7853
|
+
],
|
|
7854
|
+
"model_id": "Qwen/Qwen3-1.7B",
|
|
7855
|
+
"model_hub": "modelscope"
|
|
7856
|
+
},
|
|
7857
|
+
{
|
|
7858
|
+
"model_format": "fp8",
|
|
7859
|
+
"model_size_in_billions": "1_7",
|
|
7860
|
+
"quantizations": [
|
|
7861
|
+
"fp8"
|
|
7862
|
+
],
|
|
7863
|
+
"model_id": "Qwen/Qwen3-1.7B-FP8",
|
|
7864
|
+
"model_hub": "modelscope"
|
|
7865
|
+
},
|
|
7866
|
+
{
|
|
7867
|
+
"model_format": "gptq",
|
|
7868
|
+
"model_size_in_billions": "1_7",
|
|
7869
|
+
"quantizations": [
|
|
7870
|
+
"Int4",
|
|
7871
|
+
"Int8"
|
|
7872
|
+
],
|
|
7873
|
+
"model_id": "JunHowie/Qwen3-1.7B-GPTQ-{quantization}",
|
|
7874
|
+
"model_hub": "modelscope"
|
|
7875
|
+
},
|
|
7876
|
+
{
|
|
7877
|
+
"model_format": "ggufv2",
|
|
7878
|
+
"model_size_in_billions": "1_7",
|
|
7879
|
+
"quantizations": [
|
|
7880
|
+
"Q2_K",
|
|
7881
|
+
"Q2_K_L",
|
|
7882
|
+
"Q3_K_M",
|
|
7883
|
+
"Q3_K_S",
|
|
7884
|
+
"Q4_0",
|
|
7885
|
+
"Q4_1",
|
|
7886
|
+
"Q4_K_M",
|
|
7887
|
+
"Q5_K_M",
|
|
7888
|
+
"Q6_K",
|
|
7889
|
+
"Q8_0",
|
|
7890
|
+
"BF16",
|
|
7891
|
+
"UD-IQ1_M",
|
|
7892
|
+
"UD-IQ1_S",
|
|
7893
|
+
"UD-IQ2_M",
|
|
7894
|
+
"UD-IQ2_XXS",
|
|
7895
|
+
"UD-IQ3_XXS",
|
|
7896
|
+
"UD-Q2_K_XL",
|
|
7897
|
+
"UD-Q3_K_XL",
|
|
7898
|
+
"UD-Q4_K_XL",
|
|
7899
|
+
"UD-Q5_K_XL",
|
|
7900
|
+
"UD-Q6_K_XL",
|
|
7901
|
+
"UD-Q8_K_XL",
|
|
7902
|
+
"IQ4_NL",
|
|
7903
|
+
"IQ4_XS"
|
|
7904
|
+
],
|
|
7905
|
+
"model_id": "unsloth/Qwen3-1.7B-GGUF",
|
|
7906
|
+
"model_hub": "modelscope",
|
|
7907
|
+
"model_file_name_template": "Qwen3-1.7B-{quantization}.gguf"
|
|
7908
|
+
},
|
|
7909
|
+
{
|
|
7910
|
+
"model_format": "pytorch",
|
|
7911
|
+
"model_size_in_billions": 4,
|
|
7912
|
+
"quantizations": [
|
|
7913
|
+
"none"
|
|
7914
|
+
],
|
|
7915
|
+
"model_id": "Qwen/Qwen3-4B",
|
|
7916
|
+
"model_hub": "modelscope"
|
|
7917
|
+
},
|
|
7918
|
+
{
|
|
7919
|
+
"model_format": "fp8",
|
|
7920
|
+
"model_size_in_billions": 4,
|
|
7921
|
+
"quantizations": [
|
|
7922
|
+
"fp8"
|
|
7923
|
+
],
|
|
7924
|
+
"model_id": "Qwen/Qwen3-4B-FP8",
|
|
7925
|
+
"model_hub": "modelscope"
|
|
7926
|
+
},
|
|
7927
|
+
{
|
|
7928
|
+
"model_format": "gptq",
|
|
7929
|
+
"model_size_in_billions": 4,
|
|
7930
|
+
"quantizations": [
|
|
7931
|
+
"Int4",
|
|
7932
|
+
"Int8"
|
|
7933
|
+
],
|
|
7934
|
+
"model_id": "JunHowie/Qwen3-4B-GPTQ-{quantization}",
|
|
7935
|
+
"model_hub": "modelscope"
|
|
7936
|
+
},
|
|
7937
|
+
{
|
|
7938
|
+
"model_format": "ggufv2",
|
|
7939
|
+
"model_size_in_billions": 4,
|
|
7940
|
+
"quantizations": [
|
|
7941
|
+
"Q2_K",
|
|
7942
|
+
"Q2_K_L",
|
|
7943
|
+
"Q3_K_M",
|
|
7944
|
+
"Q3_K_S",
|
|
7945
|
+
"Q4_0",
|
|
7946
|
+
"Q4_1",
|
|
7947
|
+
"Q4_K_M",
|
|
7948
|
+
"Q5_K_M",
|
|
7949
|
+
"Q6_K",
|
|
7950
|
+
"Q8_0",
|
|
7951
|
+
"BF16",
|
|
7952
|
+
"UD-IQ1_M",
|
|
7953
|
+
"UD-IQ1_S",
|
|
7954
|
+
"UD-IQ2_M",
|
|
7955
|
+
"UD-IQ2_XXS",
|
|
7956
|
+
"UD-IQ3_XXS",
|
|
7957
|
+
"UD-Q2_K_XL",
|
|
7958
|
+
"UD-Q3_K_XL",
|
|
7959
|
+
"UD-Q4_K_XL",
|
|
7960
|
+
"UD-Q5_K_XL",
|
|
7961
|
+
"UD-Q6_K_XL",
|
|
7962
|
+
"UD-Q8_K_XL",
|
|
7963
|
+
"IQ4_NL",
|
|
7964
|
+
"IQ4_XS"
|
|
7965
|
+
],
|
|
7966
|
+
"model_id": "unsloth/Qwen3-4B-GGUF",
|
|
7967
|
+
"model_hub": "modelscope",
|
|
7968
|
+
"model_file_name_template": "Qwen3-4B-{quantization}.gguf"
|
|
7969
|
+
},
|
|
7970
|
+
{
|
|
7971
|
+
"model_format": "pytorch",
|
|
7972
|
+
"model_size_in_billions": 8,
|
|
7973
|
+
"quantizations": [
|
|
7974
|
+
"none"
|
|
7975
|
+
],
|
|
7976
|
+
"model_id": "Qwen/Qwen3-8B",
|
|
7977
|
+
"model_hub": "modelscope"
|
|
7978
|
+
},
|
|
7979
|
+
{
|
|
7980
|
+
"model_format": "fp8",
|
|
7981
|
+
"model_size_in_billions": 8,
|
|
7982
|
+
"quantizations": [
|
|
7983
|
+
"fp8"
|
|
7984
|
+
],
|
|
7985
|
+
"model_id": "Qwen/Qwen3-8B-FP8",
|
|
7986
|
+
"model_hub": "modelscope"
|
|
7987
|
+
},
|
|
7988
|
+
{
|
|
7989
|
+
"model_format": "gptq",
|
|
7990
|
+
"model_size_in_billions": 8,
|
|
7991
|
+
"quantizations": [
|
|
7992
|
+
"Int4",
|
|
7993
|
+
"Int8"
|
|
7994
|
+
],
|
|
7995
|
+
"model_id": "JunHowie/Qwen3-8B-GPTQ-{quantization}",
|
|
7996
|
+
"model_hub": "modelscope"
|
|
7997
|
+
},
|
|
7998
|
+
{
|
|
7999
|
+
"model_format": "ggufv2",
|
|
8000
|
+
"model_size_in_billions": 8,
|
|
8001
|
+
"quantizations": [
|
|
8002
|
+
"Q2_K",
|
|
8003
|
+
"Q2_K_L",
|
|
8004
|
+
"Q3_K_M",
|
|
8005
|
+
"Q3_K_S",
|
|
8006
|
+
"Q4_0",
|
|
8007
|
+
"Q4_1",
|
|
8008
|
+
"Q4_K_M",
|
|
8009
|
+
"Q5_K_M",
|
|
8010
|
+
"Q6_K",
|
|
8011
|
+
"Q8_0",
|
|
8012
|
+
"BF16",
|
|
8013
|
+
"UD-IQ1_M",
|
|
8014
|
+
"UD-IQ1_S",
|
|
8015
|
+
"UD-IQ2_M",
|
|
8016
|
+
"UD-IQ2_XXS",
|
|
8017
|
+
"UD-IQ3_XXS",
|
|
8018
|
+
"UD-Q2_K_XL",
|
|
8019
|
+
"UD-Q3_K_XL",
|
|
8020
|
+
"UD-Q4_K_XL",
|
|
8021
|
+
"UD-Q5_K_XL",
|
|
8022
|
+
"UD-Q6_K_XL",
|
|
8023
|
+
"UD-Q8_K_XL",
|
|
8024
|
+
"IQ4_NL",
|
|
8025
|
+
"IQ4_XS"
|
|
8026
|
+
],
|
|
8027
|
+
"model_id": "unsloth/Qwen3-8B-GGUF",
|
|
8028
|
+
"model_hub": "modelscope",
|
|
8029
|
+
"model_file_name_template": "Qwen3-8B-{quantization}.gguf"
|
|
8030
|
+
},
|
|
8031
|
+
{
|
|
8032
|
+
"model_format": "pytorch",
|
|
8033
|
+
"model_size_in_billions": 14,
|
|
8034
|
+
"quantizations": [
|
|
8035
|
+
"none"
|
|
8036
|
+
],
|
|
8037
|
+
"model_id": "Qwen/Qwen3-14B",
|
|
8038
|
+
"model_hub": "modelscope"
|
|
8039
|
+
},
|
|
8040
|
+
{
|
|
8041
|
+
"model_format": "fp8",
|
|
8042
|
+
"model_size_in_billions": 14,
|
|
8043
|
+
"quantizations": [
|
|
8044
|
+
"fp8"
|
|
8045
|
+
],
|
|
8046
|
+
"model_id": "Qwen/Qwen3-14B-FP8",
|
|
8047
|
+
"model_hub": "modelscope"
|
|
8048
|
+
},
|
|
8049
|
+
{
|
|
8050
|
+
"model_format": "gptq",
|
|
8051
|
+
"model_size_in_billions": 14,
|
|
8052
|
+
"quantizations": [
|
|
8053
|
+
"Int4",
|
|
8054
|
+
"Int8"
|
|
8055
|
+
],
|
|
8056
|
+
"model_id": "JunHowie/Qwen3-14B-GPTQ-{quantization}",
|
|
8057
|
+
"model_hub": "modelscope"
|
|
8058
|
+
},
|
|
8059
|
+
{
|
|
8060
|
+
"model_format": "ggufv2",
|
|
8061
|
+
"model_size_in_billions": 14,
|
|
8062
|
+
"quantizations": [
|
|
8063
|
+
"Q2_K",
|
|
8064
|
+
"Q2_K_L",
|
|
8065
|
+
"Q3_K_M",
|
|
8066
|
+
"Q3_K_S",
|
|
8067
|
+
"Q4_0",
|
|
8068
|
+
"Q4_1",
|
|
8069
|
+
"Q4_K_M",
|
|
8070
|
+
"Q5_K_M",
|
|
8071
|
+
"Q6_K",
|
|
8072
|
+
"Q8_0",
|
|
8073
|
+
"BF16",
|
|
8074
|
+
"UD-IQ1_M",
|
|
8075
|
+
"UD-IQ1_S",
|
|
8076
|
+
"UD-IQ2_M",
|
|
8077
|
+
"UD-IQ2_XXS",
|
|
8078
|
+
"UD-IQ3_XXS",
|
|
8079
|
+
"UD-Q2_K_XL",
|
|
8080
|
+
"UD-Q3_K_XL",
|
|
8081
|
+
"UD-Q4_K_XL",
|
|
8082
|
+
"UD-Q5_K_XL",
|
|
8083
|
+
"UD-Q6_K_XL",
|
|
8084
|
+
"UD-Q8_K_XL",
|
|
8085
|
+
"IQ4_NL",
|
|
8086
|
+
"IQ4_XS"
|
|
8087
|
+
],
|
|
8088
|
+
"model_id": "unsloth/Qwen3-14B-GGUF",
|
|
8089
|
+
"model_hub": "modelscope",
|
|
8090
|
+
"model_file_name_template": "Qwen3-14B-{quantization}.gguf"
|
|
8091
|
+
},
|
|
8092
|
+
{
|
|
8093
|
+
"model_format": "pytorch",
|
|
8094
|
+
"model_size_in_billions": 30,
|
|
8095
|
+
"activated_size_in_billions": 3,
|
|
8096
|
+
"quantizations": [
|
|
8097
|
+
"none"
|
|
8098
|
+
],
|
|
8099
|
+
"model_id": "Qwen/Qwen3-30B-A3B",
|
|
8100
|
+
"model_hub": "modelscope"
|
|
8101
|
+
},
|
|
8102
|
+
{
|
|
8103
|
+
"model_format": "fp8",
|
|
8104
|
+
"model_size_in_billions": 30,
|
|
8105
|
+
"activated_size_in_billions": 3,
|
|
8106
|
+
"quantizations": [
|
|
8107
|
+
"fp8"
|
|
8108
|
+
],
|
|
8109
|
+
"model_id": "Qwen/Qwen3-30B-A3B-FP8",
|
|
8110
|
+
"model_hub": "modelscope"
|
|
8111
|
+
},
|
|
8112
|
+
{
|
|
8113
|
+
"model_format": "gptq",
|
|
8114
|
+
"model_size_in_billions": 30,
|
|
8115
|
+
"activated_size_in_billions": 3,
|
|
8116
|
+
"quantizations": [
|
|
8117
|
+
"Int4",
|
|
8118
|
+
"Int8"
|
|
8119
|
+
],
|
|
8120
|
+
"model_id": "JunHowie/Qwen3-30B-A3B-GPTQ-{quantization}",
|
|
8121
|
+
"model_hub": "modelscope"
|
|
8122
|
+
},
|
|
8123
|
+
{
|
|
8124
|
+
"model_format": "ggufv2",
|
|
8125
|
+
"model_size_in_billions": 30,
|
|
8126
|
+
"activated_size_in_billions": 3,
|
|
8127
|
+
"quantizations": [
|
|
8128
|
+
"Q2_K",
|
|
8129
|
+
"Q2_K_L",
|
|
8130
|
+
"Q3_K_M",
|
|
8131
|
+
"Q3_K_S",
|
|
8132
|
+
"Q4_0",
|
|
8133
|
+
"Q4_1",
|
|
8134
|
+
"Q4_K_M",
|
|
8135
|
+
"Q5_K_M",
|
|
8136
|
+
"Q6_K",
|
|
8137
|
+
"Q8_0",
|
|
8138
|
+
"BF16",
|
|
8139
|
+
"UD-IQ1_M",
|
|
8140
|
+
"UD-IQ1_S",
|
|
8141
|
+
"UD-IQ2_M",
|
|
8142
|
+
"UD-IQ2_XXS",
|
|
8143
|
+
"UD-IQ3_XXS",
|
|
8144
|
+
"UD-Q2_K_XL",
|
|
8145
|
+
"UD-Q3_K_XL",
|
|
8146
|
+
"UD-Q4_K_XL",
|
|
8147
|
+
"UD-Q5_K_XL",
|
|
8148
|
+
"UD-Q6_K_XL",
|
|
8149
|
+
"UD-Q8_K_XL",
|
|
8150
|
+
"IQ4_NL",
|
|
8151
|
+
"IQ4_XS"
|
|
8152
|
+
],
|
|
8153
|
+
"quantization_parts": {
|
|
8154
|
+
"BF16": [
|
|
8155
|
+
"00001-of-00002",
|
|
8156
|
+
"00002-of-00002"
|
|
8157
|
+
]
|
|
8158
|
+
},
|
|
8159
|
+
"model_id": "unsloth/Qwen3-30B-A3B-GGUF",
|
|
8160
|
+
"model_hub": "modelscope",
|
|
8161
|
+
"model_file_name_template": "Qwen3-30B-A3B-{quantization}.gguf",
|
|
8162
|
+
"model_file_name_split_template": "BF16/Qwen3-30B-A3B-{quantization}-{part}.gguf"
|
|
8163
|
+
},
|
|
8164
|
+
{
|
|
8165
|
+
"model_format": "pytorch",
|
|
8166
|
+
"model_size_in_billions": 32,
|
|
8167
|
+
"quantizations": [
|
|
8168
|
+
"none"
|
|
8169
|
+
],
|
|
8170
|
+
"model_id": "Qwen/Qwen3-32B",
|
|
8171
|
+
"model_hub": "modelscope"
|
|
8172
|
+
},
|
|
8173
|
+
{
|
|
8174
|
+
"model_format": "fp8",
|
|
8175
|
+
"model_size_in_billions": 32,
|
|
8176
|
+
"quantizations": [
|
|
8177
|
+
"fp8"
|
|
8178
|
+
],
|
|
8179
|
+
"model_id": "Qwen/Qwen3-32B-FP8",
|
|
8180
|
+
"model_hub": "modelscope"
|
|
8181
|
+
},
|
|
8182
|
+
{
|
|
8183
|
+
"model_format": "gptq",
|
|
8184
|
+
"model_size_in_billions": 32,
|
|
8185
|
+
"quantizations": [
|
|
8186
|
+
"Int4",
|
|
8187
|
+
"Int8"
|
|
8188
|
+
],
|
|
8189
|
+
"model_id": "JunHowie/Qwen3-32B-GPTQ-{quantization}",
|
|
8190
|
+
"model_hub": "modelscope"
|
|
8191
|
+
},
|
|
8192
|
+
{
|
|
8193
|
+
"model_format": "ggufv2",
|
|
8194
|
+
"model_size_in_billions": 32,
|
|
8195
|
+
"quantizations": [
|
|
8196
|
+
"Q2_K",
|
|
8197
|
+
"Q2_K_L",
|
|
8198
|
+
"Q3_K_M",
|
|
8199
|
+
"Q3_K_S",
|
|
8200
|
+
"Q4_0",
|
|
8201
|
+
"Q4_1",
|
|
8202
|
+
"Q4_K_M",
|
|
8203
|
+
"Q5_K_M",
|
|
8204
|
+
"Q6_K",
|
|
8205
|
+
"Q8_0",
|
|
8206
|
+
"BF16",
|
|
8207
|
+
"UD-IQ1_M",
|
|
8208
|
+
"UD-IQ1_S",
|
|
8209
|
+
"UD-IQ2_M",
|
|
8210
|
+
"UD-IQ2_XXS",
|
|
8211
|
+
"UD-IQ3_XXS",
|
|
8212
|
+
"UD-Q2_K_XL",
|
|
8213
|
+
"UD-Q3_K_XL",
|
|
8214
|
+
"UD-Q4_K_XL",
|
|
8215
|
+
"UD-Q5_K_XL",
|
|
8216
|
+
"UD-Q6_K_XL",
|
|
8217
|
+
"UD-Q8_K_XL",
|
|
8218
|
+
"IQ4_NL",
|
|
8219
|
+
"IQ4_XS"
|
|
8220
|
+
],
|
|
8221
|
+
"quantization_parts": {
|
|
8222
|
+
"BF16": [
|
|
8223
|
+
"00001-of-00002",
|
|
8224
|
+
"00002-of-00002"
|
|
8225
|
+
]
|
|
8226
|
+
},
|
|
8227
|
+
"model_id": "unsloth/Qwen3-32B-GGUF",
|
|
8228
|
+
"model_hub": "modelscope",
|
|
8229
|
+
"model_file_name_template": "Qwen3-32B-{quantization}.gguf",
|
|
8230
|
+
"model_file_name_split_template": "BF16/Qwen3-32B-{quantization}-{part}.gguf"
|
|
8231
|
+
},
|
|
8232
|
+
{
|
|
8233
|
+
"model_format": "pytorch",
|
|
8234
|
+
"model_size_in_billions": 235,
|
|
8235
|
+
"activated_size_in_billions": 22,
|
|
8236
|
+
"quantizations": [
|
|
8237
|
+
"none"
|
|
8238
|
+
],
|
|
8239
|
+
"model_id": "Qwen/Qwen3-235B",
|
|
8240
|
+
"model_hub": "modelscope"
|
|
8241
|
+
},
|
|
8242
|
+
{
|
|
8243
|
+
"model_format": "fp8",
|
|
8244
|
+
"model_size_in_billions": 235,
|
|
8245
|
+
"activated_size_in_billions": 22,
|
|
8246
|
+
"quantizations": [
|
|
8247
|
+
"fp8"
|
|
8248
|
+
],
|
|
8249
|
+
"model_id": "Qwen/Qwen3-235B-FP8",
|
|
8250
|
+
"model_hub": "modelscope"
|
|
8251
|
+
},
|
|
8252
|
+
{
|
|
8253
|
+
"model_format": "ggufv2",
|
|
8254
|
+
"model_size_in_billions": 235,
|
|
8255
|
+
"activated_size_in_billions": 22,
|
|
8256
|
+
"quantizations": [
|
|
8257
|
+
"Q2_K",
|
|
8258
|
+
"Q2_K_L",
|
|
8259
|
+
"Q3_K_M",
|
|
8260
|
+
"Q3_K_S",
|
|
8261
|
+
"Q4_0",
|
|
8262
|
+
"Q4_1",
|
|
8263
|
+
"Q5_K_M",
|
|
8264
|
+
"Q6_K",
|
|
8265
|
+
"Q8_0",
|
|
8266
|
+
"BF16",
|
|
8267
|
+
"UD-Q2_K_XL",
|
|
8268
|
+
"UD-Q3_K_XL",
|
|
8269
|
+
"IQ4_NL",
|
|
8270
|
+
"IQ4_XS"
|
|
8271
|
+
],
|
|
8272
|
+
"quantization_parts": {
|
|
8273
|
+
"BF16": [
|
|
8274
|
+
"00001-of-00010",
|
|
8275
|
+
"00002-of-00010",
|
|
8276
|
+
"00003-of-00010",
|
|
8277
|
+
"00004-of-00010",
|
|
8278
|
+
"00005-of-00010",
|
|
8279
|
+
"00006-of-00010",
|
|
8280
|
+
"00007-of-00010",
|
|
8281
|
+
"00008-of-00010",
|
|
8282
|
+
"00009-of-00010",
|
|
8283
|
+
"00010-of-00010"
|
|
8284
|
+
],
|
|
8285
|
+
"IQ4_XS": [
|
|
8286
|
+
"00001-of-00003",
|
|
8287
|
+
"00002-of-00003",
|
|
8288
|
+
"00003-of-00003"
|
|
8289
|
+
],
|
|
8290
|
+
"Q2_K": [
|
|
8291
|
+
"00001-of-00002",
|
|
8292
|
+
"00002-of-00002"
|
|
8293
|
+
],
|
|
8294
|
+
"Q2_K_L": [
|
|
8295
|
+
"00001-of-00002",
|
|
8296
|
+
"00002-of-00002"
|
|
8297
|
+
],
|
|
8298
|
+
"Q3_K_S": [
|
|
8299
|
+
"00001-of-00003",
|
|
8300
|
+
"00002-of-00003",
|
|
8301
|
+
"00003-of-00003"
|
|
8302
|
+
],
|
|
8303
|
+
"Q4_0": [
|
|
8304
|
+
"00001-of-00003",
|
|
8305
|
+
"00002-of-00003",
|
|
8306
|
+
"00003-of-00003"
|
|
8307
|
+
],
|
|
8308
|
+
"Q4_1": [
|
|
8309
|
+
"00001-of-00003",
|
|
8310
|
+
"00002-of-00003",
|
|
8311
|
+
"00003-of-00003"
|
|
8312
|
+
],
|
|
8313
|
+
"Q5_K_M": [
|
|
8314
|
+
"00001-of-00004",
|
|
8315
|
+
"00002-of-00004",
|
|
8316
|
+
"00003-of-00004",
|
|
8317
|
+
"00004-of-00004"
|
|
8318
|
+
],
|
|
8319
|
+
"Q6_K": [
|
|
8320
|
+
"00001-of-00004",
|
|
8321
|
+
"00002-of-00004",
|
|
8322
|
+
"00003-of-00004",
|
|
8323
|
+
"00004-of-00004"
|
|
8324
|
+
],
|
|
8325
|
+
"Q8_0": [
|
|
8326
|
+
"00001-of-00006",
|
|
8327
|
+
"00002-of-00006",
|
|
8328
|
+
"00003-of-00006",
|
|
8329
|
+
"00004-of-00006",
|
|
8330
|
+
"00005-of-00006",
|
|
8331
|
+
"00006-of-00006"
|
|
8332
|
+
],
|
|
8333
|
+
"UD-Q2_K_XL": [
|
|
8334
|
+
"00001-of-00002",
|
|
8335
|
+
"00002-of-00002"
|
|
8336
|
+
],
|
|
8337
|
+
"UD-Q3_K_XL": [
|
|
8338
|
+
"00001-of-00003",
|
|
8339
|
+
"00002-of-00003",
|
|
8340
|
+
"00003-of-00003"
|
|
8341
|
+
]
|
|
8342
|
+
},
|
|
8343
|
+
"model_id": "unsloth/Qwen3-235B-A22B-GGUF",
|
|
8344
|
+
"model_hub": "modelscope",
|
|
8345
|
+
"model_file_name_template": "Qwen3-235B-A22B-{quantization}.gguf",
|
|
8346
|
+
"model_file_name_split_template": "{quantization}/Qwen3-235B-A22B-{quantization}-{part}.gguf"
|
|
8347
|
+
}
|
|
8348
|
+
],
|
|
8349
|
+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in message.content %}\n {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}",
|
|
8350
|
+
"stop_token_ids": [
|
|
8351
|
+
151643,
|
|
8352
|
+
151644,
|
|
8353
|
+
151645
|
|
8354
|
+
],
|
|
8355
|
+
"stop": [
|
|
8356
|
+
"<|endoftext|>",
|
|
8357
|
+
"<|im_start|>",
|
|
8358
|
+
"<|im_end|>"
|
|
8359
|
+
],
|
|
8360
|
+
"reasoning_start_tag": "<think>",
|
|
8361
|
+
"reasoning_end_tag": "</think>",
|
|
8362
|
+
"virtualenv": {
|
|
8363
|
+
"packages": [
|
|
8364
|
+
"transformers>=4.51.0",
|
|
8365
|
+
"numpy==1.26.4"
|
|
8366
|
+
]
|
|
8367
|
+
}
|
|
8996
8368
|
}
|
|
8997
8369
|
]
|