xinference 0.11.3__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +69 -0
- xinference/client/restful/restful_client.py +70 -0
- xinference/constants.py +4 -0
- xinference/core/model.py +141 -12
- xinference/core/scheduler.py +428 -0
- xinference/core/supervisor.py +26 -0
- xinference/isolation.py +9 -2
- xinference/model/audio/chattts.py +84 -0
- xinference/model/audio/core.py +10 -3
- xinference/model/audio/model_spec.json +20 -0
- xinference/model/llm/__init__.py +4 -0
- xinference/model/llm/llm_family.json +507 -1
- xinference/model/llm/llm_family_modelscope.json +409 -2
- xinference/model/llm/pytorch/chatglm.py +2 -1
- xinference/model/llm/pytorch/cogvlm2.py +76 -17
- xinference/model/llm/pytorch/core.py +91 -6
- xinference/model/llm/pytorch/glm4v.py +258 -0
- xinference/model/llm/pytorch/minicpmv25.py +232 -0
- xinference/model/llm/pytorch/utils.py +386 -2
- xinference/model/llm/vllm/core.py +6 -0
- xinference/thirdparty/ChatTTS/__init__.py +1 -0
- xinference/thirdparty/ChatTTS/core.py +200 -0
- xinference/types.py +3 -0
- {xinference-0.11.3.dist-info → xinference-0.12.0.dist-info}/METADATA +26 -9
- {xinference-0.11.3.dist-info → xinference-0.12.0.dist-info}/RECORD +30 -24
- {xinference-0.11.3.dist-info → xinference-0.12.0.dist-info}/LICENSE +0 -0
- {xinference-0.11.3.dist-info → xinference-0.12.0.dist-info}/WHEEL +0 -0
- {xinference-0.11.3.dist-info → xinference-0.12.0.dist-info}/entry_points.txt +0 -0
- {xinference-0.11.3.dist-info → xinference-0.12.0.dist-info}/top_level.txt +0 -0
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
"model_family": "whisper",
|
|
5
5
|
"model_id": "openai/whisper-tiny",
|
|
6
6
|
"model_revision": "167c219b21f11ef214220b8fdb7536b8a88c2475",
|
|
7
|
+
"ability": "audio-to-text",
|
|
7
8
|
"multilingual": true
|
|
8
9
|
},
|
|
9
10
|
{
|
|
@@ -11,6 +12,7 @@
|
|
|
11
12
|
"model_family": "whisper",
|
|
12
13
|
"model_id": "openai/whisper-tiny.en",
|
|
13
14
|
"model_revision": "87c7102498dcde7456f24cfd30239ca606ed9063",
|
|
15
|
+
"ability": "audio-to-text",
|
|
14
16
|
"multilingual": false
|
|
15
17
|
},
|
|
16
18
|
{
|
|
@@ -18,6 +20,7 @@
|
|
|
18
20
|
"model_family": "whisper",
|
|
19
21
|
"model_id": "openai/whisper-base",
|
|
20
22
|
"model_revision": "8c1db9b51951100007a96a525d83a8ec81b3c237",
|
|
23
|
+
"ability": "audio-to-text",
|
|
21
24
|
"multilingual": true
|
|
22
25
|
},
|
|
23
26
|
{
|
|
@@ -25,6 +28,7 @@
|
|
|
25
28
|
"model_family": "whisper",
|
|
26
29
|
"model_id": "openai/whisper-base.en",
|
|
27
30
|
"model_revision": "911407f4214e0e1d82085af863093ec0b66f9cd6",
|
|
31
|
+
"ability": "audio-to-text",
|
|
28
32
|
"multilingual": false
|
|
29
33
|
},
|
|
30
34
|
{
|
|
@@ -32,6 +36,7 @@
|
|
|
32
36
|
"model_family": "whisper",
|
|
33
37
|
"model_id": "openai/whisper-small",
|
|
34
38
|
"model_revision": "998cb1a777c20db53d6033a61b977ed4c3792cac",
|
|
39
|
+
"ability": "audio-to-text",
|
|
35
40
|
"multilingual": true
|
|
36
41
|
},
|
|
37
42
|
{
|
|
@@ -39,6 +44,7 @@
|
|
|
39
44
|
"model_family": "whisper",
|
|
40
45
|
"model_id": "openai/whisper-small.en",
|
|
41
46
|
"model_revision": "e8727524f962ee844a7319d92be39ac1bd25655a",
|
|
47
|
+
"ability": "audio-to-text",
|
|
42
48
|
"multilingual": false
|
|
43
49
|
},
|
|
44
50
|
{
|
|
@@ -46,6 +52,7 @@
|
|
|
46
52
|
"model_family": "whisper",
|
|
47
53
|
"model_id": "openai/whisper-medium",
|
|
48
54
|
"model_revision": "16688beb1294bedd0a6f5cd86fe7eec57bce41ed",
|
|
55
|
+
"ability": "audio-to-text",
|
|
49
56
|
"multilingual": true
|
|
50
57
|
},
|
|
51
58
|
{
|
|
@@ -53,6 +60,7 @@
|
|
|
53
60
|
"model_family": "whisper",
|
|
54
61
|
"model_id": "openai/whisper-medium.en",
|
|
55
62
|
"model_revision": "2e98eb6279edf5095af0c8dedb36bdec0acd172b",
|
|
63
|
+
"ability": "audio-to-text",
|
|
56
64
|
"multilingual": false
|
|
57
65
|
},
|
|
58
66
|
{
|
|
@@ -60,6 +68,7 @@
|
|
|
60
68
|
"model_family": "whisper",
|
|
61
69
|
"model_id": "openai/whisper-large-v3",
|
|
62
70
|
"model_revision": "6cdf07a7e3ec3806e5d55f787915b85d4cd020b1",
|
|
71
|
+
"ability": "audio-to-text",
|
|
63
72
|
"multilingual": true
|
|
64
73
|
},
|
|
65
74
|
{
|
|
@@ -67,6 +76,7 @@
|
|
|
67
76
|
"model_family": "whisper",
|
|
68
77
|
"model_id": "BELLE-2/Belle-distilwhisper-large-v2-zh",
|
|
69
78
|
"model_revision": "ed25d13498fa5bac758b2fc479435b698532dfe8",
|
|
79
|
+
"ability": "audio-to-text",
|
|
70
80
|
"multilingual": false
|
|
71
81
|
},
|
|
72
82
|
{
|
|
@@ -74,6 +84,7 @@
|
|
|
74
84
|
"model_family": "whisper",
|
|
75
85
|
"model_id": "BELLE-2/Belle-whisper-large-v2-zh",
|
|
76
86
|
"model_revision": "ec5bd5d78598545b7585814edde86dac2002b5b9",
|
|
87
|
+
"ability": "audio-to-text",
|
|
77
88
|
"multilingual": false
|
|
78
89
|
},
|
|
79
90
|
{
|
|
@@ -81,6 +92,15 @@
|
|
|
81
92
|
"model_family": "whisper",
|
|
82
93
|
"model_id": "BELLE-2/Belle-whisper-large-v3-zh",
|
|
83
94
|
"model_revision": "3bebc7247696b39f5ab9ed22db426943ac33f600",
|
|
95
|
+
"ability": "audio-to-text",
|
|
84
96
|
"multilingual": false
|
|
97
|
+
},
|
|
98
|
+
{
|
|
99
|
+
"model_name": "ChatTTS",
|
|
100
|
+
"model_family": "ChatTTS",
|
|
101
|
+
"model_id": "2Noise/ChatTTS",
|
|
102
|
+
"model_revision": "ce5913842aebd78e4a01a02d47244b8d62ac4ee3",
|
|
103
|
+
"ability": "text-to-audio",
|
|
104
|
+
"multilingual": true
|
|
85
105
|
}
|
|
86
106
|
]
|
xinference/model/llm/__init__.py
CHANGED
|
@@ -117,9 +117,11 @@ def _install():
|
|
|
117
117
|
from .pytorch.core import PytorchChatModel, PytorchModel
|
|
118
118
|
from .pytorch.deepseek_vl import DeepSeekVLChatModel
|
|
119
119
|
from .pytorch.falcon import FalconPytorchChatModel, FalconPytorchModel
|
|
120
|
+
from .pytorch.glm4v import Glm4VModel
|
|
120
121
|
from .pytorch.intern_vl import InternVLChatModel
|
|
121
122
|
from .pytorch.internlm2 import Internlm2PytorchChatModel
|
|
122
123
|
from .pytorch.llama_2 import LlamaPytorchChatModel, LlamaPytorchModel
|
|
124
|
+
from .pytorch.minicpmv25 import MiniCPMV25Model
|
|
123
125
|
from .pytorch.qwen_vl import QwenVLChatModel
|
|
124
126
|
from .pytorch.vicuna import VicunaPytorchChatModel
|
|
125
127
|
from .pytorch.yi_vl import YiVLChatModel
|
|
@@ -161,6 +163,8 @@ def _install():
|
|
|
161
163
|
InternVLChatModel,
|
|
162
164
|
PytorchModel,
|
|
163
165
|
CogVLM2Model,
|
|
166
|
+
MiniCPMV25Model,
|
|
167
|
+
Glm4VModel,
|
|
164
168
|
]
|
|
165
169
|
)
|
|
166
170
|
if OmniLMMModel: # type: ignore
|
|
@@ -831,6 +831,139 @@
|
|
|
831
831
|
]
|
|
832
832
|
}
|
|
833
833
|
},
|
|
834
|
+
{
|
|
835
|
+
"version": 1,
|
|
836
|
+
"context_length": 131072,
|
|
837
|
+
"model_name": "glm4-chat",
|
|
838
|
+
"model_lang": [
|
|
839
|
+
"en",
|
|
840
|
+
"zh"
|
|
841
|
+
],
|
|
842
|
+
"model_ability": [
|
|
843
|
+
"chat",
|
|
844
|
+
"tools"
|
|
845
|
+
],
|
|
846
|
+
"model_description": "GLM4 is the open source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
|
|
847
|
+
"model_specs": [
|
|
848
|
+
{
|
|
849
|
+
"model_format": "pytorch",
|
|
850
|
+
"model_size_in_billions": 9,
|
|
851
|
+
"quantizations": [
|
|
852
|
+
"4-bit",
|
|
853
|
+
"8-bit",
|
|
854
|
+
"none"
|
|
855
|
+
],
|
|
856
|
+
"model_id": "THUDM/glm-4-9b-chat",
|
|
857
|
+
"model_revision": "b84dc74294ccd507a3d78bde8aebf628221af9bd"
|
|
858
|
+
}
|
|
859
|
+
],
|
|
860
|
+
"prompt_style": {
|
|
861
|
+
"style_name": "CHATGLM3",
|
|
862
|
+
"system_prompt": "",
|
|
863
|
+
"roles": [
|
|
864
|
+
"user",
|
|
865
|
+
"assistant"
|
|
866
|
+
],
|
|
867
|
+
"stop_token_ids": [
|
|
868
|
+
151329,
|
|
869
|
+
151336,
|
|
870
|
+
151338
|
|
871
|
+
],
|
|
872
|
+
"stop": [
|
|
873
|
+
"<|endoftext|>",
|
|
874
|
+
"<|user|>",
|
|
875
|
+
"<|observation|>"
|
|
876
|
+
]
|
|
877
|
+
}
|
|
878
|
+
},
|
|
879
|
+
{
|
|
880
|
+
"version": 1,
|
|
881
|
+
"context_length": 1048576,
|
|
882
|
+
"model_name": "glm4-chat-1m",
|
|
883
|
+
"model_lang": [
|
|
884
|
+
"en",
|
|
885
|
+
"zh"
|
|
886
|
+
],
|
|
887
|
+
"model_ability": [
|
|
888
|
+
"chat",
|
|
889
|
+
"tools"
|
|
890
|
+
],
|
|
891
|
+
"model_description": "GLM4 is the open source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
|
|
892
|
+
"model_specs": [
|
|
893
|
+
{
|
|
894
|
+
"model_format": "pytorch",
|
|
895
|
+
"model_size_in_billions": 9,
|
|
896
|
+
"quantizations": [
|
|
897
|
+
"4-bit",
|
|
898
|
+
"8-bit",
|
|
899
|
+
"none"
|
|
900
|
+
],
|
|
901
|
+
"model_id": "THUDM/glm-4-9b-chat-1m",
|
|
902
|
+
"model_revision": "715ddbe91082f976ff6a4ca06d59e5bbff6c3642"
|
|
903
|
+
}
|
|
904
|
+
],
|
|
905
|
+
"prompt_style": {
|
|
906
|
+
"style_name": "CHATGLM3",
|
|
907
|
+
"system_prompt": "",
|
|
908
|
+
"roles": [
|
|
909
|
+
"user",
|
|
910
|
+
"assistant"
|
|
911
|
+
],
|
|
912
|
+
"stop_token_ids": [
|
|
913
|
+
151329,
|
|
914
|
+
151336,
|
|
915
|
+
151338
|
|
916
|
+
],
|
|
917
|
+
"stop": [
|
|
918
|
+
"<|endoftext|>",
|
|
919
|
+
"<|user|>",
|
|
920
|
+
"<|observation|>"
|
|
921
|
+
]
|
|
922
|
+
}
|
|
923
|
+
},
|
|
924
|
+
{
|
|
925
|
+
"version": 1,
|
|
926
|
+
"context_length": 8192,
|
|
927
|
+
"model_name": "glm-4v",
|
|
928
|
+
"model_lang": [
|
|
929
|
+
"en",
|
|
930
|
+
"zh"
|
|
931
|
+
],
|
|
932
|
+
"model_ability": [
|
|
933
|
+
"chat",
|
|
934
|
+
"vision"
|
|
935
|
+
],
|
|
936
|
+
"model_description": "GLM4 is the open source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
|
|
937
|
+
"model_specs": [
|
|
938
|
+
{
|
|
939
|
+
"model_format": "pytorch",
|
|
940
|
+
"model_size_in_billions": 9,
|
|
941
|
+
"quantizations": [
|
|
942
|
+
"none"
|
|
943
|
+
],
|
|
944
|
+
"model_id": "THUDM/glm-4v-9b",
|
|
945
|
+
"model_revision": "e8b84fefc07e58a90c8489337675573fda95e289"
|
|
946
|
+
}
|
|
947
|
+
],
|
|
948
|
+
"prompt_style": {
|
|
949
|
+
"style_name": "CHATGLM3",
|
|
950
|
+
"system_prompt": "",
|
|
951
|
+
"roles": [
|
|
952
|
+
"user",
|
|
953
|
+
"assistant"
|
|
954
|
+
],
|
|
955
|
+
"stop_token_ids": [
|
|
956
|
+
151329,
|
|
957
|
+
151336,
|
|
958
|
+
151338
|
|
959
|
+
],
|
|
960
|
+
"stop": [
|
|
961
|
+
"<|endoftext|>",
|
|
962
|
+
"<|user|>",
|
|
963
|
+
"<|observation|>"
|
|
964
|
+
]
|
|
965
|
+
}
|
|
966
|
+
},
|
|
834
967
|
{
|
|
835
968
|
"version": 1,
|
|
836
969
|
"context_length": 2048,
|
|
@@ -2291,6 +2424,218 @@
|
|
|
2291
2424
|
]
|
|
2292
2425
|
}
|
|
2293
2426
|
},
|
|
2427
|
+
{
|
|
2428
|
+
"version": 1,
|
|
2429
|
+
"context_length": 32768,
|
|
2430
|
+
"model_name": "qwen2-instruct",
|
|
2431
|
+
"model_lang": [
|
|
2432
|
+
"en",
|
|
2433
|
+
"zh"
|
|
2434
|
+
],
|
|
2435
|
+
"model_ability": [
|
|
2436
|
+
"chat",
|
|
2437
|
+
"tools"
|
|
2438
|
+
],
|
|
2439
|
+
"model_description": "Qwen2 is the new series of Qwen large language models",
|
|
2440
|
+
"model_specs": [
|
|
2441
|
+
{
|
|
2442
|
+
"model_format": "pytorch",
|
|
2443
|
+
"model_size_in_billions": "0_5",
|
|
2444
|
+
"quantizations": [
|
|
2445
|
+
"4-bit",
|
|
2446
|
+
"8-bit",
|
|
2447
|
+
"none"
|
|
2448
|
+
],
|
|
2449
|
+
"model_id": "Qwen/Qwen2-0.5B-Instruct"
|
|
2450
|
+
},
|
|
2451
|
+
{
|
|
2452
|
+
"model_format": "pytorch",
|
|
2453
|
+
"model_size_in_billions": "1_5",
|
|
2454
|
+
"quantizations": [
|
|
2455
|
+
"4-bit",
|
|
2456
|
+
"8-bit",
|
|
2457
|
+
"none"
|
|
2458
|
+
],
|
|
2459
|
+
"model_id": "Qwen/Qwen2-1.5B-Instruct"
|
|
2460
|
+
},
|
|
2461
|
+
{
|
|
2462
|
+
"model_format": "pytorch",
|
|
2463
|
+
"model_size_in_billions": 7,
|
|
2464
|
+
"quantizations": [
|
|
2465
|
+
"4-bit",
|
|
2466
|
+
"8-bit",
|
|
2467
|
+
"none"
|
|
2468
|
+
],
|
|
2469
|
+
"model_id": "Qwen/Qwen2-7B-Instruct"
|
|
2470
|
+
},
|
|
2471
|
+
{
|
|
2472
|
+
"model_format": "pytorch",
|
|
2473
|
+
"model_size_in_billions": 72,
|
|
2474
|
+
"quantizations": [
|
|
2475
|
+
"4-bit",
|
|
2476
|
+
"8-bit",
|
|
2477
|
+
"none"
|
|
2478
|
+
],
|
|
2479
|
+
"model_id": "Qwen/Qwen2-72B-Instruct"
|
|
2480
|
+
},
|
|
2481
|
+
{
|
|
2482
|
+
"model_format": "gptq",
|
|
2483
|
+
"model_size_in_billions": "0_5",
|
|
2484
|
+
"quantizations": [
|
|
2485
|
+
"Int4",
|
|
2486
|
+
"Int8"
|
|
2487
|
+
],
|
|
2488
|
+
"model_id": "Qwen/Qwen2-0.5B-Instruct-GPTQ-{quantization}"
|
|
2489
|
+
},
|
|
2490
|
+
{
|
|
2491
|
+
"model_format": "gptq",
|
|
2492
|
+
"model_size_in_billions": "1_5",
|
|
2493
|
+
"quantizations": [
|
|
2494
|
+
"Int4",
|
|
2495
|
+
"Int8"
|
|
2496
|
+
],
|
|
2497
|
+
"model_id": "Qwen/Qwen2-1.5B-Instruct-GPTQ-{quantization}"
|
|
2498
|
+
},
|
|
2499
|
+
{
|
|
2500
|
+
"model_format": "gptq",
|
|
2501
|
+
"model_size_in_billions": 7,
|
|
2502
|
+
"quantizations": [
|
|
2503
|
+
"Int4",
|
|
2504
|
+
"Int8"
|
|
2505
|
+
],
|
|
2506
|
+
"model_id": "Qwen/Qwen2-7B-Instruct-GPTQ-{quantization}"
|
|
2507
|
+
},
|
|
2508
|
+
{
|
|
2509
|
+
"model_format": "gptq",
|
|
2510
|
+
"model_size_in_billions": 72,
|
|
2511
|
+
"quantizations": [
|
|
2512
|
+
"Int4",
|
|
2513
|
+
"Int8"
|
|
2514
|
+
],
|
|
2515
|
+
"model_id": "Qwen/Qwen2-72B-Instruct-GPTQ-{quantization}"
|
|
2516
|
+
},
|
|
2517
|
+
{
|
|
2518
|
+
"model_format": "awq",
|
|
2519
|
+
"model_size_in_billions": "0_5",
|
|
2520
|
+
"quantizations": [
|
|
2521
|
+
"Int4"
|
|
2522
|
+
],
|
|
2523
|
+
"model_id": "Qwen/Qwen2-0.5B-Instruct-AWQ"
|
|
2524
|
+
},
|
|
2525
|
+
{
|
|
2526
|
+
"model_format": "awq",
|
|
2527
|
+
"model_size_in_billions": "1_5",
|
|
2528
|
+
"quantizations": [
|
|
2529
|
+
"Int4"
|
|
2530
|
+
],
|
|
2531
|
+
"model_id": "Qwen/Qwen2-1.5B-Instruct-AWQ"
|
|
2532
|
+
},
|
|
2533
|
+
{
|
|
2534
|
+
"model_format": "awq",
|
|
2535
|
+
"model_size_in_billions": 7,
|
|
2536
|
+
"quantizations": [
|
|
2537
|
+
"Int4"
|
|
2538
|
+
],
|
|
2539
|
+
"model_id": "Qwen/Qwen2-7B-Instruct-AWQ"
|
|
2540
|
+
},
|
|
2541
|
+
{
|
|
2542
|
+
"model_format": "awq",
|
|
2543
|
+
"model_size_in_billions": 72,
|
|
2544
|
+
"quantizations": [
|
|
2545
|
+
"Int4"
|
|
2546
|
+
],
|
|
2547
|
+
"model_id": "Qwen/Qwen2-72B-Instruct-AWQ"
|
|
2548
|
+
},
|
|
2549
|
+
{
|
|
2550
|
+
"model_format": "ggufv2",
|
|
2551
|
+
"model_size_in_billions": "0_5",
|
|
2552
|
+
"quantizations": [
|
|
2553
|
+
"q2_k",
|
|
2554
|
+
"q3_k_m",
|
|
2555
|
+
"q4_0",
|
|
2556
|
+
"q4_k_m",
|
|
2557
|
+
"q5_0",
|
|
2558
|
+
"q5_k_m",
|
|
2559
|
+
"q6_k",
|
|
2560
|
+
"q8_0",
|
|
2561
|
+
"fp16"
|
|
2562
|
+
],
|
|
2563
|
+
"model_id": "Qwen/Qwen2-0.5B-Instruct-GGUF",
|
|
2564
|
+
"model_file_name_template": "qwen2-0_5b-instruct-{quantization}.gguf"
|
|
2565
|
+
}
|
|
2566
|
+
],
|
|
2567
|
+
"prompt_style": {
|
|
2568
|
+
"style_name": "QWEN",
|
|
2569
|
+
"system_prompt": "You are a helpful assistant.",
|
|
2570
|
+
"roles": [
|
|
2571
|
+
"user",
|
|
2572
|
+
"assistant"
|
|
2573
|
+
],
|
|
2574
|
+
"intra_message_sep": "\n",
|
|
2575
|
+
"stop_token_ids": [
|
|
2576
|
+
151643,
|
|
2577
|
+
151644,
|
|
2578
|
+
151645
|
|
2579
|
+
],
|
|
2580
|
+
"stop": [
|
|
2581
|
+
"<|endoftext|>",
|
|
2582
|
+
"<|im_start|>",
|
|
2583
|
+
"<|im_end|>"
|
|
2584
|
+
]
|
|
2585
|
+
}
|
|
2586
|
+
},
|
|
2587
|
+
{
|
|
2588
|
+
"version": 1,
|
|
2589
|
+
"context_length": 32768,
|
|
2590
|
+
"model_name": "qwen2-moe-instruct",
|
|
2591
|
+
"model_lang": [
|
|
2592
|
+
"en",
|
|
2593
|
+
"zh"
|
|
2594
|
+
],
|
|
2595
|
+
"model_ability": [
|
|
2596
|
+
"chat"
|
|
2597
|
+
],
|
|
2598
|
+
"model_description": "Qwen2 is the new series of Qwen large language models. ",
|
|
2599
|
+
"model_specs": [
|
|
2600
|
+
{
|
|
2601
|
+
"model_format": "pytorch",
|
|
2602
|
+
"model_size_in_billions": 14,
|
|
2603
|
+
"quantizations": [
|
|
2604
|
+
"4-bit",
|
|
2605
|
+
"8-bit",
|
|
2606
|
+
"none"
|
|
2607
|
+
],
|
|
2608
|
+
"model_id": "Qwen/Qwen2-57B-A14B-Instruct"
|
|
2609
|
+
},
|
|
2610
|
+
{
|
|
2611
|
+
"model_format": "gptq",
|
|
2612
|
+
"model_size_in_billions": 14,
|
|
2613
|
+
"quantizations": [
|
|
2614
|
+
"Int4"
|
|
2615
|
+
],
|
|
2616
|
+
"model_id": "Qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4"
|
|
2617
|
+
}
|
|
2618
|
+
],
|
|
2619
|
+
"prompt_style": {
|
|
2620
|
+
"style_name": "QWEN",
|
|
2621
|
+
"system_prompt": "You are a helpful assistant.",
|
|
2622
|
+
"roles": [
|
|
2623
|
+
"user",
|
|
2624
|
+
"assistant"
|
|
2625
|
+
],
|
|
2626
|
+
"intra_message_sep": "\n",
|
|
2627
|
+
"stop_token_ids": [
|
|
2628
|
+
151643,
|
|
2629
|
+
151644,
|
|
2630
|
+
151645
|
|
2631
|
+
],
|
|
2632
|
+
"stop": [
|
|
2633
|
+
"<|endoftext|>",
|
|
2634
|
+
"<|im_start|>",
|
|
2635
|
+
"<|im_end|>"
|
|
2636
|
+
]
|
|
2637
|
+
}
|
|
2638
|
+
},
|
|
2294
2639
|
{
|
|
2295
2640
|
"version": 1,
|
|
2296
2641
|
"context_length": 8192,
|
|
@@ -3251,6 +3596,125 @@
|
|
|
3251
3596
|
]
|
|
3252
3597
|
}
|
|
3253
3598
|
},
|
|
3599
|
+
{
|
|
3600
|
+
"version": 1,
|
|
3601
|
+
"context_length": 32768,
|
|
3602
|
+
"model_name": "mistral-instruct-v0.3",
|
|
3603
|
+
"model_lang": [
|
|
3604
|
+
"en"
|
|
3605
|
+
],
|
|
3606
|
+
"model_ability": [
|
|
3607
|
+
"chat"
|
|
3608
|
+
],
|
|
3609
|
+
"model_description": "The Mistral-7B-Instruct-v0.3 Large Language Model (LLM) is an instruct fine-tuned version of Mistral-7B-v0.3.",
|
|
3610
|
+
"model_specs": [
|
|
3611
|
+
{
|
|
3612
|
+
"model_format": "pytorch",
|
|
3613
|
+
"model_size_in_billions": 7,
|
|
3614
|
+
"quantizations": [
|
|
3615
|
+
"4-bit",
|
|
3616
|
+
"8-bit",
|
|
3617
|
+
"none"
|
|
3618
|
+
],
|
|
3619
|
+
"model_id": "mistralai/Mistral-7B-Instruct-v0.3",
|
|
3620
|
+
"model_revision": "83e9aa141f2e28c82232fea5325f54edf17c43de"
|
|
3621
|
+
},
|
|
3622
|
+
{
|
|
3623
|
+
"model_format": "gptq",
|
|
3624
|
+
"model_size_in_billions": 7,
|
|
3625
|
+
"quantizations": [
|
|
3626
|
+
"Int4"
|
|
3627
|
+
],
|
|
3628
|
+
"model_id": "neuralmagic/Mistral-7B-Instruct-v0.3-GPTQ-4bit"
|
|
3629
|
+
},
|
|
3630
|
+
{
|
|
3631
|
+
"model_format": "awq",
|
|
3632
|
+
"model_size_in_billions": 7,
|
|
3633
|
+
"quantizations": [
|
|
3634
|
+
"Int4"
|
|
3635
|
+
],
|
|
3636
|
+
"model_id": "solidrust/Mistral-7B-Instruct-v0.3-AWQ"
|
|
3637
|
+
},
|
|
3638
|
+
{
|
|
3639
|
+
"model_format": "ggufv2",
|
|
3640
|
+
"model_size_in_billions": 7,
|
|
3641
|
+
"quantizations": [
|
|
3642
|
+
"Q2_K",
|
|
3643
|
+
"Q3_K_S",
|
|
3644
|
+
"Q3_K_M",
|
|
3645
|
+
"Q3_K_L",
|
|
3646
|
+
"Q4_K_S",
|
|
3647
|
+
"Q4_K_M",
|
|
3648
|
+
"Q5_K_S",
|
|
3649
|
+
"Q5_K_M",
|
|
3650
|
+
"Q6_K",
|
|
3651
|
+
"Q8_0",
|
|
3652
|
+
"fp16"
|
|
3653
|
+
],
|
|
3654
|
+
"model_id": "MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF",
|
|
3655
|
+
"model_file_name_template": "Mistral-7B-Instruct-v0.3.{quantization}.gguf"
|
|
3656
|
+
}
|
|
3657
|
+
],
|
|
3658
|
+
"prompt_style": {
|
|
3659
|
+
"style_name": "LLAMA2",
|
|
3660
|
+
"system_prompt": "[INST] ",
|
|
3661
|
+
"roles": [
|
|
3662
|
+
"[INST]",
|
|
3663
|
+
"[/INST]"
|
|
3664
|
+
],
|
|
3665
|
+
"intra_message_sep": " ",
|
|
3666
|
+
"inter_message_sep": "<s>",
|
|
3667
|
+
"stop_token_ids": [
|
|
3668
|
+
2
|
|
3669
|
+
],
|
|
3670
|
+
"stop": [
|
|
3671
|
+
"</s>"
|
|
3672
|
+
]
|
|
3673
|
+
}
|
|
3674
|
+
},
|
|
3675
|
+
{
|
|
3676
|
+
"version": 1,
|
|
3677
|
+
"context_length": 32768,
|
|
3678
|
+
"model_name": "codestral-v0.1",
|
|
3679
|
+
"model_lang": [
|
|
3680
|
+
"en"
|
|
3681
|
+
],
|
|
3682
|
+
"model_ability": [
|
|
3683
|
+
"generate"
|
|
3684
|
+
],
|
|
3685
|
+
"model_description": "Codestral-22B-v0.1 is trained on a diverse dataset of 80+ programming languages, including the most popular ones, such as Python, Java, C, C++, JavaScript, and Bash",
|
|
3686
|
+
"model_specs": [
|
|
3687
|
+
{
|
|
3688
|
+
"model_format": "pytorch",
|
|
3689
|
+
"model_size_in_billions": 22,
|
|
3690
|
+
"quantizations": [
|
|
3691
|
+
"4-bit",
|
|
3692
|
+
"8-bit",
|
|
3693
|
+
"none"
|
|
3694
|
+
],
|
|
3695
|
+
"model_id": "mistralai/Mistral-7B-Instruct-v0.2",
|
|
3696
|
+
"model_revision": "9552e7b1d9b2d5bbd87a5aa7221817285dbb6366"
|
|
3697
|
+
},
|
|
3698
|
+
{
|
|
3699
|
+
"model_format": "ggufv2",
|
|
3700
|
+
"model_size_in_billions": 22,
|
|
3701
|
+
"quantizations": [
|
|
3702
|
+
"Q2_K",
|
|
3703
|
+
"Q3_K_S",
|
|
3704
|
+
"Q3_K_M",
|
|
3705
|
+
"Q3_K_L",
|
|
3706
|
+
"Q4_K_S",
|
|
3707
|
+
"Q4_K_M",
|
|
3708
|
+
"Q5_K_S",
|
|
3709
|
+
"Q5_K_M",
|
|
3710
|
+
"Q6_K",
|
|
3711
|
+
"Q8_0"
|
|
3712
|
+
],
|
|
3713
|
+
"model_id": "bartowski/Codestral-22B-v0.1-GGUF",
|
|
3714
|
+
"model_file_name_template": "Codestral-22B-v0.1-{quantization}.gguf"
|
|
3715
|
+
}
|
|
3716
|
+
]
|
|
3717
|
+
},
|
|
3254
3718
|
{
|
|
3255
3719
|
"version": 1,
|
|
3256
3720
|
"context_length": 8192,
|
|
@@ -5258,6 +5722,48 @@
|
|
|
5258
5722
|
]
|
|
5259
5723
|
}
|
|
5260
5724
|
},
|
|
5725
|
+
{
|
|
5726
|
+
"version":1,
|
|
5727
|
+
"context_length":2048,
|
|
5728
|
+
"model_name":"MiniCPM-Llama3-V-2_5",
|
|
5729
|
+
"model_lang":[
|
|
5730
|
+
"en",
|
|
5731
|
+
"zh"
|
|
5732
|
+
],
|
|
5733
|
+
"model_ability":[
|
|
5734
|
+
"chat",
|
|
5735
|
+
"vision"
|
|
5736
|
+
],
|
|
5737
|
+
"model_description":"MiniCPM-Llama3-V 2.5 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Llama3-8B-Instruct with a total of 8B parameters.",
|
|
5738
|
+
"model_specs":[
|
|
5739
|
+
{
|
|
5740
|
+
"model_format":"pytorch",
|
|
5741
|
+
"model_size_in_billions":8,
|
|
5742
|
+
"quantizations":[
|
|
5743
|
+
"none"
|
|
5744
|
+
],
|
|
5745
|
+
"model_id":"openbmb/MiniCPM-Llama3-V-2_5",
|
|
5746
|
+
"model_revision":"285a637ba8a30a0660dfcccad16f9a864f75abfd"
|
|
5747
|
+
},
|
|
5748
|
+
{
|
|
5749
|
+
"model_format":"pytorch",
|
|
5750
|
+
"model_size_in_billions":8,
|
|
5751
|
+
"quantizations":[
|
|
5752
|
+
"int4"
|
|
5753
|
+
],
|
|
5754
|
+
"model_id":"openbmb/MiniCPM-Llama3-V-2_5-{quantization}",
|
|
5755
|
+
"model_revision":"f92aff28552de35de3be204e8fe292dd4824e544"
|
|
5756
|
+
}
|
|
5757
|
+
],
|
|
5758
|
+
"prompt_style":{
|
|
5759
|
+
"style_name":"OmniLMM",
|
|
5760
|
+
"system_prompt":"The role of first msg should be user",
|
|
5761
|
+
"roles":[
|
|
5762
|
+
"user",
|
|
5763
|
+
"assistant"
|
|
5764
|
+
]
|
|
5765
|
+
}
|
|
5766
|
+
},
|
|
5261
5767
|
{
|
|
5262
5768
|
"version": 1,
|
|
5263
5769
|
"context_length": 4096,
|
|
@@ -6277,7 +6783,7 @@
|
|
|
6277
6783
|
"quantizations": [
|
|
6278
6784
|
"int4"
|
|
6279
6785
|
],
|
|
6280
|
-
"model_id": "THUDM/cogvlm2-llama3-chinese-chat-19B-{
|
|
6786
|
+
"model_id": "THUDM/cogvlm2-llama3-chinese-chat-19B-{quantization}",
|
|
6281
6787
|
"model_revision": "7863e362174f4718c2fe9cba4befd0b580a3194f"
|
|
6282
6788
|
}
|
|
6283
6789
|
],
|