xinference 0.9.4__py3-none-any.whl → 0.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/oauth2/auth_service.py +47 -18
- xinference/api/oauth2/types.py +1 -0
- xinference/api/restful_api.py +34 -7
- xinference/client/oscar/actor_client.py +4 -3
- xinference/client/restful/restful_client.py +20 -4
- xinference/conftest.py +13 -2
- xinference/core/supervisor.py +48 -1
- xinference/core/worker.py +139 -20
- xinference/deploy/cmdline.py +119 -20
- xinference/model/embedding/core.py +1 -2
- xinference/model/llm/__init__.py +4 -6
- xinference/model/llm/ggml/llamacpp.py +2 -10
- xinference/model/llm/llm_family.json +877 -13
- xinference/model/llm/llm_family.py +15 -0
- xinference/model/llm/llm_family_modelscope.json +571 -0
- xinference/model/llm/pytorch/chatglm.py +2 -0
- xinference/model/llm/pytorch/core.py +22 -26
- xinference/model/llm/pytorch/deepseek_vl.py +232 -0
- xinference/model/llm/pytorch/internlm2.py +2 -0
- xinference/model/llm/pytorch/omnilmm.py +153 -0
- xinference/model/llm/pytorch/qwen_vl.py +2 -0
- xinference/model/llm/pytorch/yi_vl.py +4 -2
- xinference/model/llm/utils.py +53 -5
- xinference/model/llm/vllm/core.py +54 -6
- xinference/model/rerank/core.py +3 -0
- xinference/thirdparty/deepseek_vl/__init__.py +31 -0
- xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
- xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
- xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
- xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
- xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
- xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
- xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
- xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
- xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
- xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
- xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
- xinference/thirdparty/omnilmm/__init__.py +0 -0
- xinference/thirdparty/omnilmm/chat.py +216 -0
- xinference/thirdparty/omnilmm/constants.py +4 -0
- xinference/thirdparty/omnilmm/conversation.py +332 -0
- xinference/thirdparty/omnilmm/model/__init__.py +1 -0
- xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
- xinference/thirdparty/omnilmm/model/resampler.py +166 -0
- xinference/thirdparty/omnilmm/model/utils.py +563 -0
- xinference/thirdparty/omnilmm/train/__init__.py +13 -0
- xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
- xinference/thirdparty/omnilmm/utils.py +134 -0
- xinference/types.py +15 -19
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.76ef2b17.js +3 -0
- xinference/web/ui/build/static/js/main.76ef2b17.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/15e2cf8cd8d0989719b6349428ff576f9009ff4c2dcc52378be0bd938e82495e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/35d0e4a317e5582cbb79d901302e9d706520ac53f8a734c2fd8bfde6eb5a4f02.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3c2f277c93c5f1638e08db38df0d0fb4e58d1c5571aea03241a5c04ff4094704.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5393569d846332075b93b55656716a34f50e0a8c970be789502d7e6c49755fd7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/59ce49eae0f486af4c5034d4d2f9ca77c3ec3a32ecc560085caf5ef482b5f4c9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63a4c48f0326d071c7772c46598215c006ae41fd3d4ff3577fe717de66ad6e89.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b9cbcb6d77ba21b22c6950b6fb5b305d23c19cf747f99f7d48b6b046f8f7b1b0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d076fd56cf3b15ed2433e3744b98c6b4e4410a19903d1db4de5bba0e1a1b3347.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/daad8131d91134f6d7aef895a0c9c32e1cb928277cb5aa66c01028126d215be0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/de0299226173b0662b573f49e3992220f6611947073bd66ac079728a8bc8837d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e6eccc9aa641e7da833492e27846dc965f9750281420977dc84654ca6ed221e4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e9b52d171223bb59fb918316297a051cdfd42dd453e8260fd918e90bc0a4ebdf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f16aec63602a77bd561d0e67fa00b76469ac54b8033754bba114ec5eb3257964.json +1 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/METADATA +25 -12
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/RECORD +79 -58
- xinference/model/llm/ggml/ctransformers.py +0 -281
- xinference/model/llm/ggml/ctransformers_util.py +0 -161
- xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
- xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0bd70b1ecf307e2681318e864f4692305b6350c8683863007f4caf2f9ac33b6e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/18e5d5422e2464abf4a3e6d38164570e2e426e0a921e9a2628bbae81b18da353.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3e055de705e397e1d413d7f429589b1a98dd78ef378b97f0cdb462c5f2487d5e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/52aa27272b4b9968f62666262b47661cb1992336a2aff3b13994cc36877b3ec3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/60c4b98d8ea7479fb0c94cfd19c8128f17bd7e27a1e73e6dd9adf6e9d88d18eb.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/7e094845f611802b024b57439cbf911038169d06cdf6c34a72a7277f35aa71a4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/98b7ef307f436affe13d75a4f265b27e828ccc2b10ffae6513abe2681bc11971.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b400cfc9db57fa6c70cd2bad055b73c5079fde0ed37974009d898083f6af8cd8.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e1d9b2ae4e1248658704bc6bfc5d6160dcd1a9e771ea4ae8c1fed0aaddeedd29.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +0 -1
- /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.76ef2b17.js.LICENSE.txt} +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/LICENSE +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/WHEEL +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/top_level.txt +0 -0
|
@@ -688,6 +688,49 @@
|
|
|
688
688
|
]
|
|
689
689
|
}
|
|
690
690
|
},
|
|
691
|
+
{
|
|
692
|
+
"version": 1,
|
|
693
|
+
"context_length": 131072,
|
|
694
|
+
"model_name": "chatglm3-128k",
|
|
695
|
+
"model_lang": [
|
|
696
|
+
"en",
|
|
697
|
+
"zh"
|
|
698
|
+
],
|
|
699
|
+
"model_ability": [
|
|
700
|
+
"chat"
|
|
701
|
+
],
|
|
702
|
+
"model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
|
|
703
|
+
"model_specs": [
|
|
704
|
+
{
|
|
705
|
+
"model_format": "pytorch",
|
|
706
|
+
"model_size_in_billions": 6,
|
|
707
|
+
"quantizations": [
|
|
708
|
+
"4-bit",
|
|
709
|
+
"8-bit",
|
|
710
|
+
"none"
|
|
711
|
+
],
|
|
712
|
+
"model_id": "THUDM/chatglm3-6b-128k",
|
|
713
|
+
"model_revision": "f0afbe671009abc9e31182170cf60636d5546cda"
|
|
714
|
+
}
|
|
715
|
+
],
|
|
716
|
+
"prompt_style": {
|
|
717
|
+
"style_name": "CHATGLM3",
|
|
718
|
+
"system_prompt": "",
|
|
719
|
+
"roles": [
|
|
720
|
+
"user",
|
|
721
|
+
"assistant"
|
|
722
|
+
],
|
|
723
|
+
"stop_token_ids": [
|
|
724
|
+
64795,
|
|
725
|
+
64797,
|
|
726
|
+
2
|
|
727
|
+
],
|
|
728
|
+
"stop": [
|
|
729
|
+
"<|user|>",
|
|
730
|
+
"<|observation|>"
|
|
731
|
+
]
|
|
732
|
+
}
|
|
733
|
+
},
|
|
691
734
|
{
|
|
692
735
|
"version": 1,
|
|
693
736
|
"context_length": 2048,
|
|
@@ -870,6 +913,38 @@
|
|
|
870
913
|
"model_id": "meta-llama/Llama-2-7b-chat-hf",
|
|
871
914
|
"model_revision": "08751db2aca9bf2f7f80d2e516117a53d7450235"
|
|
872
915
|
},
|
|
916
|
+
{
|
|
917
|
+
"model_format": "gptq",
|
|
918
|
+
"model_size_in_billions": 7,
|
|
919
|
+
"quantizations": [
|
|
920
|
+
"Int4"
|
|
921
|
+
],
|
|
922
|
+
"model_id": "TheBloke/Llama-2-7B-Chat-GPTQ"
|
|
923
|
+
},
|
|
924
|
+
{
|
|
925
|
+
"model_format": "gptq",
|
|
926
|
+
"model_size_in_billions": 70,
|
|
927
|
+
"quantizations": [
|
|
928
|
+
"Int4"
|
|
929
|
+
],
|
|
930
|
+
"model_id": "TheBloke/Llama-2-70B-Chat-GPTQ"
|
|
931
|
+
},
|
|
932
|
+
{
|
|
933
|
+
"model_format": "awq",
|
|
934
|
+
"model_size_in_billions": 70,
|
|
935
|
+
"quantizations": [
|
|
936
|
+
"Int4"
|
|
937
|
+
],
|
|
938
|
+
"model_id": "TheBloke/Llama-2-70B-Chat-AWQ"
|
|
939
|
+
},
|
|
940
|
+
{
|
|
941
|
+
"model_format": "awq",
|
|
942
|
+
"model_size_in_billions": 7,
|
|
943
|
+
"quantizations": [
|
|
944
|
+
"Int4"
|
|
945
|
+
],
|
|
946
|
+
"model_id": "TheBloke/Llama-2-7B-Chat-AWQ"
|
|
947
|
+
},
|
|
873
948
|
{
|
|
874
949
|
"model_format": "pytorch",
|
|
875
950
|
"model_size_in_billions": 13,
|
|
@@ -881,6 +956,22 @@
|
|
|
881
956
|
"model_id": "meta-llama/Llama-2-13b-chat-hf",
|
|
882
957
|
"model_revision": "0ba94ac9b9e1d5a0037780667e8b219adde1908c"
|
|
883
958
|
},
|
|
959
|
+
{
|
|
960
|
+
"model_format": "gptq",
|
|
961
|
+
"model_size_in_billions": 13,
|
|
962
|
+
"quantizations": [
|
|
963
|
+
"Int4"
|
|
964
|
+
],
|
|
965
|
+
"model_id": "TheBloke/Llama-2-13B-chat-GPTQ"
|
|
966
|
+
},
|
|
967
|
+
{
|
|
968
|
+
"model_format": "awq",
|
|
969
|
+
"model_size_in_billions": 13,
|
|
970
|
+
"quantizations": [
|
|
971
|
+
"Int4"
|
|
972
|
+
],
|
|
973
|
+
"model_id": "TheBloke/Llama-2-13B-chat-AWQ"
|
|
974
|
+
},
|
|
884
975
|
{
|
|
885
976
|
"model_format": "pytorch",
|
|
886
977
|
"model_size_in_billions": 70,
|
|
@@ -1002,6 +1093,22 @@
|
|
|
1002
1093
|
"model_id": "TheBloke/Llama-2-7B-GGML",
|
|
1003
1094
|
"model_file_name_template": "llama-2-7b.ggmlv3.{quantization}.bin"
|
|
1004
1095
|
},
|
|
1096
|
+
{
|
|
1097
|
+
"model_format": "gptq",
|
|
1098
|
+
"model_size_in_billions": 7,
|
|
1099
|
+
"quantizations": [
|
|
1100
|
+
"Int4"
|
|
1101
|
+
],
|
|
1102
|
+
"model_id": "TheBloke/Llama-2-7B-GPTQ"
|
|
1103
|
+
},
|
|
1104
|
+
{
|
|
1105
|
+
"model_format": "awq",
|
|
1106
|
+
"model_size_in_billions": 7,
|
|
1107
|
+
"quantizations": [
|
|
1108
|
+
"Int4"
|
|
1109
|
+
],
|
|
1110
|
+
"model_id": "TheBloke/Llama-2-7B-AWQ"
|
|
1111
|
+
},
|
|
1005
1112
|
{
|
|
1006
1113
|
"model_format": "ggmlv3",
|
|
1007
1114
|
"model_size_in_billions": 13,
|
|
@@ -1068,6 +1175,22 @@
|
|
|
1068
1175
|
"model_id": "meta-llama/Llama-2-13b-hf",
|
|
1069
1176
|
"model_revision": "db6b8eb1feabb38985fdf785a89895959e944936"
|
|
1070
1177
|
},
|
|
1178
|
+
{
|
|
1179
|
+
"model_format": "gptq",
|
|
1180
|
+
"model_size_in_billions": 13,
|
|
1181
|
+
"quantizations": [
|
|
1182
|
+
"Int4"
|
|
1183
|
+
],
|
|
1184
|
+
"model_id": "TheBloke/Llama-2-13B-GPTQ"
|
|
1185
|
+
},
|
|
1186
|
+
{
|
|
1187
|
+
"model_format": "awq",
|
|
1188
|
+
"model_size_in_billions": 13,
|
|
1189
|
+
"quantizations": [
|
|
1190
|
+
"Int4"
|
|
1191
|
+
],
|
|
1192
|
+
"model_id": "TheBloke/Llama-2-13B-AWQ"
|
|
1193
|
+
},
|
|
1071
1194
|
{
|
|
1072
1195
|
"model_format": "pytorch",
|
|
1073
1196
|
"model_size_in_billions": 70,
|
|
@@ -1078,6 +1201,22 @@
|
|
|
1078
1201
|
],
|
|
1079
1202
|
"model_id": "meta-llama/Llama-2-70b-hf",
|
|
1080
1203
|
"model_revision": "cc8aa03a000ff08b4d5c5b39673321a2a396c396"
|
|
1204
|
+
},
|
|
1205
|
+
{
|
|
1206
|
+
"model_format": "gptq",
|
|
1207
|
+
"model_size_in_billions": 70,
|
|
1208
|
+
"quantizations": [
|
|
1209
|
+
"Int4"
|
|
1210
|
+
],
|
|
1211
|
+
"model_id": "TheBloke/Llama-2-70B-GPTQ"
|
|
1212
|
+
},
|
|
1213
|
+
{
|
|
1214
|
+
"model_format": "awq",
|
|
1215
|
+
"model_size_in_billions": 70,
|
|
1216
|
+
"quantizations": [
|
|
1217
|
+
"Int4"
|
|
1218
|
+
],
|
|
1219
|
+
"model_id": "TheBloke/Llama-2-70B-AWQ"
|
|
1081
1220
|
}
|
|
1082
1221
|
]
|
|
1083
1222
|
},
|
|
@@ -1466,6 +1605,16 @@
|
|
|
1466
1605
|
],
|
|
1467
1606
|
"model_id": "Qwen/Qwen1.5-14B-Chat"
|
|
1468
1607
|
},
|
|
1608
|
+
{
|
|
1609
|
+
"model_format": "pytorch",
|
|
1610
|
+
"model_size_in_billions": 32,
|
|
1611
|
+
"quantizations": [
|
|
1612
|
+
"4-bit",
|
|
1613
|
+
"8-bit",
|
|
1614
|
+
"none"
|
|
1615
|
+
],
|
|
1616
|
+
"model_id": "Qwen/Qwen1.5-32B-Chat"
|
|
1617
|
+
},
|
|
1469
1618
|
{
|
|
1470
1619
|
"model_format": "pytorch",
|
|
1471
1620
|
"model_size_in_billions": 72,
|
|
@@ -1521,6 +1670,14 @@
|
|
|
1521
1670
|
],
|
|
1522
1671
|
"model_id": "Qwen/Qwen1.5-14B-Chat-GPTQ-{quantization}"
|
|
1523
1672
|
},
|
|
1673
|
+
{
|
|
1674
|
+
"model_format": "gptq",
|
|
1675
|
+
"model_size_in_billions": 32,
|
|
1676
|
+
"quantizations": [
|
|
1677
|
+
"Int4"
|
|
1678
|
+
],
|
|
1679
|
+
"model_id": "Qwen/Qwen1.5-32B-Chat-GPTQ-{quantization}"
|
|
1680
|
+
},
|
|
1524
1681
|
{
|
|
1525
1682
|
"model_format": "gptq",
|
|
1526
1683
|
"model_size_in_billions": 72,
|
|
@@ -1570,6 +1727,14 @@
|
|
|
1570
1727
|
],
|
|
1571
1728
|
"model_id": "Qwen/Qwen1.5-14B-Chat-AWQ"
|
|
1572
1729
|
},
|
|
1730
|
+
{
|
|
1731
|
+
"model_format": "awq",
|
|
1732
|
+
"model_size_in_billions": 32,
|
|
1733
|
+
"quantizations": [
|
|
1734
|
+
"Int4"
|
|
1735
|
+
],
|
|
1736
|
+
"model_id": "Qwen/Qwen1.5-32B-Chat-AWQ"
|
|
1737
|
+
},
|
|
1573
1738
|
{
|
|
1574
1739
|
"model_format": "awq",
|
|
1575
1740
|
"model_size_in_billions": 72,
|
|
@@ -1658,6 +1823,22 @@
|
|
|
1658
1823
|
"model_id": "Qwen/Qwen1.5-14B-Chat-GGUF",
|
|
1659
1824
|
"model_file_name_template": "qwen1_5-14b-chat-{quantization}.gguf"
|
|
1660
1825
|
},
|
|
1826
|
+
{
|
|
1827
|
+
"model_format": "ggufv2",
|
|
1828
|
+
"model_size_in_billions": 32,
|
|
1829
|
+
"quantizations": [
|
|
1830
|
+
"q2_k",
|
|
1831
|
+
"q3_k_m",
|
|
1832
|
+
"q4_0",
|
|
1833
|
+
"q4_k_m",
|
|
1834
|
+
"q5_0",
|
|
1835
|
+
"q5_k_m",
|
|
1836
|
+
"q6_k",
|
|
1837
|
+
"q8_0"
|
|
1838
|
+
],
|
|
1839
|
+
"model_id": "Qwen/Qwen1.5-32B-Chat-GGUF",
|
|
1840
|
+
"model_file_name_template": "qwen1_5-32b-chat-{quantization}.gguf"
|
|
1841
|
+
},
|
|
1661
1842
|
{
|
|
1662
1843
|
"model_format": "ggufv2",
|
|
1663
1844
|
"model_size_in_billions": 72,
|
|
@@ -1697,6 +1878,58 @@
|
|
|
1697
1878
|
]
|
|
1698
1879
|
}
|
|
1699
1880
|
},
|
|
1881
|
+
{
|
|
1882
|
+
"version": 1,
|
|
1883
|
+
"context_length": 32768,
|
|
1884
|
+
"model_name": "qwen1.5-moe-chat",
|
|
1885
|
+
"model_lang": [
|
|
1886
|
+
"en",
|
|
1887
|
+
"zh"
|
|
1888
|
+
],
|
|
1889
|
+
"model_ability": [
|
|
1890
|
+
"chat"
|
|
1891
|
+
],
|
|
1892
|
+
"model_description": "Qwen1.5-MoE is a transformer-based MoE decoder-only language model pretrained on a large amount of data.",
|
|
1893
|
+
"model_specs": [
|
|
1894
|
+
{
|
|
1895
|
+
"model_format": "pytorch",
|
|
1896
|
+
"model_size_in_billions": "2_7",
|
|
1897
|
+
"quantizations": [
|
|
1898
|
+
"4-bit",
|
|
1899
|
+
"8-bit",
|
|
1900
|
+
"none"
|
|
1901
|
+
],
|
|
1902
|
+
"model_id": "Qwen/Qwen1.5-MoE-A2.7B-Chat"
|
|
1903
|
+
},
|
|
1904
|
+
{
|
|
1905
|
+
"model_format": "gptq",
|
|
1906
|
+
"model_size_in_billions": "2_7",
|
|
1907
|
+
"quantizations": [
|
|
1908
|
+
"Int4"
|
|
1909
|
+
],
|
|
1910
|
+
"model_id": "Qwen/Qwen1.5-MoE-A2.7B-Chat-GPTQ-Int4"
|
|
1911
|
+
}
|
|
1912
|
+
],
|
|
1913
|
+
"prompt_style": {
|
|
1914
|
+
"style_name": "QWEN",
|
|
1915
|
+
"system_prompt": "You are a helpful assistant.",
|
|
1916
|
+
"roles": [
|
|
1917
|
+
"user",
|
|
1918
|
+
"assistant"
|
|
1919
|
+
],
|
|
1920
|
+
"intra_message_sep": "\n",
|
|
1921
|
+
"stop_token_ids": [
|
|
1922
|
+
151643,
|
|
1923
|
+
151644,
|
|
1924
|
+
151645
|
|
1925
|
+
],
|
|
1926
|
+
"stop": [
|
|
1927
|
+
"<|endoftext|>",
|
|
1928
|
+
"<|im_start|>",
|
|
1929
|
+
"<|im_end|>"
|
|
1930
|
+
]
|
|
1931
|
+
}
|
|
1932
|
+
},
|
|
1700
1933
|
{
|
|
1701
1934
|
"version": 1,
|
|
1702
1935
|
"context_length": 8192,
|
|
@@ -1737,13 +1970,13 @@
|
|
|
1737
1970
|
"model_description": "GPT-2 is a Transformer-based LLM that is trained on WebTest, a 40 GB dataset of Reddit posts with 3+ upvotes.",
|
|
1738
1971
|
"model_specs": [
|
|
1739
1972
|
{
|
|
1740
|
-
"model_format": "
|
|
1741
|
-
"model_size_in_billions":
|
|
1973
|
+
"model_format": "pytorch",
|
|
1974
|
+
"model_size_in_billions": "1_5",
|
|
1742
1975
|
"quantizations": [
|
|
1743
1976
|
"none"
|
|
1744
1977
|
],
|
|
1745
|
-
"model_id": "
|
|
1746
|
-
"
|
|
1978
|
+
"model_id": "openai-community/gpt2",
|
|
1979
|
+
"model_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e"
|
|
1747
1980
|
}
|
|
1748
1981
|
]
|
|
1749
1982
|
},
|
|
@@ -2526,6 +2759,22 @@
|
|
|
2526
2759
|
"model_id": "mistralai/Mistral-7B-Instruct-v0.1",
|
|
2527
2760
|
"model_revision": "54766df6d50e4d3d7ccd66758e5341ba105a6d36"
|
|
2528
2761
|
},
|
|
2762
|
+
{
|
|
2763
|
+
"model_format": "awq",
|
|
2764
|
+
"model_size_in_billions": 7,
|
|
2765
|
+
"quantizations": [
|
|
2766
|
+
"Int4"
|
|
2767
|
+
],
|
|
2768
|
+
"model_id": "TheBloke/Mistral-7B-Instruct-v0.1-AWQ"
|
|
2769
|
+
},
|
|
2770
|
+
{
|
|
2771
|
+
"model_format": "gptq",
|
|
2772
|
+
"model_size_in_billions": 7,
|
|
2773
|
+
"quantizations": [
|
|
2774
|
+
"Int4"
|
|
2775
|
+
],
|
|
2776
|
+
"model_id": "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
|
|
2777
|
+
},
|
|
2529
2778
|
{
|
|
2530
2779
|
"model_format": "ggufv2",
|
|
2531
2780
|
"model_size_in_billions": 7,
|
|
@@ -2587,6 +2836,22 @@
|
|
|
2587
2836
|
"model_id": "mistralai/Mistral-7B-Instruct-v0.2",
|
|
2588
2837
|
"model_revision": "b70aa86578567ba3301b21c8a27bea4e8f6d6d61"
|
|
2589
2838
|
},
|
|
2839
|
+
{
|
|
2840
|
+
"model_format": "gptq",
|
|
2841
|
+
"model_size_in_billions": 7,
|
|
2842
|
+
"quantizations": [
|
|
2843
|
+
"Int4"
|
|
2844
|
+
],
|
|
2845
|
+
"model_id": "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"
|
|
2846
|
+
},
|
|
2847
|
+
{
|
|
2848
|
+
"model_format": "awq",
|
|
2849
|
+
"model_size_in_billions": 7,
|
|
2850
|
+
"quantizations": [
|
|
2851
|
+
"Int4"
|
|
2852
|
+
],
|
|
2853
|
+
"model_id": "TheBloke/Mistral-7B-Instruct-v0.2-AWQ"
|
|
2854
|
+
},
|
|
2590
2855
|
{
|
|
2591
2856
|
"model_format": "ggufv2",
|
|
2592
2857
|
"model_size_in_billions": 7,
|
|
@@ -2747,6 +3012,14 @@
|
|
|
2747
3012
|
"model_id": "mistralai/Mixtral-8x7B-v0.1",
|
|
2748
3013
|
"model_revision": "58301445dc1378584211722b7ebf8743ec4e192b"
|
|
2749
3014
|
},
|
|
3015
|
+
{
|
|
3016
|
+
"model_format": "gptq",
|
|
3017
|
+
"model_size_in_billions": "46_7",
|
|
3018
|
+
"quantizations": [
|
|
3019
|
+
"Int4"
|
|
3020
|
+
],
|
|
3021
|
+
"model_id": "TheBloke/Mixtral-8x7B-v0.1-GPTQ"
|
|
3022
|
+
},
|
|
2750
3023
|
{
|
|
2751
3024
|
"model_format": "ggufv2",
|
|
2752
3025
|
"model_size_in_billions": "46_7",
|
|
@@ -2796,10 +3069,17 @@
|
|
|
2796
3069
|
"model_format": "awq",
|
|
2797
3070
|
"model_size_in_billions": "46_7",
|
|
2798
3071
|
"quantizations": [
|
|
2799
|
-
"
|
|
3072
|
+
"Int4"
|
|
3073
|
+
],
|
|
3074
|
+
"model_id": "TheBloke/Mixtral-8x7B-Instruct-v0.1-AWQ"
|
|
3075
|
+
},
|
|
3076
|
+
{
|
|
3077
|
+
"model_format": "gptq",
|
|
3078
|
+
"model_size_in_billions": "46_7",
|
|
3079
|
+
"quantizations": [
|
|
3080
|
+
"Int4"
|
|
2800
3081
|
],
|
|
2801
|
-
"model_id": "TheBloke/Mixtral-8x7B-Instruct-v0.1-
|
|
2802
|
-
"model_revision": "9afb6f0a7d7fe9ecebdda1baa4ff4e13e73e97d7"
|
|
3082
|
+
"model_id": "TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ"
|
|
2803
3083
|
},
|
|
2804
3084
|
{
|
|
2805
3085
|
"model_format": "ggufv2",
|
|
@@ -3357,22 +3637,122 @@
|
|
|
3357
3637
|
{
|
|
3358
3638
|
"version": 1,
|
|
3359
3639
|
"context_length": 4096,
|
|
3360
|
-
"model_name": "
|
|
3640
|
+
"model_name": "gorilla-openfunctions-v2",
|
|
3361
3641
|
"model_lang": [
|
|
3362
|
-
"en"
|
|
3363
|
-
"zh"
|
|
3642
|
+
"en"
|
|
3364
3643
|
],
|
|
3365
3644
|
"model_ability": [
|
|
3366
3645
|
"chat"
|
|
3367
3646
|
],
|
|
3368
|
-
"model_description": "
|
|
3647
|
+
"model_description": "OpenFunctions is designed to extend Large Language Model (LLM) Chat Completion feature to formulate executable APIs call given natural language instructions and API context.",
|
|
3369
3648
|
"model_specs": [
|
|
3370
3649
|
{
|
|
3371
3650
|
"model_format": "pytorch",
|
|
3372
3651
|
"model_size_in_billions": 7,
|
|
3373
3652
|
"quantizations": [
|
|
3374
|
-
"
|
|
3375
|
-
|
|
3653
|
+
"none"
|
|
3654
|
+
],
|
|
3655
|
+
"model_id": "gorilla-llm/gorilla-openfunctions-v2",
|
|
3656
|
+
"model_revision": "0f91d705e64b77fb55e35a7eab5d03bf965c9b5c"
|
|
3657
|
+
},
|
|
3658
|
+
{
|
|
3659
|
+
"model_format": "ggufv2",
|
|
3660
|
+
"model_size_in_billions": 7,
|
|
3661
|
+
"quantizations": [
|
|
3662
|
+
"Q2_K",
|
|
3663
|
+
"Q3_K_L",
|
|
3664
|
+
"Q3_K_M",
|
|
3665
|
+
"Q3_K_S",
|
|
3666
|
+
"Q4_0",
|
|
3667
|
+
"Q4_K_M",
|
|
3668
|
+
"Q4_K_S",
|
|
3669
|
+
"Q5_K_M",
|
|
3670
|
+
"Q5_K_S",
|
|
3671
|
+
"Q6_K"
|
|
3672
|
+
],
|
|
3673
|
+
"model_id": "gorilla-llm//gorilla-openfunctions-v2-GGUF",
|
|
3674
|
+
"model_file_name_template": "gorilla-openfunctions-v2.{quantization}.gguf"
|
|
3675
|
+
}
|
|
3676
|
+
],
|
|
3677
|
+
"prompt_style": {
|
|
3678
|
+
"style_name": "GORILLA_OPENFUNCTIONS",
|
|
3679
|
+
"system_prompt": "",
|
|
3680
|
+
"roles": [
|
|
3681
|
+
"",
|
|
3682
|
+
""
|
|
3683
|
+
],
|
|
3684
|
+
"intra_message_sep": "\n",
|
|
3685
|
+
"inter_message_sep": "\n",
|
|
3686
|
+
"stop_token_ids": [],
|
|
3687
|
+
"stop": []
|
|
3688
|
+
}
|
|
3689
|
+
},
|
|
3690
|
+
{
|
|
3691
|
+
"version": 1,
|
|
3692
|
+
"context_length": 4096,
|
|
3693
|
+
"model_name": "deepseek-vl-chat",
|
|
3694
|
+
"model_lang": [
|
|
3695
|
+
"en",
|
|
3696
|
+
"zh"
|
|
3697
|
+
],
|
|
3698
|
+
"model_ability": [
|
|
3699
|
+
"chat",
|
|
3700
|
+
"vision"
|
|
3701
|
+
],
|
|
3702
|
+
"model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
|
|
3703
|
+
"model_specs": [
|
|
3704
|
+
{
|
|
3705
|
+
"model_format": "pytorch",
|
|
3706
|
+
"model_size_in_billions": "1_3",
|
|
3707
|
+
"quantizations": [
|
|
3708
|
+
"none"
|
|
3709
|
+
],
|
|
3710
|
+
"model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
|
|
3711
|
+
"model_revision": "8f13a8e00dbdc381d614a9d29d61b07e8fe91b3f"
|
|
3712
|
+
},
|
|
3713
|
+
{
|
|
3714
|
+
"model_format": "pytorch",
|
|
3715
|
+
"model_size_in_billions": 7,
|
|
3716
|
+
"quantizations": [
|
|
3717
|
+
"none"
|
|
3718
|
+
],
|
|
3719
|
+
"model_id": "deepseek-ai/deepseek-vl-7b-chat",
|
|
3720
|
+
"model_revision": "6f16f00805f45b5249f709ce21820122eeb43556"
|
|
3721
|
+
}
|
|
3722
|
+
],
|
|
3723
|
+
"prompt_style": {
|
|
3724
|
+
"style_name": "DEEPSEEK_CHAT",
|
|
3725
|
+
"system_prompt": "<|begin▁of▁sentence|>",
|
|
3726
|
+
"roles": [
|
|
3727
|
+
"User",
|
|
3728
|
+
"Assistant"
|
|
3729
|
+
],
|
|
3730
|
+
"intra_message_sep": "\n\n",
|
|
3731
|
+
"inter_message_sep": "<|end▁of▁sentence|>",
|
|
3732
|
+
"stop": [
|
|
3733
|
+
"<|end▁of▁sentence|>"
|
|
3734
|
+
]
|
|
3735
|
+
}
|
|
3736
|
+
},
|
|
3737
|
+
{
|
|
3738
|
+
"version": 1,
|
|
3739
|
+
"context_length": 4096,
|
|
3740
|
+
"model_name": "deepseek-chat",
|
|
3741
|
+
"model_lang": [
|
|
3742
|
+
"en",
|
|
3743
|
+
"zh"
|
|
3744
|
+
],
|
|
3745
|
+
"model_ability": [
|
|
3746
|
+
"chat"
|
|
3747
|
+
],
|
|
3748
|
+
"model_description": "DeepSeek LLM is an advanced language model comprising 67 billion parameters. It has been trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese.",
|
|
3749
|
+
"model_specs": [
|
|
3750
|
+
{
|
|
3751
|
+
"model_format": "pytorch",
|
|
3752
|
+
"model_size_in_billions": 7,
|
|
3753
|
+
"quantizations": [
|
|
3754
|
+
"4-bit",
|
|
3755
|
+
"8-bit",
|
|
3376
3756
|
"none"
|
|
3377
3757
|
],
|
|
3378
3758
|
"model_id": "deepseek-ai/deepseek-llm-7b-chat",
|
|
@@ -3662,6 +4042,48 @@
|
|
|
3662
4042
|
]
|
|
3663
4043
|
}
|
|
3664
4044
|
},
|
|
4045
|
+
{
|
|
4046
|
+
"version":1,
|
|
4047
|
+
"context_length":2048,
|
|
4048
|
+
"model_name":"OmniLMM",
|
|
4049
|
+
"model_lang":[
|
|
4050
|
+
"en",
|
|
4051
|
+
"zh"
|
|
4052
|
+
],
|
|
4053
|
+
"model_ability":[
|
|
4054
|
+
"chat",
|
|
4055
|
+
"vision"
|
|
4056
|
+
],
|
|
4057
|
+
"model_description":"OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
|
|
4058
|
+
"model_specs":[
|
|
4059
|
+
{
|
|
4060
|
+
"model_format":"pytorch",
|
|
4061
|
+
"model_size_in_billions":3,
|
|
4062
|
+
"quantizations":[
|
|
4063
|
+
"none"
|
|
4064
|
+
],
|
|
4065
|
+
"model_id":"openbmb/MiniCPM-V",
|
|
4066
|
+
"model_revision":"bec7d1cd1c9e804c064ec291163e40624825eaaa"
|
|
4067
|
+
},
|
|
4068
|
+
{
|
|
4069
|
+
"model_format":"pytorch",
|
|
4070
|
+
"model_size_in_billions":12,
|
|
4071
|
+
"quantizations":[
|
|
4072
|
+
"none"
|
|
4073
|
+
],
|
|
4074
|
+
"model_id":"openbmb/OmniLMM-12B",
|
|
4075
|
+
"model_revision":"ef62bae5af34be653b9801037cd613e05ab24fdc"
|
|
4076
|
+
}
|
|
4077
|
+
],
|
|
4078
|
+
"prompt_style":{
|
|
4079
|
+
"style_name":"OmniLMM",
|
|
4080
|
+
"system_prompt":"The role of first msg should be user",
|
|
4081
|
+
"roles":[
|
|
4082
|
+
"user",
|
|
4083
|
+
"assistant"
|
|
4084
|
+
]
|
|
4085
|
+
}
|
|
4086
|
+
},
|
|
3665
4087
|
{
|
|
3666
4088
|
"version": 1,
|
|
3667
4089
|
"context_length": 4096,
|
|
@@ -3888,5 +4310,447 @@
|
|
|
3888
4310
|
"<start_of_turn>"
|
|
3889
4311
|
]
|
|
3890
4312
|
}
|
|
4313
|
+
},
|
|
4314
|
+
{
|
|
4315
|
+
"version": 1,
|
|
4316
|
+
"context_length": 4096,
|
|
4317
|
+
"model_name": "platypus2-70b-instruct",
|
|
4318
|
+
"model_lang": [
|
|
4319
|
+
"en"
|
|
4320
|
+
],
|
|
4321
|
+
"model_ability": [
|
|
4322
|
+
"generate"
|
|
4323
|
+
],
|
|
4324
|
+
"model_description": "Platypus-70B-instruct is a merge of garage-bAInd/Platypus2-70B and upstage/Llama-2-70b-instruct-v2.",
|
|
4325
|
+
"model_specs": [
|
|
4326
|
+
{
|
|
4327
|
+
"model_format": "pytorch",
|
|
4328
|
+
"model_size_in_billions": 70,
|
|
4329
|
+
"quantizations": [
|
|
4330
|
+
"none"
|
|
4331
|
+
],
|
|
4332
|
+
"model_id": "garage-bAInd/Platypus2-70B-instruct",
|
|
4333
|
+
"model_revision": "31389b50953688e4e542be53e6d2ab04d5c34e87"
|
|
4334
|
+
}
|
|
4335
|
+
]
|
|
4336
|
+
},
|
|
4337
|
+
{
|
|
4338
|
+
"version": 1,
|
|
4339
|
+
"context_length": 2048,
|
|
4340
|
+
"model_name": "aquila2",
|
|
4341
|
+
"model_lang": [
|
|
4342
|
+
"zh"
|
|
4343
|
+
],
|
|
4344
|
+
"model_ability": [
|
|
4345
|
+
"generate"
|
|
4346
|
+
],
|
|
4347
|
+
"model_description": "Aquila2 series models are the base language models",
|
|
4348
|
+
"model_specs": [
|
|
4349
|
+
{
|
|
4350
|
+
"model_format": "pytorch",
|
|
4351
|
+
"model_size_in_billions": 7,
|
|
4352
|
+
"quantizations": [
|
|
4353
|
+
"none"
|
|
4354
|
+
],
|
|
4355
|
+
"model_id": "BAAI/Aquila2-7B",
|
|
4356
|
+
"model_revision": "9c76e143c6e9621689ca76e078c465b0dee75eb8"
|
|
4357
|
+
},
|
|
4358
|
+
{
|
|
4359
|
+
"model_format": "pytorch",
|
|
4360
|
+
"model_size_in_billions": 34,
|
|
4361
|
+
"quantizations": [
|
|
4362
|
+
"none"
|
|
4363
|
+
],
|
|
4364
|
+
"model_id": "BAAI/Aquila2-34B",
|
|
4365
|
+
"model_revision": "356733caf6221e9dd898cde8ff189a98175526ec"
|
|
4366
|
+
},
|
|
4367
|
+
{
|
|
4368
|
+
"model_format": "pytorch",
|
|
4369
|
+
"model_size_in_billions": 70,
|
|
4370
|
+
"quantizations": [
|
|
4371
|
+
"none"
|
|
4372
|
+
],
|
|
4373
|
+
"model_id": "BAAI/Aquila2-70B-Expr",
|
|
4374
|
+
"model_revision": "32a2897235541b9f5238bbe88f8d76a19993c0ba"
|
|
4375
|
+
}
|
|
4376
|
+
]
|
|
4377
|
+
},
|
|
4378
|
+
{
|
|
4379
|
+
"version": 1,
|
|
4380
|
+
"context_length": 2048,
|
|
4381
|
+
"model_name": "aquila2-chat",
|
|
4382
|
+
"model_lang": [
|
|
4383
|
+
"zh"
|
|
4384
|
+
],
|
|
4385
|
+
"model_ability": [
|
|
4386
|
+
"chat"
|
|
4387
|
+
],
|
|
4388
|
+
"model_description": "Aquila2-chat series models are the chat models",
|
|
4389
|
+
"model_specs": [
|
|
4390
|
+
{
|
|
4391
|
+
"model_format": "pytorch",
|
|
4392
|
+
"model_size_in_billions": 7,
|
|
4393
|
+
"quantizations": [
|
|
4394
|
+
"none"
|
|
4395
|
+
],
|
|
4396
|
+
"model_id": "BAAI/AquilaChat2-7B",
|
|
4397
|
+
"model_revision": "0d060c4edeb4e0febd81130c17f6868653184fb3"
|
|
4398
|
+
},
|
|
4399
|
+
{
|
|
4400
|
+
"model_format": "ggufv2",
|
|
4401
|
+
"model_size_in_billions": 34,
|
|
4402
|
+
"quantizations": [
|
|
4403
|
+
"Q2_K",
|
|
4404
|
+
"Q3_K_L",
|
|
4405
|
+
"Q3_K_M",
|
|
4406
|
+
"Q3_K_S",
|
|
4407
|
+
"Q4_0",
|
|
4408
|
+
"Q4_K_M",
|
|
4409
|
+
"Q4_K_S",
|
|
4410
|
+
"Q5_0",
|
|
4411
|
+
"Q5_K_M",
|
|
4412
|
+
"Q5_K_S",
|
|
4413
|
+
"Q6_K",
|
|
4414
|
+
"Q8_0"
|
|
4415
|
+
],
|
|
4416
|
+
"model_id": "TheBloke/AquilaChat2-34B-GGUF",
|
|
4417
|
+
"model_file_name_template": "aquilachat2-34b.{quantization}.gguf"
|
|
4418
|
+
},
|
|
4419
|
+
{
|
|
4420
|
+
"model_format": "gptq",
|
|
4421
|
+
"model_size_in_billions": 34,
|
|
4422
|
+
"quantizations": [
|
|
4423
|
+
"Int4"
|
|
4424
|
+
],
|
|
4425
|
+
"model_id": "TheBloke/AquilaChat2-34B-GPTQ",
|
|
4426
|
+
"model_revision": "9a9d21424f7db608be51df769885514ab6e052db"
|
|
4427
|
+
},
|
|
4428
|
+
{
|
|
4429
|
+
"model_format": "awq",
|
|
4430
|
+
"model_size_in_billions": 34,
|
|
4431
|
+
"quantizations": [
|
|
4432
|
+
"Int4"
|
|
4433
|
+
],
|
|
4434
|
+
"model_id": "TheBloke/AquilaChat2-34B-AWQ",
|
|
4435
|
+
"model_revision": "ad1dec1c8adb7fa6cb07b7e261aaa04fccf1c4c0"
|
|
4436
|
+
},
|
|
4437
|
+
{
|
|
4438
|
+
"model_format": "pytorch",
|
|
4439
|
+
"model_size_in_billions": 34,
|
|
4440
|
+
"quantizations": [
|
|
4441
|
+
"none"
|
|
4442
|
+
],
|
|
4443
|
+
"model_id": "BAAI/AquilaChat2-34B",
|
|
4444
|
+
"model_revision": "b9cd9c7436435ab9cfa5e4f009be2b0354979ca8"
|
|
4445
|
+
},
|
|
4446
|
+
{
|
|
4447
|
+
"model_format": "pytorch",
|
|
4448
|
+
"model_size_in_billions": 70,
|
|
4449
|
+
"quantizations": [
|
|
4450
|
+
"none"
|
|
4451
|
+
],
|
|
4452
|
+
"model_id": "BAAI/AquilaChat2-70B-Expr",
|
|
4453
|
+
"model_revision": "0df19b6e10f1a19ca663f7cc1141aae10f1825f4"
|
|
4454
|
+
}
|
|
4455
|
+
],
|
|
4456
|
+
"prompt_style": {
|
|
4457
|
+
"style_name": "ADD_COLON_SINGLE",
|
|
4458
|
+
"intra_message_sep": "\n",
|
|
4459
|
+
"system_prompt": "",
|
|
4460
|
+
"roles": [
|
|
4461
|
+
"USER",
|
|
4462
|
+
"ASSISTANT"
|
|
4463
|
+
],
|
|
4464
|
+
"stop_token_ids": [
|
|
4465
|
+
100006,
|
|
4466
|
+
100007
|
|
4467
|
+
],
|
|
4468
|
+
"stop": [
|
|
4469
|
+
"[CLS]",
|
|
4470
|
+
"</s>"
|
|
4471
|
+
]
|
|
4472
|
+
}
|
|
4473
|
+
},
|
|
4474
|
+
{
|
|
4475
|
+
"version": 1,
|
|
4476
|
+
"context_length": 16384,
|
|
4477
|
+
"model_name": "aquila2-chat-16k",
|
|
4478
|
+
"model_lang": [
|
|
4479
|
+
"zh"
|
|
4480
|
+
],
|
|
4481
|
+
"model_ability": [
|
|
4482
|
+
"chat"
|
|
4483
|
+
],
|
|
4484
|
+
"model_description": "AquilaChat2-16k series models are the long-text chat models",
|
|
4485
|
+
"model_specs": [
|
|
4486
|
+
{
|
|
4487
|
+
"model_format": "pytorch",
|
|
4488
|
+
"model_size_in_billions": 7,
|
|
4489
|
+
"quantizations": [
|
|
4490
|
+
"none"
|
|
4491
|
+
],
|
|
4492
|
+
"model_id": "BAAI/AquilaChat2-7B-16K",
|
|
4493
|
+
"model_revision": "fb46d48479d05086ccf6952f19018322fcbb54cd"
|
|
4494
|
+
},
|
|
4495
|
+
{
|
|
4496
|
+
"model_format": "ggufv2",
|
|
4497
|
+
"model_size_in_billions": 34,
|
|
4498
|
+
"quantizations": [
|
|
4499
|
+
"Q2_K",
|
|
4500
|
+
"Q3_K_L",
|
|
4501
|
+
"Q3_K_M",
|
|
4502
|
+
"Q3_K_S",
|
|
4503
|
+
"Q4_0",
|
|
4504
|
+
"Q4_K_M",
|
|
4505
|
+
"Q4_K_S",
|
|
4506
|
+
"Q5_0",
|
|
4507
|
+
"Q5_K_M",
|
|
4508
|
+
"Q5_K_S",
|
|
4509
|
+
"Q6_K",
|
|
4510
|
+
"Q8_0"
|
|
4511
|
+
],
|
|
4512
|
+
"model_id": "TheBloke/AquilaChat2-34B-16K-GGUF",
|
|
4513
|
+
"model_file_name_template": "aquilachat2-34b-16k.{quantization}.gguf"
|
|
4514
|
+
},
|
|
4515
|
+
{
|
|
4516
|
+
"model_format": "gptq",
|
|
4517
|
+
"model_size_in_billions": 34,
|
|
4518
|
+
"quantizations": [
|
|
4519
|
+
"Int4"
|
|
4520
|
+
],
|
|
4521
|
+
"model_id": "TheBloke/AquilaChat2-34B-16K-GPTQ",
|
|
4522
|
+
"model_revision": "0afa1c2a55a4ee1a6f0dba81d9ec296dc7936b91"
|
|
4523
|
+
},
|
|
4524
|
+
{
|
|
4525
|
+
"model_format": "awq",
|
|
4526
|
+
"model_size_in_billions": 34,
|
|
4527
|
+
"quantizations": [
|
|
4528
|
+
"Int4"
|
|
4529
|
+
],
|
|
4530
|
+
"model_id": "TheBloke/AquilaChat2-34B-16K-AWQ",
|
|
4531
|
+
"model_revision": "db7403ca492416903c84a7a38b11cb5506de48b1"
|
|
4532
|
+
},
|
|
4533
|
+
{
|
|
4534
|
+
"model_format": "pytorch",
|
|
4535
|
+
"model_size_in_billions": 34,
|
|
4536
|
+
"quantizations": [
|
|
4537
|
+
"none"
|
|
4538
|
+
],
|
|
4539
|
+
"model_id": "BAAI/AquilaChat2-34B-16K",
|
|
4540
|
+
"model_revision": "a06fd164c7170714924d2881c61c8348425ebc94"
|
|
4541
|
+
}
|
|
4542
|
+
],
|
|
4543
|
+
"prompt_style": {
|
|
4544
|
+
"style_name": "ADD_COLON_SINGLE",
|
|
4545
|
+
"intra_message_sep": "\n",
|
|
4546
|
+
"system_prompt": "",
|
|
4547
|
+
"roles": [
|
|
4548
|
+
"USER",
|
|
4549
|
+
"ASSISTANT"
|
|
4550
|
+
],
|
|
4551
|
+
"stop_token_ids": [
|
|
4552
|
+
100006,
|
|
4553
|
+
100007
|
|
4554
|
+
],
|
|
4555
|
+
"stop": [
|
|
4556
|
+
"[CLS]",
|
|
4557
|
+
"</s>"
|
|
4558
|
+
]
|
|
4559
|
+
}
|
|
4560
|
+
},
|
|
4561
|
+
{
|
|
4562
|
+
"version": 1,
|
|
4563
|
+
"context_length": 4096,
|
|
4564
|
+
"model_name": "minicpm-2b-sft-bf16",
|
|
4565
|
+
"model_lang": [
|
|
4566
|
+
"zh"
|
|
4567
|
+
],
|
|
4568
|
+
"model_ability": [
|
|
4569
|
+
"chat"
|
|
4570
|
+
],
|
|
4571
|
+
"model_description": "MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
|
|
4572
|
+
"model_specs": [
|
|
4573
|
+
{
|
|
4574
|
+
"model_format": "pytorch",
|
|
4575
|
+
"model_size_in_billions": 2,
|
|
4576
|
+
"quantizations": [
|
|
4577
|
+
"none"
|
|
4578
|
+
],
|
|
4579
|
+
"model_id": "openbmb/MiniCPM-2B-sft-bf16",
|
|
4580
|
+
"model_revision": "fe1d74027ebdd81cef5f815fa3a2d432a6b5de2a"
|
|
4581
|
+
}
|
|
4582
|
+
],
|
|
4583
|
+
"prompt_style": {
|
|
4584
|
+
"style_name": "MINICPM-2B",
|
|
4585
|
+
"system_prompt": "",
|
|
4586
|
+
"roles": [
|
|
4587
|
+
"user",
|
|
4588
|
+
"assistant"
|
|
4589
|
+
],
|
|
4590
|
+
"stop_token_ids": [
|
|
4591
|
+
1,
|
|
4592
|
+
2
|
|
4593
|
+
],
|
|
4594
|
+
"stop": [
|
|
4595
|
+
"<s>",
|
|
4596
|
+
"</s>"
|
|
4597
|
+
]
|
|
4598
|
+
}
|
|
4599
|
+
},
|
|
4600
|
+
{
|
|
4601
|
+
"version": 1,
|
|
4602
|
+
"context_length": 4096,
|
|
4603
|
+
"model_name": "minicpm-2b-sft-fp32",
|
|
4604
|
+
"model_lang": [
|
|
4605
|
+
"zh"
|
|
4606
|
+
],
|
|
4607
|
+
"model_ability": [
|
|
4608
|
+
"chat"
|
|
4609
|
+
],
|
|
4610
|
+
"model_description": "MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
|
|
4611
|
+
"model_specs": [
|
|
4612
|
+
{
|
|
4613
|
+
"model_format": "pytorch",
|
|
4614
|
+
"model_size_in_billions": 2,
|
|
4615
|
+
"quantizations": [
|
|
4616
|
+
"none"
|
|
4617
|
+
],
|
|
4618
|
+
"model_id": "openbmb/MiniCPM-2B-sft-fp32",
|
|
4619
|
+
"model_revision": "35b90dd57d977b6e5bc4907986fa5b77aa15a82e"
|
|
4620
|
+
}
|
|
4621
|
+
],
|
|
4622
|
+
"prompt_style": {
|
|
4623
|
+
"style_name": "MINICPM-2B",
|
|
4624
|
+
"system_prompt": "",
|
|
4625
|
+
"roles": [
|
|
4626
|
+
"user",
|
|
4627
|
+
"assistant"
|
|
4628
|
+
],
|
|
4629
|
+
"stop_token_ids": [
|
|
4630
|
+
1,
|
|
4631
|
+
2
|
|
4632
|
+
],
|
|
4633
|
+
"stop": [
|
|
4634
|
+
"<s>",
|
|
4635
|
+
"</s>"
|
|
4636
|
+
]
|
|
4637
|
+
}
|
|
4638
|
+
},
|
|
4639
|
+
{
|
|
4640
|
+
"version": 1,
|
|
4641
|
+
"context_length": 4096,
|
|
4642
|
+
"model_name": "minicpm-2b-dpo-bf16",
|
|
4643
|
+
"model_lang": [
|
|
4644
|
+
"zh"
|
|
4645
|
+
],
|
|
4646
|
+
"model_ability": [
|
|
4647
|
+
"chat"
|
|
4648
|
+
],
|
|
4649
|
+
"model_description": "MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
|
|
4650
|
+
"model_specs": [
|
|
4651
|
+
{
|
|
4652
|
+
"model_format": "pytorch",
|
|
4653
|
+
"model_size_in_billions": 2,
|
|
4654
|
+
"quantizations": [
|
|
4655
|
+
"none"
|
|
4656
|
+
],
|
|
4657
|
+
"model_id": "openbmb/MiniCPM-2B-dpo-bf16",
|
|
4658
|
+
"model_revision": "f4a3ba49f3f18695945c2a7c12400d4da99da498"
|
|
4659
|
+
}
|
|
4660
|
+
],
|
|
4661
|
+
"prompt_style": {
|
|
4662
|
+
"style_name": "MINICPM-2B",
|
|
4663
|
+
"system_prompt": "",
|
|
4664
|
+
"roles": [
|
|
4665
|
+
"user",
|
|
4666
|
+
"assistant"
|
|
4667
|
+
],
|
|
4668
|
+
"stop_token_ids": [
|
|
4669
|
+
1,
|
|
4670
|
+
2
|
|
4671
|
+
],
|
|
4672
|
+
"stop": [
|
|
4673
|
+
"<s>",
|
|
4674
|
+
"</s>"
|
|
4675
|
+
]
|
|
4676
|
+
}
|
|
4677
|
+
},
|
|
4678
|
+
{
|
|
4679
|
+
"version": 1,
|
|
4680
|
+
"context_length": 4096,
|
|
4681
|
+
"model_name": "minicpm-2b-dpo-fp16",
|
|
4682
|
+
"model_lang": [
|
|
4683
|
+
"zh"
|
|
4684
|
+
],
|
|
4685
|
+
"model_ability": [
|
|
4686
|
+
"chat"
|
|
4687
|
+
],
|
|
4688
|
+
"model_description": "MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
|
|
4689
|
+
"model_specs": [
|
|
4690
|
+
{
|
|
4691
|
+
"model_format": "pytorch",
|
|
4692
|
+
"model_size_in_billions": 2,
|
|
4693
|
+
"quantizations": [
|
|
4694
|
+
"none"
|
|
4695
|
+
],
|
|
4696
|
+
"model_id": "openbmb/MiniCPM-2B-dpo-fp16",
|
|
4697
|
+
"model_revision": "e7a50289e4f839674cf8d4a5a2ce032ccacf64ac"
|
|
4698
|
+
}
|
|
4699
|
+
],
|
|
4700
|
+
"prompt_style": {
|
|
4701
|
+
"style_name": "MINICPM-2B",
|
|
4702
|
+
"system_prompt": "",
|
|
4703
|
+
"roles": [
|
|
4704
|
+
"user",
|
|
4705
|
+
"assistant"
|
|
4706
|
+
],
|
|
4707
|
+
"stop_token_ids": [
|
|
4708
|
+
1,
|
|
4709
|
+
2
|
|
4710
|
+
],
|
|
4711
|
+
"stop": [
|
|
4712
|
+
"<s>",
|
|
4713
|
+
"</s>"
|
|
4714
|
+
]
|
|
4715
|
+
}
|
|
4716
|
+
},
|
|
4717
|
+
{
|
|
4718
|
+
"version": 1,
|
|
4719
|
+
"context_length": 4096,
|
|
4720
|
+
"model_name": "minicpm-2b-dpo-fp32",
|
|
4721
|
+
"model_lang": [
|
|
4722
|
+
"zh"
|
|
4723
|
+
],
|
|
4724
|
+
"model_ability": [
|
|
4725
|
+
"chat"
|
|
4726
|
+
],
|
|
4727
|
+
"model_description": "MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
|
|
4728
|
+
"model_specs": [
|
|
4729
|
+
{
|
|
4730
|
+
"model_format": "pytorch",
|
|
4731
|
+
"model_size_in_billions": 2,
|
|
4732
|
+
"quantizations": [
|
|
4733
|
+
"none"
|
|
4734
|
+
],
|
|
4735
|
+
"model_id": "openbmb/MiniCPM-2B-dpo-fp32",
|
|
4736
|
+
"model_revision": "b560a1593779b735a84a6daf72fba96ae38da288"
|
|
4737
|
+
}
|
|
4738
|
+
],
|
|
4739
|
+
"prompt_style": {
|
|
4740
|
+
"style_name": "MINICPM-2B",
|
|
4741
|
+
"system_prompt": "",
|
|
4742
|
+
"roles": [
|
|
4743
|
+
"user",
|
|
4744
|
+
"assistant"
|
|
4745
|
+
],
|
|
4746
|
+
"stop_token_ids": [
|
|
4747
|
+
1,
|
|
4748
|
+
2
|
|
4749
|
+
],
|
|
4750
|
+
"stop": [
|
|
4751
|
+
"<s>",
|
|
4752
|
+
"</s>"
|
|
4753
|
+
]
|
|
4754
|
+
}
|
|
3891
4755
|
}
|
|
3892
4756
|
]
|