xinference 0.10.0__py3-none-any.whl → 0.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +34 -15
- xinference/client/oscar/actor_client.py +4 -3
- xinference/client/restful/restful_client.py +40 -18
- xinference/core/supervisor.py +48 -9
- xinference/core/worker.py +13 -8
- xinference/deploy/cmdline.py +22 -9
- xinference/model/audio/__init__.py +40 -1
- xinference/model/audio/core.py +25 -45
- xinference/model/audio/custom.py +148 -0
- xinference/model/core.py +6 -9
- xinference/model/embedding/core.py +1 -2
- xinference/model/embedding/model_spec.json +24 -0
- xinference/model/embedding/model_spec_modelscope.json +24 -0
- xinference/model/image/core.py +12 -4
- xinference/model/image/stable_diffusion/core.py +8 -7
- xinference/model/llm/__init__.py +0 -6
- xinference/model/llm/core.py +9 -14
- xinference/model/llm/ggml/llamacpp.py +2 -10
- xinference/model/llm/llm_family.json +507 -7
- xinference/model/llm/llm_family.py +41 -4
- xinference/model/llm/llm_family_modelscope.json +260 -0
- xinference/model/llm/pytorch/baichuan.py +4 -3
- xinference/model/llm/pytorch/chatglm.py +5 -2
- xinference/model/llm/pytorch/core.py +37 -41
- xinference/model/llm/pytorch/falcon.py +6 -5
- xinference/model/llm/pytorch/internlm2.py +5 -2
- xinference/model/llm/pytorch/llama_2.py +6 -5
- xinference/model/llm/pytorch/qwen_vl.py +2 -0
- xinference/model/llm/pytorch/vicuna.py +4 -3
- xinference/model/llm/pytorch/yi_vl.py +4 -2
- xinference/model/llm/utils.py +42 -4
- xinference/model/llm/vllm/core.py +54 -6
- xinference/model/rerank/core.py +26 -12
- xinference/model/rerank/model_spec.json +24 -0
- xinference/model/rerank/model_spec_modelscope.json +25 -1
- xinference/model/utils.py +12 -1
- xinference/thirdparty/omnilmm/chat.py +1 -1
- xinference/types.py +70 -19
- xinference/utils.py +1 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.26fdbfbe.js +3 -0
- xinference/web/ui/build/static/js/main.26fdbfbe.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/15e2cf8cd8d0989719b6349428ff576f9009ff4c2dcc52378be0bd938e82495e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1870cd6f7054d04e049e363c0a85526584fe25519378609d2838e28d7492bbf1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1e86938a0cdf706d21e99b21f5d868fa247c0c88b26807047e26dcdc4d9a9db3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3c2f277c93c5f1638e08db38df0d0fb4e58d1c5571aea03241a5c04ff4094704.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5393569d846332075b93b55656716a34f50e0a8c970be789502d7e6c49755fd7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/59ce49eae0f486af4c5034d4d2f9ca77c3ec3a32ecc560085caf5ef482b5f4c9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63a4c48f0326d071c7772c46598215c006ae41fd3d4ff3577fe717de66ad6e89.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b9cbcb6d77ba21b22c6950b6fb5b305d23c19cf747f99f7d48b6b046f8f7b1b0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/de0299226173b0662b573f49e3992220f6611947073bd66ac079728a8bc8837d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e6eccc9aa641e7da833492e27846dc965f9750281420977dc84654ca6ed221e4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e9b52d171223bb59fb918316297a051cdfd42dd453e8260fd918e90bc0a4ebdf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f4d5d1a41892a754c1ee0237450d804b20612d1b657945b59e564161ea47aa7a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f9290c0738db50065492ceedc6a4af25083fe18399b7c44d942273349ad9e643.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/fad4cd70de36ef6e6d5f8fd74a10ded58d964a8a91ef7681693fbb8376552da7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +1 -0
- {xinference-0.10.0.dist-info → xinference-0.10.2.dist-info}/METADATA +13 -10
- {xinference-0.10.0.dist-info → xinference-0.10.2.dist-info}/RECORD +71 -74
- xinference/model/llm/ggml/ctransformers.py +0 -281
- xinference/model/llm/ggml/ctransformers_util.py +0 -161
- xinference/web/ui/build/static/js/main.98516614.js +0 -3
- xinference/web/ui/build/static/js/main.98516614.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0bd70b1ecf307e2681318e864f4692305b6350c8683863007f4caf2f9ac33b6e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/139969fd25258eb7decc9505f30b779089bba50c402bb5c663008477c7bff73b.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/18e5d5422e2464abf4a3e6d38164570e2e426e0a921e9a2628bbae81b18da353.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3e055de705e397e1d413d7f429589b1a98dd78ef378b97f0cdb462c5f2487d5e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3f357ab57b8e7fade54c667f0e0ebf2787566f72bfdca0fea14e395b5c203753.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/52aa27272b4b9968f62666262b47661cb1992336a2aff3b13994cc36877b3ec3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/60c4b98d8ea7479fb0c94cfd19c8128f17bd7e27a1e73e6dd9adf6e9d88d18eb.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/7e094845f611802b024b57439cbf911038169d06cdf6c34a72a7277f35aa71a4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/98b7ef307f436affe13d75a4f265b27e828ccc2b10ffae6513abe2681bc11971.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9d7c49815d97539207e5aab2fb967591b5fed7791218a0762539efc9491f36af.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b400cfc9db57fa6c70cd2bad055b73c5079fde0ed37974009d898083f6af8cd8.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d0d0b591d9adaf42b83ad6633f8b7c118541a4b80ea957c303d3bf9b86fbad0a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e1d9b2ae4e1248658704bc6bfc5d6160dcd1a9e771ea4ae8c1fed0aaddeedd29.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +0 -1
- /xinference/web/ui/build/static/js/{main.98516614.js.LICENSE.txt → main.26fdbfbe.js.LICENSE.txt} +0 -0
- {xinference-0.10.0.dist-info → xinference-0.10.2.dist-info}/LICENSE +0 -0
- {xinference-0.10.0.dist-info → xinference-0.10.2.dist-info}/WHEEL +0 -0
- {xinference-0.10.0.dist-info → xinference-0.10.2.dist-info}/entry_points.txt +0 -0
- {xinference-0.10.0.dist-info → xinference-0.10.2.dist-info}/top_level.txt +0 -0
|
@@ -913,6 +913,38 @@
|
|
|
913
913
|
"model_id": "meta-llama/Llama-2-7b-chat-hf",
|
|
914
914
|
"model_revision": "08751db2aca9bf2f7f80d2e516117a53d7450235"
|
|
915
915
|
},
|
|
916
|
+
{
|
|
917
|
+
"model_format": "gptq",
|
|
918
|
+
"model_size_in_billions": 7,
|
|
919
|
+
"quantizations": [
|
|
920
|
+
"Int4"
|
|
921
|
+
],
|
|
922
|
+
"model_id": "TheBloke/Llama-2-7B-Chat-GPTQ"
|
|
923
|
+
},
|
|
924
|
+
{
|
|
925
|
+
"model_format": "gptq",
|
|
926
|
+
"model_size_in_billions": 70,
|
|
927
|
+
"quantizations": [
|
|
928
|
+
"Int4"
|
|
929
|
+
],
|
|
930
|
+
"model_id": "TheBloke/Llama-2-70B-Chat-GPTQ"
|
|
931
|
+
},
|
|
932
|
+
{
|
|
933
|
+
"model_format": "awq",
|
|
934
|
+
"model_size_in_billions": 70,
|
|
935
|
+
"quantizations": [
|
|
936
|
+
"Int4"
|
|
937
|
+
],
|
|
938
|
+
"model_id": "TheBloke/Llama-2-70B-Chat-AWQ"
|
|
939
|
+
},
|
|
940
|
+
{
|
|
941
|
+
"model_format": "awq",
|
|
942
|
+
"model_size_in_billions": 7,
|
|
943
|
+
"quantizations": [
|
|
944
|
+
"Int4"
|
|
945
|
+
],
|
|
946
|
+
"model_id": "TheBloke/Llama-2-7B-Chat-AWQ"
|
|
947
|
+
},
|
|
916
948
|
{
|
|
917
949
|
"model_format": "pytorch",
|
|
918
950
|
"model_size_in_billions": 13,
|
|
@@ -924,6 +956,22 @@
|
|
|
924
956
|
"model_id": "meta-llama/Llama-2-13b-chat-hf",
|
|
925
957
|
"model_revision": "0ba94ac9b9e1d5a0037780667e8b219adde1908c"
|
|
926
958
|
},
|
|
959
|
+
{
|
|
960
|
+
"model_format": "gptq",
|
|
961
|
+
"model_size_in_billions": 13,
|
|
962
|
+
"quantizations": [
|
|
963
|
+
"Int4"
|
|
964
|
+
],
|
|
965
|
+
"model_id": "TheBloke/Llama-2-13B-chat-GPTQ"
|
|
966
|
+
},
|
|
967
|
+
{
|
|
968
|
+
"model_format": "awq",
|
|
969
|
+
"model_size_in_billions": 13,
|
|
970
|
+
"quantizations": [
|
|
971
|
+
"Int4"
|
|
972
|
+
],
|
|
973
|
+
"model_id": "TheBloke/Llama-2-13B-chat-AWQ"
|
|
974
|
+
},
|
|
927
975
|
{
|
|
928
976
|
"model_format": "pytorch",
|
|
929
977
|
"model_size_in_billions": 70,
|
|
@@ -1045,6 +1093,22 @@
|
|
|
1045
1093
|
"model_id": "TheBloke/Llama-2-7B-GGML",
|
|
1046
1094
|
"model_file_name_template": "llama-2-7b.ggmlv3.{quantization}.bin"
|
|
1047
1095
|
},
|
|
1096
|
+
{
|
|
1097
|
+
"model_format": "gptq",
|
|
1098
|
+
"model_size_in_billions": 7,
|
|
1099
|
+
"quantizations": [
|
|
1100
|
+
"Int4"
|
|
1101
|
+
],
|
|
1102
|
+
"model_id": "TheBloke/Llama-2-7B-GPTQ"
|
|
1103
|
+
},
|
|
1104
|
+
{
|
|
1105
|
+
"model_format": "awq",
|
|
1106
|
+
"model_size_in_billions": 7,
|
|
1107
|
+
"quantizations": [
|
|
1108
|
+
"Int4"
|
|
1109
|
+
],
|
|
1110
|
+
"model_id": "TheBloke/Llama-2-7B-AWQ"
|
|
1111
|
+
},
|
|
1048
1112
|
{
|
|
1049
1113
|
"model_format": "ggmlv3",
|
|
1050
1114
|
"model_size_in_billions": 13,
|
|
@@ -1111,6 +1175,22 @@
|
|
|
1111
1175
|
"model_id": "meta-llama/Llama-2-13b-hf",
|
|
1112
1176
|
"model_revision": "db6b8eb1feabb38985fdf785a89895959e944936"
|
|
1113
1177
|
},
|
|
1178
|
+
{
|
|
1179
|
+
"model_format": "gptq",
|
|
1180
|
+
"model_size_in_billions": 13,
|
|
1181
|
+
"quantizations": [
|
|
1182
|
+
"Int4"
|
|
1183
|
+
],
|
|
1184
|
+
"model_id": "TheBloke/Llama-2-13B-GPTQ"
|
|
1185
|
+
},
|
|
1186
|
+
{
|
|
1187
|
+
"model_format": "awq",
|
|
1188
|
+
"model_size_in_billions": 13,
|
|
1189
|
+
"quantizations": [
|
|
1190
|
+
"Int4"
|
|
1191
|
+
],
|
|
1192
|
+
"model_id": "TheBloke/Llama-2-13B-AWQ"
|
|
1193
|
+
},
|
|
1114
1194
|
{
|
|
1115
1195
|
"model_format": "pytorch",
|
|
1116
1196
|
"model_size_in_billions": 70,
|
|
@@ -1121,6 +1201,22 @@
|
|
|
1121
1201
|
],
|
|
1122
1202
|
"model_id": "meta-llama/Llama-2-70b-hf",
|
|
1123
1203
|
"model_revision": "cc8aa03a000ff08b4d5c5b39673321a2a396c396"
|
|
1204
|
+
},
|
|
1205
|
+
{
|
|
1206
|
+
"model_format": "gptq",
|
|
1207
|
+
"model_size_in_billions": 70,
|
|
1208
|
+
"quantizations": [
|
|
1209
|
+
"Int4"
|
|
1210
|
+
],
|
|
1211
|
+
"model_id": "TheBloke/Llama-2-70B-GPTQ"
|
|
1212
|
+
},
|
|
1213
|
+
{
|
|
1214
|
+
"model_format": "awq",
|
|
1215
|
+
"model_size_in_billions": 70,
|
|
1216
|
+
"quantizations": [
|
|
1217
|
+
"Int4"
|
|
1218
|
+
],
|
|
1219
|
+
"model_id": "TheBloke/Llama-2-70B-AWQ"
|
|
1124
1220
|
}
|
|
1125
1221
|
]
|
|
1126
1222
|
},
|
|
@@ -1509,6 +1605,16 @@
|
|
|
1509
1605
|
],
|
|
1510
1606
|
"model_id": "Qwen/Qwen1.5-14B-Chat"
|
|
1511
1607
|
},
|
|
1608
|
+
{
|
|
1609
|
+
"model_format": "pytorch",
|
|
1610
|
+
"model_size_in_billions": 32,
|
|
1611
|
+
"quantizations": [
|
|
1612
|
+
"4-bit",
|
|
1613
|
+
"8-bit",
|
|
1614
|
+
"none"
|
|
1615
|
+
],
|
|
1616
|
+
"model_id": "Qwen/Qwen1.5-32B-Chat"
|
|
1617
|
+
},
|
|
1512
1618
|
{
|
|
1513
1619
|
"model_format": "pytorch",
|
|
1514
1620
|
"model_size_in_billions": 72,
|
|
@@ -1564,6 +1670,14 @@
|
|
|
1564
1670
|
],
|
|
1565
1671
|
"model_id": "Qwen/Qwen1.5-14B-Chat-GPTQ-{quantization}"
|
|
1566
1672
|
},
|
|
1673
|
+
{
|
|
1674
|
+
"model_format": "gptq",
|
|
1675
|
+
"model_size_in_billions": 32,
|
|
1676
|
+
"quantizations": [
|
|
1677
|
+
"Int4"
|
|
1678
|
+
],
|
|
1679
|
+
"model_id": "Qwen/Qwen1.5-32B-Chat-GPTQ-{quantization}"
|
|
1680
|
+
},
|
|
1567
1681
|
{
|
|
1568
1682
|
"model_format": "gptq",
|
|
1569
1683
|
"model_size_in_billions": 72,
|
|
@@ -1613,6 +1727,14 @@
|
|
|
1613
1727
|
],
|
|
1614
1728
|
"model_id": "Qwen/Qwen1.5-14B-Chat-AWQ"
|
|
1615
1729
|
},
|
|
1730
|
+
{
|
|
1731
|
+
"model_format": "awq",
|
|
1732
|
+
"model_size_in_billions": 32,
|
|
1733
|
+
"quantizations": [
|
|
1734
|
+
"Int4"
|
|
1735
|
+
],
|
|
1736
|
+
"model_id": "Qwen/Qwen1.5-32B-Chat-AWQ"
|
|
1737
|
+
},
|
|
1616
1738
|
{
|
|
1617
1739
|
"model_format": "awq",
|
|
1618
1740
|
"model_size_in_billions": 72,
|
|
@@ -1701,6 +1823,22 @@
|
|
|
1701
1823
|
"model_id": "Qwen/Qwen1.5-14B-Chat-GGUF",
|
|
1702
1824
|
"model_file_name_template": "qwen1_5-14b-chat-{quantization}.gguf"
|
|
1703
1825
|
},
|
|
1826
|
+
{
|
|
1827
|
+
"model_format": "ggufv2",
|
|
1828
|
+
"model_size_in_billions": 32,
|
|
1829
|
+
"quantizations": [
|
|
1830
|
+
"q2_k",
|
|
1831
|
+
"q3_k_m",
|
|
1832
|
+
"q4_0",
|
|
1833
|
+
"q4_k_m",
|
|
1834
|
+
"q5_0",
|
|
1835
|
+
"q5_k_m",
|
|
1836
|
+
"q6_k",
|
|
1837
|
+
"q8_0"
|
|
1838
|
+
],
|
|
1839
|
+
"model_id": "Qwen/Qwen1.5-32B-Chat-GGUF",
|
|
1840
|
+
"model_file_name_template": "qwen1_5-32b-chat-{quantization}.gguf"
|
|
1841
|
+
},
|
|
1704
1842
|
{
|
|
1705
1843
|
"model_format": "ggufv2",
|
|
1706
1844
|
"model_size_in_billions": 72,
|
|
@@ -1740,6 +1878,126 @@
|
|
|
1740
1878
|
]
|
|
1741
1879
|
}
|
|
1742
1880
|
},
|
|
1881
|
+
{
|
|
1882
|
+
"version": 1,
|
|
1883
|
+
"context_length": 32768,
|
|
1884
|
+
"model_name": "qwen1.5-moe-chat",
|
|
1885
|
+
"model_lang": [
|
|
1886
|
+
"en",
|
|
1887
|
+
"zh"
|
|
1888
|
+
],
|
|
1889
|
+
"model_ability": [
|
|
1890
|
+
"chat"
|
|
1891
|
+
],
|
|
1892
|
+
"model_description": "Qwen1.5-MoE is a transformer-based MoE decoder-only language model pretrained on a large amount of data.",
|
|
1893
|
+
"model_specs": [
|
|
1894
|
+
{
|
|
1895
|
+
"model_format": "pytorch",
|
|
1896
|
+
"model_size_in_billions": "2_7",
|
|
1897
|
+
"quantizations": [
|
|
1898
|
+
"4-bit",
|
|
1899
|
+
"8-bit",
|
|
1900
|
+
"none"
|
|
1901
|
+
],
|
|
1902
|
+
"model_id": "Qwen/Qwen1.5-MoE-A2.7B-Chat"
|
|
1903
|
+
},
|
|
1904
|
+
{
|
|
1905
|
+
"model_format": "gptq",
|
|
1906
|
+
"model_size_in_billions": "2_7",
|
|
1907
|
+
"quantizations": [
|
|
1908
|
+
"Int4"
|
|
1909
|
+
],
|
|
1910
|
+
"model_id": "Qwen/Qwen1.5-MoE-A2.7B-Chat-GPTQ-Int4"
|
|
1911
|
+
}
|
|
1912
|
+
],
|
|
1913
|
+
"prompt_style": {
|
|
1914
|
+
"style_name": "QWEN",
|
|
1915
|
+
"system_prompt": "You are a helpful assistant.",
|
|
1916
|
+
"roles": [
|
|
1917
|
+
"user",
|
|
1918
|
+
"assistant"
|
|
1919
|
+
],
|
|
1920
|
+
"intra_message_sep": "\n",
|
|
1921
|
+
"stop_token_ids": [
|
|
1922
|
+
151643,
|
|
1923
|
+
151644,
|
|
1924
|
+
151645
|
|
1925
|
+
],
|
|
1926
|
+
"stop": [
|
|
1927
|
+
"<|endoftext|>",
|
|
1928
|
+
"<|im_start|>",
|
|
1929
|
+
"<|im_end|>"
|
|
1930
|
+
]
|
|
1931
|
+
}
|
|
1932
|
+
},
|
|
1933
|
+
{
|
|
1934
|
+
"version": 1,
|
|
1935
|
+
"context_length": 65536,
|
|
1936
|
+
"model_name": "codeqwen1.5-chat",
|
|
1937
|
+
"model_lang": [
|
|
1938
|
+
"en",
|
|
1939
|
+
"zh"
|
|
1940
|
+
],
|
|
1941
|
+
"model_ability": [
|
|
1942
|
+
"chat"
|
|
1943
|
+
],
|
|
1944
|
+
"model_description": "CodeQwen1.5 is the code-specific version of Qwen1.5. It is a transformer-based decoder-only language model pretrained on a large amount of code data.",
|
|
1945
|
+
"model_specs": [
|
|
1946
|
+
{
|
|
1947
|
+
"model_format": "ggufv2",
|
|
1948
|
+
"model_size_in_billions": 7,
|
|
1949
|
+
"quantizations": [
|
|
1950
|
+
"q2_k",
|
|
1951
|
+
"q3_k_m",
|
|
1952
|
+
"q4_0",
|
|
1953
|
+
"q4_k_m",
|
|
1954
|
+
"q5_0",
|
|
1955
|
+
"q5_k_m",
|
|
1956
|
+
"q6_k",
|
|
1957
|
+
"q8_0"
|
|
1958
|
+
],
|
|
1959
|
+
"model_id": "Qwen/CodeQwen1.5-7B-Chat-GGUF",
|
|
1960
|
+
"model_file_name_template": "codeqwen-1_5-7b-chat-{quantization}.gguf"
|
|
1961
|
+
},
|
|
1962
|
+
{
|
|
1963
|
+
"model_format": "pytorch",
|
|
1964
|
+
"model_size_in_billions": 7,
|
|
1965
|
+
"quantizations": [
|
|
1966
|
+
"4-bit",
|
|
1967
|
+
"8-bit",
|
|
1968
|
+
"none"
|
|
1969
|
+
],
|
|
1970
|
+
"model_id": "Qwen/CodeQwen1.5-7B-Chat"
|
|
1971
|
+
},
|
|
1972
|
+
{
|
|
1973
|
+
"model_format": "awq",
|
|
1974
|
+
"model_size_in_billions": 7,
|
|
1975
|
+
"quantizations": [
|
|
1976
|
+
"Int4"
|
|
1977
|
+
],
|
|
1978
|
+
"model_id": "Qwen/CodeQwen1.5-7B-Chat-AWQ"
|
|
1979
|
+
}
|
|
1980
|
+
],
|
|
1981
|
+
"prompt_style": {
|
|
1982
|
+
"style_name": "QWEN",
|
|
1983
|
+
"system_prompt": "You are a helpful assistant.",
|
|
1984
|
+
"roles": [
|
|
1985
|
+
"user",
|
|
1986
|
+
"assistant"
|
|
1987
|
+
],
|
|
1988
|
+
"intra_message_sep": "\n",
|
|
1989
|
+
"stop_token_ids": [
|
|
1990
|
+
151643,
|
|
1991
|
+
151644,
|
|
1992
|
+
151645
|
|
1993
|
+
],
|
|
1994
|
+
"stop": [
|
|
1995
|
+
"<|endoftext|>",
|
|
1996
|
+
"<|im_start|>",
|
|
1997
|
+
"<|im_end|>"
|
|
1998
|
+
]
|
|
1999
|
+
}
|
|
2000
|
+
},
|
|
1743
2001
|
{
|
|
1744
2002
|
"version": 1,
|
|
1745
2003
|
"context_length": 8192,
|
|
@@ -1780,13 +2038,13 @@
|
|
|
1780
2038
|
"model_description": "GPT-2 is a Transformer-based LLM that is trained on WebText, a 40 GB dataset of web pages linked from Reddit posts with 3+ upvotes.",
|
|
1781
2039
|
"model_specs": [
|
|
1782
2040
|
{
|
|
1783
|
-
"model_format": "
|
|
1784
|
-
"model_size_in_billions":
|
|
2041
|
+
"model_format": "pytorch",
|
|
2042
|
+
"model_size_in_billions": "1_5",
|
|
1785
2043
|
"quantizations": [
|
|
1786
2044
|
"none"
|
|
1787
2045
|
],
|
|
1788
|
-
"model_id": "
|
|
1789
|
-
"
|
|
2046
|
+
"model_id": "openai-community/gpt2",
|
|
2047
|
+
"model_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e"
|
|
1790
2048
|
}
|
|
1791
2049
|
]
|
|
1792
2050
|
},
|
|
@@ -2569,6 +2827,22 @@
|
|
|
2569
2827
|
"model_id": "mistralai/Mistral-7B-Instruct-v0.1",
|
|
2570
2828
|
"model_revision": "54766df6d50e4d3d7ccd66758e5341ba105a6d36"
|
|
2571
2829
|
},
|
|
2830
|
+
{
|
|
2831
|
+
"model_format": "awq",
|
|
2832
|
+
"model_size_in_billions": 7,
|
|
2833
|
+
"quantizations": [
|
|
2834
|
+
"Int4"
|
|
2835
|
+
],
|
|
2836
|
+
"model_id": "TheBloke/Mistral-7B-Instruct-v0.1-AWQ"
|
|
2837
|
+
},
|
|
2838
|
+
{
|
|
2839
|
+
"model_format": "gptq",
|
|
2840
|
+
"model_size_in_billions": 7,
|
|
2841
|
+
"quantizations": [
|
|
2842
|
+
"Int4"
|
|
2843
|
+
],
|
|
2844
|
+
"model_id": "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
|
|
2845
|
+
},
|
|
2572
2846
|
{
|
|
2573
2847
|
"model_format": "ggufv2",
|
|
2574
2848
|
"model_size_in_billions": 7,
|
|
@@ -2630,6 +2904,22 @@
|
|
|
2630
2904
|
"model_id": "mistralai/Mistral-7B-Instruct-v0.2",
|
|
2631
2905
|
"model_revision": "b70aa86578567ba3301b21c8a27bea4e8f6d6d61"
|
|
2632
2906
|
},
|
|
2907
|
+
{
|
|
2908
|
+
"model_format": "gptq",
|
|
2909
|
+
"model_size_in_billions": 7,
|
|
2910
|
+
"quantizations": [
|
|
2911
|
+
"Int4"
|
|
2912
|
+
],
|
|
2913
|
+
"model_id": "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"
|
|
2914
|
+
},
|
|
2915
|
+
{
|
|
2916
|
+
"model_format": "awq",
|
|
2917
|
+
"model_size_in_billions": 7,
|
|
2918
|
+
"quantizations": [
|
|
2919
|
+
"Int4"
|
|
2920
|
+
],
|
|
2921
|
+
"model_id": "TheBloke/Mistral-7B-Instruct-v0.2-AWQ"
|
|
2922
|
+
},
|
|
2633
2923
|
{
|
|
2634
2924
|
"model_format": "ggufv2",
|
|
2635
2925
|
"model_size_in_billions": 7,
|
|
@@ -2790,6 +3080,14 @@
|
|
|
2790
3080
|
"model_id": "mistralai/Mixtral-8x7B-v0.1",
|
|
2791
3081
|
"model_revision": "58301445dc1378584211722b7ebf8743ec4e192b"
|
|
2792
3082
|
},
|
|
3083
|
+
{
|
|
3084
|
+
"model_format": "gptq",
|
|
3085
|
+
"model_size_in_billions": "46_7",
|
|
3086
|
+
"quantizations": [
|
|
3087
|
+
"Int4"
|
|
3088
|
+
],
|
|
3089
|
+
"model_id": "TheBloke/Mixtral-8x7B-v0.1-GPTQ"
|
|
3090
|
+
},
|
|
2793
3091
|
{
|
|
2794
3092
|
"model_format": "ggufv2",
|
|
2795
3093
|
"model_size_in_billions": "46_7",
|
|
@@ -2839,10 +3137,17 @@
|
|
|
2839
3137
|
"model_format": "awq",
|
|
2840
3138
|
"model_size_in_billions": "46_7",
|
|
2841
3139
|
"quantizations": [
|
|
2842
|
-
"
|
|
3140
|
+
"Int4"
|
|
2843
3141
|
],
|
|
2844
|
-
"model_id": "TheBloke/Mixtral-8x7B-Instruct-v0.1-AWQ"
|
|
2845
|
-
|
|
3142
|
+
"model_id": "TheBloke/Mixtral-8x7B-Instruct-v0.1-AWQ"
|
|
3143
|
+
},
|
|
3144
|
+
{
|
|
3145
|
+
"model_format": "gptq",
|
|
3146
|
+
"model_size_in_billions": "46_7",
|
|
3147
|
+
"quantizations": [
|
|
3148
|
+
"Int4"
|
|
3149
|
+
],
|
|
3150
|
+
"model_id": "TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ"
|
|
2846
3151
|
},
|
|
2847
3152
|
{
|
|
2848
3153
|
"model_format": "ggufv2",
|
|
@@ -4515,5 +4820,200 @@
|
|
|
4515
4820
|
"</s>"
|
|
4516
4821
|
]
|
|
4517
4822
|
}
|
|
4823
|
+
},
|
|
4824
|
+
{
|
|
4825
|
+
"version": 1,
|
|
4826
|
+
"context_length": 8192,
|
|
4827
|
+
"model_name": "seallm_v2",
|
|
4828
|
+
"model_lang": [
|
|
4829
|
+
"en",
|
|
4830
|
+
"zh",
|
|
4831
|
+
"vi",
|
|
4832
|
+
"id",
|
|
4833
|
+
"th",
|
|
4834
|
+
"ms",
|
|
4835
|
+
"km",
|
|
4836
|
+
"lo",
|
|
4837
|
+
"my",
|
|
4838
|
+
"tl"
|
|
4839
|
+
],
|
|
4840
|
+
"model_ability": [
|
|
4841
|
+
"generate"
|
|
4842
|
+
],
|
|
4843
|
+
"model_description": "We introduce SeaLLM-7B-v2, the state-of-the-art multilingual LLM for Southeast Asian (SEA) languages",
|
|
4844
|
+
"model_specs": [
|
|
4845
|
+
{
|
|
4846
|
+
"model_format": "pytorch",
|
|
4847
|
+
"model_size_in_billions": 7,
|
|
4848
|
+
"quantizations": [
|
|
4849
|
+
"none"
|
|
4850
|
+
],
|
|
4851
|
+
"model_id": "SeaLLMs/SeaLLM-7B-v2",
|
|
4852
|
+
"model_revision": "f1bd48e0d75365c24a3c5ad006b2d0a0c9dca30f"
|
|
4853
|
+
},
|
|
4854
|
+
{
|
|
4855
|
+
"model_format": "ggufv2",
|
|
4856
|
+
"model_size_in_billions": 7,
|
|
4857
|
+
"quantizations": [
|
|
4858
|
+
"Q4_0",
|
|
4859
|
+
"Q8_0"
|
|
4860
|
+
],
|
|
4861
|
+
"model_id": "SeaLLMs/SeaLLM-7B-v2-gguf",
|
|
4862
|
+
"model_file_name_template": "SeaLLM-7B-v2.{quantization}.gguf"
|
|
4863
|
+
}
|
|
4864
|
+
]
|
|
4865
|
+
},
|
|
4866
|
+
{
|
|
4867
|
+
"version": 1,
|
|
4868
|
+
"context_length": 8192,
|
|
4869
|
+
"model_name": "seallm_v2.5",
|
|
4870
|
+
"model_lang": [
|
|
4871
|
+
"en",
|
|
4872
|
+
"zh",
|
|
4873
|
+
"vi",
|
|
4874
|
+
"id",
|
|
4875
|
+
"th",
|
|
4876
|
+
"ms",
|
|
4877
|
+
"km",
|
|
4878
|
+
"lo",
|
|
4879
|
+
"my",
|
|
4880
|
+
"tl"
|
|
4881
|
+
],
|
|
4882
|
+
"model_ability": [
|
|
4883
|
+
"generate"
|
|
4884
|
+
],
|
|
4885
|
+
"model_description": "We introduce SeaLLM-7B-v2.5, the state-of-the-art multilingual LLM for Southeast Asian (SEA) languages",
|
|
4886
|
+
"model_specs": [
|
|
4887
|
+
{
|
|
4888
|
+
"model_format": "pytorch",
|
|
4889
|
+
"model_size_in_billions": 7,
|
|
4890
|
+
"quantizations": [
|
|
4891
|
+
"none"
|
|
4892
|
+
],
|
|
4893
|
+
"model_id": "SeaLLMs/SeaLLM-7B-v2.5",
|
|
4894
|
+
"model_revision": "c54a8eb8e2d58c5a680bfbbe3a7ae71753bb644b"
|
|
4895
|
+
},
|
|
4896
|
+
{
|
|
4897
|
+
"model_format": "ggufv2",
|
|
4898
|
+
"model_size_in_billions": 7,
|
|
4899
|
+
"quantizations": [
|
|
4900
|
+
"Q4_K_M",
|
|
4901
|
+
"Q8_0"
|
|
4902
|
+
],
|
|
4903
|
+
"model_id": "SeaLLMs/SeaLLM-7B-v2.5-GGUF",
|
|
4904
|
+
"model_file_name_template": "SeaLLM-7B-v2.5.{quantization}.gguf"
|
|
4905
|
+
}
|
|
4906
|
+
]
|
|
4907
|
+
},
|
|
4908
|
+
{
|
|
4909
|
+
"version": 1,
|
|
4910
|
+
"context_length": 131072,
|
|
4911
|
+
"model_name": "c4ai-command-r-v01",
|
|
4912
|
+
"model_lang": [
|
|
4913
|
+
"en",
|
|
4914
|
+
"fr",
|
|
4915
|
+
"de",
|
|
4916
|
+
"es",
|
|
4917
|
+
"it",
|
|
4918
|
+
"pt",
|
|
4919
|
+
"ja",
|
|
4920
|
+
"ko",
|
|
4921
|
+
"zh",
|
|
4922
|
+
"ar"
|
|
4923
|
+
],
|
|
4924
|
+
"model_ability": [
|
|
4925
|
+
"generate"
|
|
4926
|
+
],
|
|
4927
|
+
"model_description": "C4AI Command-R is a research release of a 35 billion parameter highly performant generative model.",
|
|
4928
|
+
"model_specs": [
|
|
4929
|
+
{
|
|
4930
|
+
"model_format": "pytorch",
|
|
4931
|
+
"model_size_in_billions": 35,
|
|
4932
|
+
"quantizations": [
|
|
4933
|
+
"none"
|
|
4934
|
+
],
|
|
4935
|
+
"model_id": "CohereForAI/c4ai-command-r-v01",
|
|
4936
|
+
"model_revision": "16881ccde1c68bbc7041280e6a66637bc46bfe88"
|
|
4937
|
+
},
|
|
4938
|
+
{
|
|
4939
|
+
"model_format": "ggufv2",
|
|
4940
|
+
"model_size_in_billions": 35,
|
|
4941
|
+
"quantizations": [
|
|
4942
|
+
"Q2_K",
|
|
4943
|
+
"Q3_K_L",
|
|
4944
|
+
"Q3_K_M",
|
|
4945
|
+
"Q3_K_S",
|
|
4946
|
+
"Q4_0",
|
|
4947
|
+
"Q4_K_M",
|
|
4948
|
+
"Q4_K_S",
|
|
4949
|
+
"Q5_0",
|
|
4950
|
+
"Q5_K_M",
|
|
4951
|
+
"Q5_K_S",
|
|
4952
|
+
"Q6_K",
|
|
4953
|
+
"Q8_0"
|
|
4954
|
+
],
|
|
4955
|
+
"model_id": "andrewcanis/c4ai-command-r-v01-GGUF",
|
|
4956
|
+
"model_file_name_template": "c4ai-command-r-v01.{quantization}.gguf"
|
|
4957
|
+
},
|
|
4958
|
+
{
|
|
4959
|
+
"model_format": "pytorch",
|
|
4960
|
+
"model_size_in_billions": 104,
|
|
4961
|
+
"quantizations": [
|
|
4962
|
+
"none"
|
|
4963
|
+
],
|
|
4964
|
+
"model_id": "CohereForAI/c4ai-command-r-plus",
|
|
4965
|
+
"model_revision": "ba7f1d954c9d1609013677d87e4142ab95c34e62"
|
|
4966
|
+
},
|
|
4967
|
+
{
|
|
4968
|
+
"model_format": "gptq",
|
|
4969
|
+
"model_size_in_billions": 104,
|
|
4970
|
+
"quantizations": [
|
|
4971
|
+
"Int4"
|
|
4972
|
+
],
|
|
4973
|
+
"model_id": "alpindale/c4ai-command-r-plus-GPTQ",
|
|
4974
|
+
"model_revision": "35febfc08f723ac0df32480eb4af349a7d08656e"
|
|
4975
|
+
}
|
|
4976
|
+
]
|
|
4977
|
+
},
|
|
4978
|
+
{
|
|
4979
|
+
"version": 1,
|
|
4980
|
+
"context_length": 131072,
|
|
4981
|
+
"model_name": "c4ai-command-r-v01-4bit",
|
|
4982
|
+
"model_lang": [
|
|
4983
|
+
"en",
|
|
4984
|
+
"fr",
|
|
4985
|
+
"de",
|
|
4986
|
+
"es",
|
|
4987
|
+
"it",
|
|
4988
|
+
"pt",
|
|
4989
|
+
"ja",
|
|
4990
|
+
"ko",
|
|
4991
|
+
"zh",
|
|
4992
|
+
"ar"
|
|
4993
|
+
],
|
|
4994
|
+
"model_ability": [
|
|
4995
|
+
"generate"
|
|
4996
|
+
],
|
|
4997
|
+
"model_description": "This model is a 4-bit quantized version of C4AI Command-R using bitsandbytes.",
|
|
4998
|
+
"model_specs": [
|
|
4999
|
+
{
|
|
5000
|
+
"model_format": "pytorch",
|
|
5001
|
+
"model_size_in_billions": 35,
|
|
5002
|
+
"quantizations": [
|
|
5003
|
+
"none"
|
|
5004
|
+
],
|
|
5005
|
+
"model_id": "CohereForAI/c4ai-command-r-v01-4bit",
|
|
5006
|
+
"model_revision": "f2e87936a146643c9dd143422dcafb9cb1552611"
|
|
5007
|
+
},
|
|
5008
|
+
{
|
|
5009
|
+
"model_format": "pytorch",
|
|
5010
|
+
"model_size_in_billions": 104,
|
|
5011
|
+
"quantizations": [
|
|
5012
|
+
"none"
|
|
5013
|
+
],
|
|
5014
|
+
"model_id": "CohereForAI/c4ai-command-r-plus-4bit",
|
|
5015
|
+
"model_revision": "bb63b5b7005ecedb30b0cfd0d5953b02a5817f7b"
|
|
5016
|
+
}
|
|
5017
|
+
]
|
|
4518
5018
|
}
|
|
4519
5019
|
]
|
|
@@ -33,6 +33,7 @@ from ..._compat import (
|
|
|
33
33
|
validator,
|
|
34
34
|
)
|
|
35
35
|
from ...constants import XINFERENCE_CACHE_DIR, XINFERENCE_MODEL_DIR
|
|
36
|
+
from ...types import LoRA
|
|
36
37
|
from ..utils import (
|
|
37
38
|
download_from_modelscope,
|
|
38
39
|
is_valid_model_uri,
|
|
@@ -199,6 +200,21 @@ class CustomLLMFamilyV1(LLMFamilyV1):
|
|
|
199
200
|
)
|
|
200
201
|
llm_spec.prompt_style = BUILTIN_LLM_PROMPT_STYLE[prompt_style_name]
|
|
201
202
|
|
|
203
|
+
# check model ability, registering LLM only provides generate and chat
|
|
204
|
+
# but for vision models, we add back the abilities so that
|
|
205
|
+
# gradio chat interface can be generated properly
|
|
206
|
+
if (
|
|
207
|
+
llm_spec.model_family != "other"
|
|
208
|
+
and llm_spec.model_family
|
|
209
|
+
in {
|
|
210
|
+
family.model_name
|
|
211
|
+
for family in BUILTIN_LLM_FAMILIES
|
|
212
|
+
if "vision" in family.model_ability
|
|
213
|
+
}
|
|
214
|
+
and "vision" not in llm_spec.model_ability
|
|
215
|
+
):
|
|
216
|
+
llm_spec.model_ability.append("vision")
|
|
217
|
+
|
|
202
218
|
return llm_spec
|
|
203
219
|
|
|
204
220
|
|
|
@@ -782,10 +798,29 @@ def get_user_defined_llm_families():
|
|
|
782
798
|
return UD_LLM_FAMILIES.copy()
|
|
783
799
|
|
|
784
800
|
|
|
801
|
+
def match_model_size(
|
|
802
|
+
model_size: Union[int, str], spec_model_size: Union[int, str]
|
|
803
|
+
) -> bool:
|
|
804
|
+
if isinstance(model_size, str):
|
|
805
|
+
model_size = model_size.replace("_", ".")
|
|
806
|
+
if isinstance(spec_model_size, str):
|
|
807
|
+
spec_model_size = spec_model_size.replace("_", ".")
|
|
808
|
+
|
|
809
|
+
if model_size == spec_model_size:
|
|
810
|
+
return True
|
|
811
|
+
|
|
812
|
+
try:
|
|
813
|
+
ms = int(model_size)
|
|
814
|
+
ss = int(spec_model_size)
|
|
815
|
+
return ms == ss
|
|
816
|
+
except ValueError:
|
|
817
|
+
return False
|
|
818
|
+
|
|
819
|
+
|
|
785
820
|
def match_llm(
|
|
786
821
|
model_name: str,
|
|
787
822
|
model_format: Optional[str] = None,
|
|
788
|
-
model_size_in_billions: Optional[int] = None,
|
|
823
|
+
model_size_in_billions: Optional[Union[int, str]] = None,
|
|
789
824
|
quantization: Optional[str] = None,
|
|
790
825
|
is_local_deployment: bool = False,
|
|
791
826
|
) -> Optional[Tuple[LLMFamilyV1, LLMSpecV1, str]]:
|
|
@@ -829,7 +864,9 @@ def match_llm(
|
|
|
829
864
|
model_format
|
|
830
865
|
and model_format != spec.model_format
|
|
831
866
|
or model_size_in_billions
|
|
832
|
-
and
|
|
867
|
+
and not match_model_size(
|
|
868
|
+
model_size_in_billions, spec.model_size_in_billions
|
|
869
|
+
)
|
|
833
870
|
or quantization
|
|
834
871
|
and matched_quantization is None
|
|
835
872
|
):
|
|
@@ -939,12 +976,12 @@ def match_llm_cls(
|
|
|
939
976
|
family: LLMFamilyV1,
|
|
940
977
|
llm_spec: "LLMSpecV1",
|
|
941
978
|
quantization: str,
|
|
942
|
-
|
|
979
|
+
peft_model: Optional[List[LoRA]] = None,
|
|
943
980
|
) -> Optional[Type[LLM]]:
|
|
944
981
|
"""
|
|
945
982
|
Find an LLM implementation for given LLM family and spec.
|
|
946
983
|
"""
|
|
947
|
-
if
|
|
984
|
+
if peft_model is not None:
|
|
948
985
|
for cls in PEFT_SUPPORTED_CLASSES:
|
|
949
986
|
if cls.match(family, llm_spec, quantization):
|
|
950
987
|
return cls
|