xinference 0.7.5__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/oauth2/__init__.py +13 -0
- xinference/api/oauth2/common.py +14 -0
- xinference/api/oauth2/core.py +93 -0
- xinference/api/oauth2/types.py +36 -0
- xinference/api/oauth2/utils.py +44 -0
- xinference/api/restful_api.py +216 -27
- xinference/client/oscar/actor_client.py +18 -18
- xinference/client/restful/restful_client.py +96 -33
- xinference/conftest.py +63 -1
- xinference/constants.py +1 -0
- xinference/core/chat_interface.py +143 -3
- xinference/core/metrics.py +83 -0
- xinference/core/model.py +244 -181
- xinference/core/status_guard.py +86 -0
- xinference/core/supervisor.py +57 -7
- xinference/core/worker.py +134 -13
- xinference/deploy/cmdline.py +142 -16
- xinference/deploy/local.py +39 -7
- xinference/deploy/supervisor.py +2 -0
- xinference/deploy/worker.py +33 -5
- xinference/fields.py +4 -1
- xinference/model/core.py +8 -1
- xinference/model/embedding/core.py +3 -2
- xinference/model/embedding/model_spec_modelscope.json +60 -18
- xinference/model/image/stable_diffusion/core.py +4 -3
- xinference/model/llm/__init__.py +7 -0
- xinference/model/llm/ggml/llamacpp.py +3 -2
- xinference/model/llm/llm_family.json +87 -3
- xinference/model/llm/llm_family.py +15 -5
- xinference/model/llm/llm_family_modelscope.json +92 -3
- xinference/model/llm/pytorch/chatglm.py +70 -28
- xinference/model/llm/pytorch/core.py +11 -30
- xinference/model/llm/pytorch/internlm2.py +155 -0
- xinference/model/llm/pytorch/utils.py +0 -153
- xinference/model/llm/utils.py +37 -8
- xinference/model/llm/vllm/core.py +15 -3
- xinference/model/multimodal/__init__.py +15 -8
- xinference/model/multimodal/core.py +8 -1
- xinference/model/multimodal/model_spec.json +9 -0
- xinference/model/multimodal/model_spec_modelscope.json +45 -0
- xinference/model/multimodal/qwen_vl.py +5 -9
- xinference/model/utils.py +7 -2
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.b83095c2.js +3 -0
- xinference/web/ui/build/static/js/{main.236e72e7.js.LICENSE.txt → main.b83095c2.js.LICENSE.txt} +7 -0
- xinference/web/ui/build/static/js/main.b83095c2.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0a853b2fa1902551e262a2f1a4b7894341f27b3dd9587f2ef7aaea195af89518.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/101923c539819f26ad11fbcbd6f6e56436b285efbb090dcc7dd648c6e924c4a8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/193e7ba39e70d4bb2895a5cb317f6f293a5fd02e7e324c02a1eba2f83216419c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/22858de5265f2d279fca9f2f54dfb147e4b2704200dfb5d2ad3ec9769417328f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/27696db5fcd4fcf0e7974cadf1e4a2ab89690474045c3188eafd586323ad13bb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/27bcada3ee8f89d21184b359f022fc965f350ffaca52c9814c29f1fc37121173.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/27bdbe25deab8cf08f7fab8f05f8f26cf84a98809527a37986a4ab73a57ba96a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2bee7b8bd3d52976a45d6068e1333df88b943e0e679403c809e45382e3818037.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/30670751f55508ef3b861e13dd71b9e5a10d2561373357a12fc3831a0b77fd93.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3605cd3a96ff2a3b443c70a101575482279ad26847924cab0684d165ba0d2492.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3789ef437d3ecbf945bb9cea39093d1f16ebbfa32dbe6daf35abcfb6d48de6f1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4942da6bc03bf7373af068e22f916341aabc5b5df855d73c1d348c696724ce37.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4d933e35e0fe79867d3aa6c46db28804804efddf5490347cb6c2c2879762a157.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4d96f071168af43965e0fab2ded658fa0a15b8d9ca03789a5ef9c5c16a4e3cee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/52a6136cb2dbbf9c51d461724d9b283ebe74a73fb19d5df7ba8e13c42bd7174d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5c408307c982f07f9c09c85c98212d1b1c22548a9194c69548750a3016b91b88.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/663adbcb60b942e9cf094c8d9fabe57517f5e5e6e722d28b4948a40b7445a3b8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/666bb2e1b250dc731311a7e4880886177885dfa768508d2ed63e02630cc78725.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/71493aadd34d568fbe605cacaba220aa69bd09273251ee4ba27930f8d01fccd8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8b071db2a5a9ef68dc14d5f606540bd23d9785e365a11997c510656764d2dccf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8b246d79cd3f6fc78f11777e6a6acca6a2c5d4ecce7f2dd4dcf9a48126440d3c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a4d72d3b806ba061919115f0c513738726872e3c79cf258f007519d3f91d1a16.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b4e4fccaf8f2489a29081f0bf3b191656bd452fb3c8b5e3c6d92d94f680964d5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b53eb7c7967f6577bd3e678293c44204fb03ffa7fdc1dd59d3099015c68f6f7f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d06af85a84e5c5a29d3acf2dbb5b30c0cf75c8aec4ab5f975e6096f944ee4324.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d5e150bff31715977d8f537c970f06d4fe3de9909d7e8342244a83a9f6447121.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/de36e5c08fd524e341d664883dda6cb1745acc852a4f1b011a35a0b4615f72fa.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f037ffef5992af0892d6d991053c1dace364cd39a3f11f1a41f92776e8a59459.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f23ab356a8603d4a2aaa74388c2f381675c207d37c4d1c832df922e9655c9a6b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f7c23b0922f4087b9e2e3e46f15c946b772daa46c28c3a12426212ecaf481deb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f95a8bd358eeb55fa2f49f1224cc2f4f36006359856744ff09ae4bb295f59ec1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +36 -0
- xinference/web/ui/node_modules/@types/cookie/package.json +30 -0
- xinference/web/ui/node_modules/@types/hoist-non-react-statics/package.json +33 -0
- xinference/web/ui/node_modules/react-cookie/package.json +55 -0
- xinference/web/ui/node_modules/universal-cookie/package.json +48 -0
- xinference/web/ui/package-lock.json +37 -0
- xinference/web/ui/package.json +3 -2
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/METADATA +17 -6
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/RECORD +101 -66
- xinference/web/ui/build/static/js/main.236e72e7.js +0 -3
- xinference/web/ui/build/static/js/main.236e72e7.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0cccfbe5d963b8e31eb679f9d9677392839cedd04aa2956ac6b33cf19599d597.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f3b6cc71b7c83bdc85aa4835927aeb86af2ce0d2ac241917ecfbf90f75c6d27.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2f651cf60b1bde50c0601c7110f77dd44819fb6e2501ff748a631724d91445d4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/42bb623f337ad08ed076484185726e072ca52bb88e373d72c7b052db4c273342.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/57af83639c604bd3362d0f03f7505e81c6f67ff77bee7c6bb31f6e5523eba185.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/667753ce39ce1d4bcbf9a5f1a103d653be1d19d42f4e1fbaceb9b507679a52c7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/66ed1bd4c06748c1b176a625c25c856997edc787856c73162f82f2b465c5d956.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/78f2521da2e2a98b075a2666cb782c7e2c019cd3c72199eecd5901c82d8655df.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8d2b0b3c6988d1894694dcbbe708ef91cfe62d62dac317031f09915ced637953.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9427ae7f1e94ae8dcd2333fb361e381f4054fde07394fe5448658e3417368476.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bcee2b4e76b07620f9087989eb86d43c645ba3c7a74132cf926260af1164af0e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/cc2ddd02ccc1dad1a2737ac247c79e6f6ed2c7836c6b68e511e3048f666b64af.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d2e8e6665a7efc832b43907dadf4e3c896a59eaf8129f9a520882466c8f2e489.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d8a42e9df7157de9f28eecefdf178fd113bf2280d28471b6e32a8a45276042df.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e26750d9556e9741912333349e4da454c53dbfddbfc6002ab49518dcf02af745.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ef42ec014d7bc373b874b2a1ff0dcd785490f125e913698bc049b0bd778e4d66.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fe3eb4d76c79ca98833f686d642224eeeb94cc83ad14300d281623796d087f0a.json +0 -1
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/LICENSE +0 -0
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/WHEEL +0 -0
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/top_level.txt +0 -0
|
@@ -5,7 +5,8 @@
|
|
|
5
5
|
"max_tokens": 512,
|
|
6
6
|
"language": ["en"],
|
|
7
7
|
"model_id": "Xorbits/bge-large-en",
|
|
8
|
-
"model_revision": "v0.0.1"
|
|
8
|
+
"model_revision": "v0.0.1",
|
|
9
|
+
"model_hub": "modelscope"
|
|
9
10
|
},
|
|
10
11
|
{
|
|
11
12
|
"model_name": "bge-base-en",
|
|
@@ -13,7 +14,8 @@
|
|
|
13
14
|
"max_tokens": 512,
|
|
14
15
|
"language": ["en"],
|
|
15
16
|
"model_id": "Xorbits/bge-base-en",
|
|
16
|
-
"model_revision": "v0.0.1"
|
|
17
|
+
"model_revision": "v0.0.1",
|
|
18
|
+
"model_hub": "modelscope"
|
|
17
19
|
},
|
|
18
20
|
{
|
|
19
21
|
"model_name": "gte-large",
|
|
@@ -21,7 +23,8 @@
|
|
|
21
23
|
"max_tokens": 512,
|
|
22
24
|
"language": ["en"],
|
|
23
25
|
"model_id": "Xorbits/gte-large",
|
|
24
|
-
"model_revision": "v0.0.1"
|
|
26
|
+
"model_revision": "v0.0.1",
|
|
27
|
+
"model_hub": "modelscope"
|
|
25
28
|
},
|
|
26
29
|
{
|
|
27
30
|
"model_name": "gte-base",
|
|
@@ -29,7 +32,8 @@
|
|
|
29
32
|
"max_tokens": 512,
|
|
30
33
|
"language": ["en"],
|
|
31
34
|
"model_id": "Xorbits/gte-base",
|
|
32
|
-
"model_revision": "v0.0.1"
|
|
35
|
+
"model_revision": "v0.0.1",
|
|
36
|
+
"model_hub": "modelscope"
|
|
33
37
|
},
|
|
34
38
|
{
|
|
35
39
|
"model_name": "e5-large-v2",
|
|
@@ -37,7 +41,8 @@
|
|
|
37
41
|
"max_tokens": 512,
|
|
38
42
|
"language": ["en"],
|
|
39
43
|
"model_id": "Xorbits/e5-large-v2",
|
|
40
|
-
"model_revision": "v0.0.1"
|
|
44
|
+
"model_revision": "v0.0.1",
|
|
45
|
+
"model_hub": "modelscope"
|
|
41
46
|
},
|
|
42
47
|
{
|
|
43
48
|
"model_name": "bge-large-zh",
|
|
@@ -45,7 +50,8 @@
|
|
|
45
50
|
"max_tokens": 512,
|
|
46
51
|
"language": ["zh"],
|
|
47
52
|
"model_id": "Xorbits/bge-large-zh",
|
|
48
|
-
"model_revision": "v0.0.1"
|
|
53
|
+
"model_revision": "v0.0.1",
|
|
54
|
+
"model_hub": "modelscope"
|
|
49
55
|
},
|
|
50
56
|
{
|
|
51
57
|
"model_name": "bge-large-zh-noinstruct",
|
|
@@ -53,7 +59,8 @@
|
|
|
53
59
|
"max_tokens": 512,
|
|
54
60
|
"language": ["zh"],
|
|
55
61
|
"model_id": "Xorbits/bge-large-zh-noinstruct",
|
|
56
|
-
"model_revision": "v0.0.1"
|
|
62
|
+
"model_revision": "v0.0.1",
|
|
63
|
+
"model_hub": "modelscope"
|
|
57
64
|
},
|
|
58
65
|
{
|
|
59
66
|
"model_name": "bge-base-zh",
|
|
@@ -61,7 +68,8 @@
|
|
|
61
68
|
"max_tokens": 512,
|
|
62
69
|
"language": ["zh"],
|
|
63
70
|
"model_id": "Xorbits/bge-base-zh",
|
|
64
|
-
"model_revision": "v0.0.2"
|
|
71
|
+
"model_revision": "v0.0.2",
|
|
72
|
+
"model_hub": "modelscope"
|
|
65
73
|
},
|
|
66
74
|
{
|
|
67
75
|
"model_name": "multilingual-e5-large",
|
|
@@ -69,7 +77,8 @@
|
|
|
69
77
|
"max_tokens": 514,
|
|
70
78
|
"language": ["zh"],
|
|
71
79
|
"model_id": "Xorbits/multilingual-e5-large",
|
|
72
|
-
"model_revision": "v0.0.1"
|
|
80
|
+
"model_revision": "v0.0.1",
|
|
81
|
+
"model_hub": "modelscope"
|
|
73
82
|
},
|
|
74
83
|
{
|
|
75
84
|
"model_name": "bge-small-zh",
|
|
@@ -77,7 +86,8 @@
|
|
|
77
86
|
"max_tokens": 512,
|
|
78
87
|
"language": ["zh"],
|
|
79
88
|
"model_id": "Xorbits/bge-small-zh",
|
|
80
|
-
"model_revision": "v0.0.1"
|
|
89
|
+
"model_revision": "v0.0.1",
|
|
90
|
+
"model_hub": "modelscope"
|
|
81
91
|
},
|
|
82
92
|
{
|
|
83
93
|
"model_name": "bge-small-zh-v1.5",
|
|
@@ -85,7 +95,8 @@
|
|
|
85
95
|
"max_tokens": 512,
|
|
86
96
|
"language": ["zh"],
|
|
87
97
|
"model_id": "Xorbits/bge-small-zh-v1.5",
|
|
88
|
-
"model_revision": "v0.0.2"
|
|
98
|
+
"model_revision": "v0.0.2",
|
|
99
|
+
"model_hub": "modelscope"
|
|
89
100
|
},
|
|
90
101
|
{
|
|
91
102
|
"model_name": "bge-base-zh-v1.5",
|
|
@@ -93,7 +104,8 @@
|
|
|
93
104
|
"max_tokens": 512,
|
|
94
105
|
"language": ["zh"],
|
|
95
106
|
"model_id": "Xorbits/bge-base-zh-v1.5",
|
|
96
|
-
"model_revision": "v0.0.1"
|
|
107
|
+
"model_revision": "v0.0.1",
|
|
108
|
+
"model_hub": "modelscope"
|
|
97
109
|
},
|
|
98
110
|
{
|
|
99
111
|
"model_name": "bge-large-zh-v1.5",
|
|
@@ -101,7 +113,8 @@
|
|
|
101
113
|
"max_tokens": 512,
|
|
102
114
|
"language": ["zh"],
|
|
103
115
|
"model_id": "Xorbits/bge-large-zh-v1.5",
|
|
104
|
-
"model_revision": "v0.0.1"
|
|
116
|
+
"model_revision": "v0.0.1",
|
|
117
|
+
"model_hub": "modelscope"
|
|
105
118
|
},
|
|
106
119
|
{
|
|
107
120
|
"model_name": "bge-small-en-v1.5",
|
|
@@ -109,7 +122,8 @@
|
|
|
109
122
|
"max_tokens": 512,
|
|
110
123
|
"language": ["en"],
|
|
111
124
|
"model_id": "Xorbits/bge-small-en-v1.5",
|
|
112
|
-
"model_revision": "v0.0.2"
|
|
125
|
+
"model_revision": "v0.0.2",
|
|
126
|
+
"model_hub": "modelscope"
|
|
113
127
|
},
|
|
114
128
|
{
|
|
115
129
|
"model_name": "bge-base-en-v1.5",
|
|
@@ -117,7 +131,8 @@
|
|
|
117
131
|
"max_tokens": 512,
|
|
118
132
|
"language": ["en"],
|
|
119
133
|
"model_id": "Xorbits/bge-base-en-v1.5",
|
|
120
|
-
"model_revision": "v0.0.1"
|
|
134
|
+
"model_revision": "v0.0.1",
|
|
135
|
+
"model_hub": "modelscope"
|
|
121
136
|
},
|
|
122
137
|
{
|
|
123
138
|
"model_name": "bge-large-en-v1.5",
|
|
@@ -125,7 +140,8 @@
|
|
|
125
140
|
"max_tokens": 512,
|
|
126
141
|
"language": ["en"],
|
|
127
142
|
"model_id": "Xorbits/bge-large-en-v1.5",
|
|
128
|
-
"model_revision": "v0.0.1"
|
|
143
|
+
"model_revision": "v0.0.1",
|
|
144
|
+
"model_hub": "modelscope"
|
|
129
145
|
},
|
|
130
146
|
{
|
|
131
147
|
"model_name": "jina-embeddings-v2-small-en",
|
|
@@ -133,7 +149,8 @@
|
|
|
133
149
|
"max_tokens": 8192,
|
|
134
150
|
"language": ["en"],
|
|
135
151
|
"model_id": "Xorbits/jina-embeddings-v2-small-en",
|
|
136
|
-
"model_revision": "v0.0.1"
|
|
152
|
+
"model_revision": "v0.0.1",
|
|
153
|
+
"model_hub": "modelscope"
|
|
137
154
|
},
|
|
138
155
|
{
|
|
139
156
|
"model_name": "jina-embeddings-v2-base-en",
|
|
@@ -141,6 +158,31 @@
|
|
|
141
158
|
"max_tokens": 8192,
|
|
142
159
|
"language": ["en"],
|
|
143
160
|
"model_id": "Xorbits/jina-embeddings-v2-base-en",
|
|
144
|
-
"model_revision": "v0.0.1"
|
|
161
|
+
"model_revision": "v0.0.1",
|
|
162
|
+
"model_hub": "modelscope"
|
|
163
|
+
},
|
|
164
|
+
{
|
|
165
|
+
"model_name": "text2vec-large-chinese",
|
|
166
|
+
"dimensions": 1024,
|
|
167
|
+
"max_tokens": 256,
|
|
168
|
+
"language": ["zh"],
|
|
169
|
+
"model_id": "Jerry0/text2vec-large-chinese",
|
|
170
|
+
"model_hub": "modelscope"
|
|
171
|
+
},
|
|
172
|
+
{
|
|
173
|
+
"model_name": "text2vec-base-chinese",
|
|
174
|
+
"dimensions": 768,
|
|
175
|
+
"max_tokens": 128,
|
|
176
|
+
"language": ["zh"],
|
|
177
|
+
"model_id": "Jerry0/text2vec-base-chinese",
|
|
178
|
+
"model_hub": "modelscope"
|
|
179
|
+
},
|
|
180
|
+
{
|
|
181
|
+
"model_name": "text2vec-base-chinese-paraphrase",
|
|
182
|
+
"dimensions": 768,
|
|
183
|
+
"max_tokens": 256,
|
|
184
|
+
"language": ["zh"],
|
|
185
|
+
"model_id": "mwei23/text2vec-base-chinese-paraphrase",
|
|
186
|
+
"model_hub": "modelscope"
|
|
145
187
|
}
|
|
146
188
|
]
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
import base64
|
|
16
16
|
import logging
|
|
17
17
|
import os
|
|
18
|
+
import re
|
|
18
19
|
import time
|
|
19
20
|
import uuid
|
|
20
21
|
from concurrent.futures import ThreadPoolExecutor
|
|
@@ -101,7 +102,7 @@ class DiffusionModel:
|
|
|
101
102
|
def _gen_base64_image(_img):
|
|
102
103
|
buffered = BytesIO()
|
|
103
104
|
_img.save(buffered, format="jpeg")
|
|
104
|
-
return base64.b64encode(buffered.getvalue())
|
|
105
|
+
return base64.b64encode(buffered.getvalue()).decode()
|
|
105
106
|
|
|
106
107
|
with ThreadPoolExecutor() as executor:
|
|
107
108
|
results = list(map(partial(executor.submit, _gen_base64_image), images))
|
|
@@ -120,7 +121,7 @@ class DiffusionModel:
|
|
|
120
121
|
):
|
|
121
122
|
# References:
|
|
122
123
|
# https://huggingface.co/docs/diffusers/main/en/api/pipelines/controlnet_sdxl
|
|
123
|
-
width, height = map(int,
|
|
124
|
+
width, height = map(int, re.split(r"[^\d]+", size))
|
|
124
125
|
return self._call_model(
|
|
125
126
|
prompt=prompt,
|
|
126
127
|
height=height,
|
|
@@ -140,7 +141,7 @@ class DiffusionModel:
|
|
|
140
141
|
response_format: str = "url",
|
|
141
142
|
**kwargs,
|
|
142
143
|
):
|
|
143
|
-
width, height = map(int,
|
|
144
|
+
width, height = map(int, re.split(r"[^\d]+", size))
|
|
144
145
|
return self._call_model(
|
|
145
146
|
image=image,
|
|
146
147
|
prompt=prompt,
|
xinference/model/llm/__init__.py
CHANGED
|
@@ -21,6 +21,7 @@ from .llm_family import (
|
|
|
21
21
|
BUILTIN_LLM_FAMILIES,
|
|
22
22
|
BUILTIN_LLM_MODEL_CHAT_FAMILIES,
|
|
23
23
|
BUILTIN_LLM_MODEL_GENERATE_FAMILIES,
|
|
24
|
+
BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES,
|
|
24
25
|
BUILTIN_LLM_PROMPT_STYLE,
|
|
25
26
|
BUILTIN_MODELSCOPE_LLM_FAMILIES,
|
|
26
27
|
LLM_CLASSES,
|
|
@@ -47,6 +48,7 @@ def _install():
|
|
|
47
48
|
from .pytorch.chatglm import ChatglmPytorchChatModel
|
|
48
49
|
from .pytorch.core import PytorchChatModel, PytorchModel
|
|
49
50
|
from .pytorch.falcon import FalconPytorchChatModel, FalconPytorchModel
|
|
51
|
+
from .pytorch.internlm2 import Internlm2PytorchChatModel
|
|
50
52
|
from .pytorch.llama_2 import LlamaPytorchChatModel, LlamaPytorchModel
|
|
51
53
|
from .pytorch.vicuna import VicunaPytorchChatModel
|
|
52
54
|
from .vllm.core import VLLMChatModel, VLLMModel
|
|
@@ -79,6 +81,7 @@ def _install():
|
|
|
79
81
|
LlamaPytorchChatModel,
|
|
80
82
|
PytorchChatModel,
|
|
81
83
|
FalconPytorchModel,
|
|
84
|
+
Internlm2PytorchChatModel,
|
|
82
85
|
PytorchModel,
|
|
83
86
|
]
|
|
84
87
|
)
|
|
@@ -102,6 +105,8 @@ def _install():
|
|
|
102
105
|
BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
|
|
103
106
|
else:
|
|
104
107
|
BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
|
|
108
|
+
if "tool_call" in model_spec.model_ability:
|
|
109
|
+
BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
|
|
105
110
|
|
|
106
111
|
modelscope_json_path = os.path.join(
|
|
107
112
|
os.path.dirname(os.path.abspath(__file__)), "llm_family_modelscope.json"
|
|
@@ -123,6 +128,8 @@ def _install():
|
|
|
123
128
|
BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
|
|
124
129
|
else:
|
|
125
130
|
BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
|
|
131
|
+
if "tool_call" in model_spec.model_ability:
|
|
132
|
+
BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
|
|
126
133
|
|
|
127
134
|
from ...constants import XINFERENCE_MODEL_DIR
|
|
128
135
|
|
|
@@ -306,7 +306,8 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
|
|
|
306
306
|
|
|
307
307
|
generate_config = self._sanitize_generate_config(generate_config)
|
|
308
308
|
# TODO(codingl2k1): qwen hacky to set stop for function call.
|
|
309
|
-
|
|
309
|
+
model_family = self.model_family.model_family or self.model_family.model_name
|
|
310
|
+
if tools and "qwen-chat" == model_family:
|
|
310
311
|
stop = generate_config.get("stop")
|
|
311
312
|
if isinstance(stop, str):
|
|
312
313
|
generate_config["stop"] = [stop, "Observation:"]
|
|
@@ -326,6 +327,6 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
|
|
|
326
327
|
assert not isinstance(c, Iterator)
|
|
327
328
|
if tools:
|
|
328
329
|
return self._tool_calls_completion(
|
|
329
|
-
self.model_family
|
|
330
|
+
self.model_family, self.model_uid, c, tools
|
|
330
331
|
)
|
|
331
332
|
return self._to_chat_completion(c)
|
|
@@ -535,7 +535,8 @@
|
|
|
535
535
|
"zh"
|
|
536
536
|
],
|
|
537
537
|
"model_ability": [
|
|
538
|
-
"chat"
|
|
538
|
+
"chat",
|
|
539
|
+
"tools"
|
|
539
540
|
],
|
|
540
541
|
"model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
|
|
541
542
|
"model_specs": [
|
|
@@ -609,6 +610,15 @@
|
|
|
609
610
|
"roles": [
|
|
610
611
|
"user",
|
|
611
612
|
"assistant"
|
|
613
|
+
],
|
|
614
|
+
"stop_token_ids": [
|
|
615
|
+
64795,
|
|
616
|
+
64797,
|
|
617
|
+
2
|
|
618
|
+
],
|
|
619
|
+
"stop":[
|
|
620
|
+
"<|user|>",
|
|
621
|
+
"<|observation|>"
|
|
612
622
|
]
|
|
613
623
|
}
|
|
614
624
|
},
|
|
@@ -1139,14 +1149,15 @@
|
|
|
1139
1149
|
},
|
|
1140
1150
|
{
|
|
1141
1151
|
"version": 1,
|
|
1142
|
-
"context_length":
|
|
1152
|
+
"context_length": 32768,
|
|
1143
1153
|
"model_name": "qwen-chat",
|
|
1144
1154
|
"model_lang": [
|
|
1145
1155
|
"en",
|
|
1146
1156
|
"zh"
|
|
1147
1157
|
],
|
|
1148
1158
|
"model_ability": [
|
|
1149
|
-
"chat"
|
|
1159
|
+
"chat",
|
|
1160
|
+
"tools"
|
|
1150
1161
|
],
|
|
1151
1162
|
"model_description": "Qwen-chat is a fine-tuned version of the Qwen LLM trained with alignment techniques, specializing in chatting.",
|
|
1152
1163
|
"model_specs": [
|
|
@@ -1172,6 +1183,8 @@
|
|
|
1172
1183
|
"model_format": "pytorch",
|
|
1173
1184
|
"model_size_in_billions": "1_8",
|
|
1174
1185
|
"quantizations": [
|
|
1186
|
+
"4-bit",
|
|
1187
|
+
"8-bit",
|
|
1175
1188
|
"none"
|
|
1176
1189
|
],
|
|
1177
1190
|
"model_id": "Qwen/Qwen-1_8B-Chat",
|
|
@@ -1181,6 +1194,8 @@
|
|
|
1181
1194
|
"model_format": "pytorch",
|
|
1182
1195
|
"model_size_in_billions": 7,
|
|
1183
1196
|
"quantizations": [
|
|
1197
|
+
"4-bit",
|
|
1198
|
+
"8-bit",
|
|
1184
1199
|
"none"
|
|
1185
1200
|
],
|
|
1186
1201
|
"model_id": "Qwen/Qwen-7B-Chat",
|
|
@@ -1190,6 +1205,8 @@
|
|
|
1190
1205
|
"model_format": "pytorch",
|
|
1191
1206
|
"model_size_in_billions": 14,
|
|
1192
1207
|
"quantizations": [
|
|
1208
|
+
"4-bit",
|
|
1209
|
+
"8-bit",
|
|
1193
1210
|
"none"
|
|
1194
1211
|
],
|
|
1195
1212
|
"model_id": "Qwen/Qwen-14B-Chat",
|
|
@@ -1199,6 +1216,8 @@
|
|
|
1199
1216
|
"model_format": "pytorch",
|
|
1200
1217
|
"model_size_in_billions": 72,
|
|
1201
1218
|
"quantizations": [
|
|
1219
|
+
"4-bit",
|
|
1220
|
+
"8-bit",
|
|
1202
1221
|
"none"
|
|
1203
1222
|
],
|
|
1204
1223
|
"model_id": "Qwen/Qwen-72B-Chat",
|
|
@@ -1213,6 +1232,15 @@
|
|
|
1213
1232
|
],
|
|
1214
1233
|
"model_id": "Qwen/Qwen-7B-Chat-{quantization}"
|
|
1215
1234
|
},
|
|
1235
|
+
{
|
|
1236
|
+
"model_format": "gptq",
|
|
1237
|
+
"model_size_in_billions": "1_8",
|
|
1238
|
+
"quantizations": [
|
|
1239
|
+
"Int4",
|
|
1240
|
+
"Int8"
|
|
1241
|
+
],
|
|
1242
|
+
"model_id": "Qwen/Qwen-1_8B-Chat-{quantization}"
|
|
1243
|
+
},
|
|
1216
1244
|
{
|
|
1217
1245
|
"model_format": "gptq",
|
|
1218
1246
|
"model_size_in_billions": 14,
|
|
@@ -2468,6 +2496,14 @@
|
|
|
2468
2496
|
],
|
|
2469
2497
|
"model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI.",
|
|
2470
2498
|
"model_specs": [
|
|
2499
|
+
{
|
|
2500
|
+
"model_format": "gptq",
|
|
2501
|
+
"model_size_in_billions": 34,
|
|
2502
|
+
"quantizations": [
|
|
2503
|
+
"8bits"
|
|
2504
|
+
],
|
|
2505
|
+
"model_id": "01-ai/Yi-34B-Chat-{quantization}"
|
|
2506
|
+
},
|
|
2471
2507
|
{
|
|
2472
2508
|
"model_format": "pytorch",
|
|
2473
2509
|
"model_size_in_billions": 34,
|
|
@@ -3127,5 +3163,53 @@
|
|
|
3127
3163
|
"model_revision": "70d1740208c8ba39f9ba250b22117ec25311ab33"
|
|
3128
3164
|
}
|
|
3129
3165
|
]
|
|
3166
|
+
},
|
|
3167
|
+
{
|
|
3168
|
+
"version": 1,
|
|
3169
|
+
"context_length": 204800,
|
|
3170
|
+
"model_name": "internlm2-chat",
|
|
3171
|
+
"model_lang": [
|
|
3172
|
+
"en",
|
|
3173
|
+
"zh"
|
|
3174
|
+
],
|
|
3175
|
+
"model_ability": [
|
|
3176
|
+
"chat"
|
|
3177
|
+
],
|
|
3178
|
+
"model_description": "The second generation of the InternLM model, InternLM2.",
|
|
3179
|
+
"model_specs": [
|
|
3180
|
+
{
|
|
3181
|
+
"model_format": "pytorch",
|
|
3182
|
+
"model_size_in_billions": 7,
|
|
3183
|
+
"quantizations": [
|
|
3184
|
+
"none"
|
|
3185
|
+
],
|
|
3186
|
+
"model_id": "internlm/internlm2-chat-7b",
|
|
3187
|
+
"model_revision": "5797f79825bab7013932d57e2babaac1b8de6b4f"
|
|
3188
|
+
},
|
|
3189
|
+
{
|
|
3190
|
+
"model_format": "pytorch",
|
|
3191
|
+
"model_size_in_billions": 20,
|
|
3192
|
+
"quantizations": [
|
|
3193
|
+
"none"
|
|
3194
|
+
],
|
|
3195
|
+
"model_id": "internlm/internlm2-chat-20b",
|
|
3196
|
+
"model_revision": "3ccaf3ae82d5d01c0a95eecf40ee550f9c543635"
|
|
3197
|
+
}
|
|
3198
|
+
],
|
|
3199
|
+
"prompt_style": {
|
|
3200
|
+
"style_name": "INTERNLM2",
|
|
3201
|
+
"system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
|
|
3202
|
+
"roles": [
|
|
3203
|
+
"[UNUSED_TOKEN_146]user",
|
|
3204
|
+
"[UNUSED_TOKEN_146]assistant"
|
|
3205
|
+
],
|
|
3206
|
+
"intra_message_sep": "[UNUSED_TOKEN_145]",
|
|
3207
|
+
"stop_token_ids": [
|
|
3208
|
+
92542
|
|
3209
|
+
],
|
|
3210
|
+
"stop": [
|
|
3211
|
+
"[UNUSED_TOKEN_145]"
|
|
3212
|
+
]
|
|
3213
|
+
}
|
|
3130
3214
|
}
|
|
3131
3215
|
]
|
|
@@ -43,6 +43,7 @@ DEFAULT_CONTEXT_LENGTH = 2048
|
|
|
43
43
|
BUILTIN_LLM_PROMPT_STYLE: Dict[str, "PromptStyleV1"] = {}
|
|
44
44
|
BUILTIN_LLM_MODEL_CHAT_FAMILIES: Set[str] = set()
|
|
45
45
|
BUILTIN_LLM_MODEL_GENERATE_FAMILIES: Set[str] = set()
|
|
46
|
+
BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES: Set[str] = set()
|
|
46
47
|
|
|
47
48
|
|
|
48
49
|
class GgmlLLMSpecV1(BaseModel):
|
|
@@ -50,7 +51,7 @@ class GgmlLLMSpecV1(BaseModel):
|
|
|
50
51
|
# Must in order that `str` first, then `int`
|
|
51
52
|
model_size_in_billions: Union[str, int]
|
|
52
53
|
quantizations: List[str]
|
|
53
|
-
model_id: str
|
|
54
|
+
model_id: Optional[str]
|
|
54
55
|
model_file_name_template: str
|
|
55
56
|
model_hub: str = "huggingface"
|
|
56
57
|
model_uri: Optional[str]
|
|
@@ -73,7 +74,7 @@ class PytorchLLMSpecV1(BaseModel):
|
|
|
73
74
|
# Must in order that `str` first, then `int`
|
|
74
75
|
model_size_in_billions: Union[str, int]
|
|
75
76
|
quantizations: List[str]
|
|
76
|
-
model_id: str
|
|
77
|
+
model_id: Optional[str]
|
|
77
78
|
model_hub: str = "huggingface"
|
|
78
79
|
model_uri: Optional[str]
|
|
79
80
|
model_revision: Optional[str]
|
|
@@ -105,7 +106,7 @@ class LLMFamilyV1(BaseModel):
|
|
|
105
106
|
context_length: Optional[int] = DEFAULT_CONTEXT_LENGTH
|
|
106
107
|
model_name: str
|
|
107
108
|
model_lang: List[str]
|
|
108
|
-
model_ability: List[Literal["embed", "generate", "chat"]]
|
|
109
|
+
model_ability: List[Literal["embed", "generate", "chat", "tools"]]
|
|
109
110
|
model_description: Optional[str]
|
|
110
111
|
# reason for not required str here: legacy registration
|
|
111
112
|
model_family: Optional[str]
|
|
@@ -155,6 +156,15 @@ class CustomLLMFamilyV1(LLMFamilyV1):
|
|
|
155
156
|
f"`model_family` for chat model must be `other` or one of the following values: \n"
|
|
156
157
|
f"{', '.join(list(BUILTIN_LLM_MODEL_CHAT_FAMILIES))}"
|
|
157
158
|
)
|
|
159
|
+
if (
|
|
160
|
+
llm_spec.model_family != "other"
|
|
161
|
+
and "tool_call" in llm_spec.model_ability
|
|
162
|
+
and llm_spec.model_family not in BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES
|
|
163
|
+
):
|
|
164
|
+
raise ValueError(
|
|
165
|
+
f"`model_family` for tool call model must be `other` or one of the following values: \n"
|
|
166
|
+
f"{', '.join(list(BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES))}"
|
|
167
|
+
)
|
|
158
168
|
if (
|
|
159
169
|
llm_spec.model_family != "other"
|
|
160
170
|
and "chat" not in llm_spec.model_ability
|
|
@@ -413,7 +423,7 @@ def _get_cache_dir(
|
|
|
413
423
|
# quantization a dedicated cache dir.
|
|
414
424
|
quant_suffix = ""
|
|
415
425
|
for q in llm_spec.quantizations:
|
|
416
|
-
if q in llm_spec.model_id:
|
|
426
|
+
if llm_spec.model_id and q in llm_spec.model_id:
|
|
417
427
|
quant_suffix = q
|
|
418
428
|
break
|
|
419
429
|
cache_dir_name = (
|
|
@@ -726,7 +736,7 @@ def match_llm(
|
|
|
726
736
|
def _apply_format_to_model_id(spec: LLMSpecV1, q: str) -> LLMSpecV1:
|
|
727
737
|
# Different quantized versions of some models use different model ids,
|
|
728
738
|
# Here we check the `{}` in the model id to format the id.
|
|
729
|
-
if "{" in spec.model_id:
|
|
739
|
+
if spec.model_id and "{" in spec.model_id:
|
|
730
740
|
spec.model_id = spec.model_id.format(quantization=q)
|
|
731
741
|
return spec
|
|
732
742
|
|
|
@@ -297,7 +297,8 @@
|
|
|
297
297
|
"zh"
|
|
298
298
|
],
|
|
299
299
|
"model_ability": [
|
|
300
|
-
"chat"
|
|
300
|
+
"chat",
|
|
301
|
+
"tools"
|
|
301
302
|
],
|
|
302
303
|
"model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
|
|
303
304
|
"model_specs": [
|
|
@@ -375,6 +376,15 @@
|
|
|
375
376
|
"roles": [
|
|
376
377
|
"user",
|
|
377
378
|
"assistant"
|
|
379
|
+
],
|
|
380
|
+
"stop_token_ids": [
|
|
381
|
+
64795,
|
|
382
|
+
64797,
|
|
383
|
+
2
|
|
384
|
+
],
|
|
385
|
+
"stop":[
|
|
386
|
+
"<|user|>",
|
|
387
|
+
"<|observation|>"
|
|
378
388
|
]
|
|
379
389
|
}
|
|
380
390
|
},
|
|
@@ -1108,6 +1118,15 @@
|
|
|
1108
1118
|
],
|
|
1109
1119
|
"model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI.",
|
|
1110
1120
|
"model_specs": [
|
|
1121
|
+
{
|
|
1122
|
+
"model_format": "gptq",
|
|
1123
|
+
"model_size_in_billions": 34,
|
|
1124
|
+
"quantizations": [
|
|
1125
|
+
"8bits"
|
|
1126
|
+
],
|
|
1127
|
+
"model_id": "01ai/Yi-34B-Chat-{quantization}",
|
|
1128
|
+
"model_revision": "master"
|
|
1129
|
+
},
|
|
1111
1130
|
{
|
|
1112
1131
|
"model_format": "pytorch",
|
|
1113
1132
|
"model_size_in_billions": 34,
|
|
@@ -1452,14 +1471,15 @@
|
|
|
1452
1471
|
},
|
|
1453
1472
|
{
|
|
1454
1473
|
"version": 1,
|
|
1455
|
-
"context_length":
|
|
1474
|
+
"context_length": 32768,
|
|
1456
1475
|
"model_name": "qwen-chat",
|
|
1457
1476
|
"model_lang": [
|
|
1458
1477
|
"en",
|
|
1459
1478
|
"zh"
|
|
1460
1479
|
],
|
|
1461
1480
|
"model_ability": [
|
|
1462
|
-
"chat"
|
|
1481
|
+
"chat",
|
|
1482
|
+
"tools"
|
|
1463
1483
|
],
|
|
1464
1484
|
"model_description": "Qwen-chat is a fine-tuned version of the Qwen LLM trained with alignment techniques, specializing in chatting.",
|
|
1465
1485
|
"model_specs": [
|
|
@@ -1489,6 +1509,8 @@
|
|
|
1489
1509
|
"model_format": "pytorch",
|
|
1490
1510
|
"model_size_in_billions": "1_8",
|
|
1491
1511
|
"quantizations": [
|
|
1512
|
+
"4-bit",
|
|
1513
|
+
"8-bit",
|
|
1492
1514
|
"none"
|
|
1493
1515
|
],
|
|
1494
1516
|
"model_hub": "modelscope",
|
|
@@ -1499,6 +1521,8 @@
|
|
|
1499
1521
|
"model_format": "pytorch",
|
|
1500
1522
|
"model_size_in_billions": 7,
|
|
1501
1523
|
"quantizations": [
|
|
1524
|
+
"4-bit",
|
|
1525
|
+
"8-bit",
|
|
1502
1526
|
"none"
|
|
1503
1527
|
],
|
|
1504
1528
|
"model_hub": "modelscope",
|
|
@@ -1509,6 +1533,8 @@
|
|
|
1509
1533
|
"model_format": "pytorch",
|
|
1510
1534
|
"model_size_in_billions": 72,
|
|
1511
1535
|
"quantizations": [
|
|
1536
|
+
"4-bit",
|
|
1537
|
+
"8-bit",
|
|
1512
1538
|
"none"
|
|
1513
1539
|
],
|
|
1514
1540
|
"model_hub": "modelscope",
|
|
@@ -1519,12 +1545,25 @@
|
|
|
1519
1545
|
"model_format": "pytorch",
|
|
1520
1546
|
"model_size_in_billions": 14,
|
|
1521
1547
|
"quantizations": [
|
|
1548
|
+
"4-bit",
|
|
1549
|
+
"8-bit",
|
|
1522
1550
|
"none"
|
|
1523
1551
|
],
|
|
1524
1552
|
"model_id": "qwen/Qwen-14B-Chat",
|
|
1525
1553
|
"model_hub": "modelscope",
|
|
1526
1554
|
"model_revision": "v1.0.7"
|
|
1527
1555
|
},
|
|
1556
|
+
{
|
|
1557
|
+
"model_format": "gptq",
|
|
1558
|
+
"model_size_in_billions": "1_8",
|
|
1559
|
+
"quantizations": [
|
|
1560
|
+
"Int4",
|
|
1561
|
+
"Int8"
|
|
1562
|
+
],
|
|
1563
|
+
"model_id": "qwen/Qwen-1_8B-Chat-{quantization}",
|
|
1564
|
+
"model_hub": "modelscope",
|
|
1565
|
+
"model_revision": "master"
|
|
1566
|
+
},
|
|
1528
1567
|
{
|
|
1529
1568
|
"model_format": "gptq",
|
|
1530
1569
|
"model_size_in_billions": 7,
|
|
@@ -1739,5 +1778,55 @@
|
|
|
1739
1778
|
"model_revision": "master"
|
|
1740
1779
|
}
|
|
1741
1780
|
]
|
|
1781
|
+
},
|
|
1782
|
+
{
|
|
1783
|
+
"version": 1,
|
|
1784
|
+
"context_length": 204800,
|
|
1785
|
+
"model_name": "internlm2-chat",
|
|
1786
|
+
"model_lang": [
|
|
1787
|
+
"en",
|
|
1788
|
+
"zh"
|
|
1789
|
+
],
|
|
1790
|
+
"model_ability": [
|
|
1791
|
+
"chat"
|
|
1792
|
+
],
|
|
1793
|
+
"model_description": "The second generation of the InternLM model, InternLM2.",
|
|
1794
|
+
"model_specs": [
|
|
1795
|
+
{
|
|
1796
|
+
"model_format": "pytorch",
|
|
1797
|
+
"model_size_in_billions": 7,
|
|
1798
|
+
"quantizations": [
|
|
1799
|
+
"none"
|
|
1800
|
+
],
|
|
1801
|
+
"model_id": "Shanghai_AI_Laboratory/internlm2-chat-7b",
|
|
1802
|
+
"model_hub": "modelscope",
|
|
1803
|
+
"model_revision": "master"
|
|
1804
|
+
},
|
|
1805
|
+
{
|
|
1806
|
+
"model_format": "pytorch",
|
|
1807
|
+
"model_size_in_billions": 20,
|
|
1808
|
+
"quantizations": [
|
|
1809
|
+
"none"
|
|
1810
|
+
],
|
|
1811
|
+
"model_id": "Shanghai_AI_Laboratory/internlm2-chat-20b",
|
|
1812
|
+
"model_hub": "modelscope",
|
|
1813
|
+
"model_revision": "master"
|
|
1814
|
+
}
|
|
1815
|
+
],
|
|
1816
|
+
"prompt_style": {
|
|
1817
|
+
"style_name": "INTERNLM2",
|
|
1818
|
+
"system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
|
|
1819
|
+
"roles": [
|
|
1820
|
+
"[UNUSED_TOKEN_146]user",
|
|
1821
|
+
"[UNUSED_TOKEN_146]assistant"
|
|
1822
|
+
],
|
|
1823
|
+
"intra_message_sep": "[UNUSED_TOKEN_145]",
|
|
1824
|
+
"stop_token_ids": [
|
|
1825
|
+
92542
|
|
1826
|
+
],
|
|
1827
|
+
"stop": [
|
|
1828
|
+
"[UNUSED_TOKEN_145]"
|
|
1829
|
+
]
|
|
1830
|
+
}
|
|
1742
1831
|
}
|
|
1743
1832
|
]
|