xinference 0.8.3__py3-none-any.whl → 0.8.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +3 -3
- xinference/core/chat_interface.py +1 -8
- xinference/model/llm/llm_family.json +72 -6
- xinference/model/llm/llm_family_modelscope.json +12 -6
- xinference/model/llm/utils.py +24 -16
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.15822aeb.js → main.476e35cc.js} +3 -3
- xinference/web/ui/build/static/js/main.476e35cc.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1d5b806c08ffb55539ee26b5fa9746a373f602354d6e55a0d4379d7e2f903868.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/396f7ce6ae6900bfdb00e369ade8a05045dc1df025610057ff7436d9e58af81c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7b3a58afd15f913e9ab363c6051e449d885049a283ff905aa4d499124d4c20d1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ddf597663270471b31251b2abb36e3fa093efe20489387d996f993d2c61be112.json +1 -0
- {xinference-0.8.3.dist-info → xinference-0.8.4.dist-info}/METADATA +3 -4
- {xinference-0.8.3.dist-info → xinference-0.8.4.dist-info}/RECORD +21 -20
- xinference/web/ui/build/static/js/main.15822aeb.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/139e5e4adf436923107d2b02994c7ff6dba2aac1989e9b6638984f0dfe782c4a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/193e7ba39e70d4bb2895a5cb317f6f293a5fd02e7e324c02a1eba2f83216419c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5c408307c982f07f9c09c85c98212d1b1c22548a9194c69548750a3016b91b88.json +0 -1
- /xinference/web/ui/build/static/js/{main.15822aeb.js.LICENSE.txt → main.476e35cc.js.LICENSE.txt} +0 -0
- {xinference-0.8.3.dist-info → xinference-0.8.4.dist-info}/LICENSE +0 -0
- {xinference-0.8.3.dist-info → xinference-0.8.4.dist-info}/WHEEL +0 -0
- {xinference-0.8.3.dist-info → xinference-0.8.4.dist-info}/entry_points.txt +0 -0
- {xinference-0.8.3.dist-info → xinference-0.8.4.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2024-02-
+ "date": "2024-02-04T17:16:50+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "
- "version": "0.8.3"
+ "full-revisionid": "1b9b8c805e4e23a4c34fb05a96b819fee3ca8d50",
+ "version": "0.8.4"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py
CHANGED
@@ -1156,17 +1156,17 @@ class RESTfulAPI:
             await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
 
-
+        model_family = desc.get("model_family", "")
         function_call_models = ["chatglm3", "gorilla-openfunctions-v1", "qwen-chat"]
 
-        is_qwen = desc.get("model_format") == "ggmlv3" and "qwen"
+        is_qwen = desc.get("model_format") == "ggmlv3" and "qwen-chat" == model_family
 
         if is_qwen and system_prompt is not None:
             raise HTTPException(
                 status_code=400, detail="Qwen ggml does not have system prompt"
             )
 
-        if not
+        if model_family not in function_call_models:
             if body.tools:
                 raise HTTPException(
                     status_code=400,
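Note: the gist of this hunk is that tool-call gating now keys on the exact model_family string rather than the looser checks of 0.8.3 (the old right-hand sides are truncated in this extract). A minimal sketch of the new guard's behavior; the desc dict below is a hypothetical stand-in for the model description the endpoint builds:

function_call_models = ["chatglm3", "gorilla-openfunctions-v1", "qwen-chat"]

desc = {"model_format": "ggmlv3", "model_family": "qwen-chat"}  # hypothetical example

model_family = desc.get("model_family", "")
is_qwen = desc.get("model_format") == "ggmlv3" and "qwen-chat" == model_family

print(is_qwen)                               # True: system prompts are rejected for Qwen ggml
print(model_family in function_call_models)  # True: body.tools is allowed through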
xinference/core/chat_interface.py
CHANGED
@@ -98,16 +98,9 @@ class GradioInterface:
             return flat_list
 
         def to_chat(lst: List[str]) -> List[ChatCompletionMessage]:
-            from ..model.llm import BUILTIN_LLM_PROMPT_STYLE
-
             res = []
-            prompt_style = BUILTIN_LLM_PROMPT_STYLE.get(self.model_name)
-            if prompt_style is None:
-                roles = ["assistant", "user"]
-            else:
-                roles = prompt_style.roles
             for i in range(len(lst)):
-                role =
+                role = "assistant" if i % 2 == 1 else "user"
                 res.append(ChatCompletionMessage(role=role, content=lst[i]))
             return res
 
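Note: the Gradio history conversion no longer consults per-model prompt styles; roles now simply alternate, with role translation handled later in utils.py. A self-contained sketch of the new behavior, with plain dicts standing in for xinference's ChatCompletionMessage:

from typing import Dict, List

def to_chat(lst: List[str]) -> List[Dict[str, str]]:
    # Even indices are user turns, odd indices are assistant turns.
    res = []
    for i in range(len(lst)):
        role = "assistant" if i % 2 == 1 else "user"
        res.append({"role": role, "content": lst[i]})
    return res

print(to_chat(["hi", "hello!", "who are you?"]))
# [{'role': 'user', 'content': 'hi'},
#  {'role': 'assistant', 'content': 'hello!'},
#  {'role': 'user', 'content': 'who are you?'}]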
xinference/model/llm/llm_family.json
CHANGED
@@ -827,6 +827,66 @@
         ],
         "model_id": "meta-llama/Llama-2-70b-chat-hf",
         "model_revision": "36d9a7388cc80e5f4b3e9701ca2f250d21a96c30"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_S",
+          "Q3_K_M",
+          "Q3_K_L",
+          "Q4_0",
+          "Q4_K_S",
+          "Q4_K_M",
+          "Q5_0",
+          "Q5_K_S",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "TheBloke/Llama-2-7B-Chat-GGUF",
+        "model_file_name_template": "llama-2-7b-chat.{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 13,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_S",
+          "Q3_K_M",
+          "Q3_K_L",
+          "Q4_0",
+          "Q4_K_S",
+          "Q4_K_M",
+          "Q5_0",
+          "Q5_K_S",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "TheBloke/Llama-2-13B-chat-GGUF",
+        "model_file_name_template": "llama-2-13b-chat.{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_S",
+          "Q3_K_M",
+          "Q3_K_L",
+          "Q4_0",
+          "Q4_K_S",
+          "Q4_K_M",
+          "Q5_0",
+          "Q5_K_S",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "TheBloke/Llama-2-70B-Chat-GGUF",
+        "model_file_name_template": "llama-2-70b-chat.{quantization}.gguf"
       }
     ],
     "prompt_style": {
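Note: these entries register TheBloke's prebuilt GGUF conversions for the llama-2-chat family at 7B, 13B, and 70B. Under a default local deployment, one of them should be launchable roughly like this (endpoint and client API assumed from the standard xinference docs of this era, not shown in this diff):

from xinference.client import Client

client = Client("http://localhost:9997")

# Pick one of the newly listed ggufv2 specs and quantizations.
model_uid = client.launch_model(
    model_name="llama-2-chat",
    model_format="ggufv2",
    model_size_in_billions=7,
    quantization="Q4_K_M",
)
model = client.get_model(model_uid)
print(model.chat("Hello!"))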
@@ -2131,16 +2191,19 @@
       }
     ],
     "prompt_style": {
-      "style_name": "
-      "system_prompt": "
+      "style_name": "LLAMA2",
+      "system_prompt": "[INST] ",
       "roles": [
         "[INST]",
         "[/INST]"
       ],
       "intra_message_sep": " ",
-      "inter_message_sep": "
+      "inter_message_sep": "<s>",
       "stop_token_ids": [
         2
+      ],
+      "stop": [
+        "</s>"
       ]
     }
   },
@@ -2189,16 +2252,19 @@
       }
     ],
     "prompt_style": {
-      "style_name": "
-      "system_prompt": "
+      "style_name": "LLAMA2",
+      "system_prompt": "[INST] ",
       "roles": [
         "[INST]",
         "[/INST]"
       ],
       "intra_message_sep": " ",
-      "inter_message_sep": "
+      "inter_message_sep": "<s>",
       "stop_token_ids": [
         2
+      ],
+      "stop": [
+        "</s>"
       ]
     }
   },
xinference/model/llm/llm_family_modelscope.json
CHANGED
@@ -1224,16 +1224,19 @@
       }
     ],
     "prompt_style": {
-      "style_name": "
-      "system_prompt": "
+      "style_name": "LLAMA2",
+      "system_prompt": "[INST] ",
       "roles": [
         "[INST]",
         "[/INST]"
       ],
       "intra_message_sep": " ",
-      "inter_message_sep": "
+      "inter_message_sep": "<s>",
       "stop_token_ids": [
         2
+      ],
+      "stop": [
+        "</s>"
       ]
     }
   },
@@ -1272,16 +1275,19 @@
       }
     ],
     "prompt_style": {
-      "style_name": "
-      "system_prompt": "
+      "style_name": "LLAMA2",
+      "system_prompt": "[INST] ",
       "roles": [
         "[INST]",
         "[/INST]"
       ],
       "intra_message_sep": " ",
-      "inter_message_sep": "
+      "inter_message_sep": "<s>",
       "stop_token_ids": [
         2
+      ],
+      "stop": [
+        "</s>"
       ]
     }
   },
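Note: in both registries the llama-2-chat prompt style is rewritten: system_prompt becomes a bare "[INST] " prefix, inter_message_sep becomes "<s>", and a textual "stop" list ("</s>") is added alongside stop token id 2, presumably for backends that match stop strings rather than token ids. A hedged sketch (not verbatim library code) of how these fields could combine into a prompt, modeled on the "if i == 0" branch visible in the utils.py hunks below:

system_prompt = "[INST] "      # new value from these hunks
roles = ["[INST]", "[/INST]"]  # [user, assistant]
seps = [" ", "<s>"]            # [intra_message_sep, inter_message_sep (new)]

history = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!"},
    {"role": "user", "content": "What is 2+2?"},
]

ret = ""
for i, message in enumerate(history):
    role = roles[0] if message["role"] == "user" else roles[1]
    if i == 0:
        # First turn: the system prompt doubles as the opening [INST] tag.
        ret += system_prompt + message["content"]
    else:
        ret += role + " " + message["content"] + seps[i % 2]

print(ret)  # '[INST] Hi[/INST] Hello!<s>[INST] What is 2+2? '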
xinference/model/llm/utils.py
CHANGED
@@ -60,10 +60,18 @@ class ChatModelMixin:
                 ChatCompletionMessage(role=prompt_style.roles[1], content="")
             )
 
+        def get_role(role_name: str):
+            if role_name == "user":
+                return prompt_style.roles[0]
+            elif role_name == "assistant":
+                return prompt_style.roles[1]
+            else:
+                return role_name
+
         if prompt_style.style_name == "ADD_COLON_SINGLE":
             ret = prompt_style.system_prompt + prompt_style.intra_message_sep
             for message in chat_history:
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += role + ": " + content + prompt_style.intra_message_sep
@@ -74,7 +82,7 @@ class ChatModelMixin:
             seps = [prompt_style.intra_message_sep, prompt_style.inter_message_sep]
             ret = prompt_style.system_prompt + seps[0]
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += role + ": " + content + seps[i % 2]
@@ -85,7 +93,7 @@ class ChatModelMixin:
             seps = [prompt_style.intra_message_sep, prompt_style.inter_message_sep]
             ret = prompt_style.system_prompt
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += role + content + seps[i % 2]
@@ -96,7 +104,7 @@ class ChatModelMixin:
             seps = [prompt_style.intra_message_sep, prompt_style.inter_message_sep]
             ret = ""
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     if i == 0:
@@ -109,7 +117,7 @@ class ChatModelMixin:
         elif prompt_style.style_name == "FALCON":
             ret = prompt_style.system_prompt
             for message in chat_history:
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += (
@@ -137,7 +145,7 @@ class ChatModelMixin:
             else:
                 ret = ""
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if i % 2 == 0:
                     ret += f"[Round {i // 2 + round_add_n}]{prompt_style.intra_message_sep}"
@@ -154,7 +162,7 @@ class ChatModelMixin:
             )
 
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 tool_calls = message.get("tool_calls")
                 if tool_calls:
@@ -173,7 +181,7 @@ class ChatModelMixin:
                 else ""
             )
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += f"<|{role}|> \n {content}"
@@ -239,7 +247,7 @@ Begin!"""
 
             ret = f"<|im_start|>system\n{prompt_style.system_prompt}<|im_end|>"
             for message in chat_history:
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
 
                 ret += prompt_style.intra_message_sep
@@ -279,7 +287,7 @@ Begin!"""
                 else prompt_style.system_prompt + prompt_style.intra_message_sep + "\n"
             )
             for message in chat_history:
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
 
                 if content:
@@ -293,7 +301,7 @@ Begin!"""
             for i, message in enumerate(chat_history[:-2]):
                 if i % 2 == 0:
                     ret += "<s>"
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 ret += role + ":" + str(content) + seps[i % 2]
             if len(ret) == 0:
@@ -316,7 +324,7 @@ Begin!"""
                 + "\n"
             )
             for message in chat_history:
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
 
                 if content:
@@ -327,7 +335,7 @@ Begin!"""
         elif prompt_style.style_name == "ADD_COLON_SINGLE_COT":
             ret = prompt_style.system_prompt + prompt_style.intra_message_sep
             for message in chat_history:
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += role + ": " + content + prompt_style.intra_message_sep
@@ -341,7 +349,7 @@ Begin!"""
             seps = [prompt_style.intra_message_sep, prompt_style.inter_message_sep]
             ret = prompt_style.system_prompt
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += role + ": " + content + seps[i % 2]
@@ -352,7 +360,7 @@ Begin!"""
             sep = prompt_style.inter_message_sep
             ret = prompt_style.system_prompt + sep
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += role + "\n" + content + sep
@@ -384,7 +392,7 @@ Begin!"""
             ret = "<s>"
             for i, message in enumerate(chat_history):
                 content = message["content"]
-                role = message["role"]
+                role = get_role(message["role"])
                 if i % 2 == 0:  # Human
                     assert content is not None
                     ret += role + ": " + content + "\n\n"
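Note: every prompt-style branch now routes roles through the new get_role helper, so OpenAI-style "user"/"assistant" names in an incoming chat history are translated to the family's role tokens instead of being spliced in verbatim. A minimal sketch of the mapping; the PromptStyle stub is hypothetical:

class PromptStyle:  # hypothetical stub; the real class lives in xinference
    roles = ["[INST]", "[/INST]"]  # [user token, assistant token]

prompt_style = PromptStyle()

def get_role(role_name: str):
    if role_name == "user":
        return prompt_style.roles[0]
    elif role_name == "assistant":
        return prompt_style.roles[1]
    else:
        return role_name

assert get_role("user") == "[INST]"
assert get_role("assistant") == "[/INST]"
assert get_role("system") == "system"  # unmapped roles pass through unchanged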
xinference/web/ui/build/asset-manifest.json
CHANGED
@@ -1,11 +1,11 @@
 {
   "files": {
-    "main.js": "./static/js/main.15822aeb.js",
+    "main.js": "./static/js/main.476e35cc.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
-    "main.15822aeb.js.map": "./static/js/main.15822aeb.js.map"
+    "main.476e35cc.js.map": "./static/js/main.476e35cc.js.map"
   },
   "entrypoints": [
-    "static/js/main.15822aeb.js"
+    "static/js/main.476e35cc.js"
   ]
 }
xinference/web/ui/build/index.html
CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.15822aeb.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.476e35cc.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>