xinference 0.8.3__py3-none-any.whl → 0.8.4__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of xinference has been flagged as possibly problematic.

Files changed (25)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +3 -3
  3. xinference/core/chat_interface.py +1 -8
  4. xinference/model/llm/llm_family.json +72 -6
  5. xinference/model/llm/llm_family_modelscope.json +12 -6
  6. xinference/model/llm/utils.py +24 -16
  7. xinference/web/ui/build/asset-manifest.json +3 -3
  8. xinference/web/ui/build/index.html +1 -1
  9. xinference/web/ui/build/static/js/{main.15822aeb.js → main.476e35cc.js} +3 -3
  10. xinference/web/ui/build/static/js/main.476e35cc.js.map +1 -0
  11. xinference/web/ui/node_modules/.cache/babel-loader/1d5b806c08ffb55539ee26b5fa9746a373f602354d6e55a0d4379d7e2f903868.json +1 -0
  12. xinference/web/ui/node_modules/.cache/babel-loader/396f7ce6ae6900bfdb00e369ade8a05045dc1df025610057ff7436d9e58af81c.json +1 -0
  13. xinference/web/ui/node_modules/.cache/babel-loader/7b3a58afd15f913e9ab363c6051e449d885049a283ff905aa4d499124d4c20d1.json +1 -0
  14. xinference/web/ui/node_modules/.cache/babel-loader/ddf597663270471b31251b2abb36e3fa093efe20489387d996f993d2c61be112.json +1 -0
  15. {xinference-0.8.3.dist-info → xinference-0.8.4.dist-info}/METADATA +3 -4
  16. {xinference-0.8.3.dist-info → xinference-0.8.4.dist-info}/RECORD +21 -20
  17. xinference/web/ui/build/static/js/main.15822aeb.js.map +0 -1
  18. xinference/web/ui/node_modules/.cache/babel-loader/139e5e4adf436923107d2b02994c7ff6dba2aac1989e9b6638984f0dfe782c4a.json +0 -1
  19. xinference/web/ui/node_modules/.cache/babel-loader/193e7ba39e70d4bb2895a5cb317f6f293a5fd02e7e324c02a1eba2f83216419c.json +0 -1
  20. xinference/web/ui/node_modules/.cache/babel-loader/5c408307c982f07f9c09c85c98212d1b1c22548a9194c69548750a3016b91b88.json +0 -1
  21. /xinference/web/ui/build/static/js/{main.15822aeb.js.LICENSE.txt → main.476e35cc.js.LICENSE.txt} +0 -0
  22. {xinference-0.8.3.dist-info → xinference-0.8.4.dist-info}/LICENSE +0 -0
  23. {xinference-0.8.3.dist-info → xinference-0.8.4.dist-info}/WHEEL +0 -0
  24. {xinference-0.8.3.dist-info → xinference-0.8.4.dist-info}/entry_points.txt +0 -0
  25. {xinference-0.8.3.dist-info → xinference-0.8.4.dist-info}/top_level.txt +0 -0
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2024-02-02T12:27:24+0800",
+ "date": "2024-02-04T17:16:50+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "749ef3ff298a94b88c1e67415819fae4fb1de75c",
- "version": "0.8.3"
+ "full-revisionid": "1b9b8c805e4e23a4c34fb05a96b819fee3ca8d50",
+ "version": "0.8.4"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py CHANGED
@@ -1156,17 +1156,17 @@ class RESTfulAPI:
             await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
 
-        model_name = desc.get("model_name", "")
+        model_family = desc.get("model_family", "")
         function_call_models = ["chatglm3", "gorilla-openfunctions-v1", "qwen-chat"]
 
-        is_qwen = desc.get("model_format") == "ggmlv3" and "qwen" in model_name
+        is_qwen = desc.get("model_format") == "ggmlv3" and "qwen-chat" == model_family
 
         if is_qwen and system_prompt is not None:
             raise HTTPException(
                 status_code=400, detail="Qwen ggml does not have system prompt"
            )
 
-        if not any(name in model_name for name in function_call_models):
+        if model_family not in function_call_models:
            if body.tools:
                raise HTTPException(
                    status_code=400,
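The tool-call gate used to substring-match on `model_name`, so any custom model whose name merely contained one of the family strings slipped through; it now compares the reported `model_family` exactly. A minimal sketch of the behavioral difference (the `desc` dict and model name below are hypothetical stand-ins for the model description the handler receives):

```python
# Hypothetical model description, shaped like the handler's `desc` dict.
desc = {"model_name": "my-qwen-chat-finetune", "model_family": "llama-2-chat"}

function_call_models = ["chatglm3", "gorilla-openfunctions-v1", "qwen-chat"]

# 0.8.3: substring match on model_name -> True, a false positive.
old_check = any(name in desc.get("model_name", "") for name in function_call_models)

# 0.8.4: exact match on model_family -> False, as intended.
new_check = desc.get("model_family", "") in function_call_models

print(old_check, new_check)  # True False
```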
xinference/core/chat_interface.py CHANGED
@@ -98,16 +98,9 @@ class GradioInterface:
             return flat_list
 
         def to_chat(lst: List[str]) -> List[ChatCompletionMessage]:
-            from ..model.llm import BUILTIN_LLM_PROMPT_STYLE
-
             res = []
-            prompt_style = BUILTIN_LLM_PROMPT_STYLE.get(self.model_name)
-            if prompt_style is None:
-                roles = ["assistant", "user"]
-            else:
-                roles = prompt_style.roles
             for i in range(len(lst)):
-                role = roles[0] if i % 2 == 1 else roles[1]
+                role = "assistant" if i % 2 == 1 else "user"
                 res.append(ChatCompletionMessage(role=role, content=lst[i]))
             return res
 
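With the per-model prompt-style lookup removed, the Gradio layer always alternates the standard OpenAI roles and leaves model-specific role names to the prompt formatter (see the `get_role` helper in `utils.py` below). A standalone sketch of the resulting alternation, with plain dicts standing in for `ChatCompletionMessage`:

```python
# Even indices are user turns, odd indices are assistant turns.
lst = ["Hello", "Hi! How can I help?", "Explain GGUF."]
res = [
    {"role": "assistant" if i % 2 == 1 else "user", "content": text}
    for i, text in enumerate(lst)
]
# -> roles: user, assistant, user
```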
xinference/model/llm/llm_family.json CHANGED
@@ -827,6 +827,66 @@
       ],
       "model_id": "meta-llama/Llama-2-70b-chat-hf",
       "model_revision": "36d9a7388cc80e5f4b3e9701ca2f250d21a96c30"
+    },
+    {
+      "model_format": "ggufv2",
+      "model_size_in_billions": 7,
+      "quantizations": [
+        "Q2_K",
+        "Q3_K_S",
+        "Q3_K_M",
+        "Q3_K_L",
+        "Q4_0",
+        "Q4_K_S",
+        "Q4_K_M",
+        "Q5_0",
+        "Q5_K_S",
+        "Q5_K_M",
+        "Q6_K",
+        "Q8_0"
+      ],
+      "model_id": "TheBloke/Llama-2-7B-Chat-GGUF",
+      "model_file_name_template": "llama-2-7b-chat.{quantization}.gguf"
+    },
+    {
+      "model_format": "ggufv2",
+      "model_size_in_billions": 13,
+      "quantizations": [
+        "Q2_K",
+        "Q3_K_S",
+        "Q3_K_M",
+        "Q3_K_L",
+        "Q4_0",
+        "Q4_K_S",
+        "Q4_K_M",
+        "Q5_0",
+        "Q5_K_S",
+        "Q5_K_M",
+        "Q6_K",
+        "Q8_0"
+      ],
+      "model_id": "TheBloke/Llama-2-13B-chat-GGUF",
+      "model_file_name_template": "llama-2-13b-chat.{quantization}.gguf"
+    },
+    {
+      "model_format": "ggufv2",
+      "model_size_in_billions": 70,
+      "quantizations": [
+        "Q2_K",
+        "Q3_K_S",
+        "Q3_K_M",
+        "Q3_K_L",
+        "Q4_0",
+        "Q4_K_S",
+        "Q4_K_M",
+        "Q5_0",
+        "Q5_K_S",
+        "Q5_K_M",
+        "Q6_K",
+        "Q8_0"
+      ],
+      "model_id": "TheBloke/Llama-2-70B-Chat-GGUF",
+      "model_file_name_template": "llama-2-70b-chat.{quantization}.gguf"
     }
   ],
   "prompt_style": {
@@ -2131,16 +2191,19 @@
       }
     ],
     "prompt_style": {
-      "style_name": "NO_COLON_TWO",
-      "system_prompt": "<s>[INST] <<SYS>>\nAn informative and inspiring conversation\n<</SYS>>\n\n",
+      "style_name": "LLAMA2",
+      "system_prompt": "[INST] ",
       "roles": [
         "[INST]",
         "[/INST]"
       ],
       "intra_message_sep": " ",
-      "inter_message_sep": " </s><s>",
+      "inter_message_sep": "<s>",
       "stop_token_ids": [
         2
+      ],
+      "stop": [
+        "</s>"
       ]
     }
   },
@@ -2189,16 +2252,19 @@
       }
     ],
     "prompt_style": {
-      "style_name": "NO_COLON_TWO",
-      "system_prompt": "<s>[INST] <<SYS>>\nAn informative and inspiring conversation\n<</SYS>>\n\n",
+      "style_name": "LLAMA2",
+      "system_prompt": "[INST] ",
       "roles": [
         "[INST]",
         "[/INST]"
       ],
       "intra_message_sep": " ",
-      "inter_message_sep": " </s><s>",
+      "inter_message_sep": "<s>",
       "stop_token_ids": [
         2
+      ],
+      "stop": [
+        "</s>"
       ]
     }
   },
xinference/model/llm/llm_family_modelscope.json CHANGED
@@ -1224,16 +1224,19 @@
       }
     ],
     "prompt_style": {
-      "style_name": "NO_COLON_TWO",
-      "system_prompt": "<s>[INST] <<SYS>>\nAn informative and inspiring conversation\n<</SYS>>\n\n",
+      "style_name": "LLAMA2",
+      "system_prompt": "[INST] ",
       "roles": [
         "[INST]",
         "[/INST]"
       ],
       "intra_message_sep": " ",
-      "inter_message_sep": " </s><s>",
+      "inter_message_sep": "<s>",
       "stop_token_ids": [
         2
+      ],
+      "stop": [
+        "</s>"
       ]
     }
   },
@@ -1272,16 +1275,19 @@
       }
     ],
     "prompt_style": {
-      "style_name": "NO_COLON_TWO",
-      "system_prompt": "<s>[INST] <<SYS>>\nAn informative and inspiring conversation\n<</SYS>>\n\n",
+      "style_name": "LLAMA2",
+      "system_prompt": "[INST] ",
       "roles": [
         "[INST]",
         "[/INST]"
       ],
       "intra_message_sep": " ",
-      "inter_message_sep": " </s><s>",
+      "inter_message_sep": "<s>",
       "stop_token_ids": [
         2
+      ],
+      "stop": [
+        "</s>"
       ]
     }
   },
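All four Llama-2-style entries make the same switch: the hand-rolled `NO_COLON_TWO` template gives way to the dedicated `LLAMA2` style, and the `</s>` end-of-turn token moves out of `inter_message_sep` into a new generation-time `stop` list. Conceptually (a rough sketch of how turns end up joined, not xinference code):

```python
# 0.8.3: </s> baked into the separator between turns.
before = "first answer </s><s>[INST] next question [/INST]"
# 0.8.4: the separator is just <s>; "</s>" is instead passed as a stop
# string, so generation halts cleanly at the end of the assistant's turn.
after = "first answer<s>[INST] next question [/INST]"
```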
xinference/model/llm/utils.py CHANGED
@@ -60,10 +60,18 @@ class ChatModelMixin:
                 ChatCompletionMessage(role=prompt_style.roles[1], content="")
             )
 
+        def get_role(role_name: str):
+            if role_name == "user":
+                return prompt_style.roles[0]
+            elif role_name == "assistant":
+                return prompt_style.roles[1]
+            else:
+                return role_name
+
         if prompt_style.style_name == "ADD_COLON_SINGLE":
             ret = prompt_style.system_prompt + prompt_style.intra_message_sep
             for message in chat_history:
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += role + ": " + content + prompt_style.intra_message_sep
@@ -74,7 +82,7 @@ class ChatModelMixin:
             seps = [prompt_style.intra_message_sep, prompt_style.inter_message_sep]
             ret = prompt_style.system_prompt + seps[0]
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += role + ": " + content + seps[i % 2]
@@ -85,7 +93,7 @@ class ChatModelMixin:
             seps = [prompt_style.intra_message_sep, prompt_style.inter_message_sep]
             ret = prompt_style.system_prompt
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += role + content + seps[i % 2]
@@ -96,7 +104,7 @@ class ChatModelMixin:
             seps = [prompt_style.intra_message_sep, prompt_style.inter_message_sep]
             ret = ""
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     if i == 0:
@@ -109,7 +117,7 @@ class ChatModelMixin:
         elif prompt_style.style_name == "FALCON":
             ret = prompt_style.system_prompt
             for message in chat_history:
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += (
@@ -137,7 +145,7 @@ class ChatModelMixin:
             else:
                 ret = ""
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if i % 2 == 0:
                     ret += f"[Round {i // 2 + round_add_n}]{prompt_style.intra_message_sep}"
@@ -154,7 +162,7 @@ class ChatModelMixin:
             )
 
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 tool_calls = message.get("tool_calls")
                 if tool_calls:
@@ -173,7 +181,7 @@ class ChatModelMixin:
                 else ""
             )
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += f"<|{role}|> \n {content}"
@@ -239,7 +247,7 @@ Begin!"""
 
             ret = f"<|im_start|>system\n{prompt_style.system_prompt}<|im_end|>"
             for message in chat_history:
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
 
                 ret += prompt_style.intra_message_sep
@@ -279,7 +287,7 @@ Begin!"""
                 else prompt_style.system_prompt + prompt_style.intra_message_sep + "\n"
             )
             for message in chat_history:
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
 
                 if content:
@@ -293,7 +301,7 @@ Begin!"""
             for i, message in enumerate(chat_history[:-2]):
                 if i % 2 == 0:
                     ret += "<s>"
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 ret += role + ":" + str(content) + seps[i % 2]
             if len(ret) == 0:
@@ -316,7 +324,7 @@ Begin!"""
                 + "\n"
             )
             for message in chat_history:
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
 
                 if content:
@@ -327,7 +335,7 @@ Begin!"""
         elif prompt_style.style_name == "ADD_COLON_SINGLE_COT":
             ret = prompt_style.system_prompt + prompt_style.intra_message_sep
             for message in chat_history:
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += role + ": " + content + prompt_style.intra_message_sep
@@ -341,7 +349,7 @@ Begin!"""
             seps = [prompt_style.intra_message_sep, prompt_style.inter_message_sep]
             ret = prompt_style.system_prompt
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += role + ": " + content + seps[i % 2]
@@ -352,7 +360,7 @@ Begin!"""
             sep = prompt_style.inter_message_sep
             ret = prompt_style.system_prompt + sep
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += role + "\n" + content + sep
@@ -384,7 +392,7 @@ Begin!"""
             ret = "<s>"
             for i, message in enumerate(chat_history):
                 content = message["content"]
-                role = message["role"]
+                role = get_role(message["role"])
                 if i % 2 == 0:  # Human
                     assert content is not None
                     ret += role + ": " + content + "\n\n"
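Every style branch now funnels roles through the new `get_role` helper, which maps the OpenAI-style "user"/"assistant" names onto whatever the prompt style declares and passes anything else through unchanged. A standalone sketch of the same logic against the Llama-2 roles shown above (the default `roles` tuple here is illustrative):

```python
def get_role(role_name: str, roles=("[INST]", "[/INST]")) -> str:
    # Same mapping as the diff: user -> roles[0], assistant -> roles[1],
    # unknown roles pass through untouched.
    if role_name == "user":
        return roles[0]
    elif role_name == "assistant":
        return roles[1]
    return role_name

assert get_role("user") == "[INST]"
assert get_role("assistant") == "[/INST]"
assert get_role("system") == "system"
```

This is what lets the Gradio layer above send plain "user"/"assistant" roles regardless of which model is loaded.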
xinference/web/ui/build/asset-manifest.json CHANGED
@@ -1,11 +1,11 @@
 {
   "files": {
-    "main.js": "./static/js/main.15822aeb.js",
+    "main.js": "./static/js/main.476e35cc.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
-    "main.15822aeb.js.map": "./static/js/main.15822aeb.js.map"
+    "main.476e35cc.js.map": "./static/js/main.476e35cc.js.map"
   },
   "entrypoints": [
-    "static/js/main.15822aeb.js"
+    "static/js/main.476e35cc.js"
   ]
 }
xinference/web/ui/build/index.html CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.15822aeb.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.476e35cc.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>