xinference 0.7.5__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff shows the changes between publicly available package versions as published to their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/oauth2/__init__.py +13 -0
- xinference/api/oauth2/common.py +14 -0
- xinference/api/oauth2/core.py +93 -0
- xinference/api/oauth2/types.py +36 -0
- xinference/api/oauth2/utils.py +44 -0
- xinference/api/restful_api.py +216 -27
- xinference/client/oscar/actor_client.py +18 -18
- xinference/client/restful/restful_client.py +96 -33
- xinference/conftest.py +63 -1
- xinference/constants.py +1 -0
- xinference/core/chat_interface.py +143 -3
- xinference/core/metrics.py +83 -0
- xinference/core/model.py +244 -181
- xinference/core/status_guard.py +86 -0
- xinference/core/supervisor.py +57 -7
- xinference/core/worker.py +134 -13
- xinference/deploy/cmdline.py +142 -16
- xinference/deploy/local.py +39 -7
- xinference/deploy/supervisor.py +2 -0
- xinference/deploy/worker.py +33 -5
- xinference/fields.py +4 -1
- xinference/model/core.py +8 -1
- xinference/model/embedding/core.py +3 -2
- xinference/model/embedding/model_spec_modelscope.json +60 -18
- xinference/model/image/stable_diffusion/core.py +4 -3
- xinference/model/llm/__init__.py +7 -0
- xinference/model/llm/ggml/llamacpp.py +3 -2
- xinference/model/llm/llm_family.json +87 -3
- xinference/model/llm/llm_family.py +15 -5
- xinference/model/llm/llm_family_modelscope.json +92 -3
- xinference/model/llm/pytorch/chatglm.py +70 -28
- xinference/model/llm/pytorch/core.py +11 -30
- xinference/model/llm/pytorch/internlm2.py +155 -0
- xinference/model/llm/pytorch/utils.py +0 -153
- xinference/model/llm/utils.py +37 -8
- xinference/model/llm/vllm/core.py +15 -3
- xinference/model/multimodal/__init__.py +15 -8
- xinference/model/multimodal/core.py +8 -1
- xinference/model/multimodal/model_spec.json +9 -0
- xinference/model/multimodal/model_spec_modelscope.json +45 -0
- xinference/model/multimodal/qwen_vl.py +5 -9
- xinference/model/utils.py +7 -2
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.b83095c2.js +3 -0
- xinference/web/ui/build/static/js/{main.236e72e7.js.LICENSE.txt → main.b83095c2.js.LICENSE.txt} +7 -0
- xinference/web/ui/build/static/js/main.b83095c2.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0a853b2fa1902551e262a2f1a4b7894341f27b3dd9587f2ef7aaea195af89518.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/101923c539819f26ad11fbcbd6f6e56436b285efbb090dcc7dd648c6e924c4a8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/193e7ba39e70d4bb2895a5cb317f6f293a5fd02e7e324c02a1eba2f83216419c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/22858de5265f2d279fca9f2f54dfb147e4b2704200dfb5d2ad3ec9769417328f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/27696db5fcd4fcf0e7974cadf1e4a2ab89690474045c3188eafd586323ad13bb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/27bcada3ee8f89d21184b359f022fc965f350ffaca52c9814c29f1fc37121173.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/27bdbe25deab8cf08f7fab8f05f8f26cf84a98809527a37986a4ab73a57ba96a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2bee7b8bd3d52976a45d6068e1333df88b943e0e679403c809e45382e3818037.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/30670751f55508ef3b861e13dd71b9e5a10d2561373357a12fc3831a0b77fd93.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3605cd3a96ff2a3b443c70a101575482279ad26847924cab0684d165ba0d2492.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3789ef437d3ecbf945bb9cea39093d1f16ebbfa32dbe6daf35abcfb6d48de6f1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4942da6bc03bf7373af068e22f916341aabc5b5df855d73c1d348c696724ce37.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4d933e35e0fe79867d3aa6c46db28804804efddf5490347cb6c2c2879762a157.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4d96f071168af43965e0fab2ded658fa0a15b8d9ca03789a5ef9c5c16a4e3cee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/52a6136cb2dbbf9c51d461724d9b283ebe74a73fb19d5df7ba8e13c42bd7174d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5c408307c982f07f9c09c85c98212d1b1c22548a9194c69548750a3016b91b88.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/663adbcb60b942e9cf094c8d9fabe57517f5e5e6e722d28b4948a40b7445a3b8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/666bb2e1b250dc731311a7e4880886177885dfa768508d2ed63e02630cc78725.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/71493aadd34d568fbe605cacaba220aa69bd09273251ee4ba27930f8d01fccd8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8b071db2a5a9ef68dc14d5f606540bd23d9785e365a11997c510656764d2dccf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8b246d79cd3f6fc78f11777e6a6acca6a2c5d4ecce7f2dd4dcf9a48126440d3c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a4d72d3b806ba061919115f0c513738726872e3c79cf258f007519d3f91d1a16.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b4e4fccaf8f2489a29081f0bf3b191656bd452fb3c8b5e3c6d92d94f680964d5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b53eb7c7967f6577bd3e678293c44204fb03ffa7fdc1dd59d3099015c68f6f7f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d06af85a84e5c5a29d3acf2dbb5b30c0cf75c8aec4ab5f975e6096f944ee4324.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d5e150bff31715977d8f537c970f06d4fe3de9909d7e8342244a83a9f6447121.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/de36e5c08fd524e341d664883dda6cb1745acc852a4f1b011a35a0b4615f72fa.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f037ffef5992af0892d6d991053c1dace364cd39a3f11f1a41f92776e8a59459.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f23ab356a8603d4a2aaa74388c2f381675c207d37c4d1c832df922e9655c9a6b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f7c23b0922f4087b9e2e3e46f15c946b772daa46c28c3a12426212ecaf481deb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f95a8bd358eeb55fa2f49f1224cc2f4f36006359856744ff09ae4bb295f59ec1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +36 -0
- xinference/web/ui/node_modules/@types/cookie/package.json +30 -0
- xinference/web/ui/node_modules/@types/hoist-non-react-statics/package.json +33 -0
- xinference/web/ui/node_modules/react-cookie/package.json +55 -0
- xinference/web/ui/node_modules/universal-cookie/package.json +48 -0
- xinference/web/ui/package-lock.json +37 -0
- xinference/web/ui/package.json +3 -2
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/METADATA +17 -6
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/RECORD +101 -66
- xinference/web/ui/build/static/js/main.236e72e7.js +0 -3
- xinference/web/ui/build/static/js/main.236e72e7.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0cccfbe5d963b8e31eb679f9d9677392839cedd04aa2956ac6b33cf19599d597.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f3b6cc71b7c83bdc85aa4835927aeb86af2ce0d2ac241917ecfbf90f75c6d27.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2f651cf60b1bde50c0601c7110f77dd44819fb6e2501ff748a631724d91445d4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/42bb623f337ad08ed076484185726e072ca52bb88e373d72c7b052db4c273342.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/57af83639c604bd3362d0f03f7505e81c6f67ff77bee7c6bb31f6e5523eba185.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/667753ce39ce1d4bcbf9a5f1a103d653be1d19d42f4e1fbaceb9b507679a52c7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/66ed1bd4c06748c1b176a625c25c856997edc787856c73162f82f2b465c5d956.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/78f2521da2e2a98b075a2666cb782c7e2c019cd3c72199eecd5901c82d8655df.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8d2b0b3c6988d1894694dcbbe708ef91cfe62d62dac317031f09915ced637953.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9427ae7f1e94ae8dcd2333fb361e381f4054fde07394fe5448658e3417368476.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bcee2b4e76b07620f9087989eb86d43c645ba3c7a74132cf926260af1164af0e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/cc2ddd02ccc1dad1a2737ac247c79e6f6ed2c7836c6b68e511e3048f666b64af.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d2e8e6665a7efc832b43907dadf4e3c896a59eaf8129f9a520882466c8f2e489.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d8a42e9df7157de9f28eecefdf178fd113bf2280d28471b6e32a8a45276042df.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e26750d9556e9741912333349e4da454c53dbfddbfc6002ab49518dcf02af745.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ef42ec014d7bc373b874b2a1ff0dcd785490f125e913698bc049b0bd778e4d66.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fe3eb4d76c79ca98833f686d642224eeeb94cc83ad14300d281623796d087f0a.json +0 -1
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/LICENSE +0 -0
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/WHEEL +0 -0
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/vllm/core.py
CHANGED
@@ -94,6 +94,7 @@ VLLM_SUPPORTED_CHAT_MODELS = [
     "code-llama-python",
     "code-llama-instruct",
     "mistral-instruct-v0.1",
+    "mistral-instruct-v0.2",
     "chatglm3",
 ]
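The new entry simply extends the engine whitelist. A minimal sketch of what such a whitelist gates; can_use_vllm is an illustrative helper, not the actual xinference dispatch code:

VLLM_SUPPORTED_CHAT_MODELS = [
    "code-llama-python",
    "code-llama-instruct",
    "mistral-instruct-v0.1",
    "mistral-instruct-v0.2",  # newly whitelisted in 0.8.1
    "chatglm3",
]

def can_use_vllm(model_name: str) -> bool:
    # Only whitelisted chat models get routed to the vLLM backend.
    return model_name in VLLM_SUPPORTED_CHAT_MODELS

assert can_use_vllm("mistral-instruct-v0.2")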
@@ -170,7 +171,7 @@ class VLLMModel(LLM):
         )
         sanitized.setdefault("temperature", generate_config.get("temperature", 1.0))
         sanitized.setdefault("top_p", generate_config.get("top_p", 1.0))
-        sanitized.setdefault("max_tokens", generate_config.get("max_tokens",
+        sanitized.setdefault("max_tokens", generate_config.get("max_tokens", 1024))
         sanitized.setdefault("stop", generate_config.get("stop", None))
         sanitized.setdefault(
             "stop_token_ids", generate_config.get("stop_token_ids", None)
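The change pins a concrete fallback of 1024 tokens (the removed line is truncated in this view, so the previous default is not visible here). A runnable sketch of the setdefault-based sanitization pattern, trimmed to three keys:

from typing import Any, Dict, Optional

def sanitize_generate_config(generate_config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
    generate_config = generate_config or {}
    sanitized: Dict[str, Any] = {}
    sanitized.setdefault("temperature", generate_config.get("temperature", 1.0))
    sanitized.setdefault("top_p", generate_config.get("top_p", 1.0))
    sanitized.setdefault("max_tokens", generate_config.get("max_tokens", 1024))
    return sanitized

# Missing keys fall back to the defaults; caller-supplied values win.
print(sanitize_generate_config({"max_tokens": 256}))
# {'temperature': 1.0, 'top_p': 1.0, 'max_tokens': 256}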
@@ -303,6 +304,16 @@ class VLLMModel(LLM):
                 delta = choice["text"][len(previous_texts[i]) :]
                 previous_texts[i] = choice["text"]
                 choice["text"] = delta
+            prompt_tokens = len(_request_output.prompt_token_ids)
+            completion_tokens = sum(
+                len(output.token_ids) for output in _request_output.outputs
+            )
+            total_tokens = prompt_tokens + completion_tokens
+            chunk["usage"] = CompletionUsage(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=total_tokens,
+            )
             yield chunk

         if stream:
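Each streamed chunk now carries cumulative token usage. A toy illustration of the arithmetic, with stand-in dataclasses in place of vLLM's RequestOutput (field names mirror the diff):

from dataclasses import dataclass
from typing import List

@dataclass
class Output:
    token_ids: List[int]

@dataclass
class RequestOutput:
    prompt_token_ids: List[int]
    outputs: List[Output]

req = RequestOutput(prompt_token_ids=[1, 2, 3], outputs=[Output([4, 5]), Output([6])])
prompt_tokens = len(req.prompt_token_ids)
completion_tokens = sum(len(o.token_ids) for o in req.outputs)
total_tokens = prompt_tokens + completion_tokens
assert (prompt_tokens, completion_tokens, total_tokens) == (3, 3, 6)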
@@ -379,7 +390,8 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):

         generate_config = self._sanitize_chat_config(generate_config)
         # TODO(codingl2k1): qwen hacky to set stop for function call.
-
+        model_family = self.model_family.model_family or self.model_family.model_name
+        if tools and "qwen-chat" == model_family:
             stop = generate_config.get("stop")
             if isinstance(stop, str):
                 generate_config["stop"] = [stop, "Observation:"]
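With tools enabled on a qwen-chat family model, "Observation:" is appended as an extra stop word so generation halts before the model invents a tool result. A sketch of the merging; only the string branch appears in the diff, and the list and None branches below are assumptions:

from typing import Any, Dict

def add_observation_stop(generate_config: Dict[str, Any]) -> Dict[str, Any]:
    stop = generate_config.get("stop")
    if isinstance(stop, str):
        generate_config["stop"] = [stop, "Observation:"]
    elif isinstance(stop, list):  # assumed handling, not shown in the diff
        generate_config["stop"] = stop + ["Observation:"]
    else:  # assumed handling, not shown in the diff
        generate_config["stop"] = ["Observation:"]
    return generate_config

assert add_observation_stop({"stop": "<|im_end|>"})["stop"] == ["<|im_end|>", "Observation:"]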
@@ -400,6 +412,6 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
             assert not isinstance(c, AsyncGenerator)
             if tools:
                 return self._tool_calls_completion(
-                    self.model_family
+                    self.model_family, self.model_uid, c, tools
                 )
             return self._to_chat_completion(c)
xinference/model/multimodal/__init__.py
CHANGED
@@ -30,16 +30,23 @@ MODEL_CLASSES.append(QwenVLChat)


 def _install():
-
+    json_path_huggingface = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "model_spec.json"
     )
-
-
-
-
-
-
-
+    json_path_modelscope = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "model_spec_modelscope.json"
+    )
+    for builtin_family, json_path in [
+        (BUILTIN_LVLM_FAMILIES, json_path_huggingface),
+        (BUILTIN_MODELSCOPE_LVLM_FAMILIES, json_path_modelscope),
+    ]:
+        for json_obj in json.load(codecs.open(json_path, "r", encoding="utf-8")):
+            model_family = LVLMFamilyV1.parse_obj(json_obj)
+            builtin_family.append(model_family)
+            for model_spec in model_family.model_specs:
+                MODEL_NAME_TO_REVISION[model_family.model_name].append(
+                    model_spec.model_revision
+                )


 _install()
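_install() now walks both spec files and feeds each hub's family list plus a shared name-to-revisions index. A self-contained sketch of that registration pattern, with inline JSON standing in for the model_spec*.json files:

import json
from collections import defaultdict

BUILTIN_LVLM_FAMILIES, BUILTIN_MODELSCOPE_LVLM_FAMILIES = [], []
MODEL_NAME_TO_REVISION = defaultdict(list)

huggingface_specs = '[{"model_name": "qwen-vl-chat", "model_specs": [{"model_revision": "6665c78"}]}]'
modelscope_specs = '[{"model_name": "qwen-vl-chat", "model_specs": [{"model_revision": "master"}]}]'

for builtin_family, payload in [
    (BUILTIN_LVLM_FAMILIES, huggingface_specs),
    (BUILTIN_MODELSCOPE_LVLM_FAMILIES, modelscope_specs),
]:
    for family in json.loads(payload):
        builtin_family.append(family)
        for spec in family["model_specs"]:
            MODEL_NAME_TO_REVISION[family["model_name"]].append(spec["model_revision"])

assert MODEL_NAME_TO_REVISION["qwen-vl-chat"] == ["6665c78", "master"]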
xinference/model/multimodal/core.py
CHANGED
@@ -203,6 +203,8 @@ def match_multimodal(
             and matched_quantization is None
         ):
             continue
+        # Copy spec to avoid _apply_format_to_model_id modify the original spec.
+        spec = spec.copy()
         if quantization:
             return (
                 family,
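Why the copy matters: _apply_format_to_model_id mutates the spec it is given, and without a copy a second lookup would see an already-formatted model_id. A sketch assuming pydantic v1 models (the LVLMFamilyV1.parse_obj call above suggests pydantic v1):

from pydantic import BaseModel

class Spec(BaseModel):
    model_id: str

builtin = Spec(model_id="Qwen/Qwen-VL-Chat-{quantization}")

def apply_format(spec: Spec, quantization: str) -> Spec:
    spec = spec.copy()  # work on a copy, as the diff does
    spec.model_id = spec.model_id.format(quantization=quantization)
    return spec

assert apply_format(builtin, "Int4").model_id == "Qwen/Qwen-VL-Chat-Int4"
assert builtin.model_id == "Qwen/Qwen-VL-Chat-{quantization}"  # original intact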
@@ -328,6 +330,11 @@ def _skip_download(
             logger.warning(f"Cache {cache_dir} exists, but it was from {hub}")
             return True
         return False
+    elif model_format in ["ggmlv3", "ggufv2", "gptq"]:
+        assert quantization is not None
+        return os.path.exists(
+            _get_meta_path(cache_dir, model_format, model_hub, quantization)
+        )
     else:
         raise ValueError(f"Unsupported format: {model_format}")
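For file-based formats, a finished download is marked by a small per-quantization meta file, so the cache check reduces to an existence test. A sketch; the meta-file naming below is illustrative, not the real _get_meta_path layout:

import os

def get_meta_path(cache_dir: str, model_format: str, model_hub: str, quantization: str) -> str:
    # Illustrative naming; the real helper may differ.
    return os.path.join(cache_dir, f"__valid_download_{model_hub}_{quantization}")

def skip_download(cache_dir: str, model_format: str, model_hub: str, quantization: str) -> bool:
    if model_format in ["ggmlv3", "ggufv2", "gptq"]:
        assert quantization is not None
        return os.path.exists(get_meta_path(cache_dir, model_format, model_hub, quantization))
    raise ValueError(f"Unsupported format: {model_format}")

print(skip_download("/tmp/no-such-cache", "gptq", "huggingface", "Int4"))  # False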
@@ -414,7 +421,7 @@ def cache_from_huggingface(
     ):
         return cache_dir

-    if model_spec.model_format in ["pytorch"]:
+    if model_spec.model_format in ["pytorch", "gptq"]:
         assert isinstance(model_spec, LVLMSpecV1)
         retry_download(
             huggingface_hub.snapshot_download,
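Widening the branch means GPTQ multimodal specs now take the same snapshot-download path as pytorch ones. An illustrative sketch of that path (download_snapshot is a stand-in, not the real retry_download wiring):

import huggingface_hub

def download_snapshot(model_spec, cache_dir: str) -> str:
    if model_spec.model_format in ["pytorch", "gptq"]:
        return huggingface_hub.snapshot_download(
            model_spec.model_id,  # e.g. "Qwen/Qwen-VL-Chat-Int4"
            revision=model_spec.model_revision,
            local_dir=cache_dir,
        )
    raise ValueError(f"Unsupported format: {model_spec.model_format}")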
xinference/model/multimodal/model_spec.json
CHANGED
@@ -20,6 +20,15 @@
         ],
         "model_id": "Qwen/Qwen-VL-Chat",
         "model_revision": "6665c780ade5ff3f08853b4262dcb9c8f9598d42"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen-VL-Chat-{quantization}",
+        "model_revision": "5d3a5aa033ed2c502300d426c81cc5b13bcd1409"
       }
     ],
     "prompt_style": {
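The GPTQ spec's model_id carries a "{quantization}" placeholder that is expanded once a quantization is chosen:

model_id = "Qwen/Qwen-VL-Chat-{quantization}"
assert model_id.format(quantization="Int4") == "Qwen/Qwen-VL-Chat-Int4"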
xinference/model/multimodal/model_spec_modelscope.json
ADDED
@@ -0,0 +1,45 @@
+[
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "qwen-vl-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Qwen-VL-Chat supports more flexible interaction, such as multiple image inputs, multi-round question answering, and creative capabilities.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "Qwen/Qwen-VL-Chat",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "Qwen/Qwen-VL-Chat-{quantization}",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "QWEN",
+      "system_prompt": "You are a helpful assistant.",
+      "roles": [
+        "user",
+        "assistant"
+      ]
+    }
+  }
+]
xinference/model/multimodal/qwen_vl.py
CHANGED
@@ -18,7 +18,6 @@ import tempfile
 import time
 import uuid
 from typing import Dict, Iterator, List, Optional, Union
-from urllib.parse import urlparse

 from ...types import (
     ChatCompletion,
@@ -73,14 +72,7 @@ class QwenVLChat(LVLM):

     def _message_content_to_qwen(self, content) -> str:
         def _ensure_url(_url):
-
-                if _url.startswith("data:"):
-                    raise "Not a valid url."
-                parsed = urlparse(_url)
-                if not parsed.scheme:
-                    raise "Not a valid url."
-                return _url
-            except Exception:
+            if _url.startswith("data:"):
                 logging.info("Parse url by base64 decoder.")
                 # https://platform.openai.com/docs/guides/vision/uploading-base-64-encoded-images
                 # e.g. f"data:image/jpeg;base64,{base64_image}"
@@ -93,6 +85,10 @@ class QwenVLChat(LVLM):
                 f.write(data)
                 logging.info("Dump base64 data to %s", f.name)
                 return f.name
+            else:
+                if len(_url) > 2048:
+                    raise Exception(f"Image url is too long, {len(_url)} > 2048.")
+                return _url

         if not isinstance(content, str):
             # TODO(codingl2k1): Optimize _ensure_url
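The rewritten _ensure_url treats data: URLs as base64 payloads to dump to a temp file, and passes anything else through after a length check. A simplified, runnable stand-in:

import base64
import tempfile

def ensure_url(url: str, max_len: int = 2048) -> str:
    if url.startswith("data:"):
        # e.g. "data:image/jpeg;base64,<payload>"
        header, encoded = url.split(",", 1)
        data = base64.b64decode(encoded)
        suffix = "." + header.split(";")[0].split("/")[-1]
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as f:
            f.write(data)
            return f.name
    if len(url) > max_len:
        raise Exception(f"Image url is too long, {len(url)} > {max_len}.")
    return url

print(ensure_url("data:image/png;base64," + base64.b64encode(b"stub-bytes").decode()))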
xinference/model/utils.py
CHANGED
@@ -153,8 +153,13 @@ def is_model_cached(model_spec: Any, name_to_revisions_mapping: Dict):


 def is_valid_model_name(model_name: str) -> bool:
-
-
+    import re
+
+    if len(model_name) == 0:
+        return False
+
+    # check if contains +/?%#&=\s
+    return re.match(r"^[^+\/?%#&=\s]*$", model_name) is not None


 def parse_uri(uri: str) -> Tuple[str, str]:
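The new validator in action: empty names and names containing any of + / ? % # & = or whitespace are rejected, since those characters would break URLs and routing:

import re

def is_valid_model_name(model_name: str) -> bool:
    if len(model_name) == 0:
        return False
    return re.match(r"^[^+\/?%#&=\s]*$", model_name) is not None

assert is_valid_model_name("my-model_v2")
assert not is_valid_model_name("")
assert not is_valid_model_name("my model")  # whitespace
assert not is_valid_model_name("a/b")       # path separator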
|
xinference/types.py
CHANGED
|
@@ -110,6 +110,7 @@ class CompletionChunk(TypedDict):
|
|
|
110
110
|
created: int
|
|
111
111
|
model: str
|
|
112
112
|
choices: List[CompletionChoice]
|
|
113
|
+
usage: NotRequired[CompletionUsage]
|
|
113
114
|
|
|
114
115
|
|
|
115
116
|
class Completion(TypedDict):
|
|
@@ -160,6 +161,7 @@ class ChatCompletionChunk(TypedDict):
|
|
|
160
161
|
object: Literal["chat.completion.chunk"]
|
|
161
162
|
created: int
|
|
162
163
|
choices: List[ChatCompletionChunkChoice]
|
|
164
|
+
usage: NotRequired[CompletionUsage]
|
|
163
165
|
|
|
164
166
|
|
|
165
167
|
class ChatglmCppModelConfig(TypedDict, total=False):
|
|
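NotRequired makes usage optional for type checkers only; at runtime a chunk is a plain dict, so consumers should probe with .get(). A minimal sketch with CompletionChunk trimmed to two fields:

from typing_extensions import NotRequired, TypedDict

class CompletionUsage(TypedDict):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int

class CompletionChunk(TypedDict):
    id: str
    usage: NotRequired[CompletionUsage]

chunk: CompletionChunk = {"id": "chunk-0"}  # omitting usage is legal
usage = chunk.get("usage")
if usage is not None:
    print(usage["total_tokens"])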
xinference/web/ui/build/asset-manifest.json
CHANGED
@@ -1,11 +1,11 @@
 {
   "files": {
-    "main.js": "./static/js/main.236e72e7.js",
+    "main.js": "./static/js/main.b83095c2.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
-    "main.236e72e7.js.map": "./static/js/main.236e72e7.js.map"
+    "main.b83095c2.js.map": "./static/js/main.b83095c2.js.map"
   },
   "entrypoints": [
-    "static/js/main.236e72e7.js"
+    "static/js/main.b83095c2.js"
   ]
 }
xinference/web/ui/build/index.html
CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.236e72e7.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.b83095c2.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>