xinference 0.16.1__py3-none-any.whl → 0.16.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/conftest.py +0 -8
- xinference/constants.py +2 -0
- xinference/core/model.py +34 -2
- xinference/core/supervisor.py +5 -5
- xinference/core/utils.py +9 -10
- xinference/core/worker.py +8 -5
- xinference/deploy/cmdline.py +5 -0
- xinference/deploy/utils.py +7 -4
- xinference/model/audio/core.py +6 -2
- xinference/model/audio/model_spec.json +1 -1
- xinference/model/core.py +3 -1
- xinference/model/embedding/core.py +6 -2
- xinference/model/image/core.py +6 -2
- xinference/model/image/ocr/got_ocr2.py +3 -0
- xinference/model/llm/__init__.py +33 -0
- xinference/model/llm/core.py +4 -4
- xinference/model/llm/llm_family.json +87 -0
- xinference/model/llm/llm_family.py +68 -2
- xinference/model/llm/llm_family_modelscope.json +91 -0
- xinference/model/llm/llm_family_openmind_hub.json +1359 -0
- xinference/model/llm/vllm/core.py +2 -1
- xinference/model/rerank/core.py +9 -1
- xinference/model/utils.py +7 -0
- xinference/model/video/core.py +6 -2
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.b76aeeb7.js → main.2f269bb3.js} +3 -3
- xinference/web/ui/build/static/js/main.2f269bb3.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +1 -0
- {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/METADATA +5 -4
- {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/RECORD +37 -36
- xinference/web/ui/build/static/js/main.b76aeeb7.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/32ea2c04cf0bba2761b4883d2c40cc259952c94d2d6bb774e510963ca37aac0a.json +0 -1
- /xinference/web/ui/build/static/js/{main.b76aeeb7.js.LICENSE.txt → main.2f269bb3.js.LICENSE.txt} +0 -0
- {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/LICENSE +0 -0
- {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/WHEEL +0 -0
- {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/entry_points.txt +0 -0
- {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family.py

@@ -41,6 +41,7 @@ from ..utils import (
     create_symlink,
     download_from_csghub,
     download_from_modelscope,
+    download_from_openmind_hub,
     is_valid_model_uri,
     parse_uri,
     retry_download,
@@ -239,6 +240,7 @@ LLAMA_CLASSES: List[Type[LLM]] = []
 
 BUILTIN_LLM_FAMILIES: List["LLMFamilyV1"] = []
 BUILTIN_MODELSCOPE_LLM_FAMILIES: List["LLMFamilyV1"] = []
+BUILTIN_OPENMIND_HUB_LLM_FAMILIES: List["LLMFamilyV1"] = []
 BUILTIN_CSGHUB_LLM_FAMILIES: List["LLMFamilyV1"] = []
 
 SGLANG_CLASSES: List[Type[LLM]] = []
@@ -301,6 +303,9 @@ def cache(
     elif llm_spec.model_hub == "modelscope":
         logger.info(f"Caching from Modelscope: {llm_spec.model_id}")
         return cache_from_modelscope(llm_family, llm_spec, quantization)
+    elif llm_spec.model_hub == "openmind_hub":
+        logger.info(f"Caching from openmind_hub: {llm_spec.model_id}")
+        return cache_from_openmind_hub(llm_family, llm_spec, quantization)
     elif llm_spec.model_hub == "csghub":
         logger.info(f"Caching from CSGHub: {llm_spec.model_id}")
         return cache_from_csghub(llm_family, llm_spec, quantization)
@@ -474,7 +479,7 @@ def _skip_download(
     model_revision: Optional[str],
     quantization: Optional[str] = None,
 ) -> bool:
-    if model_format == "pytorch":
+    if model_format in ["pytorch", "mindspore"]:
         model_hub_to_meta_path = {
             "huggingface": _get_meta_path(
                 cache_dir, model_format, "huggingface", quantization
@@ -482,6 +487,9 @@ def _skip_download(
             "modelscope": _get_meta_path(
                 cache_dir, model_format, "modelscope", quantization
             ),
+            "openmind_hub": _get_meta_path(
+                cache_dir, model_format, "openmind_hub", quantization
+            ),
             "csghub": _get_meta_path(cache_dir, model_format, "csghub", quantization),
         }
         if valid_model_revision(model_hub_to_meta_path[model_hub], model_revision):
@@ -702,6 +710,50 @@ def cache_from_modelscope(
     return cache_dir
 
 
+def cache_from_openmind_hub(
+    llm_family: LLMFamilyV1,
+    llm_spec: "LLMSpecV1",
+    quantization: Optional[str] = None,
+) -> str:
+    """
+    Cache model from openmind_hub. Return the cache directory.
+    """
+    from openmind_hub import snapshot_download
+
+    cache_dir = _get_cache_dir(llm_family, llm_spec)
+    if _skip_download(
+        cache_dir,
+        llm_spec.model_format,
+        llm_spec.model_hub,
+        llm_spec.model_revision,
+        quantization,
+    ):
+        return cache_dir
+
+    if llm_spec.model_format in ["pytorch", "mindspore"]:
+        download_dir = retry_download(
+            snapshot_download,
+            llm_family.model_name,
+            {
+                "model_size": llm_spec.model_size_in_billions,
+                "model_format": llm_spec.model_format,
+            },
+            llm_spec.model_id,
+            revision=llm_spec.model_revision,
+        )
+        create_symlink(download_dir, cache_dir)
+
+    else:
+        raise ValueError(f"Unsupported format: {llm_spec.model_format}")
+
+    meta_path = _get_meta_path(
+        cache_dir, llm_spec.model_format, llm_spec.model_hub, quantization
+    )
+    _generate_meta_file(meta_path, llm_family, llm_spec, quantization)
+
+    return cache_dir
+
+
 def cache_from_huggingface(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
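For orientation, a minimal sketch of what the new cache_from_openmind_hub branch boils down to: fetch a snapshot from openmind_hub, then symlink it into the xinference cache. Only the snapshot_download call and its revision argument are taken from the hunk above; the repository id below is purely illustrative (real ids come from the new llm_family_openmind_hub.json), and the retry/meta-file bookkeeping is omitted.

    # Illustrative only: download a model snapshot the way the new code path does,
    # without xinference's retry_download wrapper or meta-file generation.
    from openmind_hub import snapshot_download

    download_dir = snapshot_download(
        "some-org/some-model",  # hypothetical repo id, not from this diff
        revision=None,          # a real call pins llm_spec.model_revision
    )
    print(download_dir)         # xinference symlinks this directory into its cache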
@@ -893,7 +945,9 @@ def match_llm(
     model_format: Optional[str] = None,
     model_size_in_billions: Optional[Union[int, str]] = None,
     quantization: Optional[str] = None,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
 ) -> Optional[Tuple[LLMFamilyV1, LLMSpecV1, str]]:
     """
     Find an LLM family, spec, and quantization that satisfy given criteria.
@@ -924,6 +978,12 @@ def match_llm(
             + BUILTIN_LLM_FAMILIES
             + user_defined_llm_families
         )
+    elif download_hub == "openmind_hub":
+        all_families = (
+            BUILTIN_OPENMIND_HUB_LLM_FAMILIES
+            + BUILTIN_LLM_FAMILIES
+            + user_defined_llm_families
+        )
     elif download_hub == "csghub":
         all_families = (
             BUILTIN_CSGHUB_LLM_FAMILIES
@@ -938,6 +998,12 @@ def match_llm(
             + BUILTIN_LLM_FAMILIES
             + user_defined_llm_families
         )
+    elif download_from_openmind_hub():
+        all_families = (
+            BUILTIN_OPENMIND_HUB_LLM_FAMILIES
+            + BUILTIN_LLM_FAMILIES
+            + user_defined_llm_families
+        )
     elif download_from_csghub():
         all_families = (
             BUILTIN_CSGHUB_LLM_FAMILIES
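Taken together, the two match_llm hunks make openmind_hub selectable in both ways the other hubs already support: explicitly via the new download_hub literal, or implicitly via the download_from_openmind_hub() helper imported at the top of the file (added to xinference/model/utils.py in this release). A hedged sketch of the explicit path follows; the model name and format are illustrative choices, and the call simply returns None if no registered family/spec/quantization matches.

    # Illustrative only: ask the registry for a (family, spec, quantization)
    # triple, preferring specs hosted on openmind_hub.
    from xinference.model.llm.llm_family import match_llm

    matched = match_llm(
        "llama-3.2-vision-instruct",  # model name, illustrative choice
        model_format="pytorch",
        download_hub="openmind_hub",  # literal added in this release
    )
    if matched is not None:
        llm_family, llm_spec, quantization = matched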
xinference/model/llm/llm_family_modelscope.json

@@ -363,6 +363,97 @@
             "<|eom_id|>"
         ]
     },
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "llama-3.2-vision-instruct",
+        "model_lang": [
+            "en",
+            "de",
+            "fr",
+            "it",
+            "pt",
+            "hi",
+            "es",
+            "th"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 11,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "LLM-Research/Llama-3.2-11B-Vision-Instruct",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 90,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "LLM-Research/Llama-3.2-90B-Vision-Instruct",
+                "model_hub": "modelscope"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{% if loop.index0 == 0 %}{{ bos_token }}{% endif %}{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<|image|>' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{ '<|eot_id|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+        "stop_token_ids": [
+            128001,
+            128008,
+            128009
+        ],
+        "stop": [
+            "<|end_of_text|>",
+            "<|eot_id|>",
+            "<|eom_id|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "llama-3.2-vision",
+        "model_lang": [
+            "en",
+            "de",
+            "fr",
+            "it",
+            "pt",
+            "hi",
+            "es",
+            "th"
+        ],
+        "model_ability": [
+            "generate",
+            "vision"
+        ],
+        "model_description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 11,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "LLM-Research/Llama-3.2-11B-Vision",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 90,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "LLM-Research/Llama-3.2-90B-Vision",
+                "model_hub": "modelscope"
+            }
+        ]
+    },
     {
         "version": 1,
         "context_length": 2048,