xinference 0.9.4__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/oauth2/auth_service.py +47 -18
- xinference/api/oauth2/types.py +1 -0
- xinference/api/restful_api.py +9 -1
- xinference/client/restful/restful_client.py +12 -2
- xinference/conftest.py +13 -2
- xinference/core/supervisor.py +32 -1
- xinference/core/worker.py +139 -20
- xinference/deploy/cmdline.py +119 -20
- xinference/model/llm/__init__.py +4 -0
- xinference/model/llm/llm_family.json +627 -0
- xinference/model/llm/llm_family_modelscope.json +471 -0
- xinference/model/llm/pytorch/core.py +2 -0
- xinference/model/llm/pytorch/deepseek_vl.py +232 -0
- xinference/model/llm/pytorch/omnilmm.py +153 -0
- xinference/model/llm/utils.py +11 -1
- xinference/model/llm/vllm/core.py +3 -0
- xinference/thirdparty/deepseek_vl/__init__.py +31 -0
- xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
- xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
- xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
- xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
- xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
- xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
- xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
- xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
- xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
- xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
- xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
- xinference/thirdparty/omnilmm/__init__.py +0 -0
- xinference/thirdparty/omnilmm/chat.py +216 -0
- xinference/thirdparty/omnilmm/constants.py +4 -0
- xinference/thirdparty/omnilmm/conversation.py +332 -0
- xinference/thirdparty/omnilmm/model/__init__.py +1 -0
- xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
- xinference/thirdparty/omnilmm/model/resampler.py +166 -0
- xinference/thirdparty/omnilmm/model/utils.py +563 -0
- xinference/thirdparty/omnilmm/train/__init__.py +13 -0
- xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
- xinference/thirdparty/omnilmm/utils.py +134 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.98516614.js +3 -0
- xinference/web/ui/build/static/js/main.98516614.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/139969fd25258eb7decc9505f30b779089bba50c402bb5c663008477c7bff73b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3f357ab57b8e7fade54c667f0e0ebf2787566f72bfdca0fea14e395b5c203753.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9d7c49815d97539207e5aab2fb967591b5fed7791218a0762539efc9491f36af.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d0d0b591d9adaf42b83ad6633f8b7c118541a4b80ea957c303d3bf9b86fbad0a.json +1 -0
- {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/METADATA +18 -5
- {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/RECORD +55 -28
- xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
- xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
- /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.98516614.js.LICENSE.txt} +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/LICENSE +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/WHEEL +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/entry_points.txt +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/top_level.txt +0 -0
xinference/deploy/cmdline.py
CHANGED
|
@@ -376,18 +376,27 @@ def worker(
|
|
|
376
376
|
is_flag=True,
|
|
377
377
|
help="Persist the model configuration to the filesystem, retains the model registration after server restarts.",
|
|
378
378
|
)
|
|
379
|
+
@click.option(
|
|
380
|
+
"--api-key",
|
|
381
|
+
"-ak",
|
|
382
|
+
default=None,
|
|
383
|
+
type=str,
|
|
384
|
+
help="Api-Key for access xinference api with authorization.",
|
|
385
|
+
)
|
|
379
386
|
def register_model(
|
|
380
387
|
endpoint: Optional[str],
|
|
381
388
|
model_type: str,
|
|
382
389
|
file: str,
|
|
383
390
|
persist: bool,
|
|
391
|
+
api_key: Optional[str],
|
|
384
392
|
):
|
|
385
393
|
endpoint = get_endpoint(endpoint)
|
|
386
394
|
with open(file) as fd:
|
|
387
395
|
model = fd.read()
|
|
388
396
|
|
|
389
|
-
client = RESTfulClient(base_url=endpoint)
|
|
390
|
-
|
|
397
|
+
client = RESTfulClient(base_url=endpoint, api_key=api_key)
|
|
398
|
+
if api_key is None:
|
|
399
|
+
client._set_token(get_stored_token(endpoint, client))
|
|
391
400
|
client.register_model(
|
|
392
401
|
model_type=model_type,
|
|
393
402
|
model=model,
|
|
@@ -408,15 +417,24 @@ def register_model(
|
|
|
408
417
|
help="Type of model to unregister (default is 'LLM').",
|
|
409
418
|
)
|
|
410
419
|
@click.option("--model-name", "-n", type=str, help="Name of the model to unregister.")
|
|
420
|
+
@click.option(
|
|
421
|
+
"--api-key",
|
|
422
|
+
"-ak",
|
|
423
|
+
default=None,
|
|
424
|
+
type=str,
|
|
425
|
+
help="Api-Key for access xinference api with authorization.",
|
|
426
|
+
)
|
|
411
427
|
def unregister_model(
|
|
412
428
|
endpoint: Optional[str],
|
|
413
429
|
model_type: str,
|
|
414
430
|
model_name: str,
|
|
431
|
+
api_key: Optional[str],
|
|
415
432
|
):
|
|
416
433
|
endpoint = get_endpoint(endpoint)
|
|
417
434
|
|
|
418
|
-
client = RESTfulClient(base_url=endpoint)
|
|
419
|
-
|
|
435
|
+
client = RESTfulClient(base_url=endpoint, api_key=api_key)
|
|
436
|
+
if api_key is None:
|
|
437
|
+
client._set_token(get_stored_token(endpoint, client))
|
|
420
438
|
client.unregister_model(
|
|
421
439
|
model_type=model_type,
|
|
422
440
|
model_name=model_name,
|
|
@@ -437,15 +455,24 @@ def unregister_model(
|
|
|
437
455
|
type=str,
|
|
438
456
|
help="Filter by model type (default is 'LLM').",
|
|
439
457
|
)
|
|
458
|
+
@click.option(
|
|
459
|
+
"--api-key",
|
|
460
|
+
"-ak",
|
|
461
|
+
default=None,
|
|
462
|
+
type=str,
|
|
463
|
+
help="Api-Key for access xinference api with authorization.",
|
|
464
|
+
)
|
|
440
465
|
def list_model_registrations(
|
|
441
466
|
endpoint: Optional[str],
|
|
442
467
|
model_type: str,
|
|
468
|
+
api_key: Optional[str],
|
|
443
469
|
):
|
|
444
470
|
from tabulate import tabulate
|
|
445
471
|
|
|
446
472
|
endpoint = get_endpoint(endpoint)
|
|
447
|
-
client = RESTfulClient(base_url=endpoint)
|
|
448
|
-
|
|
473
|
+
client = RESTfulClient(base_url=endpoint, api_key=api_key)
|
|
474
|
+
if api_key is None:
|
|
475
|
+
client._set_token(get_stored_token(endpoint, client))
|
|
449
476
|
|
|
450
477
|
registrations = client.list_model_registrations(model_type=model_type)
|
|
451
478
|
|
|
@@ -632,12 +659,31 @@ def list_model_registrations(
|
|
|
632
659
|
type=(str, str),
|
|
633
660
|
multiple=True,
|
|
634
661
|
)
|
|
662
|
+
@click.option(
|
|
663
|
+
"--worker-ip",
|
|
664
|
+
default=None,
|
|
665
|
+
type=str,
|
|
666
|
+
help="Specify which worker this model runs on by ip, for distributed situation.",
|
|
667
|
+
)
|
|
668
|
+
@click.option(
|
|
669
|
+
"--gpu-idx",
|
|
670
|
+
default=None,
|
|
671
|
+
type=str,
|
|
672
|
+
help="Specify which GPUs of a worker this model can run on, separated with commas.",
|
|
673
|
+
)
|
|
635
674
|
@click.option(
|
|
636
675
|
"--trust-remote-code",
|
|
637
676
|
default=True,
|
|
638
677
|
type=bool,
|
|
639
678
|
help="Whether or not to allow for custom models defined on the Hub in their own modeling files.",
|
|
640
679
|
)
|
|
680
|
+
@click.option(
|
|
681
|
+
"--api-key",
|
|
682
|
+
"-ak",
|
|
683
|
+
default=None,
|
|
684
|
+
type=str,
|
|
685
|
+
help="Api-Key for access xinference api with authorization.",
|
|
686
|
+
)
|
|
641
687
|
@click.pass_context
|
|
642
688
|
def model_launch(
|
|
643
689
|
ctx,
|
|
@@ -653,7 +699,10 @@ def model_launch(
|
|
|
653
699
|
peft_model_path: Optional[str],
|
|
654
700
|
image_lora_load_kwargs: Optional[Tuple],
|
|
655
701
|
image_lora_fuse_kwargs: Optional[Tuple],
|
|
702
|
+
worker_ip: Optional[str],
|
|
703
|
+
gpu_idx: Optional[str],
|
|
656
704
|
trust_remote_code: bool,
|
|
705
|
+
api_key: Optional[str],
|
|
657
706
|
):
|
|
658
707
|
kwargs = {}
|
|
659
708
|
for i in range(0, len(ctx.args), 2):
|
|
@@ -680,14 +729,19 @@ def model_launch(
|
|
|
680
729
|
else None
|
|
681
730
|
)
|
|
682
731
|
|
|
732
|
+
_gpu_idx: Optional[List[int]] = (
|
|
733
|
+
None if gpu_idx is None else [int(idx) for idx in gpu_idx.split(",")]
|
|
734
|
+
)
|
|
735
|
+
|
|
683
736
|
endpoint = get_endpoint(endpoint)
|
|
684
737
|
model_size: Optional[Union[str, int]] = (
|
|
685
738
|
size_in_billions
|
|
686
739
|
if size_in_billions is None or "_" in size_in_billions
|
|
687
740
|
else int(size_in_billions)
|
|
688
741
|
)
|
|
689
|
-
client = RESTfulClient(base_url=endpoint)
|
|
690
|
-
|
|
742
|
+
client = RESTfulClient(base_url=endpoint, api_key=api_key)
|
|
743
|
+
if api_key is None:
|
|
744
|
+
client._set_token(get_stored_token(endpoint, client))
|
|
691
745
|
|
|
692
746
|
model_uid = client.launch_model(
|
|
693
747
|
model_name=model_name,
|
|
@@ -701,6 +755,8 @@ def model_launch(
|
|
|
701
755
|
peft_model_path=peft_model_path,
|
|
702
756
|
image_lora_load_kwargs=image_lora_load_params,
|
|
703
757
|
image_lora_fuse_kwargs=image_lora_fuse_params,
|
|
758
|
+
worker_ip=worker_ip,
|
|
759
|
+
gpu_idx=_gpu_idx,
|
|
704
760
|
trust_remote_code=trust_remote_code,
|
|
705
761
|
**kwargs,
|
|
706
762
|
)
|
|
@@ -718,12 +774,20 @@ def model_launch(
|
|
|
718
774
|
type=str,
|
|
719
775
|
help="Xinference endpoint.",
|
|
720
776
|
)
|
|
721
|
-
|
|
777
|
+
@click.option(
|
|
778
|
+
"--api-key",
|
|
779
|
+
"-ak",
|
|
780
|
+
default=None,
|
|
781
|
+
type=str,
|
|
782
|
+
help="Api-Key for access xinference api with authorization.",
|
|
783
|
+
)
|
|
784
|
+
def model_list(endpoint: Optional[str], api_key: Optional[str]):
|
|
722
785
|
from tabulate import tabulate
|
|
723
786
|
|
|
724
787
|
endpoint = get_endpoint(endpoint)
|
|
725
|
-
client = RESTfulClient(base_url=endpoint)
|
|
726
|
-
|
|
788
|
+
client = RESTfulClient(base_url=endpoint, api_key=api_key)
|
|
789
|
+
if api_key is None:
|
|
790
|
+
client._set_token(get_stored_token(endpoint, client))
|
|
727
791
|
|
|
728
792
|
llm_table = []
|
|
729
793
|
embedding_table = []
|
|
@@ -844,13 +908,22 @@ def model_list(endpoint: Optional[str]):
|
|
|
844
908
|
required=True,
|
|
845
909
|
help="The unique identifier (UID) of the model.",
|
|
846
910
|
)
|
|
911
|
+
@click.option(
|
|
912
|
+
"--api-key",
|
|
913
|
+
"-ak",
|
|
914
|
+
default=None,
|
|
915
|
+
type=str,
|
|
916
|
+
help="Api-Key for access xinference api with authorization.",
|
|
917
|
+
)
|
|
847
918
|
def model_terminate(
|
|
848
919
|
endpoint: Optional[str],
|
|
849
920
|
model_uid: str,
|
|
921
|
+
api_key: Optional[str],
|
|
850
922
|
):
|
|
851
923
|
endpoint = get_endpoint(endpoint)
|
|
852
|
-
client = RESTfulClient(base_url=endpoint)
|
|
853
|
-
|
|
924
|
+
client = RESTfulClient(base_url=endpoint, api_key=api_key)
|
|
925
|
+
if api_key is None:
|
|
926
|
+
client._set_token(get_stored_token(endpoint, client))
|
|
854
927
|
client.terminate_model(model_uid=model_uid)
|
|
855
928
|
|
|
856
929
|
|
|
@@ -873,15 +946,24 @@ def model_terminate(
|
|
|
873
946
|
type=bool,
|
|
874
947
|
help="Whether to stream the generated text. Use 'True' for streaming (default is True).",
|
|
875
948
|
)
|
|
949
|
+
@click.option(
|
|
950
|
+
"--api-key",
|
|
951
|
+
"-ak",
|
|
952
|
+
default=None,
|
|
953
|
+
type=str,
|
|
954
|
+
help="Api-Key for access xinference api with authorization.",
|
|
955
|
+
)
|
|
876
956
|
def model_generate(
|
|
877
957
|
endpoint: Optional[str],
|
|
878
958
|
model_uid: str,
|
|
879
959
|
max_tokens: int,
|
|
880
960
|
stream: bool,
|
|
961
|
+
api_key: Optional[str],
|
|
881
962
|
):
|
|
882
963
|
endpoint = get_endpoint(endpoint)
|
|
883
|
-
client = RESTfulClient(base_url=endpoint)
|
|
884
|
-
|
|
964
|
+
client = RESTfulClient(base_url=endpoint, api_key=api_key)
|
|
965
|
+
if api_key is None:
|
|
966
|
+
client._set_token(get_stored_token(endpoint, client))
|
|
885
967
|
if stream:
|
|
886
968
|
# TODO: when stream=True, RestfulClient cannot generate words one by one.
|
|
887
969
|
# So use Client in temporary. The implementation needs to be changed to
|
|
@@ -959,16 +1041,25 @@ def model_generate(
|
|
|
959
1041
|
type=bool,
|
|
960
1042
|
help="Whether to stream the chat messages. Use 'True' for streaming (default is True).",
|
|
961
1043
|
)
|
|
1044
|
+
@click.option(
|
|
1045
|
+
"--api-key",
|
|
1046
|
+
"-ak",
|
|
1047
|
+
default=None,
|
|
1048
|
+
type=str,
|
|
1049
|
+
help="Api-Key for access xinference api with authorization.",
|
|
1050
|
+
)
|
|
962
1051
|
def model_chat(
|
|
963
1052
|
endpoint: Optional[str],
|
|
964
1053
|
model_uid: str,
|
|
965
1054
|
max_tokens: int,
|
|
966
1055
|
stream: bool,
|
|
1056
|
+
api_key: Optional[str],
|
|
967
1057
|
):
|
|
968
1058
|
# TODO: chat model roles may not be user and assistant.
|
|
969
1059
|
endpoint = get_endpoint(endpoint)
|
|
970
|
-
client = RESTfulClient(base_url=endpoint)
|
|
971
|
-
|
|
1060
|
+
client = RESTfulClient(base_url=endpoint, api_key=api_key)
|
|
1061
|
+
if api_key is None:
|
|
1062
|
+
client._set_token(get_stored_token(endpoint, client))
|
|
972
1063
|
|
|
973
1064
|
chat_history: "List[ChatCompletionMessage]" = []
|
|
974
1065
|
if stream:
|
|
@@ -1048,10 +1139,18 @@ def model_chat(
|
|
|
1048
1139
|
|
|
1049
1140
|
@cli.command("vllm-models", help="Query and display models compatible with vLLM.")
|
|
1050
1141
|
@click.option("--endpoint", "-e", type=str, help="Xinference endpoint.")
|
|
1051
|
-
|
|
1142
|
+
@click.option(
|
|
1143
|
+
"--api-key",
|
|
1144
|
+
"-ak",
|
|
1145
|
+
default=None,
|
|
1146
|
+
type=str,
|
|
1147
|
+
help="Api-Key for access xinference api with authorization.",
|
|
1148
|
+
)
|
|
1149
|
+
def vllm_models(endpoint: Optional[str], api_key: Optional[str]):
|
|
1052
1150
|
endpoint = get_endpoint(endpoint)
|
|
1053
|
-
client = RESTfulClient(base_url=endpoint)
|
|
1054
|
-
|
|
1151
|
+
client = RESTfulClient(base_url=endpoint, api_key=api_key)
|
|
1152
|
+
if api_key is None:
|
|
1153
|
+
client._set_token(get_stored_token(endpoint, client))
|
|
1055
1154
|
vllm_models_dict = client.vllm_models()
|
|
1056
1155
|
print("VLLM supported model families:")
|
|
1057
1156
|
chat_models = vllm_models_dict["chat"]
|
xinference/model/llm/__init__.py
CHANGED
|
@@ -54,9 +54,11 @@ def _install():
|
|
|
54
54
|
from .pytorch.baichuan import BaichuanPytorchChatModel
|
|
55
55
|
from .pytorch.chatglm import ChatglmPytorchChatModel
|
|
56
56
|
from .pytorch.core import PytorchChatModel, PytorchModel
|
|
57
|
+
from .pytorch.deepseek_vl import DeepSeekVLChatModel
|
|
57
58
|
from .pytorch.falcon import FalconPytorchChatModel, FalconPytorchModel
|
|
58
59
|
from .pytorch.internlm2 import Internlm2PytorchChatModel
|
|
59
60
|
from .pytorch.llama_2 import LlamaPytorchChatModel, LlamaPytorchModel
|
|
61
|
+
from .pytorch.omnilmm import OmniLMMModel
|
|
60
62
|
from .pytorch.qwen_vl import QwenVLChatModel
|
|
61
63
|
from .pytorch.vicuna import VicunaPytorchChatModel
|
|
62
64
|
from .pytorch.yi_vl import YiVLChatModel
|
|
@@ -94,7 +96,9 @@ def _install():
|
|
|
94
96
|
FalconPytorchModel,
|
|
95
97
|
Internlm2PytorchChatModel,
|
|
96
98
|
QwenVLChatModel,
|
|
99
|
+
OmniLMMModel,
|
|
97
100
|
YiVLChatModel,
|
|
101
|
+
DeepSeekVLChatModel,
|
|
98
102
|
PytorchModel,
|
|
99
103
|
]
|
|
100
104
|
)
|