xinference 0.9.4__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of xinference might be problematic.

Files changed (59)
  1. xinference/_version.py +3 -3
  2. xinference/api/oauth2/auth_service.py +47 -18
  3. xinference/api/oauth2/types.py +1 -0
  4. xinference/api/restful_api.py +9 -1
  5. xinference/client/restful/restful_client.py +12 -2
  6. xinference/conftest.py +13 -2
  7. xinference/core/supervisor.py +32 -1
  8. xinference/core/worker.py +139 -20
  9. xinference/deploy/cmdline.py +119 -20
  10. xinference/model/llm/__init__.py +4 -0
  11. xinference/model/llm/llm_family.json +627 -0
  12. xinference/model/llm/llm_family_modelscope.json +471 -0
  13. xinference/model/llm/pytorch/core.py +2 -0
  14. xinference/model/llm/pytorch/deepseek_vl.py +232 -0
  15. xinference/model/llm/pytorch/omnilmm.py +153 -0
  16. xinference/model/llm/utils.py +11 -1
  17. xinference/model/llm/vllm/core.py +3 -0
  18. xinference/thirdparty/deepseek_vl/__init__.py +31 -0
  19. xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
  20. xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
  21. xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
  22. xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
  23. xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
  24. xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
  25. xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
  26. xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
  27. xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
  28. xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
  29. xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
  30. xinference/thirdparty/omnilmm/__init__.py +0 -0
  31. xinference/thirdparty/omnilmm/chat.py +216 -0
  32. xinference/thirdparty/omnilmm/constants.py +4 -0
  33. xinference/thirdparty/omnilmm/conversation.py +332 -0
  34. xinference/thirdparty/omnilmm/model/__init__.py +1 -0
  35. xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
  36. xinference/thirdparty/omnilmm/model/resampler.py +166 -0
  37. xinference/thirdparty/omnilmm/model/utils.py +563 -0
  38. xinference/thirdparty/omnilmm/train/__init__.py +13 -0
  39. xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
  40. xinference/thirdparty/omnilmm/utils.py +134 -0
  41. xinference/web/ui/build/asset-manifest.json +3 -3
  42. xinference/web/ui/build/index.html +1 -1
  43. xinference/web/ui/build/static/js/main.98516614.js +3 -0
  44. xinference/web/ui/build/static/js/main.98516614.js.map +1 -0
  45. xinference/web/ui/node_modules/.cache/babel-loader/139969fd25258eb7decc9505f30b779089bba50c402bb5c663008477c7bff73b.json +1 -0
  46. xinference/web/ui/node_modules/.cache/babel-loader/3f357ab57b8e7fade54c667f0e0ebf2787566f72bfdca0fea14e395b5c203753.json +1 -0
  47. xinference/web/ui/node_modules/.cache/babel-loader/9d7c49815d97539207e5aab2fb967591b5fed7791218a0762539efc9491f36af.json +1 -0
  48. xinference/web/ui/node_modules/.cache/babel-loader/d0d0b591d9adaf42b83ad6633f8b7c118541a4b80ea957c303d3bf9b86fbad0a.json +1 -0
  49. {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/METADATA +18 -5
  50. {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/RECORD +55 -28
  51. xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
  52. xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
  53. xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
  54. xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
  55. /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.98516614.js.LICENSE.txt} +0 -0
  56. {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/LICENSE +0 -0
  57. {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/WHEEL +0 -0
  58. {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/entry_points.txt +0 -0
  59. {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/top_level.txt +0 -0
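
The headline changes in 0.10.0 are API-key authentication threaded through the auth service, REST API, RESTful client, and CLI; worker/GPU placement options for model launch; and two newly vendored multimodal models, DeepSeek-VL and OmniLMM. As a minimal sketch of the client-side change, assuming a server configured with an API key (the endpoint URL and key value below are placeholders; the api_key parameter itself appears in the hunks that follow):

# Minimal sketch of connecting with the new api_key parameter.
# Endpoint URL and key value are placeholders.
from xinference.client.restful.restful_client import RESTfulClient

client = RESTfulClient(base_url="http://127.0.0.1:9997", api_key="sk-...")
print(client.list_model_registrations(model_type="LLM"))
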
xinference/deploy/cmdline.py

@@ -376,18 +376,27 @@ def worker(
     is_flag=True,
     help="Persist the model configuration to the filesystem, retains the model registration after server restarts.",
 )
+@click.option(
+    "--api-key",
+    "-ak",
+    default=None,
+    type=str,
+    help="Api-Key for access xinference api with authorization.",
+)
 def register_model(
     endpoint: Optional[str],
     model_type: str,
     file: str,
     persist: bool,
+    api_key: Optional[str],
 ):
     endpoint = get_endpoint(endpoint)
     with open(file) as fd:
         model = fd.read()
 
-    client = RESTfulClient(base_url=endpoint)
-    client._set_token(get_stored_token(endpoint, client))
+    client = RESTfulClient(base_url=endpoint, api_key=api_key)
+    if api_key is None:
+        client._set_token(get_stored_token(endpoint, client))
     client.register_model(
         model_type=model_type,
         model=model,
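
Every command in this file now repeats the same dance: build the client with the explicit --api-key, and only fall back to the locally stored login token when no key was given. A hedged sketch of that pattern factored into one helper (the helper itself is hypothetical and not part of the diff; get_endpoint and get_stored_token are assumed available as they are in cmdline.py):

from typing import Optional

from xinference.client.restful.restful_client import RESTfulClient


def get_authed_client(endpoint: Optional[str], api_key: Optional[str]) -> RESTfulClient:
    # Hypothetical helper mirroring the repeated pattern in this diff:
    # an explicit API key wins; otherwise reuse the stored token.
    endpoint = get_endpoint(endpoint)  # assumed, as used in cmdline.py
    client = RESTfulClient(base_url=endpoint, api_key=api_key)
    if api_key is None:
        client._set_token(get_stored_token(endpoint, client))  # assumed, as in cmdline.py
    return client
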
@@ -408,15 +417,24 @@ def register_model(
     help="Type of model to unregister (default is 'LLM').",
 )
 @click.option("--model-name", "-n", type=str, help="Name of the model to unregister.")
+@click.option(
+    "--api-key",
+    "-ak",
+    default=None,
+    type=str,
+    help="Api-Key for access xinference api with authorization.",
+)
 def unregister_model(
     endpoint: Optional[str],
     model_type: str,
     model_name: str,
+    api_key: Optional[str],
 ):
     endpoint = get_endpoint(endpoint)
 
-    client = RESTfulClient(base_url=endpoint)
-    client._set_token(get_stored_token(endpoint, client))
+    client = RESTfulClient(base_url=endpoint, api_key=api_key)
+    if api_key is None:
+        client._set_token(get_stored_token(endpoint, client))
     client.unregister_model(
         model_type=model_type,
         model_name=model_name,
@@ -437,15 +455,24 @@ def unregister_model(
     type=str,
     help="Filter by model type (default is 'LLM').",
 )
+@click.option(
+    "--api-key",
+    "-ak",
+    default=None,
+    type=str,
+    help="Api-Key for access xinference api with authorization.",
+)
 def list_model_registrations(
     endpoint: Optional[str],
     model_type: str,
+    api_key: Optional[str],
 ):
     from tabulate import tabulate
 
     endpoint = get_endpoint(endpoint)
-    client = RESTfulClient(base_url=endpoint)
-    client._set_token(get_stored_token(endpoint, client))
+    client = RESTfulClient(base_url=endpoint, api_key=api_key)
+    if api_key is None:
+        client._set_token(get_stored_token(endpoint, client))
 
     registrations = client.list_model_registrations(model_type=model_type)
 
@@ -632,12 +659,31 @@ def list_model_registrations(
     type=(str, str),
     multiple=True,
 )
+@click.option(
+    "--worker-ip",
+    default=None,
+    type=str,
+    help="Specify which worker this model runs on by ip, for distributed situation.",
+)
+@click.option(
+    "--gpu-idx",
+    default=None,
+    type=str,
+    help="Specify which GPUs of a worker this model can run on, separated with commas.",
+)
 @click.option(
     "--trust-remote-code",
     default=True,
     type=bool,
     help="Whether or not to allow for custom models defined on the Hub in their own modeling files.",
 )
+@click.option(
+    "--api-key",
+    "-ak",
+    default=None,
+    type=str,
+    help="Api-Key for access xinference api with authorization.",
+)
 @click.pass_context
 def model_launch(
     ctx,
@@ -653,7 +699,10 @@ def model_launch(
     peft_model_path: Optional[str],
     image_lora_load_kwargs: Optional[Tuple],
     image_lora_fuse_kwargs: Optional[Tuple],
+    worker_ip: Optional[str],
+    gpu_idx: Optional[str],
     trust_remote_code: bool,
+    api_key: Optional[str],
 ):
     kwargs = {}
     for i in range(0, len(ctx.args), 2):
@@ -680,14 +729,19 @@ def model_launch(
         else None
     )
 
+    _gpu_idx: Optional[List[int]] = (
+        None if gpu_idx is None else [int(idx) for idx in gpu_idx.split(",")]
+    )
+
     endpoint = get_endpoint(endpoint)
     model_size: Optional[Union[str, int]] = (
         size_in_billions
         if size_in_billions is None or "_" in size_in_billions
         else int(size_in_billions)
     )
-    client = RESTfulClient(base_url=endpoint)
-    client._set_token(get_stored_token(endpoint, client))
+    client = RESTfulClient(base_url=endpoint, api_key=api_key)
+    if api_key is None:
+        client._set_token(get_stored_token(endpoint, client))
 
     model_uid = client.launch_model(
         model_name=model_name,
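
The new --gpu-idx value arrives as a plain string and is converted before the launch call; a quick check of the conversion the hunk above introduces:

# "--gpu-idx 0,1,3" on the command line becomes [0, 1, 3] for the client.
# Note int() tolerates surrounding whitespace, so "0, 1" also parses.
gpu_idx = "0,1,3"
_gpu_idx = None if gpu_idx is None else [int(idx) for idx in gpu_idx.split(",")]
assert _gpu_idx == [0, 1, 3]
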
@@ -701,6 +755,8 @@ def model_launch(
         peft_model_path=peft_model_path,
         image_lora_load_kwargs=image_lora_load_params,
         image_lora_fuse_kwargs=image_lora_fuse_params,
+        worker_ip=worker_ip,
+        gpu_idx=_gpu_idx,
         trust_remote_code=trust_remote_code,
         **kwargs,
     )
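
With the plumbing above in place, a launch can pin a model to a specific worker and to specific GPUs on it. A hedged sketch of the equivalent client call (model name, size, and addresses are placeholders; worker_ip and gpu_idx are the new keyword arguments shown in the hunk, the others are assumed unchanged from 0.9.4):

from xinference.client.restful.restful_client import RESTfulClient

client = RESTfulClient(base_url="http://127.0.0.1:9997", api_key=None)
model_uid = client.launch_model(
    model_name="llama-2-chat",     # placeholder model name
    model_size_in_billions=7,
    worker_ip="192.168.1.12",      # new in 0.10.0: run on this worker
    gpu_idx=[0, 1],                # new in 0.10.0: pin to GPUs 0 and 1
)
print(model_uid)
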
@@ -718,12 +774,20 @@ def model_launch(
     type=str,
     help="Xinference endpoint.",
 )
-def model_list(endpoint: Optional[str]):
+@click.option(
+    "--api-key",
+    "-ak",
+    default=None,
+    type=str,
+    help="Api-Key for access xinference api with authorization.",
+)
+def model_list(endpoint: Optional[str], api_key: Optional[str]):
     from tabulate import tabulate
 
     endpoint = get_endpoint(endpoint)
-    client = RESTfulClient(base_url=endpoint)
-    client._set_token(get_stored_token(endpoint, client))
+    client = RESTfulClient(base_url=endpoint, api_key=api_key)
+    if api_key is None:
+        client._set_token(get_stored_token(endpoint, client))
 
     llm_table = []
     embedding_table = []
@@ -844,13 +908,22 @@ def model_list(endpoint: Optional[str]):
     required=True,
     help="The unique identifier (UID) of the model.",
 )
+@click.option(
+    "--api-key",
+    "-ak",
+    default=None,
+    type=str,
+    help="Api-Key for access xinference api with authorization.",
+)
 def model_terminate(
     endpoint: Optional[str],
     model_uid: str,
+    api_key: Optional[str],
 ):
     endpoint = get_endpoint(endpoint)
-    client = RESTfulClient(base_url=endpoint)
-    client._set_token(get_stored_token(endpoint, client))
+    client = RESTfulClient(base_url=endpoint, api_key=api_key)
+    if api_key is None:
+        client._set_token(get_stored_token(endpoint, client))
     client.terminate_model(model_uid=model_uid)
 
 
@@ -873,15 +946,24 @@ def model_terminate(
     type=bool,
     help="Whether to stream the generated text. Use 'True' for streaming (default is True).",
 )
+@click.option(
+    "--api-key",
+    "-ak",
+    default=None,
+    type=str,
+    help="Api-Key for access xinference api with authorization.",
+)
 def model_generate(
     endpoint: Optional[str],
     model_uid: str,
     max_tokens: int,
     stream: bool,
+    api_key: Optional[str],
 ):
     endpoint = get_endpoint(endpoint)
-    client = RESTfulClient(base_url=endpoint)
-    client._set_token(get_stored_token(endpoint, client))
+    client = RESTfulClient(base_url=endpoint, api_key=api_key)
+    if api_key is None:
+        client._set_token(get_stored_token(endpoint, client))
     if stream:
         # TODO: when stream=True, RestfulClient cannot generate words one by one.
         # So use Client in temporary. The implementation needs to be changed to
@@ -959,16 +1041,25 @@ def model_generate(
     type=bool,
     help="Whether to stream the chat messages. Use 'True' for streaming (default is True).",
 )
+@click.option(
+    "--api-key",
+    "-ak",
+    default=None,
+    type=str,
+    help="Api-Key for access xinference api with authorization.",
+)
 def model_chat(
     endpoint: Optional[str],
     model_uid: str,
     max_tokens: int,
     stream: bool,
+    api_key: Optional[str],
 ):
     # TODO: chat model roles may not be user and assistant.
     endpoint = get_endpoint(endpoint)
-    client = RESTfulClient(base_url=endpoint)
-    client._set_token(get_stored_token(endpoint, client))
+    client = RESTfulClient(base_url=endpoint, api_key=api_key)
+    if api_key is None:
+        client._set_token(get_stored_token(endpoint, client))
 
     chat_history: "List[ChatCompletionMessage]" = []
     if stream:
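
model_generate and model_chat take the same authentication change; underneath, both resolve the UID to a model handle on the client. A hedged sketch of the non-streaming chat flow the command wraps (the handle method and response shape are assumed from this release's client API, and the UID is a placeholder):

from xinference.client.restful.restful_client import RESTfulClient

client = RESTfulClient(base_url="http://127.0.0.1:9997", api_key="sk-...")
model = client.get_model("my-model-uid")  # placeholder UID from launch_model
response = model.chat(                    # assumed chat-handle signature
    prompt="Hello!",
    chat_history=[],
    generate_config={"max_tokens": 512, "stream": False},
)
print(response["choices"][0]["message"]["content"])
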
@@ -1048,10 +1139,18 @@ def model_chat(
 
 @cli.command("vllm-models", help="Query and display models compatible with vLLM.")
 @click.option("--endpoint", "-e", type=str, help="Xinference endpoint.")
-def vllm_models(endpoint: Optional[str]):
+@click.option(
+    "--api-key",
+    "-ak",
+    default=None,
+    type=str,
+    help="Api-Key for access xinference api with authorization.",
+)
+def vllm_models(endpoint: Optional[str], api_key: Optional[str]):
     endpoint = get_endpoint(endpoint)
-    client = RESTfulClient(base_url=endpoint)
-    client._set_token(get_stored_token(endpoint, client))
+    client = RESTfulClient(base_url=endpoint, api_key=api_key)
+    if api_key is None:
+        client._set_token(get_stored_token(endpoint, client))
     vllm_models_dict = client.vllm_models()
     print("VLLM supported model families:")
     chat_models = vllm_models_dict["chat"]
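
The same dictionary the command prints can be fetched directly. The "chat" key is visible in the hunk above; any other keys (e.g. for generate-only families) are assumed:

from xinference.client.restful.restful_client import RESTfulClient

client = RESTfulClient(base_url="http://127.0.0.1:9997", api_key=None)
vllm_models_dict = client.vllm_models()
print("VLLM supported model families:")
for family in vllm_models_dict["chat"]:  # chat-capable families, as printed above
    print(f"  {family}")
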
xinference/model/llm/__init__.py

@@ -54,9 +54,11 @@ def _install():
     from .pytorch.baichuan import BaichuanPytorchChatModel
     from .pytorch.chatglm import ChatglmPytorchChatModel
     from .pytorch.core import PytorchChatModel, PytorchModel
+    from .pytorch.deepseek_vl import DeepSeekVLChatModel
     from .pytorch.falcon import FalconPytorchChatModel, FalconPytorchModel
     from .pytorch.internlm2 import Internlm2PytorchChatModel
     from .pytorch.llama_2 import LlamaPytorchChatModel, LlamaPytorchModel
+    from .pytorch.omnilmm import OmniLMMModel
     from .pytorch.qwen_vl import QwenVLChatModel
     from .pytorch.vicuna import VicunaPytorchChatModel
     from .pytorch.yi_vl import YiVLChatModel
@@ -94,7 +96,9 @@ def _install():
             FalconPytorchModel,
             Internlm2PytorchChatModel,
             QwenVLChatModel,
+            OmniLMMModel,
             YiVLChatModel,
+            DeepSeekVLChatModel,
             PytorchModel,
         ]
     )
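
The two vendored models are activated by nothing more than an import plus an entry in MODEL_CLASSES inside _install(); the generic PytorchModel stays last in the list. A hedged sketch of wiring in another PyTorch model the same way (MyVLChatModel and its module are invented for illustration):

def _install():
    # ... existing imports ...
    from .pytorch.core import PytorchChatModel, PytorchModel
    from .pytorch.my_vl import MyVLChatModel  # hypothetical new module

    MODEL_CLASSES.extend(
        [
            # ... existing classes ...
            MyVLChatModel,   # specific classes come before the generic one
            PytorchModel,    # generic entry stays last, as in the hunk above
        ]
    )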