xinference 1.6.0.post1__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (124)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +79 -2
  3. xinference/client/restful/restful_client.py +65 -3
  4. xinference/conftest.py +0 -7
  5. xinference/core/media_interface.py +132 -8
  6. xinference/core/model.py +44 -6
  7. xinference/core/scheduler.py +1 -10
  8. xinference/core/supervisor.py +8 -17
  9. xinference/core/worker.py +5 -27
  10. xinference/deploy/cmdline.py +6 -2
  11. xinference/model/audio/chattts.py +24 -39
  12. xinference/model/audio/cosyvoice.py +18 -30
  13. xinference/model/audio/funasr.py +42 -0
  14. xinference/model/audio/model_spec.json +71 -1
  15. xinference/model/audio/model_spec_modelscope.json +76 -2
  16. xinference/model/audio/utils.py +75 -0
  17. xinference/model/core.py +1 -0
  18. xinference/model/embedding/__init__.py +74 -18
  19. xinference/model/embedding/core.py +98 -589
  20. xinference/model/embedding/embed_family.py +133 -0
  21. xinference/{thirdparty/omnilmm/train → model/embedding/flag}/__init__.py +1 -1
  22. xinference/model/embedding/flag/core.py +282 -0
  23. xinference/model/embedding/model_spec.json +24 -0
  24. xinference/model/embedding/model_spec_modelscope.json +24 -0
  25. xinference/model/embedding/sentence_transformers/__init__.py +13 -0
  26. xinference/model/embedding/sentence_transformers/core.py +399 -0
  27. xinference/model/embedding/vllm/core.py +95 -0
  28. xinference/model/image/model_spec.json +30 -3
  29. xinference/model/image/model_spec_modelscope.json +41 -2
  30. xinference/model/image/stable_diffusion/core.py +144 -53
  31. xinference/model/llm/__init__.py +6 -54
  32. xinference/model/llm/core.py +19 -5
  33. xinference/model/llm/llama_cpp/core.py +59 -3
  34. xinference/model/llm/llama_cpp/memory.py +457 -0
  35. xinference/model/llm/llm_family.json +247 -402
  36. xinference/model/llm/llm_family.py +88 -16
  37. xinference/model/llm/llm_family_modelscope.json +260 -421
  38. xinference/model/llm/llm_family_openmind_hub.json +0 -34
  39. xinference/model/llm/sglang/core.py +8 -0
  40. xinference/model/llm/transformers/__init__.py +27 -6
  41. xinference/model/llm/transformers/chatglm.py +4 -2
  42. xinference/model/llm/transformers/core.py +49 -28
  43. xinference/model/llm/transformers/deepseek_v2.py +6 -49
  44. xinference/model/llm/transformers/gemma3.py +119 -164
  45. xinference/model/llm/transformers/multimodal/__init__.py +13 -0
  46. xinference/model/llm/transformers/{cogagent.py → multimodal/cogagent.py} +58 -95
  47. xinference/model/llm/transformers/multimodal/core.py +205 -0
  48. xinference/model/llm/transformers/{deepseek_vl2.py → multimodal/deepseek_vl2.py} +59 -120
  49. xinference/model/llm/transformers/multimodal/gemma3.py +117 -0
  50. xinference/model/llm/transformers/{glm4v.py → multimodal/glm4v.py} +57 -93
  51. xinference/model/llm/transformers/multimodal/intern_vl.py +412 -0
  52. xinference/model/llm/transformers/{minicpmv26.py → multimodal/minicpmv26.py} +55 -102
  53. xinference/model/llm/transformers/{ovis2.py → multimodal/ovis2.py} +114 -175
  54. xinference/model/llm/transformers/{qwen-omni.py → multimodal/qwen-omni.py} +82 -167
  55. xinference/model/llm/transformers/multimodal/qwen2_audio.py +131 -0
  56. xinference/model/llm/transformers/{qwen2_vl.py → multimodal/qwen2_vl.py} +224 -256
  57. xinference/model/llm/transformers/opt.py +4 -2
  58. xinference/model/llm/transformers/utils.py +6 -37
  59. xinference/model/llm/utils.py +11 -0
  60. xinference/model/llm/vllm/core.py +7 -0
  61. xinference/model/rerank/core.py +91 -3
  62. xinference/model/rerank/model_spec.json +24 -0
  63. xinference/model/rerank/model_spec_modelscope.json +24 -0
  64. xinference/model/rerank/utils.py +20 -2
  65. xinference/model/utils.py +38 -1
  66. xinference/model/video/diffusers.py +65 -3
  67. xinference/model/video/model_spec.json +31 -4
  68. xinference/model/video/model_spec_modelscope.json +32 -4
  69. xinference/web/ui/build/asset-manifest.json +6 -6
  70. xinference/web/ui/build/index.html +1 -1
  71. xinference/web/ui/build/static/css/main.013f296b.css +2 -0
  72. xinference/web/ui/build/static/css/main.013f296b.css.map +1 -0
  73. xinference/web/ui/build/static/js/main.8a9e3ba0.js +3 -0
  74. xinference/web/ui/build/static/js/main.8a9e3ba0.js.map +1 -0
  75. xinference/web/ui/node_modules/.cache/babel-loader/34cfbfb7836e136ba3261cfd411cc554bf99ba24b35dcceebeaa4f008cb3c9dc.json +1 -0
  76. xinference/web/ui/node_modules/.cache/babel-loader/55b9fb40b57fa926e8f05f31c2f96467e76e5ad62f033dca97c03f9e8c4eb4fe.json +1 -0
  77. xinference/web/ui/node_modules/.cache/babel-loader/567e49df411efb24425d289bb484758cb57067ca54f8b5c67fe4505f698deb96.json +1 -0
  78. xinference/web/ui/node_modules/.cache/babel-loader/6595880facebca7ceace6f17cf21c3a5a9219a2f52fb0ba9f3cf1131eddbcf6b.json +1 -0
  79. xinference/web/ui/node_modules/.cache/babel-loader/aa998bc2d9c11853add6b8a2e08f50327f56d8824ccaaec92d6dde1b305f0d85.json +1 -0
  80. xinference/web/ui/node_modules/.cache/babel-loader/c748246b1d7bcebc16153be69f37e955bb2145526c47dd425aeeff70d3004dbc.json +1 -0
  81. xinference/web/ui/node_modules/.cache/babel-loader/e31234e95d60a5a7883fbcd70de2475dc1c88c90705df1a530abb68f86f80a51.json +1 -0
  82. xinference/web/ui/src/locales/en.json +21 -8
  83. xinference/web/ui/src/locales/ja.json +224 -0
  84. xinference/web/ui/src/locales/ko.json +224 -0
  85. xinference/web/ui/src/locales/zh.json +21 -8
  86. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/METADATA +14 -11
  87. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/RECORD +93 -100
  88. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/WHEEL +1 -1
  89. xinference/model/llm/transformers/cogvlm2.py +0 -442
  90. xinference/model/llm/transformers/cogvlm2_video.py +0 -333
  91. xinference/model/llm/transformers/deepseek_vl.py +0 -280
  92. xinference/model/llm/transformers/glm_edge_v.py +0 -213
  93. xinference/model/llm/transformers/intern_vl.py +0 -526
  94. xinference/model/llm/transformers/internlm2.py +0 -94
  95. xinference/model/llm/transformers/minicpmv25.py +0 -193
  96. xinference/model/llm/transformers/omnilmm.py +0 -132
  97. xinference/model/llm/transformers/qwen2_audio.py +0 -179
  98. xinference/model/llm/transformers/qwen_vl.py +0 -360
  99. xinference/thirdparty/omnilmm/LICENSE +0 -201
  100. xinference/thirdparty/omnilmm/chat.py +0 -218
  101. xinference/thirdparty/omnilmm/constants.py +0 -4
  102. xinference/thirdparty/omnilmm/conversation.py +0 -332
  103. xinference/thirdparty/omnilmm/model/__init__.py +0 -1
  104. xinference/thirdparty/omnilmm/model/omnilmm.py +0 -595
  105. xinference/thirdparty/omnilmm/model/resampler.py +0 -166
  106. xinference/thirdparty/omnilmm/model/utils.py +0 -578
  107. xinference/thirdparty/omnilmm/train/train_utils.py +0 -150
  108. xinference/thirdparty/omnilmm/utils.py +0 -134
  109. xinference/web/ui/build/static/css/main.337afe76.css +0 -2
  110. xinference/web/ui/build/static/css/main.337afe76.css.map +0 -1
  111. xinference/web/ui/build/static/js/main.ae579a97.js +0 -3
  112. xinference/web/ui/build/static/js/main.ae579a97.js.map +0 -1
  113. xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +0 -1
  114. xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +0 -1
  115. xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +0 -1
  116. xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +0 -1
  117. xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +0 -1
  118. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +0 -1
  119. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +0 -1
  120. /xinference/{thirdparty/omnilmm → model/embedding/vllm}/__init__.py +0 -0
  121. /xinference/web/ui/build/static/js/{main.ae579a97.js.LICENSE.txt → main.8a9e3ba0.js.LICENSE.txt} +0 -0
  122. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/entry_points.txt +0 -0
  123. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/licenses/LICENSE +0 -0
  124. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family.py

@@ -65,6 +65,7 @@ class LlamaCppLLMSpecV1(BaseModel):
     # Must in order that `str` first, then `int`
     model_size_in_billions: Union[str, int]
     quantizations: List[str]
+    multimodal_projectors: Optional[List[str]]
     model_id: Optional[str]
     model_file_name_template: str
     model_file_name_split_template: Optional[str]
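The new multimodal_projectors field lets a ggufv2 spec list the llama.cpp projector files that ship alongside a vision-capable GGUF model. A minimal sketch of such a spec as a plain dict (field names follow LlamaCppLLMSpecV1 above; the repo and file names are hypothetical, not entries from llm_family.json):

    spec = {
        "model_format": "ggufv2",
        "model_size_in_billions": 4,
        "quantizations": ["Q4_K_M", "Q8_0"],
        "multimodal_projectors": ["mmproj-model-f16.gguf"],   # hypothetical projector file
        "model_id": "some-org/some-vision-model-GGUF",        # hypothetical repo
        "model_file_name_template": "model-{quantization}.gguf",
    }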
@@ -321,6 +322,7 @@ def cache(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
     quantization: Optional[str] = None,
+    multimodal_projector: Optional[str] = None,
 ) -> str:
     legacy_cache_path = get_legacy_cache_path(
         llm_family.model_name,
@@ -338,16 +340,24 @@ def cache(
     else:
         if llm_spec.model_hub == "huggingface":
             logger.info(f"Caching from Hugging Face: {llm_spec.model_id}")
-            return cache_from_huggingface(llm_family, llm_spec, quantization)
+            return cache_from_huggingface(
+                llm_family, llm_spec, quantization, multimodal_projector
+            )
         elif llm_spec.model_hub == "modelscope":
             logger.info(f"Caching from Modelscope: {llm_spec.model_id}")
-            return cache_from_modelscope(llm_family, llm_spec, quantization)
+            return cache_from_modelscope(
+                llm_family, llm_spec, quantization, multimodal_projector
+            )
         elif llm_spec.model_hub == "openmind_hub":
             logger.info(f"Caching from openmind_hub: {llm_spec.model_id}")
-            return cache_from_openmind_hub(llm_family, llm_spec, quantization)
+            return cache_from_openmind_hub(
+                llm_family, llm_spec, quantization, multimodal_projector
+            )
         elif llm_spec.model_hub == "csghub":
             logger.info(f"Caching from CSGHub: {llm_spec.model_id}")
-            return cache_from_csghub(llm_family, llm_spec, quantization)
+            return cache_from_csghub(
+                llm_family, llm_spec, quantization, multimodal_projector
+            )
         else:
             raise ValueError(f"Unknown model hub: {llm_spec.model_hub}")

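Callers that previously invoked cache(llm_family, llm_spec, quantization) keep working unchanged; GGUF vision models can pass the projector as an optional extra argument. A rough sketch (the family/spec objects and the projector file name are hypothetical):

    # family and gguf_spec would come from the registered built-in or custom model specs.
    cache_dir = cache(
        llm_family=family,
        llm_spec=gguf_spec,
        quantization="Q4_K_M",
        multimodal_projector="mmproj-model-f16.gguf",  # omit for text-only GGUF models
    )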
@@ -543,13 +553,34 @@ def _get_meta_path(
     model_format: str,
     model_hub: str,
     quantization: Optional[str] = None,
+    multimodal_projector: Optional[str] = None,
 ):
     if model_format == "pytorch":
         if model_hub == "huggingface":
             return os.path.join(cache_dir, "__valid_download")
         else:
             return os.path.join(cache_dir, f"__valid_download_{model_hub}")
-    elif model_format in ["ggufv2", "gptq", "awq", "fp8", "mlx"]:
+    elif model_format == "ggufv2":
+        assert quantization is not None
+        if multimodal_projector is None:
+            # Compatible with old cache file to avoid re-download model.
+            if model_hub == "huggingface":
+                return os.path.join(cache_dir, f"__valid_download_{quantization}")
+            else:
+                return os.path.join(
+                    cache_dir, f"__valid_download_{model_hub}_{quantization}"
+                )
+        else:
+            if model_hub == "huggingface":
+                return os.path.join(
+                    cache_dir, f"__valid_download_{quantization}_{multimodal_projector}"
+                )
+            else:
+                return os.path.join(
+                    cache_dir,
+                    f"__valid_download_{model_hub}_{quantization}_{multimodal_projector}",
+                )
+    elif model_format in ["gptq", "awq", "fp8", "mlx"]:
         assert quantization is not None
         if model_hub == "huggingface":
             return os.path.join(cache_dir, f"__valid_download_{quantization}")
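The ggufv2 branch keeps the old marker name when no projector is requested, so caches created by 1.6.x remain valid; a projector-specific download gets its own marker. A quick sketch of the resulting paths (the inputs are hypothetical):

    # No projector: same marker file as before, existing caches are still recognized.
    _get_meta_path("/cache/qwen", "ggufv2", "huggingface", "Q4_K_M")
    # -> /cache/qwen/__valid_download_Q4_K_M

    # With a projector: a distinct marker, so the projector download is validated separately.
    _get_meta_path("/cache/qwen", "ggufv2", "modelscope", "Q4_K_M", "mmproj-f16.gguf")
    # -> /cache/qwen/__valid_download_modelscope_Q4_K_M_mmproj-f16.gguf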
@@ -567,6 +598,7 @@ def _skip_download(
     model_hub: str,
     model_revision: Optional[str],
     quantization: Optional[str] = None,
+    multimodal_projector: Optional[str] = None,
 ) -> bool:
     if model_format in ["pytorch", "mindspore"]:
         model_hub_to_meta_path = {
@@ -591,7 +623,14 @@ def _skip_download(
                 logger.warning(f"Cache {cache_dir} exists, but it was from {hub}")
                 return True
         return False
-    elif model_format in ["ggufv2", "gptq", "awq", "fp8", "mlx"]:
+    elif model_format == "ggufv2":
+        assert quantization is not None
+        return os.path.exists(
+            _get_meta_path(
+                cache_dir, model_format, model_hub, quantization, multimodal_projector
+            )
+        )
+    elif model_format in ["gptq", "awq", "fp8", "mlx"]:
         assert quantization is not None
         return os.path.exists(
             _get_meta_path(cache_dir, model_format, model_hub, quantization)
@@ -605,6 +644,7 @@ def _generate_meta_file(
     llm_family: "LLMFamilyV1",
     llm_spec: "LLMSpecV1",
     quantization: Optional[str] = None,
+    multimodal_projector: Optional[str] = None,
 ):
     assert not valid_model_revision(
         meta_path, llm_spec.model_revision
@@ -614,12 +654,16 @@ def _generate_meta_file(
 
         from .core import LLMDescription
 
-        desc = LLMDescription(None, None, llm_family, llm_spec, quantization)
+        desc = LLMDescription(
+            None, None, llm_family, llm_spec, quantization, multimodal_projector
+        )
         json.dump(desc.to_dict(), f)
 
 
 def _generate_model_file_names(
-    llm_spec: "LLMSpecV1", quantization: Optional[str] = None
+    llm_spec: "LLMSpecV1",
+    quantization: Optional[str] = None,
+    multimodal_projector: Optional[str] = None,
 ) -> Tuple[List[str], str, bool]:
     file_names = []
     final_file_name = llm_spec.model_file_name_template.format(
@@ -650,6 +694,8 @@ def _generate_model_file_names(
                 quantization=quantization, part=part
             )
             file_names.append(file_name)
+    if multimodal_projector:
+        file_names.append(multimodal_projector)
 
     return file_names, final_file_name, need_merge
 
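With the projector appended last, the returned download list covers both the quantized weights and the projector file. Roughly, for a spec without a split template (template and file names are hypothetical):

    file_names, final_file_name, need_merge = _generate_model_file_names(
        llm_spec,                                      # model_file_name_template = "model-{quantization}.gguf"
        quantization="Q4_K_M",
        multimodal_projector="mmproj-model-f16.gguf",
    )
    # file_names      -> ["model-Q4_K_M.gguf", "mmproj-model-f16.gguf"]
    # final_file_name -> "model-Q4_K_M.gguf"
    # need_merge      -> False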
@@ -671,6 +717,7 @@ def cache_from_csghub(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
     quantization: Optional[str] = None,
+    multimodal_projector: Optional[str] = None,
 ) -> str:
     """
     Cache model from CSGHub. Return the cache directory.
@@ -686,6 +733,7 @@ def cache_from_csghub(
         llm_spec.model_hub,
         llm_spec.model_revision,
         quantization,
+        multimodal_projector,
     ):
         return cache_dir
 
@@ -705,7 +753,7 @@ def cache_from_csghub(
 
     elif llm_spec.model_format in ["ggufv2"]:
         file_names, final_file_name, need_merge = _generate_model_file_names(
-            llm_spec, quantization
+            llm_spec, quantization, multimodal_projector
         )
 
         for filename in file_names:
@@ -729,9 +777,15 @@ def cache_from_csghub(
         raise ValueError(f"Unsupported format: {llm_spec.model_format}")
 
     meta_path = _get_meta_path(
-        cache_dir, llm_spec.model_format, llm_spec.model_hub, quantization
+        cache_dir,
+        llm_spec.model_format,
+        llm_spec.model_hub,
+        quantization,
+        multimodal_projector,
+    )
+    _generate_meta_file(
+        meta_path, llm_family, llm_spec, quantization, multimodal_projector
     )
-    _generate_meta_file(meta_path, llm_family, llm_spec, quantization)
 
     return cache_dir
 
@@ -740,6 +794,7 @@ def cache_from_modelscope(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
     quantization: Optional[str] = None,
+    multimodal_projector: Optional[str] = None,
 ) -> str:
     """
     Cache model from Modelscope. Return the cache directory.
@@ -754,6 +809,7 @@ def cache_from_modelscope(
         llm_spec.model_hub,
         llm_spec.model_revision,
         quantization,
+        multimodal_projector,
     ):
         return cache_dir
 
@@ -772,7 +828,7 @@ def cache_from_modelscope(
 
     elif llm_spec.model_format in ["ggufv2"]:
         file_names, final_file_name, need_merge = _generate_model_file_names(
-            llm_spec, quantization
+            llm_spec, quantization, multimodal_projector
        )
 
         for filename in file_names:
@@ -795,7 +851,11 @@ def cache_from_modelscope(
         raise ValueError(f"Unsupported format: {llm_spec.model_format}")
 
     meta_path = _get_meta_path(
-        cache_dir, llm_spec.model_format, llm_spec.model_hub, quantization
+        cache_dir,
+        llm_spec.model_format,
+        llm_spec.model_hub,
+        quantization,
+        multimodal_projector,
     )
     _generate_meta_file(meta_path, llm_family, llm_spec, quantization)
 
@@ -806,6 +866,7 @@ def cache_from_openmind_hub(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
     quantization: Optional[str] = None,
+    multimodal_projector: Optional[str] = None,
 ) -> str:
     """
     Cache model from openmind_hub. Return the cache directory.
@@ -819,6 +880,7 @@ def cache_from_openmind_hub(
         llm_spec.model_hub,
         llm_spec.model_revision,
         quantization,
+        multimodal_projector,
     ):
         return cache_dir
 
@@ -839,7 +901,11 @@ def cache_from_openmind_hub(
         raise ValueError(f"Unsupported format: {llm_spec.model_format}")
 
     meta_path = _get_meta_path(
-        cache_dir, llm_spec.model_format, llm_spec.model_hub, quantization
+        cache_dir,
+        llm_spec.model_format,
+        llm_spec.model_hub,
+        quantization,
+        multimodal_projector,
     )
     _generate_meta_file(meta_path, llm_family, llm_spec, quantization)
 
@@ -850,6 +916,7 @@ def cache_from_huggingface(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
     quantization: Optional[str] = None,
+    multimodal_projector: Optional[str] = None,
 ) -> str:
     """
     Cache model from Hugging Face. Return the cache directory.
@@ -863,6 +930,7 @@ def cache_from_huggingface(
         llm_spec.model_hub,
         llm_spec.model_revision,
         quantization,
+        multimodal_projector,
     ):
         return cache_dir
 
@@ -889,7 +957,7 @@ def cache_from_huggingface(
     elif llm_spec.model_format in ["ggufv2"]:
         assert isinstance(llm_spec, LlamaCppLLMSpecV1)
         file_names, final_file_name, need_merge = _generate_model_file_names(
-            llm_spec, quantization
+            llm_spec, quantization, multimodal_projector
         )
 
         for file_name in file_names:
@@ -914,7 +982,11 @@ def cache_from_huggingface(
         raise ValueError(f"Unsupported model format: {llm_spec.model_format}")
 
     meta_path = _get_meta_path(
-        cache_dir, llm_spec.model_format, llm_spec.model_hub, quantization
+        cache_dir,
+        llm_spec.model_format,
+        llm_spec.model_hub,
+        quantization,
+        multimodal_projector,
     )
     _generate_meta_file(meta_path, llm_family, llm_spec, quantization)