xinference 1.6.0-py3-none-any.whl → 1.6.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/client/restful/restful_client.py +1 -1
- xinference/conftest.py +0 -7
- xinference/core/media_interface.py +9 -8
- xinference/core/model.py +13 -6
- xinference/core/scheduler.py +1 -10
- xinference/core/worker.py +0 -10
- xinference/model/audio/model_spec.json +53 -1
- xinference/model/audio/model_spec_modelscope.json +57 -1
- xinference/model/embedding/core.py +19 -11
- xinference/model/image/model_spec.json +10 -1
- xinference/model/image/model_spec_modelscope.json +20 -0
- xinference/model/llm/__init__.py +6 -54
- xinference/model/llm/core.py +19 -5
- xinference/model/llm/llama_cpp/core.py +59 -3
- xinference/model/llm/llama_cpp/memory.py +455 -0
- xinference/model/llm/llm_family.json +185 -397
- xinference/model/llm/llm_family.py +88 -16
- xinference/model/llm/llm_family_modelscope.json +199 -421
- xinference/model/llm/llm_family_openmind_hub.json +0 -34
- xinference/model/llm/sglang/core.py +4 -0
- xinference/model/llm/transformers/__init__.py +27 -6
- xinference/model/llm/transformers/chatglm.py +4 -2
- xinference/model/llm/transformers/core.py +49 -28
- xinference/model/llm/transformers/deepseek_v2.py +6 -49
- xinference/model/llm/transformers/gemma3.py +119 -164
- xinference/{thirdparty/omnilmm/train → model/llm/transformers/multimodal}/__init__.py +1 -1
- xinference/model/llm/transformers/{cogagent.py → multimodal/cogagent.py} +58 -95
- xinference/model/llm/transformers/multimodal/core.py +205 -0
- xinference/model/llm/transformers/{deepseek_vl2.py → multimodal/deepseek_vl2.py} +59 -120
- xinference/model/llm/transformers/multimodal/gemma3.py +117 -0
- xinference/model/llm/transformers/{glm4v.py → multimodal/glm4v.py} +57 -93
- xinference/model/llm/transformers/multimodal/intern_vl.py +412 -0
- xinference/model/llm/transformers/{minicpmv26.py → multimodal/minicpmv26.py} +55 -102
- xinference/model/llm/transformers/{ovis2.py → multimodal/ovis2.py} +114 -175
- xinference/model/llm/transformers/{qwen-omni.py → multimodal/qwen-omni.py} +82 -167
- xinference/model/llm/transformers/multimodal/qwen2_audio.py +131 -0
- xinference/model/llm/transformers/{qwen2_vl.py → multimodal/qwen2_vl.py} +224 -256
- xinference/model/llm/transformers/opt.py +4 -2
- xinference/model/llm/transformers/utils.py +6 -37
- xinference/model/llm/vllm/core.py +4 -0
- xinference/model/rerank/core.py +7 -1
- xinference/model/rerank/utils.py +17 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.ddf9eaee.js +3 -0
- xinference/web/ui/build/static/js/main.ddf9eaee.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/12e637ed5fa9ca6491b03892b6949c03afd4960fe36ac25744488e7e1982aa19.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/567e49df411efb24425d289bb484758cb57067ca54f8b5c67fe4505f698deb96.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/77ac2665a784e99501ae95d32ef5937837a0439a47e965d291b38e99cb619f5b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d4ed4e82bfe69915999ec83f5feaa4301c75ecc6bdf1c78f2d03e4671ecbefc8.json +1 -0
- xinference/web/ui/src/locales/en.json +3 -1
- xinference/web/ui/src/locales/zh.json +3 -1
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/METADATA +16 -14
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/RECORD +60 -76
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/WHEEL +1 -1
- xinference/model/llm/transformers/cogvlm2.py +0 -442
- xinference/model/llm/transformers/cogvlm2_video.py +0 -333
- xinference/model/llm/transformers/deepseek_vl.py +0 -280
- xinference/model/llm/transformers/glm_edge_v.py +0 -213
- xinference/model/llm/transformers/intern_vl.py +0 -526
- xinference/model/llm/transformers/internlm2.py +0 -94
- xinference/model/llm/transformers/minicpmv25.py +0 -193
- xinference/model/llm/transformers/omnilmm.py +0 -132
- xinference/model/llm/transformers/qwen2_audio.py +0 -179
- xinference/model/llm/transformers/qwen_vl.py +0 -360
- xinference/thirdparty/omnilmm/LICENSE +0 -201
- xinference/thirdparty/omnilmm/__init__.py +0 -0
- xinference/thirdparty/omnilmm/chat.py +0 -218
- xinference/thirdparty/omnilmm/constants.py +0 -4
- xinference/thirdparty/omnilmm/conversation.py +0 -332
- xinference/thirdparty/omnilmm/model/__init__.py +0 -1
- xinference/thirdparty/omnilmm/model/omnilmm.py +0 -595
- xinference/thirdparty/omnilmm/model/resampler.py +0 -166
- xinference/thirdparty/omnilmm/model/utils.py +0 -578
- xinference/thirdparty/omnilmm/train/train_utils.py +0 -150
- xinference/thirdparty/omnilmm/utils.py +0 -134
- xinference/web/ui/build/static/js/main.ae579a97.js +0 -3
- xinference/web/ui/build/static/js/main.ae579a97.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +0 -1
- /xinference/web/ui/build/static/js/{main.ae579a97.js.LICENSE.txt → main.ddf9eaee.js.LICENSE.txt} +0 -0
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.6.0.dist-info → xinference-1.6.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family.py

@@ -65,6 +65,7 @@ class LlamaCppLLMSpecV1(BaseModel):
     # Must in order that `str` first, then `int`
     model_size_in_billions: Union[str, int]
     quantizations: List[str]
+    multimodal_projectors: Optional[List[str]]
     model_id: Optional[str]
     model_file_name_template: str
     model_file_name_split_template: Optional[str]
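The new `multimodal_projectors` field lets a GGUF spec list downloadable mmproj files alongside its quantizations. Below is a hypothetical spec fragment shaped like the class above; every value is a made-up placeholder for illustration, not an actual llm_family.json entry:

# Hypothetical ggufv2 spec fragment; field names follow LlamaCppLLMSpecV1 above,
# all values are illustrative placeholders.
spec_fragment = {
    "model_format": "ggufv2",
    "model_size_in_billions": 7,
    "quantizations": ["Q4_K_M", "Q8_0"],
    "multimodal_projectors": ["mmproj-model-f16.gguf"],  # new in 1.6.1
    "model_id": "example-org/example-vl-7b-GGUF",  # made-up repo id
    "model_file_name_template": "example-vl-7b-{quantization}.gguf",
}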
@@ -321,6 +322,7 @@ def cache(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
     quantization: Optional[str] = None,
+    multimodal_projector: Optional[str] = None,
 ) -> str:
     legacy_cache_path = get_legacy_cache_path(
         llm_family.model_name,
@@ -338,16 +340,24 @@ def cache(
     else:
         if llm_spec.model_hub == "huggingface":
             logger.info(f"Caching from Hugging Face: {llm_spec.model_id}")
-            return cache_from_huggingface(llm_family, llm_spec, quantization)
+            return cache_from_huggingface(
+                llm_family, llm_spec, quantization, multimodal_projector
+            )
         elif llm_spec.model_hub == "modelscope":
             logger.info(f"Caching from Modelscope: {llm_spec.model_id}")
-            return cache_from_modelscope(llm_family, llm_spec, quantization)
+            return cache_from_modelscope(
+                llm_family, llm_spec, quantization, multimodal_projector
+            )
         elif llm_spec.model_hub == "openmind_hub":
             logger.info(f"Caching from openmind_hub: {llm_spec.model_id}")
-            return cache_from_openmind_hub(llm_family, llm_spec, quantization)
+            return cache_from_openmind_hub(
+                llm_family, llm_spec, quantization, multimodal_projector
+            )
         elif llm_spec.model_hub == "csghub":
             logger.info(f"Caching from CSGHub: {llm_spec.model_id}")
-            return cache_from_csghub(llm_family, llm_spec, quantization)
+            return cache_from_csghub(
+                llm_family, llm_spec, quantization, multimodal_projector
+            )
         else:
             raise ValueError(f"Unknown model hub: {llm_spec.model_hub}")

@@ -543,13 +553,34 @@ def _get_meta_path(
     model_format: str,
     model_hub: str,
     quantization: Optional[str] = None,
+    multimodal_projector: Optional[str] = None,
 ):
     if model_format == "pytorch":
         if model_hub == "huggingface":
             return os.path.join(cache_dir, "__valid_download")
         else:
             return os.path.join(cache_dir, f"__valid_download_{model_hub}")
-    elif model_format in ["ggufv2", "gptq", "awq", "fp8", "mlx"]:
+    elif model_format == "ggufv2":
+        assert quantization is not None
+        if multimodal_projector is None:
+            # Compatible with old cache file to avoid re-download model.
+            if model_hub == "huggingface":
+                return os.path.join(cache_dir, f"__valid_download_{quantization}")
+            else:
+                return os.path.join(
+                    cache_dir, f"__valid_download_{model_hub}_{quantization}"
+                )
+        else:
+            if model_hub == "huggingface":
+                return os.path.join(
+                    cache_dir, f"__valid_download_{quantization}_{multimodal_projector}"
+                )
+            else:
+                return os.path.join(
+                    cache_dir,
+                    f"__valid_download_{model_hub}_{quantization}_{multimodal_projector}",
+                )
+    elif model_format in ["gptq", "awq", "fp8", "mlx"]:
         assert quantization is not None
         if model_hub == "huggingface":
             return os.path.join(cache_dir, f"__valid_download_{quantization}")
@@ -567,6 +598,7 @@ def _skip_download(
     model_hub: str,
     model_revision: Optional[str],
     quantization: Optional[str] = None,
+    multimodal_projector: Optional[str] = None,
 ) -> bool:
     if model_format in ["pytorch", "mindspore"]:
         model_hub_to_meta_path = {
@@ -591,7 +623,14 @@ def _skip_download(
                    logger.warning(f"Cache {cache_dir} exists, but it was from {hub}")
                    return True
            return False
-    elif model_format in ["ggufv2", "gptq", "awq", "fp8", "mlx"]:
+    elif model_format == "ggufv2":
+        assert quantization is not None
+        return os.path.exists(
+            _get_meta_path(
+                cache_dir, model_format, model_hub, quantization, multimodal_projector
+            )
+        )
+    elif model_format in ["gptq", "awq", "fp8", "mlx"]:
         assert quantization is not None
         return os.path.exists(
             _get_meta_path(cache_dir, model_format, model_hub, quantization)
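Taken together, the two ggufv2 branches above mean the cache marker now encodes the projector name, so a cache created before 1.6.1 stays valid for plain GGUF loads, while a newly requested projector maps to a different marker and triggers a fresh download. A self-contained sketch of that behaviour (marker naming mirrors the huggingface branch of `_get_meta_path`; file names are illustrative only):

import os
import tempfile

def gguf_marker(cache_dir, quantization, multimodal_projector=None):
    # Same naming as the ggufv2/huggingface branch of _get_meta_path above.
    if multimodal_projector is None:
        suffix = quantization
    else:
        suffix = f"{quantization}_{multimodal_projector}"
    return os.path.join(cache_dir, f"__valid_download_{suffix}")

with tempfile.TemporaryDirectory() as cache_dir:
    # Simulate a pre-1.6.1 cache: marker written without a projector.
    open(gguf_marker(cache_dir, "Q4_K_M"), "w").close()

    # The skip check still succeeds when no projector is requested...
    print(os.path.exists(gguf_marker(cache_dir, "Q4_K_M")))  # True
    # ...but a requested projector looks for a different marker, so the
    # model files are fetched again and a new marker is written.
    print(os.path.exists(gguf_marker(cache_dir, "Q4_K_M", "mmproj-model-f16.gguf")))  # False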
@@ -605,6 +644,7 @@ def _generate_meta_file(
     llm_family: "LLMFamilyV1",
     llm_spec: "LLMSpecV1",
     quantization: Optional[str] = None,
+    multimodal_projector: Optional[str] = None,
 ):
     assert not valid_model_revision(
         meta_path, llm_spec.model_revision
@@ -614,12 +654,16 @@ def _generate_meta_file(

         from .core import LLMDescription

-        desc = LLMDescription(None, None, llm_family, llm_spec, quantization)
+        desc = LLMDescription(
+            None, None, llm_family, llm_spec, quantization, multimodal_projector
+        )
         json.dump(desc.to_dict(), f)


 def _generate_model_file_names(
-    llm_spec: "LLMSpecV1", quantization: Optional[str] = None
+    llm_spec: "LLMSpecV1",
+    quantization: Optional[str] = None,
+    multimodal_projector: Optional[str] = None,
 ) -> Tuple[List[str], str, bool]:
     file_names = []
     final_file_name = llm_spec.model_file_name_template.format(
@@ -650,6 +694,8 @@ def _generate_model_file_names(
                 quantization=quantization, part=part
             )
             file_names.append(file_name)
+    if multimodal_projector:
+        file_names.append(multimodal_projector)

     return file_names, final_file_name, need_merge

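With the append above, a configured projector file is fetched next to the GGUF weights. A minimal sketch of the resulting download list, using made-up template and projector names (the real ones come from the model specs):

from typing import List, Optional

def gguf_file_names(template: str, quantization: str,
                    multimodal_projector: Optional[str] = None) -> List[str]:
    # Mirrors the single-file path of _generate_model_file_names plus the new
    # projector append; the split/merge handling is omitted for brevity.
    file_names = [template.format(quantization=quantization)]
    if multimodal_projector:
        file_names.append(multimodal_projector)
    return file_names

print(gguf_file_names("example-vl-7b-{quantization}.gguf", "Q4_K_M",
                      "mmproj-example-vl-7b-f16.gguf"))
# ['example-vl-7b-Q4_K_M.gguf', 'mmproj-example-vl-7b-f16.gguf']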
@@ -671,6 +717,7 @@ def cache_from_csghub(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
     quantization: Optional[str] = None,
+    multimodal_projector: Optional[str] = None,
 ) -> str:
     """
     Cache model from CSGHub. Return the cache directory.
@@ -686,6 +733,7 @@ def cache_from_csghub(
         llm_spec.model_hub,
         llm_spec.model_revision,
         quantization,
+        multimodal_projector,
     ):
         return cache_dir

@@ -705,7 +753,7 @@ def cache_from_csghub(

     elif llm_spec.model_format in ["ggufv2"]:
         file_names, final_file_name, need_merge = _generate_model_file_names(
-            llm_spec, quantization
+            llm_spec, quantization, multimodal_projector
         )

         for filename in file_names:
@@ -729,9 +777,15 @@ def cache_from_csghub(
         raise ValueError(f"Unsupported format: {llm_spec.model_format}")

     meta_path = _get_meta_path(
-        cache_dir, llm_spec.model_format, llm_spec.model_hub, quantization
+        cache_dir,
+        llm_spec.model_format,
+        llm_spec.model_hub,
+        quantization,
+        multimodal_projector,
+    )
+    _generate_meta_file(
+        meta_path, llm_family, llm_spec, quantization, multimodal_projector
     )
-    _generate_meta_file(meta_path, llm_family, llm_spec, quantization)

     return cache_dir

@@ -740,6 +794,7 @@ def cache_from_modelscope(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
     quantization: Optional[str] = None,
+    multimodal_projector: Optional[str] = None,
 ) -> str:
     """
     Cache model from Modelscope. Return the cache directory.
@@ -754,6 +809,7 @@ def cache_from_modelscope(
         llm_spec.model_hub,
         llm_spec.model_revision,
         quantization,
+        multimodal_projector,
     ):
         return cache_dir

@@ -772,7 +828,7 @@ def cache_from_modelscope(

     elif llm_spec.model_format in ["ggufv2"]:
         file_names, final_file_name, need_merge = _generate_model_file_names(
-            llm_spec, quantization
+            llm_spec, quantization, multimodal_projector
         )

         for filename in file_names:
@@ -795,7 +851,11 @@ def cache_from_modelscope(
         raise ValueError(f"Unsupported format: {llm_spec.model_format}")

     meta_path = _get_meta_path(
-        cache_dir, llm_spec.model_format, llm_spec.model_hub, quantization
+        cache_dir,
+        llm_spec.model_format,
+        llm_spec.model_hub,
+        quantization,
+        multimodal_projector,
     )
     _generate_meta_file(meta_path, llm_family, llm_spec, quantization)

@@ -806,6 +866,7 @@ def cache_from_openmind_hub(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
     quantization: Optional[str] = None,
+    multimodal_projector: Optional[str] = None,
 ) -> str:
     """
     Cache model from openmind_hub. Return the cache directory.
@@ -819,6 +880,7 @@ def cache_from_openmind_hub(
         llm_spec.model_hub,
         llm_spec.model_revision,
         quantization,
+        multimodal_projector,
     ):
         return cache_dir

@@ -839,7 +901,11 @@ def cache_from_openmind_hub(
         raise ValueError(f"Unsupported format: {llm_spec.model_format}")

     meta_path = _get_meta_path(
-        cache_dir, llm_spec.model_format, llm_spec.model_hub, quantization
+        cache_dir,
+        llm_spec.model_format,
+        llm_spec.model_hub,
+        quantization,
+        multimodal_projector,
     )
     _generate_meta_file(meta_path, llm_family, llm_spec, quantization)

@@ -850,6 +916,7 @@ def cache_from_huggingface(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
     quantization: Optional[str] = None,
+    multimodal_projector: Optional[str] = None,
 ) -> str:
     """
     Cache model from Hugging Face. Return the cache directory.
@@ -863,6 +930,7 @@ def cache_from_huggingface(
         llm_spec.model_hub,
         llm_spec.model_revision,
         quantization,
+        multimodal_projector,
     ):
         return cache_dir

@@ -889,7 +957,7 @@ def cache_from_huggingface(
     elif llm_spec.model_format in ["ggufv2"]:
         assert isinstance(llm_spec, LlamaCppLLMSpecV1)
         file_names, final_file_name, need_merge = _generate_model_file_names(
-            llm_spec, quantization
+            llm_spec, quantization, multimodal_projector
         )

         for file_name in file_names:
@@ -914,7 +982,11 @@ def cache_from_huggingface(
         raise ValueError(f"Unsupported model format: {llm_spec.model_format}")

     meta_path = _get_meta_path(
-        cache_dir, llm_spec.model_format, llm_spec.model_hub, quantization
+        cache_dir,
+        llm_spec.model_format,
+        llm_spec.model_hub,
+        quantization,
+        multimodal_projector,
     )
     _generate_meta_file(meta_path, llm_family, llm_spec, quantization)
