PyPI - xinference - Versions diffs - 1.5.0.post2__py3-none-any.whl → 1.6.0__py3-none-any.whl - Mend - Supply Chain Defender

xinference 1.5.0.post2__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (137)

xinference/model/llm/__init__.py CHANGED Viewed

@@ -57,7 +57,7 @@ from .llm_family import (
 def check_format_with_engine(model_format, engine):
     # only llama-cpp-python support and only support ggufv2
-    if model_format in ["ggufv2"] and engine != "llama.cpp":
+    if model_format in ["ggufv2"] and engine not in ["llama.cpp", "vLLM"]:
         return False
     if model_format not in ["ggufv2"] and engine == "llama.cpp":
         return False
@@ -128,8 +128,38 @@ def register_custom_model():
                 warnings.warn(f"{user_defined_llm_dir}/{f} has error, {e}")
+def load_model_family_from_json(json_filename, target_families):
+    json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), json_filename)
+    for json_obj in json.load(codecs.open(json_path, "r", encoding="utf-8")):
+        model_spec = LLMFamilyV1.parse_obj(json_obj)
+        target_families.append(model_spec)
+        # register chat_template
+        if (
+            "chat" in model_spec.model_ability
+            and isinstance(model_spec.chat_template, str)
+            and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
+        ):
+            # note that the key is the model name,
+            # since there are multiple representations of the same prompt style name in json.
+            if model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE:
+                BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
+                    "chat_template": model_spec.chat_template,
+                    "stop_token_ids": model_spec.stop_token_ids,
+                    "stop": model_spec.stop,
+                }
+        # register model family
+        if "chat" in model_spec.model_ability:
+            BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
+        else:
+            BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
+        if "tools" in model_spec.model_ability:
+            BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
 def _install():
-    from .llama_cpp.core import LlamaCppChatModel, LlamaCppModel, XllamaCppModel
+    from .llama_cpp.core import XllamaCppModel
     from .lmdeploy.core import LMDeployChatModel, LMDeployModel
     from .mlx.core import MLXChatModel, MLXModel, MLXVisionModel
     from .sglang.core import SGLANGChatModel, SGLANGModel, SGLANGVisionModel
@@ -147,13 +177,12 @@ def _install():
     from .transformers.gemma3 import Gemma3ChatModel, Gemma3TextChatModel
     from .transformers.glm4v import Glm4VModel
     from .transformers.glm_edge_v import GlmEdgeVModel
-    from .transformers.internlm2 import Internlm2PytorchChatModel
     from .transformers.minicpmv25 import MiniCPMV25Model
     from .transformers.minicpmv26 import MiniCPMV26Model
     from .transformers.opt import OptPytorchModel
+    from .transformers.ovis2 import Ovis2ChatModel
     from .transformers.qwen2_audio import Qwen2AudioChatModel
     from .transformers.qwen_vl import QwenVLChatModel
-    from .transformers.yi_vl import YiVLChatModel
     from .vllm.core import VLLMChatModel, VLLMModel, VLLMVisionModel
     try:
@@ -167,8 +196,6 @@ def _install():
     # register llm classes.
     LLAMA_CLASSES.extend(
         [
-            LlamaCppChatModel,
-            LlamaCppModel,
             XllamaCppModel,
         ]
     )
@@ -180,10 +207,8 @@ def _install():
         [
             ChatglmPytorchChatModel,
             PytorchChatModel,
-            Internlm2PytorchChatModel,
             QwenVLChatModel,
             Qwen2AudioChatModel,
-            YiVLChatModel,
             DeepSeekVLChatModel,
             DeepSeekVL2ChatModel,
             PytorchModel,
@@ -199,6 +224,7 @@ def _install():
             CogAgentChatModel,
             Gemma3TextChatModel,
             Gemma3ChatModel,
+            Ovis2ChatModel,
         ]
     )
     if OmniLMMModel:  # type: ignore
@@ -212,115 +238,14 @@ def _install():
     SUPPORTED_ENGINES["MLX"] = MLX_CLASSES
     SUPPORTED_ENGINES["LMDEPLOY"] = LMDEPLOY_CLASSES
-    json_path = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), "llm_family.json"
+    load_model_family_from_json("llm_family.json", BUILTIN_LLM_FAMILIES)
+    load_model_family_from_json(
+        "llm_family_modelscope.json", BUILTIN_MODELSCOPE_LLM_FAMILIES
     )
-    for json_obj in json.load(codecs.open(json_path, "r", encoding="utf-8")):
-        model_spec = LLMFamilyV1.parse_obj(json_obj)
-        BUILTIN_LLM_FAMILIES.append(model_spec)
-        # register chat_template
-        if "chat" in model_spec.model_ability and isinstance(
-            model_spec.chat_template, str
-        ):
-            # note that the key is the model name,
-            # since there are multiple representations of the same prompt style name in json.
-            BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
-                "chat_template": model_spec.chat_template,
-                "stop_token_ids": model_spec.stop_token_ids,
-                "stop": model_spec.stop,
-            }
-        # register model family
-        if "chat" in model_spec.model_ability:
-            BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
-        else:
-            BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
-        if "tools" in model_spec.model_ability:
-            BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
-    modelscope_json_path = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), "llm_family_modelscope.json"
+    load_model_family_from_json(
+        "llm_family_openmind_hub.json", BUILTIN_OPENMIND_HUB_LLM_FAMILIES
     )
-    for json_obj in json.load(codecs.open(modelscope_json_path, "r", encoding="utf-8")):
-        model_spec = LLMFamilyV1.parse_obj(json_obj)
-        BUILTIN_MODELSCOPE_LLM_FAMILIES.append(model_spec)
-        # register prompt style, in case that we have something missed
-        # if duplicated with huggingface json, keep it as the huggingface style
-        if (
-            "chat" in model_spec.model_ability
-            and isinstance(model_spec.chat_template, str)
-            and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
-        ):
-            BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
-                "chat_template": model_spec.chat_template,
-                "stop_token_ids": model_spec.stop_token_ids,
-                "stop": model_spec.stop,
-            }
-        # register model family
-        if "chat" in model_spec.model_ability:
-            BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
-        else:
-            BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
-        if "tools" in model_spec.model_ability:
-            BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
-    openmind_hub_json_path = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), "llm_family_openmind_hub.json"
-    )
-    for json_obj in json.load(
-        codecs.open(openmind_hub_json_path, "r", encoding="utf-8")
-    ):
-        model_spec = LLMFamilyV1.parse_obj(json_obj)
-        BUILTIN_OPENMIND_HUB_LLM_FAMILIES.append(model_spec)
-        # register prompt style, in case that we have something missed
-        # if duplicated with huggingface json, keep it as the huggingface style
-        if (
-            "chat" in model_spec.model_ability
-            and isinstance(model_spec.chat_template, str)
-            and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
-        ):
-            BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
-                "chat_template": model_spec.chat_template,
-                "stop_token_ids": model_spec.stop_token_ids,
-                "stop": model_spec.stop,
-            }
-        # register model family
-        if "chat" in model_spec.model_ability:
-            BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
-        else:
-            BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
-        if "tools" in model_spec.model_ability:
-            BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
-    csghub_json_path = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), "llm_family_csghub.json"
-    )
-    for json_obj in json.load(codecs.open(csghub_json_path, "r", encoding="utf-8")):
-        model_spec = LLMFamilyV1.parse_obj(json_obj)
-        BUILTIN_CSGHUB_LLM_FAMILIES.append(model_spec)
-        # register prompt style, in case that we have something missed
-        # if duplicated with huggingface json, keep it as the huggingface style
-        if (
-            "chat" in model_spec.model_ability
-            and isinstance(model_spec.chat_template, str)
-            and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
-        ):
-            BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
-                "chat_template": model_spec.chat_template,
-                "stop_token_ids": model_spec.stop_token_ids,
-                "stop": model_spec.stop,
-            }
-        # register model family
-        if "chat" in model_spec.model_ability:
-            BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
-        else:
-            BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
-        if "tools" in model_spec.model_ability:
-            BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
+    load_model_family_from_json("llm_family_csghub.json", BUILTIN_CSGHUB_LLM_FAMILIES)
     for llm_specs in [
         BUILTIN_LLM_FAMILIES,

xinference/model/llm/core.py CHANGED Viewed

@@ -17,6 +17,7 @@ import inspect
 import logging
 import os
 import platform
+import warnings
 from abc import abstractmethod
 from collections import defaultdict
 from functools import lru_cache
@@ -65,6 +66,11 @@ class LLM(abc.ABC):
         if kwargs:
             raise ValueError(f"Unrecognized keyword arguments: {kwargs}")
+    @classmethod
+    @abstractmethod
+    def check_lib(cls) -> bool:
+        raise NotImplementedError
     @staticmethod
     def _is_darwin_and_apple_silicon():
         return platform.system() == "Darwin" and platform.processor() == "arm"
@@ -117,16 +123,33 @@ class LLM(abc.ABC):
     @classmethod
     def match(
         cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
+    ) -> bool:
+        if not cls.check_lib():
+            return False
+        return cls.match_json(llm_family, llm_spec, quantization)
+    @classmethod
+    @abstractmethod
+    def match_json(
+        cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
     ) -> bool:
         raise NotImplementedError
-    def prepare_parse_reasoning_content(self, reasoning_content):
-        # Initialize reasoning parser if model has reasoning ability
-        if "reasoning" in self.model_family.model_ability and reasoning_content:
-            self.reasoning_parser = ReasoningParser(
-                self.model_family.reasoning_start_tag,
-                self.model_family.reasoning_end_tag,
+    def prepare_parse_reasoning_content(
+        self, reasoning_content: bool, enable_thinking: bool = True
+    ):
+        if "hybrid" not in self.model_family.model_ability and not enable_thinking:
+            enable_thinking = True
+            warnings.warn(
+                "enable_thinking cannot be disabled for non hybrid model, will be ignored"
             )
+        # Initialize reasoning parser if model has reasoning ability
+        self.reasoning_parser = ReasoningParser(  # type: ignore
+            reasoning_content,
+            self.model_family.reasoning_start_tag,  # type: ignore
+            self.model_family.reasoning_end_tag,  # type: ignore
+            enable_thinking=enable_thinking,
+        )
 class LLMDescription(ModelDescription):