xinference 1.5.0.post2__py3-none-any.whl → 1.5.1__py3-none-any.whl
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +10 -3
- xinference/constants.py +5 -1
- xinference/core/supervisor.py +1 -1
- xinference/core/utils.py +1 -1
- xinference/core/worker.py +2 -2
- xinference/deploy/cmdline.py +17 -0
- xinference/model/audio/core.py +1 -1
- xinference/model/audio/model_spec.json +43 -43
- xinference/model/audio/model_spec_modelscope.json +13 -13
- xinference/model/llm/__init__.py +3 -5
- xinference/model/llm/core.py +14 -0
- xinference/model/llm/llama_cpp/core.py +15 -4
- xinference/model/llm/llm_family.json +3251 -4304
- xinference/model/llm/llm_family.py +62 -6
- xinference/model/llm/llm_family_csghub.json +0 -32
- xinference/model/llm/llm_family_modelscope.json +1161 -1789
- xinference/model/llm/llm_family_openmind_hub.json +19 -325
- xinference/model/llm/lmdeploy/core.py +7 -2
- xinference/model/llm/mlx/core.py +19 -6
- xinference/model/llm/sglang/core.py +25 -10
- xinference/model/llm/transformers/chatglm.py +8 -1
- xinference/model/llm/transformers/cogagent.py +10 -12
- xinference/model/llm/transformers/cogvlm2.py +6 -3
- xinference/model/llm/transformers/cogvlm2_video.py +3 -6
- xinference/model/llm/transformers/core.py +50 -58
- xinference/model/llm/transformers/deepseek_v2.py +4 -2
- xinference/model/llm/transformers/deepseek_vl.py +10 -4
- xinference/model/llm/transformers/deepseek_vl2.py +9 -4
- xinference/model/llm/transformers/gemma3.py +4 -5
- xinference/model/llm/transformers/glm4v.py +2 -20
- xinference/model/llm/transformers/glm_edge_v.py +3 -20
- xinference/model/llm/transformers/intern_vl.py +3 -6
- xinference/model/llm/transformers/internlm2.py +1 -1
- xinference/model/llm/transformers/minicpmv25.py +4 -2
- xinference/model/llm/transformers/minicpmv26.py +5 -3
- xinference/model/llm/transformers/omnilmm.py +1 -1
- xinference/model/llm/transformers/opt.py +1 -1
- xinference/model/llm/transformers/ovis2.py +302 -0
- xinference/model/llm/transformers/qwen-omni.py +2 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +5 -1
- xinference/model/llm/transformers/qwen_vl.py +5 -2
- xinference/model/llm/utils.py +28 -0
- xinference/model/llm/vllm/core.py +73 -9
- xinference/model/llm/vllm/distributed_executor.py +8 -7
- xinference/model/llm/vllm/xavier/allocator.py +1 -1
- xinference/model/llm/vllm/xavier/block_manager.py +1 -1
- xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
- xinference/model/llm/vllm/xavier/executor.py +1 -1
- xinference/model/llm/vllm/xavier/test/test_xavier.py +1 -1
- xinference/model/video/diffusers.py +30 -3
- xinference/model/video/model_spec.json +46 -0
- xinference/model/video/model_spec_modelscope.json +48 -0
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
- xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
- xinference/web/ui/build/static/js/main.91e77b5c.js +3 -0
- xinference/web/ui/build/static/js/main.91e77b5c.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5e6edb0fb87e3798f142e9abf8dd2dc46bab33a60d31dff525797c0c99887097.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6087820be1bd5c02c42dff797e7df365448ef35ab26dd5d6bd33e967e05cbfd4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
- xinference/web/ui/src/locales/en.json +1 -0
- xinference/web/ui/src/locales/zh.json +1 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/METADATA +1 -1
- {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/RECORD +77 -78
- {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/WHEEL +1 -1
- xinference/model/llm/transformers/compression.py +0 -258
- xinference/model/llm/transformers/yi_vl.py +0 -239
- xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
- xinference/web/ui/build/static/js/main.4b67a723.js +0 -3
- xinference/web/ui/build/static/js/main.4b67a723.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e4ba658c6b3b0490910acdae0c535a892257efb61539a24adf8038fc653bd22f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
- /xinference/web/ui/build/static/js/{main.4b67a723.js.LICENSE.txt → main.91e77b5c.js.LICENSE.txt} +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/__init__.py
CHANGED

@@ -57,7 +57,7 @@ from .llm_family import (
 
 def check_format_with_engine(model_format, engine):
     # only llama-cpp-python support and only support ggufv2
-    if model_format in ["ggufv2"] and engine not in ["llama.cpp"]:
+    if model_format in ["ggufv2"] and engine not in ["llama.cpp", "vLLM"]:
         return False
     if model_format not in ["ggufv2"] and engine == "llama.cpp":
         return False
@@ -147,13 +147,12 @@ def _install():
     from .transformers.gemma3 import Gemma3ChatModel, Gemma3TextChatModel
     from .transformers.glm4v import Glm4VModel
     from .transformers.glm_edge_v import GlmEdgeVModel
-    from .transformers.internlm2 import Internlm2PytorchChatModel
     from .transformers.minicpmv25 import MiniCPMV25Model
     from .transformers.minicpmv26 import MiniCPMV26Model
     from .transformers.opt import OptPytorchModel
+    from .transformers.ovis2 import Ovis2ChatModel
     from .transformers.qwen2_audio import Qwen2AudioChatModel
     from .transformers.qwen_vl import QwenVLChatModel
-    from .transformers.yi_vl import YiVLChatModel
     from .vllm.core import VLLMChatModel, VLLMModel, VLLMVisionModel
 
     try:
@@ -180,10 +179,8 @@ def _install():
         [
             ChatglmPytorchChatModel,
             PytorchChatModel,
-            Internlm2PytorchChatModel,
             QwenVLChatModel,
             Qwen2AudioChatModel,
-            YiVLChatModel,
             DeepSeekVLChatModel,
             DeepSeekVL2ChatModel,
             PytorchModel,
@@ -199,6 +196,7 @@ def _install():
             CogAgentChatModel,
             Gemma3TextChatModel,
             Gemma3ChatModel,
+            Ovis2ChatModel,
         ]
     )
     if OmniLMMModel:  # type: ignore
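The only behavioral change in check_format_with_engine is that GGUF checkpoints are no longer tied to llama.cpp: the ggufv2 format now also accepts the vLLM engine. Below is a minimal standalone sketch of the resulting truth table; it re-implements the function for illustration and assumes it falls through to return True after the two guard clauses, which is outside the visible hunk.

```python
# Standalone sketch of the 1.5.1 check above (assumption: the real
# function ends with `return True` after the two guards shown).
def check_format_with_engine(model_format: str, engine: str) -> bool:
    if model_format in ["ggufv2"] and engine not in ["llama.cpp", "vLLM"]:
        return False
    if model_format not in ["ggufv2"] and engine == "llama.cpp":
        return False
    return True

assert check_format_with_engine("ggufv2", "vLLM")       # newly allowed in 1.5.1
assert check_format_with_engine("ggufv2", "llama.cpp")  # unchanged
assert not check_format_with_engine("ggufv2", "Transformers")
assert not check_format_with_engine("pytorch", "llama.cpp")
```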
xinference/model/llm/core.py
CHANGED

@@ -65,6 +65,11 @@ class LLM(abc.ABC):
         if kwargs:
             raise ValueError(f"Unrecognized keyword arguments: {kwargs}")
 
+    @classmethod
+    @abstractmethod
+    def check_lib(cls) -> bool:
+        raise NotImplementedError
+
     @staticmethod
     def _is_darwin_and_apple_silicon():
         return platform.system() == "Darwin" and platform.processor() == "arm"
@@ -117,6 +122,15 @@ class LLM(abc.ABC):
     @classmethod
     def match(
         cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
+    ) -> bool:
+        if not cls.check_lib():
+            return False
+        return cls.match_json(llm_family, llm_spec, quantization)
+
+    @classmethod
+    @abstractmethod
+    def match_json(
+        cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
     ) -> bool:
         raise NotImplementedError
 
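This hunk turns match from an abstract hook into a concrete template method: it first calls the new check_lib (is the backing library importable at all?) and only then delegates to match_json, which inherits the old spec-matching role. A hypothetical subclass under the new contract could look like the sketch below; MyEngineModel and the my_engine package are invented names, and the import paths follow the files shown in this diff.

```python
import importlib.util

from xinference.model.llm.core import LLM
from xinference.model.llm.llm_family import LLMFamilyV1, LLMSpecV1


class MyEngineModel(LLM):
    """Hypothetical engine class following the 1.5.1 contract."""

    @classmethod
    def check_lib(cls) -> bool:
        # Cheap availability probe; "my_engine" is a placeholder package.
        return importlib.util.find_spec("my_engine") is not None

    @classmethod
    def match_json(
        cls, llm_family: LLMFamilyV1, llm_spec: LLMSpecV1, quantization: str
    ) -> bool:
        # Spec-level matching only; LLM.match() has already verified the
        # library via check_lib() before this method is reached.
        return llm_spec.model_format == "pytorch"
```

Splitting the check this way keeps an uninstalled backend from ever being offered as an engine, without each subclass repeating the import probe inside its matching logic; the llama.cpp hunks below show the same pattern applied in-tree.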
xinference/model/llm/llama_cpp/core.py
CHANGED

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import concurrent.futures
+import importlib.util
 import logging
 import os
 import queue
@@ -116,7 +117,11 @@ class XllamaCppModel(LLM, ChatModelMixin):
         return generate_config
 
     @classmethod
-    def match(
+    def check_lib(cls) -> bool:
+        return importlib.util.find_spec("xllamacpp") is not None
+
+    @classmethod
+    def match_json(
         cls, llm_family: LLMFamilyV1, llm_spec: LLMSpecV1, quantization: str
     ) -> bool:
         if llm_spec.model_format not in ["ggufv2"]:
@@ -464,7 +469,11 @@ class LlamaCppModel(LLM):
         raise RuntimeError(f"Load model {self.model_family.model_name} failed")
 
     @classmethod
-    def match(
+    def check_lib(cls) -> bool:
+        return importlib.util.find_spec("llama_cpp") is not None
+
+    @classmethod
+    def match_json(
         cls, llm_family: LLMFamilyV1, llm_spec: LLMSpecV1, quantization: str
     ) -> bool:
         if llm_spec.model_format not in ["ggufv2"]:
@@ -565,7 +574,7 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
         )
 
     @classmethod
-    def match(
+    def match_json(
         cls, llm_family: LLMFamilyV1, llm_spec: LLMSpecV1, quantization: str
     ) -> bool:
         if llm_spec.model_format not in ["ggufv2"]:
@@ -589,7 +598,9 @@
     ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
         model_family = self.model_family.model_family or self.model_family.model_name
         tools = generate_config.pop("tools", []) if generate_config else None
-        full_context_kwargs = {}
+        full_context_kwargs = (
+            self._get_chat_template_kwargs_from_generate_config(generate_config) or {}  # type: ignore
+        )
         if tools:
             if (
                 model_family in QWEN_TOOL_CALL_FAMILY