xinference 0.14.2__py3-none-any.whl → 0.14.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (137)
  1. xinference/_version.py +3 -3
  2. xinference/core/chat_interface.py +1 -1
  3. xinference/core/image_interface.py +9 -0
  4. xinference/core/model.py +4 -1
  5. xinference/core/worker.py +48 -41
  6. xinference/model/audio/chattts.py +24 -9
  7. xinference/model/audio/core.py +8 -2
  8. xinference/model/audio/fish_speech.py +228 -0
  9. xinference/model/audio/model_spec.json +8 -0
  10. xinference/model/embedding/core.py +23 -1
  11. xinference/model/image/model_spec.json +2 -1
  12. xinference/model/image/model_spec_modelscope.json +2 -1
  13. xinference/model/image/stable_diffusion/core.py +49 -1
  14. xinference/model/llm/__init__.py +6 -0
  15. xinference/model/llm/llm_family.json +54 -9
  16. xinference/model/llm/llm_family.py +2 -0
  17. xinference/model/llm/llm_family_modelscope.json +56 -10
  18. xinference/model/llm/lmdeploy/__init__.py +0 -0
  19. xinference/model/llm/lmdeploy/core.py +557 -0
  20. xinference/model/llm/transformers/cogvlm2.py +4 -45
  21. xinference/model/llm/transformers/cogvlm2_video.py +524 -0
  22. xinference/model/llm/transformers/core.py +1 -0
  23. xinference/model/llm/transformers/glm4v.py +2 -23
  24. xinference/model/llm/transformers/intern_vl.py +94 -11
  25. xinference/model/llm/transformers/minicpmv25.py +2 -23
  26. xinference/model/llm/transformers/minicpmv26.py +2 -22
  27. xinference/model/llm/transformers/yi_vl.py +2 -24
  28. xinference/model/llm/utils.py +10 -1
  29. xinference/model/llm/vllm/core.py +1 -1
  30. xinference/thirdparty/fish_speech/__init__.py +0 -0
  31. xinference/thirdparty/fish_speech/fish_speech/__init__.py +0 -0
  32. xinference/thirdparty/fish_speech/fish_speech/callbacks/__init__.py +3 -0
  33. xinference/thirdparty/fish_speech/fish_speech/callbacks/grad_norm.py +113 -0
  34. xinference/thirdparty/fish_speech/fish_speech/configs/__init__.py +0 -0
  35. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  36. xinference/thirdparty/fish_speech/fish_speech/conversation.py +2 -0
  37. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  38. xinference/thirdparty/fish_speech/fish_speech/datasets/concat_repeat.py +53 -0
  39. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  40. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_pb2.py +33 -0
  41. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_stream.py +36 -0
  42. xinference/thirdparty/fish_speech/fish_speech/datasets/semantic.py +496 -0
  43. xinference/thirdparty/fish_speech/fish_speech/datasets/vqgan.py +147 -0
  44. xinference/thirdparty/fish_speech/fish_speech/i18n/__init__.py +3 -0
  45. xinference/thirdparty/fish_speech/fish_speech/i18n/core.py +40 -0
  46. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  47. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +122 -0
  48. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +122 -0
  49. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +123 -0
  50. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +133 -0
  51. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +122 -0
  52. xinference/thirdparty/fish_speech/fish_speech/i18n/scan.py +122 -0
  53. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  54. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/__init__.py +0 -0
  55. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lit_module.py +202 -0
  56. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +779 -0
  57. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lora.py +92 -0
  58. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +3 -0
  59. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +442 -0
  60. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  61. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +44 -0
  62. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +625 -0
  63. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +139 -0
  64. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +115 -0
  65. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +225 -0
  66. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/utils.py +94 -0
  67. xinference/thirdparty/fish_speech/fish_speech/scheduler.py +40 -0
  68. xinference/thirdparty/fish_speech/fish_speech/text/__init__.py +4 -0
  69. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/__init__.py +0 -0
  70. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_class.py +172 -0
  71. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_constant.py +30 -0
  72. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_util.py +342 -0
  73. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/cardinal.py +32 -0
  74. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/date.py +75 -0
  75. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/digit.py +32 -0
  76. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/fraction.py +35 -0
  77. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/money.py +43 -0
  78. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/percentage.py +33 -0
  79. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/telephone.py +51 -0
  80. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +177 -0
  81. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +69 -0
  82. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +130 -0
  83. xinference/thirdparty/fish_speech/fish_speech/train.py +139 -0
  84. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +23 -0
  85. xinference/thirdparty/fish_speech/fish_speech/utils/braceexpand.py +217 -0
  86. xinference/thirdparty/fish_speech/fish_speech/utils/context.py +13 -0
  87. xinference/thirdparty/fish_speech/fish_speech/utils/file.py +16 -0
  88. xinference/thirdparty/fish_speech/fish_speech/utils/instantiators.py +50 -0
  89. xinference/thirdparty/fish_speech/fish_speech/utils/logger.py +55 -0
  90. xinference/thirdparty/fish_speech/fish_speech/utils/logging_utils.py +48 -0
  91. xinference/thirdparty/fish_speech/fish_speech/utils/rich_utils.py +100 -0
  92. xinference/thirdparty/fish_speech/fish_speech/utils/spectrogram.py +122 -0
  93. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +114 -0
  94. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  95. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +120 -0
  96. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1237 -0
  97. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  98. xinference/thirdparty/fish_speech/tools/api.py +495 -0
  99. xinference/thirdparty/fish_speech/tools/auto_rerank.py +159 -0
  100. xinference/thirdparty/fish_speech/tools/download_models.py +55 -0
  101. xinference/thirdparty/fish_speech/tools/extract_model.py +21 -0
  102. xinference/thirdparty/fish_speech/tools/file.py +108 -0
  103. xinference/thirdparty/fish_speech/tools/gen_ref.py +36 -0
  104. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  105. xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +169 -0
  106. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +171 -0
  107. xinference/thirdparty/fish_speech/tools/llama/generate.py +698 -0
  108. xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +95 -0
  109. xinference/thirdparty/fish_speech/tools/llama/quantize.py +497 -0
  110. xinference/thirdparty/fish_speech/tools/llama/rebuild_tokenizer.py +57 -0
  111. xinference/thirdparty/fish_speech/tools/merge_asr_files.py +55 -0
  112. xinference/thirdparty/fish_speech/tools/post_api.py +164 -0
  113. xinference/thirdparty/fish_speech/tools/sensevoice/__init__.py +0 -0
  114. xinference/thirdparty/fish_speech/tools/sensevoice/auto_model.py +573 -0
  115. xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +332 -0
  116. xinference/thirdparty/fish_speech/tools/sensevoice/vad_utils.py +61 -0
  117. xinference/thirdparty/fish_speech/tools/smart_pad.py +47 -0
  118. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  119. xinference/thirdparty/fish_speech/tools/vqgan/create_train_split.py +83 -0
  120. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +227 -0
  121. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +120 -0
  122. xinference/thirdparty/fish_speech/tools/webui.py +619 -0
  123. xinference/thirdparty/fish_speech/tools/whisper_asr.py +176 -0
  124. xinference/web/ui/build/asset-manifest.json +3 -3
  125. xinference/web/ui/build/index.html +1 -1
  126. xinference/web/ui/build/static/js/{main.ffc26121.js → main.661c7b0a.js} +3 -3
  127. xinference/web/ui/build/static/js/main.661c7b0a.js.map +1 -0
  128. xinference/web/ui/node_modules/.cache/babel-loader/070d8c6b3b0f3485c6d3885f0b6bbfdf9643e088a468acbd5d596f2396071c16.json +1 -0
  129. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/METADATA +18 -6
  130. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/RECORD +135 -37
  131. xinference/web/ui/build/static/js/main.ffc26121.js.map +0 -1
  132. xinference/web/ui/node_modules/.cache/babel-loader/2f40209b32e7e46a2eab6b8c8a355eb42c3caa8bc3228dd929f32fd2b3940294.json +0 -1
  133. /xinference/web/ui/build/static/js/{main.ffc26121.js.LICENSE.txt → main.661c7b0a.js.LICENSE.txt} +0 -0
  134. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/LICENSE +0 -0
  135. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/WHEEL +0 -0
  136. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/entry_points.txt +0 -0
  137. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/top_level.txt +0 -0
xinference/model/image/stable_diffusion/core.py

@@ -24,6 +24,9 @@ from functools import partial
 from io import BytesIO
 from typing import Dict, List, Optional, Union
 
+import PIL.Image
+from PIL import ImageOps
+
 from ....constants import XINFERENCE_IMAGE_DIR
 from ....device_utils import move_model_to_available_device
 from ....types import Image, ImageList, LoRA
@@ -46,8 +49,13 @@ class DiffusionModel:
         self._model_uid = model_uid
         self._model_path = model_path
         self._device = device
+        # when a model has text2image ability,
+        # it will be loaded as AutoPipelineForText2Image
+        # for image2image and inpainting,
+        # we convert to the corresponding model
         self._model = None
         self._i2i_model = None  # image to image model
+        self._inpainting_model = None  # inpainting model
         self._lora_model = lora_model
         self._lora_load_kwargs = lora_load_kwargs or {}
         self._lora_fuse_kwargs = lora_fuse_kwargs or {}
@@ -152,6 +160,10 @@
         model=None,
         **kwargs,
     ):
+        import gc
+
+        from ....device_utils import empty_cache
+
         logger.debug(
             "stable diffusion args: %s",
             kwargs,
@@ -159,6 +171,11 @@
         model = model if model is not None else self._model
         assert callable(model)
         images = model(**kwargs).images
+
+        # clean cache
+        gc.collect()
+        empty_cache()
+
         if response_format == "url":
             os.makedirs(XINFERENCE_IMAGE_DIR, exist_ok=True)
             image_list = []
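
The cleanup above calls empty_cache from xinference's device_utils module, whose body is not part of this diff. As a rough sketch of what such a device-agnostic cache flush typically does (the function body below is an assumption, not the shipped implementation):

    import gc

    import torch


    def empty_cache():
        # Assumed sketch of a device-agnostic cache flush; the real
        # xinference.device_utils.empty_cache may cover more backends.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
            torch.mps.empty_cache()


    # Mirrors the cleanup added after image generation in _call_model.
    gc.collect()
    empty_cache()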
@@ -209,9 +226,17 @@
             **kwargs,
         )
 
+    @staticmethod
+    def pad_to_multiple(image, multiple=8):
+        x, y = image.size
+        padding_x = (multiple - x % multiple) % multiple
+        padding_y = (multiple - y % multiple) % multiple
+        padding = (0, 0, padding_x, padding_y)
+        return ImageOps.expand(image, padding)
+
     def image_to_image(
         self,
-        image: bytes,
+        image: PIL.Image,
         prompt: Optional[Union[str, List[str]]] = None,
         negative_prompt: Optional[Union[str, List[str]]] = None,
         n: int = 1,
@@ -236,6 +261,11 @@
             width, height = map(int, re.split(r"[^\d]+", size))
             kwargs["width"] = width
             kwargs["height"] = height
+        if padding_image_to_multiple := kwargs.pop("padding_image_to_multiple", None):
+            # Model like SD3 image to image requires image's height and width is times of 16
+            # padding the image if specified
+            image = self.pad_to_multiple(image, multiple=int(padding_image_to_multiple))
+
         self._filter_kwargs(kwargs)
         return self._call_model(
             image=image,
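
Because the new padding_image_to_multiple option is just a thin wrapper around pad_to_multiple, its effect is easy to verify in isolation with plain Pillow (the image sizes below are arbitrary examples):

    from PIL import Image, ImageOps


    def pad_to_multiple(image, multiple=8):
        # Same logic as the new DiffusionModel.pad_to_multiple staticmethod:
        # pad the right/bottom edges until both dimensions are multiples of `multiple`.
        x, y = image.size
        padding_x = (multiple - x % multiple) % multiple
        padding_y = (multiple - y % multiple) % multiple
        return ImageOps.expand(image, (0, 0, padding_x, padding_y))


    img = Image.new("RGB", (500, 333))
    padded = pad_to_multiple(img, multiple=16)
    print(padded.size)  # (512, 336) -- both dimensions now divisible by 16,
                        # as SD3-style image-to-image pipelines expect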
@@ -258,6 +288,23 @@
         response_format: str = "url",
         **kwargs,
     ):
+        if "inpainting" not in self._abilities:
+            raise RuntimeError(f"{self._model_uid} does not support inpainting")
+
+        if (
+            "text2image" in self._abilities or "image2image" in self._abilities
+        ) and self._model is not None:
+            from diffusers import AutoPipelineForInpainting
+
+            if self._inpainting_model is not None:
+                model = self._inpainting_model
+            else:
+                model = self._inpainting_model = AutoPipelineForInpainting.from_pipe(
+                    self._model
+                )
+        else:
+            model = self._model
+
         width, height = map(int, re.split(r"[^\d]+", size))
         return self._call_model(
             image=image,
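
The inpainting branch reuses the already-loaded pipeline through diffusers' from_pipe, which derives an inpainting pipeline from the same weights instead of loading a second copy. A minimal standalone sketch of that pattern (the model id is a placeholder, not something prescribed by this release):

    import torch
    from diffusers import AutoPipelineForInpainting, AutoPipelineForText2Image

    # Loaded once as text-to-image, as xinference does for models with the
    # "text2image" ability.
    t2i = AutoPipelineForText2Image.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",  # placeholder model id
        torch_dtype=torch.float16,
    )

    # Derive an inpainting pipeline that shares the loaded components; this is
    # what the new `_inpainting_model` attribute caches above.
    inpaint = AutoPipelineForInpainting.from_pipe(t2i)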
@@ -268,5 +315,6 @@
             width=width,
             num_images_per_prompt=n,
             response_format=response_format,
+            model=model,
             **kwargs,
         )
xinference/model/llm/__init__.py

@@ -34,6 +34,7 @@ from .llm_family import (
     BUILTIN_MODELSCOPE_LLM_FAMILIES,
     LLAMA_CLASSES,
     LLM_ENGINES,
+    LMDEPLOY_CLASSES,
     MLX_CLASSES,
     SGLANG_CLASSES,
     SUPPORTED_ENGINES,
@@ -113,10 +114,12 @@ def generate_engine_config_by_model_family(model_family):
 
 def _install():
     from .llama_cpp.core import LlamaCppChatModel, LlamaCppModel
+    from .lmdeploy.core import LMDeployChatModel, LMDeployModel
     from .mlx.core import MLXChatModel, MLXModel
     from .sglang.core import SGLANGChatModel, SGLANGModel
     from .transformers.chatglm import ChatglmPytorchChatModel
     from .transformers.cogvlm2 import CogVLM2Model
+    from .transformers.cogvlm2_video import CogVLM2VideoModel
     from .transformers.core import PytorchChatModel, PytorchModel
     from .transformers.deepseek_vl import DeepSeekVLChatModel
     from .transformers.glm4v import Glm4VModel
@@ -147,6 +150,7 @@ def _install():
     SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel])
     VLLM_CLASSES.extend([VLLMModel, VLLMChatModel, VLLMVisionModel])
     MLX_CLASSES.extend([MLXModel, MLXChatModel])
+    LMDEPLOY_CLASSES.extend([LMDeployModel, LMDeployChatModel])
     TRANSFORMERS_CLASSES.extend(
         [
             ChatglmPytorchChatModel,
@@ -160,6 +164,7 @@ def _install():
             InternVLChatModel,
             PytorchModel,
             CogVLM2Model,
+            CogVLM2VideoModel,
             MiniCPMV25Model,
             MiniCPMV26Model,
             Glm4VModel,
@@ -174,6 +179,7 @@ def _install():
     SUPPORTED_ENGINES["Transformers"] = TRANSFORMERS_CLASSES
     SUPPORTED_ENGINES["llama.cpp"] = LLAMA_CLASSES
     SUPPORTED_ENGINES["MLX"] = MLX_CLASSES
+    SUPPORTED_ENGINES["LMDEPLOY"] = LMDEPLOY_CLASSES
 
     json_path = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "llm_family.json"
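
With LMDEPLOY now registered in SUPPORTED_ENGINES, the engine can be requested at launch time. A hedged sketch from the Python client (the endpoint and model choice are placeholders; which families actually ship LMDeploy specs depends on the family definitions, so verify before relying on this):

    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")  # placeholder endpoint
    model_uid = client.launch_model(
        model_name="internvl2",          # placeholder: a family with LMDeploy support
        model_engine="LMDEPLOY",         # engine key added in this release
        model_format="awq",
        quantization="Int4",
        model_size_in_billions=8,
    )
    model = client.get_model(model_uid)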
xinference/model/llm/llm_family.json

@@ -7189,15 +7189,6 @@
         "model_id": "OpenGVLab/InternVL2-4B",
         "model_revision": "b50544dafada6c41e80bfde2f57cc9b0140fc21c"
       },
-      {
-        "model_format": "awq",
-        "model_size_in_billions": 4,
-        "quantizations": [
-          "Int4"
-        ],
-        "model_id": "OpenGVLab/InternVL2-8B-AWQ",
-        "model_revision": "9f1a4756b7ae18eb26d8a22b618dfc283e8193b3"
-      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 8,
@@ -7209,6 +7200,15 @@
         "model_id": "OpenGVLab/InternVL2-8B",
         "model_revision": "3bfd3664dea4f3da628785f5125d30f889701253"
       },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "OpenGVLab/InternVL2-8B-AWQ",
+        "model_revision": "9f1a4756b7ae18eb26d8a22b618dfc283e8193b3"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 26,
@@ -7342,6 +7342,51 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "cogvlm2-video-llama3-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "CogVLM2-Video achieves state-of-the-art performance on multiple video question answering tasks.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "THUDM/cogvlm2-video-llama3-chat",
+        "model_revision": "f375ead7d8202ebe2c3d09f1068abdddeb2929fa"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "LLAMA3",
+      "system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "intra_message_sep": "\n\n",
+      "inter_message_sep": "<|eot_id|>",
+      "stop_token_ids": [
+        128001,
+        128009
+      ],
+      "stop": [
+        "<|end_of_text|>",
+        "<|eot_id|>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 8192,
xinference/model/llm/llm_family.py

@@ -271,6 +271,8 @@ VLLM_CLASSES: List[Type[LLM]] = []
 
 MLX_CLASSES: List[Type[LLM]] = []
 
+LMDEPLOY_CLASSES: List[Type[LLM]] = []
+
 LLM_ENGINES: Dict[str, Dict[str, List[Dict[str, Any]]]] = {}
 SUPPORTED_ENGINES: Dict[str, List[Type[LLM]]] = {}
 
xinference/model/llm/llm_family_modelscope.json

@@ -4778,10 +4778,10 @@
         "model_revision": "master"
       },
       {
-        "model_format": "pytorch",
+        "model_format": "awq",
         "model_size_in_billions": 2,
         "quantizations": [
-          "none"
+          "Int4"
         ],
         "model_hub": "modelscope",
         "model_id": "OpenGVLab/InternVL2-2B-AWQ",
@@ -4812,10 +4812,10 @@
         "model_revision": "master"
       },
       {
-        "model_format": "pytorch",
+        "model_format": "awq",
         "model_size_in_billions": 8,
         "quantizations": [
-          "none"
+          "Int4"
         ],
         "model_hub": "modelscope",
         "model_id": "OpenGVLab/InternVL2-8B-AWQ",
@@ -4834,10 +4834,10 @@
         "model_revision": "master"
       },
       {
-        "model_format": "pytorch",
+        "model_format": "awq",
         "model_size_in_billions": 26,
         "quantizations": [
-          "none"
+          "Int4"
         ],
         "model_hub": "modelscope",
         "model_id": "OpenGVLab/InternVL2-26B-AWQ",
@@ -4856,10 +4856,10 @@
         "model_revision": "master"
       },
       {
-        "model_format": "pytorch",
+        "model_format": "awq",
         "model_size_in_billions": 40,
         "quantizations": [
-          "none"
+          "Int4"
         ],
         "model_hub": "modelscope",
         "model_id": "OpenGVLab/InternVL2-40B-AWQ",
@@ -4878,10 +4878,10 @@
         "model_revision": "master"
       },
       {
-        "model_format": "pytorch",
+        "model_format": "awq",
         "model_size_in_billions": 76,
         "quantizations": [
-          "none"
+          "Int4"
         ],
         "model_hub": "modelscope",
         "model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ",
@@ -4962,6 +4962,52 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "cogvlm2-video-llama3-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "CogVLM2-Video achieves state-of-the-art performance on multiple video question answering tasks.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "ZhipuAI/cogvlm2-video-llama3-chat",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "LLAMA3",
+      "system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "intra_message_sep": "\n\n",
+      "inter_message_sep": "<|eot_id|>",
+      "stop_token_ids": [
+        128001,
+        128009
+      ],
+      "stop": [
+        "<|end_of_text|>",
+        "<|eot_id|>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 8192,