xinference 0.15.0__py3-none-any.whl → 0.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (84)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +204 -1
  3. xinference/client/restful/restful_client.py +4 -2
  4. xinference/core/image_interface.py +28 -0
  5. xinference/core/model.py +30 -2
  6. xinference/core/supervisor.py +6 -0
  7. xinference/model/audio/cosyvoice.py +3 -3
  8. xinference/model/audio/fish_speech.py +9 -9
  9. xinference/model/audio/model_spec.json +9 -9
  10. xinference/model/audio/whisper.py +4 -1
  11. xinference/model/image/core.py +2 -1
  12. xinference/model/image/model_spec.json +16 -4
  13. xinference/model/image/model_spec_modelscope.json +16 -4
  14. xinference/model/image/sdapi.py +136 -0
  15. xinference/model/image/stable_diffusion/core.py +163 -24
  16. xinference/model/llm/__init__.py +9 -1
  17. xinference/model/llm/llm_family.json +1241 -0
  18. xinference/model/llm/llm_family.py +3 -1
  19. xinference/model/llm/llm_family_modelscope.json +1301 -3
  20. xinference/model/llm/sglang/core.py +7 -0
  21. xinference/model/llm/transformers/chatglm.py +1 -1
  22. xinference/model/llm/transformers/core.py +6 -0
  23. xinference/model/llm/transformers/deepseek_v2.py +340 -0
  24. xinference/model/llm/transformers/qwen2_audio.py +168 -0
  25. xinference/model/llm/transformers/qwen2_vl.py +31 -5
  26. xinference/model/llm/utils.py +104 -84
  27. xinference/model/llm/vllm/core.py +13 -0
  28. xinference/thirdparty/fish_speech/fish_speech/configs/firefly_gan_vq.yaml +2 -3
  29. xinference/thirdparty/fish_speech/fish_speech/configs/text2semantic_finetune.yaml +1 -1
  30. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +1 -1
  31. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +1 -1
  32. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +1 -1
  33. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +1 -1
  34. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +1 -1
  35. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +2 -2
  36. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +0 -3
  37. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +169 -198
  38. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +4 -27
  39. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +9 -47
  40. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +2 -2
  41. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -0
  42. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +12 -10
  43. xinference/thirdparty/fish_speech/tools/api.py +79 -134
  44. xinference/thirdparty/fish_speech/tools/commons.py +35 -0
  45. xinference/thirdparty/fish_speech/tools/download_models.py +3 -3
  46. xinference/thirdparty/fish_speech/tools/file.py +17 -0
  47. xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +1 -1
  48. xinference/thirdparty/fish_speech/tools/llama/generate.py +29 -24
  49. xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +1 -1
  50. xinference/thirdparty/fish_speech/tools/llama/quantize.py +2 -2
  51. xinference/thirdparty/fish_speech/tools/msgpack_api.py +34 -0
  52. xinference/thirdparty/fish_speech/tools/post_api.py +85 -44
  53. xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +1 -1
  54. xinference/thirdparty/fish_speech/tools/smart_pad.py +16 -3
  55. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +2 -2
  56. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +4 -2
  57. xinference/thirdparty/fish_speech/tools/webui.py +12 -146
  58. xinference/types.py +7 -4
  59. xinference/web/ui/build/asset-manifest.json +6 -6
  60. xinference/web/ui/build/index.html +1 -1
  61. xinference/web/ui/build/static/css/{main.632e9148.css → main.5061c4c3.css} +2 -2
  62. xinference/web/ui/build/static/css/main.5061c4c3.css.map +1 -0
  63. xinference/web/ui/build/static/js/{main.9cfafbd6.js → main.29578905.js} +3 -3
  64. xinference/web/ui/build/static/js/main.29578905.js.map +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +1 -0
  66. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +1 -0
  67. {xinference-0.15.0.dist-info → xinference-0.15.2.dist-info}/METADATA +13 -7
  68. {xinference-0.15.0.dist-info → xinference-0.15.2.dist-info}/RECORD +73 -75
  69. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +0 -442
  70. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +0 -44
  71. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +0 -115
  72. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +0 -225
  73. xinference/thirdparty/fish_speech/tools/auto_rerank.py +0 -159
  74. xinference/thirdparty/fish_speech/tools/gen_ref.py +0 -36
  75. xinference/thirdparty/fish_speech/tools/merge_asr_files.py +0 -55
  76. xinference/web/ui/build/static/css/main.632e9148.css.map +0 -1
  77. xinference/web/ui/build/static/js/main.9cfafbd6.js.map +0 -1
  78. xinference/web/ui/node_modules/.cache/babel-loader/01d6d198156bacbd436c51435edbd4b2cacd47a79db929105eba30f74b67d48d.json +0 -1
  79. xinference/web/ui/node_modules/.cache/babel-loader/59eb25f514afcc4fefd1b309d192b2455f1e0aec68a9de598ca4b2333fe2c774.json +0 -1
  80. /xinference/web/ui/build/static/js/{main.9cfafbd6.js.LICENSE.txt → main.29578905.js.LICENSE.txt} +0 -0
  81. {xinference-0.15.0.dist-info → xinference-0.15.2.dist-info}/LICENSE +0 -0
  82. {xinference-0.15.0.dist-info → xinference-0.15.2.dist-info}/WHEEL +0 -0
  83. {xinference-0.15.0.dist-info → xinference-0.15.2.dist-info}/entry_points.txt +0 -0
  84. {xinference-0.15.0.dist-info → xinference-0.15.2.dist-info}/top_level.txt +0 -0
xinference/model/image/model_spec_modelscope.json

@@ -6,7 +6,9 @@
         "model_id": "AI-ModelScope/FLUX.1-schnell",
         "model_revision": "master",
         "model_ability": [
-            "text2image"
+            "text2image",
+            "image2image",
+            "inpainting"
         ]
     },
     {
@@ -16,7 +18,9 @@
         "model_id": "AI-ModelScope/FLUX.1-dev",
         "model_revision": "master",
         "model_ability": [
-            "text2image"
+            "text2image",
+            "image2image",
+            "inpainting"
         ]
     },
     {
@@ -39,7 +43,11 @@
         "model_revision": "master",
         "model_ability": [
             "text2image"
-        ]
+        ],
+        "default_generate_config": {
+            "guidance_scale": 0.0,
+            "num_inference_steps": 1
+        }
     },
     {
         "model_name": "sdxl-turbo",
@@ -49,7 +57,11 @@
         "model_revision": "master",
         "model_ability": [
             "text2image"
-        ]
+        ],
+        "default_generate_config": {
+            "guidance_scale": 0.0,
+            "num_inference_steps": 1
+        }
     },
     {
         "model_name": "stable-diffusion-v1.5",
xinference/model/image/sdapi.py (new file)

@@ -0,0 +1,136 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import base64
+import io
+import warnings
+
+from PIL import Image
+
+
+class SDAPIToDiffusersConverter:
+    txt2img_identical_args = {
+        "prompt",
+        "negative_prompt",
+        "seed",
+        "width",
+        "height",
+        "sampler_name",
+    }
+    txt2img_arg_mapping = {
+        "steps": "num_inference_steps",
+        "cfg_scale": "guidance_scale",
+        # "denoising_strength": "strength",
+    }
+    img2img_identical_args = {
+        "prompt",
+        "negative_prompt",
+        "seed",
+        "width",
+        "height",
+        "sampler_name",
+    }
+    img2img_arg_mapping = {
+        "init_images": "image",
+        "steps": "num_inference_steps",
+        "cfg_scale": "guidance_scale",
+        "denoising_strength": "strength",
+    }
+
+    @staticmethod
+    def convert_to_diffusers(sd_type: str, params: dict) -> dict:
+        diffusers_params = {}
+
+        identical_args = getattr(SDAPIToDiffusersConverter, f"{sd_type}_identical_args")
+        mapping_args = getattr(SDAPIToDiffusersConverter, f"{sd_type}_arg_mapping")
+        for param, value in params.items():
+            if param in identical_args:
+                diffusers_params[param] = value
+            elif param in mapping_args:
+                diffusers_params[mapping_args[param]] = value
+            else:
+                raise ValueError(f"Unknown arg: {param}")
+
+        return diffusers_params
+
+    @staticmethod
+    def get_available_args(sd_type: str) -> set:
+        identical_args = getattr(SDAPIToDiffusersConverter, f"{sd_type}_identical_args")
+        mapping_args = getattr(SDAPIToDiffusersConverter, f"{sd_type}_arg_mapping")
+        return identical_args.union(mapping_args)
+
+
+class SDAPIDiffusionModelMixin:
+    @staticmethod
+    def _check_kwargs(sd_type: str, kwargs: dict):
+        available_args = SDAPIToDiffusersConverter.get_available_args(sd_type)
+        unknown_args = []
+        available_kwargs = {}
+        for arg, value in kwargs.items():
+            if arg in available_args:
+                available_kwargs[arg] = value
+            else:
+                unknown_args.append(arg)
+        if unknown_args:
+            warnings.warn(
+                f"Some args are not supported for now and will be ignored: {unknown_args}"
+            )
+
+        converted_kwargs = SDAPIToDiffusersConverter.convert_to_diffusers(
+            sd_type, available_kwargs
+        )
+
+        width, height = converted_kwargs.pop("width", None), converted_kwargs.pop(
+            "height", None
+        )
+        if width and height:
+            converted_kwargs["size"] = f"{width}*{height}"
+
+        return converted_kwargs
+
+    def txt2img(self, **kwargs):
+        converted_kwargs = self._check_kwargs("txt2img", kwargs)
+        result = self.text_to_image(response_format="b64_json", **converted_kwargs)  # type: ignore
+
+        # convert to SD API result
+        return {
+            "images": [r["b64_json"] for r in result["data"]],
+            "info": {"created": result["created"]},
+            "parameters": {},
+        }
+
+    @staticmethod
+    def _decode_b64_img(img_str: str) -> Image:
+        # img_str in a format: "data:image/png;base64," + raw_b64_img(image)
+        f, data = img_str.split(",", 1)
+        f, encode_type = f.split(";", 1)
+        assert encode_type == "base64"
+        f = f.split("/", 1)[1]
+        b = base64.b64decode(data)
+        return Image.open(io.BytesIO(b), formats=[f])
+
+    def img2img(self, **kwargs):
+        init_images = kwargs.pop("init_images", [])
+        kwargs["init_images"] = [self._decode_b64_img(i) for i in init_images]
+        clip_skip = kwargs.get("override_settings", {}).get("clip_skip")
+        converted_kwargs = self._check_kwargs("img2img", kwargs)
+        if clip_skip:
+            converted_kwargs["clip_skip"] = clip_skip
+        result = self.image_to_image(response_format="b64_json", **converted_kwargs)  # type: ignore
+
+        # convert to SD API result
+        return {
+            "images": [r["b64_json"] for r in result["data"]],
+            "info": {"created": result["created"]},
+            "parameters": {},
+        }
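
As a quick illustration of the mapping implemented above, a minimal sketch of feeding an Automatic1111-style txt2img payload through the converter; the payload values are made up for illustration:

from xinference.model.image.sdapi import SDAPIToDiffusersConverter

payload = {
    "prompt": "a lighthouse at dawn",
    "negative_prompt": "blurry",
    "steps": 30,        # renamed to num_inference_steps
    "cfg_scale": 7.5,   # renamed to guidance_scale
    "width": 512,
    "height": 512,
}

diffusers_kwargs = SDAPIToDiffusersConverter.convert_to_diffusers("txt2img", payload)
# identical args pass through unchanged, mapped args are renamed:
# {'prompt': ..., 'negative_prompt': ..., 'num_inference_steps': 30,
#  'guidance_scale': 7.5, 'width': 512, 'height': 512}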
xinference/model/image/stable_diffusion/core.py

@@ -13,28 +13,72 @@
 # limitations under the License.

 import base64
+import contextlib
+import inspect
 import logging
 import os
 import re
 import sys
 import time
 import uuid
+import warnings
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial
 from io import BytesIO
-from typing import Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

 import PIL.Image
+import torch
 from PIL import ImageOps

 from ....constants import XINFERENCE_IMAGE_DIR
-from ....device_utils import move_model_to_available_device
+from ....device_utils import get_available_device, move_model_to_available_device
 from ....types import Image, ImageList, LoRA
+from ..sdapi import SDAPIDiffusionModelMixin

-logger = logging.getLogger(__name__)
+if TYPE_CHECKING:
+    from ..core import ImageModelFamilyV1

+logger = logging.getLogger(__name__)

-class DiffusionModel:
+SAMPLING_METHODS = [
+    "default",
+    "DPM++ 2M",
+    "DPM++ 2M Karras",
+    "DPM++ 2M SDE",
+    "DPM++ 2M SDE Karras",
+    "DPM++ SDE",
+    "DPM++ SDE Karras",
+    "DPM2",
+    "DPM2 Karras",
+    "DPM2 a",
+    "DPM2 a Karras",
+    "Euler",
+    "Euler a",
+    "Heun",
+    "LMS",
+    "LMS Karras",
+]
+
+
+def model_accept_param(params: Union[str, List[str]], model: Any) -> bool:
+    params = [params] if isinstance(params, str) else params
+    # model is diffusers Pipeline
+    parameters = inspect.signature(model.__call__).parameters  # type: ignore
+    allow_params = False
+    for param in parameters.values():
+        if param.kind == inspect.Parameter.VAR_KEYWORD:
+            # the __call__ can accept **kwargs,
+            # we treat it as it can accept any parameters
+            allow_params = True
+            break
+    if not allow_params:
+        if all(param in parameters for param in params):
+            allow_params = True
+    return allow_params
+
+
+class DiffusionModel(SDAPIDiffusionModelMixin):
     def __init__(
         self,
         model_uid: str,
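
model_accept_param above simply checks a pipeline's __call__ signature; a toy sketch of the same idea, where FakePipeline is a stand-in and not a real diffusers class:

import inspect

class FakePipeline:  # stand-in; real diffusers pipelines expose __call__ similarly
    def __call__(self, prompt, num_inference_steps=50, guidance_scale=7.5):
        ...

params = inspect.signature(FakePipeline().__call__).parameters
print("num_inference_steps" in params)  # True  -> such a kwarg would be kept
print("strength" in params)             # False -> such a kwarg would be dropped with a warning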
@@ -43,7 +87,7 @@ class DiffusionModel:
         lora_model: Optional[List[LoRA]] = None,
         lora_load_kwargs: Optional[Dict] = None,
         lora_fuse_kwargs: Optional[Dict] = None,
-        abilities: Optional[List[str]] = None,
+        model_spec: Optional["ImageModelFamilyV1"] = None,
         **kwargs,
     ):
         self._model_uid = model_uid
@@ -59,7 +103,8 @@ class DiffusionModel:
         self._lora_model = lora_model
         self._lora_load_kwargs = lora_load_kwargs or {}
         self._lora_fuse_kwargs = lora_fuse_kwargs or {}
-        self._abilities = abilities or []
+        self._model_spec = model_spec
+        self._abilities = model_spec.model_ability or []  # type: ignore
         self._kwargs = kwargs

     @property
@@ -80,8 +125,6 @@ class DiffusionModel:
             logger.info(f"Successfully loaded the LoRA for model {self._model_uid}.")

     def load(self):
-        import torch
-
         if "text2image" in self._abilities or "image2image" in self._abilities:
             from diffusers import AutoPipelineForText2Image as AutoPipelineModel
         elif "inpainting" in self._abilities:
@@ -143,7 +186,9 @@ class DiffusionModel:
             self._kwargs[text_encoder_name] = text_encoder
             self._kwargs["device_map"] = "balanced"

-        logger.debug("Loading model %s", AutoPipelineModel)
+        logger.debug(
+            "Loading model from %s, kwargs: %s", self._model_path, self._kwargs
+        )
         self._model = AutoPipelineModel.from_pretrained(
             self._model_path,
             **self._kwargs,
@@ -158,6 +203,89 @@ class DiffusionModel:
             self._model.enable_attention_slicing()
         self._apply_lora()

+    @staticmethod
+    def _get_scheduler(model: Any, sampler_name: str):
+        if not sampler_name or sampler_name == "default":
+            return
+
+        assert model is not None
+
+        import diffusers
+
+        # see https://github.com/huggingface/diffusers/issues/4167
+        # to get A1111 <> Diffusers Scheduler mapping
+        if sampler_name == "DPM++ 2M":
+            return diffusers.DPMSolverMultistepScheduler.from_config(
+                model.scheduler.config
+            )
+        elif sampler_name == "DPM++ 2M Karras":
+            return diffusers.DPMSolverMultistepScheduler.from_config(
+                model.scheduler.config, use_karras_sigmas=True
+            )
+        elif sampler_name == "DPM++ 2M SDE":
+            return diffusers.DPMSolverMultistepScheduler.from_config(
+                model.scheduler.config, algorithm_type="sde-dpmsolver++"
+            )
+        elif sampler_name == "DPM++ 2M SDE Karras":
+            return diffusers.DPMSolverMultistepScheduler.from_config(
+                model.scheduler.config,
+                algorithm_type="sde-dpmsolver++",
+                use_karras_sigmas=True,
+            )
+        elif sampler_name == "DPM++ SDE":
+            return diffusers.DPMSolverSinglestepScheduler.from_config(
+                model.scheduler.config
+            )
+        elif sampler_name == "DPM++ SDE Karras":
+            return diffusers.DPMSolverSinglestepScheduler.from_config(
+                model.scheduler.config, use_karras_sigmas=True
+            )
+        elif sampler_name == "DPM2":
+            return diffusers.KDPM2DiscreteScheduler.from_config(model.scheduler.config)
+        elif sampler_name == "DPM2 Karras":
+            return diffusers.KDPM2DiscreteScheduler.from_config(
+                model.scheduler.config, use_karras_sigmas=True
+            )
+        elif sampler_name == "DPM2 a":
+            return diffusers.KDPM2AncestralDiscreteScheduler.from_config(
+                model.scheduler.config
+            )
+        elif sampler_name == "DPM2 a Karras":
+            return diffusers.KDPM2AncestralDiscreteScheduler.from_config(
+                model.scheduler.config, use_karras_sigmas=True
+            )
+        elif sampler_name == "Euler":
+            return diffusers.EulerDiscreteScheduler.from_config(model.scheduler.config)
+        elif sampler_name == "Euler a":
+            return diffusers.EulerAncestralDiscreteScheduler.from_config(
+                model.scheduler.config
+            )
+        elif sampler_name == "Heun":
+            return diffusers.HeunDiscreteScheduler.from_config(model.scheduler.config)
+        elif sampler_name == "LMS":
+            return diffusers.LMSDiscreteScheduler.from_config(model.scheduler.config)
+        elif sampler_name == "LMS Karras":
+            return diffusers.LMSDiscreteScheduler.from_config(
+                model.scheduler.config, use_karras_sigmas=True
+            )
+        else:
+            raise ValueError(f"Unknown sampler: {sampler_name}")
+
+    @staticmethod
+    @contextlib.contextmanager
+    def _reset_when_done(model: Any, sampler_name: str):
+        assert model is not None
+        scheduler = DiffusionModel._get_scheduler(model, sampler_name)
+        if scheduler:
+            default_scheduler = model.scheduler
+            model.scheduler = scheduler
+            try:
+                yield
+            finally:
+                model.scheduler = default_scheduler
+        else:
+            yield
+
     def _call_model(
         self,
         response_format: str,
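
The sampler names map onto diffusers schedulers via from_config, and _reset_when_done applies the swap per request and then restores the pipeline's default scheduler. A standalone sketch of the same pattern; the pipeline path is illustrative:

from diffusers import AutoPipelineForText2Image, DPMSolverMultistepScheduler

pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")

# "DPM++ 2M Karras" in A1111 terms: multistep DPM-Solver with Karras sigmas
original = pipe.scheduler
pipe.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config, use_karras_sigmas=True
)
# ... run the pipeline ...
pipe.scheduler = original  # restore the default afterwards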
@@ -168,16 +296,20 @@ class DiffusionModel:

         from ....device_utils import empty_cache

-        logger.debug(
-            "stable diffusion args: %s",
-            kwargs,
-        )
+        model = model if model is not None else self._model
         is_padded = kwargs.pop("is_padded", None)
         origin_size = kwargs.pop("origin_size", None)
-
-        model = model if model is not None else self._model
+        seed = kwargs.pop("seed", None)
+        if seed is not None:
+            kwargs["generator"] = generator = torch.Generator(device=get_available_device())  # type: ignore
+            if seed != -1:
+                kwargs["generator"] = generator.manual_seed(seed)
+        sampler_name = kwargs.pop("sampler_name", None)
         assert callable(model)
-        images = model(**kwargs).images
+        with self._reset_when_done(model, sampler_name):
+            logger.debug("stable diffusion args: %s, model: %s", kwargs, model)
+            self._filter_kwargs(model, kwargs)
+            images = model(**kwargs).images

         # revert padding if padded
         if is_padded and origin_size:
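
The seed handling above follows the standard diffusers pattern of passing a torch.Generator; roughly, with an illustrative device string:

import torch

seed = 42
generator = torch.Generator(device="cuda")
if seed != -1:                       # -1 keeps the generator unseeded (random result)
    generator = generator.manual_seed(seed)

# images = pipe(prompt="...", generator=generator).images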
@@ -215,11 +347,17 @@ class DiffusionModel:
             raise ValueError(f"Unsupported response format: {response_format}")

     @classmethod
-    def _filter_kwargs(cls, kwargs: dict):
+    def _filter_kwargs(cls, model, kwargs: dict):
         for arg in ["negative_prompt", "num_inference_steps"]:
             if not kwargs.get(arg):
                 kwargs.pop(arg, None)

+        for key in list(kwargs):
+            allow_key = model_accept_param(key, model)
+            if not allow_key:
+                warnings.warn(f"{type(model)} cannot accept `{key}`, will ignore it")
+                kwargs.pop(key)
+
     def text_to_image(
         self,
         prompt: str,
@@ -231,14 +369,15 @@ class DiffusionModel:
         # References:
         # https://huggingface.co/docs/diffusers/main/en/api/pipelines/controlnet_sdxl
         width, height = map(int, re.split(r"[^\d]+", size))
-        self._filter_kwargs(kwargs)
+        generate_kwargs = self._model_spec.default_generate_config.copy()  # type: ignore
+        generate_kwargs.update({k: v for k, v in kwargs.items() if v is not None})
         return self._call_model(
             prompt=prompt,
             height=height,
             width=width,
             num_images_per_prompt=n,
             response_format=response_format,
-            **kwargs,
+            **generate_kwargs,
         )

     @staticmethod
@@ -253,7 +392,6 @@ class DiffusionModel:
         self,
         image: PIL.Image,
         prompt: Optional[Union[str, List[str]]] = None,
-        negative_prompt: Optional[Union[str, List[str]]] = None,
         n: int = 1,
         size: Optional[str] = None,
         response_format: str = "url",
@@ -287,12 +425,15 @@ class DiffusionModel:
             width, height = image.size
             kwargs["width"] = width
             kwargs["height"] = height
+        else:
+            # SD3 image2image cannot accept width and height
+            allow_width_height = model_accept_param(["width", "height"], model)
+            if allow_width_height:
+                kwargs["width"], kwargs["height"] = image.size

-        self._filter_kwargs(kwargs)
         return self._call_model(
             image=image,
             prompt=prompt,
-            negative_prompt=negative_prompt,
             num_images_per_prompt=n,
             response_format=response_format,
             model=model,
@@ -304,7 +445,6 @@ class DiffusionModel:
         image: PIL.Image,
         mask_image: PIL.Image,
         prompt: Optional[Union[str, List[str]]] = None,
-        negative_prompt: Optional[Union[str, List[str]]] = None,
         n: int = 1,
         size: str = "1024*1024",
         response_format: str = "url",
@@ -346,7 +486,6 @@ class DiffusionModel:
             image=image,
             mask_image=mask_image,
             prompt=prompt,
-            negative_prompt=negative_prompt,
             height=height,
             width=width,
             num_images_per_prompt=n,
xinference/model/llm/__init__.py

@@ -121,7 +121,7 @@ def register_custom_model():
             with codecs.open(
                 os.path.join(user_defined_llm_dir, f), encoding="utf-8"
             ) as fd:
-                user_defined_llm_family = CustomLLMFamilyV1.parse_obj(json.load(fd))
+                user_defined_llm_family = CustomLLMFamilyV1.parse_raw(fd.read())
                 register_llm(user_defined_llm_family, persist=False)
         except Exception as e:
             warnings.warn(f"{user_defined_llm_dir}/{f} has error, {e}")
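
For context, in pydantic v1 both entry points produce the same model from valid JSON, but parse_raw hands the raw string to the class (so a subclass such as CustomLLMFamilyV1 can hook its own handling into deserialization), while parse_obj only ever sees an already-parsed dict. A generic sketch, where ExampleFamily is a stand-in and not the real CustomLLMFamilyV1:

import json
from pydantic import BaseModel  # pydantic v1 API

class ExampleFamily(BaseModel):  # stand-in for a custom family definition
    model_name: str

raw = '{"model_name": "my-custom-llm"}'
a = ExampleFamily.parse_obj(json.loads(raw))  # dict -> model
b = ExampleFamily.parse_raw(raw)              # raw JSON string -> model
assert a == b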
@@ -136,12 +136,17 @@ def _install():
     from .transformers.cogvlm2 import CogVLM2Model
     from .transformers.cogvlm2_video import CogVLM2VideoModel
     from .transformers.core import PytorchChatModel, PytorchModel
+    from .transformers.deepseek_v2 import (
+        DeepSeekV2PytorchChatModel,
+        DeepSeekV2PytorchModel,
+    )
     from .transformers.deepseek_vl import DeepSeekVLChatModel
     from .transformers.glm4v import Glm4VModel
     from .transformers.intern_vl import InternVLChatModel
     from .transformers.internlm2 import Internlm2PytorchChatModel
     from .transformers.minicpmv25 import MiniCPMV25Model
     from .transformers.minicpmv26 import MiniCPMV26Model
+    from .transformers.qwen2_audio import Qwen2AudioChatModel
     from .transformers.qwen2_vl import Qwen2VLChatModel
     from .transformers.qwen_vl import QwenVLChatModel
     from .transformers.yi_vl import YiVLChatModel
@@ -173,6 +178,7 @@ def _install():
             Internlm2PytorchChatModel,
             QwenVLChatModel,
             Qwen2VLChatModel,
+            Qwen2AudioChatModel,
             YiVLChatModel,
             DeepSeekVLChatModel,
             InternVLChatModel,
@@ -182,6 +188,8 @@ def _install():
             MiniCPMV25Model,
             MiniCPMV26Model,
             Glm4VModel,
+            DeepSeekV2PytorchModel,
+            DeepSeekV2PytorchChatModel,
         ]
     )
     if OmniLMMModel:  # type: ignore