xinference 0.13.2__py3-none-any.whl → 0.13.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (103)
  1. xinference/__init__.py +0 -1
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +30 -5
  4. xinference/client/restful/restful_client.py +18 -3
  5. xinference/constants.py +0 -4
  6. xinference/core/chat_interface.py +2 -2
  7. xinference/core/image_interface.py +6 -3
  8. xinference/core/model.py +9 -4
  9. xinference/core/scheduler.py +4 -4
  10. xinference/core/supervisor.py +2 -0
  11. xinference/core/worker.py +7 -0
  12. xinference/deploy/utils.py +6 -0
  13. xinference/model/audio/core.py +9 -4
  14. xinference/model/audio/cosyvoice.py +136 -0
  15. xinference/model/audio/model_spec.json +24 -0
  16. xinference/model/audio/model_spec_modelscope.json +27 -0
  17. xinference/model/core.py +25 -4
  18. xinference/model/embedding/core.py +88 -13
  19. xinference/model/embedding/model_spec.json +8 -0
  20. xinference/model/embedding/model_spec_modelscope.json +8 -0
  21. xinference/model/flexible/core.py +8 -2
  22. xinference/model/flexible/launchers/__init__.py +1 -0
  23. xinference/model/flexible/launchers/image_process_launcher.py +70 -0
  24. xinference/model/image/core.py +8 -5
  25. xinference/model/image/model_spec.json +36 -5
  26. xinference/model/image/model_spec_modelscope.json +21 -3
  27. xinference/model/image/stable_diffusion/core.py +36 -28
  28. xinference/model/llm/core.py +6 -4
  29. xinference/model/llm/ggml/llamacpp.py +7 -5
  30. xinference/model/llm/llm_family.json +802 -82
  31. xinference/model/llm/llm_family.py +6 -6
  32. xinference/model/llm/llm_family_csghub.json +39 -0
  33. xinference/model/llm/llm_family_modelscope.json +295 -47
  34. xinference/model/llm/mlx/core.py +7 -0
  35. xinference/model/llm/pytorch/chatglm.py +246 -5
  36. xinference/model/llm/pytorch/cogvlm2.py +1 -1
  37. xinference/model/llm/pytorch/deepseek_vl.py +2 -1
  38. xinference/model/llm/pytorch/falcon.py +2 -1
  39. xinference/model/llm/pytorch/llama_2.py +4 -2
  40. xinference/model/llm/pytorch/omnilmm.py +2 -1
  41. xinference/model/llm/pytorch/qwen_vl.py +2 -1
  42. xinference/model/llm/pytorch/vicuna.py +2 -1
  43. xinference/model/llm/pytorch/yi_vl.py +2 -1
  44. xinference/model/llm/sglang/core.py +12 -6
  45. xinference/model/llm/utils.py +78 -1
  46. xinference/model/llm/vllm/core.py +9 -5
  47. xinference/model/rerank/core.py +4 -3
  48. xinference/thirdparty/cosyvoice/__init__.py +0 -0
  49. xinference/thirdparty/cosyvoice/bin/__init__.py +0 -0
  50. xinference/thirdparty/cosyvoice/bin/inference.py +114 -0
  51. xinference/thirdparty/cosyvoice/bin/train.py +136 -0
  52. xinference/thirdparty/cosyvoice/cli/__init__.py +0 -0
  53. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +83 -0
  54. xinference/thirdparty/cosyvoice/cli/frontend.py +168 -0
  55. xinference/thirdparty/cosyvoice/cli/model.py +60 -0
  56. xinference/thirdparty/cosyvoice/dataset/__init__.py +0 -0
  57. xinference/thirdparty/cosyvoice/dataset/dataset.py +160 -0
  58. xinference/thirdparty/cosyvoice/dataset/processor.py +369 -0
  59. xinference/thirdparty/cosyvoice/flow/__init__.py +0 -0
  60. xinference/thirdparty/cosyvoice/flow/decoder.py +222 -0
  61. xinference/thirdparty/cosyvoice/flow/flow.py +135 -0
  62. xinference/thirdparty/cosyvoice/flow/flow_matching.py +138 -0
  63. xinference/thirdparty/cosyvoice/flow/length_regulator.py +49 -0
  64. xinference/thirdparty/cosyvoice/hifigan/__init__.py +0 -0
  65. xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +55 -0
  66. xinference/thirdparty/cosyvoice/hifigan/generator.py +391 -0
  67. xinference/thirdparty/cosyvoice/llm/__init__.py +0 -0
  68. xinference/thirdparty/cosyvoice/llm/llm.py +206 -0
  69. xinference/thirdparty/cosyvoice/transformer/__init__.py +0 -0
  70. xinference/thirdparty/cosyvoice/transformer/activation.py +84 -0
  71. xinference/thirdparty/cosyvoice/transformer/attention.py +326 -0
  72. xinference/thirdparty/cosyvoice/transformer/convolution.py +145 -0
  73. xinference/thirdparty/cosyvoice/transformer/decoder.py +396 -0
  74. xinference/thirdparty/cosyvoice/transformer/decoder_layer.py +132 -0
  75. xinference/thirdparty/cosyvoice/transformer/embedding.py +293 -0
  76. xinference/thirdparty/cosyvoice/transformer/encoder.py +472 -0
  77. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +236 -0
  78. xinference/thirdparty/cosyvoice/transformer/label_smoothing_loss.py +96 -0
  79. xinference/thirdparty/cosyvoice/transformer/positionwise_feed_forward.py +115 -0
  80. xinference/thirdparty/cosyvoice/transformer/subsampling.py +383 -0
  81. xinference/thirdparty/cosyvoice/utils/__init__.py +0 -0
  82. xinference/thirdparty/cosyvoice/utils/class_utils.py +70 -0
  83. xinference/thirdparty/cosyvoice/utils/common.py +103 -0
  84. xinference/thirdparty/cosyvoice/utils/executor.py +110 -0
  85. xinference/thirdparty/cosyvoice/utils/file_utils.py +41 -0
  86. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +125 -0
  87. xinference/thirdparty/cosyvoice/utils/mask.py +227 -0
  88. xinference/thirdparty/cosyvoice/utils/scheduler.py +739 -0
  89. xinference/thirdparty/cosyvoice/utils/train_utils.py +289 -0
  90. xinference/web/ui/build/asset-manifest.json +3 -3
  91. xinference/web/ui/build/index.html +1 -1
  92. xinference/web/ui/build/static/js/{main.95c1d652.js → main.af906659.js} +3 -3
  93. xinference/web/ui/build/static/js/main.af906659.js.map +1 -0
  94. xinference/web/ui/node_modules/.cache/babel-loader/2cd5e4279ad7e13a1f41d486e9fca7756295bfad5bd77d90992f4ac3e10b496d.json +1 -0
  95. {xinference-0.13.2.dist-info → xinference-0.13.4.dist-info}/METADATA +39 -11
  96. {xinference-0.13.2.dist-info → xinference-0.13.4.dist-info}/RECORD +101 -57
  97. xinference/web/ui/build/static/js/main.95c1d652.js.map +0 -1
  98. xinference/web/ui/node_modules/.cache/babel-loader/709711edada3f1596b309d571285fd31f1c364d66f4425bc28723d0088cc351a.json +0 -1
  99. /xinference/web/ui/build/static/js/{main.95c1d652.js.LICENSE.txt → main.af906659.js.LICENSE.txt} +0 -0
  100. {xinference-0.13.2.dist-info → xinference-0.13.4.dist-info}/LICENSE +0 -0
  101. {xinference-0.13.2.dist-info → xinference-0.13.4.dist-info}/WHEEL +0 -0
  102. {xinference-0.13.2.dist-info → xinference-0.13.4.dist-info}/entry_points.txt +0 -0
  103. {xinference-0.13.2.dist-info → xinference-0.13.4.dist-info}/top_level.txt +0 -0
xinference/model/embedding/core.py

@@ -118,12 +118,19 @@ def get_cache_status(
 
 
 class EmbeddingModel:
-    def __init__(self, model_uid: str, model_path: str, device: Optional[str] = None):
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: str,
+        model_spec: EmbeddingModelSpec,
+        device: Optional[str] = None,
+    ):
         self._model_uid = model_uid
         self._model_path = model_path
         self._device = device
         self._model = None
         self._counter = 0
+        self._model_spec = model_spec
 
     def load(self):
         try:
@@ -134,12 +141,26 @@ class EmbeddingModel:
                 "Please make sure 'sentence-transformers' is installed. ",
                 "You can install it by `pip install sentence-transformers`\n",
             ]
-
             raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+
+        class XSentenceTransformer(SentenceTransformer):
+            def to(self, *args, **kwargs):
+                pass
+
         from ..utils import patch_trust_remote_code
 
         patch_trust_remote_code()
-        self._model = SentenceTransformer(self._model_path, device=self._device)
+        if (
+            "gte-Qwen2" in self._model_spec.model_id
+            or "gte-Qwen2" in self._model_spec.model_name
+        ):
+            self._model = XSentenceTransformer(
+                self._model_path,
+                device=self._device,
+                model_kwargs={"device_map": "auto"},
+            )
+        else:
+            self._model = SentenceTransformer(self._model_path, device=self._device)
 
     def create_embedding(self, sentences: Union[str, List[str]], **kwargs):
         self._counter += 1
@@ -156,6 +177,8 @@
         def encode(
             model: SentenceTransformer,
             sentences: Union[str, List[str]],
+            prompt_name: Optional[str] = None,
+            prompt: Optional[str] = None,
             batch_size: int = 32,
             show_progress_bar: bool = None,
             output_value: str = "sentence_embedding",
@@ -204,10 +227,43 @@
                 sentences = [sentences]
                 input_was_string = True
 
+            if prompt is None:
+                if prompt_name is not None:
+                    try:
+                        prompt = model.prompts[prompt_name]
+                    except KeyError:
+                        raise ValueError(
+                            f"Prompt name '{prompt_name}' not found in the configured prompts dictionary with keys {list(model.prompts.keys())!r}."
+                        )
+                elif model.default_prompt_name is not None:
+                    prompt = model.prompts.get(model.default_prompt_name, None)
+            else:
+                if prompt_name is not None:
+                    logger.warning(
+                        "Encode with either a `prompt`, a `prompt_name`, or neither, but not both. "
+                        "Ignoring the `prompt_name` in favor of `prompt`."
+                    )
+
+            extra_features = {}
+            if prompt is not None:
+                sentences = [prompt + sentence for sentence in sentences]
+
+                # Some models (e.g. INSTRUCTOR, GRIT) require removing the prompt before pooling
+                # Tracking the prompt length allow us to remove the prompt during pooling
+                tokenized_prompt = model.tokenize([prompt])
+                if "input_ids" in tokenized_prompt:
+                    extra_features["prompt_length"] = (
+                        tokenized_prompt["input_ids"].shape[-1] - 1
+                    )
+
             if device is None:
                 device = model._target_device
 
-            model.to(device)
+            if (
+                "gte-Qwen2" not in self._model_spec.model_id
+                and "gte-Qwen2" not in self._model_spec.model_name
+            ):
+                model.to(device)
 
             all_embeddings = []
             all_token_nums = 0
@@ -228,6 +284,7 @@
                 ]
                 features = model.tokenize(sentences_batch)
                 features = batch_to_device(features, device)
+                features.update(extra_features)
                 all_token_nums += sum([len(f) for f in features])
 
                 with torch.no_grad():
@@ -272,7 +329,10 @@
             ]
 
             if convert_to_tensor:
-                all_embeddings = torch.stack(all_embeddings)
+                if len(all_embeddings):
+                    all_embeddings = torch.stack(all_embeddings)
+                else:
+                    all_embeddings = torch.Tensor()
             elif convert_to_numpy:
                 all_embeddings = np.asarray([emb.numpy() for emb in all_embeddings])
 
@@ -281,12 +341,24 @@
 
             return all_embeddings, all_token_nums
 
-        all_embeddings, all_token_nums = encode(
-            self._model,
-            sentences,
-            convert_to_numpy=False,
-            **kwargs,
-        )
+        if (
+            "gte-Qwen2" in self._model_spec.model_id
+            or "gte-Qwen2" in self._model_spec.model_name
+        ):
+            all_embeddings, all_token_nums = encode(
+                self._model,
+                sentences,
+                prompt_name="query",
+                convert_to_numpy=False,
+                **kwargs,
+            )
+        else:
+            all_embeddings, all_token_nums = encode(
+                self._model,
+                sentences,
+                convert_to_numpy=False,
+                **kwargs,
+            )
         if isinstance(sentences, str):
             all_embeddings = [all_embeddings]
         embedding_list = []
@@ -344,11 +416,14 @@ def create_embedding_model_instance(
     model_uid: str,
     model_name: str,
    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[EmbeddingModel, EmbeddingModelDescription]:
     model_spec = match_embedding(model_name, download_hub)
-    model_path = cache(model_spec)
-    model = EmbeddingModel(model_uid, model_path, **kwargs)
+    if model_path is None:
+        model_path = cache(model_spec)
+
+    model = EmbeddingModel(model_uid, model_path, model_spec, **kwargs)
     model_description = EmbeddingModelDescription(
         subpool_addr, devices, model_spec, model_path=model_path
     )
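
Taken together, the embedding changes above let a caller pass a pre-downloaded directory as model_path (skipping cache()) and add a gte-Qwen2 spec whose inputs are automatically prefixed with the model's built-in "query" prompt. A minimal client-side sketch of how that might be exercised; the server address, the local directory, and the assumption that extra launch kwargs such as model_path are forwarded down to create_embedding_model_instance are illustrative, not taken from this diff:

    # Hedged sketch, not part of the diff: assumes a running xinference server and
    # that launch kwargs reach create_embedding_model_instance().
    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")
    uid = client.launch_model(
        model_name="gte-Qwen2",
        model_type="embedding",
        model_path="/data/models/gte-Qwen2-7B-instruct",  # hypothetical local copy
    )
    embedding_model = client.get_model(uid)
    # For gte-Qwen2, the server-side encode() injects prompt_name="query" itself,
    # so plain sentences are enough here.
    resp = embedding_model.create_embedding("How do I deploy xinference?")
    print(len(resp["data"][0]["embedding"]))  # 3584 dimensions per the new huggingface spec entry
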
xinference/model/embedding/model_spec.json

@@ -230,5 +230,13 @@
         "language": ["zh", "en"],
         "model_id": "moka-ai/m3e-large",
         "model_revision": "12900375086c37ba5d83d1e417b21dc7d1d1f388"
+    },
+    {
+        "model_name": "gte-Qwen2",
+        "dimensions": 3584,
+        "max_tokens": 32000,
+        "language": ["zh", "en"],
+        "model_id": "Alibaba-NLP/gte-Qwen2-7B-instruct",
+        "model_revision": "e26182b2122f4435e8b3ebecbf363990f409b45b"
     }
 ]
xinference/model/embedding/model_spec_modelscope.json

@@ -232,5 +232,13 @@
         "language": ["zh", "en"],
         "model_id": "AI-ModelScope/m3e-large",
         "model_hub": "modelscope"
+    },
+    {
+        "model_name": "gte-Qwen2",
+        "dimensions": 4096,
+        "max_tokens": 32000,
+        "language": ["zh", "en"],
+        "model_id": "iic/gte_Qwen2-7B-instruct",
+        "model_hub": "modelscope"
     }
 ]
xinference/model/flexible/core.py

@@ -210,10 +210,16 @@ def match_flexible_model(model_name):
 
 
 def create_flexible_model_instance(
-    subpool_addr: str, devices: List[str], model_uid: str, model_name: str, **kwargs
+    subpool_addr: str,
+    devices: List[str],
+    model_uid: str,
+    model_name: str,
+    model_path: Optional[str] = None,
+    **kwargs,
 ) -> Tuple[FlexibleModel, FlexibleModelDescription]:
     model_spec = match_flexible_model(model_name)
-    model_path = model_spec.model_uri
+    if not model_path:
+        model_path = model_spec.model_uri
     launcher_name = model_spec.launcher
     launcher_args = model_spec.parser_args()
     kwargs.update(launcher_args)
xinference/model/flexible/launchers/__init__.py

@@ -12,4 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from .image_process_launcher import launcher as image_process
 from .transformers_launcher import launcher as transformers
xinference/model/flexible/launchers/image_process_launcher.py (new file)

@@ -0,0 +1,70 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import base64
+from io import BytesIO
+
+import PIL.Image
+import PIL.ImageOps
+
+from ....types import Image
+from ..core import FlexibleModel, FlexibleModelSpec
+
+
+class ImageRemoveBackgroundModel(FlexibleModel):
+    def infer(self, **kwargs):
+        invert = kwargs.get("invert", False)
+        b64_image: str = kwargs.get("image")  # type: ignore
+        only_mask = kwargs.pop("only_mask", True)
+        image_format = kwargs.pop("image_format", "PNG")
+        if not b64_image:
+            raise ValueError("No image found to remove background")
+        image = base64.b64decode(b64_image)
+
+        try:
+            from rembg import remove
+        except ImportError:
+            error_message = "Failed to import module 'rembg'"
+            installation_guide = [
+                "Please make sure 'rembg' is installed. ",
+                "You can install it by visiting the installation section of the git repo:\n",
+                "https://github.com/danielgatis/rembg?tab=readme-ov-file#installation",
+            ]
+
+            raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+
+        im = PIL.Image.open(BytesIO(image))
+        om = remove(im, only_mask=only_mask, **kwargs)
+        if invert:
+            om = PIL.ImageOps.invert(om)
+
+        buffered = BytesIO()
+        om.save(buffered, format=image_format)
+        img_str = base64.b64encode(buffered.getvalue()).decode()
+        return Image(url=None, b64_json=img_str)
+
+
+def launcher(model_uid: str, model_spec: FlexibleModelSpec, **kwargs) -> FlexibleModel:
+    task = kwargs.get("task")
+    device = kwargs.get("device")
+
+    if task == "remove_background":
+        return ImageRemoveBackgroundModel(
+            model_uid=model_uid,
+            model_path=model_spec.model_uri,  # type: ignore
+            device=device,
+            config=kwargs,
+        )
+    else:
+        raise ValueError(f"Unknown Task for image processing: {task}")
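
The new launcher maps a "remove_background" task onto rembg. A small sketch of driving ImageRemoveBackgroundModel.infer() directly, outside the supervisor/worker path, assuming rembg is installed; the file names are placeholders and the constructor arguments simply mirror what the launcher above passes:

    # Hedged sketch, not part of the diff: calls the flexible model class directly.
    import base64

    from xinference.model.flexible.launchers.image_process_launcher import (
        ImageRemoveBackgroundModel,
    )

    with open("input.png", "rb") as f:
        b64_image = base64.b64encode(f.read()).decode()

    model = ImageRemoveBackgroundModel(
        model_uid="remove-bg-demo",
        model_path="/tmp/unused",  # rembg ships its own weights; path is a placeholder
        device=None,
        config={},
    )
    result = model.infer(image=b64_image, only_mask=False, image_format="PNG")
    with open("output.png", "wb") as f:
        f.write(base64.b64decode(result["b64_json"]))
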
xinference/model/image/core.py

@@ -45,7 +45,7 @@ class ImageModelFamilyV1(CacheableModelSpec):
     model_id: str
     model_revision: str
     model_hub: str = "huggingface"
-    ability: Optional[str]
+    abilities: Optional[List[str]]
     controlnet: Optional[List["ImageModelFamilyV1"]]
 
 
@@ -72,7 +72,7 @@ class ImageModelDescription(ModelDescription):
             "model_name": self._model_spec.model_name,
             "model_family": self._model_spec.model_family,
             "model_revision": self._model_spec.model_revision,
-            "ability": self._model_spec.ability,
+            "abilities": self._model_spec.abilities,
             "controlnet": controlnet,
         }
 
@@ -189,6 +189,7 @@ def create_image_model_instance(
     model_name: str,
     peft_model_config: Optional[PeftModelConfig] = None,
     download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[DiffusionModel, ImageModelDescription]:
     model_spec = match_diffusion(model_name, download_hub)
@@ -209,7 +210,8 @@
         for name in controlnet:
             for cn_model_spec in model_spec.controlnet:
                 if cn_model_spec.model_name == name:
-                    model_path = cache(cn_model_spec)
+                    if not model_path:
+                        model_path = cache(cn_model_spec)
                     controlnet_model_paths.append(model_path)
                     break
             else:
@@ -220,7 +222,8 @@
             kwargs["controlnet"] = controlnet_model_paths[0]
         else:
             kwargs["controlnet"] = controlnet_model_paths
-    model_path = cache(model_spec)
+    if not model_path:
+        model_path = cache(model_spec)
     if peft_model_config is not None:
         lora_model = peft_model_config.peft_model
         lora_load_kwargs = peft_model_config.image_lora_load_kwargs
@@ -236,7 +239,7 @@
         lora_model_paths=lora_model,
         lora_load_kwargs=lora_load_kwargs,
         lora_fuse_kwargs=lora_fuse_kwargs,
-        ability=model_spec.ability,
+        abilities=model_spec.abilities,
         **kwargs,
     )
     model_description = ImageModelDescription(
xinference/model/image/model_spec.json

@@ -3,25 +3,39 @@
         "model_name": "sd3-medium",
         "model_family": "stable_diffusion",
         "model_id": "stabilityai/stable-diffusion-3-medium-diffusers",
-        "model_revision": "ea42f8cef0f178587cf766dc8129abd379c90671"
+        "model_revision": "ea42f8cef0f178587cf766dc8129abd379c90671",
+        "abilities": [
+            "text2iamge",
+            "image2image"
+        ]
     },
     {
         "model_name": "sd-turbo",
         "model_family": "stable_diffusion",
         "model_id": "stabilityai/sd-turbo",
-        "model_revision": "1681ed09e0cff58eeb41e878a49893228b78b94c"
+        "model_revision": "1681ed09e0cff58eeb41e878a49893228b78b94c",
+        "abilities": [
+            "text2iamge"
+        ]
     },
     {
         "model_name": "sdxl-turbo",
         "model_family": "stable_diffusion",
         "model_id": "stabilityai/sdxl-turbo",
-        "model_revision": "f4b0486b498f84668e828044de1d0c8ba486e05b"
+        "model_revision": "f4b0486b498f84668e828044de1d0c8ba486e05b",
+        "abilities": [
+            "text2iamge"
+        ]
     },
     {
         "model_name": "stable-diffusion-v1.5",
         "model_family": "stable_diffusion",
         "model_id": "runwayml/stable-diffusion-v1-5",
         "model_revision": "1d0c4ebf6ff58a5caecab40fa1406526bca4b5b9",
+        "abilities": [
+            "text2iamge",
+            "image2image"
+        ],
         "controlnet": [
             {
                 "model_name":"canny",
@@ -72,6 +86,10 @@
         "model_family": "stable_diffusion",
         "model_id": "stabilityai/stable-diffusion-xl-base-1.0",
         "model_revision": "f898a3e026e802f68796b95e9702464bac78d76f",
+        "abilities": [
+            "text2iamge",
+            "image2image"
+        ],
         "controlnet": [
             {
                 "model_name":"canny",
@@ -98,13 +116,26 @@
         "model_family": "stable_diffusion",
         "model_id": "runwayml/stable-diffusion-inpainting",
         "model_revision": "51388a731f57604945fddd703ecb5c50e8e7b49d",
-        "ability": "inpainting"
+        "abilities": [
+            "inpainting"
+        ]
     },
     {
         "model_name": "stable-diffusion-2-inpainting",
         "model_family": "stable_diffusion",
         "model_id": "stabilityai/stable-diffusion-2-inpainting",
         "model_revision": "81a84f49b15956b60b4272a405ad3daef3da4590",
-        "ability": "inpainting"
+        "abilities": [
+            "inpainting"
+        ]
+    },
+    {
+        "model_name": "stable-diffusion-xl-inpainting",
+        "model_family": "stable_diffusion",
+        "model_id": "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
+        "model_revision": "115134f363124c53c7d878647567d04daf26e41e",
+        "abilities": [
+            "inpainting"
+        ]
     }
 ]
xinference/model/image/model_spec_modelscope.json

@@ -4,21 +4,31 @@
         "model_family": "stable_diffusion",
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/stable-diffusion-3-medium-diffusers",
-        "model_revision": "master"
+        "model_revision": "master",
+        "abilities": [
+            "text2iamge",
+            "image2image"
+        ]
     },
     {
         "model_name": "sd-turbo",
         "model_family": "stable_diffusion",
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/sd-turbo",
-        "model_revision": "master"
+        "model_revision": "master",
+        "abilities": [
+            "text2iamge"
+        ]
     },
     {
         "model_name": "sdxl-turbo",
         "model_family": "stable_diffusion",
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/sdxl-turbo",
-        "model_revision": "master"
+        "model_revision": "master",
+        "abilities": [
+            "text2iamge"
+        ]
     },
     {
         "model_name": "stable-diffusion-v1.5",
@@ -26,6 +36,10 @@
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/stable-diffusion-v1-5",
         "model_revision": "master",
+        "abilities": [
+            "text2iamge",
+            "image2image"
+        ],
         "controlnet": [
             {
                 "model_name":"canny",
@@ -77,6 +91,10 @@
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/stable-diffusion-xl-base-1.0",
         "model_revision": "master",
+        "abilities": [
+            "text2iamge",
+            "image2image"
+        ],
         "controlnet": [
             {
                 "model_name":"canny",
xinference/model/image/stable_diffusion/core.py

@@ -35,22 +35,23 @@ class DiffusionModel:
     def __init__(
         self,
         model_uid: str,
-        model_path: str,
+        model_path: Optional[str] = None,
         device: Optional[str] = None,
         lora_model: Optional[List[LoRA]] = None,
         lora_load_kwargs: Optional[Dict] = None,
         lora_fuse_kwargs: Optional[Dict] = None,
-        ability: Optional[str] = None,
+        abilities: Optional[List[str]] = None,
         **kwargs,
     ):
         self._model_uid = model_uid
         self._model_path = model_path
         self._device = device
         self._model = None
+        self._i2i_model = None  # image to image model
         self._lora_model = lora_model
         self._lora_load_kwargs = lora_load_kwargs or {}
         self._lora_fuse_kwargs = lora_fuse_kwargs or {}
-        self._ability = ability
+        self._abilities = abilities
         self._kwargs = kwargs
 
     def _apply_lora(self):
@@ -69,12 +70,12 @@
     def load(self):
         import torch
 
-        if self._ability in [None, "text2image", "image2image"]:
+        if "text2image" in self._abilities or "image2image" in self._abilities:
             from diffusers import AutoPipelineForText2Image as AutoPipelineModel
-        elif self._ability == "inpainting":
+        elif "inpainting" in self._abilities:
             from diffusers import AutoPipelineForInpainting as AutoPipelineModel
         else:
-            raise ValueError(f"Unknown ability: {self._ability}")
+            raise ValueError(f"Unknown ability: {self._abilities}")
 
         controlnet = self._kwargs.get("controlnet")
         if controlnet is not None:
@@ -94,35 +95,29 @@
                 self._model_path,
                 **self._kwargs,
             )
-        self._model = move_model_to_available_device(self._model)
+        if self._kwargs.get("cpu_offload", False):
+            logger.debug("CPU offloading model")
+            self._model.enable_model_cpu_offload()
+        else:
+            logger.debug("Loading model to available device")
+            self._model = move_model_to_available_device(self._model)
         # Recommended if your computer has < 64 GB of RAM
         self._model.enable_attention_slicing()
         self._apply_lora()
 
     def _call_model(
         self,
-        height: int,
-        width: int,
-        num_images_per_prompt: int,
         response_format: str,
+        model=None,
        **kwargs,
     ):
         logger.debug(
             "stable diffusion args: %s",
-            dict(
-                kwargs,
-                height=height,
-                width=width,
-                num_images_per_prompt=num_images_per_prompt,
-            ),
+            kwargs,
         )
-        assert callable(self._model)
-        images = self._model(
-            height=height,
-            width=width,
-            num_images_per_prompt=num_images_per_prompt,
-            **kwargs,
-        ).images
+        model = model if model is not None else self._model
+        assert callable(model)
+        images = model(**kwargs).images
         if response_format == "url":
             os.makedirs(XINFERENCE_IMAGE_DIR, exist_ok=True)
             image_list = []
@@ -140,7 +135,7 @@
                 return base64.b64encode(buffered.getvalue()).decode()
 
             with ThreadPoolExecutor() as executor:
-                results = list(map(partial(executor.submit, _gen_base64_image), images))
+                results = list(map(partial(executor.submit, _gen_base64_image), images))  # type: ignore
                 image_list = [Image(url=None, b64_json=s.result()) for s in results]
             return ImageList(created=int(time.time()), data=image_list)
         else:
@@ -172,19 +167,32 @@
         prompt: Optional[Union[str, List[str]]] = None,
         negative_prompt: Optional[Union[str, List[str]]] = None,
         n: int = 1,
-        size: str = "1024*1024",
+        size: Optional[str] = None,
         response_format: str = "url",
         **kwargs,
     ):
-        width, height = map(int, re.split(r"[^\d]+", size))
+        if "controlnet" in self._kwargs:
+            model = self._model
+        else:
+            if self._i2i_model is not None:
+                model = self._i2i_model
+            else:
+                from diffusers import AutoPipelineForImage2Image
+
+                self._i2i_model = model = AutoPipelineForImage2Image.from_pipe(
+                    self._model
+                )
+        if size:
+            width, height = map(int, re.split(r"[^\d]+", size))
+            kwargs["width"] = width
+            kwargs["height"] = height
         return self._call_model(
             image=image,
             prompt=prompt,
             negative_prompt=negative_prompt,
-            height=height,
-            width=width,
             num_images_per_prompt=n,
             response_format=response_format,
+            model=model,
             **kwargs,
         )
 
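
With abilities now a list, a pipeline loaded for text2image can also serve image2image by deriving a second pipeline through AutoPipelineForImage2Image.from_pipe, and cpu_offload can be requested at launch time instead of a full device load. A rough client-side sketch; the server address, the image-handle methods, and the assumption that extra launch kwargs such as cpu_offload reach DiffusionModel are illustrative, not confirmed by this diff:

    # Hedged sketch, not part of the diff: assumes a running server and that launch
    # kwargs are forwarded into DiffusionModel(**kwargs).
    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")
    uid = client.launch_model(
        model_name="stable-diffusion-v1.5",
        model_type="image",
        cpu_offload=True,  # new: enable_model_cpu_offload() instead of moving to GPU
    )
    image_model = client.get_model(uid)

    # text2image works as before.
    out = image_model.text_to_image("a lighthouse at dusk", size="512*512")

    # image2image reuses the same weights via AutoPipelineForImage2Image.from_pipe;
    # size is now optional and only sets width/height when provided.
    with open("sketch.png", "rb") as f:
        edited = image_model.image_to_image(image=f.read(), prompt="watercolor style", n=1)
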
xinference/model/llm/core.py

@@ -194,6 +194,7 @@ def create_llm_model_instance(
     quantization: Optional[str] = None,
     peft_model_config: Optional[PeftModelConfig] = None,
     download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[LLM, LLMDescription]:
     from .llm_family import cache, check_engine_by_spec_parameters, match_llm
@@ -221,7 +222,8 @@
         )
     logger.debug(f"Launching {model_uid} with {llm_cls.__name__}")
 
-    save_path = cache(llm_family, llm_spec, quantization)
+    if not model_path:
+        model_path = cache(llm_family, llm_spec, quantization)
 
     peft_model = peft_model_config.peft_model if peft_model_config else None
     if peft_model is not None:
@@ -231,7 +233,7 @@
                 llm_family,
                 llm_spec,
                 quantization,
-                save_path,
+                model_path,
                 kwargs,
                 peft_model,
             )
@@ -241,11 +243,11 @@
                 f"Load this without lora."
             )
             model = llm_cls(
-                model_uid, llm_family, llm_spec, quantization, save_path, kwargs
+                model_uid, llm_family, llm_spec, quantization, model_path, kwargs
             )
     else:
         model = llm_cls(
-            model_uid, llm_family, llm_spec, quantization, save_path, kwargs
+            model_uid, llm_family, llm_spec, quantization, model_path, kwargs
         )
     return model, LLMDescription(
         subpool_addr, devices, llm_family, llm_spec, quantization
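
The LLM path gets the same optional model_path handling: when a directory is supplied, cache() is skipped and the given path is loaded directly. A hedged sketch with a placeholder local directory, assuming the REST layer forwards model_path down to create_llm_model_instance:

    # Hedged sketch, not part of the diff: launches an LLM from a pre-downloaded directory.
    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")
    uid = client.launch_model(
        model_name="qwen2-instruct",
        model_engine="transformers",
        model_format="pytorch",
        model_size_in_billions=7,
        quantization="none",
        model_path="/data/models/Qwen2-7B-Instruct",  # placeholder local directory
    )
    chat_model = client.get_model(uid)
    print(chat_model.chat("Hello!")["choices"][0]["message"]["content"])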