xinference 0.13.1__py3-none-any.whl → 0.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- xinference/__init__.py +0 -1
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +99 -5
- xinference/client/restful/restful_client.py +98 -1
- xinference/core/chat_interface.py +2 -2
- xinference/core/model.py +85 -26
- xinference/core/scheduler.py +4 -4
- xinference/model/audio/chattts.py +40 -8
- xinference/model/audio/core.py +5 -2
- xinference/model/audio/cosyvoice.py +136 -0
- xinference/model/audio/model_spec.json +24 -0
- xinference/model/audio/model_spec_modelscope.json +27 -0
- xinference/model/flexible/launchers/__init__.py +1 -0
- xinference/model/flexible/launchers/image_process_launcher.py +70 -0
- xinference/model/image/core.py +3 -0
- xinference/model/image/model_spec.json +21 -0
- xinference/model/image/stable_diffusion/core.py +49 -7
- xinference/model/llm/llm_family.json +1065 -106
- xinference/model/llm/llm_family.py +26 -6
- xinference/model/llm/llm_family_csghub.json +39 -0
- xinference/model/llm/llm_family_modelscope.json +460 -47
- xinference/model/llm/pytorch/chatglm.py +243 -5
- xinference/model/llm/pytorch/cogvlm2.py +1 -1
- xinference/model/llm/sglang/core.py +7 -2
- xinference/model/llm/utils.py +78 -1
- xinference/model/llm/vllm/core.py +11 -0
- xinference/thirdparty/cosyvoice/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/bin/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +114 -0
- xinference/thirdparty/cosyvoice/bin/train.py +136 -0
- xinference/thirdparty/cosyvoice/cli/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +83 -0
- xinference/thirdparty/cosyvoice/cli/frontend.py +168 -0
- xinference/thirdparty/cosyvoice/cli/model.py +60 -0
- xinference/thirdparty/cosyvoice/dataset/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/dataset/dataset.py +160 -0
- xinference/thirdparty/cosyvoice/dataset/processor.py +369 -0
- xinference/thirdparty/cosyvoice/flow/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/flow/decoder.py +222 -0
- xinference/thirdparty/cosyvoice/flow/flow.py +135 -0
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +138 -0
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +49 -0
- xinference/thirdparty/cosyvoice/hifigan/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +55 -0
- xinference/thirdparty/cosyvoice/hifigan/generator.py +391 -0
- xinference/thirdparty/cosyvoice/llm/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/llm/llm.py +206 -0
- xinference/thirdparty/cosyvoice/transformer/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/transformer/activation.py +84 -0
- xinference/thirdparty/cosyvoice/transformer/attention.py +326 -0
- xinference/thirdparty/cosyvoice/transformer/convolution.py +145 -0
- xinference/thirdparty/cosyvoice/transformer/decoder.py +396 -0
- xinference/thirdparty/cosyvoice/transformer/decoder_layer.py +132 -0
- xinference/thirdparty/cosyvoice/transformer/embedding.py +293 -0
- xinference/thirdparty/cosyvoice/transformer/encoder.py +472 -0
- xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +236 -0
- xinference/thirdparty/cosyvoice/transformer/label_smoothing_loss.py +96 -0
- xinference/thirdparty/cosyvoice/transformer/positionwise_feed_forward.py +115 -0
- xinference/thirdparty/cosyvoice/transformer/subsampling.py +383 -0
- xinference/thirdparty/cosyvoice/utils/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/utils/class_utils.py +70 -0
- xinference/thirdparty/cosyvoice/utils/common.py +103 -0
- xinference/thirdparty/cosyvoice/utils/executor.py +110 -0
- xinference/thirdparty/cosyvoice/utils/file_utils.py +41 -0
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +125 -0
- xinference/thirdparty/cosyvoice/utils/mask.py +227 -0
- xinference/thirdparty/cosyvoice/utils/scheduler.py +739 -0
- xinference/thirdparty/cosyvoice/utils/train_utils.py +289 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.95c1d652.js → main.2ef0cfaf.js} +3 -3
- xinference/web/ui/build/static/js/main.2ef0cfaf.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b6807ecc0c231fea699533518a0eb2a2bf68a081ce00d452be40600dbffa17a7.json +1 -0
- {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/METADATA +18 -8
- {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/RECORD +80 -36
- xinference/web/ui/build/static/js/main.95c1d652.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/709711edada3f1596b309d571285fd31f1c364d66f4425bc28723d0088cc351a.json +0 -1
- /xinference/web/ui/build/static/js/{main.95c1d652.js.LICENSE.txt → main.2ef0cfaf.js.LICENSE.txt} +0 -0
- {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/LICENSE +0 -0
- {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/WHEEL +0 -0
- {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/entry_points.txt +0 -0
- {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/top_level.txt +0 -0
xinference/model/audio/core.py
CHANGED
@@ -20,6 +20,7 @@ from ...constants import XINFERENCE_CACHE_DIR
 from ..core import CacheableModelSpec, ModelDescription
 from ..utils import valid_model_revision
 from .chattts import ChatTTSModel
+from .cosyvoice import CosyVoiceModel
 from .whisper import WhisperModel

 MAX_ATTEMPTS = 3
@@ -150,14 +151,16 @@ def create_audio_model_instance(
     model_name: str,
     download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
     **kwargs,
-) -> Tuple[Union[WhisperModel, ChatTTSModel], AudioModelDescription]:
+) -> Tuple[Union[WhisperModel, ChatTTSModel, CosyVoiceModel], AudioModelDescription]:
     model_spec = match_audio(model_name, download_hub)
     model_path = cache(model_spec)
-    model: Union[WhisperModel, ChatTTSModel]
+    model: Union[WhisperModel, ChatTTSModel, CosyVoiceModel]
     if model_spec.model_family == "whisper":
         model = WhisperModel(model_uid, model_path, model_spec, **kwargs)
     elif model_spec.model_family == "ChatTTS":
         model = ChatTTSModel(model_uid, model_path, model_spec, **kwargs)
+    elif model_spec.model_family == "CosyVoice":
+        model = CosyVoiceModel(model_uid, model_path, model_spec, **kwargs)
     else:
         raise Exception(f"Unsupported audio model family: {model_spec.model_family}")
     model_description = AudioModelDescription(
xinference/model/audio/cosyvoice.py
ADDED
@@ -0,0 +1,136 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import io
+import logging
+from io import BytesIO
+from typing import TYPE_CHECKING, Optional
+
+if TYPE_CHECKING:
+    from .core import AudioModelFamilyV1
+
+logger = logging.getLogger(__name__)
+
+
+class CosyVoiceModel:
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: str,
+        model_spec: "AudioModelFamilyV1",
+        device: Optional[str] = None,
+        **kwargs,
+    ):
+        self._model_uid = model_uid
+        self._model_path = model_path
+        self._model_spec = model_spec
+        self._device = device
+        self._model = None
+        self._kwargs = kwargs
+
+    def load(self):
+        import os
+        import sys
+
+        # The yaml config loaded from model has hard-coded the import paths. please refer to: load_hyperpyyaml
+        sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../thirdparty"))
+
+        from cosyvoice.cli.cosyvoice import CosyVoice
+
+        self._model = CosyVoice(self._model_path)
+
+    def speech(
+        self,
+        input: str,
+        voice: str,
+        response_format: str = "mp3",
+        speed: float = 1.0,
+        stream: bool = False,
+        **kwargs,
+    ):
+        if stream:
+            raise Exception("CosyVoiceModel does not support stream.")
+
+        import torchaudio
+        from cosyvoice.utils.file_utils import load_wav
+
+        prompt_speech: Optional[bytes] = kwargs.pop("prompt_speech", None)
+        prompt_text: Optional[str] = kwargs.pop("prompt_text", None)
+        instruct_text: Optional[str] = kwargs.pop("instruct_text", None)
+
+        if "SFT" in self._model_spec.model_name:
+            # inference_sft
+            assert (
+                prompt_speech is None
+            ), "CosyVoice SFT model does not support prompt_speech"
+            assert (
+                prompt_text is None
+            ), "CosyVoice SFT model does not support prompt_text"
+            assert (
+                instruct_text is None
+            ), "CosyVoice SFT model does not support instruct_text"
+        elif "Instruct" in self._model_spec.model_name:
+            # inference_instruct
+            assert (
+                prompt_speech is None
+            ), "CosyVoice Instruct model does not support prompt_speech"
+            assert (
+                prompt_text is None
+            ), "CosyVoice Instruct model does not support prompt_text"
+            assert (
+                instruct_text is not None
+            ), "CosyVoice Instruct model expect a instruct_text"
+        else:
+            # inference_zero_shot
+            # inference_cross_lingual
+            assert prompt_speech is not None, "CosyVoice model expect a prompt_speech"
+            assert (
+                instruct_text is None
+            ), "CosyVoice model does not support instruct_text"
+
+        assert self._model is not None
+        if prompt_speech:
+            assert not voice, "voice can't be set with prompt speech."
+            with io.BytesIO(prompt_speech) as prompt_speech_io:
+                prompt_speech_16k = load_wav(prompt_speech_io, 16000)
+                if prompt_text:
+                    logger.info("CosyVoice inference_zero_shot")
+                    output = self._model.inference_zero_shot(
+                        input, prompt_text, prompt_speech_16k
+                    )
+                else:
+                    logger.info("CosyVoice inference_cross_lingual")
+                    output = self._model.inference_cross_lingual(
+                        input, prompt_speech_16k
+                    )
+        else:
+            available_speakers = self._model.list_avaliable_spks()
+            if not voice:
+                voice = available_speakers[0]
+            else:
+                assert (
+                    voice in available_speakers
+                ), f"Invalid voice {voice}, CosyVoice available speakers: {available_speakers}"
+            if instruct_text:
+                logger.info("CosyVoice inference_instruct")
+                output = self._model.inference_instruct(
+                    input, voice, instruct_text=instruct_text
+                )
+            else:
+                logger.info("CosyVoice inference_sft")
+                output = self._model.inference_sft(input, voice)
+
+        # Save the generated audio
+        with BytesIO() as out:
+            torchaudio.save(out, output["tts_speech"], 22050, format=response_format)
+            return out.getvalue()
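Taken together, speech() picks one of four CosyVoice inference paths from the model variant and the extra kwargs. A minimal sketch of the call shapes, assuming base, sft, and instruct are loaded CosyVoiceModel instances of the corresponding variants; the file names, transcripts, and the "中文女" speaker below are illustrative, with real speaker names coming from list_avaliable_spks():

    # Illustrative only: each call assumes the matching model variant is loaded.
    ref_wav: bytes = open("reference.wav", "rb").read()  # voice prompt, read at 16 kHz by load_wav

    # Base CosyVoice-300M, zero-shot cloning: prompt_speech plus its transcript
    audio = base.speech("Hello there.", voice="", prompt_speech=ref_wav,
                        prompt_text="transcript of reference.wav")

    # Base CosyVoice-300M, cross-lingual cloning: prompt_speech only
    audio = base.speech("你好。", voice="", prompt_speech=ref_wav)

    # CosyVoice-300M-SFT: built-in speaker; an empty voice falls back to the first speaker
    audio = sft.speech("Hello there.", voice="")

    # CosyVoice-300M-Instruct: built-in speaker plus a natural-language style instruction
    audio = instruct.speech("Hello there.", voice="中文女",
                            instruct_text="Speak with a calm, slow tone.")

Each call returns the encoded audio bytes in response_format (mp3 by default).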
xinference/model/audio/model_spec.json
CHANGED
@@ -102,5 +102,29 @@
         "model_revision": "ce5913842aebd78e4a01a02d47244b8d62ac4ee3",
         "ability": "text-to-audio",
         "multilingual": true
+    },
+    {
+        "model_name": "CosyVoice-300M",
+        "model_family": "CosyVoice",
+        "model_id": "model-scope/CosyVoice-300M",
+        "model_revision": "ca4e036d2db2aa4731cc1747859a68044b6a4694",
+        "ability": "audio-to-audio",
+        "multilingual": true
+    },
+    {
+        "model_name": "CosyVoice-300M-SFT",
+        "model_family": "CosyVoice",
+        "model_id": "model-scope/CosyVoice-300M-SFT",
+        "model_revision": "ab918940c6c134b1fc1f069246e67bad6b66abcb",
+        "ability": "text-to-audio",
+        "multilingual": true
+    },
+    {
+        "model_name": "CosyVoice-300M-Instruct",
+        "model_family": "CosyVoice",
+        "model_id": "model-scope/CosyVoice-300M-Instruct",
+        "model_revision": "fb5f676733139f35670bed9b59a77d476b1aa898",
+        "ability": "text-to-audio",
+        "multilingual": true
     }
 ]
xinference/model/audio/model_spec_modelscope.json
CHANGED
@@ -16,5 +16,32 @@
         "model_revision": "master",
         "ability": "text-to-audio",
         "multilingual": true
+    },
+    {
+        "model_name": "CosyVoice-300M",
+        "model_family": "CosyVoice",
+        "model_hub": "modelscope",
+        "model_id": "iic/CosyVoice-300M",
+        "model_revision": "master",
+        "ability": "audio-to-audio",
+        "multilingual": true
+    },
+    {
+        "model_name": "CosyVoice-300M-SFT",
+        "model_family": "CosyVoice",
+        "model_hub": "modelscope",
+        "model_id": "iic/CosyVoice-300M-SFT",
+        "model_revision": "master",
+        "ability": "text-to-audio",
+        "multilingual": true
+    },
+    {
+        "model_name": "CosyVoice-300M-Instruct",
+        "model_family": "CosyVoice",
+        "model_hub": "modelscope",
+        "model_id": "iic/CosyVoice-300M-Instruct",
+        "model_revision": "master",
+        "ability": "text-to-audio",
+        "multilingual": true
     }
 ]
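With these specs registered on both hubs, the CosyVoice models launch like any other built-in audio model. A rough sketch against a running server, assuming the RESTful client handle exposes a speech() method mirroring the signature in cosyvoice.py above (endpoint and output path are illustrative):

    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")
    model_uid = client.launch_model(model_name="CosyVoice-300M-SFT", model_type="audio")
    model = client.get_model(model_uid)

    audio_bytes = model.speech("Hello from CosyVoice.", voice="")  # mp3 by default
    with open("out.mp3", "wb") as f:
        f.write(audio_bytes)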
xinference/model/flexible/launchers/image_process_launcher.py
ADDED
@@ -0,0 +1,70 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import base64
+from io import BytesIO
+
+import PIL.Image
+import PIL.ImageOps
+
+from ....types import Image
+from ..core import FlexibleModel, FlexibleModelSpec
+
+
+class ImageRemoveBackgroundModel(FlexibleModel):
+    def infer(self, **kwargs):
+        invert = kwargs.get("invert", False)
+        b64_image: str = kwargs.get("image")  # type: ignore
+        only_mask = kwargs.pop("only_mask", True)
+        image_format = kwargs.pop("image_format", "PNG")
+        if not b64_image:
+            raise ValueError("No image found to remove background")
+        image = base64.b64decode(b64_image)
+
+        try:
+            from rembg import remove
+        except ImportError:
+            error_message = "Failed to import module 'rembg'"
+            installation_guide = [
+                "Please make sure 'rembg' is installed. ",
+                "You can install it by visiting the installation section of the git repo:\n",
+                "https://github.com/danielgatis/rembg?tab=readme-ov-file#installation",
+            ]
+
+            raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+
+        im = PIL.Image.open(BytesIO(image))
+        om = remove(im, only_mask=only_mask, **kwargs)
+        if invert:
+            om = PIL.ImageOps.invert(om)
+
+        buffered = BytesIO()
+        om.save(buffered, format=image_format)
+        img_str = base64.b64encode(buffered.getvalue()).decode()
+        return Image(url=None, b64_json=img_str)
+
+
+def launcher(model_uid: str, model_spec: FlexibleModelSpec, **kwargs) -> FlexibleModel:
+    task = kwargs.get("task")
+    device = kwargs.get("device")
+
+    if task == "remove_background":
+        return ImageRemoveBackgroundModel(
+            model_uid=model_uid,
+            model_path=model_spec.model_uri,  # type: ignore
+            device=device,
+            config=kwargs,
+        )
+    else:
+        raise ValueError(f"Unknown Task for image processing: {task}")
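For reference, a sketch of driving the new flexible task directly, assuming model is the ImageRemoveBackgroundModel instance returned by launcher(..., task="remove_background"); file names are placeholders, and any unconsumed kwargs are forwarded to rembg.remove:

    import base64

    with open("photo.png", "rb") as f:
        b64 = base64.b64encode(f.read()).decode()

    # only_mask=False returns the foreground cutout rather than the alpha mask
    result = model.infer(image=b64, only_mask=False, image_format="PNG")
    with open("photo_no_bg.png", "wb") as f:
        f.write(base64.b64decode(result.b64_json))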
xinference/model/image/core.py
CHANGED
@@ -45,6 +45,7 @@ class ImageModelFamilyV1(CacheableModelSpec):
     model_id: str
     model_revision: str
     model_hub: str = "huggingface"
+    ability: Optional[str]
     controlnet: Optional[List["ImageModelFamilyV1"]]


@@ -71,6 +72,7 @@ class ImageModelDescription(ModelDescription):
             "model_name": self._model_spec.model_name,
             "model_family": self._model_spec.model_family,
             "model_revision": self._model_spec.model_revision,
+            "ability": self._model_spec.ability,
             "controlnet": controlnet,
         }

@@ -234,6 +236,7 @@ def create_image_model_instance(
         lora_model_paths=lora_model,
         lora_load_kwargs=lora_load_kwargs,
         lora_fuse_kwargs=lora_fuse_kwargs,
+        ability=model_spec.ability,
         **kwargs,
     )
     model_description = ImageModelDescription(
xinference/model/image/model_spec.json
CHANGED
@@ -92,5 +92,26 @@
                 "model_revision": "62134b9d8e703b5d6f74f1534457287a8bba77ef"
             }
         ]
+    },
+    {
+        "model_name": "stable-diffusion-inpainting",
+        "model_family": "stable_diffusion",
+        "model_id": "runwayml/stable-diffusion-inpainting",
+        "model_revision": "51388a731f57604945fddd703ecb5c50e8e7b49d",
+        "ability": "inpainting"
+    },
+    {
+        "model_name": "stable-diffusion-2-inpainting",
+        "model_family": "stable_diffusion",
+        "model_id": "stabilityai/stable-diffusion-2-inpainting",
+        "model_revision": "81a84f49b15956b60b4272a405ad3daef3da4590",
+        "ability": "inpainting"
+    },
+    {
+        "model_name": "stable-diffusion-xl-inpainting",
+        "model_family": "stable_diffusion",
+        "model_id": "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
+        "model_revision": "115134f363124c53c7d878647567d04daf26e41e",
+        "ability": "inpainting"
     }
 ]
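These entries make the inpainting checkpoints launchable by name; the ability field added in xinference/model/image/core.py above routes them to the right diffusers pipeline. A short sketch with the standard client (endpoint illustrative):

    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")
    model_uid = client.launch_model(model_name="stable-diffusion-inpainting", model_type="image")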
xinference/model/image/stable_diffusion/core.py
CHANGED
@@ -16,6 +16,7 @@ import base64
 import logging
 import os
 import re
+import sys
 import time
 import uuid
 from concurrent.futures import ThreadPoolExecutor
@@ -39,6 +40,7 @@ class DiffusionModel:
         lora_model: Optional[List[LoRA]] = None,
         lora_load_kwargs: Optional[Dict] = None,
         lora_fuse_kwargs: Optional[Dict] = None,
+        ability: Optional[str] = None,
         **kwargs,
     ):
         self._model_uid = model_uid
@@ -48,6 +50,7 @@ class DiffusionModel:
         self._lora_model = lora_model
         self._lora_load_kwargs = lora_load_kwargs or {}
         self._lora_fuse_kwargs = lora_fuse_kwargs or {}
+        self._ability = ability
         self._kwargs = kwargs

     def _apply_lora(self):
@@ -64,8 +67,14 @@ class DiffusionModel:
         logger.info(f"Successfully loaded the LoRA for model {self._model_uid}.")

     def load(self):
-        # import torch
-        from diffusers import AutoPipelineForText2Image
+        import torch
+
+        if self._ability in [None, "text2image", "image2image"]:
+            from diffusers import AutoPipelineForText2Image as AutoPipelineModel
+        elif self._ability == "inpainting":
+            from diffusers import AutoPipelineForInpainting as AutoPipelineModel
+        else:
+            raise ValueError(f"Unknown ability: {self._ability}")

         controlnet = self._kwargs.get("controlnet")
         if controlnet is not None:
@@ -74,14 +83,23 @@ class DiffusionModel:
             logger.debug("Loading controlnet %s", controlnet)
             self._kwargs["controlnet"] = ControlNetModel.from_pretrained(controlnet)

-        self._model = AutoPipelineForText2Image.from_pretrained(
+        torch_dtype = self._kwargs.get("torch_dtype")
+        if sys.platform != "darwin" and torch_dtype is None:
+            # The following params crashes on Mac M2
+            self._kwargs["torch_dtype"] = torch.float16
+            self._kwargs["use_safetensors"] = True
+
+        logger.debug("Loading model %s", AutoPipelineModel)
+        self._model = AutoPipelineModel.from_pretrained(
             self._model_path,
             **self._kwargs,
-            # The following params crashes on Mac M2
-            # torch_dtype=torch.float16,
-            # use_safetensors=True,
         )
-        self._model = move_model_to_available_device(self._model)
+        if self._kwargs.get("cpu_offload", False):
+            logger.debug("CPU offloading model")
+            self._model.enable_model_cpu_offload()
+        else:
+            logger.debug("Loading model to available device")
+            self._model = move_model_to_available_device(self._model)
         # Recommended if your computer has < 64 GB of RAM
         self._model.enable_attention_slicing()
         self._apply_lora()
@@ -174,3 +192,27 @@ class DiffusionModel:
             response_format=response_format,
             **kwargs,
         )
+
+    def inpainting(
+        self,
+        image: bytes,
+        mask_image: bytes,
+        prompt: Optional[Union[str, List[str]]] = None,
+        negative_prompt: Optional[Union[str, List[str]]] = None,
+        n: int = 1,
+        size: str = "1024*1024",
+        response_format: str = "url",
+        **kwargs,
+    ):
+        width, height = map(int, re.split(r"[^\d]+", size))
+        return self._call_model(
+            image=image,
+            mask_image=mask_image,
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            height=height,
+            width=width,
+            num_images_per_prompt=n,
+            response_format=response_format,
+            **kwargs,
+        )