xinference 0.14.3__py3-none-any.whl → 0.14.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/core/worker.py +18 -9
- xinference/model/audio/chattts.py +4 -3
- xinference/model/audio/cosyvoice.py +4 -3
- xinference/model/audio/custom.py +4 -5
- xinference/model/embedding/core.py +2 -0
- xinference/model/embedding/custom.py +4 -5
- xinference/model/flexible/core.py +5 -1
- xinference/model/image/custom.py +4 -5
- xinference/model/image/stable_diffusion/core.py +21 -6
- xinference/model/llm/llm_family.py +5 -6
- xinference/model/llm/sglang/core.py +7 -1
- xinference/model/llm/transformers/core.py +2 -0
- xinference/model/llm/utils.py +3 -0
- xinference/model/llm/vllm/core.py +0 -33
- xinference/model/rerank/custom.py +4 -5
- xinference/model/utils.py +41 -1
- xinference/model/video/core.py +3 -1
- xinference/model/video/diffusers.py +41 -38
- xinference/model/video/model_spec.json +24 -1
- xinference/model/video/model_spec_modelscope.json +25 -1
- xinference/thirdparty/fish_speech/tools/api.py +1 -1
- xinference/thirdparty/matcha/__init__.py +0 -0
- xinference/thirdparty/matcha/app.py +357 -0
- xinference/thirdparty/matcha/cli.py +419 -0
- xinference/thirdparty/matcha/data/__init__.py +0 -0
- xinference/thirdparty/matcha/data/components/__init__.py +0 -0
- xinference/thirdparty/matcha/data/text_mel_datamodule.py +274 -0
- xinference/thirdparty/matcha/hifigan/__init__.py +0 -0
- xinference/thirdparty/matcha/hifigan/config.py +28 -0
- xinference/thirdparty/matcha/hifigan/denoiser.py +64 -0
- xinference/thirdparty/matcha/hifigan/env.py +17 -0
- xinference/thirdparty/matcha/hifigan/meldataset.py +217 -0
- xinference/thirdparty/matcha/hifigan/models.py +368 -0
- xinference/thirdparty/matcha/hifigan/xutils.py +60 -0
- xinference/thirdparty/matcha/models/__init__.py +0 -0
- xinference/thirdparty/matcha/models/baselightningmodule.py +210 -0
- xinference/thirdparty/matcha/models/components/__init__.py +0 -0
- xinference/thirdparty/matcha/models/components/decoder.py +443 -0
- xinference/thirdparty/matcha/models/components/flow_matching.py +132 -0
- xinference/thirdparty/matcha/models/components/text_encoder.py +410 -0
- xinference/thirdparty/matcha/models/components/transformer.py +316 -0
- xinference/thirdparty/matcha/models/matcha_tts.py +244 -0
- xinference/thirdparty/matcha/onnx/__init__.py +0 -0
- xinference/thirdparty/matcha/onnx/export.py +181 -0
- xinference/thirdparty/matcha/onnx/infer.py +168 -0
- xinference/thirdparty/matcha/text/__init__.py +53 -0
- xinference/thirdparty/matcha/text/cleaners.py +121 -0
- xinference/thirdparty/matcha/text/numbers.py +71 -0
- xinference/thirdparty/matcha/text/symbols.py +17 -0
- xinference/thirdparty/matcha/train.py +122 -0
- xinference/thirdparty/matcha/utils/__init__.py +5 -0
- xinference/thirdparty/matcha/utils/audio.py +82 -0
- xinference/thirdparty/matcha/utils/generate_data_statistics.py +112 -0
- xinference/thirdparty/matcha/utils/get_durations_from_trained_model.py +195 -0
- xinference/thirdparty/matcha/utils/instantiators.py +56 -0
- xinference/thirdparty/matcha/utils/logging_utils.py +53 -0
- xinference/thirdparty/matcha/utils/model.py +90 -0
- xinference/thirdparty/matcha/utils/monotonic_align/__init__.py +22 -0
- xinference/thirdparty/matcha/utils/monotonic_align/core.pyx +47 -0
- xinference/thirdparty/matcha/utils/monotonic_align/setup.py +7 -0
- xinference/thirdparty/matcha/utils/pylogger.py +21 -0
- xinference/thirdparty/matcha/utils/rich_utils.py +101 -0
- xinference/thirdparty/matcha/utils/utils.py +259 -0
- {xinference-0.14.3.dist-info → xinference-0.14.4.dist-info}/METADATA +20 -12
- {xinference-0.14.3.dist-info → xinference-0.14.4.dist-info}/RECORD +70 -28
- {xinference-0.14.3.dist-info → xinference-0.14.4.dist-info}/LICENSE +0 -0
- {xinference-0.14.3.dist-info → xinference-0.14.4.dist-info}/WHEEL +0 -0
- {xinference-0.14.3.dist-info → xinference-0.14.4.dist-info}/entry_points.txt +0 -0
- {xinference-0.14.3.dist-info → xinference-0.14.4.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json

 version_json = '''
 {
- "date": "2024-08-…",
+ "date": "2024-08-30T18:54:16+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "…",
- "version": "0.14.3"
+ "full-revisionid": "f3d510eceffbbbc41ce065919fd2c48411017573",
+ "version": "0.14.4"
 }
 '''  # END VERSION_JSON

xinference/core/worker.py
CHANGED
@@ -73,15 +73,15 @@ class WorkerActor(xo.StatelessActor):
         self._supervisor_ref: Optional[xo.ActorRefType] = None
         self._main_pool = main_pool
         self._main_pool.recover_sub_pool = self.recover_sub_pool
-        self._status_guard_ref: xo.ActorRefType[  # type: ignore
-            "StatusGuardActor"
-        ] = None
+        self._status_guard_ref: xo.ActorRefType["StatusGuardActor"] = (  # type: ignore
+            None
+        )
         self._event_collector_ref: xo.ActorRefType[  # type: ignore
             EventCollectorActor
         ] = None
-        self._cache_tracker_ref: xo.ActorRefType[  # type: ignore
-            CacheTrackerActor
-        ] = None
+        self._cache_tracker_ref: xo.ActorRefType[CacheTrackerActor] = (  # type: ignore
+            None
+        )

         # internal states.
         # temporary placeholder during model launch process:
@@ -146,7 +146,7 @@ class WorkerActor(xo.StatelessActor):
         else:
             recover_count = self._model_uid_to_recover_count.get(model_uid)
             try:
-                await self.terminate_model(model_uid)
+                await self.terminate_model(model_uid, is_model_die=True)
             except Exception:
                 pass
             if recover_count is not None:
@@ -664,6 +664,8 @@ class WorkerActor(xo.StatelessActor):

             ret.sort(key=sort_helper)
             return ret
+        elif model_type == "video":
+            return []
         elif model_type == "rerank":
             from ..model.rerank.custom import get_user_defined_reranks

@@ -703,6 +705,8 @@ class WorkerActor(xo.StatelessActor):
             for f in get_user_defined_audios():
                 if f.model_name == model_name:
                     return f
+        elif model_type == "video":
+            return None
         elif model_type == "rerank":
             from ..model.rerank.custom import get_user_defined_reranks

@@ -914,7 +918,7 @@ class WorkerActor(xo.StatelessActor):
         )

     @log_async(logger=logger)
-    async def terminate_model(self, model_uid: str):
+    async def terminate_model(self, model_uid: str, is_model_die=False):
         # Terminate model while its launching is not allow
         if model_uid in self._model_uid_launching_guard:
             raise ValueError(f"{model_uid} is launching")
@@ -963,11 +967,16 @@ class WorkerActor(xo.StatelessActor):
         self._model_uid_to_recover_count.pop(model_uid, None)
         self._model_uid_to_launch_args.pop(model_uid, None)

+        if is_model_die:
+            status = LaunchStatus.ERROR.name
+        else:
+            status = LaunchStatus.TERMINATED.name
+
         if self._status_guard_ref is None:
             _ = await self.get_supervisor_ref()
         assert self._status_guard_ref is not None
         await self._status_guard_ref.update_instance_info(
-            origin_uid, {"status": LaunchStatus.TERMINATED.name}
+            origin_uid, {"status": status}
         )

         # Provide an interface for future version of supervisor to call
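With the new `is_model_die` flag, the recovery path records a crashed model as ERROR while a user-initiated termination still ends in TERMINATED. A minimal sketch of the added status selection, assuming `LaunchStatus` is the enum defined in `xinference.core.status_guard` with `ERROR` and `TERMINATED` members:

    from xinference.core.status_guard import LaunchStatus

    def final_status(is_model_die: bool = False) -> str:
        # Mirrors the branch added to terminate_model above.
        return (LaunchStatus.ERROR if is_model_die else LaunchStatus.TERMINATED).name

    assert final_status() == "TERMINATED"
    assert final_status(is_model_die=True) == "ERROR"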
xinference/model/audio/chattts.py
CHANGED

@@ -11,11 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import base64
 import logging
 from io import BytesIO
 from typing import TYPE_CHECKING, Optional

+from ..utils import set_all_random_seed
+
 if TYPE_CHECKING:
     from .core import AudioModelFamilyV1

@@ -78,9 +81,7 @@ class ChatTTSModel:
         if rnd_spk_emb is None:
             seed = xxhash.xxh32_intdigest(voice)

-            torch.manual_seed(seed)
-            np.random.seed(seed)
-            torch.cuda.manual_seed(seed)
+            set_all_random_seed(seed)
             torch.backends.cudnn.deterministic = True
             torch.backends.cudnn.benchmark = False

xinference/model/audio/cosyvoice.py
CHANGED

@@ -16,6 +16,8 @@ import logging
 from io import BytesIO
 from typing import TYPE_CHECKING, Optional

+from ..utils import set_all_random_seed
+
 if TYPE_CHECKING:
     from .core import AudioModelFamilyV1

@@ -67,6 +69,7 @@ class CosyVoiceModel:
         prompt_speech: Optional[bytes] = kwargs.pop("prompt_speech", None)
         prompt_text: Optional[str] = kwargs.pop("prompt_text", None)
         instruct_text: Optional[str] = kwargs.pop("instruct_text", None)
+        seed: Optional[int] = kwargs.pop("seed", 0)

         if "SFT" in self._model_spec.model_name:
             # inference_sft
@@ -87,9 +90,6 @@ class CosyVoiceModel:
             assert (
                 prompt_text is None
             ), "CosyVoice Instruct model does not support prompt_text"
-            assert (
-                instruct_text is not None
-            ), "CosyVoice Instruct model expect a instruct_text"
         else:
             # inference_zero_shot
             # inference_cross_lingual
@@ -99,6 +99,7 @@ class CosyVoiceModel:
             ), "CosyVoice model does not support instruct_text"

         assert self._model is not None
+        set_all_random_seed(seed)
         if prompt_speech:
             assert not voice, "voice can't be set with prompt speech."
             with io.BytesIO(prompt_speech) as prompt_speech_io:
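Because `seed` is popped from `kwargs` (default 0) and fed to `set_all_random_seed` right before inference, it can be passed straight through a client speech call. A hedged usage sketch, assuming a running server on localhost:9997 and a launched CosyVoice SFT model whose uid is `my-cosyvoice` (both hypothetical):

    from xinference.client import Client

    client = Client("http://localhost:9997")
    model = client.get_model("my-cosyvoice")  # hypothetical model uid
    # Same seed and same inputs restore the same random state inside
    # CosyVoice, so repeated calls should yield identical audio bytes.
    audio = model.speech("你好，世界", voice="中文女", seed=42)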
xinference/model/audio/custom.py
CHANGED
@@ -88,6 +88,10 @@ def register_audio(model_spec: CustomAudioModelFamilyV1, persist: bool):
     if not is_valid_model_name(model_spec.model_name):
         raise ValueError(f"Invalid model name {model_spec.model_name}.")

+    model_uri = model_spec.model_uri
+    if model_uri and not is_valid_model_uri(model_uri):
+        raise ValueError(f"Invalid model URI {model_uri}.")
+
     with UD_AUDIO_LOCK:
         for model_name in (
             list(BUILTIN_AUDIO_MODELS.keys())
@@ -102,11 +106,6 @@ def register_audio(model_spec: CustomAudioModelFamilyV1, persist: bool):
             UD_AUDIOS.append(model_spec)

     if persist:
-        # We only validate model URL when persist is True.
-        model_uri = model_spec.model_uri
-        if model_uri and not is_valid_model_uri(model_uri):
-            raise ValueError(f"Invalid model URI {model_uri}.")
-
         persist_path = os.path.join(
             XINFERENCE_MODEL_DIR, "audio", f"{model_spec.model_name}.json"
         )
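The same four-line guard appears in every `register_*` function touched by this release: the URI is now validated up front, so `register_audio(spec, persist=False)` fails just as fast as the persisted path used to. A small behavior sketch, assuming `is_valid_model_uri` keeps its existing contract of accepting only resolvable local paths / file URIs:

    import tempfile

    from xinference.model.utils import is_valid_model_uri

    with tempfile.TemporaryDirectory() as d:
        assert is_valid_model_uri(d)                 # existing directory: valid
    assert not is_valid_model_uri("/no/such/path")   # missing path: invalid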
xinference/model/embedding/core.py
CHANGED

@@ -124,6 +124,7 @@ class EmbeddingModel:
         model_path: str,
         model_spec: EmbeddingModelSpec,
         device: Optional[str] = None,
+        **kwargs,
     ):
         self._model_uid = model_uid
         self._model_path = model_path
@@ -131,6 +132,7 @@ class EmbeddingModel:
         self._model = None
         self._counter = 0
         self._model_spec = model_spec
+        self._kwargs = kwargs

     def load(self):
         try:
xinference/model/embedding/custom.py
CHANGED

@@ -47,6 +47,10 @@ def register_embedding(model_spec: CustomEmbeddingModelSpec, persist: bool):
     if not is_valid_model_name(model_spec.model_name):
         raise ValueError(f"Invalid model name {model_spec.model_name}.")

+    model_uri = model_spec.model_uri
+    if model_uri and not is_valid_model_uri(model_uri):
+        raise ValueError(f"Invalid model URI {model_uri}.")
+
     with UD_EMBEDDING_LOCK:
         for model_name in (
             list(BUILTIN_EMBEDDING_MODELS.keys())
@@ -61,11 +65,6 @@ def register_embedding(model_spec: CustomEmbeddingModelSpec, persist: bool):
             UD_EMBEDDINGS.append(model_spec)

     if persist:
-        # We only validate model URL when persist is True.
-        model_uri = model_spec.model_uri
-        if model_uri and not is_valid_model_uri(model_uri):
-            raise ValueError(f"Invalid model URI {model_uri}.")
-
         persist_path = os.path.join(
             XINFERENCE_MODEL_DIR, "embedding", f"{model_spec.model_name}.json"
         )
xinference/model/flexible/core.py
CHANGED

@@ -99,11 +99,15 @@ def get_flexible_model_descriptions():


 def register_flexible_model(model_spec: FlexibleModelSpec, persist: bool):
-    from ..utils import is_valid_model_name
+    from ..utils import is_valid_model_name, is_valid_model_uri

     if not is_valid_model_name(model_spec.model_name):
         raise ValueError(f"Invalid model name {model_spec.model_name}.")

+    model_uri = model_spec.model_uri
+    if model_uri and not is_valid_model_uri(model_uri):
+        raise ValueError(f"Invalid model URI {model_uri}.")
+
     if model_spec.launcher_args:
         try:
             model_spec.parser_args()
xinference/model/image/custom.py
CHANGED
@@ -47,6 +47,10 @@ def register_image(model_spec: CustomImageModelFamilyV1, persist: bool):
     if not is_valid_model_name(model_spec.model_name):
         raise ValueError(f"Invalid model name {model_spec.model_name}.")

+    model_uri = model_spec.model_uri
+    if model_uri and not is_valid_model_uri(model_uri):
+        raise ValueError(f"Invalid model URI {model_uri}")
+
     with UD_IMAGE_LOCK:
         for model_name in (
             list(BUILTIN_IMAGE_MODELS.keys())
@@ -60,11 +64,6 @@ def register_image(model_spec: CustomImageModelFamilyV1, persist: bool):
             UD_IMAGES.append(model_spec)

     if persist:
-        # We only validate model URL when persist is True.
-        model_uri = model_spec.model_uri
-        if model_uri and not is_valid_model_uri(model_uri):
-            raise ValueError(f"Invalid model URI {model_uri}")
-
         persist_path = os.path.join(
             XINFERENCE_MODEL_DIR, "image", f"{model_spec.model_name}.json"
         )
xinference/model/image/stable_diffusion/core.py
CHANGED

@@ -257,15 +257,19 @@ class DiffusionModel:
             self._i2i_model = model = AutoPipelineForImage2Image.from_pipe(
                 self._model
             )
-
-        width, height = map(int, re.split(r"[^\d]+", size))
-        kwargs["width"] = width
-        kwargs["height"] = height
+
         if padding_image_to_multiple := kwargs.pop("padding_image_to_multiple", None):
             # Model like SD3 image to image requires image's height and width is times of 16
             # padding the image if specified
             image = self.pad_to_multiple(image, multiple=int(padding_image_to_multiple))

+        if size:
+            width, height = map(int, re.split(r"[^\d]+", size))
+            if padding_image_to_multiple:
+                width, height = image.size
+            kwargs["width"] = width
+            kwargs["height"] = height
+
         self._filter_kwargs(kwargs)
         return self._call_model(
             image=image,
@@ -279,8 +283,8 @@ class DiffusionModel:

     def inpainting(
         self,
-        image: …,
-        mask_image: …,
+        image: PIL.Image,
+        mask_image: PIL.Image,
         prompt: Optional[Union[str, List[str]]] = None,
         negative_prompt: Optional[Union[str, List[str]]] = None,
         n: int = 1,
@@ -306,6 +310,17 @@ class DiffusionModel:
         model = self._model

         width, height = map(int, re.split(r"[^\d]+", size))
+
+        if padding_image_to_multiple := kwargs.pop("padding_image_to_multiple", None):
+            # Model like SD3 inpainting requires image's height and width is times of 16
+            # padding the image if specified
+            image = self.pad_to_multiple(image, multiple=int(padding_image_to_multiple))
+            mask_image = self.pad_to_multiple(
+                mask_image, multiple=int(padding_image_to_multiple)
+            )
+            # calculate actual image size after padding
+            width, height = image.size
+
         return self._call_model(
             image=image,
             mask_image=mask_image,
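After `pad_to_multiple` runs, the pipeline must be told the padded dimensions rather than the ones parsed from `size`, which is why both hunks recompute `width, height = image.size`. An illustrative stand-alone sketch of padding to a multiple of 16 (my own sketch, not the library's `pad_to_multiple`):

    import PIL.Image

    def pad_to_multiple(image: PIL.Image.Image, multiple: int = 16) -> PIL.Image.Image:
        # Round each dimension up to the next multiple and paste the original
        # into the top-left corner of the enlarged canvas.
        w, h = image.size
        new_w = (w + multiple - 1) // multiple * multiple
        new_h = (h + multiple - 1) // multiple * multiple
        canvas = PIL.Image.new(image.mode, (new_w, new_h))
        canvas.paste(image, (0, 0))
        return canvas

    img = PIL.Image.new("RGB", (1000, 750))
    assert pad_to_multiple(img).size == (1008, 752)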
xinference/model/llm/llm_family.py
CHANGED

@@ -1004,6 +1004,11 @@ def register_llm(llm_family: LLMFamilyV1, persist: bool):
     if not is_valid_model_name(llm_family.model_name):
         raise ValueError(f"Invalid model name {llm_family.model_name}.")

+    for spec in llm_family.model_specs:
+        model_uri = spec.model_uri
+        if model_uri and not is_valid_model_uri(model_uri):
+            raise ValueError(f"Invalid model URI {model_uri}.")
+
     with UD_LLM_FAMILIES_LOCK:
         for family in BUILTIN_LLM_FAMILIES + UD_LLM_FAMILIES:
             if llm_family.model_name == family.model_name:
@@ -1015,12 +1020,6 @@ def register_llm(llm_family: LLMFamilyV1, persist: bool):
             generate_engine_config_by_model_family(llm_family)

     if persist:
-        # We only validate model URL when persist is True.
-        for spec in llm_family.model_specs:
-            model_uri = spec.model_uri
-            if model_uri and not is_valid_model_uri(model_uri):
-                raise ValueError(f"Invalid model URI {model_uri}.")
-
         persist_path = os.path.join(
             XINFERENCE_MODEL_DIR, "llm", f"{llm_family.model_name}.json"
         )
xinference/model/llm/sglang/core.py
CHANGED

@@ -113,6 +113,13 @@ class SGLANGModel(LLM):
             raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")

         self._model_config = self._sanitize_model_config(self._model_config)
+
+        # Fix: GH#2169
+        if sgl.__version__ >= "0.2.14":
+            self._model_config.setdefault("triton_attention_reduce_in_fp32", False)
+        else:
+            self._model_config.setdefault("attention_reduce_in_fp32", False)
+
         logger.info(
             f"Loading {self.model_uid} with following model config: {self._model_config}"
         )
@@ -152,7 +159,6 @@ class SGLANGModel(LLM):
         else:
             model_config["mem_fraction_static"] = 0.88
         model_config.setdefault("log_level", "info")
-        model_config.setdefault("attention_reduce_in_fp32", False)

         return model_config

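Note the gate compares `sgl.__version__` lexicographically, which happens to work for the versions in play but would misorder, say, "0.2.9" against "0.2.14". A version-aware variant (a hypothetical refactor, not what this release ships) would parse the strings first:

    import sglang as sgl
    from packaging import version

    model_config: dict = {}
    key = (
        "triton_attention_reduce_in_fp32"
        if version.parse(sgl.__version__) >= version.parse("0.2.14")
        else "attention_reduce_in_fp32"
    )
    model_config.setdefault(key, False)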
xinference/model/llm/utils.py
CHANGED
@@ -32,6 +32,7 @@ from ...types import (
     Completion,
     CompletionChunk,
 )
+from ..utils import ensure_cache_cleared
 from .llm_family import (
     LlamaCppLLMSpecV1,
     LLMFamilyV1,
@@ -576,6 +577,7 @@ Begin!"""
         return cast(ChatCompletionChunk, chat_chunk)

     @classmethod
+    @ensure_cache_cleared
     def _to_chat_completion_chunks(
         cls,
         chunks: Iterator[CompletionChunk],
@@ -608,6 +610,7 @@ Begin!"""
             i += 1

     @staticmethod
+    @ensure_cache_cleared
     def _to_chat_completion(completion: Completion) -> ChatCompletion:
         return {
             "id": "chat" + completion["id"],
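`ensure_cache_cleared` (added to xinference/model/utils.py further down in this diff) wraps plain functions and generators differently: for a generator, garbage collection and cache emptying run only after the stream is exhausted. A minimal behavior sketch, assuming torch is installed (on CPU-only machines `empty_cache()` is effectively a no-op):

    from xinference.model.utils import ensure_cache_cleared

    @ensure_cache_cleared
    def stream():
        yield from ("a", "b")
        # gc.collect() / empty_cache() run after the last chunk is consumed

    @ensure_cache_cleared
    def convert(x):
        return {"converted": x}  # cleanup runs in a finally block

    assert list(stream()) == ["a", "b"]
    assert convert(1) == {"converted": 1}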
xinference/model/llm/vllm/core.py
CHANGED

@@ -643,39 +643,6 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):


 class VLLMVisionModel(VLLMModel, ChatModelMixin):
-    def load(self):
-        try:
-            import vllm
-            from vllm.engine.arg_utils import AsyncEngineArgs
-            from vllm.engine.async_llm_engine import AsyncLLMEngine
-        except ImportError:
-            error_message = "Failed to import module 'vllm'"
-            installation_guide = [
-                "Please make sure 'vllm' is installed. ",
-                "You can install it by `pip install vllm`\n",
-            ]
-            raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
-
-        if vllm.__version__ >= "0.3.1":
-            # from vllm v0.3.1, it uses cupy as NCCL backend
-            # in which cupy will fork a process
-            # only for xoscar >= 0.3.0, new process is allowed in subpool
-            # besides, xinference set start method as forkserver for unix
-            # we need to set it to fork to make cupy NCCL work
-            multiprocessing.set_start_method("fork", force=True)
-
-        self._model_config = self._sanitize_model_config(self._model_config)
-
-        logger.info(
-            f"Loading {self.model_uid} with following model config: {self._model_config}"
-        )
-
-        engine_args = AsyncEngineArgs(
-            model=self.model_path,
-            **self._model_config,
-        )
-        self._engine = AsyncLLMEngine.from_engine_args(engine_args)
-
     @classmethod
     def match(
         cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
xinference/model/rerank/custom.py
CHANGED

@@ -48,6 +48,10 @@ def register_rerank(model_spec: CustomRerankModelSpec, persist: bool):
     if not is_valid_model_name(model_spec.model_name):
         raise ValueError(f"Invalid model name {model_spec.model_name}.")

+    model_uri = model_spec.model_uri
+    if model_uri and not is_valid_model_uri(model_uri):
+        raise ValueError(f"Invalid model URI {model_uri}.")
+
     with UD_RERANK_LOCK:
         for model_name in (
             list(BUILTIN_RERANK_MODELS.keys())
@@ -62,11 +66,6 @@ def register_rerank(model_spec: CustomRerankModelSpec, persist: bool):
             UD_RERANKS.append(model_spec)

     if persist:
-        # We only validate model URL when persist is True.
-        model_uri = model_spec.model_uri
-        if model_uri and not is_valid_model_uri(model_uri):
-            raise ValueError(f"Invalid model URI {model_uri}.")
-
         persist_path = os.path.join(
             XINFERENCE_MODEL_DIR, "rerank", f"{model_spec.model_name}.json"
         )
xinference/model/utils.py
CHANGED
@@ -11,17 +11,24 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+import functools
+import gc
+import inspect
 import json
 import logging
 import os
+import random
 from json import JSONDecodeError
 from pathlib import Path
 from typing import Any, Callable, Dict, Optional, Tuple, Union

 import huggingface_hub
+import numpy as np
+import torch

 from ..constants import XINFERENCE_CACHE_DIR, XINFERENCE_ENV_MODEL_SRC
-from ..device_utils import get_available_device, is_device_available
+from ..device_utils import empty_cache, get_available_device, is_device_available
 from .core import CacheableModelSpec

 logger = logging.getLogger(__name__)
@@ -348,3 +355,36 @@ def convert_float_to_int_or_str(model_size: float) -> Union[int, str]:
         return int(model_size)
     else:
         return str(model_size)
+
+
+def ensure_cache_cleared(func: Callable):
+    assert not inspect.iscoroutinefunction(func) and not inspect.isasyncgenfunction(
+        func
+    )
+    if inspect.isgeneratorfunction(func):
+
+        @functools.wraps(func)
+        def inner(*args, **kwargs):
+            for obj in func(*args, **kwargs):
+                yield obj
+            gc.collect()
+            empty_cache()
+
+    else:
+
+        @functools.wraps(func)
+        def inner(*args, **kwargs):
+            try:
+                return func(*args, **kwargs)
+            finally:
+                gc.collect()
+                empty_cache()
+
+    return inner
+
+
+def set_all_random_seed(seed: int):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
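`set_all_random_seed` seeds Python's `random`, NumPy, and torch (CPU plus every CUDA device) in one call; `torch.cuda.manual_seed_all` silently does nothing on CPU-only installs. A quick determinism check:

    import torch

    from xinference.model.utils import set_all_random_seed

    set_all_random_seed(42)
    a = torch.randn(3)
    set_all_random_seed(42)
    b = torch.randn(3)
    assert torch.equal(a, b)  # same seed, same draw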
xinference/model/video/core.py
CHANGED
@@ -14,7 +14,7 @@
 import logging
 import os
 from collections import defaultdict
-from typing import Dict, List, Literal, Optional, Tuple
+from typing import Any, Dict, List, Literal, Optional, Tuple

 from ...constants import XINFERENCE_CACHE_DIR
 from ..core import CacheableModelSpec, ModelDescription
@@ -44,6 +44,8 @@ class VideoModelFamilyV1(CacheableModelSpec):
     model_revision: str
     model_hub: str = "huggingface"
     model_ability: Optional[List[str]]
+    default_model_config: Optional[Dict[str, Any]]
+    default_generate_config: Optional[Dict[str, Any]]


 class VideoModelDescription(ModelDescription):
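The two new optional fields give each model_spec.json entry a place for loader defaults (merged into the `from_pretrained` kwargs) and generation defaults (merged into the pipeline call), with user-supplied kwargs winning on conflict. A hypothetical entry shaped like the new schema; the field values here are illustrative, not copied from the shipped JSON:

    spec_entry = {
        "model_name": "CogVideoX-2b",          # illustrative
        "model_family": "CogVideoX",
        "model_id": "THUDM/CogVideoX-2b",
        "model_revision": "…",
        "default_model_config": {"torch_dtype": "bfloat16"},
        "default_generate_config": {"guidance_scale": 6},
    }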
xinference/model/video/diffusers.py
CHANGED

@@ -15,7 +15,6 @@
 import base64
 import logging
 import os
-import sys
 import time
 import uuid
 from concurrent.futures import ThreadPoolExecutor
@@ -24,10 +23,9 @@ from typing import TYPE_CHECKING, List, Union

 import numpy as np
 import PIL.Image
-import torch

 from ...constants import XINFERENCE_VIDEO_DIR
-from ...device_utils import move_model_to_available_device
+from ...device_utils import gpu_count, move_model_to_available_device
 from ...types import Video, VideoList

 if TYPE_CHECKING:
@@ -76,41 +74,58 @@ class DiffUsersVideoModel:
     def load(self):
         import torch

-        …
-        …
-        …
-        …
-        …
-        …
+        kwargs = self._model_spec.default_model_config.copy()
+        kwargs.update(self._kwargs)
+
+        scheduler_cls_name = kwargs.pop("scheduler", None)
+
+        torch_dtype = kwargs.get("torch_dtype")
         if isinstance(torch_dtype, str):
-            …
+            kwargs["torch_dtype"] = getattr(torch, torch_dtype)
+        logger.debug("Loading video model with kwargs: %s", kwargs)

         if self._model_spec.model_family == "CogVideoX":
+            import diffusers
             from diffusers import CogVideoXPipeline

-            self._model = CogVideoXPipeline.from_pretrained(
-                self._model_path, **…
+            pipeline = self._model = CogVideoXPipeline.from_pretrained(
+                self._model_path, **kwargs
             )
         else:
             raise Exception(
                 f"Unsupported model family: {self._model_spec.model_family}"
             )

-        if …:
+        if scheduler_cls_name:
+            logger.debug("Using scheduler: %s", scheduler_cls_name)
+            pipeline.scheduler = getattr(diffusers, scheduler_cls_name).from_config(
+                pipeline.scheduler.config, timestep_spacing="trailing"
+            )
+        if kwargs.get("compile_graph", False):
+            pipeline.transformer = torch.compile(
+                pipeline.transformer, mode="max-autotune", fullgraph=True
+            )
+        if kwargs.get("cpu_offload", False):
             logger.debug("CPU offloading model")
-            …
-            …
+            pipeline.enable_model_cpu_offload()
+            if kwargs.get("sequential_cpu_offload", True):
+                pipeline.enable_sequential_cpu_offload()
+            pipeline.vae.enable_slicing()
+            pipeline.vae.enable_tiling()
+        elif not kwargs.get("device_map"):
             logger.debug("Loading model to available device")
-            …
+            if gpu_count() > 1:
+                kwargs["device_map"] = "balanced"
+            else:
+                pipeline = move_model_to_available_device(self._model)
             # Recommended if your computer has < 64 GB of RAM
-            …
+            pipeline.enable_attention_slicing()

     def text_to_video(
         self,
         prompt: str,
         n: int = 1,
         num_inference_steps: int = 50,
-        guidance_scale: int = 6,
         response_format: str = "b64_json",
         **kwargs,
     ) -> VideoList:
@@ -121,31 +136,19 @@ class DiffUsersVideoModel:
         # from diffusers.utils import export_to_video
         from ...device_utils import empty_cache

+        assert self._model is not None
+        assert callable(self._model)
+        generate_kwargs = self._model_spec.default_generate_config.copy()
+        generate_kwargs.update(kwargs)
+        generate_kwargs["num_videos_per_prompt"] = n
         logger.debug(
             "diffusers text_to_video args: %s",
-            kwargs,
+            generate_kwargs,
         )
-        assert self._model is not None
-        if self._kwargs.get("cpu_offload"):
-            # if enabled cpu offload,
-            # the model.device would be CPU
-            device = "cuda"
-        else:
-            device = self._model.device
-        prompt_embeds, _ = self._model.encode_prompt(
-            prompt=prompt,
-            do_classifier_free_guidance=True,
-            num_videos_per_prompt=n,
-            max_sequence_length=226,
-            device=device,
-            dtype=torch.float16,
-        )
-        assert callable(self._model)
         output = self._model(
+            prompt=prompt,
             num_inference_steps=num_inference_steps,
-            guidance_scale=guidance_scale,
-            prompt_embeds=prompt_embeds,
-            **kwargs,
+            **generate_kwargs,
         )

         # clean cache