xinference 0.10.0__py3-none-any.whl → 0.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of xinference has been flagged as a potentially problematic release.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +34 -15
- xinference/client/oscar/actor_client.py +4 -3
- xinference/client/restful/restful_client.py +40 -18
- xinference/core/supervisor.py +48 -9
- xinference/core/worker.py +13 -8
- xinference/deploy/cmdline.py +22 -9
- xinference/model/audio/__init__.py +40 -1
- xinference/model/audio/core.py +25 -45
- xinference/model/audio/custom.py +148 -0
- xinference/model/core.py +6 -9
- xinference/model/embedding/core.py +1 -2
- xinference/model/embedding/model_spec.json +24 -0
- xinference/model/embedding/model_spec_modelscope.json +24 -0
- xinference/model/image/core.py +12 -4
- xinference/model/image/stable_diffusion/core.py +8 -7
- xinference/model/llm/__init__.py +0 -6
- xinference/model/llm/core.py +9 -14
- xinference/model/llm/ggml/llamacpp.py +2 -10
- xinference/model/llm/llm_family.json +507 -7
- xinference/model/llm/llm_family.py +41 -4
- xinference/model/llm/llm_family_modelscope.json +260 -0
- xinference/model/llm/pytorch/baichuan.py +4 -3
- xinference/model/llm/pytorch/chatglm.py +5 -2
- xinference/model/llm/pytorch/core.py +37 -41
- xinference/model/llm/pytorch/falcon.py +6 -5
- xinference/model/llm/pytorch/internlm2.py +5 -2
- xinference/model/llm/pytorch/llama_2.py +6 -5
- xinference/model/llm/pytorch/qwen_vl.py +2 -0
- xinference/model/llm/pytorch/vicuna.py +4 -3
- xinference/model/llm/pytorch/yi_vl.py +4 -2
- xinference/model/llm/utils.py +42 -4
- xinference/model/llm/vllm/core.py +54 -6
- xinference/model/rerank/core.py +26 -12
- xinference/model/rerank/model_spec.json +24 -0
- xinference/model/rerank/model_spec_modelscope.json +25 -1
- xinference/model/utils.py +12 -1
- xinference/thirdparty/omnilmm/chat.py +1 -1
- xinference/types.py +70 -19
- xinference/utils.py +1 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.26fdbfbe.js +3 -0
- xinference/web/ui/build/static/js/main.26fdbfbe.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/15e2cf8cd8d0989719b6349428ff576f9009ff4c2dcc52378be0bd938e82495e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1870cd6f7054d04e049e363c0a85526584fe25519378609d2838e28d7492bbf1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1e86938a0cdf706d21e99b21f5d868fa247c0c88b26807047e26dcdc4d9a9db3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3c2f277c93c5f1638e08db38df0d0fb4e58d1c5571aea03241a5c04ff4094704.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5393569d846332075b93b55656716a34f50e0a8c970be789502d7e6c49755fd7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/59ce49eae0f486af4c5034d4d2f9ca77c3ec3a32ecc560085caf5ef482b5f4c9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63a4c48f0326d071c7772c46598215c006ae41fd3d4ff3577fe717de66ad6e89.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b9cbcb6d77ba21b22c6950b6fb5b305d23c19cf747f99f7d48b6b046f8f7b1b0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/de0299226173b0662b573f49e3992220f6611947073bd66ac079728a8bc8837d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e6eccc9aa641e7da833492e27846dc965f9750281420977dc84654ca6ed221e4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e9b52d171223bb59fb918316297a051cdfd42dd453e8260fd918e90bc0a4ebdf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f4d5d1a41892a754c1ee0237450d804b20612d1b657945b59e564161ea47aa7a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f9290c0738db50065492ceedc6a4af25083fe18399b7c44d942273349ad9e643.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/fad4cd70de36ef6e6d5f8fd74a10ded58d964a8a91ef7681693fbb8376552da7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +1 -0
- {xinference-0.10.0.dist-info → xinference-0.10.2.dist-info}/METADATA +13 -10
- {xinference-0.10.0.dist-info → xinference-0.10.2.dist-info}/RECORD +71 -74
- xinference/model/llm/ggml/ctransformers.py +0 -281
- xinference/model/llm/ggml/ctransformers_util.py +0 -161
- xinference/web/ui/build/static/js/main.98516614.js +0 -3
- xinference/web/ui/build/static/js/main.98516614.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0bd70b1ecf307e2681318e864f4692305b6350c8683863007f4caf2f9ac33b6e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/139969fd25258eb7decc9505f30b779089bba50c402bb5c663008477c7bff73b.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/18e5d5422e2464abf4a3e6d38164570e2e426e0a921e9a2628bbae81b18da353.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3e055de705e397e1d413d7f429589b1a98dd78ef378b97f0cdb462c5f2487d5e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3f357ab57b8e7fade54c667f0e0ebf2787566f72bfdca0fea14e395b5c203753.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/52aa27272b4b9968f62666262b47661cb1992336a2aff3b13994cc36877b3ec3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/60c4b98d8ea7479fb0c94cfd19c8128f17bd7e27a1e73e6dd9adf6e9d88d18eb.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/7e094845f611802b024b57439cbf911038169d06cdf6c34a72a7277f35aa71a4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/98b7ef307f436affe13d75a4f265b27e828ccc2b10ffae6513abe2681bc11971.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9d7c49815d97539207e5aab2fb967591b5fed7791218a0762539efc9491f36af.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b400cfc9db57fa6c70cd2bad055b73c5079fde0ed37974009d898083f6af8cd8.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d0d0b591d9adaf42b83ad6633f8b7c118541a4b80ea957c303d3bf9b86fbad0a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e1d9b2ae4e1248658704bc6bfc5d6160dcd1a9e771ea4ae8c1fed0aaddeedd29.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +0 -1
- /xinference/web/ui/build/static/js/{main.98516614.js.LICENSE.txt → main.26fdbfbe.js.LICENSE.txt} +0 -0
- {xinference-0.10.0.dist-info → xinference-0.10.2.dist-info}/LICENSE +0 -0
- {xinference-0.10.0.dist-info → xinference-0.10.2.dist-info}/WHEEL +0 -0
- {xinference-0.10.0.dist-info → xinference-0.10.2.dist-info}/entry_points.txt +0 -0
- {xinference-0.10.0.dist-info → xinference-0.10.2.dist-info}/top_level.txt +0 -0
xinference/model/audio/core.py
CHANGED
@@ -16,9 +16,8 @@ import os
 from collections import defaultdict
 from typing import Dict, List, Optional, Tuple
 
-from ..._compat import BaseModel
 from ...constants import XINFERENCE_CACHE_DIR
-from ..core import ModelDescription
+from ..core import CacheableModelSpec, ModelDescription
 from ..utils import valid_model_revision
 from .whisper import WhisperModel
 
@@ -26,8 +25,19 @@ MAX_ATTEMPTS = 3
 
 logger = logging.getLogger(__name__)
 
+# Used for check whether the model is cached.
+# Init when registering all the builtin models.
+MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
+AUDIO_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)
 
-
+
+def get_audio_model_descriptions():
+    import copy
+
+    return copy.deepcopy(AUDIO_MODEL_DESCRIPTIONS)
+
+
+class AudioModelFamilyV1(CacheableModelSpec):
     model_family: str
     model_name: str
     model_id: str
@@ -77,63 +87,33 @@ def generate_audio_description(
     image_model: AudioModelFamilyV1,
 ) -> Dict[str, List[Dict]]:
     res = defaultdict(list)
-    res[image_model.model_name].
-        AudioModelDescription(None, None, image_model).
+    res[image_model.model_name].append(
+        AudioModelDescription(None, None, image_model).to_version_info()
     )
     return res
 
 
-def
+def match_audio(model_name: str) -> AudioModelFamilyV1:
     from . import BUILTIN_AUDIO_MODELS
+    from .custom import get_user_defined_audios
+
+    for model_spec in get_user_defined_audios():
+        if model_spec.model_name == model_name:
+            return model_spec
 
     if model_name in BUILTIN_AUDIO_MODELS:
         return BUILTIN_AUDIO_MODELS[model_name]
     else:
         raise ValueError(
-            f"
+            f"Audio model {model_name} not found, available"
            f"model list: {BUILTIN_AUDIO_MODELS.keys()}"
         )
 
 
 def cache(model_spec: AudioModelFamilyV1):
-
-    import huggingface_hub
-
-    cache_dir = get_cache_dir(model_spec)
-    if not os.path.exists(cache_dir):
-        os.makedirs(cache_dir, exist_ok=True)
-
-    meta_path = os.path.join(cache_dir, "__valid_download")
-    if valid_model_revision(meta_path, model_spec.model_revision):
-        return cache_dir
-
-    for current_attempt in range(1, MAX_ATTEMPTS + 1):
-        try:
-            huggingface_hub.snapshot_download(
-                model_spec.model_id,
-                revision=model_spec.model_revision,
-                local_dir=cache_dir,
-                local_dir_use_symlinks=True,
-                resume_download=True,
-            )
-            break
-        except huggingface_hub.utils.LocalEntryNotFoundError:
-            remaining_attempts = MAX_ATTEMPTS - current_attempt
-            logger.warning(
-                f"Attempt {current_attempt} failed. Remaining attempts: {remaining_attempts}"
-            )
-    else:
-        raise RuntimeError(
-            f"Failed to download model '{model_spec.model_name}' after {MAX_ATTEMPTS} attempts"
-        )
-
-    with open(meta_path, "w") as f:
-        import json
-
-        desc = AudioModelDescription(None, None, model_spec)
-        json.dump(desc.to_dict(), f)
+    from ..utils import cache
 
-    return
+    return cache(model_spec, AudioModelDescription)
 
 
 def get_cache_dir(model_spec: AudioModelFamilyV1):
@@ -151,7 +131,7 @@ def get_cache_status(
 def create_audio_model_instance(
     subpool_addr: str, devices: List[str], model_uid: str, model_name: str, **kwargs
 ) -> Tuple[WhisperModel, AudioModelDescription]:
-    model_spec =
+    model_spec = match_audio(model_name)
     model_path = cache(model_spec)
     model = WhisperModel(model_uid, model_path, model_spec, **kwargs)
     model_description = AudioModelDescription(
xinference/model/audio/custom.py
ADDED
@@ -0,0 +1,148 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os
+from threading import Lock
+from typing import Any, List, Optional
+
+from ..._compat import (
+    ROOT_KEY,
+    ErrorWrapper,
+    Protocol,
+    StrBytes,
+    ValidationError,
+    load_str_bytes,
+)
+from ...constants import XINFERENCE_CACHE_DIR, XINFERENCE_MODEL_DIR
+from .core import AudioModelFamilyV1
+
+logger = logging.getLogger(__name__)
+
+UD_AUDIO_LOCK = Lock()
+
+
+class CustomAudioModelFamilyV1(AudioModelFamilyV1):
+    model_id: Optional[str]  # type: ignore
+    model_revision: Optional[str]  # type: ignore
+    model_uri: Optional[str]
+
+    @classmethod
+    def parse_raw(
+        cls: Any,
+        b: StrBytes,
+        *,
+        content_type: Optional[str] = None,
+        encoding: str = "utf8",
+        proto: Protocol = None,
+        allow_pickle: bool = False,
+    ) -> AudioModelFamilyV1:
+        # See source code of BaseModel.parse_raw
+        try:
+            obj = load_str_bytes(
+                b,
+                proto=proto,
+                content_type=content_type,
+                encoding=encoding,
+                allow_pickle=allow_pickle,
+                json_loads=cls.__config__.json_loads,
+            )
+        except (ValueError, TypeError, UnicodeDecodeError) as e:
+            raise ValidationError([ErrorWrapper(e, loc=ROOT_KEY)], cls)
+
+        audio_spec: AudioModelFamilyV1 = cls.parse_obj(obj)
+
+        # check model_family
+        if audio_spec.model_family is None:
+            raise ValueError(
+                f"You must specify `model_family` when registering custom Audio models."
+            )
+        assert isinstance(audio_spec.model_family, str)
+        return audio_spec
+
+
+UD_AUDIOS: List[CustomAudioModelFamilyV1] = []
+
+
+def get_user_defined_audios() -> List[CustomAudioModelFamilyV1]:
+    with UD_AUDIO_LOCK:
+        return UD_AUDIOS.copy()
+
+
+def register_audio(model_spec: CustomAudioModelFamilyV1, persist: bool):
+    from ...constants import XINFERENCE_MODEL_DIR
+    from ..utils import is_valid_model_name, is_valid_model_uri
+    from . import BUILTIN_AUDIO_MODELS
+
+    if not is_valid_model_name(model_spec.model_name):
+        raise ValueError(f"Invalid model name {model_spec.model_name}.")
+
+    with UD_AUDIO_LOCK:
+        for model_name in list(BUILTIN_AUDIO_MODELS.keys()) + [
+            spec.model_name for spec in UD_AUDIOS
+        ]:
+            if model_spec.model_name == model_name:
+                raise ValueError(
+                    f"Model name conflicts with existing model {model_spec.model_name}"
+                )
+
+        UD_AUDIOS.append(model_spec)
+
+    if persist:
+        # We only validate model URL when persist is True.
+        model_uri = model_spec.model_uri
+        if model_uri and not is_valid_model_uri(model_uri):
+            raise ValueError(f"Invalid model URI {model_uri}.")
+
+        persist_path = os.path.join(
+            XINFERENCE_MODEL_DIR, "audio", f"{model_spec.model_name}.json"
+        )
+        os.makedirs(os.path.dirname(persist_path), exist_ok=True)
+        with open(persist_path, mode="w") as fd:
+            fd.write(model_spec.json())
+
+
+def unregister_audio(model_name: str, raise_error: bool = True):
+    with UD_AUDIO_LOCK:
+        model_spec = None
+        for i, f in enumerate(UD_AUDIOS):
+            if f.model_name == model_name:
+                model_spec = f
+                break
+        if model_spec:
+            UD_AUDIOS.remove(model_spec)
+
+            persist_path = os.path.join(
+                XINFERENCE_MODEL_DIR, "audio", f"{model_spec.model_name}.json"
+            )
+            if os.path.exists(persist_path):
+                os.remove(persist_path)
+
+            cache_dir = os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
+            if os.path.exists(cache_dir):
+                logger.warning(
+                    f"Remove the cache of user-defined model {model_spec.model_name}. "
+                    f"Cache directory: {cache_dir}"
+                )
+                if os.path.isdir(cache_dir):
+                    os.rmdir(cache_dir)
+                else:
+                    logger.warning(
+                        f"Cache directory is not a soft link, please remove it manually."
+                    )
+        else:
+            if raise_error:
+                raise ValueError(f"Model {model_name} not found")
+            else:
+                logger.warning(f"Custom audio model {model_name} not found")
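Taken together, the two audio changes above add a user-defined audio model registry alongside the builtin one: match_audio() now consults get_user_defined_audios() first. A minimal sketch of registering a custom model with this new API, assuming the spec has no required fields beyond those visible in the diff (all values below are hypothetical placeholders, not from the diff):

    from xinference.model.audio.custom import (
        CustomAudioModelFamilyV1,
        register_audio,
        unregister_audio,
    )

    spec = CustomAudioModelFamilyV1(
        model_family="whisper",            # must be set for custom audio models
        model_name="my-whisper-finetune",  # must pass is_valid_model_name
        model_uri="file:///data/models/my-whisper-finetune",  # hypothetical path
    )

    # persist=True validates the URI and writes
    # $XINFERENCE_MODEL_DIR/audio/my-whisper-finetune.json
    register_audio(spec, persist=True)

    # Removes the spec, its persisted JSON, and its cache link.
    unregister_audio("my-whisper-finetune")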
xinference/model/core.py
CHANGED
@@ -13,9 +13,10 @@
 # limitations under the License.
 
 from abc import ABC, abstractmethod
-from typing import Any,
+from typing import Any, List, Optional, Tuple, Union
 
 from .._compat import BaseModel
+from ..types import PeftModelConfig
 
 
 class ModelDescription(ABC):
@@ -50,11 +51,9 @@ def create_model_instance(
     model_type: str,
     model_name: str,
     model_format: Optional[str] = None,
-    model_size_in_billions: Optional[int] = None,
+    model_size_in_billions: Optional[Union[int, str]] = None,
     quantization: Optional[str] = None,
-
-    image_lora_load_kwargs: Optional[Dict] = None,
-    image_lora_fuse_kwargs: Optional[Dict] = None,
+    peft_model_config: Optional[PeftModelConfig] = None,
     is_local_deployment: bool = False,
     **kwargs,
 ) -> Tuple[Any, ModelDescription]:
@@ -73,7 +72,7 @@ def create_model_instance(
            model_format,
            model_size_in_billions,
            quantization,
-
+            peft_model_config,
            is_local_deployment,
            **kwargs,
        )
@@ -90,9 +89,7 @@ def create_model_instance(
            devices,
            model_uid,
            model_name,
-
-            lora_load_kwargs=image_lora_load_kwargs,
-            lora_fuse_kwargs=image_lora_fuse_kwargs,
+            peft_model_config,
            **kwargs,
        )
    elif model_type == "rerank":
xinference/model/embedding/core.py
CHANGED
@@ -136,7 +136,7 @@ class EmbeddingModel:
     def create_embedding(self, sentences: Union[str, List[str]], **kwargs):
         from sentence_transformers import SentenceTransformer
 
-
+        kwargs.setdefault("normalize_embeddings", True)
 
         # copied from sentence-transformers, and modify it to return tokens num
         @no_type_check
@@ -272,7 +272,6 @@ class EmbeddingModel:
            self._model,
            sentences,
            convert_to_numpy=False,
-            normalize_embeddings=normalize_embeddings,
            **kwargs,
        )
        if isinstance(sentences, str):
xinference/model/embedding/model_spec.json
CHANGED
@@ -206,5 +206,29 @@
     "language": ["zh", "en"],
     "model_id": "maidalun1020/bce-embedding-base_v1",
     "model_revision": "236d9024fc1b4046f03848723f934521a66a9323"
+  },
+  {
+    "model_name": "m3e-small",
+    "dimensions": 512,
+    "max_tokens": 512,
+    "language": ["zh", "en"],
+    "model_id": "moka-ai/m3e-small",
+    "model_revision": "44c696631b2a8c200220aaaad5f987f096e986df"
+  },
+  {
+    "model_name": "m3e-base",
+    "dimensions": 768,
+    "max_tokens": 512,
+    "language": ["zh", "en"],
+    "model_id": "moka-ai/m3e-base",
+    "model_revision": "764b537a0e50e5c7d64db883f2d2e051cbe3c64c"
+  },
+  {
+    "model_name": "m3e-large",
+    "dimensions": 1024,
+    "max_tokens": 512,
+    "language": ["zh", "en"],
+    "model_id": "moka-ai/m3e-large",
+    "model_revision": "12900375086c37ba5d83d1e417b21dc7d1d1f388"
   }
 ]
xinference/model/embedding/model_spec_modelscope.json
CHANGED
@@ -208,5 +208,29 @@
     "language": ["zh", "en"],
     "model_id": "maidalun/bce-embedding-base_v1",
     "model_hub": "modelscope"
+  },
+  {
+    "model_name": "m3e-small",
+    "dimensions": 512,
+    "max_tokens": 512,
+    "language": ["zh", "en"],
+    "model_id": "AI-ModelScope/m3e-small",
+    "model_hub": "modelscope"
+  },
+  {
+    "model_name": "m3e-base",
+    "dimensions": 768,
+    "max_tokens": 512,
+    "language": ["zh", "en"],
+    "model_id": "AI-ModelScope/m3e-base",
+    "model_hub": "modelscope"
+  },
+  {
+    "model_name": "m3e-large",
+    "dimensions": 1024,
+    "max_tokens": 512,
+    "language": ["zh", "en"],
+    "model_id": "AI-ModelScope/m3e-large",
+    "model_hub": "modelscope"
   }
 ]
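Both spec files gain the same three moka-ai m3e entries, so the models resolve whether the configured hub is Hugging Face or ModelScope. A sketch of using one of them, assuming the standard xinference client API (the client itself is not part of the hunks shown here):

    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")
    uid = client.launch_model(model_name="m3e-base", model_type="embedding")
    model = client.get_model(uid)
    # Per the embedding/core.py change above, embeddings are now
    # L2-normalized by default (normalize_embeddings defaults to True).
    result = model.create_embedding("Xorbits Inference")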
xinference/model/image/core.py
CHANGED
@@ -18,6 +18,7 @@ from collections import defaultdict
 from typing import Dict, List, Optional, Tuple
 
 from ...constants import XINFERENCE_CACHE_DIR
+from ...types import PeftModelConfig
 from ..core import CacheableModelSpec, ModelDescription
 from ..utils import valid_model_revision
 from .stable_diffusion.core import DiffusionModel
@@ -175,9 +176,7 @@ def create_image_model_instance(
     devices: List[str],
     model_uid: str,
     model_name: str,
-
-    lora_load_kwargs: Optional[Dict] = None,
-    lora_fuse_kwargs: Optional[Dict] = None,
+    peft_model_config: Optional[PeftModelConfig] = None,
     **kwargs,
 ) -> Tuple[DiffusionModel, ImageModelDescription]:
     model_spec = match_diffusion(model_name)
@@ -210,10 +209,19 @@ def create_image_model_instance(
     else:
         kwargs["controlnet"] = controlnet_model_paths
     model_path = cache(model_spec)
+    if peft_model_config is not None:
+        lora_model = peft_model_config.peft_model
+        lora_load_kwargs = peft_model_config.image_lora_load_kwargs
+        lora_fuse_kwargs = peft_model_config.image_lora_fuse_kwargs
+    else:
+        lora_model = None
+        lora_load_kwargs = None
+        lora_fuse_kwargs = None
+
     model = DiffusionModel(
         model_uid,
         model_path,
-
+        lora_model_paths=lora_model,
         lora_load_kwargs=lora_load_kwargs,
         lora_fuse_kwargs=lora_fuse_kwargs,
         **kwargs,
xinference/model/image/stable_diffusion/core.py
CHANGED
@@ -25,7 +25,7 @@ from typing import Dict, List, Optional, Union
 
 from ....constants import XINFERENCE_IMAGE_DIR
 from ....device_utils import move_model_to_available_device
-from ....types import Image, ImageList
+from ....types import Image, ImageList, LoRA
 
 logger = logging.getLogger(__name__)
 
@@ -36,7 +36,7 @@ class DiffusionModel:
         model_uid: str,
         model_path: str,
         device: Optional[str] = None,
-
+        lora_model: Optional[List[LoRA]] = None,
         lora_load_kwargs: Optional[Dict] = None,
         lora_fuse_kwargs: Optional[Dict] = None,
         **kwargs,
@@ -45,20 +45,21 @@ class DiffusionModel:
         self._model_path = model_path
         self._device = device
         self._model = None
-        self.
+        self._lora_model = lora_model
         self._lora_load_kwargs = lora_load_kwargs or {}
         self._lora_fuse_kwargs = lora_fuse_kwargs or {}
         self._kwargs = kwargs
 
     def _apply_lora(self):
-        if self.
+        if self._lora_model is not None:
             logger.info(
                 f"Loading the LoRA with load kwargs: {self._lora_load_kwargs}, fuse kwargs: {self._lora_fuse_kwargs}."
             )
             assert self._model is not None
-            self.
-            self.
-
+            for lora_model in self._lora_model:
+                self._model.load_lora_weights(
+                    lora_model.local_path, **self._lora_load_kwargs
+                )
             self._model.fuse_lora(**self._lora_fuse_kwargs)
             logger.info(f"Successfully loaded the LoRA for model {self._model_uid}.")
 
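The DiffusionModel change replaces a single fused LoRA with a list of adapters that are loaded in a loop and then fused once. A sketch of the new call shape, based only on what is visible above (the loop reads each adapter's .local_path); the LoRA constructor arguments below are assumptions, not confirmed by this diff:

    from xinference.model.image.stable_diffusion.core import DiffusionModel
    from xinference.types import LoRA

    # Hypothetical adapters; only .local_path is read in _apply_lora.
    loras = [
        LoRA(lora_name="style-a", local_path="/data/loras/style_a"),
        LoRA(lora_name="style-b", local_path="/data/loras/style_b"),
    ]

    model = DiffusionModel(
        "my-sd-uid",                           # hypothetical model uid
        "/data/models/stable-diffusion",       # hypothetical local path
        lora_model=loras,                      # was a single adapter before
        lora_load_kwargs={},                   # forwarded to load_lora_weights
        lora_fuse_kwargs={"lora_scale": 0.6},  # forwarded to fuse_lora
    )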
xinference/model/llm/__init__.py
CHANGED
@@ -49,7 +49,6 @@ from .llm_family import (
 
 def _install():
     from .ggml.chatglm import ChatglmCppChatModel
-    from .ggml.ctransformers import CtransformersModel
     from .ggml.llamacpp import LlamaCppChatModel, LlamaCppModel
     from .pytorch.baichuan import BaichuanPytorchChatModel
     from .pytorch.chatglm import ChatglmPytorchChatModel
@@ -77,11 +76,6 @@ def _install():
             ChatglmCppChatModel,
         ]
     )
-    LLM_CLASSES.extend(
-        [
-            CtransformersModel,
-        ]
-    )
     LLM_CLASSES.extend([SGLANGModel, SGLANGChatModel])
     LLM_CLASSES.extend([VLLMModel, VLLMChatModel])
     LLM_CLASSES.extend(
xinference/model/llm/core.py
CHANGED
@@ -21,6 +21,7 @@ from collections import defaultdict
 from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
 
 from ...core.utils import parse_replica_model_uid
+from ...types import PeftModelConfig
 from ..core import ModelDescription
 
 if TYPE_CHECKING:
@@ -178,9 +179,9 @@ def create_llm_model_instance(
     model_uid: str,
     model_name: str,
     model_format: Optional[str] = None,
-    model_size_in_billions: Optional[int] = None,
+    model_size_in_billions: Optional[Union[int, str]] = None,
     quantization: Optional[str] = None,
-
+    peft_model_config: Optional[PeftModelConfig] = None,
     is_local_deployment: bool = False,
     **kwargs,
 ) -> Tuple[LLM, LLMDescription]:
@@ -204,9 +205,9 @@ def create_llm_model_instance(
     assert quantization is not None
     save_path = cache(llm_family, llm_spec, quantization)
 
-
-
-    )
+    peft_model = peft_model_config.peft_model if peft_model_config else None
+
+    llm_cls = match_llm_cls(llm_family, llm_spec, quantization, peft_model=peft_model)
     if not llm_cls:
         raise ValueError(
             f"Model not supported, name: {model_name}, format: {model_format},"
@@ -214,15 +215,9 @@ def create_llm_model_instance(
         )
     logger.debug(f"Launching {model_uid} with {llm_cls.__name__}")
 
-    if
+    if peft_model is not None:
         model = llm_cls(
-            model_uid,
-            llm_family,
-            llm_spec,
-            quantization,
-            save_path,
-            kwargs,
-            peft_model_path,
+            model_uid, llm_family, llm_spec, quantization, save_path, kwargs, peft_model
         )
     else:
         model = llm_cls(
@@ -238,7 +233,7 @@ def create_speculative_llm_model_instance(
     devices: List[str],
     model_uid: str,
     model_name: str,
-    model_size_in_billions: Optional[int],
+    model_size_in_billions: Optional[Union[int, str]],
     quantization: Optional[str],
     draft_model_name: str,
     draft_model_size_in_billions: Optional[int],
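The thread running through model/core.py, image/core.py, and llm/core.py is a single refactor: the loose peft_model_path / image_lora_* parameters collapse into one PeftModelConfig object, and model sizes may now be strings as well as ints. A sketch of the new call, with hypothetical values; the PeftModelConfig constructor is inferred from the attributes read in the diffs above (.peft_model, .image_lora_load_kwargs, .image_lora_fuse_kwargs) and is not itself shown in this diff:

    from xinference.model.llm.core import create_llm_model_instance
    from xinference.types import PeftModelConfig

    peft_config = PeftModelConfig(
        peft_model=None,              # or a list of LoRA adapters for LLMs
        image_lora_load_kwargs=None,  # only consulted by image models
        image_lora_fuse_kwargs=None,
    )

    subpool_addr = "127.0.0.1:37537"  # hypothetical worker subpool address
    devices = ["0"]

    model, description = create_llm_model_instance(
        subpool_addr, devices, "my-llm-uid",
        model_name="qwen1.5-chat",     # hypothetical model
        model_size_in_billions="1_8",  # Union[int, str]: string sizes now allowed
        peft_model_config=peft_config,
    )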
xinference/model/llm/ggml/llamacpp.py
CHANGED
@@ -30,7 +30,6 @@ from ....types import (
 from ..core import LLM
 from ..llm_family import LLMFamilyV1, LLMSpecV1
 from ..utils import ChatModelMixin
-from .ctransformers import CTRANSFORMERS_SUPPORTED_MODEL
 
 logger = logging.getLogger(__name__)
 
@@ -182,11 +181,7 @@ class LlamaCppModel(LLM):
     ) -> bool:
         if llm_spec.model_format not in ["ggmlv3", "ggufv2"]:
             return False
-        if (
-            "chatglm" in llm_family.model_name
-            or "qwen" in llm_family.model_name
-            or llm_family.model_name in CTRANSFORMERS_SUPPORTED_MODEL
-        ):
+        if "chatglm" in llm_family.model_name or "qwen" in llm_family.model_name:
             return False
         if "generate" not in llm_family.model_ability:
             return False
@@ -250,10 +245,7 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
     ) -> bool:
         if llm_spec.model_format not in ["ggmlv3", "ggufv2"]:
             return False
-        if (
-            "chatglm" in llm_family.model_name
-            or llm_family.model_name in CTRANSFORMERS_SUPPORTED_MODEL
-        ):
+        if "chatglm" in llm_family.model_name:
             return False
         if "chat" not in llm_family.model_ability:
             return False