PyPI - xinference - Versions diffs - 0.13.0__py3-none-any.whl → 0.13.1__py3-none-any.whl - Mend

xinference 0.13.0__py3-none-any.whl → 0.13.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (66) hide show

xinference/core/worker.py CHANGED Viewed

@@ -22,7 +22,7 @@ import threading
 import time
 from collections import defaultdict
 from logging import getLogger
-from typing import Any, Dict, List, Optional, Set, Tuple, Union
+from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union
 import xoscar as xo
 from async_timeout import timeout
@@ -212,48 +212,81 @@ class WorkerActor(xo.StatelessActor):
         from ..model.audio import (
             CustomAudioModelFamilyV1,
+            generate_audio_description,
             get_audio_model_descriptions,
             register_audio,
             unregister_audio,
         )
         from ..model.embedding import (
             CustomEmbeddingModelSpec,
+            generate_embedding_description,
             get_embedding_model_descriptions,
             register_embedding,
             unregister_embedding,
         )
+        from ..model.flexible import (
+            FlexibleModelSpec,
+            get_flexible_model_descriptions,
+            register_flexible_model,
+            unregister_flexible_model,
+        )
         from ..model.image import (
             CustomImageModelFamilyV1,
+            generate_image_description,
             get_image_model_descriptions,
             register_image,
             unregister_image,
         )
         from ..model.llm import (
             CustomLLMFamilyV1,
+            generate_llm_description,
             get_llm_model_descriptions,
             register_llm,
             unregister_llm,
         )
         from ..model.rerank import (
             CustomRerankModelSpec,
+            generate_rerank_description,
             get_rerank_model_descriptions,
             register_rerank,
             unregister_rerank,
         )
         self._custom_register_type_to_cls: Dict[str, Tuple] = {  # type: ignore
-            "LLM": (CustomLLMFamilyV1, register_llm, unregister_llm),
+            "LLM": (
+                CustomLLMFamilyV1,
+                register_llm,
+                unregister_llm,
+                generate_llm_description,
+            ),
             "embedding": (
                 CustomEmbeddingModelSpec,
                 register_embedding,
                 unregister_embedding,
+                generate_embedding_description,
+            ),
+            "rerank": (
+                CustomRerankModelSpec,
+                register_rerank,
+                unregister_rerank,
+                generate_rerank_description,
             ),
-            "rerank": (CustomRerankModelSpec, register_rerank, unregister_rerank),
-            "audio": (CustomAudioModelFamilyV1, register_audio, unregister_audio),
             "image": (
                 CustomImageModelFamilyV1,
                 register_image,
                 unregister_image,
+                generate_image_description,
+            ),
+            "audio": (
+                CustomAudioModelFamilyV1,
+                register_audio,
+                unregister_audio,
+                generate_audio_description,
+            ),
+            "flexible": (
+                FlexibleModelSpec,
+                register_flexible_model,
+                unregister_flexible_model,
             ),
         }
@@ -264,6 +297,7 @@ class WorkerActor(xo.StatelessActor):
         model_version_infos.update(get_rerank_model_descriptions())
         model_version_infos.update(get_image_model_descriptions())
         model_version_infos.update(get_audio_model_descriptions())
+        model_version_infos.update(get_flexible_model_descriptions())
         await self._cache_tracker_ref.record_model_version(
             model_version_infos, self.address
         )
@@ -514,17 +548,23 @@ class WorkerActor(xo.StatelessActor):
                 raise ValueError(f"{model_name} model can't run on Darwin system.")
     @log_sync(logger=logger)
-    def register_model(self, model_type: str, model: str, persist: bool):
+    async def register_model(self, model_type: str, model: str, persist: bool):
         # TODO: centralized model registrations
         if model_type in self._custom_register_type_to_cls:
             (
                 model_spec_cls,
                 register_fn,
                 unregister_fn,
+                generate_fn,
             ) = self._custom_register_type_to_cls[model_type]
             model_spec = model_spec_cls.parse_raw(model)
             try:
                 register_fn(model_spec, persist)
+                await self._cache_tracker_ref.record_model_version(
+                    generate_fn(model_spec), self.address
+                )
+            except ValueError as e:
+                raise e
             except Exception as e:
                 unregister_fn(model_spec.model_name, raise_error=False)
                 raise e
@@ -532,14 +572,127 @@ class WorkerActor(xo.StatelessActor):
             raise ValueError(f"Unsupported model type: {model_type}")
     @log_sync(logger=logger)
-    def unregister_model(self, model_type: str, model_name: str):
+    async def unregister_model(self, model_type: str, model_name: str):
         # TODO: centralized model registrations
         if model_type in self._custom_register_type_to_cls:
-            _, _, unregister_fn = self._custom_register_type_to_cls[model_type]
-            unregister_fn(model_name)
+            _, _, unregister_fn, _ = self._custom_register_type_to_cls[model_type]
+            unregister_fn(model_name, False)
         else:
             raise ValueError(f"Unsupported model type: {model_type}")
+    @log_async(logger=logger)
+    async def list_model_registrations(
+        self, model_type: str, detailed: bool = False
+    ) -> List[Dict[str, Any]]:
+        def sort_helper(item):
+            assert isinstance(item["model_name"], str)
+            return item.get("model_name").lower()
+        if model_type == "LLM":
+            from ..model.llm import get_user_defined_llm_families
+            ret = []
+            for family in get_user_defined_llm_families():
+                ret.append({"model_name": family.model_name, "is_builtin": False})
+            ret.sort(key=sort_helper)
+            return ret
+        elif model_type == "embedding":
+            from ..model.embedding.custom import get_user_defined_embeddings
+            ret = []
+            for model_spec in get_user_defined_embeddings():
+                ret.append({"model_name": model_spec.model_name, "is_builtin": False})
+            ret.sort(key=sort_helper)
+            return ret
+        elif model_type == "image":
+            from ..model.image.custom import get_user_defined_images
+            ret = []
+            for model_spec in get_user_defined_images():
+                ret.append({"model_name": model_spec.model_name, "is_builtin": False})
+            ret.sort(key=sort_helper)
+            return ret
+        elif model_type == "audio":
+            from ..model.audio.custom import get_user_defined_audios
+            ret = []
+            for model_spec in get_user_defined_audios():
+                ret.append({"model_name": model_spec.model_name, "is_builtin": False})
+            ret.sort(key=sort_helper)
+            return ret
+        elif model_type == "rerank":
+            from ..model.rerank.custom import get_user_defined_reranks
+            ret = []
+            for model_spec in get_user_defined_reranks():
+                ret.append({"model_name": model_spec.model_name, "is_builtin": False})
+            ret.sort(key=sort_helper)
+            return ret
+        else:
+            raise ValueError(f"Unsupported model type: {model_type}")
+    @log_sync(logger=logger)
+    async def get_model_registration(self, model_type: str, model_name: str) -> Any:
+        if model_type == "LLM":
+            from ..model.llm import get_user_defined_llm_families
+            for f in get_user_defined_llm_families():
+                if f.model_name == model_name:
+                    return f
+        elif model_type == "embedding":
+            from ..model.embedding.custom import get_user_defined_embeddings
+            for f in get_user_defined_embeddings():
+                if f.model_name == model_name:
+                    return f
+        elif model_type == "image":
+            from ..model.image.custom import get_user_defined_images
+            for f in get_user_defined_images():
+                if f.model_name == model_name:
+                    return f
+        elif model_type == "audio":
+            from ..model.audio.custom import get_user_defined_audios
+            for f in get_user_defined_audios():
+                if f.model_name == model_name:
+                    return f
+        elif model_type == "rerank":
+            from ..model.rerank.custom import get_user_defined_reranks
+            for f in get_user_defined_reranks():
+                if f.model_name == model_name:
+                    return f
+        return None
+    @log_async(logger=logger)
+    async def query_engines_by_model_name(self, model_name: str):
+        from copy import deepcopy
+        from ..model.llm.llm_family import LLM_ENGINES
+        if model_name not in LLM_ENGINES:
+            return None
+        # filter llm_class
+        engine_params = deepcopy(LLM_ENGINES[model_name])
+        for engine in engine_params:
+            params = engine_params[engine]
+            for param in params:
+                del param["llm_class"]
+        return engine_params
     async def _get_model_ability(self, model: Any, model_type: str) -> List[str]:
         from ..model.llm.core import LLM
@@ -551,6 +704,8 @@ class WorkerActor(xo.StatelessActor):
             return ["text_to_image"]
         elif model_type == "audio":
             return ["audio_to_text"]
+        elif model_type == "flexible":
+            return ["flexible"]
         else:
             assert model_type == "LLM"
             assert isinstance(model, LLM)
@@ -587,6 +742,7 @@ class WorkerActor(xo.StatelessActor):
         peft_model_config: Optional[PeftModelConfig] = None,
         request_limits: Optional[int] = None,
         gpu_idx: Optional[Union[int, List[int]]] = None,
+        download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
         **kwargs,
     ):
         # !!! Note that The following code must be placed at the very beginning of this function,
@@ -669,6 +825,7 @@ class WorkerActor(xo.StatelessActor):
                     model_size_in_billions,
                     quantization,
                     peft_model_config,
+                    download_hub,
                     **kwargs,
                 )
                 await self.update_cache_status(model_name, model_description)

xinference/deploy/cmdline.py CHANGED Viewed

@@ -370,6 +370,9 @@ def worker(
     help="Type of model to register (default is 'LLM').",
 )
 @click.option("--file", "-f", type=str, help="Path to the model configuration file.")
+@click.option(
+    "--worker-ip", "-w", type=str, help="Specify the ip address of the worker."
+)
 @click.option(
     "--persist",
     "-p",
@@ -387,6 +390,7 @@ def register_model(
     endpoint: Optional[str],
     model_type: str,
     file: str,
+    worker_ip: str,
     persist: bool,
     api_key: Optional[str],
 ):
@@ -400,6 +404,7 @@ def register_model(
     client.register_model(
         model_type=model_type,
         model=model,
+        worker_ip=worker_ip,
         persist=persist,
     )

xinference/model/audio/chattts.py CHANGED Viewed

@@ -38,21 +38,19 @@ class ChatTTSModel:
         self._kwargs = kwargs
     def load(self):
+        import ChatTTS
         import torch
-        from xinference.thirdparty import ChatTTS
         torch._dynamo.config.cache_size_limit = 64
         torch._dynamo.config.suppress_errors = True
         torch.set_float32_matmul_precision("high")
         self._model = ChatTTS.Chat()
-        self._model.load_models(
-            source="local", local_path=self._model_path, compile=True
-        )
+        self._model.load(source="custom", custom_path=self._model_path, compile=True)
     def speech(
         self, input: str, voice: str, response_format: str = "mp3", speed: float = 1.0
     ):
+        import ChatTTS
         import numpy as np
         import torch
         import torchaudio
@@ -71,7 +69,9 @@ class ChatTTSModel:
         default = 5
         infer_speed = int(default * speed)
-        params_infer_code = {"spk_emb": rnd_spk_emb, "prompt": f"[speed_{infer_speed}]"}
+        params_infer_code = ChatTTS.Chat.InferCodeParams(
+            prompt=f"[speed_{infer_speed}]", spk_emb=rnd_spk_emb
+        )
         assert self._model is not None
         wavs = self._model.infer([input], params_infer_code=params_infer_code)

xinference/model/audio/core.py CHANGED Viewed

@@ -14,7 +14,7 @@
 import logging
 import os
 from collections import defaultdict
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Dict, List, Literal, Optional, Tuple, Union
 from ...constants import XINFERENCE_CACHE_DIR
 from ..core import CacheableModelSpec, ModelDescription
@@ -94,7 +94,10 @@ def generate_audio_description(
     return res
-def match_audio(model_name: str) -> AudioModelFamilyV1:
+def match_audio(
+    model_name: str,
+    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+) -> AudioModelFamilyV1:
     from ..utils import download_from_modelscope
     from . import BUILTIN_AUDIO_MODELS, MODELSCOPE_AUDIO_MODELS
     from .custom import get_user_defined_audios
@@ -103,17 +106,17 @@ def match_audio(model_name: str) -> AudioModelFamilyV1:
         if model_spec.model_name == model_name:
             return model_spec
-    if download_from_modelscope():
-        if model_name in MODELSCOPE_AUDIO_MODELS:
-            logger.debug(f"Audio model {model_name} found in ModelScope.")
-            return MODELSCOPE_AUDIO_MODELS[model_name]
-        else:
-            logger.debug(
-                f"Audio model {model_name} not found in ModelScope, "
-                f"now try to load it via builtin way."
-            )
-    if model_name in BUILTIN_AUDIO_MODELS:
+    if download_hub == "huggingface" and model_name in BUILTIN_AUDIO_MODELS:
+        logger.debug(f"Audio model {model_name} found in huggingface.")
+        return BUILTIN_AUDIO_MODELS[model_name]
+    elif download_hub == "modelscope" and model_name in MODELSCOPE_AUDIO_MODELS:
+        logger.debug(f"Audio model {model_name} found in ModelScope.")
+        return MODELSCOPE_AUDIO_MODELS[model_name]
+    elif download_from_modelscope() and model_name in MODELSCOPE_AUDIO_MODELS:
+        logger.debug(f"Audio model {model_name} found in ModelScope.")
+        return MODELSCOPE_AUDIO_MODELS[model_name]
+    elif model_name in BUILTIN_AUDIO_MODELS:
+        logger.debug(f"Audio model {model_name} found in huggingface.")
         return BUILTIN_AUDIO_MODELS[model_name]
     else:
         raise ValueError(
@@ -141,9 +144,14 @@ def get_cache_status(
 def create_audio_model_instance(
-    subpool_addr: str, devices: List[str], model_uid: str, model_name: str, **kwargs
+    subpool_addr: str,
+    devices: List[str],
+    model_uid: str,
+    model_name: str,
+    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    **kwargs,
 ) -> Tuple[Union[WhisperModel, ChatTTSModel], AudioModelDescription]:
-    model_spec = match_audio(model_name)
+    model_spec = match_audio(model_name, download_hub)
     model_path = cache(model_spec)
     model: Union[WhisperModel, ChatTTSModel]
     if model_spec.model_family == "whisper":

xinference/model/core.py CHANGED Viewed

@@ -13,7 +13,7 @@
 # limitations under the License.
 from abc import ABC, abstractmethod
-from typing import Any, List, Optional, Tuple, Union
+from typing import Any, List, Literal, Optional, Tuple, Union
 from .._compat import BaseModel
 from ..types import PeftModelConfig
@@ -55,10 +55,12 @@ def create_model_instance(
     model_size_in_billions: Optional[Union[int, str]] = None,
     quantization: Optional[str] = None,
     peft_model_config: Optional[PeftModelConfig] = None,
+    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
     **kwargs,
 ) -> Tuple[Any, ModelDescription]:
     from .audio.core import create_audio_model_instance
     from .embedding.core import create_embedding_model_instance
+    from .flexible.core import create_flexible_model_instance
     from .image.core import create_image_model_instance
     from .llm.core import create_llm_model_instance
     from .rerank.core import create_rerank_model_instance
@@ -74,13 +76,14 @@ def create_model_instance(
             model_size_in_billions,
             quantization,
             peft_model_config,
+            download_hub,
             **kwargs,
         )
     elif model_type == "embedding":
         # embedding model doesn't accept trust_remote_code
         kwargs.pop("trust_remote_code", None)
         return create_embedding_model_instance(
-            subpool_addr, devices, model_uid, model_name, **kwargs
+            subpool_addr, devices, model_uid, model_name, download_hub, **kwargs
         )
     elif model_type == "image":
         kwargs.pop("trust_remote_code", None)
@@ -90,16 +93,22 @@ def create_model_instance(
             model_uid,
             model_name,
             peft_model_config,
+            download_hub,
             **kwargs,
         )
     elif model_type == "rerank":
         kwargs.pop("trust_remote_code", None)
         return create_rerank_model_instance(
-            subpool_addr, devices, model_uid, model_name, **kwargs
+            subpool_addr, devices, model_uid, model_name, download_hub, **kwargs
         )
     elif model_type == "audio":
         kwargs.pop("trust_remote_code", None)
         return create_audio_model_instance(
+            subpool_addr, devices, model_uid, model_name, download_hub, **kwargs
+        )
+    elif model_type == "flexible":
+        kwargs.pop("trust_remote_code", None)
+        return create_flexible_model_instance(
             subpool_addr, devices, model_uid, model_name, **kwargs
         )
     else:

xinference/model/embedding/core.py CHANGED Viewed

@@ -16,7 +16,7 @@ import gc
 import logging
 import os
 from collections import defaultdict
-from typing import Dict, List, Optional, Tuple, Union, no_type_check
+from typing import Dict, List, Literal, Optional, Tuple, Union, no_type_check
 import numpy as np
@@ -305,7 +305,10 @@ class EmbeddingModel:
         )
-def match_embedding(model_name: str) -> EmbeddingModelSpec:
+def match_embedding(
+    model_name: str,
+    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+) -> EmbeddingModelSpec:
     from ..utils import download_from_modelscope
     from . import BUILTIN_EMBEDDING_MODELS, MODELSCOPE_EMBEDDING_MODELS
     from .custom import get_user_defined_embeddings
@@ -315,29 +318,35 @@ def match_embedding(model_name: str) -> EmbeddingModelSpec:
         if model_name == model_spec.model_name:
             return model_spec
-    if download_from_modelscope():
-        if model_name in MODELSCOPE_EMBEDDING_MODELS:
-            logger.debug(f"Embedding model {model_name} found in ModelScope.")
-            return MODELSCOPE_EMBEDDING_MODELS[model_name]
-        else:
-            logger.debug(
-                f"Embedding model {model_name} not found in ModelScope, "
-                f"now try to load it via builtin way."
-            )
-    if model_name in BUILTIN_EMBEDDING_MODELS:
+    if download_hub == "modelscope" and model_name in MODELSCOPE_EMBEDDING_MODELS:
+        logger.debug(f"Embedding model {model_name} found in ModelScope.")
+        return MODELSCOPE_EMBEDDING_MODELS[model_name]
+    elif download_hub == "huggingface" and model_name in BUILTIN_EMBEDDING_MODELS:
+        logger.debug(f"Embedding model {model_name} found in Huggingface.")
+        return BUILTIN_EMBEDDING_MODELS[model_name]
+    elif download_from_modelscope() and model_name in MODELSCOPE_EMBEDDING_MODELS:
+        logger.debug(f"Embedding model {model_name} found in ModelScope.")
+        return MODELSCOPE_EMBEDDING_MODELS[model_name]
+    elif model_name in BUILTIN_EMBEDDING_MODELS:
+        logger.debug(f"Embedding model {model_name} found in Huggingface.")
         return BUILTIN_EMBEDDING_MODELS[model_name]
     else:
         raise ValueError(
             f"Embedding model {model_name} not found, available"
-            f"model list: {BUILTIN_EMBEDDING_MODELS.keys()}"
+            f"Huggingface: {BUILTIN_EMBEDDING_MODELS.keys()}"
+            f"ModelScope: {MODELSCOPE_EMBEDDING_MODELS.keys()}"
         )
 def create_embedding_model_instance(
-    subpool_addr: str, devices: List[str], model_uid: str, model_name: str, **kwargs
+    subpool_addr: str,
+    devices: List[str],
+    model_uid: str,
+    model_name: str,
+    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    **kwargs,
 ) -> Tuple[EmbeddingModel, EmbeddingModelDescription]:
-    model_spec = match_embedding(model_name)
+    model_spec = match_embedding(model_name, download_hub)
     model_path = cache(model_spec)
     model = EmbeddingModel(model_uid, model_path, **kwargs)
     model_description = EmbeddingModelDescription(

xinference/model/flexible/__init__.py ADDED Viewed

@@ -0,0 +1,40 @@
+# Copyright 2022-2024 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import codecs
+import json
+import os
+from ...constants import XINFERENCE_MODEL_DIR
+from .core import (
+    FLEXIBLE_MODEL_DESCRIPTIONS,
+    FlexibleModel,
+    FlexibleModelSpec,
+    generate_flexible_model_description,
+    get_flexible_model_descriptions,
+    get_flexible_models,
+    register_flexible_model,
+    unregister_flexible_model,
+)
+model_dir = os.path.join(XINFERENCE_MODEL_DIR, "flexible")
+if os.path.isdir(model_dir):
+    for f in os.listdir(model_dir):
+        with codecs.open(os.path.join(model_dir, f), encoding="utf-8") as fd:
+            model_spec = FlexibleModelSpec.parse_obj(json.load(fd))
+            register_flexible_model(model_spec, persist=False)
+# register model description
+for model in get_flexible_models():
+    FLEXIBLE_MODEL_DESCRIPTIONS.update(generate_flexible_model_description(model))

xinference 0.13.0__py3-none-any.whl → 0.13.1__py3-none-any.whl

Potentially problematic release.

xinference 0.13.0__py3-none-any.whl → 0.13.1__py3-none-any.whl