xinference 1.7.1.post1__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (136):
  1. xinference/_version.py +3 -3
  2. xinference/client/restful/async_restful_client.py +8 -13
  3. xinference/client/restful/restful_client.py +6 -2
  4. xinference/core/chat_interface.py +6 -4
  5. xinference/core/media_interface.py +5 -0
  6. xinference/core/model.py +1 -5
  7. xinference/core/supervisor.py +117 -68
  8. xinference/core/worker.py +49 -37
  9. xinference/deploy/test/test_cmdline.py +2 -6
  10. xinference/model/audio/__init__.py +26 -23
  11. xinference/model/audio/chattts.py +3 -2
  12. xinference/model/audio/core.py +49 -98
  13. xinference/model/audio/cosyvoice.py +3 -2
  14. xinference/model/audio/custom.py +28 -73
  15. xinference/model/audio/f5tts.py +3 -2
  16. xinference/model/audio/f5tts_mlx.py +3 -2
  17. xinference/model/audio/fish_speech.py +3 -2
  18. xinference/model/audio/funasr.py +17 -4
  19. xinference/model/audio/kokoro.py +3 -2
  20. xinference/model/audio/megatts.py +3 -2
  21. xinference/model/audio/melotts.py +3 -2
  22. xinference/model/audio/model_spec.json +572 -171
  23. xinference/model/audio/utils.py +0 -6
  24. xinference/model/audio/whisper.py +3 -2
  25. xinference/model/audio/whisper_mlx.py +3 -2
  26. xinference/model/cache_manager.py +141 -0
  27. xinference/model/core.py +6 -49
  28. xinference/model/custom.py +174 -0
  29. xinference/model/embedding/__init__.py +67 -56
  30. xinference/model/embedding/cache_manager.py +35 -0
  31. xinference/model/embedding/core.py +104 -84
  32. xinference/model/embedding/custom.py +55 -78
  33. xinference/model/embedding/embed_family.py +80 -31
  34. xinference/model/embedding/flag/core.py +21 -5
  35. xinference/model/embedding/llama_cpp/__init__.py +0 -0
  36. xinference/model/embedding/llama_cpp/core.py +234 -0
  37. xinference/model/embedding/model_spec.json +968 -103
  38. xinference/model/embedding/sentence_transformers/core.py +30 -20
  39. xinference/model/embedding/vllm/core.py +11 -5
  40. xinference/model/flexible/__init__.py +8 -2
  41. xinference/model/flexible/core.py +26 -119
  42. xinference/model/flexible/custom.py +69 -0
  43. xinference/model/flexible/launchers/image_process_launcher.py +1 -0
  44. xinference/model/flexible/launchers/modelscope_launcher.py +5 -1
  45. xinference/model/flexible/launchers/transformers_launcher.py +15 -3
  46. xinference/model/flexible/launchers/yolo_launcher.py +5 -1
  47. xinference/model/image/__init__.py +20 -20
  48. xinference/model/image/cache_manager.py +62 -0
  49. xinference/model/image/core.py +70 -182
  50. xinference/model/image/custom.py +28 -72
  51. xinference/model/image/model_spec.json +402 -119
  52. xinference/model/image/ocr/got_ocr2.py +3 -2
  53. xinference/model/image/stable_diffusion/core.py +22 -7
  54. xinference/model/image/stable_diffusion/mlx.py +6 -6
  55. xinference/model/image/utils.py +2 -2
  56. xinference/model/llm/__init__.py +71 -94
  57. xinference/model/llm/cache_manager.py +292 -0
  58. xinference/model/llm/core.py +37 -111
  59. xinference/model/llm/custom.py +88 -0
  60. xinference/model/llm/llama_cpp/core.py +5 -7
  61. xinference/model/llm/llm_family.json +16260 -8151
  62. xinference/model/llm/llm_family.py +138 -839
  63. xinference/model/llm/lmdeploy/core.py +5 -7
  64. xinference/model/llm/memory.py +3 -4
  65. xinference/model/llm/mlx/core.py +6 -8
  66. xinference/model/llm/reasoning_parser.py +3 -1
  67. xinference/model/llm/sglang/core.py +32 -14
  68. xinference/model/llm/transformers/chatglm.py +3 -7
  69. xinference/model/llm/transformers/core.py +49 -27
  70. xinference/model/llm/transformers/deepseek_v2.py +2 -2
  71. xinference/model/llm/transformers/gemma3.py +2 -2
  72. xinference/model/llm/transformers/multimodal/cogagent.py +2 -2
  73. xinference/model/llm/transformers/multimodal/deepseek_vl2.py +2 -2
  74. xinference/model/llm/transformers/multimodal/gemma3.py +2 -2
  75. xinference/model/llm/transformers/multimodal/glm4_1v.py +167 -0
  76. xinference/model/llm/transformers/multimodal/glm4v.py +2 -2
  77. xinference/model/llm/transformers/multimodal/intern_vl.py +2 -2
  78. xinference/model/llm/transformers/multimodal/minicpmv26.py +3 -3
  79. xinference/model/llm/transformers/multimodal/ovis2.py +2 -2
  80. xinference/model/llm/transformers/multimodal/qwen-omni.py +2 -2
  81. xinference/model/llm/transformers/multimodal/qwen2_audio.py +2 -2
  82. xinference/model/llm/transformers/multimodal/qwen2_vl.py +2 -2
  83. xinference/model/llm/transformers/opt.py +3 -7
  84. xinference/model/llm/utils.py +34 -49
  85. xinference/model/llm/vllm/core.py +77 -27
  86. xinference/model/llm/vllm/xavier/engine.py +5 -3
  87. xinference/model/llm/vllm/xavier/scheduler.py +10 -6
  88. xinference/model/llm/vllm/xavier/transfer.py +1 -1
  89. xinference/model/rerank/__init__.py +26 -25
  90. xinference/model/rerank/core.py +47 -87
  91. xinference/model/rerank/custom.py +25 -71
  92. xinference/model/rerank/model_spec.json +158 -33
  93. xinference/model/rerank/utils.py +2 -2
  94. xinference/model/utils.py +115 -54
  95. xinference/model/video/__init__.py +13 -17
  96. xinference/model/video/core.py +44 -102
  97. xinference/model/video/diffusers.py +4 -3
  98. xinference/model/video/model_spec.json +90 -21
  99. xinference/types.py +5 -3
  100. xinference/web/ui/build/asset-manifest.json +3 -3
  101. xinference/web/ui/build/index.html +1 -1
  102. xinference/web/ui/build/static/js/main.7d24df53.js +3 -0
  103. xinference/web/ui/build/static/js/main.7d24df53.js.map +1 -0
  104. xinference/web/ui/node_modules/.cache/babel-loader/2704ff66a5f73ca78b341eb3edec60154369df9d87fbc8c6dd60121abc5e1b0a.json +1 -0
  105. xinference/web/ui/node_modules/.cache/babel-loader/607dfef23d33e6b594518c0c6434567639f24f356b877c80c60575184ec50ed0.json +1 -0
  106. xinference/web/ui/node_modules/.cache/babel-loader/9be3d56173aacc3efd0b497bcb13c4f6365de30069176ee9403b40e717542326.json +1 -0
  107. xinference/web/ui/node_modules/.cache/babel-loader/9f9dd6c32c78a222d07da5987ae902effe16bcf20aac00774acdccc4de3c9ff2.json +1 -0
  108. xinference/web/ui/node_modules/.cache/babel-loader/b2ab5ee972c60d15eb9abf5845705f8ab7e1d125d324d9a9b1bcae5d6fd7ffb2.json +1 -0
  109. xinference/web/ui/src/locales/en.json +0 -1
  110. xinference/web/ui/src/locales/ja.json +0 -1
  111. xinference/web/ui/src/locales/ko.json +0 -1
  112. xinference/web/ui/src/locales/zh.json +0 -1
  113. {xinference-1.7.1.post1.dist-info → xinference-1.8.0.dist-info}/METADATA +9 -11
  114. {xinference-1.7.1.post1.dist-info → xinference-1.8.0.dist-info}/RECORD +119 -119
  115. xinference/model/audio/model_spec_modelscope.json +0 -231
  116. xinference/model/embedding/model_spec_modelscope.json +0 -293
  117. xinference/model/embedding/utils.py +0 -18
  118. xinference/model/image/model_spec_modelscope.json +0 -375
  119. xinference/model/llm/llama_cpp/memory.py +0 -457
  120. xinference/model/llm/llm_family_csghub.json +0 -56
  121. xinference/model/llm/llm_family_modelscope.json +0 -8700
  122. xinference/model/llm/llm_family_openmind_hub.json +0 -1019
  123. xinference/model/rerank/model_spec_modelscope.json +0 -85
  124. xinference/model/video/model_spec_modelscope.json +0 -184
  125. xinference/web/ui/build/static/js/main.9b12b7f9.js +0 -3
  126. xinference/web/ui/build/static/js/main.9b12b7f9.js.map +0 -1
  127. xinference/web/ui/node_modules/.cache/babel-loader/1460361af6975e63576708039f1cb732faf9c672d97c494d4055fc6331460be0.json +0 -1
  128. xinference/web/ui/node_modules/.cache/babel-loader/4efd8dda58fda83ed9546bf2f587df67f8d98e639117bee2d9326a9a1d9bebb2.json +0 -1
  129. xinference/web/ui/node_modules/.cache/babel-loader/55b9fb40b57fa926e8f05f31c2f96467e76e5ad62f033dca97c03f9e8c4eb4fe.json +0 -1
  130. xinference/web/ui/node_modules/.cache/babel-loader/5b2dafe5aa9e1105e0244a2b6751807342fa86aa0144b4e84d947a1686102715.json +0 -1
  131. xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +0 -1
  132. /xinference/web/ui/build/static/js/{main.9b12b7f9.js.LICENSE.txt → main.7d24df53.js.LICENSE.txt} +0 -0
  133. {xinference-1.7.1.post1.dist-info → xinference-1.8.0.dist-info}/WHEEL +0 -0
  134. {xinference-1.7.1.post1.dist-info → xinference-1.8.0.dist-info}/entry_points.txt +0 -0
  135. {xinference-1.7.1.post1.dist-info → xinference-1.8.0.dist-info}/licenses/LICENSE +0 -0
  136. {xinference-1.7.1.post1.dist-info → xinference-1.8.0.dist-info}/top_level.txt +0 -0
xinference/model/embedding/__init__.py

@@ -18,16 +18,15 @@ import os
 import warnings
 from typing import Any, Dict, List
 
+from ..utils import flatten_quantizations
 from .core import (
     EMBEDDING_MODEL_DESCRIPTIONS,
-    MODEL_NAME_TO_REVISION,
-    EmbeddingModelSpec,
+    EmbeddingModelFamilyV2,
     generate_embedding_description,
-    get_cache_status,
     get_embedding_model_descriptions,
 )
 from .custom import (
-    CustomEmbeddingModelSpec,
+    CustomEmbeddingModelFamilyV2,
     get_user_defined_embeddings,
     register_embedding,
     unregister_embedding,
@@ -36,7 +35,7 @@ from .embed_family import (
     BUILTIN_EMBEDDING_MODELS,
     EMBEDDING_ENGINES,
     FLAG_EMBEDDER_CLASSES,
-    MODELSCOPE_EMBEDDING_MODELS,
+    LLAMA_CPP_CLASSES,
     SENTENCE_TRANSFORMER_CLASSES,
     SUPPORTED_ENGINES,
     VLLM_CLASSES,
@@ -45,15 +44,19 @@ from .embed_family import (
 
 def register_custom_model():
     from ...constants import XINFERENCE_MODEL_DIR
+    from ..custom import migrate_from_v1_to_v2
 
-    user_defined_embedding_dir = os.path.join(XINFERENCE_MODEL_DIR, "embedding")
+    # migrate from v1 to v2 first
+    migrate_from_v1_to_v2("embedding", CustomEmbeddingModelFamilyV2)
+
+    user_defined_embedding_dir = os.path.join(XINFERENCE_MODEL_DIR, "v2", "embedding")
     if os.path.isdir(user_defined_embedding_dir):
         for f in os.listdir(user_defined_embedding_dir):
             try:
                 with codecs.open(
                     os.path.join(user_defined_embedding_dir, f), encoding="utf-8"
                 ) as fd:
-                    user_defined_llm_family = CustomEmbeddingModelSpec.parse_obj(
+                    user_defined_llm_family = CustomEmbeddingModelFamilyV2.parse_obj(
                         json.load(fd)
                     )
                     register_embedding(user_defined_llm_family, persist=False)
@@ -61,80 +64,89 @@ def register_custom_model():
                 warnings.warn(f"{user_defined_embedding_dir}/{f} has error, {e}")
 
 
-def generate_engine_config_by_model_name(model_spec: "EmbeddingModelSpec"):
-    model_name = model_spec.model_name
+def check_format_with_engine(model_format, engine):
+    if model_format in ["ggufv2"] and engine not in ["llama.cpp"]:
+        return False
+    if model_format not in ["ggufv2"] and engine == "llama.cpp":
+        return False
+    return True
+
+
+def generate_engine_config_by_model_name(model_family: "EmbeddingModelFamilyV2"):
+    model_name = model_family.model_name
     engines: Dict[str, List[Dict[str, Any]]] = EMBEDDING_ENGINES.get(
         model_name, {}
     )  # structure for engine query
-    for engine in SUPPORTED_ENGINES:
-        CLASSES = SUPPORTED_ENGINES[engine]
-        for cls in CLASSES:
-            # Every engine needs to implement match method
-            if cls.match(model_spec):
-                # we only match the first class for an engine
-                engines[engine] = [
-                    {
-                        "model_name": model_name,
-                        "embedding_class": cls,
-                    }
-                ]
-                break
+    for spec in [x for x in model_family.model_specs if x.model_hub == "huggingface"]:
+        model_format = spec.model_format
+        quantization = spec.quantization
+        for engine in SUPPORTED_ENGINES:
+            if not check_format_with_engine(model_format, engine):
+                continue
+            CLASSES = SUPPORTED_ENGINES[engine]
+            for cls in CLASSES:
+                # Every engine needs to implement match method
+                if cls.match(model_family, spec, quantization):
+                    # we only match the first class for an engine
+                    if engine not in engines:
+                        engines[engine] = [
+                            {
+                                "model_name": model_name,
+                                "model_format": model_format,
+                                "quantization": quantization,
+                                "embedding_class": cls,
+                            }
+                        ]
+                    else:
+                        engines[engine].append(
+                            {
+                                "model_name": model_name,
+                                "model_format": model_format,
+                                "quantization": quantization,
+                                "embedding_class": cls,
+                            }
+                        )
+                    break
     EMBEDDING_ENGINES[model_name] = engines
 
 
 # will be called in xinference/model/__init__.py
 def _install():
     _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
-    _model_spec_modelscope_json = os.path.join(
-        os.path.dirname(__file__), "model_spec_modelscope.json"
-    )
-    ################### HuggingFace Model List Info Init ###################
-    BUILTIN_EMBEDDING_MODELS.update(
-        dict(
-            (spec["model_name"], EmbeddingModelSpec(**spec))
-            for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
+
+    for json_obj in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8")):
+        flattened = []
+        for spec in json_obj["model_specs"]:
+            flattened.extend(flatten_quantizations(spec))
+        json_obj["model_specs"] = flattened
+        BUILTIN_EMBEDDING_MODELS[json_obj["model_name"]] = EmbeddingModelFamilyV2(
+            **json_obj
         )
-    )
+
     for model_name, model_spec in BUILTIN_EMBEDDING_MODELS.items():
-        MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
-
-    ################### ModelScope Model List Info Init ###################
-    MODELSCOPE_EMBEDDING_MODELS.update(
-        dict(
-            (spec["model_name"], EmbeddingModelSpec(**spec))
-            for spec in json.load(
-                codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
+        if model_spec.model_name not in EMBEDDING_MODEL_DESCRIPTIONS:
+            EMBEDDING_MODEL_DESCRIPTIONS.update(
+                generate_embedding_description(model_spec)
             )
-        )
-    )
-    for model_name, model_spec in MODELSCOPE_EMBEDDING_MODELS.items():
-        MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
-
-    # TODO: consider support more download hub in future...
-    # register model description after recording model revision
-    for model_spec_info in [BUILTIN_EMBEDDING_MODELS, MODELSCOPE_EMBEDDING_MODELS]:
-        for model_name, model_spec in model_spec_info.items():
-            if model_spec.model_name not in EMBEDDING_MODEL_DESCRIPTIONS:
-                EMBEDDING_MODEL_DESCRIPTIONS.update(
-                    generate_embedding_description(model_spec)
-                )
 
     from .flag.core import FlagEmbeddingModel
+    from .llama_cpp.core import XllamaCppEmbeddingModel
     from .sentence_transformers.core import SentenceTransformerEmbeddingModel
     from .vllm.core import VLLMEmbeddingModel
 
     SENTENCE_TRANSFORMER_CLASSES.extend([SentenceTransformerEmbeddingModel])
     FLAG_EMBEDDER_CLASSES.extend([FlagEmbeddingModel])
     VLLM_CLASSES.extend([VLLMEmbeddingModel])
+    LLAMA_CPP_CLASSES.extend([XllamaCppEmbeddingModel])
 
     SUPPORTED_ENGINES["sentence_transformers"] = SENTENCE_TRANSFORMER_CLASSES
     SUPPORTED_ENGINES["flag"] = FLAG_EMBEDDER_CLASSES
     SUPPORTED_ENGINES["vllm"] = VLLM_CLASSES
+    SUPPORTED_ENGINES["llama.cpp"] = LLAMA_CPP_CLASSES
 
     # Init embedding engine
-    for model_infos in [BUILTIN_EMBEDDING_MODELS, MODELSCOPE_EMBEDDING_MODELS]:
-        for model_spec in model_infos.values():
-            generate_engine_config_by_model_name(model_spec)
+    for model_spec in BUILTIN_EMBEDDING_MODELS.values():
+        generate_engine_config_by_model_name(model_spec)
 
     register_custom_model()
 
@@ -145,4 +157,3 @@ def _install():
         )
 
     del _model_spec_json
-    del _model_spec_modelscope_json
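
To make the new loading path concrete: _install() now reads a single model_spec.json of v2 families (the separate model_spec_modelscope.json is deleted in this release) and flattens each raw spec so that every concrete quantization becomes its own entry before constructing EmbeddingModelFamilyV2. The sketch below is a hypothetical reading of flatten_quantizations, which is imported from ..utils but whose implementation is not shown in this diff, so the real code may differ:

# Hypothetical sketch of flatten_quantizations (the real helper lives in
# xinference/model/utils.py and is not part of the hunks shown here).
from copy import deepcopy
from typing import Any, Dict, List

def flatten_quantizations(spec: Dict[str, Any]) -> List[Dict[str, Any]]:
    # Assumption: a raw spec may list several quantizations; the loader in
    # _install() expects one spec dict per concrete quantization.
    quantizations = spec.pop("quantizations", None) or [spec.get("quantization", "none")]
    flattened = []
    for q in quantizations:
        item = deepcopy(spec)
        item["quantization"] = q
        flattened.append(item)
    return flattened

After _install() runs, EMBEDDING_ENGINES maps each model name to engine -> list of {"model_name", "model_format", "quantization", "embedding_class"} entries, which check_engine_by_model_name_and_engine later queries when an instance is created.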
xinference/model/embedding/cache_manager.py (new file)

@@ -0,0 +1,35 @@
+import os
+from typing import TYPE_CHECKING
+
+from ..cache_manager import CacheManager
+
+if TYPE_CHECKING:
+    from .core import EmbeddingModelFamilyV2
+
+
+class EmbeddingCacheManager(CacheManager):
+    def __init__(self, model_family: "EmbeddingModelFamilyV2"):
+        from ..llm.cache_manager import LLMCacheManager
+
+        super().__init__(model_family)
+        # Composition design mode for avoiding duplicate code
+        self.cache_helper = LLMCacheManager(model_family)
+
+        spec = self._model_family.model_specs[0]
+        model_dir_name = (
+            f"{self._model_family.model_name}-{spec.model_format}-{spec.quantization}"
+        )
+        self._cache_dir = os.path.join(self._v2_cache_dir_prefix, model_dir_name)
+        self.cache_helper._cache_dir = self._cache_dir
+
+    def cache(self) -> str:
+        spec = self._model_family.model_specs[0]
+        if spec.model_uri is not None:
+            return self.cache_helper.cache_uri()
+        else:
+            if spec.model_hub == "huggingface":
+                return self.cache_helper.cache_from_huggingface()
+            elif spec.model_hub == "modelscope":
+                return self.cache_helper.cache_from_modelscope()
+            else:
+                raise ValueError(f"Unknown model hub: {spec.model_hub}")
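
Usage-wise, the manager narrows a family to one concrete spec, derives a <model_name>-<format>-<quantization> cache directory, and delegates downloading to LLMCacheManager depending on whether the spec carries a model_uri or a hub id. A minimal sketch, assuming _install() has already populated BUILTIN_EMBEDDING_MODELS (the model name below is only illustrative):

# Illustrative only: assumes the named family exists among the builtins.
from xinference.model.embedding.embed_family import BUILTIN_EMBEDDING_MODELS
from xinference.model.embedding.cache_manager import EmbeddingCacheManager

family = BUILTIN_EMBEDDING_MODELS["bge-small-en-v1.5"].copy()  # hypothetical entry
family.model_specs = [family.model_specs[0]]  # cache one concrete (format, quantization)

manager = EmbeddingCacheManager(family)
print(manager.get_cache_dir())  # .../<name>-<format>-<quantization>
model_path = manager.cache()    # resolves model_uri or downloads from the hub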
xinference/model/embedding/core.py

@@ -12,23 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import abc
 import gc
 import logging
 import os
+from abc import abstractmethod
 from collections import defaultdict
-from typing import Dict, List, Literal, Optional, Tuple, Union
+from typing import Annotated, Dict, List, Literal, Optional, Union
 
-from ..._compat import ROOT_KEY, ErrorWrapper, ValidationError
+from ..._compat import ROOT_KEY, BaseModel, ErrorWrapper, Field, ValidationError
 from ...device_utils import empty_cache
-from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
-from ..utils import get_cache_dir, is_model_cached
+from ..core import VirtualEnvSettings
+from ..utils import ModelInstanceInfoMixin
 from .embed_family import match_embedding
 
 logger = logging.getLogger(__name__)
 
 # Used for check whether the model is cached.
 # Init when registering all the builtin models.
-MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
 EMBEDDING_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)
 EMBEDDING_EMPTY_CACHE_COUNT = int(
     os.getenv("XINFERENCE_EMBEDDING_EMPTY_CACHE_COUNT", "10")
@@ -46,96 +47,100 @@ def get_embedding_model_descriptions():
     return copy.deepcopy(EMBEDDING_MODEL_DESCRIPTIONS)
 
 
+class TransformersEmbeddingSpecV1(BaseModel):
+    model_format: Literal["pytorch"]
+    model_hub: str = "huggingface"
+    model_id: Optional[str]
+    model_uri: Optional[str]
+    model_revision: Optional[str]
+    quantization: str
+
+
+class LlamaCppEmbeddingSpecV1(BaseModel):
+    model_format: Literal["ggufv2"]
+    model_hub: str = "huggingface"
+    model_id: Optional[str]
+    model_uri: Optional[str]
+    model_revision: Optional[str]
+    quantization: str
+    model_file_name_template: str
+    model_file_name_split_template: Optional[str]
+    quantization_parts: Optional[Dict[str, List[str]]]
+
+
+EmbeddingSpecV1 = Annotated[
+    Union[TransformersEmbeddingSpecV1, LlamaCppEmbeddingSpecV1],
+    Field(discriminator="model_format"),
+]
+
+
 # this class define the basic info of embedding model
-class EmbeddingModelSpec(CacheableModelSpec):
+class EmbeddingModelFamilyV2(BaseModel, ModelInstanceInfoMixin):
+    version: Literal[2]
     model_name: str
     dimensions: int
     max_tokens: int
     language: List[str]
-    model_id: str
-    model_revision: Optional[str]
-    model_hub: str = "huggingface"
+    model_specs: List["EmbeddingSpecV1"]
+    cache_config: Optional[dict]
     virtualenv: Optional[VirtualEnvSettings]
 
+    class Config:
+        extra = "allow"
 
-class EmbeddingModelDescription(ModelDescription):
-    def __init__(
-        self,
-        address: Optional[str],
-        devices: Optional[List[str]],
-        model_spec: EmbeddingModelSpec,
-        model_path: Optional[str] = None,
-    ):
-        super().__init__(address, devices, model_path=model_path)
-        self._model_spec = model_spec
-
-    @property
-    def spec(self):
-        return self._model_spec
-
-    def to_dict(self):
+    def to_description(self):
+        spec = self.model_specs[0]
         return {
             "model_type": "embedding",
-            "address": self.address,
-            "accelerators": self.devices,
-            "model_name": self._model_spec.model_name,
-            "dimensions": self._model_spec.dimensions,
-            "max_tokens": self._model_spec.max_tokens,
-            "language": self._model_spec.language,
-            "model_revision": self._model_spec.model_revision,
+            "address": getattr(self, "address", None),
+            "accelerators": getattr(self, "accelerators", None),
+            "model_name": self.model_name,
+            "dimensions": self.dimensions,
+            "max_tokens": self.max_tokens,
+            "language": self.language,
+            "model_hub": spec.model_hub,
+            "model_revision": spec.model_revision,
+            "quantization": spec.quantization,
         }
 
     def to_version_info(self):
-        from .utils import get_model_version
+        from .cache_manager import EmbeddingCacheManager
 
-        if self._model_path is None:
-            is_cached = get_cache_status(self._model_spec)
-            file_location = get_cache_dir(self._model_spec)
-        else:
-            is_cached = True
-            file_location = self._model_path
+        cache_manager = EmbeddingCacheManager(self)
 
         return {
-            "model_version": get_model_version(self._model_spec),
-            "model_file_location": file_location,
-            "cache_status": is_cached,
-            "dimensions": self._model_spec.dimensions,
-            "max_tokens": self._model_spec.max_tokens,
+            "model_version": get_model_version(self),
+            "model_file_location": cache_manager.get_cache_dir(),
+            "cache_status": cache_manager.get_cache_status(),
+            "dimensions": self.dimensions,
+            "max_tokens": self.max_tokens,
        }
 
 
+def get_model_version(embedding_model: EmbeddingModelFamilyV2) -> str:
+    spec = embedding_model.model_specs[0]
+    return f"{embedding_model.model_name}--{embedding_model.max_tokens}--{embedding_model.dimensions}--{spec.model_format}--{spec.quantization}"
+
+
 def generate_embedding_description(
-    model_spec: EmbeddingModelSpec,
+    model_family: EmbeddingModelFamilyV2,
 ) -> Dict[str, List[Dict]]:
     res = defaultdict(list)
-    res[model_spec.model_name].append(
-        EmbeddingModelDescription(None, None, model_spec).to_version_info()
-    )
+    specs = [x for x in model_family.model_specs if x.model_hub == "huggingface"]
+    for spec in specs:
+        family = model_family.copy()
+        family.model_specs = [spec]
+        res[model_family.model_name].append(family.to_version_info())
     return res
 
 
-def cache(model_spec: EmbeddingModelSpec):
-    from ..utils import cache
-
-    return cache(model_spec, EmbeddingModelDescription)
-
-
-def get_cache_status(
-    model_spec: EmbeddingModelSpec,
-) -> bool:
-    return is_model_cached(model_spec, MODEL_NAME_TO_REVISION)
-
-
-import abc
-from abc import abstractmethod
-
-
 class EmbeddingModel(abc.ABC):
     def __init__(
         self,
         model_uid: str,
         model_path: str,
-        model_spec: EmbeddingModelSpec,
+        model_family: EmbeddingModelFamilyV2,
+        quantization: Optional[str] = None,
         device: Optional[str] = None,
         **kwargs,
     ):
@@ -145,8 +150,10 @@ class EmbeddingModel(abc.ABC):
         self._model = None
         self._tokenizer = None
         self._counter = 0
-        self._model_spec = model_spec
-        self._model_name = self._model_spec.model_name
+        self.model_family = model_family
+        self._model_spec = model_family.model_specs[0]
+        self._quantization = quantization
+        self._model_name = self.model_family.model_name
         self._kwargs = kwargs
 
     @classmethod
@@ -156,17 +163,27 @@
 
     @classmethod
     @abstractmethod
-    def match_json(cls, model_spec: EmbeddingModelSpec) -> bool:
+    def match_json(
+        cls,
+        model_family: EmbeddingModelFamilyV2,
+        model_spec: EmbeddingSpecV1,
+        quantization: str,
+    ) -> bool:
         pass
 
     @classmethod
-    def match(cls, model_spec: EmbeddingModelSpec):
+    def match(
+        cls,
+        model_family: EmbeddingModelFamilyV2,
+        model_spec: EmbeddingSpecV1,
+        quantization: str,
+    ):
         """
         Return if the model_spec can be matched.
         """
         if not cls.check_lib():
             return False
-        return cls.match_json(model_spec)
+        return cls.match_json(model_family, model_spec, quantization)
 
     @abstractmethod
     def load(self):
@@ -290,36 +307,39 @@
 
 
 def create_embedding_model_instance(
-    subpool_addr: str,
-    devices: Optional[List[str]],
     model_uid: str,
     model_name: str,
     model_engine: Optional[str],
+    model_format: Optional[str] = None,
+    quantization: Optional[str] = None,
     download_hub: Optional[
         Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
     ] = None,
     model_path: Optional[str] = None,
     **kwargs,
-) -> Tuple[EmbeddingModel, EmbeddingModelDescription]:
-    model_spec = match_embedding(model_name, download_hub)
+) -> EmbeddingModel:
+    from .cache_manager import EmbeddingCacheManager
+
+    model_family = match_embedding(model_name, model_format, quantization, download_hub)
     if model_path is None:
-        model_path = cache(model_spec)
+        cache_manager = EmbeddingCacheManager(model_family)
+        model_path = cache_manager.cache()
 
     if model_engine is None:
-        # unlike LLM and for compatibility
+        # unlike LLM and for compatibility,
         # we use sentence_transformers as the default engine for all models
        model_engine = "sentence_transformers"
 
     from .embed_family import check_engine_by_model_name_and_engine
 
     embedding_cls = check_engine_by_model_name_and_engine(
-        model_name,
-        model_engine,
+        model_engine, model_name, model_format, quantization
     )
-    devices = devices or ["cpu"]
-    # model class should be one of flag, fastembed, sentence_transformers
-    model = embedding_cls(model_uid, model_path, model_spec, **kwargs)
-    model_description = EmbeddingModelDescription(
-        subpool_addr, devices, model_spec, model_path=model_path
+    model = embedding_cls(
+        model_uid,
+        model_path,
+        model_family,
+        quantization,
+        **kwargs,
     )
-    return model, model_description
+    return model
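
The EmbeddingSpecV1 union above dispatches on model_format through a pydantic discriminated union, so a "pytorch" payload validates as TransformersEmbeddingSpecV1 and a "ggufv2" payload as LlamaCppEmbeddingSpecV1. A minimal sketch of that dispatch, assuming plain pydantic v1 in place of xinference's _compat shim; all field values are hypothetical:

# Sketch only: uses pydantic v1 directly, whereas xinference routes
# BaseModel/Field through its _compat module.
from pydantic import parse_obj_as

from xinference.model.embedding.core import EmbeddingSpecV1, LlamaCppEmbeddingSpecV1

spec = parse_obj_as(
    EmbeddingSpecV1,
    {
        "model_format": "ggufv2",      # discriminator picks LlamaCppEmbeddingSpecV1
        "model_id": "org/model-GGUF",  # hypothetical hub id
        "model_uri": None,
        "model_revision": None,
        "quantization": "Q4_K_M",
        "model_file_name_template": "model.{quantization}.gguf",
        "model_file_name_split_template": None,
        "quantization_parts": None,
    },
)
assert isinstance(spec, LlamaCppEmbeddingSpecV1)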
xinference/model/embedding/custom.py

@@ -11,103 +11,80 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import logging
-import os
-from threading import Lock
-from typing import List, Optional
+from typing import List
 
-from ...constants import XINFERENCE_CACHE_DIR, XINFERENCE_MODEL_DIR
-from .core import EmbeddingModelSpec
+from ..._compat import Literal
+from ..custom import ModelRegistry
+from .core import EmbeddingModelFamilyV2
 
 logger = logging.getLogger(__name__)
 
 
-UD_EMBEDDING_LOCK = Lock()
+class CustomEmbeddingModelFamilyV2(EmbeddingModelFamilyV2):
+    version: Literal[2] = 2
 
 
-class CustomEmbeddingModelSpec(EmbeddingModelSpec):
-    model_id: Optional[str]  # type: ignore
-    model_revision: Optional[str]  # type: ignore
-    model_uri: Optional[str]
+UD_EMBEDDINGS: List[CustomEmbeddingModelFamilyV2] = []
 
 
-UD_EMBEDDINGS: List[CustomEmbeddingModelSpec] = []
+class EmbeddingModelRegistry(ModelRegistry):
+    model_type = "embedding"
 
+    def __init__(self):
+        from .embed_family import BUILTIN_EMBEDDING_MODELS
 
-def get_user_defined_embeddings() -> List[EmbeddingModelSpec]:
-    with UD_EMBEDDING_LOCK:
-        return UD_EMBEDDINGS.copy()
+        super().__init__()
+        self.models = UD_EMBEDDINGS
+        self.builtin_models = list(BUILTIN_EMBEDDING_MODELS.keys())
 
+    def add_ud_model(self, model_spec):
+        from . import generate_engine_config_by_model_name
 
-def register_embedding(model_spec: CustomEmbeddingModelSpec, persist: bool):
-    from ...constants import XINFERENCE_MODEL_DIR
-    from ..utils import is_valid_model_name, is_valid_model_uri
-    from . import (
-        BUILTIN_EMBEDDING_MODELS,
-        MODELSCOPE_EMBEDDING_MODELS,
-        generate_engine_config_by_model_name,
-    )
+        UD_EMBEDDINGS.append(model_spec)
+        generate_engine_config_by_model_name(model_spec)
 
-    if not is_valid_model_name(model_spec.model_name):
-        raise ValueError(f"Invalid model name {model_spec.model_name}.")
+    def check_model_uri(self, model_family: "EmbeddingModelFamilyV2"):
+        from ..utils import is_valid_model_uri
 
-    model_uri = model_spec.model_uri
-    if model_uri and not is_valid_model_uri(model_uri):
-        raise ValueError(f"Invalid model URI {model_uri}.")
+        for spec in model_family.model_specs:
+            model_uri = spec.model_uri
+            if model_uri and not is_valid_model_uri(model_uri):
+                raise ValueError(f"Invalid model URI {model_uri}.")
 
-    with UD_EMBEDDING_LOCK:
-        for model_name in (
-            list(BUILTIN_EMBEDDING_MODELS.keys())
-            + list(MODELSCOPE_EMBEDDING_MODELS.keys())
-            + [spec.model_name for spec in UD_EMBEDDINGS]
-        ):
-            if model_spec.model_name == model_name:
-                raise ValueError(
-                    f"Model name conflicts with existing model {model_spec.model_name}"
-                )
+    def remove_ud_model(self, model_family: "CustomEmbeddingModelFamilyV2"):
+        from .embed_family import EMBEDDING_ENGINES
 
-        UD_EMBEDDINGS.append(model_spec)
-        generate_engine_config_by_model_name(model_spec)
+        UD_EMBEDDINGS.remove(model_family)
+        del EMBEDDING_ENGINES[model_family.model_name]
+
+    def remove_ud_model_files(self, model_family: "CustomEmbeddingModelFamilyV2"):
+        from .cache_manager import EmbeddingCacheManager
+
+        _model_family = model_family.copy()
+        for spec in model_family.model_specs:
+            _model_family.model_specs = [spec]
+            cache_manager = EmbeddingCacheManager(_model_family)
+            cache_manager.unregister_custom_model(self.model_type)
 
-    if persist:
-        persist_path = os.path.join(
-            XINFERENCE_MODEL_DIR, "embedding", f"{model_spec.model_name}.json"
-        )
-        os.makedirs(os.path.dirname(persist_path), exist_ok=True)
-        with open(persist_path, mode="w") as fd:
-            fd.write(model_spec.json())
+
+def get_user_defined_embeddings() -> List[EmbeddingModelFamilyV2]:
+    from ..custom import RegistryManager
+
+    registry = RegistryManager.get_registry("embedding")
+    return registry.get_custom_models()
+
+
+def register_embedding(model_family: CustomEmbeddingModelFamilyV2, persist: bool):
+    from ..custom import RegistryManager
+
+    registry = RegistryManager.get_registry("embedding")
+    registry.register(model_family, persist)
 
 
 def unregister_embedding(model_name: str, raise_error: bool = True):
-    with UD_EMBEDDING_LOCK:
-        model_spec = None
-        for i, f in enumerate(UD_EMBEDDINGS):
-            if f.model_name == model_name:
-                model_spec = f
-                break
-        if model_spec:
-            UD_EMBEDDINGS.remove(model_spec)
-
-            persist_path = os.path.join(
-                XINFERENCE_MODEL_DIR, "embedding", f"{model_spec.model_name}.json"
-            )
-            if os.path.exists(persist_path):
-                os.remove(persist_path)
-
-            cache_dir = os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-            if os.path.exists(cache_dir):
-                logger.warning(
-                    f"Remove the cache of user-defined model {model_spec.model_name}. "
-                    f"Cache directory: {cache_dir}"
-                )
-                if os.path.islink(cache_dir):
-                    os.remove(cache_dir)
-                else:
-                    logger.warning(
-                        f"Cache directory is not a soft link, please remove it manually."
-                    )
-        else:
-            if raise_error:
-                raise ValueError(f"Model {model_name} not found")
-            else:
-                logger.warning(f"Custom embedding model {model_name} not found")
+    from ..custom import RegistryManager
+
+    registry = RegistryManager.get_registry("embedding")
+    registry.unregister(model_name, raise_error)
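
With this, custom-model bookkeeping moves off module-level locks and hand-rolled JSON persistence onto the shared ModelRegistry/RegistryManager machinery in xinference/model/custom.py (a new file in this release, see the list above). A minimal sketch of registering a user-defined embedding under the new scheme; every field value below is hypothetical:

from xinference.model.embedding.custom import (
    CustomEmbeddingModelFamilyV2,
    register_embedding,
    unregister_embedding,
)

family = CustomEmbeddingModelFamilyV2(
    version=2,
    model_name="my-embedding",  # hypothetical model
    dimensions=768,
    max_tokens=512,
    language=["en"],
    model_specs=[
        {
            "model_format": "pytorch",
            "model_id": None,
            "model_uri": "file:///data/my-embedding",  # local weights
            "model_revision": None,
            "quantization": "none",
        }
    ],
    cache_config=None,
    virtualenv=None,
)

# The registry validates the URI, checks name conflicts against builtin
# models, and with persist=True writes the family JSON under the v2
# directory that register_custom_model() reads at startup.
register_embedding(family, persist=False)
unregister_embedding("my-embedding")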