xinference 1.7.1__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/client/restful/async_restful_client.py +8 -13
- xinference/client/restful/restful_client.py +6 -2
- xinference/core/chat_interface.py +6 -4
- xinference/core/media_interface.py +5 -0
- xinference/core/model.py +1 -5
- xinference/core/supervisor.py +117 -68
- xinference/core/worker.py +49 -37
- xinference/deploy/test/test_cmdline.py +2 -6
- xinference/model/audio/__init__.py +26 -23
- xinference/model/audio/chattts.py +3 -2
- xinference/model/audio/core.py +49 -98
- xinference/model/audio/cosyvoice.py +3 -2
- xinference/model/audio/custom.py +28 -73
- xinference/model/audio/f5tts.py +3 -2
- xinference/model/audio/f5tts_mlx.py +3 -2
- xinference/model/audio/fish_speech.py +3 -2
- xinference/model/audio/funasr.py +17 -4
- xinference/model/audio/kokoro.py +3 -2
- xinference/model/audio/megatts.py +3 -2
- xinference/model/audio/melotts.py +3 -2
- xinference/model/audio/model_spec.json +572 -171
- xinference/model/audio/utils.py +0 -6
- xinference/model/audio/whisper.py +3 -2
- xinference/model/audio/whisper_mlx.py +3 -2
- xinference/model/cache_manager.py +141 -0
- xinference/model/core.py +6 -49
- xinference/model/custom.py +174 -0
- xinference/model/embedding/__init__.py +67 -56
- xinference/model/embedding/cache_manager.py +35 -0
- xinference/model/embedding/core.py +104 -84
- xinference/model/embedding/custom.py +55 -78
- xinference/model/embedding/embed_family.py +80 -31
- xinference/model/embedding/flag/core.py +21 -5
- xinference/model/embedding/llama_cpp/__init__.py +0 -0
- xinference/model/embedding/llama_cpp/core.py +234 -0
- xinference/model/embedding/model_spec.json +968 -103
- xinference/model/embedding/sentence_transformers/core.py +30 -20
- xinference/model/embedding/vllm/core.py +11 -5
- xinference/model/flexible/__init__.py +8 -2
- xinference/model/flexible/core.py +26 -119
- xinference/model/flexible/custom.py +69 -0
- xinference/model/flexible/launchers/image_process_launcher.py +1 -0
- xinference/model/flexible/launchers/modelscope_launcher.py +5 -1
- xinference/model/flexible/launchers/transformers_launcher.py +15 -3
- xinference/model/flexible/launchers/yolo_launcher.py +5 -1
- xinference/model/image/__init__.py +20 -20
- xinference/model/image/cache_manager.py +62 -0
- xinference/model/image/core.py +70 -182
- xinference/model/image/custom.py +28 -72
- xinference/model/image/model_spec.json +402 -119
- xinference/model/image/ocr/got_ocr2.py +3 -2
- xinference/model/image/stable_diffusion/core.py +22 -7
- xinference/model/image/stable_diffusion/mlx.py +6 -6
- xinference/model/image/utils.py +2 -2
- xinference/model/llm/__init__.py +71 -94
- xinference/model/llm/cache_manager.py +292 -0
- xinference/model/llm/core.py +37 -111
- xinference/model/llm/custom.py +88 -0
- xinference/model/llm/llama_cpp/core.py +5 -7
- xinference/model/llm/llm_family.json +16260 -8151
- xinference/model/llm/llm_family.py +138 -839
- xinference/model/llm/lmdeploy/core.py +5 -7
- xinference/model/llm/memory.py +3 -4
- xinference/model/llm/mlx/core.py +6 -8
- xinference/model/llm/reasoning_parser.py +3 -1
- xinference/model/llm/sglang/core.py +32 -14
- xinference/model/llm/transformers/chatglm.py +3 -7
- xinference/model/llm/transformers/core.py +49 -27
- xinference/model/llm/transformers/deepseek_v2.py +2 -2
- xinference/model/llm/transformers/gemma3.py +2 -2
- xinference/model/llm/transformers/multimodal/cogagent.py +2 -2
- xinference/model/llm/transformers/multimodal/deepseek_vl2.py +2 -2
- xinference/model/llm/transformers/multimodal/gemma3.py +2 -2
- xinference/model/llm/transformers/multimodal/glm4_1v.py +167 -0
- xinference/model/llm/transformers/multimodal/glm4v.py +2 -2
- xinference/model/llm/transformers/multimodal/intern_vl.py +2 -2
- xinference/model/llm/transformers/multimodal/minicpmv26.py +3 -3
- xinference/model/llm/transformers/multimodal/ovis2.py +2 -2
- xinference/model/llm/transformers/multimodal/qwen-omni.py +2 -2
- xinference/model/llm/transformers/multimodal/qwen2_audio.py +2 -2
- xinference/model/llm/transformers/multimodal/qwen2_vl.py +2 -2
- xinference/model/llm/transformers/opt.py +3 -7
- xinference/model/llm/utils.py +34 -49
- xinference/model/llm/vllm/core.py +77 -27
- xinference/model/llm/vllm/xavier/engine.py +5 -3
- xinference/model/llm/vllm/xavier/scheduler.py +10 -6
- xinference/model/llm/vllm/xavier/transfer.py +1 -1
- xinference/model/rerank/__init__.py +26 -25
- xinference/model/rerank/core.py +47 -87
- xinference/model/rerank/custom.py +25 -71
- xinference/model/rerank/model_spec.json +158 -33
- xinference/model/rerank/utils.py +2 -2
- xinference/model/utils.py +115 -54
- xinference/model/video/__init__.py +13 -17
- xinference/model/video/core.py +44 -102
- xinference/model/video/diffusers.py +4 -3
- xinference/model/video/model_spec.json +90 -21
- xinference/types.py +5 -3
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.7d24df53.js +3 -0
- xinference/web/ui/build/static/js/main.7d24df53.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2704ff66a5f73ca78b341eb3edec60154369df9d87fbc8c6dd60121abc5e1b0a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/607dfef23d33e6b594518c0c6434567639f24f356b877c80c60575184ec50ed0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9be3d56173aacc3efd0b497bcb13c4f6365de30069176ee9403b40e717542326.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9f9dd6c32c78a222d07da5987ae902effe16bcf20aac00774acdccc4de3c9ff2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b2ab5ee972c60d15eb9abf5845705f8ab7e1d125d324d9a9b1bcae5d6fd7ffb2.json +1 -0
- xinference/web/ui/src/locales/en.json +0 -1
- xinference/web/ui/src/locales/ja.json +0 -1
- xinference/web/ui/src/locales/ko.json +0 -1
- xinference/web/ui/src/locales/zh.json +0 -1
- {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/METADATA +9 -11
- {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/RECORD +119 -119
- xinference/model/audio/model_spec_modelscope.json +0 -231
- xinference/model/embedding/model_spec_modelscope.json +0 -293
- xinference/model/embedding/utils.py +0 -18
- xinference/model/image/model_spec_modelscope.json +0 -375
- xinference/model/llm/llama_cpp/memory.py +0 -457
- xinference/model/llm/llm_family_csghub.json +0 -56
- xinference/model/llm/llm_family_modelscope.json +0 -8700
- xinference/model/llm/llm_family_openmind_hub.json +0 -1019
- xinference/model/rerank/model_spec_modelscope.json +0 -85
- xinference/model/video/model_spec_modelscope.json +0 -184
- xinference/web/ui/build/static/js/main.9b12b7f9.js +0 -3
- xinference/web/ui/build/static/js/main.9b12b7f9.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1460361af6975e63576708039f1cb732faf9c672d97c494d4055fc6331460be0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4efd8dda58fda83ed9546bf2f587df67f8d98e639117bee2d9326a9a1d9bebb2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/55b9fb40b57fa926e8f05f31c2f96467e76e5ad62f033dca97c03f9e8c4eb4fe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5b2dafe5aa9e1105e0244a2b6751807342fa86aa0144b4e84d947a1686102715.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +0 -1
- /xinference/web/ui/build/static/js/{main.9b12b7f9.js.LICENSE.txt → main.7d24df53.js.LICENSE.txt} +0 -0
- {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/WHEEL +0 -0
- {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/top_level.txt +0 -0

xinference/model/embedding/sentence_transformers/core.py

@@ -14,20 +14,16 @@
 
 import importlib.util
 import logging
-from collections import defaultdict
 from typing import List, Optional, Union, no_type_check
 
 import numpy as np
 import torch
 
-from ....
-from
+from ....device_utils import is_device_available
+from ....types import Embedding, EmbeddingData, EmbeddingUsage
+from ..core import EmbeddingModel, EmbeddingModelFamilyV2, EmbeddingSpecV1
 
 logger = logging.getLogger(__name__)
-
-# Used for check whether the model is cached.
-# Init when registering all the builtin models.
-MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
 SENTENCE_TRANSFORMER_MODEL_LIST: List[str] = []
 
 
@@ -76,8 +72,8 @@ class SentenceTransformerEmbeddingModel(EmbeddingModel):
             torch_dtype = torch.float32
 
         if (
-            "gte" in self.
-            and "qwen2" in self.
+            "gte" in self.model_family.model_name.lower()
+            and "qwen2" in self.model_family.model_name.lower()
         ):
             model_kwargs = {"device_map": "auto"}
             if torch_dtype:
@@ -87,10 +83,12 @@ class SentenceTransformerEmbeddingModel(EmbeddingModel):
                 device=self._device,
                 model_kwargs=model_kwargs,
             )
-        elif "qwen3" in self.
+        elif "qwen3" in self.model_family.model_name.lower():
            # qwen3 embedding
            flash_attn_installed = importlib.util.find_spec("flash_attn") is not None
-            flash_attn_enabled = self._kwargs.get(
+            flash_attn_enabled = self._kwargs.get(
+                "enable_flash_attn", is_device_available("cuda")
+            )
            model_kwargs = {"device_map": "auto"}
            tokenizer_kwargs = {}
            if flash_attn_installed and flash_attn_enabled:
@@ -119,7 +117,8 @@ class SentenceTransformerEmbeddingModel(EmbeddingModel):
             trust_remote_code=True,
         )
 
-
+        if hasattr(self._model, "tokenizer"):
+            self._tokenizer = self._model.tokenizer
 
     def create_embedding(
         self,
@@ -227,8 +226,8 @@ class SentenceTransformerEmbeddingModel(EmbeddingModel):
         device = model._target_device
 
         if (
-            "gte" in self.
-            and "qwen2" in self.
+            "gte" in self.model_family.model_name.lower()
+            and "qwen2" in self.model_family.model_name.lower()
         ):
             model.to(device)
 
@@ -254,7 +253,10 @@ class SentenceTransformerEmbeddingModel(EmbeddingModel):
             features.update(extra_features)
             # when batching, the attention mask 1 means there is a token
             # thus we just sum up it to get the total number of tokens
-            if
+            if (
+                "clip" in self.model_family.model_name.lower()
+                or "jina-embeddings-v4" in self.model_family.model_name.lower()
+            ):
                 if "input_ids" in features and hasattr(
                     features["input_ids"], "numel"
                 ):
@@ -322,8 +324,8 @@ class SentenceTransformerEmbeddingModel(EmbeddingModel):
 
         # seems already support prompt in embedding model
         if (
-            "gte" in self.
-            and "qwen2" in self.
+            "gte" in self.model_family.model_name.lower()
+            and "qwen2" in self.model_family.model_name.lower()
         ):
             all_embeddings, all_token_nums = encode(
                 self._model,
@@ -332,7 +334,10 @@ class SentenceTransformerEmbeddingModel(EmbeddingModel):
                 convert_to_numpy=False,
                 **kwargs,
             )
-        elif
+        elif (
+            "clip" in self.model_family.model_name.lower()
+            or "jina-embeddings-v4" in self.model_family.model_name.lower()
+        ):
             import base64
             import re
             from io import BytesIO
@@ -409,6 +414,11 @@ class SentenceTransformerEmbeddingModel(EmbeddingModel):
         return importlib.util.find_spec("sentence_transformers") is not None
 
     @classmethod
-    def match_json(
+    def match_json(
+        cls,
+        model_family: EmbeddingModelFamilyV2,
+        model_spec: EmbeddingSpecV1,
+        quantization: str,
+    ) -> bool:
         # As default embedding engine, sentence-transformer support all models
-        return
+        return model_spec.model_format in ["pytorch"]

xinference/model/embedding/vllm/core.py

@@ -17,7 +17,7 @@ import logging
 from typing import List, Union
 
 from ....types import Embedding, EmbeddingData, EmbeddingUsage
-from ..core import EmbeddingModel,
+from ..core import EmbeddingModel, EmbeddingModelFamilyV2, EmbeddingSpecV1
 
 logger = logging.getLogger(__name__)
 SUPPORTED_MODELS_PREFIXES = ["bge", "gte", "text2vec", "m3e", "gte", "Qwen3"]
@@ -88,8 +88,14 @@ class VLLMEmbeddingModel(EmbeddingModel):
         return importlib.util.find_spec("vllm") is not None
 
     @classmethod
-    def match_json(
-
-
-
+    def match_json(
+        cls,
+        model_family: EmbeddingModelFamilyV2,
+        model_spec: EmbeddingSpecV1,
+        quantization: str,
+    ) -> bool:
+        if model_spec.model_format in ["pytorch"]:
+            prefix = model_family.model_name.split("-", 1)[0]
+            if prefix in SUPPORTED_MODELS_PREFIXES:
+                return True
         return False
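
The vLLM matcher keys off the model name's leading token. A worked example of that prefix rule (list values copied from this diff, model names chosen for illustration):

    # Prefix check as in VLLMEmbeddingModel.match_json above.
    SUPPORTED_MODELS_PREFIXES = ["bge", "gte", "text2vec", "m3e", "gte", "Qwen3"]

    for name in ["bge-m3", "Qwen3-Embedding-0.6B", "jina-embeddings-v4"]:
        prefix = name.split("-", 1)[0]
        print(name, "->", prefix in SUPPORTED_MODELS_PREFIXES)
    # bge-m3 -> True, Qwen3-Embedding-0.6B -> True, jina-embeddings-v4 -> False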

xinference/model/flexible/__init__.py

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import codecs
 import json
 import logging
@@ -25,6 +24,8 @@ from .core import (
     FlexibleModelSpec,
     generate_flexible_model_description,
     get_flexible_model_descriptions,
+)
+from .custom import (
     get_flexible_models,
     register_flexible_model,
     unregister_flexible_model,
@@ -34,7 +35,12 @@ logger = logging.getLogger(__name__)
 
 
 def register_custom_model():
-
+    from ..custom import migrate_from_v1_to_v2
+
+    # migrate from v1 to v2 first
+    migrate_from_v1_to_v2("flexible", FlexibleModelSpec)
+
+    model_dir = os.path.join(XINFERENCE_MODEL_DIR, "v2", "flexible")
     if os.path.isdir(model_dir):
         for f in os.listdir(model_dir):
             try:
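
register_custom_model now runs the v1-to-v2 migration before scanning the on-disk registry, so the v2 directory becomes the only place specs are loaded from. A sketch of the resulting scan, assuming the conventional XINFERENCE_MODEL_DIR location (the default path here is an assumption):

    import json
    import os

    XINFERENCE_MODEL_DIR = os.path.expanduser("~/.xinference/model")  # assumed default
    model_dir = os.path.join(XINFERENCE_MODEL_DIR, "v2", "flexible")
    if os.path.isdir(model_dir):
        for f in os.listdir(model_dir):
            # one persisted FlexibleModelSpec JSON per registered model
            with open(os.path.join(model_dir, f), encoding="utf-8") as fd:
                print(json.load(fd).get("model_name"))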

xinference/model/flexible/core.py

@@ -14,21 +14,19 @@
 
 import json
 import logging
-import os
 from collections import defaultdict
-from
-from typing import Dict, List, Optional, Tuple
+from typing import Dict, List, Optional
 
-from ...
-from ..core import CacheableModelSpec,
+from ..._compat import Literal
+from ..core import CacheableModelSpec, VirtualEnvSettings
+from ..utils import ModelInstanceInfoMixin
 from .utils import get_launcher
 
 logger = logging.getLogger(__name__)
 
-FLEXIBLE_MODEL_LOCK = Lock()
 
-
-
+class FlexibleModelSpec(CacheableModelSpec, ModelInstanceInfoMixin):
+    version: Literal[1, 2] = 2
     model_id: Optional[str]  # type: ignore
     model_description: Optional[str]
     model_uri: Optional[str]
@@ -39,42 +37,26 @@ class FlexibleModelSpec(CacheableModelSpec):
     def parser_args(self):
         return json.loads(self.launcher_args)
 
+    class Config:
+        extra = "allow"
 
-
-    def __init__(
-        self,
-        address: Optional[str],
-        devices: Optional[List[str]],
-        model_spec: FlexibleModelSpec,
-        model_path: Optional[str] = None,
-    ):
-        super().__init__(address, devices, model_path=model_path)
-        self._model_spec = model_spec
-
-    @property
-    def spec(self):
-        return self._model_spec
-
-    def to_dict(self):
+    def to_description(self):
         return {
             "model_type": "flexible",
-            "address": self
-            "accelerators": self
-            "model_name": self.
-            "launcher": self.
-            "launcher_args": self.
+            "address": getattr(self, "address", None),
+            "accelerators": getattr(self, "accelerators", None),
+            "model_name": self.model_name,
+            "launcher": self.launcher,
+            "launcher_args": self.launcher_args,
         }
 
-    def get_model_version(self) -> str:
-        return f"{self._model_spec.model_name}"
-
     def to_version_info(self):
         return {
-            "model_version": self.
+            "model_version": self.model_name,
             "cache_status": True,
-            "model_file_location": self.
-            "launcher": self.
-            "launcher_args": self.
+            "model_file_location": self.model_uri,
+            "launcher": self.launcher,
+            "launcher_args": self.launcher_args,
         }
 
 
@@ -82,9 +64,7 @@ def generate_flexible_model_description(
     model_spec: FlexibleModelSpec,
 ) -> Dict[str, List[Dict]]:
     res = defaultdict(list)
-    res[model_spec.model_name].append(
-        FlexibleModelDescription(None, None, model_spec).to_version_info()
-    )
+    res[model_spec.model_name].append(model_spec.to_version_info())
     return res
 
 
@@ -92,93 +72,22 @@ FLEXIBLE_MODELS: List[FlexibleModelSpec] = []
 FLEXIBLE_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)
 
 
-def get_flexible_models():
-    with FLEXIBLE_MODEL_LOCK:
-        return FLEXIBLE_MODELS.copy()
-
-
 def get_flexible_model_descriptions():
     import copy
 
     return copy.deepcopy(FLEXIBLE_MODEL_DESCRIPTIONS)
 
 
-def register_flexible_model(model_spec: FlexibleModelSpec, persist: bool):
-    from ..utils import is_valid_model_name, is_valid_model_uri
-
-    if not is_valid_model_name(model_spec.model_name):
-        raise ValueError(f"Invalid model name {model_spec.model_name}.")
-
-    model_uri = model_spec.model_uri
-    if model_uri and not is_valid_model_uri(model_uri):
-        raise ValueError(f"Invalid model URI {model_uri}.")
-
-    if model_spec.launcher_args:
-        try:
-            model_spec.parser_args()
-        except Exception:
-            raise ValueError(f"Invalid model launcher args {model_spec.launcher_args}.")
-
-    with FLEXIBLE_MODEL_LOCK:
-        for model_name in [spec.model_name for spec in FLEXIBLE_MODELS]:
-            if model_spec.model_name == model_name:
-                raise ValueError(
-                    f"Model name conflicts with existing model {model_spec.model_name}"
-                )
-        FLEXIBLE_MODELS.append(model_spec)
-
-    if persist:
-        persist_path = os.path.join(
-            XINFERENCE_MODEL_DIR, "flexible", f"{model_spec.model_name}.json"
-        )
-        os.makedirs(os.path.dirname(persist_path), exist_ok=True)
-        with open(persist_path, mode="w") as fd:
-            fd.write(model_spec.json())
-
-
-def unregister_flexible_model(model_name: str, raise_error: bool = True):
-    with FLEXIBLE_MODEL_LOCK:
-        model_spec = None
-        for i, f in enumerate(FLEXIBLE_MODELS):
-            if f.model_name == model_name:
-                model_spec = f
-                break
-        if model_spec:
-            FLEXIBLE_MODELS.remove(model_spec)
-
-            persist_path = os.path.join(
-                XINFERENCE_MODEL_DIR, "flexible", f"{model_spec.model_name}.json"
-            )
-            if os.path.exists(persist_path):
-                os.remove(persist_path)
-
-            cache_dir = os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-            if os.path.exists(cache_dir):
-                logger.warning(
-                    f"Remove the cache of user-defined model {model_spec.model_name}. "
-                    f"Cache directory: {cache_dir}"
-                )
-                if os.path.islink(cache_dir):
-                    os.remove(cache_dir)
-                else:
-                    logger.warning(
-                        f"Cache directory is not a soft link, please remove it manually."
-                    )
-        else:
-            if raise_error:
-                raise ValueError(f"Model {model_name} not found")
-            else:
-                logger.warning(f"Model {model_name} not found")
-
-
 class FlexibleModel:
     def __init__(
         self,
         model_uid: str,
         model_path: str,
+        model_family: FlexibleModelSpec,
         device: Optional[str] = None,
         config: Optional[Dict] = None,
     ):
+        self.model_family = model_family
         self._model_uid = model_uid
         self._model_path = model_path
         self._device = device
@@ -213,19 +122,20 @@ class FlexibleModel:
 
 
 def match_flexible_model(model_name):
+    from .custom import get_flexible_models
+
     for model_spec in get_flexible_models():
         if model_name == model_spec.model_name:
             return model_spec
+    return None
 
 
 def create_flexible_model_instance(
-    subpool_addr: str,
-    devices: List[str],
     model_uid: str,
     model_name: str,
     model_path: Optional[str] = None,
     **kwargs,
-) ->
+) -> FlexibleModel:
     model_spec = match_flexible_model(model_name)
     if not model_path:
         model_path = model_spec.model_uri
@@ -237,7 +147,4 @@ def create_flexible_model_instance(
         model_uid=model_uid, model_spec=model_spec, **kwargs
     )
 
-
-        subpool_addr, devices, model_spec, model_path=model_path
-    )
-    return model, model_description
+    return model
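
With FlexibleModelDescription removed, the spec itself now carries description and version info. A rough sketch of the new surface (field values are invented, and additional required fields inherited from CacheableModelSpec may apply):

    from xinference.model.flexible.core import FlexibleModelSpec

    spec = FlexibleModelSpec(
        model_name="my-classifier",  # hypothetical custom model
        model_id=None,
        model_description="demo flexible model",
        model_uri="/models/my-classifier",
        launcher="xinference.model.flexible.launchers.transformers_launcher",
        launcher_args='{"task": "text-classification"}',
    )
    print(spec.to_description()["launcher_args"])
    print(spec.to_version_info()["model_file_location"])  # -> /models/my-classifier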

xinference/model/flexible/custom.py (new file)

@@ -0,0 +1,69 @@
+from typing import TYPE_CHECKING
+
+from ..custom import ModelRegistry
+
+if TYPE_CHECKING:
+    from .core import FlexibleModelSpec
+
+
+class FlexibleModelRegistry(ModelRegistry):
+    model_type = "flexible"
+
+    def __init__(self):
+        from .core import FLEXIBLE_MODELS
+
+        super().__init__()
+        self.models = FLEXIBLE_MODELS
+        self.builtin_models = []
+
+    def register(self, model_spec: "FlexibleModelSpec", persist: bool):
+        from ..cache_manager import CacheManager
+        from ..utils import is_valid_model_name, is_valid_model_uri
+
+        if not is_valid_model_name(model_spec.model_name):
+            raise ValueError(f"Invalid model name {model_spec.model_name}.")
+
+        model_uri = model_spec.model_uri
+        if model_uri and not is_valid_model_uri(model_uri):
+            raise ValueError(f"Invalid model URI {model_uri}.")
+
+        if model_spec.launcher_args:
+            try:
+                model_spec.parser_args()
+            except Exception:
+                raise ValueError(
+                    f"Invalid model launcher args {model_spec.launcher_args}."
+                )
+
+        with self.lock:
+            for model_name in [spec.model_name for spec in self.models]:
+                if model_spec.model_name == model_name:
+                    raise ValueError(
+                        f"Model name conflicts with existing model {model_spec.model_name}"
+                    )
+            self.models.append(model_spec)
+
+        if persist:
+            cache_manager = CacheManager(model_spec)
+            cache_manager.register_custom_model(self.model_type)
+
+
+def get_flexible_models():
+    from ..custom import RegistryManager
+
+    registry = RegistryManager.get_registry("flexible")
+    return registry.get_custom_models()
+
+
+def register_flexible_model(model_spec: "FlexibleModelSpec", persist: bool):
+    from ..custom import RegistryManager
+
+    registry = RegistryManager.get_registry("flexible")
+    registry.register(model_spec, persist)
+
+
+def unregister_flexible_model(model_name: str, raise_error: bool = True):
+    from ..custom import RegistryManager
+
+    registry = RegistryManager.get_registry("flexible")
+    registry.unregister(model_name, raise_error)
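
The module-level helpers keep the pre-1.8 public API while delegating to the shared RegistryManager. Usage is unchanged; a sketch, reusing the spec from the previous example:

    from xinference.model.flexible.custom import (
        get_flexible_models,
        register_flexible_model,
        unregister_flexible_model,
    )

    # validates name/URI/launcher_args, then appends to the registry;
    # persist=True would also write the spec through CacheManager
    register_flexible_model(spec, persist=False)
    print([s.model_name for s in get_flexible_models()])
    unregister_flexible_model("my-classifier", raise_error=False)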

xinference/model/flexible/launchers/image_process_launcher.py

@@ -63,6 +63,7 @@ launcher(model_uid: str, model_spec: FlexibleModelSpec, **kwargs) -> Flexibl
     return ImageRemoveBackgroundModel(
         model_uid=model_uid,
         model_path=model_spec.model_uri,  # type: ignore
+        model_family=model_spec,
         device=device,
         config=kwargs,
     )

xinference/model/flexible/launchers/modelscope_launcher.py

@@ -43,5 +43,9 @@ def launcher(model_uid: str, model_spec: FlexibleModelSpec, **kwargs) -> Flexibl
         raise ValueError("model_path required")
 
     return ModelScopePipelineModel(
-        model_uid=model_uid,
+        model_uid=model_uid,
+        model_path=model_path,
+        model_family=model_spec,
+        device=device,
+        config=kwargs,
     )

xinference/model/flexible/launchers/transformers_launcher.py

@@ -51,13 +51,25 @@ def launcher(model_uid: str, model_spec: FlexibleModelSpec, **kwargs) -> Flexibl
 
     if task == "text-classification":
         return TransformersTextClassificationModel(
-            model_uid=model_uid,
+            model_uid=model_uid,
+            model_path=model_path,
+            model_family=model_spec,
+            device=device,
+            config=kwargs,
         )
     elif task == "mock":
         return MockModel(
-            model_uid=model_uid,
+            model_uid=model_uid,
+            model_path=model_path,
+            model_family=model_spec,
+            device=device,
+            config=kwargs,
         )
     else:
         return AutoModel(
-            model_uid=model_uid,
+            model_uid=model_uid,
+            model_path=model_path,
+            model_family=model_spec,
+            device=device,
+            config=kwargs,
         )

xinference/model/flexible/launchers/yolo_launcher.py

@@ -58,5 +58,9 @@ def launcher(model_uid: str, model_spec: FlexibleModelSpec, **kwargs) -> Flexibl
         raise ValueError("model_path required")
 
     return UltralyticsModel(
-        model_uid=model_uid,
+        model_uid=model_uid,
+        model_path=model_path,
+        model_family=model_spec,
+        device=device,
+        config=kwargs,
     )
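
All four launchers now pass model_family=model_spec through to the model constructor, alongside model_path, device, and config. A custom launcher should follow the same shape; a minimal sketch (how device and model_path arrive in kwargs is an assumption):

    from xinference.model.flexible.core import FlexibleModel, FlexibleModelSpec

    def launcher(model_uid: str, model_spec: FlexibleModelSpec, **kwargs) -> FlexibleModel:
        device = kwargs.pop("device", None)  # assumed to arrive via kwargs
        model_path = kwargs.pop("model_path", None) or model_spec.model_uri
        if not model_path:
            raise ValueError("model_path required")
        return FlexibleModel(
            model_uid=model_uid,
            model_path=model_path,
            model_family=model_spec,  # the new wiring introduced in 1.8.0
            device=device,
            config=kwargs,
        )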

xinference/model/image/__init__.py

@@ -16,20 +16,17 @@ import codecs
 import json
 import os
 import warnings
-from itertools import chain
 
+from ..utils import flatten_model_src
 from .core import (
     BUILTIN_IMAGE_MODELS,
     IMAGE_MODEL_DESCRIPTIONS,
-
-    MODELSCOPE_IMAGE_MODELS,
-    ImageModelFamilyV1,
+    ImageModelFamilyV2,
     generate_image_description,
-    get_cache_status,
     get_image_model_descriptions,
 )
 from .custom import (
-
+    CustomImageModelFamilyV2,
     get_user_defined_images,
     register_image,
     unregister_image,
@@ -38,15 +35,19 @@ from .custom import (
 
 def register_custom_model():
     from ...constants import XINFERENCE_MODEL_DIR
+    from ..custom import migrate_from_v1_to_v2
 
-
+    # migrate from v1 to v2 first
+    migrate_from_v1_to_v2("image", CustomImageModelFamilyV2)
+
+    user_defined_image_dir = os.path.join(XINFERENCE_MODEL_DIR, "v2", "image")
     if os.path.isdir(user_defined_image_dir):
         for f in os.listdir(user_defined_image_dir):
             try:
                 with codecs.open(
                     os.path.join(user_defined_image_dir, f), encoding="utf-8"
                 ) as fd:
-                    user_defined_image_family =
+                    user_defined_image_family = CustomImageModelFamilyV2.parse_obj(
                         json.load(fd)
                     )
                     register_image(user_defined_image_family, persist=False)
@@ -56,12 +57,10 @@ def register_custom_model():
 
 def _install():
     load_model_family_from_json("model_spec.json", BUILTIN_IMAGE_MODELS)
-    load_model_family_from_json("model_spec_modelscope.json", MODELSCOPE_IMAGE_MODELS)
 
     # register model description
-    for model_name,
-
-    ):
+    for model_name, model_specs in BUILTIN_IMAGE_MODELS.items():
+        model_spec = [x for x in model_specs if x.model_hub == "huggingface"][0]
         IMAGE_MODEL_DESCRIPTIONS.update(generate_image_description(model_spec))
 
     register_custom_model()
@@ -72,13 +71,14 @@ def _install():
 
 def load_model_family_from_json(json_filename, target_families):
     json_path = os.path.join(os.path.dirname(__file__), json_filename)
-
-
-
-
-
-
-
-
+    flattened_model_specs = []
+    for spec in json.load(codecs.open(json_path, "r", encoding="utf-8")):
+        flattened_model_specs.extend(flatten_model_src(spec))
+
+    for spec in flattened_model_specs:
+        if spec["model_name"] not in target_families:
+            target_families[spec["model_name"]] = [ImageModelFamilyV2(**spec)]
+        else:
+            target_families[spec["model_name"]].append(ImageModelFamilyV2(**spec))
 
     del json_path
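
The hub-specific model_spec_modelscope.json is gone; a single model_spec.json now lists every hub under one entry, and flatten_model_src expands it into one flat spec per hub before ImageModelFamilyV2 instances are built. A sketch of the assumed shape (the "model_src" key layout is an assumption inferred from this diff):

    # Assumed input: one JSON entry carrying all hubs under "model_src".
    entry = {
        "model_name": "my-image-model",  # hypothetical entry
        "model_src": {
            "huggingface": {"model_id": "org/my-image-model"},
            "modelscope": {"model_id": "org/my-image-model"},
        },
    }

    # flatten_model_src is expected to yield one dict per hub, each tagged
    # with model_hub, which _install then groups into BUILTIN_IMAGE_MODELS.
    flattened = [
        {"model_name": entry["model_name"], "model_hub": hub, **src}
        for hub, src in entry["model_src"].items()
    ]
    print([f["model_hub"] for f in flattened])  # ['huggingface', 'modelscope']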

xinference/model/image/cache_manager.py (new file)

@@ -0,0 +1,62 @@
+import os
+from typing import Optional
+
+from ..cache_manager import CacheManager
+
+
+class ImageCacheManager(CacheManager):
+    def cache_gguf(self, quantization: Optional[str] = None):
+        from ..utils import IS_NEW_HUGGINGFACE_HUB, retry_download, symlink_local_file
+        from .core import ImageModelFamilyV2
+
+        if not quantization:
+            return None
+
+        assert isinstance(self._model_family, ImageModelFamilyV2)
+        cache_dir = self.get_cache_dir()
+
+        if not self._model_family.gguf_model_file_name_template:
+            raise NotImplementedError(
+                f"{self._model_family.model_name} does not support GGUF quantization"
+            )
+        if quantization not in (self._model_family.gguf_quantizations or []):
+            raise ValueError(
+                f"Cannot support quantization {quantization}, "
+                f"available quantizations: {self._model_family.gguf_quantizations}"
+            )
+
+        filename = self._model_family.gguf_model_file_name_template.format(quantization=quantization)  # type: ignore
+        full_path = os.path.join(cache_dir, filename)
+
+        if self._model_family.model_hub == "huggingface":
+            import huggingface_hub
+
+            use_symlinks = {}
+            if not IS_NEW_HUGGINGFACE_HUB:
+                use_symlinks = {"local_dir_use_symlinks": True, "local_dir": cache_dir}
+            download_file_path = retry_download(
+                huggingface_hub.hf_hub_download,
+                self._model_family.model_name,
+                None,
+                self._model_family.gguf_model_id,
+                filename=filename,
+                **use_symlinks,
+            )
+            if IS_NEW_HUGGINGFACE_HUB:
+                symlink_local_file(download_file_path, cache_dir, filename)
+        elif self._model_family.model_hub == "modelscope":
+            from modelscope.hub.file_download import model_file_download
+
+            download_file_path = retry_download(
+                model_file_download,
+                self._model_family.model_name,
+                None,
+                self._model_family.gguf_model_id,
+                filename,
+                revision=self._model_family.model_revision,
+            )
+            symlink_local_file(download_file_path, cache_dir, filename)
+        else:
+            raise NotImplementedError
+
+        return full_path
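
A sketch of exercising the new GGUF cache path, assuming an ImageModelFamilyV2 whose spec declares gguf_quantizations and a gguf_model_file_name_template (all concrete values here are hypothetical):

    from xinference.model.image.cache_manager import ImageCacheManager

    # `family` is an ImageModelFamilyV2 with, for example:
    #   gguf_quantizations = ["Q4_0", "Q8_0"]
    #   gguf_model_file_name_template = "my-model-{quantization}.gguf"
    manager = ImageCacheManager(family)
    path = manager.cache_gguf("Q4_0")  # downloads once, symlinks into the cache dir
    print(path)  # <cache_dir>/my-model-Q4_0.gguf
    # cache_gguf(None) is a no-op that returns None.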