xinference 0.13.3__py3-none-any.whl → 0.13.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (48)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +4 -1
  3. xinference/client/restful/restful_client.py +2 -2
  4. xinference/constants.py +0 -4
  5. xinference/core/image_interface.py +6 -3
  6. xinference/core/model.py +1 -1
  7. xinference/core/supervisor.py +2 -0
  8. xinference/core/worker.py +7 -0
  9. xinference/deploy/utils.py +6 -0
  10. xinference/model/audio/core.py +4 -2
  11. xinference/model/core.py +25 -4
  12. xinference/model/embedding/core.py +88 -13
  13. xinference/model/embedding/model_spec.json +8 -0
  14. xinference/model/embedding/model_spec_modelscope.json +8 -0
  15. xinference/model/flexible/core.py +8 -2
  16. xinference/model/image/core.py +8 -5
  17. xinference/model/image/model_spec.json +30 -6
  18. xinference/model/image/model_spec_modelscope.json +21 -3
  19. xinference/model/image/stable_diffusion/core.py +30 -27
  20. xinference/model/llm/core.py +6 -4
  21. xinference/model/llm/ggml/llamacpp.py +7 -5
  22. xinference/model/llm/llm_family.py +6 -6
  23. xinference/model/llm/mlx/core.py +7 -0
  24. xinference/model/llm/pytorch/chatglm.py +4 -1
  25. xinference/model/llm/pytorch/deepseek_vl.py +2 -1
  26. xinference/model/llm/pytorch/falcon.py +2 -1
  27. xinference/model/llm/pytorch/llama_2.py +4 -2
  28. xinference/model/llm/pytorch/omnilmm.py +2 -1
  29. xinference/model/llm/pytorch/qwen_vl.py +2 -1
  30. xinference/model/llm/pytorch/vicuna.py +2 -1
  31. xinference/model/llm/pytorch/yi_vl.py +2 -1
  32. xinference/model/llm/sglang/core.py +12 -6
  33. xinference/model/llm/vllm/core.py +1 -5
  34. xinference/model/rerank/core.py +4 -3
  35. xinference/web/ui/build/asset-manifest.json +3 -3
  36. xinference/web/ui/build/index.html +1 -1
  37. xinference/web/ui/build/static/js/{main.2ef0cfaf.js → main.af906659.js} +3 -3
  38. xinference/web/ui/build/static/js/main.af906659.js.map +1 -0
  39. xinference/web/ui/node_modules/.cache/babel-loader/2cd5e4279ad7e13a1f41d486e9fca7756295bfad5bd77d90992f4ac3e10b496d.json +1 -0
  40. {xinference-0.13.3.dist-info → xinference-0.13.4.dist-info}/METADATA +24 -4
  41. {xinference-0.13.3.dist-info → xinference-0.13.4.dist-info}/RECORD +46 -46
  42. xinference/web/ui/build/static/js/main.2ef0cfaf.js.map +0 -1
  43. xinference/web/ui/node_modules/.cache/babel-loader/b6807ecc0c231fea699533518a0eb2a2bf68a081ce00d452be40600dbffa17a7.json +0 -1
  44. /xinference/web/ui/build/static/js/{main.2ef0cfaf.js.LICENSE.txt → main.af906659.js.LICENSE.txt} +0 -0
  45. {xinference-0.13.3.dist-info → xinference-0.13.4.dist-info}/LICENSE +0 -0
  46. {xinference-0.13.3.dist-info → xinference-0.13.4.dist-info}/WHEEL +0 -0
  47. {xinference-0.13.3.dist-info → xinference-0.13.4.dist-info}/entry_points.txt +0 -0
  48. {xinference-0.13.3.dist-info → xinference-0.13.4.dist-info}/top_level.txt +0 -0
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2024-07-26T18:42:50+0800",
+ "date": "2024-08-02T16:08:07+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "aa51ff22dbfb5644554436270deaf57a7ebaf066",
- "version": "0.13.3"
+ "full-revisionid": "dd85cfe015c9cd2d8110c79213640aa0e21f3a6a",
+ "version": "0.13.4"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py CHANGED
@@ -797,6 +797,7 @@ class RESTfulAPI:
         worker_ip = payload.get("worker_ip", None)
         gpu_idx = payload.get("gpu_idx", None)
         download_hub = payload.get("download_hub", None)
+        model_path = payload.get("model_path", None)
 
         exclude_keys = {
             "model_uid",
@@ -813,6 +814,7 @@
             "worker_ip",
             "gpu_idx",
             "download_hub",
+            "model_path",
         }
 
         kwargs = {
@@ -861,6 +863,7 @@
                 worker_ip=worker_ip,
                 gpu_idx=gpu_idx,
                 download_hub=download_hub,
+                model_path=model_path,
                 **kwargs,
             )
         except ValueError as ve:
@@ -1407,7 +1410,7 @@
         negative_prompt: Optional[Union[str, List[str]]] = Form(None),
         n: Optional[int] = Form(1),
         response_format: Optional[str] = Form("url"),
-        size: Optional[str] = Form("1024*1024"),
+        size: Optional[str] = Form(None),
         kwargs: Optional[str] = Form(None),
     ) -> Response:
         model_uid = model
xinference/client/restful/restful_client.py CHANGED
@@ -234,9 +234,9 @@ class RESTfulImageModelHandle(RESTfulModelHandle):
         self,
         image: Union[str, bytes],
         prompt: str,
-        negative_prompt: str,
+        negative_prompt: Optional[str] = None,
         n: int = 1,
-        size: str = "1024*1024",
+        size: Optional[str] = None,
         response_format: str = "url",
         **kwargs,
     ) -> "ImageList":
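With `size` and `negative_prompt` now optional on both client and server, image-to-image requests can omit them entirely. A minimal usage sketch, assuming a default local endpoint, a hypothetical model UID and input file, and the `image_to_image` handle method belonging to the class shown above:

from xinference.client import RESTfulClient

client = RESTfulClient("http://127.0.0.1:9997")
model = client.get_model("my-image-model-uid")  # hypothetical UID of a launched image model
with open("input.png", "rb") as f:  # hypothetical input image
    image_list = model.image_to_image(
        image=f.read(),
        prompt="a cat wearing a spacesuit",
        # size omitted: it now defaults to None instead of "1024*1024",
        # letting the backend keep or choose the resolution
    )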
xinference/constants.py CHANGED
@@ -26,8 +26,6 @@ XINFERENCE_ENV_HEALTH_CHECK_FAILURE_THRESHOLD = (
 XINFERENCE_ENV_HEALTH_CHECK_INTERVAL = "XINFERENCE_HEALTH_CHECK_INTERVAL"
 XINFERENCE_ENV_HEALTH_CHECK_TIMEOUT = "XINFERENCE_HEALTH_CHECK_TIMEOUT"
 XINFERENCE_ENV_DISABLE_HEALTH_CHECK = "XINFERENCE_DISABLE_HEALTH_CHECK"
-XINFERENCE_ENV_DISABLE_VLLM = "XINFERENCE_DISABLE_VLLM"
-XINFERENCE_ENV_ENABLE_SGLANG = "XINFERENCE_ENABLE_SGLANG"
 XINFERENCE_ENV_DISABLE_METRICS = "XINFERENCE_DISABLE_METRICS"
 XINFERENCE_ENV_TRANSFORMERS_ENABLE_BATCHING = "XINFERENCE_TRANSFORMERS_ENABLE_BATCHING"
 
@@ -72,8 +70,6 @@ XINFERENCE_HEALTH_CHECK_TIMEOUT = int(
 XINFERENCE_DISABLE_HEALTH_CHECK = bool(
     int(os.environ.get(XINFERENCE_ENV_DISABLE_HEALTH_CHECK, 0))
 )
-XINFERENCE_DISABLE_VLLM = bool(int(os.environ.get(XINFERENCE_ENV_DISABLE_VLLM, 0)))
-XINFERENCE_ENABLE_SGLANG = bool(int(os.environ.get(XINFERENCE_ENV_ENABLE_SGLANG, 0)))
 XINFERENCE_DISABLE_METRICS = bool(
     int(os.environ.get(XINFERENCE_ENV_DISABLE_METRICS, 0))
 )
xinference/core/image_interface.py CHANGED
@@ -153,7 +153,10 @@ class ImageInterface:
         model = client.get_model(self.model_uid)
         assert isinstance(model, RESTfulImageModelHandle)
 
-        size = f"{int(size_width)}*{int(size_height)}"
+        if size_width > 0 and size_height > 0:
+            size = f"{int(size_width)}*{int(size_height)}"
+        else:
+            size = None
 
         bio = io.BytesIO()
         image.save(bio, format="png")
@@ -195,8 +198,8 @@
 
         with gr.Row():
             n = gr.Number(label="Number of image", value=1)
-            size_width = gr.Number(label="Width", value=512)
-            size_height = gr.Number(label="Height", value=512)
+            size_width = gr.Number(label="Width", value=-1)
+            size_height = gr.Number(label="Height", value=-1)
 
         with gr.Row():
             with gr.Column(scale=1):
xinference/core/model.py CHANGED
@@ -706,7 +706,7 @@ class ModelActor(xo.StatelessActor):
         prompt: str,
         negative_prompt: str,
         n: int = 1,
-        size: str = "1024*1024",
+        size: Optional[str] = None,
         response_format: str = "url",
         *args,
         **kwargs,
xinference/core/supervisor.py CHANGED
@@ -859,6 +859,7 @@ class SupervisorActor(xo.StatelessActor):
         worker_ip: Optional[str] = None,
         gpu_idx: Optional[Union[int, List[int]]] = None,
         download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+        model_path: Optional[str] = None,
         **kwargs,
     ) -> str:
         # search in worker first
@@ -942,6 +943,7 @@
             peft_model_config=peft_model_config,
             gpu_idx=replica_gpu_idx,
             download_hub=download_hub,
+            model_path=model_path,
             **kwargs,
         )
         self._replica_model_uid_to_worker[_replica_model_uid] = worker_ref
xinference/core/worker.py CHANGED
@@ -743,6 +743,7 @@ class WorkerActor(xo.StatelessActor):
         request_limits: Optional[int] = None,
         gpu_idx: Optional[Union[int, List[int]]] = None,
         download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+        model_path: Optional[str] = None,
         **kwargs,
     ):
         # !!! Note that The following code must be placed at the very beginning of this function,
@@ -799,6 +800,11 @@
             raise ValueError(
                 f"PEFT adaptors can only be applied to pytorch-like models"
             )
+        if model_path is not None:
+            if not os.path.exists(model_path):
+                raise ValueError(
+                    f"Invalid input. `model_path`: {model_path} File or directory does not exist."
+                )
 
         assert model_uid not in self._model_uid_to_model
         self._check_model_is_valid(model_name, model_format)
@@ -826,6 +832,7 @@
             quantization,
             peft_model_config,
             download_hub,
+            model_path,
             **kwargs,
         )
         await self.update_cache_status(model_name, model_description)
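Taken together with the RESTful API and supervisor changes above, the new `model_path` flows from the launch request down to each worker, which validates that the path exists before loading. A minimal client-side sketch, assuming the default endpoint and hypothetical model name, engine, and path (`launch_model` forwards extra keyword arguments in the launch payload):

from xinference.client import RESTfulClient

client = RESTfulClient("http://127.0.0.1:9997")
model_uid = client.launch_model(
    model_name="qwen2-instruct",          # hypothetical example model
    model_type="LLM",
    model_engine="transformers",          # assumed engine name
    model_path="/data/models/qwen2-7b-instruct",  # local dir; a missing path raises ValueError on the worker
)
print(model_uid)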
xinference/deploy/utils.py CHANGED
@@ -27,6 +27,9 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
+# mainly for k8s
+XINFERENCE_POD_NAME_ENV_KEY = "XINFERENCE_POD_NAME"
+
 
 class LoggerNameFilter(logging.Filter):
     def filter(self, record):
@@ -40,6 +43,9 @@ def get_log_file(sub_dir: str):
     """
     sub_dir should contain a timestamp.
     """
+    pod_name = os.environ.get(XINFERENCE_POD_NAME_ENV_KEY, None)
+    if pod_name is not None:
+        sub_dir = sub_dir + "_" + pod_name
     log_dir = os.path.join(XINFERENCE_LOG_DIR, sub_dir)
     # Here should be creating a new directory each time, so `exist_ok=False`
     os.makedirs(log_dir, exist_ok=False)
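This keeps log directories distinct when several pods share a volume. A small sketch of the resulting behavior, with a hypothetical pod name and timestamped sub-directory:

import os

os.environ["XINFERENCE_POD_NAME"] = "xinference-worker-0"  # e.g. injected via the k8s downward API

sub_dir = "local_1722585600000"  # timestamped sub-dir, as get_log_file expects
pod_name = os.environ.get("XINFERENCE_POD_NAME", None)
if pod_name is not None:
    sub_dir = sub_dir + "_" + pod_name
print(sub_dir)  # -> local_1722585600000_xinference-worker-0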
xinference/model/audio/core.py CHANGED
@@ -150,10 +150,12 @@ def create_audio_model_instance(
     model_uid: str,
     model_name: str,
     download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[Union[WhisperModel, ChatTTSModel, CosyVoiceModel], AudioModelDescription]:
     model_spec = match_audio(model_name, download_hub)
-    model_path = cache(model_spec)
+    if model_path is None:
+        model_path = cache(model_spec)
     model: Union[WhisperModel, ChatTTSModel, CosyVoiceModel]
     if model_spec.model_family == "whisper":
         model = WhisperModel(model_uid, model_path, model_spec, **kwargs)
@@ -164,6 +166,6 @@
     else:
         raise Exception(f"Unsupported audio model family: {model_spec.model_family}")
     model_description = AudioModelDescription(
-        subpool_addr, devices, model_spec, model_path=model_path
+        subpool_addr, devices, model_spec, model_path
     )
     return model, model_description
xinference/model/core.py CHANGED
@@ -56,6 +56,7 @@ def create_model_instance(
     quantization: Optional[str] = None,
     peft_model_config: Optional[PeftModelConfig] = None,
     download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[Any, ModelDescription]:
     from .audio.core import create_audio_model_instance
@@ -77,13 +78,20 @@
             quantization,
             peft_model_config,
             download_hub,
+            model_path,
             **kwargs,
         )
     elif model_type == "embedding":
         # embedding model doesn't accept trust_remote_code
         kwargs.pop("trust_remote_code", None)
         return create_embedding_model_instance(
-            subpool_addr, devices, model_uid, model_name, download_hub, **kwargs
+            subpool_addr,
+            devices,
+            model_uid,
+            model_name,
+            download_hub,
+            model_path,
+            **kwargs,
         )
     elif model_type == "image":
         kwargs.pop("trust_remote_code", None)
@@ -94,22 +102,35 @@
             model_name,
             peft_model_config,
             download_hub,
+            model_path,
             **kwargs,
         )
     elif model_type == "rerank":
         kwargs.pop("trust_remote_code", None)
         return create_rerank_model_instance(
-            subpool_addr, devices, model_uid, model_name, download_hub, **kwargs
+            subpool_addr,
+            devices,
+            model_uid,
+            model_name,
+            download_hub,
+            model_path,
+            **kwargs,
         )
     elif model_type == "audio":
         kwargs.pop("trust_remote_code", None)
         return create_audio_model_instance(
-            subpool_addr, devices, model_uid, model_name, download_hub, **kwargs
+            subpool_addr,
+            devices,
+            model_uid,
+            model_name,
+            download_hub,
+            model_path,
+            **kwargs,
        )
     elif model_type == "flexible":
         kwargs.pop("trust_remote_code", None)
         return create_flexible_model_instance(
-            subpool_addr, devices, model_uid, model_name, **kwargs
+            subpool_addr, devices, model_uid, model_name, model_path, **kwargs
        )
     else:
         raise ValueError(f"Unsupported model type: {model_type}.")
xinference/model/embedding/core.py CHANGED
@@ -118,12 +118,19 @@ def get_cache_status(
 
 
 class EmbeddingModel:
-    def __init__(self, model_uid: str, model_path: str, device: Optional[str] = None):
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: str,
+        model_spec: EmbeddingModelSpec,
+        device: Optional[str] = None,
+    ):
         self._model_uid = model_uid
         self._model_path = model_path
         self._device = device
         self._model = None
         self._counter = 0
+        self._model_spec = model_spec
 
     def load(self):
         try:
@@ -134,12 +141,26 @@ class EmbeddingModel:
                 "Please make sure 'sentence-transformers' is installed. ",
                 "You can install it by `pip install sentence-transformers`\n",
             ]
-
             raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+
+        class XSentenceTransformer(SentenceTransformer):
+            def to(self, *args, **kwargs):
+                pass
+
         from ..utils import patch_trust_remote_code
 
         patch_trust_remote_code()
-        self._model = SentenceTransformer(self._model_path, device=self._device)
+        if (
+            "gte-Qwen2" in self._model_spec.model_id
+            or "gte-Qwen2" in self._model_spec.model_name
+        ):
+            self._model = XSentenceTransformer(
+                self._model_path,
+                device=self._device,
+                model_kwargs={"device_map": "auto"},
+            )
+        else:
+            self._model = SentenceTransformer(self._model_path, device=self._device)
 
     def create_embedding(self, sentences: Union[str, List[str]], **kwargs):
         self._counter += 1
@@ -156,6 +177,8 @@
         def encode(
             model: SentenceTransformer,
             sentences: Union[str, List[str]],
+            prompt_name: Optional[str] = None,
+            prompt: Optional[str] = None,
             batch_size: int = 32,
             show_progress_bar: bool = None,
             output_value: str = "sentence_embedding",
@@ -204,10 +227,43 @@
                 sentences = [sentences]
                 input_was_string = True
 
+            if prompt is None:
+                if prompt_name is not None:
+                    try:
+                        prompt = model.prompts[prompt_name]
+                    except KeyError:
+                        raise ValueError(
+                            f"Prompt name '{prompt_name}' not found in the configured prompts dictionary with keys {list(model.prompts.keys())!r}."
+                        )
+                elif model.default_prompt_name is not None:
+                    prompt = model.prompts.get(model.default_prompt_name, None)
+            else:
+                if prompt_name is not None:
+                    logger.warning(
+                        "Encode with either a `prompt`, a `prompt_name`, or neither, but not both. "
+                        "Ignoring the `prompt_name` in favor of `prompt`."
+                    )
+
+            extra_features = {}
+            if prompt is not None:
+                sentences = [prompt + sentence for sentence in sentences]
+
+                # Some models (e.g. INSTRUCTOR, GRIT) require removing the prompt before pooling
+                # Tracking the prompt length allow us to remove the prompt during pooling
+                tokenized_prompt = model.tokenize([prompt])
+                if "input_ids" in tokenized_prompt:
+                    extra_features["prompt_length"] = (
+                        tokenized_prompt["input_ids"].shape[-1] - 1
+                    )
+
             if device is None:
                 device = model._target_device
 
-            model.to(device)
+            if (
+                "gte-Qwen2" not in self._model_spec.model_id
+                and "gte-Qwen2" not in self._model_spec.model_name
+            ):
+                model.to(device)
 
             all_embeddings = []
             all_token_nums = 0
@@ -228,6 +284,7 @@
                 ]
                 features = model.tokenize(sentences_batch)
                 features = batch_to_device(features, device)
+                features.update(extra_features)
                 all_token_nums += sum([len(f) for f in features])
 
                 with torch.no_grad():
@@ -272,7 +329,10 @@
             ]
 
             if convert_to_tensor:
-                all_embeddings = torch.stack(all_embeddings)
+                if len(all_embeddings):
+                    all_embeddings = torch.stack(all_embeddings)
+                else:
+                    all_embeddings = torch.Tensor()
             elif convert_to_numpy:
                 all_embeddings = np.asarray([emb.numpy() for emb in all_embeddings])
 
@@ -281,12 +341,24 @@
 
             return all_embeddings, all_token_nums
 
-        all_embeddings, all_token_nums = encode(
-            self._model,
-            sentences,
-            convert_to_numpy=False,
-            **kwargs,
-        )
+        if (
+            "gte-Qwen2" in self._model_spec.model_id
+            or "gte-Qwen2" in self._model_spec.model_name
+        ):
+            all_embeddings, all_token_nums = encode(
+                self._model,
+                sentences,
+                prompt_name="query",
+                convert_to_numpy=False,
+                **kwargs,
+            )
+        else:
+            all_embeddings, all_token_nums = encode(
+                self._model,
+                sentences,
+                convert_to_numpy=False,
+                **kwargs,
+            )
         if isinstance(sentences, str):
             all_embeddings = [all_embeddings]
         embedding_list = []
@@ -344,11 +416,14 @@ def create_embedding_model_instance(
     model_uid: str,
     model_name: str,
     download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[EmbeddingModel, EmbeddingModelDescription]:
     model_spec = match_embedding(model_name, download_hub)
-    model_path = cache(model_spec)
-    model = EmbeddingModel(model_uid, model_path, **kwargs)
+    if model_path is None:
+        model_path = cache(model_spec)
+
+    model = EmbeddingModel(model_uid, model_path, model_spec, **kwargs)
     model_description = EmbeddingModelDescription(
         subpool_addr, devices, model_spec, model_path=model_path
    )
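Per the code above, gte-Qwen2 is loaded with device_map="auto" and encoded with its built-in "query" prompt, so a plain embedding call works like any other model. A usage sketch, assuming the default endpoint and the standard embedding response shape (the 3584-dimension figure comes from the Hugging Face spec added below):

from xinference.client import RESTfulClient

client = RESTfulClient("http://127.0.0.1:9997")
uid = client.launch_model(model_name="gte-Qwen2", model_type="embedding")
model = client.get_model(uid)

# The server prepends the model's "query" prompt automatically.
result = model.create_embedding("What is the capital of China?")
print(len(result["data"][0]["embedding"]))  # 3584 per the spec below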
xinference/model/embedding/model_spec.json CHANGED
@@ -230,5 +230,13 @@
     "language": ["zh", "en"],
     "model_id": "moka-ai/m3e-large",
     "model_revision": "12900375086c37ba5d83d1e417b21dc7d1d1f388"
+  },
+  {
+    "model_name": "gte-Qwen2",
+    "dimensions": 3584,
+    "max_tokens": 32000,
+    "language": ["zh", "en"],
+    "model_id": "Alibaba-NLP/gte-Qwen2-7B-instruct",
+    "model_revision": "e26182b2122f4435e8b3ebecbf363990f409b45b"
   }
 ]
xinference/model/embedding/model_spec_modelscope.json CHANGED
@@ -232,5 +232,13 @@
     "language": ["zh", "en"],
     "model_id": "AI-ModelScope/m3e-large",
     "model_hub": "modelscope"
+  },
+  {
+    "model_name": "gte-Qwen2",
+    "dimensions": 4096,
+    "max_tokens": 32000,
+    "language": ["zh", "en"],
+    "model_id": "iic/gte_Qwen2-7B-instruct",
+    "model_hub": "modelscope"
   }
 ]
xinference/model/flexible/core.py CHANGED
@@ -210,10 +210,16 @@ def match_flexible_model(model_name):
 
 
 def create_flexible_model_instance(
-    subpool_addr: str, devices: List[str], model_uid: str, model_name: str, **kwargs
+    subpool_addr: str,
+    devices: List[str],
+    model_uid: str,
+    model_name: str,
+    model_path: Optional[str] = None,
+    **kwargs,
 ) -> Tuple[FlexibleModel, FlexibleModelDescription]:
     model_spec = match_flexible_model(model_name)
-    model_path = model_spec.model_uri
+    if not model_path:
+        model_path = model_spec.model_uri
     launcher_name = model_spec.launcher
     launcher_args = model_spec.parser_args()
     kwargs.update(launcher_args)
xinference/model/image/core.py CHANGED
@@ -45,7 +45,7 @@ class ImageModelFamilyV1(CacheableModelSpec):
     model_id: str
     model_revision: str
     model_hub: str = "huggingface"
-    ability: Optional[str]
+    abilities: Optional[List[str]]
     controlnet: Optional[List["ImageModelFamilyV1"]]
 
 
@@ -72,7 +72,7 @@ class ImageModelDescription(ModelDescription):
             "model_name": self._model_spec.model_name,
             "model_family": self._model_spec.model_family,
             "model_revision": self._model_spec.model_revision,
-            "ability": self._model_spec.ability,
+            "abilities": self._model_spec.abilities,
             "controlnet": controlnet,
         }
 
@@ -189,6 +189,7 @@ def create_image_model_instance(
     model_name: str,
     peft_model_config: Optional[PeftModelConfig] = None,
     download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[DiffusionModel, ImageModelDescription]:
     model_spec = match_diffusion(model_name, download_hub)
@@ -209,7 +210,8 @@
         for name in controlnet:
             for cn_model_spec in model_spec.controlnet:
                 if cn_model_spec.model_name == name:
-                    model_path = cache(cn_model_spec)
+                    if not model_path:
+                        model_path = cache(cn_model_spec)
                     controlnet_model_paths.append(model_path)
                     break
             else:
@@ -220,7 +222,8 @@
             kwargs["controlnet"] = controlnet_model_paths[0]
         else:
             kwargs["controlnet"] = controlnet_model_paths
-    model_path = cache(model_spec)
+    if not model_path:
+        model_path = cache(model_spec)
     if peft_model_config is not None:
         lora_model = peft_model_config.peft_model
         lora_load_kwargs = peft_model_config.image_lora_load_kwargs
@@ -236,7 +239,7 @@
         lora_model_paths=lora_model,
         lora_load_kwargs=lora_load_kwargs,
         lora_fuse_kwargs=lora_fuse_kwargs,
-        ability=model_spec.ability,
+        abilities=model_spec.abilities,
         **kwargs,
     )
     model_description = ImageModelDescription(
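Replacing the single `ability` string with an `abilities` list lets one checkpoint declare several capabilities (see the updated specs below). Reading it back might look like this sketch, assuming a hypothetical model UID and that `describe_model` returns the description dict built above:

from xinference.client import RESTfulClient

client = RESTfulClient("http://127.0.0.1:9997")
desc = client.describe_model("my-sd-model-uid")  # hypothetical launched image model
if "image2image" in (desc.get("abilities") or []):
    print("this checkpoint supports image-to-image")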
xinference/model/image/model_spec.json CHANGED
@@ -3,25 +3,39 @@
     "model_name": "sd3-medium",
     "model_family": "stable_diffusion",
     "model_id": "stabilityai/stable-diffusion-3-medium-diffusers",
-    "model_revision": "ea42f8cef0f178587cf766dc8129abd379c90671"
+    "model_revision": "ea42f8cef0f178587cf766dc8129abd379c90671",
+    "abilities": [
+      "text2iamge",
+      "image2image"
+    ]
   },
   {
     "model_name": "sd-turbo",
     "model_family": "stable_diffusion",
     "model_id": "stabilityai/sd-turbo",
-    "model_revision": "1681ed09e0cff58eeb41e878a49893228b78b94c"
+    "model_revision": "1681ed09e0cff58eeb41e878a49893228b78b94c",
+    "abilities": [
+      "text2iamge"
+    ]
   },
   {
     "model_name": "sdxl-turbo",
     "model_family": "stable_diffusion",
     "model_id": "stabilityai/sdxl-turbo",
-    "model_revision": "f4b0486b498f84668e828044de1d0c8ba486e05b"
+    "model_revision": "f4b0486b498f84668e828044de1d0c8ba486e05b",
+    "abilities": [
+      "text2iamge"
+    ]
   },
   {
     "model_name": "stable-diffusion-v1.5",
     "model_family": "stable_diffusion",
     "model_id": "runwayml/stable-diffusion-v1-5",
     "model_revision": "1d0c4ebf6ff58a5caecab40fa1406526bca4b5b9",
+    "abilities": [
+      "text2iamge",
+      "image2image"
+    ],
     "controlnet": [
       {
         "model_name":"canny",
@@ -72,6 +86,10 @@
     "model_family": "stable_diffusion",
     "model_id": "stabilityai/stable-diffusion-xl-base-1.0",
     "model_revision": "f898a3e026e802f68796b95e9702464bac78d76f",
+    "abilities": [
+      "text2iamge",
+      "image2image"
+    ],
     "controlnet": [
       {
         "model_name":"canny",
@@ -98,20 +116,26 @@
     "model_family": "stable_diffusion",
     "model_id": "runwayml/stable-diffusion-inpainting",
     "model_revision": "51388a731f57604945fddd703ecb5c50e8e7b49d",
-    "ability": "inpainting"
+    "abilities": [
+      "inpainting"
+    ]
   },
   {
     "model_name": "stable-diffusion-2-inpainting",
     "model_family": "stable_diffusion",
     "model_id": "stabilityai/stable-diffusion-2-inpainting",
     "model_revision": "81a84f49b15956b60b4272a405ad3daef3da4590",
-    "ability": "inpainting"
+    "abilities": [
+      "inpainting"
+    ]
   },
   {
     "model_name": "stable-diffusion-xl-inpainting",
     "model_family": "stable_diffusion",
     "model_id": "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
     "model_revision": "115134f363124c53c7d878647567d04daf26e41e",
-    "ability": "inpainting"
+    "abilities": [
+      "inpainting"
+    ]
   }
 ]