xinference 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/oauth2/auth_service.py +132 -0
- xinference/api/restful_api.py +282 -78
- xinference/client/handlers.py +3 -0
- xinference/client/restful/restful_client.py +108 -75
- xinference/constants.py +14 -4
- xinference/core/cache_tracker.py +102 -0
- xinference/core/chat_interface.py +10 -4
- xinference/core/event.py +56 -0
- xinference/core/model.py +44 -0
- xinference/core/resource.py +19 -12
- xinference/core/status_guard.py +4 -0
- xinference/core/supervisor.py +278 -87
- xinference/core/utils.py +68 -3
- xinference/core/worker.py +98 -8
- xinference/deploy/cmdline.py +6 -3
- xinference/deploy/local.py +2 -2
- xinference/deploy/supervisor.py +2 -2
- xinference/model/audio/__init__.py +27 -0
- xinference/model/audio/core.py +161 -0
- xinference/model/audio/model_spec.json +79 -0
- xinference/model/audio/utils.py +18 -0
- xinference/model/audio/whisper.py +132 -0
- xinference/model/core.py +18 -13
- xinference/model/embedding/__init__.py +27 -2
- xinference/model/embedding/core.py +43 -3
- xinference/model/embedding/model_spec.json +24 -0
- xinference/model/embedding/model_spec_modelscope.json +24 -0
- xinference/model/embedding/utils.py +18 -0
- xinference/model/image/__init__.py +12 -1
- xinference/model/image/core.py +63 -9
- xinference/model/image/utils.py +26 -0
- xinference/model/llm/__init__.py +20 -1
- xinference/model/llm/core.py +43 -2
- xinference/model/llm/ggml/chatglm.py +15 -6
- xinference/model/llm/llm_family.json +197 -6
- xinference/model/llm/llm_family.py +9 -7
- xinference/model/llm/llm_family_modelscope.json +189 -4
- xinference/model/llm/pytorch/chatglm.py +3 -3
- xinference/model/llm/pytorch/core.py +4 -2
- xinference/model/{multimodal → llm/pytorch}/qwen_vl.py +10 -8
- xinference/model/llm/pytorch/utils.py +21 -9
- xinference/model/llm/pytorch/yi_vl.py +246 -0
- xinference/model/llm/utils.py +57 -4
- xinference/model/llm/vllm/core.py +5 -4
- xinference/model/rerank/__init__.py +25 -2
- xinference/model/rerank/core.py +51 -9
- xinference/model/rerank/model_spec.json +6 -0
- xinference/model/rerank/model_spec_modelscope.json +7 -0
- xinference/{api/oauth2/common.py → model/rerank/utils.py} +6 -2
- xinference/model/utils.py +5 -3
- xinference/thirdparty/__init__.py +0 -0
- xinference/thirdparty/llava/__init__.py +1 -0
- xinference/thirdparty/llava/conversation.py +205 -0
- xinference/thirdparty/llava/mm_utils.py +122 -0
- xinference/thirdparty/llava/model/__init__.py +1 -0
- xinference/thirdparty/llava/model/clip_encoder/__init__.py +0 -0
- xinference/thirdparty/llava/model/clip_encoder/builder.py +11 -0
- xinference/thirdparty/llava/model/clip_encoder/clip_encoder.py +86 -0
- xinference/thirdparty/llava/model/constants.py +6 -0
- xinference/thirdparty/llava/model/llava_arch.py +385 -0
- xinference/thirdparty/llava/model/llava_llama.py +163 -0
- xinference/thirdparty/llava/model/multimodal_projector/__init__.py +0 -0
- xinference/thirdparty/llava/model/multimodal_projector/builder.py +64 -0
- xinference/types.py +1 -1
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.15822aeb.js +3 -0
- xinference/web/ui/build/static/js/main.15822aeb.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/139e5e4adf436923107d2b02994c7ff6dba2aac1989e9b6638984f0dfe782c4a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/52aa27272b4b9968f62666262b47661cb1992336a2aff3b13994cc36877b3ec3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/64accc515dc6cd584a2873796cd7da6f93de57f7e465eb5423cca9a2f3fe3eff.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/65ca3ba225b8c8dac907210545b51f2fcdb2591f0feeb7195f1c037f2bc956a0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b80db1012318b97c329c4e3e72454f7512fb107e57c444b437dbe4ba1a3faa5a.json +1 -0
- {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/METADATA +33 -23
- {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/RECORD +81 -64
- xinference/api/oauth2/core.py +0 -93
- xinference/model/multimodal/__init__.py +0 -52
- xinference/model/multimodal/core.py +0 -467
- xinference/model/multimodal/model_spec.json +0 -43
- xinference/model/multimodal/model_spec_modelscope.json +0 -45
- xinference/web/ui/build/static/js/main.b83095c2.js +0 -3
- xinference/web/ui/build/static/js/main.b83095c2.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/101923c539819f26ad11fbcbd6f6e56436b285efbb090dcc7dd648c6e924c4a8.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4942da6bc03bf7373af068e22f916341aabc5b5df855d73c1d348c696724ce37.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/52a6136cb2dbbf9c51d461724d9b283ebe74a73fb19d5df7ba8e13c42bd7174d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/71493aadd34d568fbe605cacaba220aa69bd09273251ee4ba27930f8d01fccd8.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8b071db2a5a9ef68dc14d5f606540bd23d9785e365a11997c510656764d2dccf.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a4d72d3b806ba061919115f0c513738726872e3c79cf258f007519d3f91d1a16.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f037ffef5992af0892d6d991053c1dace364cd39a3f11f1a41f92776e8a59459.json +0 -1
- /xinference/web/ui/build/static/js/{main.b83095c2.js.LICENSE.txt → main.15822aeb.js.LICENSE.txt} +0 -0
- {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/LICENSE +0 -0
- {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/WHEEL +0 -0
- {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/entry_points.txt +0 -0
- {xinference-0.8.1.dist-info → xinference-0.8.3.dist-info}/top_level.txt +0 -0
xinference/model/audio/whisper.py
ADDED
@@ -0,0 +1,132 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import TYPE_CHECKING, Dict, Optional
+
+if TYPE_CHECKING:
+    from .core import AudioModelFamilyV1
+
+logger = logging.getLogger(__name__)
+
+
+class WhisperModel:
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: str,
+        model_spec: "AudioModelFamilyV1",
+        device: Optional[str] = None,
+        **kwargs,
+    ):
+        self._model_uid = model_uid
+        self._model_path = model_path
+        self._model_spec = model_spec
+        self._device = device
+        self._model = None
+        self._kwargs = kwargs
+
+    def load(self):
+        import torch
+        from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+
+        device = "cuda:0" if torch.cuda.is_available() else "cpu"
+        torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+        model = AutoModelForSpeechSeq2Seq.from_pretrained(
+            self._model_path,
+            torch_dtype=torch_dtype,
+            low_cpu_mem_usage=True,
+            use_safetensors=True,
+        )
+        model.to(device)
+
+        processor = AutoProcessor.from_pretrained(self._model_path)
+
+        self._model = pipeline(
+            "automatic-speech-recognition",
+            model=model,
+            tokenizer=processor.tokenizer,
+            feature_extractor=processor.feature_extractor,
+            max_new_tokens=128,
+            chunk_length_s=30,
+            batch_size=16,
+            return_timestamps=False,
+            torch_dtype=torch_dtype,
+            device=device,
+        )
+
+    def _call_model(
+        self,
+        audio: bytes,
+        generate_kwargs: Dict,
+        response_format: str,
+    ):
+        if response_format == "json":
+            logger.debug("Call whisper model with generate_kwargs: %s", generate_kwargs)
+            assert callable(self._model)
+            result = self._model(audio, generate_kwargs=generate_kwargs)
+            return {"text": result["text"]}
+        else:
+            raise ValueError(f"Unsupported response format: {response_format}")
+
+    def transcriptions(
+        self,
+        audio: bytes,
+        language: Optional[str] = None,
+        prompt: Optional[str] = None,
+        response_format: str = "json",
+        temperature: float = 0,
+    ):
+        if temperature != 0:
+            logger.warning(
+                "Temperature for whisper transcriptions will be ignored: %s.",
+                temperature,
+            )
+        if prompt is not None:
+            logger.warning(
+                "Prompt for whisper transcriptions will be ignored: %s", prompt
+            )
+        return self._call_model(
+            audio=audio,
+            generate_kwargs={"language": language, "task": "transcribe"}
+            if language is not None
+            else {"task": "transcribe"},
+            response_format=response_format,
+        )
+
+    def translations(
+        self,
+        audio: bytes,
+        prompt: Optional[str] = None,
+        response_format: str = "json",
+        temperature: float = 0,
+    ):
+        if not self._model_spec.multilingual:
+            raise RuntimeError(
+                f"Model {self._model_spec.model_name} is not suitable for translations."
+            )
+        if temperature != 0:
+            logger.warning(
+                "Temperature for whisper transcriptions will be ignored: %s.",
+                temperature,
+            )
+        if prompt is not None:
+            logger.warning(
+                "Prompt for whisper transcriptions will be ignored: %s", prompt
+            )
+        return self._call_model(
+            audio=audio,
+            generate_kwargs={"task": "translate"},
+            response_format=response_format,
+        )
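The new audio entry point is easiest to see in isolation. Below is a minimal, hypothetical sketch of driving WhisperModel directly; the checkpoint path and audio file are placeholders, the SimpleNamespace stands in for an AudioModelFamilyV1 spec, and in a real deployment the class is constructed for you when an audio model is launched through xinference.

# Hypothetical standalone use of the new WhisperModel; paths are placeholders.
from types import SimpleNamespace

from xinference.model.audio.whisper import WhisperModel

spec = SimpleNamespace(model_name="whisper-large-v3", multilingual=True)
model = WhisperModel(
    model_uid="whisper-demo",
    model_path="/path/to/whisper-large-v3",  # local HF-format checkpoint
    model_spec=spec,
)
model.load()  # builds the transformers ASR pipeline

with open("sample.wav", "rb") as f:
    audio = f.read()

# Both endpoints return {"text": ...} for the default "json" response format.
print(model.transcriptions(audio, language="en")["text"])
print(model.translations(audio)["text"])  # rejected unless spec.multilingual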
xinference/model/core.py
CHANGED
@@ -12,14 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from abc import ABC
+from abc import ABC, abstractmethod
 from typing import Any, List, Optional, Tuple


 class ModelDescription(ABC):
-    def __init__(self, address: Optional[str], devices: Optional[List[str]]):
+    def __init__(
+        self,
+        address: Optional[str],
+        devices: Optional[List[str]],
+        model_path: Optional[str] = None,
+    ):
         self.address = address
         self.devices = devices
+        self._model_path = model_path

     def to_dict(self):
         """
@@ -28,6 +34,12 @@ class ModelDescription(ABC):
         """
         raise NotImplementedError

+    @abstractmethod
+    def to_version_info(self):
+        """
+        Return a dict to describe version info about a model instance
+        """
+

 def create_model_instance(
     subpool_addr: str,
@@ -41,10 +53,10 @@ def create_model_instance(
     is_local_deployment: bool = False,
     **kwargs,
 ) -> Tuple[Any, ModelDescription]:
+    from .audio.core import create_audio_model_instance
     from .embedding.core import create_embedding_model_instance
     from .image.core import create_image_model_instance
     from .llm.core import create_llm_model_instance
-    from .multimodal.core import create_multimodal_model_instance
     from .rerank.core import create_rerank_model_instance

     if model_type == "LLM":
@@ -75,17 +87,10 @@ def create_model_instance(
         return create_rerank_model_instance(
             subpool_addr, devices, model_uid, model_name, **kwargs
         )
-    elif model_type == "multimodal":
+    elif model_type == "audio":
         kwargs.pop("trust_remote_code", None)
-        return create_multimodal_model_instance(
-            subpool_addr,
-            devices,
-            model_uid,
-            model_name,
-            model_format,
-            model_size_in_billions,
-            quantization,
-            **kwargs,
+        return create_audio_model_instance(
+            subpool_addr, devices, model_uid, model_name, **kwargs
         )
     else:
         raise ValueError(f"Unsupported model type: {model_type}.")
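ModelDescription now carries an optional model_path and an abstract to_version_info(); every concrete description must report a version string, file location, and cache status. A toy subclass (invented here, not part of the package) shows the contract:

from xinference.model.core import ModelDescription


class DummyDescription(ModelDescription):
    """Illustrative subclass only; not shipped with xinference."""

    def to_dict(self):
        return {"address": self.address, "devices": self.devices}

    def to_version_info(self):
        # The new hook: report where the model lives and whether it is cached.
        return {
            "model_version": "dummy--v1",
            "model_file_location": self._model_path or "<not downloaded>",
            "cache_status": self._model_path is not None,
        }


desc = DummyDescription(address=None, devices=None, model_path="/tmp/dummy")
print(desc.to_version_info())  # cache_status True, location /tmp/dummy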
xinference/model/embedding/__init__.py
CHANGED
@@ -16,8 +16,20 @@ import codecs
 import json
 import os

-from .core import MODEL_NAME_TO_REVISION, EmbeddingModelSpec, get_cache_status
-from .custom import CustomEmbeddingModelSpec, register_embedding, unregister_embedding
+from .core import (
+    EMBEDDING_MODEL_DESCRIPTIONS,
+    MODEL_NAME_TO_REVISION,
+    EmbeddingModelSpec,
+    generate_embedding_description,
+    get_cache_status,
+    get_embedding_model_descriptions,
+)
+from .custom import (
+    CustomEmbeddingModelSpec,
+    get_user_defined_embeddings,
+    register_embedding,
+    unregister_embedding,
+)

 _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
 _model_spec_modelscope_json = os.path.join(
@@ -29,6 +41,7 @@ BUILTIN_EMBEDDING_MODELS = dict(
 )
 for model_name, model_spec in BUILTIN_EMBEDDING_MODELS.items():
     MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
+
 MODELSCOPE_EMBEDDING_MODELS = dict(
     (spec["model_name"], EmbeddingModelSpec(**spec))
     for spec in json.load(
@@ -38,6 +51,14 @@ MODELSCOPE_EMBEDDING_MODELS = dict(
 for model_name, model_spec in MODELSCOPE_EMBEDDING_MODELS.items():
     MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)

+# register model description after recording model revision
+for model_spec_info in [BUILTIN_EMBEDDING_MODELS, MODELSCOPE_EMBEDDING_MODELS]:
+    for model_name, model_spec in model_spec_info.items():
+        if model_spec.model_name not in EMBEDDING_MODEL_DESCRIPTIONS:
+            EMBEDDING_MODEL_DESCRIPTIONS.update(
+                generate_embedding_description(model_spec)
+            )
+
 from ...constants import XINFERENCE_MODEL_DIR

 user_defined_llm_dir = os.path.join(XINFERENCE_MODEL_DIR, "embedding")
@@ -47,5 +68,9 @@ if os.path.isdir(user_defined_llm_dir):
            user_defined_llm_family = CustomEmbeddingModelSpec.parse_obj(json.load(fd))
            register_embedding(user_defined_llm_family, persist=False)

+# register model description
+for ud_embedding in get_user_defined_embeddings():
+    EMBEDDING_MODEL_DESCRIPTIONS.update(generate_embedding_description(ud_embedding))
+
 del _model_spec_json
 del _model_spec_modelscope_json
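With these loops in place, importing xinference.model.embedding registers a version-info entry for every built-in, ModelScope, and user-defined spec. A quick way to inspect the result (a sketch, assuming the 0.8.3 wheel is installed):

from xinference.model.embedding import get_embedding_model_descriptions

descriptions = get_embedding_model_descriptions()
# Keys are model names; values are lists of version-info dicts.
for info in descriptions.get("bge-m3", []):
    print(info["model_version"], info["cache_status"])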
xinference/model/embedding/core.py
CHANGED
@@ -24,7 +24,7 @@ from pydantic import BaseModel
 from ...constants import XINFERENCE_CACHE_DIR
 from ...types import Embedding, EmbeddingData, EmbeddingUsage
 from ..core import ModelDescription
-from ..utils import is_model_cached, valid_model_revision
+from ..utils import get_cache_dir, is_model_cached, valid_model_revision

 logger = logging.getLogger(__name__)

@@ -34,6 +34,15 @@ SUPPORTED_SCHEMES = ["s3"]
 MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)


+EMBEDDING_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)
+
+
+def get_embedding_model_descriptions():
+    import copy
+
+    return copy.deepcopy(EMBEDDING_MODEL_DESCRIPTIONS)
+
+
 class EmbeddingModelSpec(BaseModel):
     model_name: str
     dimensions: int
@@ -50,8 +59,9 @@ class EmbeddingModelDescription(ModelDescription):
         address: Optional[str],
         devices: Optional[List[str]],
         model_spec: EmbeddingModelSpec,
+        model_path: Optional[str] = None,
     ):
-        super().__init__(address, devices)
+        super().__init__(address, devices, model_path=model_path)
         self._model_spec = model_spec

     def to_dict(self):
@@ -66,6 +76,34 @@ class EmbeddingModelDescription(ModelDescription):
             "model_revision": self._model_spec.model_revision,
         }

+    def to_version_info(self):
+        from .utils import get_model_version
+
+        if self._model_path is None:
+            is_cached = get_cache_status(self._model_spec)
+            file_location = get_cache_dir(self._model_spec)
+        else:
+            is_cached = True
+            file_location = self._model_path
+
+        return {
+            "model_version": get_model_version(self._model_spec),
+            "model_file_location": file_location,
+            "cache_status": is_cached,
+            "dimensions": self._model_spec.dimensions,
+            "max_tokens": self._model_spec.max_tokens,
+        }
+
+
+def generate_embedding_description(
+    model_spec: EmbeddingModelSpec,
+) -> Dict[str, List[Dict]]:
+    res = defaultdict(list)
+    res[model_spec.model_name].append(
+        EmbeddingModelDescription(None, None, model_spec).to_version_info()
+    )
+    return res
+

 def cache_from_uri(
     model_spec: EmbeddingModelSpec,
@@ -421,5 +459,7 @@
     model_spec = match_embedding(model_name)
     model_path = cache(model_spec)
     model = EmbeddingModel(model_uid, model_path, **kwargs)
-    model_description = EmbeddingModelDescription(subpool_addr, devices, model_spec)
+    model_description = EmbeddingModelDescription(
+        subpool_addr, devices, model_spec, model_path=model_path
+    )
     return model, model_description
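The model_path branch in to_version_info is what distinguishes a launched model from a merely registered one: with an explicit path the files are by definition present, otherwise the shared cache directory is probed. A sketch using the bge-m3 spec added later in this diff (the path below is hypothetical):

from xinference.model.embedding import BUILTIN_EMBEDDING_MODELS
from xinference.model.embedding.core import EmbeddingModelDescription

spec = BUILTIN_EMBEDDING_MODELS["bge-m3"]

# Passing model_path marks the model as cached at that location.
desc = EmbeddingModelDescription(None, None, spec, model_path="/models/bge-m3")
info = desc.to_version_info()
print(info["model_version"])        # bge-m3--8192--1024
print(info["cache_status"])         # True
print(info["model_file_location"])  # /models/bge-m3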
xinference/model/embedding/model_spec.json
CHANGED
@@ -143,6 +143,14 @@
     "model_id": "jinaai/jina-embeddings-v2-base-en",
     "model_revision": "7302ac470bed880590f9344bfeee32ff8722d0e5"
   },
+  {
+    "model_name": "jina-embeddings-v2-base-zh",
+    "dimensions": 768,
+    "max_tokens": 8192,
+    "language": ["zh", "en"],
+    "model_id": "jinaai/jina-embeddings-v2-base-zh",
+    "model_revision": "67974cbef5cf50562eadd745de8afc661c52c96f"
+  },
   {
     "model_name": "text2vec-large-chinese",
     "dimensions": 1024,
@@ -182,5 +190,21 @@
     "language": ["zh"],
     "model_id": "shibing624/text2vec-base-multilingual",
     "model_revision": "f241877385fa56ebcc75f04d1850e1579cfa661d"
+  },
+  {
+    "model_name": "bge-m3",
+    "dimensions": 1024,
+    "max_tokens": 8192,
+    "language": ["zh", "en"],
+    "model_id": "BAAI/bge-m3",
+    "model_revision": "73a15ad29ab604f3bdc31601849a9defe86d563f"
+  },
+  {
+    "model_name": "bce-embedding-base_v1",
+    "dimensions": 768,
+    "max_tokens": 512,
+    "language": ["zh", "en"],
+    "model_id": "maidalun1020/bce-embedding-base_v1",
+    "model_revision": "236d9024fc1b4046f03848723f934521a66a9323"
   }
 ]
xinference/model/embedding/model_spec_modelscope.json
CHANGED
@@ -161,6 +161,14 @@
     "model_revision": "v0.0.1",
     "model_hub": "modelscope"
   },
+  {
+    "model_name": "jina-embeddings-v2-base-zh",
+    "dimensions": 768,
+    "max_tokens": 8192,
+    "language": ["zh", "en"],
+    "model_id": "jinaai/jina-embeddings-v2-base-zh",
+    "model_hub": "modelscope"
+  },
   {
     "model_name": "text2vec-large-chinese",
     "dimensions": 1024,
@@ -184,5 +192,21 @@
     "language": ["zh"],
     "model_id": "mwei23/text2vec-base-chinese-paraphrase",
     "model_hub": "modelscope"
+  },
+  {
+    "model_name": "bge-m3",
+    "dimensions": 1024,
+    "max_tokens": 8192,
+    "language": ["zh", "en"],
+    "model_id": "Xorbits/bge-m3",
+    "model_hub": "modelscope"
+  },
+  {
+    "model_name": "bce-embedding-base_v1",
+    "dimensions": 768,
+    "max_tokens": 512,
+    "language": ["zh", "en"],
+    "model_id": "maidalun/bce-embedding-base_v1",
+    "model_hub": "modelscope"
   }
 ]
xinference/model/embedding/utils.py
ADDED
@@ -0,0 +1,18 @@
+# Copyright 2022-2024 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .core import EmbeddingModelSpec
+
+
+def get_model_version(embedding_model: EmbeddingModelSpec) -> str:
+    return f"{embedding_model.model_name}--{embedding_model.max_tokens}--{embedding_model.dimensions}"
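So an embedding model's version string is just name, context length, and dimensionality joined by double dashes. A sketch building it from a spec mirroring the jina-embeddings-v2-base-zh entry above (the revision value is a placeholder):

from xinference.model.embedding.core import EmbeddingModelSpec
from xinference.model.embedding.utils import get_model_version

spec = EmbeddingModelSpec(
    model_name="jina-embeddings-v2-base-zh",
    dimensions=768,
    max_tokens=8192,
    language=["zh", "en"],
    model_id="jinaai/jina-embeddings-v2-base-zh",
    model_revision="<placeholder>",
)
print(get_model_version(spec))  # jina-embeddings-v2-base-zh--8192--768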
xinference/model/image/__init__.py
CHANGED
@@ -16,11 +16,22 @@ import codecs
 import json
 import os

-from .core import ImageModelFamilyV1, get_cache_status
+from .core import (
+    IMAGE_MODEL_DESCRIPTIONS,
+    ImageModelFamilyV1,
+    generate_image_description,
+    get_cache_status,
+    get_image_model_descriptions,
+)

 _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
 BUILTIN_IMAGE_MODELS = dict(
     (spec["model_name"], ImageModelFamilyV1(**spec))
     for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
 )
+
+# register model description
+for model_name, model_spec in BUILTIN_IMAGE_MODELS.items():
+    IMAGE_MODEL_DESCRIPTIONS.update(generate_image_description(model_spec))
+
 del _model_spec_json
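As with embeddings, the image registry is filled at import time from model_spec.json, and get_image_model_descriptions() returns a deep copy, so callers can inspect it freely:

from xinference.model.image import get_image_model_descriptions

# Populated when the package is imported; mutation-safe deep copy.
print(sorted(get_image_model_descriptions().keys()))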
xinference/model/image/core.py
CHANGED
@@ -14,7 +14,8 @@
 import collections.abc
 import logging
 import os
-from typing import List, Optional, Tuple
+from collections import defaultdict
+from typing import Dict, List, Optional, Tuple

 from pydantic import BaseModel

@@ -27,6 +28,14 @@ MAX_ATTEMPTS = 3

 logger = logging.getLogger(__name__)

+IMAGE_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)
+
+
+def get_image_model_descriptions():
+    import copy
+
+    return copy.deepcopy(IMAGE_MODEL_DESCRIPTIONS)
+

 class ImageModelFamilyV1(BaseModel):
     model_family: str
@@ -42,8 +51,9 @@ class ImageModelDescription(ModelDescription):
         address: Optional[str],
         devices: Optional[List[str]],
         model_spec: ImageModelFamilyV1,
+        model_path: Optional[str] = None,
     ):
-        super().__init__(address, devices)
+        super().__init__(address, devices, model_path=model_path)
         self._model_spec = model_spec

     def to_dict(self):
@@ -57,6 +67,48 @@ class ImageModelDescription(ModelDescription):
             "controlnet": self._model_spec.controlnet,
         }

+    def to_version_info(self):
+        from .utils import get_model_version
+
+        if self._model_path is None:
+            is_cached = get_cache_status(self._model_spec)
+            file_location = get_cache_dir(self._model_spec)
+        else:
+            is_cached = True
+            file_location = self._model_path
+
+        if self._model_spec.controlnet is None:
+            return [
+                {
+                    "model_version": get_model_version(self._model_spec, None),
+                    "model_file_location": file_location,
+                    "cache_status": is_cached,
+                    "controlnet": "zoe-depth",
+                }
+            ]
+        else:
+            res = []
+            for cn in self._model_spec.controlnet:
+                res.append(
+                    {
+                        "model_version": get_model_version(self._model_spec, cn),
+                        "model_file_location": file_location,
+                        "cache_status": is_cached,
+                        "controlnet": cn.model_name,
+                    }
+                )
+            return res
+
+
+def generate_image_description(
+    image_model: ImageModelFamilyV1,
+) -> Dict[str, List[Dict]]:
+    res = defaultdict(list)
+    res[image_model.model_name].extend(
+        ImageModelDescription(None, None, image_model).to_version_info()
+    )
+    return res
+

 def match_diffusion(model_name: str) -> ImageModelFamilyV1:
     from . import BUILTIN_IMAGE_MODELS
@@ -74,9 +126,7 @@ def cache(model_spec: ImageModelFamilyV1):
     # TODO: cache from uri
     import huggingface_hub

-    cache_dir = os.path.realpath(
-        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-    )
+    cache_dir = get_cache_dir(model_spec)
     if not os.path.exists(cache_dir):
         os.makedirs(cache_dir, exist_ok=True)

@@ -113,12 +163,14 @@ def cache(model_spec: ImageModelFamilyV1):
     return cache_dir


+def get_cache_dir(model_spec: ImageModelFamilyV1):
+    return os.path.realpath(os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name))
+
+
 def get_cache_status(
     model_spec: ImageModelFamilyV1,
 ) -> bool:
-    cache_dir = os.path.realpath(
-        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-    )
+    cache_dir = get_cache_dir(model_spec)
     meta_path = os.path.join(cache_dir, "__valid_download")
     return valid_model_revision(meta_path, model_spec.model_revision)

@@ -157,5 +209,7 @@ def create_image_model_instance(
     kwargs["controlnet"] = controlnet_model_paths
     model_path = cache(model_spec)
     model = DiffusionModel(model_uid, model_path, **kwargs)
-    model_description = ImageModelDescription(subpool_addr, devices, model_spec)
+    model_description = ImageModelDescription(
+        subpool_addr, devices, model_spec, model_path=model_path
+    )
     return model, model_description
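Note the fan-out in ImageModelDescription.to_version_info(): a family with controlnets yields one version entry per controlnet, while a family without any gets a single entry (whose hard-coded "zoe-depth" value looks like a placeholder). A small sketch of enumerating them:

from xinference.model.image import BUILTIN_IMAGE_MODELS
from xinference.model.image.core import generate_image_description

for name, spec in BUILTIN_IMAGE_MODELS.items():
    for info in generate_image_description(spec)[name]:
        # model_version is "<name>" or "<name>--<controlnet>".
        print(name, info["model_version"], info["controlnet"])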
xinference/model/image/utils.py
ADDED
@@ -0,0 +1,26 @@
+# Copyright 2022-2024 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Optional
+
+from .core import ImageModelFamilyV1
+
+
+def get_model_version(
+    image_model: ImageModelFamilyV1, controlnet: Optional[ImageModelFamilyV1]
+) -> str:
+    return (
+        image_model.model_name
+        if controlnet is None
+        else f"{image_model.model_name}--{controlnet.model_name}"
+    )
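The image version string follows the same double-dash scheme; a quick duck-typed check (SimpleNamespace stands in for ImageModelFamilyV1, and "canny" is an invented controlnet name):

from types import SimpleNamespace

from xinference.model.image.utils import get_model_version

base = SimpleNamespace(model_name="stable-diffusion-xl-base-1.0")
cn = SimpleNamespace(model_name="canny")  # hypothetical controlnet
print(get_model_version(base, None))  # stable-diffusion-xl-base-1.0
print(get_model_version(base, cn))    # stable-diffusion-xl-base-1.0--canny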
xinference/model/llm/__init__.py
CHANGED
@@ -16,7 +16,13 @@ import codecs
 import json
 import os

-from .core import LLM
+from .core import (
+    LLM,
+    LLM_MODEL_DESCRIPTIONS,
+    LLMDescription,
+    generate_llm_description,
+    get_llm_model_descriptions,
+)
 from .llm_family import (
     BUILTIN_LLM_FAMILIES,
     BUILTIN_LLM_MODEL_CHAT_FAMILIES,
@@ -50,7 +56,9 @@ def _install():
     from .pytorch.falcon import FalconPytorchChatModel, FalconPytorchModel
     from .pytorch.internlm2 import Internlm2PytorchChatModel
     from .pytorch.llama_2 import LlamaPytorchChatModel, LlamaPytorchModel
+    from .pytorch.qwen_vl import QwenVLChatModel
     from .pytorch.vicuna import VicunaPytorchChatModel
+    from .pytorch.yi_vl import YiVLChatModel
     from .vllm.core import VLLMChatModel, VLLMModel

     # register llm classes.
@@ -82,6 +90,8 @@ def _install():
             PytorchChatModel,
             FalconPytorchModel,
             Internlm2PytorchChatModel,
+            QwenVLChatModel,
+            YiVLChatModel,
             PytorchModel,
         ]
     )
@@ -131,6 +141,11 @@ def _install():
         if "tool_call" in model_spec.model_ability:
             BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)

+    for llm_specs in [BUILTIN_LLM_FAMILIES, BUILTIN_MODELSCOPE_LLM_FAMILIES]:
+        for llm_spec in llm_specs:
+            if llm_spec.model_name not in LLM_MODEL_DESCRIPTIONS:
+                LLM_MODEL_DESCRIPTIONS.update(generate_llm_description(llm_spec))
+
     from ...constants import XINFERENCE_MODEL_DIR

     user_defined_llm_dir = os.path.join(XINFERENCE_MODEL_DIR, "llm")
@@ -141,3 +156,7 @@ def _install():
         ) as fd:
             user_defined_llm_family = CustomLLMFamilyV1.parse_obj(json.load(fd))
             register_llm(user_defined_llm_family, persist=False)
+
+    # register model description
+    for ud_llm in get_user_defined_llm_families():
+        LLM_MODEL_DESCRIPTIONS.update(generate_llm_description(ud_llm))
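The LLM side mirrors the embedding and image changes: built-in, ModelScope, and user-defined families all feed LLM_MODEL_DESCRIPTIONS, and the vision models (qwen_vl.py moved in from the removed multimodal package, plus the new yi_vl.py) are registered as ordinary chat model classes. A hedged sketch of inspecting the registry (model names and the exact shape of each entry depend on the bundled llm_family.json and generate_llm_description, which this diff does not show in full):

from xinference.model.llm import get_llm_model_descriptions

descriptions = get_llm_model_descriptions()
print(len(descriptions), "LLM families with version info")
# "qwen-vl-chat" is assumed to be among the registered names in 0.8.3.
for info in descriptions.get("qwen-vl-chat", []):
    print(info)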