PyPI - xinference - Versions diffs - 0.10.1__py3-none-any.whl → 0.10.2.post1__py3-none-any.whl - Mend

xinference 0.10.1__py3-none-any.whl → 0.10.2.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (55)

xinference/model/audio/custom.py ADDED Viewed

@@ -0,0 +1,148 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+import os
+from threading import Lock
+from typing import Any, List, Optional
+from ..._compat import (
+    ROOT_KEY,
+    ErrorWrapper,
+    Protocol,
+    StrBytes,
+    ValidationError,
+    load_str_bytes,
+)
+from ...constants import XINFERENCE_CACHE_DIR, XINFERENCE_MODEL_DIR
+from .core import AudioModelFamilyV1
+logger = logging.getLogger(__name__)
+UD_AUDIO_LOCK = Lock()
+class CustomAudioModelFamilyV1(AudioModelFamilyV1):
+    model_id: Optional[str]  # type: ignore
+    model_revision: Optional[str]  # type: ignore
+    model_uri: Optional[str]
+    @classmethod
+    def parse_raw(
+        cls: Any,
+        b: StrBytes,
+        *,
+        content_type: Optional[str] = None,
+        encoding: str = "utf8",
+        proto: Protocol = None,
+        allow_pickle: bool = False,
+    ) -> AudioModelFamilyV1:
+        # See source code of BaseModel.parse_raw
+        try:
+            obj = load_str_bytes(
+                b,
+                proto=proto,
+                content_type=content_type,
+                encoding=encoding,
+                allow_pickle=allow_pickle,
+                json_loads=cls.__config__.json_loads,
+            )
+        except (ValueError, TypeError, UnicodeDecodeError) as e:
+            raise ValidationError([ErrorWrapper(e, loc=ROOT_KEY)], cls)
+        audio_spec: AudioModelFamilyV1 = cls.parse_obj(obj)
+        # check model_family
+        if audio_spec.model_family is None:
+            raise ValueError(
+                f"You must specify `model_family` when registering custom Audio models."
+            )
+        assert isinstance(audio_spec.model_family, str)
+        return audio_spec
+UD_AUDIOS: List[CustomAudioModelFamilyV1] = []
+def get_user_defined_audios() -> List[CustomAudioModelFamilyV1]:
+    with UD_AUDIO_LOCK:
+        return UD_AUDIOS.copy()
+def register_audio(model_spec: CustomAudioModelFamilyV1, persist: bool):
+    from ...constants import XINFERENCE_MODEL_DIR
+    from ..utils import is_valid_model_name, is_valid_model_uri
+    from . import BUILTIN_AUDIO_MODELS
+    if not is_valid_model_name(model_spec.model_name):
+        raise ValueError(f"Invalid model name {model_spec.model_name}.")
+    with UD_AUDIO_LOCK:
+        for model_name in list(BUILTIN_AUDIO_MODELS.keys()) + [
+            spec.model_name for spec in UD_AUDIOS
+        ]:
+            if model_spec.model_name == model_name:
+                raise ValueError(
+                    f"Model name conflicts with existing model {model_spec.model_name}"
+                )
+        UD_AUDIOS.append(model_spec)
+    if persist:
+        # We only validate model URL when persist is True.
+        model_uri = model_spec.model_uri
+        if model_uri and not is_valid_model_uri(model_uri):
+            raise ValueError(f"Invalid model URI {model_uri}.")
+        persist_path = os.path.join(
+            XINFERENCE_MODEL_DIR, "audio", f"{model_spec.model_name}.json"
+        )
+        os.makedirs(os.path.dirname(persist_path), exist_ok=True)
+        with open(persist_path, mode="w") as fd:
+            fd.write(model_spec.json())
+def unregister_audio(model_name: str, raise_error: bool = True):
+    with UD_AUDIO_LOCK:
+        model_spec = None
+        for i, f in enumerate(UD_AUDIOS):
+            if f.model_name == model_name:
+                model_spec = f
+                break
+        if model_spec:
+            UD_AUDIOS.remove(model_spec)
+            persist_path = os.path.join(
+                XINFERENCE_MODEL_DIR, "audio", f"{model_spec.model_name}.json"
+            )
+            if os.path.exists(persist_path):
+                os.remove(persist_path)
+            cache_dir = os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
+            if os.path.exists(cache_dir):
+                logger.warning(
+                    f"Remove the cache of user-defined model {model_spec.model_name}. "
+                    f"Cache directory: {cache_dir}"
+                )
+                if os.path.isdir(cache_dir):
+                    os.rmdir(cache_dir)
+                else:
+                    logger.warning(
+                        f"Cache directory is not a soft link, please remove it manually."
+                    )
+        else:
+            if raise_error:
+                raise ValueError(f"Model {model_name} not found")
+            else:
+                logger.warning(f"Custom audio model {model_name} not found")

xinference/model/core.py CHANGED Viewed

@@ -13,9 +13,10 @@
 # limitations under the License.
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, List, Optional, Tuple, Union
 from .._compat import BaseModel
+from ..types import PeftModelConfig
 class ModelDescription(ABC):
@@ -50,11 +51,9 @@ def create_model_instance(
     model_type: str,
     model_name: str,
     model_format: Optional[str] = None,
-    model_size_in_billions: Optional[int] = None,
+    model_size_in_billions: Optional[Union[int, str]] = None,
     quantization: Optional[str] = None,
-    peft_model_path: Optional[str] = None,
-    image_lora_load_kwargs: Optional[Dict] = None,
-    image_lora_fuse_kwargs: Optional[Dict] = None,
+    peft_model_config: Optional[PeftModelConfig] = None,
     is_local_deployment: bool = False,
     **kwargs,
 ) -> Tuple[Any, ModelDescription]:
@@ -73,7 +72,7 @@ def create_model_instance(
             model_format,
             model_size_in_billions,
             quantization,
-            peft_model_path,
+            peft_model_config,
             is_local_deployment,
             **kwargs,
         )
@@ -90,9 +89,7 @@ def create_model_instance(
             devices,
             model_uid,
             model_name,
-            lora_model_path=peft_model_path,
-            lora_load_kwargs=image_lora_load_kwargs,
-            lora_fuse_kwargs=image_lora_fuse_kwargs,
+            peft_model_config,
             **kwargs,
         )
     elif model_type == "rerank":

xinference/model/embedding/model_spec.json CHANGED Viewed

@@ -206,5 +206,29 @@
     "language": ["zh", "en"],
     "model_id": "maidalun1020/bce-embedding-base_v1",
     "model_revision": "236d9024fc1b4046f03848723f934521a66a9323"
+  },
+  {
+    "model_name": "m3e-small",
+    "dimensions": 512,
+    "max_tokens": 512,
+    "language": ["zh", "en"],
+    "model_id": "moka-ai/m3e-small",
+    "model_revision": "44c696631b2a8c200220aaaad5f987f096e986df"
+  },
+  {
+    "model_name": "m3e-base",
+    "dimensions": 768,
+    "max_tokens": 512,
+    "language": ["zh", "en"],
+    "model_id": "moka-ai/m3e-base",
+    "model_revision": "764b537a0e50e5c7d64db883f2d2e051cbe3c64c"
+  },
+  {
+    "model_name": "m3e-large",
+    "dimensions": 1024,
+    "max_tokens": 512,
+    "language": ["zh", "en"],
+    "model_id": "moka-ai/m3e-large",
+    "model_revision": "12900375086c37ba5d83d1e417b21dc7d1d1f388"
   }
 ]

xinference/model/embedding/model_spec_modelscope.json CHANGED Viewed

@@ -208,5 +208,29 @@
     "language": ["zh", "en"],
     "model_id": "maidalun/bce-embedding-base_v1",
     "model_hub": "modelscope"
+  },
+  {
+    "model_name": "m3e-small",
+    "dimensions": 512,
+    "max_tokens": 512,
+    "language": ["zh", "en"],
+    "model_id": "AI-ModelScope/m3e-small",
+    "model_hub": "modelscope"
+  },
+  {
+    "model_name": "m3e-base",
+    "dimensions": 768,
+    "max_tokens": 512,
+    "language": ["zh", "en"],
+    "model_id": "AI-ModelScope/m3e-base",
+    "model_hub": "modelscope"
+  },
+  {
+    "model_name": "m3e-large",
+    "dimensions": 1024,
+    "max_tokens": 512,
+    "language": ["zh", "en"],
+    "model_id": "AI-ModelScope/m3e-large",
+    "model_hub": "modelscope"
   }
 ]

xinference/model/image/core.py CHANGED Viewed

@@ -18,6 +18,7 @@ from collections import defaultdict
 from typing import Dict, List, Optional, Tuple
 from ...constants import XINFERENCE_CACHE_DIR
+from ...types import PeftModelConfig
 from ..core import CacheableModelSpec, ModelDescription
 from ..utils import valid_model_revision
 from .stable_diffusion.core import DiffusionModel
@@ -175,9 +176,7 @@ def create_image_model_instance(
     devices: List[str],
     model_uid: str,
     model_name: str,
-    lora_model_path: Optional[str] = None,
-    lora_load_kwargs: Optional[Dict] = None,
-    lora_fuse_kwargs: Optional[Dict] = None,
+    peft_model_config: Optional[PeftModelConfig] = None,
     **kwargs,
 ) -> Tuple[DiffusionModel, ImageModelDescription]:
     model_spec = match_diffusion(model_name)
@@ -210,10 +209,19 @@ def create_image_model_instance(
         else:
             kwargs["controlnet"] = controlnet_model_paths
     model_path = cache(model_spec)
+    if peft_model_config is not None:
+        lora_model = peft_model_config.peft_model
+        lora_load_kwargs = peft_model_config.image_lora_load_kwargs
+        lora_fuse_kwargs = peft_model_config.image_lora_fuse_kwargs
+    else:
+        lora_model = None
+        lora_load_kwargs = None
+        lora_fuse_kwargs = None
     model = DiffusionModel(
         model_uid,
         model_path,
-        lora_model_path=lora_model_path,
+        lora_model_paths=lora_model,
         lora_load_kwargs=lora_load_kwargs,
         lora_fuse_kwargs=lora_fuse_kwargs,
         **kwargs,

xinference/model/image/stable_diffusion/core.py CHANGED Viewed

@@ -25,7 +25,7 @@ from typing import Dict, List, Optional, Union
 from ....constants import XINFERENCE_IMAGE_DIR
 from ....device_utils import move_model_to_available_device
-from ....types import Image, ImageList
+from ....types import Image, ImageList, LoRA
 logger = logging.getLogger(__name__)
@@ -36,7 +36,7 @@ class DiffusionModel:
         model_uid: str,
         model_path: str,
         device: Optional[str] = None,
-        lora_model_path: Optional[str] = None,
+        lora_model: Optional[List[LoRA]] = None,
         lora_load_kwargs: Optional[Dict] = None,
         lora_fuse_kwargs: Optional[Dict] = None,
         **kwargs,
@@ -45,20 +45,21 @@ class DiffusionModel:
         self._model_path = model_path
         self._device = device
         self._model = None
-        self._lora_model_path = lora_model_path
+        self._lora_model = lora_model
         self._lora_load_kwargs = lora_load_kwargs or {}
         self._lora_fuse_kwargs = lora_fuse_kwargs or {}
         self._kwargs = kwargs
     def _apply_lora(self):
-        if self._lora_model_path is not None:
+        if self._lora_model is not None:
             logger.info(
                 f"Loading the LoRA with load kwargs: {self._lora_load_kwargs}, fuse kwargs: {self._lora_fuse_kwargs}."
             )
             assert self._model is not None
-            self._model.load_lora_weights(
-                self._lora_model_path, **self._lora_load_kwargs
-            )
+            for lora_model in self._lora_model:
+                self._model.load_lora_weights(
+                    lora_model.local_path, **self._lora_load_kwargs
+                )
             self._model.fuse_lora(**self._lora_fuse_kwargs)
             logger.info(f"Successfully loaded the LoRA for model {self._model_uid}.")

xinference/model/llm/core.py CHANGED Viewed

@@ -21,6 +21,7 @@ from collections import defaultdict
 from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
 from ...core.utils import parse_replica_model_uid
+from ...types import PeftModelConfig
 from ..core import ModelDescription
 if TYPE_CHECKING:
@@ -178,9 +179,9 @@ def create_llm_model_instance(
     model_uid: str,
     model_name: str,
     model_format: Optional[str] = None,
-    model_size_in_billions: Optional[int] = None,
+    model_size_in_billions: Optional[Union[int, str]] = None,
     quantization: Optional[str] = None,
-    peft_model_path: Optional[str] = None,
+    peft_model_config: Optional[PeftModelConfig] = None,
     is_local_deployment: bool = False,
     **kwargs,
 ) -> Tuple[LLM, LLMDescription]:
@@ -204,9 +205,9 @@ def create_llm_model_instance(
     assert quantization is not None
     save_path = cache(llm_family, llm_spec, quantization)
-    llm_cls = match_llm_cls(
-        llm_family, llm_spec, quantization, peft_model_path=peft_model_path
-    )
+    peft_model = peft_model_config.peft_model if peft_model_config else None
+    llm_cls = match_llm_cls(llm_family, llm_spec, quantization, peft_model=peft_model)
     if not llm_cls:
         raise ValueError(
             f"Model not supported, name: {model_name}, format: {model_format},"
@@ -214,15 +215,9 @@ def create_llm_model_instance(
         )
     logger.debug(f"Launching {model_uid} with {llm_cls.__name__}")
-    if peft_model_path is not None:
+    if peft_model is not None:
         model = llm_cls(
-            model_uid,
-            llm_family,
-            llm_spec,
-            quantization,
-            save_path,
-            kwargs,
-            peft_model_path,
+            model_uid, llm_family, llm_spec, quantization, save_path, kwargs, peft_model
         )
     else:
         model = llm_cls(
@@ -238,7 +233,7 @@ def create_speculative_llm_model_instance(
     devices: List[str],
     model_uid: str,
     model_name: str,
-    model_size_in_billions: Optional[int],
+    model_size_in_billions: Optional[Union[int, str]],
     quantization: Optional[str],
     draft_model_name: str,
     draft_model_size_in_billions: Optional[int],

xinference/model/llm/llm_family.json CHANGED Viewed

@@ -1930,6 +1930,74 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 65536,
+    "model_name": "codeqwen1.5-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "CodeQwen1.5 is the Code-Specific version of Qwen1.5. It is a transformer-based decoder-only language model pretrained on a large amount of data of codes.",
+    "model_specs": [
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "q2_k",
+          "q3_k_m",
+          "q4_0",
+          "q4_k_m",
+          "q5_0",
+          "q5_k_m",
+          "q6_k",
+          "q8_0"
+        ],
+        "model_id": "Qwen/CodeQwen1.5-7B-Chat-GGUF",
+        "model_file_name_template": "codeqwen-1_5-7b-chat-{quantization}.gguf"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/CodeQwen1.5-7B-Chat"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/CodeQwen1.5-7B-Chat-AWQ"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "QWEN",
+      "system_prompt": "You are a helpful assistant.",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "intra_message_sep": "\n",
+      "stop_token_ids": [
+        151643,
+        151644,
+        151645
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "<|im_start|>",
+        "<|im_end|>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 8192,
@@ -4752,5 +4820,200 @@
         "</s>"
       ]
     }
+  },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "seallm_v2",
+    "model_lang": [
+      "en",
+      "zh",
+      "vi",
+      "id",
+      "th",
+      "ms",
+      "km",
+      "lo",
+      "my",
+      "tl"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "We introduce SeaLLM-7B-v2, the state-of-the-art multilingual LLM for Southeast Asian (SEA) languages",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "SeaLLMs/SeaLLM-7B-v2",
+        "model_revision": "f1bd48e0d75365c24a3c5ad006b2d0a0c9dca30f"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Q4_0",
+          "Q8_0"
+        ],
+        "model_id": "SeaLLMs/SeaLLM-7B-v2-gguf",
+        "model_file_name_template": "SeaLLM-7B-v2.{quantization}.gguf"
+      }
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "seallm_v2.5",
+    "model_lang": [
+      "en",
+      "zh",
+      "vi",
+      "id",
+      "th",
+      "ms",
+      "km",
+      "lo",
+      "my",
+      "tl"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "We introduce SeaLLM-7B-v2.5, the state-of-the-art multilingual LLM for Southeast Asian (SEA) languages",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "SeaLLMs/SeaLLM-7B-v2.5",
+        "model_revision": "c54a8eb8e2d58c5a680bfbbe3a7ae71753bb644b"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Q4_K_M",
+          "Q8_0"
+        ],
+        "model_id": "SeaLLMs/SeaLLM-7B-v2.5-GGUF",
+        "model_file_name_template": "SeaLLM-7B-v2.5.{quantization}.gguf"
+      }
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "c4ai-command-r-v01",
+    "model_lang": [
+      "en",
+      "fr",
+      "de",
+      "es",
+      "it",
+      "pt",
+      "ja",
+      "ko",
+      "zh",
+      "ar"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "C4AI Command-R is a research release of a 35 billion parameter highly performant generative model.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 35,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "CohereForAI/c4ai-command-r-v01",
+        "model_revision": "16881ccde1c68bbc7041280e6a66637bc46bfe88"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 35,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "andrewcanis/c4ai-command-r-v01-GGUF",
+        "model_file_name_template": "c4ai-command-r-v01.{quantization}.gguf"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 104,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "CohereForAI/c4ai-command-r-plus",
+        "model_revision": "ba7f1d954c9d1609013677d87e4142ab95c34e62"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 104,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "alpindale/c4ai-command-r-plus-GPTQ",
+        "model_revision": "35febfc08f723ac0df32480eb4af349a7d08656e"
+      }
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "c4ai-command-r-v01-4bit",
+    "model_lang": [
+      "en",
+      "fr",
+      "de",
+      "es",
+      "it",
+      "pt",
+      "ja",
+      "ko",
+      "zh",
+      "ar"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "This model is 4bit quantized version of C4AI Command-R using bitsandbytes.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 35,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "CohereForAI/c4ai-command-r-v01-4bit",
+        "model_revision": "f2e87936a146643c9dd143422dcafb9cb1552611"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 104,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "CohereForAI/c4ai-command-r-plus-4bit",
+        "model_revision": "bb63b5b7005ecedb30b0cfd0d5953b02a5817f7b"
+      }
+    ]
   }
 ]

xinference 0.10.1__py3-none-any.whl → 0.10.2.post1__py3-none-any.whl

Potentially problematic release.

xinference 0.10.1__py3-none-any.whl → 0.10.2.post1__py3-none-any.whl