PyPI - xinference - Versions diffs - 1.7.1__py3-none-any.whl → 1.8.0__py3-none-any.whl - Mend

xinference 1.7.1__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (136)

xinference/_version.py +3 -3
xinference/client/restful/async_restful_client.py +8 -13
xinference/client/restful/restful_client.py +6 -2
xinference/core/chat_interface.py +6 -4
xinference/core/media_interface.py +5 -0
xinference/core/model.py +1 -5
xinference/core/supervisor.py +117 -68
xinference/core/worker.py +49 -37
xinference/deploy/test/test_cmdline.py +2 -6
xinference/model/audio/__init__.py +26 -23
xinference/model/audio/chattts.py +3 -2
xinference/model/audio/core.py +49 -98
xinference/model/audio/cosyvoice.py +3 -2
xinference/model/audio/custom.py +28 -73
xinference/model/audio/f5tts.py +3 -2
xinference/model/audio/f5tts_mlx.py +3 -2
xinference/model/audio/fish_speech.py +3 -2
xinference/model/audio/funasr.py +17 -4
xinference/model/audio/kokoro.py +3 -2
xinference/model/audio/megatts.py +3 -2
xinference/model/audio/melotts.py +3 -2
xinference/model/audio/model_spec.json +572 -171
xinference/model/audio/utils.py +0 -6
xinference/model/audio/whisper.py +3 -2
xinference/model/audio/whisper_mlx.py +3 -2
xinference/model/cache_manager.py +141 -0
xinference/model/core.py +6 -49
xinference/model/custom.py +174 -0
xinference/model/embedding/__init__.py +67 -56
xinference/model/embedding/cache_manager.py +35 -0
xinference/model/embedding/core.py +104 -84
xinference/model/embedding/custom.py +55 -78
xinference/model/embedding/embed_family.py +80 -31
xinference/model/embedding/flag/core.py +21 -5
xinference/model/embedding/llama_cpp/__init__.py +0 -0
xinference/model/embedding/llama_cpp/core.py +234 -0
xinference/model/embedding/model_spec.json +968 -103
xinference/model/embedding/sentence_transformers/core.py +30 -20
xinference/model/embedding/vllm/core.py +11 -5
xinference/model/flexible/__init__.py +8 -2
xinference/model/flexible/core.py +26 -119
xinference/model/flexible/custom.py +69 -0
xinference/model/flexible/launchers/image_process_launcher.py +1 -0
xinference/model/flexible/launchers/modelscope_launcher.py +5 -1
xinference/model/flexible/launchers/transformers_launcher.py +15 -3
xinference/model/flexible/launchers/yolo_launcher.py +5 -1
xinference/model/image/__init__.py +20 -20
xinference/model/image/cache_manager.py +62 -0
xinference/model/image/core.py +70 -182
xinference/model/image/custom.py +28 -72
xinference/model/image/model_spec.json +402 -119
xinference/model/image/ocr/got_ocr2.py +3 -2
xinference/model/image/stable_diffusion/core.py +22 -7
xinference/model/image/stable_diffusion/mlx.py +6 -6
xinference/model/image/utils.py +2 -2
xinference/model/llm/__init__.py +71 -94
xinference/model/llm/cache_manager.py +292 -0
xinference/model/llm/core.py +37 -111
xinference/model/llm/custom.py +88 -0
xinference/model/llm/llama_cpp/core.py +5 -7
xinference/model/llm/llm_family.json +16260 -8151
xinference/model/llm/llm_family.py +138 -839
xinference/model/llm/lmdeploy/core.py +5 -7
xinference/model/llm/memory.py +3 -4
xinference/model/llm/mlx/core.py +6 -8
xinference/model/llm/reasoning_parser.py +3 -1
xinference/model/llm/sglang/core.py +32 -14
xinference/model/llm/transformers/chatglm.py +3 -7
xinference/model/llm/transformers/core.py +49 -27
xinference/model/llm/transformers/deepseek_v2.py +2 -2
xinference/model/llm/transformers/gemma3.py +2 -2
xinference/model/llm/transformers/multimodal/cogagent.py +2 -2
xinference/model/llm/transformers/multimodal/deepseek_vl2.py +2 -2
xinference/model/llm/transformers/multimodal/gemma3.py +2 -2
xinference/model/llm/transformers/multimodal/glm4_1v.py +167 -0
xinference/model/llm/transformers/multimodal/glm4v.py +2 -2
xinference/model/llm/transformers/multimodal/intern_vl.py +2 -2
xinference/model/llm/transformers/multimodal/minicpmv26.py +3 -3
xinference/model/llm/transformers/multimodal/ovis2.py +2 -2
xinference/model/llm/transformers/multimodal/qwen-omni.py +2 -2
xinference/model/llm/transformers/multimodal/qwen2_audio.py +2 -2
xinference/model/llm/transformers/multimodal/qwen2_vl.py +2 -2
xinference/model/llm/transformers/opt.py +3 -7
xinference/model/llm/utils.py +34 -49
xinference/model/llm/vllm/core.py +77 -27
xinference/model/llm/vllm/xavier/engine.py +5 -3
xinference/model/llm/vllm/xavier/scheduler.py +10 -6
xinference/model/llm/vllm/xavier/transfer.py +1 -1
xinference/model/rerank/__init__.py +26 -25
xinference/model/rerank/core.py +47 -87
xinference/model/rerank/custom.py +25 -71
xinference/model/rerank/model_spec.json +158 -33
xinference/model/rerank/utils.py +2 -2
xinference/model/utils.py +115 -54
xinference/model/video/__init__.py +13 -17
xinference/model/video/core.py +44 -102
xinference/model/video/diffusers.py +4 -3
xinference/model/video/model_spec.json +90 -21
xinference/types.py +5 -3
xinference/web/ui/build/asset-manifest.json +3 -3
xinference/web/ui/build/index.html +1 -1
xinference/web/ui/build/static/js/main.7d24df53.js +3 -0
xinference/web/ui/build/static/js/main.7d24df53.js.map +1 -0
xinference/web/ui/node_modules/.cache/babel-loader/2704ff66a5f73ca78b341eb3edec60154369df9d87fbc8c6dd60121abc5e1b0a.json +1 -0
xinference/web/ui/node_modules/.cache/babel-loader/607dfef23d33e6b594518c0c6434567639f24f356b877c80c60575184ec50ed0.json +1 -0
xinference/web/ui/node_modules/.cache/babel-loader/9be3d56173aacc3efd0b497bcb13c4f6365de30069176ee9403b40e717542326.json +1 -0
xinference/web/ui/node_modules/.cache/babel-loader/9f9dd6c32c78a222d07da5987ae902effe16bcf20aac00774acdccc4de3c9ff2.json +1 -0
xinference/web/ui/node_modules/.cache/babel-loader/b2ab5ee972c60d15eb9abf5845705f8ab7e1d125d324d9a9b1bcae5d6fd7ffb2.json +1 -0
xinference/web/ui/src/locales/en.json +0 -1
xinference/web/ui/src/locales/ja.json +0 -1
xinference/web/ui/src/locales/ko.json +0 -1
xinference/web/ui/src/locales/zh.json +0 -1
{xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/METADATA +9 -11
{xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/RECORD +119 -119
xinference/model/audio/model_spec_modelscope.json +0 -231
xinference/model/embedding/model_spec_modelscope.json +0 -293
xinference/model/embedding/utils.py +0 -18
xinference/model/image/model_spec_modelscope.json +0 -375
xinference/model/llm/llama_cpp/memory.py +0 -457
xinference/model/llm/llm_family_csghub.json +0 -56
xinference/model/llm/llm_family_modelscope.json +0 -8700
xinference/model/llm/llm_family_openmind_hub.json +0 -1019
xinference/model/rerank/model_spec_modelscope.json +0 -85
xinference/model/video/model_spec_modelscope.json +0 -184
xinference/web/ui/build/static/js/main.9b12b7f9.js +0 -3
xinference/web/ui/build/static/js/main.9b12b7f9.js.map +0 -1
xinference/web/ui/node_modules/.cache/babel-loader/1460361af6975e63576708039f1cb732faf9c672d97c494d4055fc6331460be0.json +0 -1
xinference/web/ui/node_modules/.cache/babel-loader/4efd8dda58fda83ed9546bf2f587df67f8d98e639117bee2d9326a9a1d9bebb2.json +0 -1
xinference/web/ui/node_modules/.cache/babel-loader/55b9fb40b57fa926e8f05f31c2f96467e76e5ad62f033dca97c03f9e8c4eb4fe.json +0 -1
xinference/web/ui/node_modules/.cache/babel-loader/5b2dafe5aa9e1105e0244a2b6751807342fa86aa0144b4e84d947a1686102715.json +0 -1
xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +0 -1
/xinference/web/ui/build/static/js/{main.9b12b7f9.js.LICENSE.txt → main.7d24df53.js.LICENSE.txt} +0 -0
{xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/WHEEL +0 -0
{xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/entry_points.txt +0 -0
{xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/licenses/LICENSE +0 -0
{xinference-1.7.1.dist-info → xinference-1.8.0.dist-info}/top_level.txt +0 -0

xinference/model/audio/custom.py CHANGED Viewed

@@ -11,29 +11,26 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-import os
-from threading import Lock
 from typing import Any, List, Optional
 from ..._compat import (
     ROOT_KEY,
     ErrorWrapper,
+    Literal,
     Protocol,
     StrBytes,
     ValidationError,
     load_str_bytes,
 )
-from ...constants import XINFERENCE_CACHE_DIR, XINFERENCE_MODEL_DIR
-from .core import AudioModelFamilyV1
+from ..custom import ModelRegistry
+from .core import AudioModelFamilyV2
 logger = logging.getLogger(__name__)
-UD_AUDIO_LOCK = Lock()
-class CustomAudioModelFamilyV1(AudioModelFamilyV1):
+class CustomAudioModelFamilyV2(AudioModelFamilyV2):
+    version: Literal[2] = 2
     model_id: Optional[str]  # type: ignore
     model_revision: Optional[str]  # type: ignore
     model_uri: Optional[str]
@@ -47,7 +44,7 @@ class CustomAudioModelFamilyV1(AudioModelFamilyV1):
         encoding: str = "utf8",
         proto: Protocol = None,
         allow_pickle: bool = False,
-    ) -> AudioModelFamilyV1:
+    ) -> AudioModelFamilyV2:
         # See source code of BaseModel.parse_raw
         try:
             obj = load_str_bytes(
@@ -61,7 +58,7 @@ class CustomAudioModelFamilyV1(AudioModelFamilyV1):
         except (ValueError, TypeError, UnicodeDecodeError) as e:
             raise ValidationError([ErrorWrapper(e, loc=ROOT_KEY)], cls)
-        audio_spec: AudioModelFamilyV1 = cls.parse_obj(obj)
+        audio_spec: AudioModelFamilyV2 = cls.parse_obj(obj)
         # check model_family
         if audio_spec.model_family is None:
@@ -72,78 +69,36 @@ class CustomAudioModelFamilyV1(AudioModelFamilyV1):
         return audio_spec
-UD_AUDIOS: List[CustomAudioModelFamilyV1] = []
+UD_AUDIOS: List[CustomAudioModelFamilyV2] = []
+class AudioModelRegistry(ModelRegistry):
+    model_type = "audio"
-def get_user_defined_audios() -> List[CustomAudioModelFamilyV1]:
-    with UD_AUDIO_LOCK:
-        return UD_AUDIOS.copy()
+    def __init__(self):
+        from . import BUILTIN_AUDIO_MODELS
+        super().__init__()
+        self.models = UD_AUDIOS
+        self.builtin_models = list(BUILTIN_AUDIO_MODELS.keys())
-def register_audio(model_spec: CustomAudioModelFamilyV1, persist: bool):
-    from ...constants import XINFERENCE_MODEL_DIR
-    from ..utils import is_valid_model_name, is_valid_model_uri
-    from . import BUILTIN_AUDIO_MODELS, MODELSCOPE_AUDIO_MODELS
-    if not is_valid_model_name(model_spec.model_name):
-        raise ValueError(f"Invalid model name {model_spec.model_name}.")
+def get_user_defined_audios() -> List[CustomAudioModelFamilyV2]:
+    from ..custom import RegistryManager
-    model_uri = model_spec.model_uri
-    if model_uri and not is_valid_model_uri(model_uri):
-        raise ValueError(f"Invalid model URI {model_uri}.")
+    registry = RegistryManager.get_registry("audio")
+    return registry.get_custom_models()
-    with UD_AUDIO_LOCK:
-        for model_name in (
-            list(BUILTIN_AUDIO_MODELS.keys())
-            + list(MODELSCOPE_AUDIO_MODELS.keys())
-            + [spec.model_name for spec in UD_AUDIOS]
-        ):
-            if model_spec.model_name == model_name:
-                raise ValueError(
-                    f"Model name conflicts with existing model {model_spec.model_name}"
-                )
-        UD_AUDIOS.append(model_spec)
+def register_audio(model_spec: CustomAudioModelFamilyV2, persist: bool):
+    from ..custom import RegistryManager
-    if persist:
-        persist_path = os.path.join(
-            XINFERENCE_MODEL_DIR, "audio", f"{model_spec.model_name}.json"
-        )
-        os.makedirs(os.path.dirname(persist_path), exist_ok=True)
-        with open(persist_path, mode="w") as fd:
-            fd.write(model_spec.json())
+    registry = RegistryManager.get_registry("audio")
+    registry.register(model_spec, persist)
 def unregister_audio(model_name: str, raise_error: bool = True):
-    with UD_AUDIO_LOCK:
-        model_spec = None
-        for i, f in enumerate(UD_AUDIOS):
-            if f.model_name == model_name:
-                model_spec = f
-                break
-        if model_spec:
-            UD_AUDIOS.remove(model_spec)
-            persist_path = os.path.join(
-                XINFERENCE_MODEL_DIR, "audio", f"{model_spec.model_name}.json"
-            )
-            if os.path.exists(persist_path):
-                os.remove(persist_path)
-            cache_dir = os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-            if os.path.exists(cache_dir):
-                logger.warning(
-                    f"Remove the cache of user-defined model {model_spec.model_name}. "
-                    f"Cache directory: {cache_dir}"
-                )
-                if os.path.isdir(cache_dir):
-                    os.rmdir(cache_dir)
-                else:
-                    logger.warning(
-                        f"Cache directory is not a soft link, please remove it manually."
-                    )
-        else:
-            if raise_error:
-                raise ValueError(f"Model {model_name} not found")
-            else:
-                logger.warning(f"Custom audio model {model_name} not found")
+    from ..custom import RegistryManager
+    registry = RegistryManager.get_registry("audio")
+    registry.unregister(model_name, raise_error)

xinference/model/audio/f5tts.py CHANGED Viewed

@@ -19,7 +19,7 @@ from io import BytesIO
 from typing import TYPE_CHECKING, Optional, Union
 if TYPE_CHECKING:
-    from .core import AudioModelFamilyV1
+    from .core import AudioModelFamilyV2
 logger = logging.getLogger(__name__)
@@ -29,10 +29,11 @@ class F5TTSModel:
         self,
         model_uid: str,
         model_path: str,
-        model_spec: "AudioModelFamilyV1",
+        model_spec: "AudioModelFamilyV2",
         device: Optional[str] = None,
         **kwargs,
     ):
+        self.model_family = model_spec
         self._model_uid = model_uid
         self._model_path = model_path
         self._model_spec = model_spec

xinference/model/audio/f5tts_mlx.py CHANGED Viewed

@@ -24,7 +24,7 @@ import numpy as np
 from tqdm import tqdm
 if TYPE_CHECKING:
-    from .core import AudioModelFamilyV1
+    from .core import AudioModelFamilyV2
 logger = logging.getLogger(__name__)
@@ -34,10 +34,11 @@ class F5TTSMLXModel:
         self,
         model_uid: str,
         model_path: str,
-        model_spec: "AudioModelFamilyV1",
+        model_spec: "AudioModelFamilyV2",
         device: Optional[str] = None,
         **kwargs,
     ):
+        self.model_family = model_spec
         self._model_uid = model_uid
         self._model_path = model_path
         self._model_spec = model_spec

xinference/model/audio/fish_speech.py CHANGED Viewed

@@ -23,7 +23,7 @@ import torch
 from ...device_utils import get_available_device, is_device_available
 if TYPE_CHECKING:
-    from .core import AudioModelFamilyV1
+    from .core import AudioModelFamilyV2
 logger = logging.getLogger(__name__)
@@ -48,10 +48,11 @@ class FishSpeechModel:
         self,
         model_uid: str,
         model_path: str,
-        model_spec: "AudioModelFamilyV1",
+        model_spec: "AudioModelFamilyV2",
         device: Optional[str] = None,
         **kwargs,
     ):
+        self.model_family = model_spec
         self._model_uid = model_uid
         self._model_path = model_path
         self._model_spec = model_spec

xinference/model/audio/funasr.py CHANGED Viewed

@@ -19,7 +19,7 @@ from typing import TYPE_CHECKING, List, Optional
 from ...device_utils import get_available_device, is_device_available
 if TYPE_CHECKING:
-    from .core import AudioModelFamilyV1
+    from .core import AudioModelFamilyV2
 logger = logging.getLogger(__name__)
@@ -29,10 +29,11 @@ class FunASRModel:
         self,
         model_uid: str,
         model_path: str,
-        model_spec: "AudioModelFamilyV1",
+        model_spec: "AudioModelFamilyV2",
         device: Optional[str] = None,
         **kwargs,
     ):
+        self.model_family = model_spec
         self._model_uid = model_uid
         self._model_path = model_path
         self._model_spec = model_spec
@@ -100,7 +101,11 @@ class FunASRModel:
             if not is_device_available(self._device):
                 raise ValueError(f"Device {self._device} is not available!")
-        kwargs = self._model_spec.default_model_config.copy()
+        kwargs = (
+            self._model_spec.default_model_config.copy()
+            if getattr(self._model_spec, "default_model_config", None)
+            else {}
+        )
         kwargs.update(self._kwargs)
         logger.debug("Loading FunASR model with kwargs: %s", kwargs)
         self._model = AutoModel(model=self._model_path, device=self._device, **kwargs)
@@ -131,12 +136,20 @@ class FunASRModel:
         with tempfile.NamedTemporaryFile(buffering=0) as f:
             f.write(audio)
-            kw = self._model_spec.default_transcription_config.copy()  # type: ignore
+            kw = (
+                self._model_spec.default_transcription_config.copy()  # type: ignore
+                if getattr(self._model_spec, "default_transcription_config", None)
+                else {}
+            )
             kw.update(kwargs)
             logger.debug("Calling FunASR model with kwargs: %s", kw)
             result = self._model.generate(  # type: ignore
                 input=f.name, cache={}, language=language, **kw
             )
+            if not result or not isinstance(result, list):
+                raise RuntimeError(f"FunASR returned empty or invalid result: {result}")
+            if "text" not in result[0]:
+                raise RuntimeError(f"Missing 'text' field in result[0]: {result[0]}")
             text = rich_transcription_postprocess(result[0]["text"])
             if response_format == "json":

xinference/model/audio/kokoro.py CHANGED Viewed

@@ -20,7 +20,7 @@ import numpy as np
 from ...device_utils import get_available_device, is_device_available
 if TYPE_CHECKING:
-    from .core import AudioModelFamilyV1
+    from .core import AudioModelFamilyV2
 logger = logging.getLogger(__name__)
@@ -30,10 +30,11 @@ class KokoroModel:
         self,
         model_uid: str,
         model_path: str,
-        model_spec: "AudioModelFamilyV1",
+        model_spec: "AudioModelFamilyV2",
         device: Optional[str] = None,
         **kwargs,
     ):
+        self.model_family = model_spec
         self._model_uid = model_uid
         self._model_path = model_path
         self._model_spec = model_spec

xinference/model/audio/megatts.py CHANGED Viewed

@@ -17,7 +17,7 @@ from io import BytesIO
 from typing import TYPE_CHECKING, Optional
 if TYPE_CHECKING:
-    from .core import AudioModelFamilyV1
+    from .core import AudioModelFamilyV2
 logger = logging.getLogger(__name__)
@@ -27,10 +27,11 @@ class MegaTTSModel:
         self,
         model_uid: str,
         model_path: str,
-        model_spec: "AudioModelFamilyV1",
+        model_spec: "AudioModelFamilyV2",
         device: Optional[str] = None,
         **kwargs,
     ):
+        self.model_family = model_spec
         self._model_uid = model_uid
         self._model_path = model_path
         self._model_spec = model_spec

xinference/model/audio/melotts.py CHANGED Viewed

@@ -18,7 +18,7 @@ from typing import TYPE_CHECKING, Optional
 from ...device_utils import get_available_device, is_device_available
 if TYPE_CHECKING:
-    from .core import AudioModelFamilyV1
+    from .core import AudioModelFamilyV2
 logger = logging.getLogger(__name__)
@@ -28,10 +28,11 @@ class MeloTTSModel:
         self,
         model_uid: str,
         model_path: str,
-        model_spec: "AudioModelFamilyV1",
+        model_spec: "AudioModelFamilyV2",
         device: Optional[str] = None,
         **kwargs,
     ):
+        self.model_family = model_spec
         self._model_uid = model_uid
         self._model_path = model_path
         self._model_spec = model_spec

xinference 1.7.1__py3-none-any.whl → 1.8.0__py3-none-any.whl

Potentially problematic release.

xinference 1.7.1__py3-none-any.whl → 1.8.0__py3-none-any.whl