PyPI - xinference - Versions diffs - 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

xinference 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (104) hide show

xinference/model/audio/model_spec_modelscope.json CHANGED Viewed

@@ -47,6 +47,22 @@
       "merge_length_s": 15
     }
   },
+  {
+    "model_name": "paraformer-zh",
+    "model_family": "funasr",
+    "model_hub": "modelscope",
+    "model_id": "iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn",
+    "model_revision": "master",
+    "model_ability": "audio-to-text",
+    "multilingual": false,
+    "default_model_config": {
+      "vad_model": "fsmn-vad",
+      "punc_model": "ct-punc"
+    },
+    "default_transcription_config": {
+      "batch_size_s": 300
+    }
+  },
   {
     "model_name": "ChatTTS",
     "model_family": "ChatTTS",
@@ -62,7 +78,7 @@
     "model_hub": "modelscope",
     "model_id": "iic/CosyVoice-300M",
     "model_revision": "master",
-    "model_ability": "audio-to-audio",
+    "model_ability": "text-to-audio",
     "multilingual": true
   },
   {
@@ -109,5 +125,14 @@
     "model_revision": "master",
     "model_ability": "text-to-audio",
     "multilingual": true
+  },
+  {
+    "model_name": "MegaTTS3",
+    "model_family": "MegaTTS",
+    "model_hub": "modelscope",
+    "model_id": "ByteDance/MegaTTS3",
+    "model_revision": "master",
+    "model_ability": "text-to-audio",
+    "multilingual": true
   }
 ]

xinference/model/core.py CHANGED Viewed

@@ -30,6 +30,11 @@ class ModelDescription(ABC):
         self.devices = devices
         self._model_path = model_path
+    @property
+    @abstractmethod
+    def spec(self):
+        pass
     def to_dict(self):
         """
         Return a dict to describe some information about model.
@@ -155,3 +160,12 @@ class CacheableModelSpec(BaseModel):
     model_id: str
     model_revision: Optional[str]
     model_hub: str = "huggingface"
+class VirtualEnvSettings(BaseModel):
+    packages: List[str]
+    inherit_pip_config: bool = True
+    index_url: Optional[str] = None
+    extra_index_url: Optional[str] = None
+    find_links: Optional[str] = None
+    trusted_host: Optional[str] = None

xinference/model/embedding/core.py CHANGED Viewed

@@ -24,7 +24,7 @@ import torch
 from ..._compat import ROOT_KEY, ErrorWrapper, ValidationError
 from ...device_utils import empty_cache
 from ...types import Embedding, EmbeddingData, EmbeddingUsage
-from ..core import CacheableModelSpec, ModelDescription
+from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
 from ..utils import get_cache_dir, is_model_cached
 logger = logging.getLogger(__name__)
@@ -57,6 +57,7 @@ class EmbeddingModelSpec(CacheableModelSpec):
     model_id: str
     model_revision: Optional[str]
     model_hub: str = "huggingface"
+    virtualenv: Optional[VirtualEnvSettings]
 class EmbeddingModelDescription(ModelDescription):
@@ -70,6 +71,10 @@ class EmbeddingModelDescription(ModelDescription):
         super().__init__(address, devices, model_path=model_path)
         self._model_spec = model_spec
+    @property
+    def spec(self):
+        return self._model_spec
     def to_dict(self):
         return {
             "model_type": "embedding",

xinference/model/flexible/core.py CHANGED Viewed

@@ -20,7 +20,7 @@ from threading import Lock
 from typing import Dict, List, Optional, Tuple
 from ...constants import XINFERENCE_CACHE_DIR, XINFERENCE_MODEL_DIR
-from ..core import CacheableModelSpec, ModelDescription
+from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
 from .utils import get_launcher
 logger = logging.getLogger(__name__)
@@ -34,6 +34,7 @@ class FlexibleModelSpec(CacheableModelSpec):
     model_uri: Optional[str]
     launcher: str
     launcher_args: Optional[str]
+    virtualenv: Optional[VirtualEnvSettings]
     def parser_args(self):
         return json.loads(self.launcher_args)
@@ -50,6 +51,10 @@ class FlexibleModelDescription(ModelDescription):
         super().__init__(address, devices, model_path=model_path)
         self._model_spec = model_spec
+    @property
+    def spec(self):
+        return self._model_spec
     def to_dict(self):
         return {
             "model_type": "flexible",

xinference/model/image/core.py CHANGED Viewed

@@ -21,7 +21,7 @@ from typing import Dict, List, Literal, Optional, Tuple, Union
 from ...constants import XINFERENCE_CACHE_DIR
 from ...types import PeftModelConfig
-from ..core import CacheableModelSpec, ModelDescription
+from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
 from ..utils import (
     IS_NEW_HUGGINGFACE_HUB,
     retry_download,
@@ -59,6 +59,7 @@ class ImageModelFamilyV1(CacheableModelSpec):
     gguf_model_id: Optional[str]
     gguf_quantizations: Optional[List[str]]
     gguf_model_file_name_template: Optional[str]
+    virtualenv: Optional[VirtualEnvSettings]
 class ImageModelDescription(ModelDescription):
@@ -72,6 +73,10 @@ class ImageModelDescription(ModelDescription):
         super().__init__(address, devices, model_path=model_path)
         self._model_spec = model_spec
+    @property
+    def spec(self):
+        return self._model_spec
     def to_dict(self):
         if self._model_spec.controlnet is not None:
             controlnet = [cn.dict() for cn in self._model_spec.controlnet]

xinference/model/image/model_spec.json CHANGED Viewed

@@ -339,6 +339,22 @@
     "model_revision": "cf6b7386bc89a54f09785612ba74cb12de6fa17c",
     "model_ability": [
       "ocr"
-    ]
+    ],
+    "virtualenv": {
+      "packages": [
+        "transformers==4.37.2",
+        "httpx==0.24.0",
+        "deepspeed==0.12.3",
+        "peft==0.4.0",
+        "tiktoken==0.6.0",
+        "bitsandbytes==0.41.0",
+        "scikit-learn==1.2.2",
+        "sentencepiece==0.1.99",
+        "einops==0.6.1",
+        "einops-exts==0.0.4",
+        "timm==0.6.13",
+        "numpy==1.26.4"
+      ]
+    }
   }
 ]

xinference/model/image/model_spec_modelscope.json CHANGED Viewed

@@ -315,6 +315,22 @@
     "model_hub": "modelscope",
     "model_ability": [
       "ocr"
-    ]
+    ],
+    "virtualenv": {
+      "packages": [
+        "transformers==4.37.2",
+        "httpx==0.24.0",
+        "deepspeed==0.12.3",
+        "peft==0.4.0",
+        "tiktoken==0.6.0",
+        "bitsandbytes==0.41.0",
+        "scikit-learn==1.2.2",
+        "sentencepiece==0.1.99",
+        "einops==0.6.1",
+        "einops-exts==0.0.4",
+        "timm==0.6.13",
+        "numpy==1.26.4"
+      ]
+    }
   }
 ]

xinference/model/llm/__init__.py CHANGED Viewed

@@ -147,13 +147,11 @@ def _install():
     from .transformers.gemma3 import Gemma3ChatModel, Gemma3TextChatModel
     from .transformers.glm4v import Glm4VModel
     from .transformers.glm_edge_v import GlmEdgeVModel
-    from .transformers.intern_vl import InternVLChatModel
     from .transformers.internlm2 import Internlm2PytorchChatModel
     from .transformers.minicpmv25 import MiniCPMV25Model
     from .transformers.minicpmv26 import MiniCPMV26Model
     from .transformers.opt import OptPytorchModel
     from .transformers.qwen2_audio import Qwen2AudioChatModel
-    from .transformers.qwen2_vl import Qwen2VLChatModel
     from .transformers.qwen_vl import QwenVLChatModel
     from .transformers.yi_vl import YiVLChatModel
     from .vllm.core import VLLMChatModel, VLLMModel, VLLMVisionModel
@@ -184,12 +182,10 @@ def _install():
             PytorchChatModel,
             Internlm2PytorchChatModel,
             QwenVLChatModel,
-            Qwen2VLChatModel,
             Qwen2AudioChatModel,
             YiVLChatModel,
             DeepSeekVLChatModel,
             DeepSeekVL2ChatModel,
-            InternVLChatModel,
             PytorchModel,
             CogVLM2Model,
             CogVLM2VideoModel,

xinference/model/llm/core.py CHANGED Viewed

@@ -144,6 +144,10 @@ class LLMDescription(ModelDescription):
         self._llm_spec = llm_spec
         self._quantization = quantization
+    @property
+    def spec(self):
+        return self._llm_family
     def to_dict(self):
         return {
             "model_type": "LLM",

xinference/model/llm/llama_cpp/core.py CHANGED Viewed

@@ -36,7 +36,7 @@ from ..utils import DEEPSEEK_TOOL_CALL_FAMILY, QWEN_TOOL_CALL_FAMILY, ChatModelM
 logger = logging.getLogger(__name__)
-USE_XLLAMACPP = bool(int(os.environ.get("USE_XLLAMACPP", 0)))
+USE_XLLAMACPP = bool(int(os.environ.get("USE_XLLAMACPP", 1)))
 class _Done:
@@ -142,24 +142,38 @@ class XllamaCppModel(LLM, ChatModelMixin):
         if os.path.isfile(self.model_path):
             # mostly passed from --model_path
-            model_path = os.path.realpath(self.model_path)
+            model_path = self.model_path
         else:
             # handle legacy cache.
-            model_path = os.path.realpath(
-                os.path.join(
+            if (
+                self.model_spec.model_file_name_split_template
+                and self.model_spec.quantization_parts
+            ):
+                part = self.model_spec.quantization_parts[self.quantization]
+                model_path = os.path.join(
+                    self.model_path,
+                    self.model_spec.model_file_name_split_template.format(
+                        quantization=self.quantization, part=part[0]
+                    ),
+                )
+            else:
+                model_path = os.path.join(
                     self.model_path,
                     self.model_spec.model_file_name_template.format(
                         quantization=self.quantization
                     ),
                 )
-            )
-            legacy_model_file_path = os.path.join(self.model_path, "model.bin")
-            if os.path.exists(legacy_model_file_path):
-                model_path = legacy_model_file_path
+                legacy_model_file_path = os.path.join(self.model_path, "model.bin")
+                if os.path.exists(legacy_model_file_path):
+                    model_path = legacy_model_file_path
         try:
             params = CommonParams()
-            params.model = model_path
+            # Compatible with xllamacpp changes
+            try:
+                params.model = model_path
+            except Exception:
+                params.model.path = model_path
             if self.model_family.chat_template:
                 params.chat_template = self.model_family.chat_template
             # This is the default value, could be overwritten by _llamacpp_model_config
@@ -415,20 +429,30 @@ class LlamaCppModel(LLM):
         if os.path.isfile(self.model_path):
             # mostly passed from --model_path
-            model_path = os.path.realpath(self.model_path)
+            model_path = self.model_path
         else:
             # handle legacy cache.
-            model_path = os.path.realpath(
-                os.path.join(
+            if (
+                self.model_spec.model_file_name_split_template
+                and self.model_spec.quantization_parts
+            ):
+                part = self.model_spec.quantization_parts[self.quantization]
+                model_path = os.path.join(
+                    self.model_path,
+                    self.model_spec.model_file_name_split_template.format(
+                        quantization=self.quantization, part=part[0]
+                    ),
+                )
+            else:
+                model_path = os.path.join(
                     self.model_path,
                     self.model_spec.model_file_name_template.format(
                         quantization=self.quantization
                     ),
                 )
-            )
-            legacy_model_file_path = os.path.join(self.model_path, "model.bin")
-            if os.path.exists(legacy_model_file_path):
-                model_path = legacy_model_file_path
+                legacy_model_file_path = os.path.join(self.model_path, "model.bin")
+                if os.path.exists(legacy_model_file_path):
+                    model_path = legacy_model_file_path
         try:
             self._llm = Llama(

xinference 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl

Potentially problematic release.

xinference 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl