xinference 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.
Files changed (132)
  1. xinference/_compat.py +1 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +54 -1
  4. xinference/client/restful/restful_client.py +82 -2
  5. xinference/constants.py +3 -0
  6. xinference/core/chat_interface.py +297 -83
  7. xinference/core/model.py +24 -3
  8. xinference/core/progress_tracker.py +16 -8
  9. xinference/core/supervisor.py +51 -1
  10. xinference/core/worker.py +315 -47
  11. xinference/deploy/cmdline.py +33 -1
  12. xinference/model/audio/core.py +11 -1
  13. xinference/model/audio/megatts.py +105 -0
  14. xinference/model/audio/model_spec.json +24 -1
  15. xinference/model/audio/model_spec_modelscope.json +26 -1
  16. xinference/model/core.py +14 -0
  17. xinference/model/embedding/core.py +6 -1
  18. xinference/model/flexible/core.py +6 -1
  19. xinference/model/image/core.py +6 -1
  20. xinference/model/image/model_spec.json +17 -1
  21. xinference/model/image/model_spec_modelscope.json +17 -1
  22. xinference/model/llm/__init__.py +4 -6
  23. xinference/model/llm/core.py +5 -0
  24. xinference/model/llm/llama_cpp/core.py +46 -17
  25. xinference/model/llm/llm_family.json +530 -85
  26. xinference/model/llm/llm_family.py +24 -1
  27. xinference/model/llm/llm_family_modelscope.json +572 -1
  28. xinference/model/llm/mlx/core.py +16 -2
  29. xinference/model/llm/reasoning_parser.py +3 -3
  30. xinference/model/llm/sglang/core.py +111 -13
  31. xinference/model/llm/transformers/__init__.py +14 -0
  32. xinference/model/llm/transformers/core.py +31 -6
  33. xinference/model/llm/transformers/deepseek_vl.py +1 -1
  34. xinference/model/llm/transformers/deepseek_vl2.py +287 -0
  35. xinference/model/llm/transformers/gemma3.py +17 -2
  36. xinference/model/llm/transformers/intern_vl.py +28 -18
  37. xinference/model/llm/transformers/minicpmv26.py +21 -2
  38. xinference/model/llm/transformers/qwen-omni.py +308 -0
  39. xinference/model/llm/transformers/qwen2_audio.py +1 -1
  40. xinference/model/llm/transformers/qwen2_vl.py +20 -4
  41. xinference/model/llm/utils.py +37 -15
  42. xinference/model/llm/vllm/core.py +184 -8
  43. xinference/model/llm/vllm/distributed_executor.py +320 -0
  44. xinference/model/rerank/core.py +22 -12
  45. xinference/model/utils.py +118 -1
  46. xinference/model/video/core.py +6 -1
  47. xinference/thirdparty/deepseek_vl2/__init__.py +31 -0
  48. xinference/thirdparty/deepseek_vl2/models/__init__.py +26 -0
  49. xinference/thirdparty/deepseek_vl2/models/configuration_deepseek.py +210 -0
  50. xinference/thirdparty/deepseek_vl2/models/conversation.py +310 -0
  51. xinference/thirdparty/deepseek_vl2/models/modeling_deepseek.py +1975 -0
  52. xinference/thirdparty/deepseek_vl2/models/modeling_deepseek_vl_v2.py +697 -0
  53. xinference/thirdparty/deepseek_vl2/models/processing_deepseek_vl_v2.py +675 -0
  54. xinference/thirdparty/deepseek_vl2/models/siglip_vit.py +661 -0
  55. xinference/thirdparty/deepseek_vl2/serve/__init__.py +0 -0
  56. xinference/thirdparty/deepseek_vl2/serve/app_modules/__init__.py +0 -0
  57. xinference/thirdparty/deepseek_vl2/serve/app_modules/gradio_utils.py +83 -0
  58. xinference/thirdparty/deepseek_vl2/serve/app_modules/overwrites.py +81 -0
  59. xinference/thirdparty/deepseek_vl2/serve/app_modules/presets.py +115 -0
  60. xinference/thirdparty/deepseek_vl2/serve/app_modules/utils.py +333 -0
  61. xinference/thirdparty/deepseek_vl2/serve/assets/Kelpy-Codos.js +100 -0
  62. xinference/thirdparty/deepseek_vl2/serve/assets/avatar.png +0 -0
  63. xinference/thirdparty/deepseek_vl2/serve/assets/custom.css +355 -0
  64. xinference/thirdparty/deepseek_vl2/serve/assets/custom.js +22 -0
  65. xinference/thirdparty/deepseek_vl2/serve/assets/favicon.ico +0 -0
  66. xinference/thirdparty/deepseek_vl2/serve/assets/simsun.ttc +0 -0
  67. xinference/thirdparty/deepseek_vl2/serve/inference.py +197 -0
  68. xinference/thirdparty/deepseek_vl2/utils/__init__.py +18 -0
  69. xinference/thirdparty/deepseek_vl2/utils/io.py +80 -0
  70. xinference/thirdparty/megatts3/__init__.py +0 -0
  71. xinference/thirdparty/megatts3/tts/frontend_function.py +175 -0
  72. xinference/thirdparty/megatts3/tts/gradio_api.py +93 -0
  73. xinference/thirdparty/megatts3/tts/infer_cli.py +277 -0
  74. xinference/thirdparty/megatts3/tts/modules/aligner/whisper_small.py +318 -0
  75. xinference/thirdparty/megatts3/tts/modules/ar_dur/ar_dur_predictor.py +362 -0
  76. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/layers.py +64 -0
  77. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/nar_tts_modules.py +73 -0
  78. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rel_transformer.py +403 -0
  79. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rot_transformer.py +649 -0
  80. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/seq_utils.py +342 -0
  81. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/transformer.py +767 -0
  82. xinference/thirdparty/megatts3/tts/modules/llm_dit/cfm.py +309 -0
  83. xinference/thirdparty/megatts3/tts/modules/llm_dit/dit.py +180 -0
  84. xinference/thirdparty/megatts3/tts/modules/llm_dit/time_embedding.py +44 -0
  85. xinference/thirdparty/megatts3/tts/modules/llm_dit/transformer.py +230 -0
  86. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/diag_gaussian.py +67 -0
  87. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/hifigan_modules.py +283 -0
  88. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/seanet_encoder.py +38 -0
  89. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/wavvae_v3.py +60 -0
  90. xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/conv.py +154 -0
  91. xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/lstm.py +51 -0
  92. xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/seanet.py +126 -0
  93. xinference/thirdparty/megatts3/tts/utils/audio_utils/align.py +36 -0
  94. xinference/thirdparty/megatts3/tts/utils/audio_utils/io.py +95 -0
  95. xinference/thirdparty/megatts3/tts/utils/audio_utils/plot.py +90 -0
  96. xinference/thirdparty/megatts3/tts/utils/commons/ckpt_utils.py +171 -0
  97. xinference/thirdparty/megatts3/tts/utils/commons/hparams.py +215 -0
  98. xinference/thirdparty/megatts3/tts/utils/text_utils/dict.json +1 -0
  99. xinference/thirdparty/megatts3/tts/utils/text_utils/ph_tone_convert.py +94 -0
  100. xinference/thirdparty/megatts3/tts/utils/text_utils/split_text.py +90 -0
  101. xinference/thirdparty/megatts3/tts/utils/text_utils/text_encoder.py +280 -0
  102. xinference/types.py +10 -0
  103. xinference/utils.py +54 -0
  104. xinference/web/ui/build/asset-manifest.json +6 -6
  105. xinference/web/ui/build/index.html +1 -1
  106. xinference/web/ui/build/static/css/main.0f6523be.css +2 -0
  107. xinference/web/ui/build/static/css/main.0f6523be.css.map +1 -0
  108. xinference/web/ui/build/static/js/main.58bd483c.js +3 -0
  109. xinference/web/ui/build/static/js/main.58bd483c.js.map +1 -0
  110. xinference/web/ui/node_modules/.cache/babel-loader/3bff8cbe9141f937f4d98879a9771b0f48e0e4e0dbee8e647adbfe23859e7048.json +1 -0
  111. xinference/web/ui/node_modules/.cache/babel-loader/4500b1a622a031011f0a291701e306b87e08cbc749c50e285103536b85b6a914.json +1 -0
  112. xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +1 -0
  113. xinference/web/ui/node_modules/.cache/babel-loader/69081049f0c7447544b7cfd73dd13d8846c02fe5febe4d81587e95c89a412d5b.json +1 -0
  114. xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +1 -0
  115. xinference/web/ui/node_modules/.cache/babel-loader/bf2b211b0d1b6465eff512d64c869d748f803c5651a7c24e48de6ea3484a7bfe.json +1 -0
  116. xinference/web/ui/src/locales/en.json +2 -1
  117. xinference/web/ui/src/locales/zh.json +2 -1
  118. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/METADATA +128 -115
  119. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/RECORD +124 -63
  120. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/WHEEL +1 -1
  121. xinference/web/ui/build/static/css/main.b494ae7e.css +0 -2
  122. xinference/web/ui/build/static/css/main.b494ae7e.css.map +0 -1
  123. xinference/web/ui/build/static/js/main.3cea968e.js +0 -3
  124. xinference/web/ui/build/static/js/main.3cea968e.js.map +0 -1
  125. xinference/web/ui/node_modules/.cache/babel-loader/27bcada3ee8f89d21184b359f022fc965f350ffaca52c9814c29f1fc37121173.json +0 -1
  126. xinference/web/ui/node_modules/.cache/babel-loader/7f59e45e3f268ab8a4788b6fb024cf8dab088736dff22f5a3a39c122a83ab930.json +0 -1
  127. xinference/web/ui/node_modules/.cache/babel-loader/dcd60488509450bfff37bfff56de2c096d51de17dd00ec60d4db49c8b483ada1.json +0 -1
  128. xinference/web/ui/node_modules/.cache/babel-loader/e547bbb18abb4a474b675a8d5782d25617566bea0af8caa9b836ce5649e2250a.json +0 -1
  129. /xinference/web/ui/build/static/js/{main.3cea968e.js.LICENSE.txt → main.58bd483c.js.LICENSE.txt} +0 -0
  130. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/entry_points.txt +0 -0
  131. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info/licenses}/LICENSE +0 -0
  132. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/top_level.txt +0 -0
xinference/model/audio/core.py CHANGED
@@ -17,7 +17,7 @@ from collections import defaultdict
 from typing import Any, Dict, List, Literal, Optional, Tuple, Union
 
 from ...constants import XINFERENCE_CACHE_DIR
-from ..core import CacheableModelSpec, ModelDescription
+from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
 from ..utils import valid_model_revision
 from .chattts import ChatTTSModel
 from .cosyvoice import CosyVoiceModel
@@ -26,6 +26,7 @@ from .f5tts_mlx import F5TTSMLXModel
 from .fish_speech import FishSpeechModel
 from .funasr import FunASRModel
 from .kokoro import KokoroModel
+from .megatts import MegaTTSModel
 from .melotts import MeloTTSModel
 from .whisper import WhisperModel
 from .whisper_mlx import WhisperMLXModel
@@ -55,6 +56,7 @@ class AudioModelFamilyV1(CacheableModelSpec):
     default_model_config: Optional[Dict[str, Any]]
     default_transcription_config: Optional[Dict[str, Any]]
     engine: Optional[str]
+    virtualenv: Optional[VirtualEnvSettings]
 
 
 class AudioModelDescription(ModelDescription):
@@ -68,6 +70,10 @@ class AudioModelDescription(ModelDescription):
         super().__init__(address, devices, model_path=model_path)
         self._model_spec = model_spec
 
+    @property
+    def spec(self):
+        return self._model_spec
+
     def to_dict(self):
         return {
             "model_type": "audio",
@@ -178,6 +184,7 @@ def create_audio_model_instance(
         F5TTSMLXModel,
         MeloTTSModel,
         KokoroModel,
+        MegaTTSModel,
     ],
     AudioModelDescription,
 ]:
@@ -195,6 +202,7 @@ def create_audio_model_instance(
         F5TTSMLXModel,
        MeloTTSModel,
        KokoroModel,
+        MegaTTSModel,
    ]
    if model_spec.model_family == "whisper":
        if not model_spec.engine:
@@ -217,6 +225,8 @@ def create_audio_model_instance(
         model = MeloTTSModel(model_uid, model_path, model_spec, **kwargs)
     elif model_spec.model_family == "Kokoro":
         model = KokoroModel(model_uid, model_path, model_spec, **kwargs)
+    elif model_spec.model_family == "MegaTTS":
+        model = MegaTTSModel(model_uid, model_path, model_spec, **kwargs)
     else:
         raise Exception(f"Unsupported audio model family: {model_spec.model_family}")
     model_description = AudioModelDescription(
xinference/model/audio/megatts.py ADDED
@@ -0,0 +1,105 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import io
+import logging
+from io import BytesIO
+from typing import TYPE_CHECKING, Optional
+
+if TYPE_CHECKING:
+    from .core import AudioModelFamilyV1
+
+logger = logging.getLogger(__name__)
+
+
+class MegaTTSModel:
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: str,
+        model_spec: "AudioModelFamilyV1",
+        device: Optional[str] = None,
+        **kwargs,
+    ):
+        self._model_uid = model_uid
+        self._model_path = model_path
+        self._model_spec = model_spec
+        self._device = device
+        self._model = None
+        self._vocoder = None
+        self._kwargs = kwargs
+
+    @property
+    def model_ability(self):
+        return self._model_spec.model_ability
+
+    def load(self):
+        import os
+        import sys
+
+        # The yaml config loaded from model has hard-coded the import paths. please refer to: load_hyperpyyaml
+        sys.path.insert(
+            0, os.path.join(os.path.dirname(__file__), "../../thirdparty/megatts3")
+        )
+        # For whisper
+        sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../thirdparty"))
+
+        from tts.infer_cli import MegaTTS3DiTInfer
+
+        self._model = MegaTTS3DiTInfer(ckpt_root=self._model_path)
+
+    def speech(
+        self,
+        input: str,
+        voice: str,
+        response_format: str = "mp3",
+        speed: float = 1.0,
+        stream: bool = False,
+        **kwargs,
+    ):
+        import soundfile
+
+        if stream:
+            raise Exception("MegaTTS3 does not support stream generation.")
+        if voice:
+            raise Exception(
+                "MegaTTS3 does not support voice, please specify prompt_speech and prompt_latent."
+            )
+
+        prompt_speech: Optional[bytes] = kwargs.pop("prompt_speech", None)
+        prompt_latent: Optional[bytes] = kwargs.pop("prompt_latent", None)
+        if not prompt_speech:
+            raise Exception("Please set prompt_speech for MegaTTS3.")
+        if not prompt_latent:
+            raise Exception("Please set prompt_latent for MegaTTS3.")
+
+        assert self._model is not None
+        with io.BytesIO(prompt_latent) as prompt_latent_io:
+            resource_context = self._model.preprocess(
+                prompt_speech, latent_file=prompt_latent_io
+            )
+            wav_bytes = self._model.forward(
+                resource_context,
+                input,
+                time_step=kwargs.get("time_step", 32),
+                p_w=kwargs.get("p_w", 1.6),
+                t_w=kwargs.get("t_w", 2.5),
+            )
+
+        # Save the generated audio
+        with BytesIO() as out:
+            with soundfile.SoundFile(
+                out, "w", self._model.sr, 1, format=response_format.upper()
+            ) as f:
+                f.write(wav_bytes)
+            return out.getvalue()
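A usage sketch (annotation, not part of the diff): driving this new MegaTTS path through the RESTful client, assuming a locally running endpoint and that extra kwargs such as prompt_speech and prompt_latent are forwarded to MegaTTSModel.speech() as the handler above expects; the file names are placeholders.

from xinference.client import Client

client = Client("http://localhost:9997")
model_uid = client.launch_model(model_name="MegaTTS3", model_type="audio")
model = client.get_model(model_uid)

# MegaTTS3 rejects named voices; it clones from a reference wav plus a
# pre-extracted latent file, both passed as raw bytes.
with open("ref_voice.wav", "rb") as f:
    prompt_speech = f.read()
with open("ref_voice.npy", "rb") as f:
    prompt_latent = f.read()

audio = model.speech(
    input="Hello from MegaTTS3.",
    voice="",
    response_format="wav",
    prompt_speech=prompt_speech,
    prompt_latent=prompt_latent,
)
with open("out.wav", "wb") as f:
    f.write(audio)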
xinference/model/audio/model_spec.json CHANGED
@@ -203,6 +203,21 @@
       "merge_length_s": 15
     }
   },
+  {
+    "model_name": "paraformer-zh",
+    "model_family": "funasr",
+    "model_id": "funasr/paraformer-zh",
+    "model_revision": "5ed094cdfc8f6a9b6b022bd08bc904ef862bc79e",
+    "model_ability": "audio-to-text",
+    "multilingual": false,
+    "default_model_config": {
+      "vad_model": "fsmn-vad",
+      "punc_model": "ct-punc"
+    },
+    "default_transcription_config": {
+      "batch_size_s": 300
+    }
+  },
   {
     "model_name": "ChatTTS",
     "model_family": "ChatTTS",
@@ -216,7 +231,7 @@
     "model_family": "CosyVoice",
     "model_id": "FunAudioLLM/CosyVoice-300M",
     "model_revision": "39c4e13d46bd4dfb840d214547623e5fcd2428e2",
-    "model_ability": "audio-to-audio",
+    "model_ability": "text-to-audio",
     "multilingual": true
   },
   {
@@ -346,5 +361,13 @@
     "model_revision": "7884269d6fd3f9beabc271b6f1308e5699281fa9",
     "model_ability": "text-to-audio",
     "multilingual": true
+  },
+  {
+    "model_name": "MegaTTS3",
+    "model_family": "MegaTTS",
+    "model_id": "ByteDance/MegaTTS3",
+    "model_revision": "409a7002b006d80f0730fca6f80441b08c10e738",
+    "model_ability": "text-to-audio",
+    "multilingual": true
   }
 ]
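As with MegaTTS3, the new paraformer-zh entry can be exercised end to end. A hedged sketch against the RESTful client, assuming transcriptions() returns the usual {"text": ...} payload and that the default_model_config above (fsmn-vad segmentation, ct-punc punctuation) is applied server-side; the audio file is a placeholder.

from xinference.client import Client

client = Client("http://localhost:9997")
uid = client.launch_model(model_name="paraformer-zh", model_type="audio")
model = client.get_model(uid)

# Mandarin speech in, punctuated text out.
with open("speech_zh.wav", "rb") as f:
    result = model.transcriptions(f.read())
print(result["text"])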
xinference/model/audio/model_spec_modelscope.json CHANGED
@@ -47,6 +47,22 @@
       "merge_length_s": 15
     }
   },
+  {
+    "model_name": "paraformer-zh",
+    "model_family": "funasr",
+    "model_hub": "modelscope",
+    "model_id": "iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn",
+    "model_revision": "master",
+    "model_ability": "audio-to-text",
+    "multilingual": false,
+    "default_model_config": {
+      "vad_model": "fsmn-vad",
+      "punc_model": "ct-punc"
+    },
+    "default_transcription_config": {
+      "batch_size_s": 300
+    }
+  },
   {
     "model_name": "ChatTTS",
     "model_family": "ChatTTS",
@@ -62,7 +78,7 @@
     "model_hub": "modelscope",
     "model_id": "iic/CosyVoice-300M",
     "model_revision": "master",
-    "model_ability": "audio-to-audio",
+    "model_ability": "text-to-audio",
     "multilingual": true
   },
   {
@@ -109,5 +125,14 @@
     "model_revision": "master",
     "model_ability": "text-to-audio",
     "multilingual": true
+  },
+  {
+    "model_name": "MegaTTS3",
+    "model_family": "MegaTTS",
+    "model_hub": "modelscope",
+    "model_id": "ByteDance/MegaTTS3",
+    "model_revision": "master",
+    "model_ability": "text-to-audio",
+    "multilingual": true
   }
 ]
xinference/model/core.py CHANGED
@@ -30,6 +30,11 @@ class ModelDescription(ABC):
         self.devices = devices
         self._model_path = model_path
 
+    @property
+    @abstractmethod
+    def spec(self):
+        pass
+
     def to_dict(self):
         """
         Return a dict to describe some information about model.
@@ -155,3 +160,12 @@ class CacheableModelSpec(BaseModel):
     model_id: str
     model_revision: Optional[str]
     model_hub: str = "huggingface"
+
+
+class VirtualEnvSettings(BaseModel):
+    packages: List[str]
+    inherit_pip_config: bool = True
+    index_url: Optional[str] = None
+    extra_index_url: Optional[str] = None
+    find_links: Optional[str] = None
+    trusted_host: Optional[str] = None
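The new VirtualEnvSettings model is what the "virtualenv" blocks in the spec JSON files elsewhere in this diff deserialize into. A small sketch of how its fields map onto pip flags; to_pip_args is a hypothetical helper written for illustration, not part of xinference.

from typing import List

from xinference.model.core import VirtualEnvSettings

settings = VirtualEnvSettings(
    packages=["transformers==4.37.2", "timm==0.6.13"],
    index_url="https://pypi.org/simple",
)

def to_pip_args(s: VirtualEnvSettings) -> List[str]:
    # Render the settings as arguments for a `pip install` run inside
    # the model's dedicated virtual environment.
    args = list(s.packages)
    if s.index_url:
        args += ["--index-url", s.index_url]
    if s.extra_index_url:
        args += ["--extra-index-url", s.extra_index_url]
    if s.find_links:
        args += ["--find-links", s.find_links]
    if s.trusted_host:
        args += ["--trusted-host", s.trusted_host]
    return args

print(to_pip_args(settings))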
xinference/model/embedding/core.py CHANGED
@@ -24,7 +24,7 @@ import torch
 from ..._compat import ROOT_KEY, ErrorWrapper, ValidationError
 from ...device_utils import empty_cache
 from ...types import Embedding, EmbeddingData, EmbeddingUsage
-from ..core import CacheableModelSpec, ModelDescription
+from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
 from ..utils import get_cache_dir, is_model_cached
 
 logger = logging.getLogger(__name__)
@@ -57,6 +57,7 @@ class EmbeddingModelSpec(CacheableModelSpec):
     model_id: str
     model_revision: Optional[str]
     model_hub: str = "huggingface"
+    virtualenv: Optional[VirtualEnvSettings]
 
 
 class EmbeddingModelDescription(ModelDescription):
@@ -70,6 +71,10 @@ class EmbeddingModelDescription(ModelDescription):
         super().__init__(address, devices, model_path=model_path)
         self._model_spec = model_spec
 
+    @property
+    def spec(self):
+        return self._model_spec
+
     def to_dict(self):
         return {
             "model_type": "embedding",
xinference/model/flexible/core.py CHANGED
@@ -20,7 +20,7 @@ from threading import Lock
 from typing import Dict, List, Optional, Tuple
 
 from ...constants import XINFERENCE_CACHE_DIR, XINFERENCE_MODEL_DIR
-from ..core import CacheableModelSpec, ModelDescription
+from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
 from .utils import get_launcher
 
 logger = logging.getLogger(__name__)
@@ -34,6 +34,7 @@ class FlexibleModelSpec(CacheableModelSpec):
     model_uri: Optional[str]
     launcher: str
     launcher_args: Optional[str]
+    virtualenv: Optional[VirtualEnvSettings]
 
     def parser_args(self):
         return json.loads(self.launcher_args)
@@ -50,6 +51,10 @@ class FlexibleModelDescription(ModelDescription):
         super().__init__(address, devices, model_path=model_path)
         self._model_spec = model_spec
 
+    @property
+    def spec(self):
+        return self._model_spec
+
     def to_dict(self):
         return {
             "model_type": "flexible",
xinference/model/image/core.py CHANGED
@@ -21,7 +21,7 @@ from typing import Dict, List, Literal, Optional, Tuple, Union
 
 from ...constants import XINFERENCE_CACHE_DIR
 from ...types import PeftModelConfig
-from ..core import CacheableModelSpec, ModelDescription
+from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
 from ..utils import (
     IS_NEW_HUGGINGFACE_HUB,
     retry_download,
@@ -59,6 +59,7 @@ class ImageModelFamilyV1(CacheableModelSpec):
     gguf_model_id: Optional[str]
     gguf_quantizations: Optional[List[str]]
     gguf_model_file_name_template: Optional[str]
+    virtualenv: Optional[VirtualEnvSettings]
 
 
 class ImageModelDescription(ModelDescription):
@@ -72,6 +73,10 @@ class ImageModelDescription(ModelDescription):
         super().__init__(address, devices, model_path=model_path)
         self._model_spec = model_spec
 
+    @property
+    def spec(self):
+        return self._model_spec
+
     def to_dict(self):
         if self._model_spec.controlnet is not None:
             controlnet = [cn.dict() for cn in self._model_spec.controlnet]
xinference/model/image/model_spec.json CHANGED
@@ -339,6 +339,22 @@
     "model_revision": "cf6b7386bc89a54f09785612ba74cb12de6fa17c",
     "model_ability": [
       "ocr"
-    ]
+    ],
+    "virtualenv": {
+      "packages": [
+        "transformers==4.37.2",
+        "httpx==0.24.0",
+        "deepspeed==0.12.3",
+        "peft==0.4.0",
+        "tiktoken==0.6.0",
+        "bitsandbytes==0.41.0",
+        "scikit-learn==1.2.2",
+        "sentencepiece==0.1.99",
+        "einops==0.6.1",
+        "einops-exts==0.0.4",
+        "timm==0.6.13",
+        "numpy==1.26.4"
+      ]
+    }
   }
 ]
xinference/model/image/model_spec_modelscope.json CHANGED
@@ -315,6 +315,22 @@
     "model_hub": "modelscope",
     "model_ability": [
       "ocr"
-    ]
+    ],
+    "virtualenv": {
+      "packages": [
+        "transformers==4.37.2",
+        "httpx==0.24.0",
+        "deepspeed==0.12.3",
+        "peft==0.4.0",
+        "tiktoken==0.6.0",
+        "bitsandbytes==0.41.0",
+        "scikit-learn==1.2.2",
+        "sentencepiece==0.1.99",
+        "einops==0.6.1",
+        "einops-exts==0.0.4",
+        "timm==0.6.13",
+        "numpy==1.26.4"
+      ]
+    }
   }
 ]
xinference/model/llm/__init__.py CHANGED
@@ -132,7 +132,7 @@ def _install():
     from .llama_cpp.core import LlamaCppChatModel, LlamaCppModel, XllamaCppModel
     from .lmdeploy.core import LMDeployChatModel, LMDeployModel
     from .mlx.core import MLXChatModel, MLXModel, MLXVisionModel
-    from .sglang.core import SGLANGChatModel, SGLANGModel
+    from .sglang.core import SGLANGChatModel, SGLANGModel, SGLANGVisionModel
     from .transformers.chatglm import ChatglmPytorchChatModel
     from .transformers.cogagent import CogAgentChatModel
     from .transformers.cogvlm2 import CogVLM2Model
@@ -143,16 +143,15 @@ def _install():
         DeepSeekV2PytorchModel,
     )
     from .transformers.deepseek_vl import DeepSeekVLChatModel
+    from .transformers.deepseek_vl2 import DeepSeekVL2ChatModel
     from .transformers.gemma3 import Gemma3ChatModel, Gemma3TextChatModel
     from .transformers.glm4v import Glm4VModel
     from .transformers.glm_edge_v import GlmEdgeVModel
-    from .transformers.intern_vl import InternVLChatModel
     from .transformers.internlm2 import Internlm2PytorchChatModel
     from .transformers.minicpmv25 import MiniCPMV25Model
     from .transformers.minicpmv26 import MiniCPMV26Model
     from .transformers.opt import OptPytorchModel
     from .transformers.qwen2_audio import Qwen2AudioChatModel
-    from .transformers.qwen2_vl import Qwen2VLChatModel
     from .transformers.qwen_vl import QwenVLChatModel
     from .transformers.yi_vl import YiVLChatModel
     from .vllm.core import VLLMChatModel, VLLMModel, VLLMVisionModel
@@ -173,7 +172,7 @@ def _install():
             XllamaCppModel,
         ]
     )
-    SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel])
+    SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel, SGLANGVisionModel])
     VLLM_CLASSES.extend([VLLMModel, VLLMChatModel, VLLMVisionModel])
     MLX_CLASSES.extend([MLXModel, MLXChatModel, MLXVisionModel])
     LMDEPLOY_CLASSES.extend([LMDeployModel, LMDeployChatModel])
@@ -183,11 +182,10 @@ def _install():
             PytorchChatModel,
             Internlm2PytorchChatModel,
             QwenVLChatModel,
-            Qwen2VLChatModel,
             Qwen2AudioChatModel,
             YiVLChatModel,
             DeepSeekVLChatModel,
-            InternVLChatModel,
+            DeepSeekVL2ChatModel,
             PytorchModel,
             CogVLM2Model,
             CogVLM2VideoModel,
xinference/model/llm/core.py CHANGED
@@ -54,6 +54,7 @@ class LLM(abc.ABC):
         **kwargs,
     ):
         self.model_uid, self.rep_id = parse_replica_model_uid(replica_model_uid)
+        self.raw_model_uid = replica_model_uid
         self.model_family = model_family
         self.model_spec = model_spec
         self.quantization = quantization
@@ -143,6 +144,10 @@ class LLMDescription(ModelDescription):
         self._llm_spec = llm_spec
         self._quantization = quantization
 
+    @property
+    def spec(self):
+        return self._llm_family
+
     def to_dict(self):
         return {
             "model_type": "LLM",
xinference/model/llm/llama_cpp/core.py CHANGED
@@ -36,7 +36,7 @@ from ..utils import DEEPSEEK_TOOL_CALL_FAMILY, QWEN_TOOL_CALL_FAMILY, ChatModelM
 
 logger = logging.getLogger(__name__)
 
-USE_XLLAMACPP = bool(int(os.environ.get("USE_XLLAMACPP", 0)))
+USE_XLLAMACPP = bool(int(os.environ.get("USE_XLLAMACPP", 1)))
 
 
 class _Done:
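Note the default flip: the xllamacpp backend is now on unless the variable says otherwise. Since USE_XLLAMACPP is evaluated at module import time (it is a module-level assignment above), a deployment that still needs the llama-cpp-python path must set the variable before the module loads; a minimal sketch:

import os

# Must run before xinference.model.llm.llama_cpp.core is imported.
os.environ["USE_XLLAMACPP"] = "0"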
@@ -142,24 +142,38 @@ class XllamaCppModel(LLM, ChatModelMixin):
 
         if os.path.isfile(self.model_path):
             # mostly passed from --model_path
-            model_path = os.path.realpath(self.model_path)
+            model_path = self.model_path
         else:
             # handle legacy cache.
-            model_path = os.path.realpath(
-                os.path.join(
+            if (
+                self.model_spec.model_file_name_split_template
+                and self.model_spec.quantization_parts
+            ):
+                part = self.model_spec.quantization_parts[self.quantization]
+                model_path = os.path.join(
+                    self.model_path,
+                    self.model_spec.model_file_name_split_template.format(
+                        quantization=self.quantization, part=part[0]
+                    ),
+                )
+            else:
+                model_path = os.path.join(
                     self.model_path,
                     self.model_spec.model_file_name_template.format(
                         quantization=self.quantization
                     ),
                 )
-            )
-            legacy_model_file_path = os.path.join(self.model_path, "model.bin")
-            if os.path.exists(legacy_model_file_path):
-                model_path = legacy_model_file_path
+                legacy_model_file_path = os.path.join(self.model_path, "model.bin")
+                if os.path.exists(legacy_model_file_path):
+                    model_path = legacy_model_file_path
 
         try:
             params = CommonParams()
-            params.model = model_path
+            # Compatible with xllamacpp changes
+            try:
+                params.model = model_path
+            except Exception:
+                params.model.path = model_path
             if self.model_family.chat_template:
                 params.chat_template = self.model_family.chat_template
             # This is the default value, could be overwritten by _llamacpp_model_config
@@ -302,7 +316,12 @@ class XllamaCppModel(LLM, ChatModelMixin):
             while (r := q.get()) is not _Done:
                 if type(r) is _Error:
                     raise Exception("Got error in chat stream: %s", r.msg)
-                yield r
+                # Get valid keys (O(1) lookup)
+                chunk_keys = ChatCompletionChunk.__annotations__
+                # The chunk may contain additional keys (e.g., system_fingerprint),
+                # which might not conform to OpenAI/DeepSeek formats.
+                # Filter out keys that are not part of ChatCompletionChunk.
+                yield {key: r[key] for key in chunk_keys if key in r}
 
         return self._to_chat_completion_chunks(
             _to_iterator(), self.reasoning_parser
@@ -410,20 +429,30 @@ class LlamaCppModel(LLM):
 
         if os.path.isfile(self.model_path):
             # mostly passed from --model_path
-            model_path = os.path.realpath(self.model_path)
+            model_path = self.model_path
         else:
             # handle legacy cache.
-            model_path = os.path.realpath(
-                os.path.join(
+            if (
+                self.model_spec.model_file_name_split_template
+                and self.model_spec.quantization_parts
+            ):
+                part = self.model_spec.quantization_parts[self.quantization]
+                model_path = os.path.join(
+                    self.model_path,
+                    self.model_spec.model_file_name_split_template.format(
+                        quantization=self.quantization, part=part[0]
+                    ),
+                )
+            else:
+                model_path = os.path.join(
                     self.model_path,
                     self.model_spec.model_file_name_template.format(
                         quantization=self.quantization
                     ),
                 )
-            )
-            legacy_model_file_path = os.path.join(self.model_path, "model.bin")
-            if os.path.exists(legacy_model_file_path):
-                model_path = legacy_model_file_path
+                legacy_model_file_path = os.path.join(self.model_path, "model.bin")
+                if os.path.exists(legacy_model_file_path):
+                    model_path = legacy_model_file_path
 
         try:
             self._llm = Llama(
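The streaming hunk above filters each chunk through ChatCompletionChunk.__annotations__. A standalone sketch of the same allow-list trick, with a stand-in TypedDict so it runs without xinference installed:

from typing import List, TypedDict

class ChatCompletionChunk(TypedDict):
    # Stand-in for xinference.types.ChatCompletionChunk; only the key set matters.
    id: str
    model: str
    object: str
    created: int
    choices: List[dict]

raw = {
    "id": "chatcmpl-1",
    "model": "qwen2.5",
    "object": "chat.completion.chunk",
    "created": 1700000000,
    "choices": [],
    "system_fingerprint": "b1234",  # extra key a backend may emit
}

# __annotations__ is a plain dict, so `key in chunk_keys` is an O(1) lookup.
chunk_keys = ChatCompletionChunk.__annotations__
clean = {key: raw[key] for key in chunk_keys if key in raw}
assert "system_fingerprint" not in clean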