xinference-1.4.1-py3-none-any.whl → xinference-1.5.0-py3-none-any.whl
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +50 -1
- xinference/client/restful/restful_client.py +82 -2
- xinference/constants.py +3 -0
- xinference/core/chat_interface.py +297 -83
- xinference/core/model.py +1 -0
- xinference/core/progress_tracker.py +16 -8
- xinference/core/supervisor.py +45 -1
- xinference/core/worker.py +262 -37
- xinference/deploy/cmdline.py +33 -1
- xinference/model/audio/core.py +11 -1
- xinference/model/audio/megatts.py +105 -0
- xinference/model/audio/model_spec.json +24 -1
- xinference/model/audio/model_spec_modelscope.json +26 -1
- xinference/model/core.py +14 -0
- xinference/model/embedding/core.py +6 -1
- xinference/model/flexible/core.py +6 -1
- xinference/model/image/core.py +6 -1
- xinference/model/image/model_spec.json +17 -1
- xinference/model/image/model_spec_modelscope.json +17 -1
- xinference/model/llm/__init__.py +0 -4
- xinference/model/llm/core.py +4 -0
- xinference/model/llm/llama_cpp/core.py +40 -16
- xinference/model/llm/llm_family.json +413 -84
- xinference/model/llm/llm_family.py +24 -1
- xinference/model/llm/llm_family_modelscope.json +447 -0
- xinference/model/llm/mlx/core.py +16 -2
- xinference/model/llm/transformers/__init__.py +14 -0
- xinference/model/llm/transformers/core.py +30 -6
- xinference/model/llm/transformers/gemma3.py +17 -2
- xinference/model/llm/transformers/intern_vl.py +28 -18
- xinference/model/llm/transformers/minicpmv26.py +21 -2
- xinference/model/llm/transformers/qwen-omni.py +308 -0
- xinference/model/llm/transformers/qwen2_audio.py +1 -1
- xinference/model/llm/transformers/qwen2_vl.py +20 -4
- xinference/model/llm/utils.py +11 -1
- xinference/model/llm/vllm/core.py +35 -0
- xinference/model/llm/vllm/distributed_executor.py +8 -2
- xinference/model/rerank/core.py +6 -1
- xinference/model/utils.py +118 -1
- xinference/model/video/core.py +6 -1
- xinference/thirdparty/megatts3/__init__.py +0 -0
- xinference/thirdparty/megatts3/tts/frontend_function.py +175 -0
- xinference/thirdparty/megatts3/tts/gradio_api.py +93 -0
- xinference/thirdparty/megatts3/tts/infer_cli.py +277 -0
- xinference/thirdparty/megatts3/tts/modules/aligner/whisper_small.py +318 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/ar_dur_predictor.py +362 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/layers.py +64 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/nar_tts_modules.py +73 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rel_transformer.py +403 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rot_transformer.py +649 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/seq_utils.py +342 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/transformer.py +767 -0
- xinference/thirdparty/megatts3/tts/modules/llm_dit/cfm.py +309 -0
- xinference/thirdparty/megatts3/tts/modules/llm_dit/dit.py +180 -0
- xinference/thirdparty/megatts3/tts/modules/llm_dit/time_embedding.py +44 -0
- xinference/thirdparty/megatts3/tts/modules/llm_dit/transformer.py +230 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/diag_gaussian.py +67 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/hifigan_modules.py +283 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/seanet_encoder.py +38 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/wavvae_v3.py +60 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/conv.py +154 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/lstm.py +51 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/seanet.py +126 -0
- xinference/thirdparty/megatts3/tts/utils/audio_utils/align.py +36 -0
- xinference/thirdparty/megatts3/tts/utils/audio_utils/io.py +95 -0
- xinference/thirdparty/megatts3/tts/utils/audio_utils/plot.py +90 -0
- xinference/thirdparty/megatts3/tts/utils/commons/ckpt_utils.py +171 -0
- xinference/thirdparty/megatts3/tts/utils/commons/hparams.py +215 -0
- xinference/thirdparty/megatts3/tts/utils/text_utils/dict.json +1 -0
- xinference/thirdparty/megatts3/tts/utils/text_utils/ph_tone_convert.py +94 -0
- xinference/thirdparty/megatts3/tts/utils/text_utils/split_text.py +90 -0
- xinference/thirdparty/megatts3/tts/utils/text_utils/text_encoder.py +280 -0
- xinference/types.py +10 -0
- xinference/utils.py +54 -0
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.0f6523be.css +2 -0
- xinference/web/ui/build/static/css/main.0f6523be.css.map +1 -0
- xinference/web/ui/build/static/js/main.58bd483c.js +3 -0
- xinference/web/ui/build/static/js/main.58bd483c.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3bff8cbe9141f937f4d98879a9771b0f48e0e4e0dbee8e647adbfe23859e7048.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4500b1a622a031011f0a291701e306b87e08cbc749c50e285103536b85b6a914.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/69081049f0c7447544b7cfd73dd13d8846c02fe5febe4d81587e95c89a412d5b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bf2b211b0d1b6465eff512d64c869d748f803c5651a7c24e48de6ea3484a7bfe.json +1 -0
- xinference/web/ui/src/locales/en.json +2 -1
- xinference/web/ui/src/locales/zh.json +2 -1
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/METADATA +127 -114
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/RECORD +96 -60
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/WHEEL +1 -1
- xinference/web/ui/build/static/css/main.b494ae7e.css +0 -2
- xinference/web/ui/build/static/css/main.b494ae7e.css.map +0 -1
- xinference/web/ui/build/static/js/main.5ca4eea1.js +0 -3
- xinference/web/ui/build/static/js/main.5ca4eea1.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f0967acaec5df1d45b80010949c258d64297ebbb0f44b8bb3afcbd45c6f0ec4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/27bcada3ee8f89d21184b359f022fc965f350ffaca52c9814c29f1fc37121173.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/68249645124f37d01eef83b1d897e751f895bea919b6fb466f907c1f87cebc84.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e547bbb18abb4a474b675a8d5782d25617566bea0af8caa9b836ce5649e2250a.json +0 -1
- /xinference/web/ui/build/static/js/{main.5ca4eea1.js.LICENSE.txt → main.58bd483c.js.LICENSE.txt} +0 -0
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info/licenses}/LICENSE +0 -0
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/top_level.txt +0 -0
xinference/model/llm/transformers/intern_vl.py
CHANGED

@@ -19,14 +19,14 @@ from typing import Dict, Iterator, List, Optional, Union
 import torch

 from ....types import ChatCompletion, ChatCompletionChunk
-from ..llm_family import LLMFamilyV1, LLMSpecV1
+from ..llm_family import LLMFamilyV1, LLMSpecV1, register_transformer
 from ..utils import (
     _decode_image,
     generate_chat_completion,
     generate_completion_chunk,
     parse_messages,
 )
-from .core import PytorchChatModel, PytorchGenerateConfig
+from .core import PytorchChatModel, PytorchGenerateConfig, register_non_default_model
 from .utils import cache_clean

 logger = logging.getLogger(__name__)

@@ -232,6 +232,10 @@ def _load_video(video_path, bound=None, input_size=448, max_num=1, num_segments=
     return pixel_values, num_patches_list


+@register_transformer
+@register_non_default_model(
+    "internvl-chat", "internvl2", "Internvl2.5", "Internvl2.5-MPO", "InternVL3"
+)
 class InternVLChatModel(PytorchChatModel):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

@@ -257,6 +261,8 @@ class InternVLChatModel(PytorchChatModel):
     def _split_model(self):
         import math

+        from transformers import AutoConfig
+
         device_map = {}
         world_size = torch.cuda.device_count()
         # single gpu

@@ -265,22 +271,26 @@ class InternVLChatModel(PytorchChatModel):
         model_size = f"{self.model_spec.model_size_in_billions}B"
         model_name = self.model_family.model_name.lower().replace("-mpo", "")
         model_name = f"{model_name}-{model_size}"
[16 removed lines (old lines 268-283) were not captured in this view]
+        if "internvl3" in model_name.lower():
+            config = AutoConfig.from_pretrained(self.model_path, trust_remote_code=True)
+            num_layers = config.llm_config.num_hidden_layers
+        else:
+            num_layers = {
+                "internvl2-1B": 24,
+                "internvl2-2B": 24,
+                "internvl2-4B": 32,
+                "internvl2-8B": 32,
+                "internvl2-26B": 48,
+                "internvl2-40B": 60,
+                "internvl2-76B": 80,
+                "internvl2.5-1B": 24,
+                "internvl2.5-2B": 24,
+                "internvl2.5-4B": 36,
+                "internvl2.5-8B": 32,
+                "internvl2.5-26B": 48,
+                "internvl2.5-38B": 64,
+                "internvl2.5-78B": 80,
+            }[model_name]

         # Since the first GPU will be used for ViT, treat it as half a GPU.
         num_layers_per_gpu = math.ceil(num_layers / (world_size - 0.5))
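Note on the `_split_model` change above: the language-model layers are spread across all visible GPUs, with GPU 0 counted as only half a GPU because it also hosts the vision tower, and for InternVL3 the layer count now comes from `config.llm_config.num_hidden_layers` instead of the hard-coded table. A minimal sketch of that split, assuming the usual InternVL `language_model.model.layers.*` device-map keys (the allocation loop itself is not shown in this hunk):

import math

def sketch_device_map(num_layers: int, world_size: int) -> dict:
    # GPU 0 also runs the ViT, so it is treated as half a GPU (same formula as above).
    device_map = {}
    per_gpu = math.ceil(num_layers / (world_size - 0.5))
    counts = [per_gpu] * world_size
    counts[0] = math.ceil(per_gpu * 0.5)  # assumed: GPU 0 takes roughly half a share
    layer = 0
    for gpu_id, n in enumerate(counts):
        for _ in range(n):
            if layer >= num_layers:
                break
            device_map[f"language_model.model.layers.{layer}"] = gpu_id  # assumed key naming
            layer += 1
    return device_map

# Example: 32 layers on 2 GPUs -> 11 layers on GPU 0, 21 on GPU 1.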
xinference/model/llm/transformers/minicpmv26.py
CHANGED

@@ -20,7 +20,12 @@ import torch
 from PIL import Image

 from ....core.scheduler import InferenceRequest
-from ....types import
+from ....types import (
+    ChatCompletion,
+    ChatCompletionChunk,
+    CompletionChunk,
+    PytorchModelConfig,
+)
 from ...utils import select_device
 from ..llm_family import LLMFamilyV1, LLMSpecV1
 from ..utils import (

@@ -52,6 +57,15 @@ class MiniCPMV26Model(PytorchChatModel):
             return True
         return False

+    def _sanitize_model_config(
+        self, pytorch_model_config: Optional[PytorchModelConfig]
+    ) -> PytorchModelConfig:
+        pytorch_model_config = super()._sanitize_model_config(pytorch_model_config)
+        assert pytorch_model_config is not None
+        pytorch_model_config.setdefault("min_pixels", 256 * 28 * 28)
+        pytorch_model_config.setdefault("max_pixels", 1280 * 28 * 28)
+        return pytorch_model_config
+
     def _get_model_class(self):
         from transformers import AutoModel


@@ -99,8 +113,13 @@ class MiniCPMV26Model(PytorchChatModel):
             self.model_path,
             trust_remote_code=True,
         )
+        min_pixels = self._pytorch_model_config.get("min_pixels")
+        max_pixels = self._pytorch_model_config.get("max_pixels")
         self._processor = AutoProcessor.from_pretrained(
-            self.model_path,
+            self.model_path,
+            trust_remote_code=True,
+            min_pixels=min_pixels,
+            max_pixels=max_pixels,
         )
         self._device = self._model.device
         self._save_tensorizer()
xinference/model/llm/transformers/qwen-omni.py
ADDED

@@ -0,0 +1,308 @@
+# Copyright 2022-2025 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import base64
+import importlib.util
+import io
+import logging
+import sys
+import time
+import uuid
+from typing import Dict, Iterator, List, Optional, Union
+
+from ....model.utils import select_device
+from ....types import (
+    ChatCompletion,
+    ChatCompletionAudio,
+    ChatCompletionChoice,
+    ChatCompletionChunk,
+    ChatCompletionMessage,
+    CompletionChunk,
+    CompletionUsage,
+)
+from ..llm_family import LLMFamilyV1, LLMSpecV1, register_transformer
+from ..utils import generate_completion_chunk
+from .core import PytorchChatModel, PytorchGenerateConfig, register_non_default_model
+from .utils import cache_clean
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_SYSTEM_PROMPT = (
+    "You are Qwen, a virtual human developed by the Qwen Team, Alibaba Group, "
+    "capable of perceiving auditory and visual inputs, as well as generating text and speech."
+)
+
+
+@register_transformer
+@register_non_default_model("qwen2.5-omni")
+class Qwen2_5OmniChatModel(PytorchChatModel):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self._tokenizer = None
+        self._model = None
+        self._device = None
+        self._processor = None
+
+    @classmethod
+    def match(
+        cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
+    ) -> bool:
+        if model_spec.model_format not in ["pytorch", "gptq", "awq"]:
+            return False
+        llm_family = model_family.model_family or model_family.model_name
+        if "qwen2.5-omni".lower() in llm_family.lower():
+            return True
+        return False
+
+    def load(self):
+        from transformers import (
+            Qwen2_5OmniForConditionalGeneration,
+            Qwen2_5OmniProcessor,
+        )
+
+        device = self._pytorch_model_config.get("device", "auto")
+        device = select_device(device)
+        self._device = device
+        # for multiple GPU, set back to auto to make multiple devices work
+        device = "auto" if device == "cuda" else device
+        flash_attn_installed = importlib.util.find_spec("flash_attn") is not None
+        kwargs = (
+            {}
+            if not flash_attn_installed
+            else {"attn_implementation": "flash_attention_2"}
+        )
+        logger.debug("Loading model with extra kwargs: %s", kwargs)
+
+        self._processor = Qwen2_5OmniProcessor.from_pretrained(
+            self.model_path, trust_remote_code=True
+        )
+        self._tokenizer = self._processor.tokenizer
+        self._model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
+            self.model_path,
+            torch_dtype="auto",
+            device_map=device,
+            trust_remote_code=True,
+            **kwargs,
+        )
+
+    @cache_clean
+    def chat(
+        self,
+        messages: List[Dict],
+        generate_config: Optional[PytorchGenerateConfig] = None,
+    ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
+        messages = self._transform_messages(messages)
+
+        generate_config = generate_config if generate_config else {}
+
+        stream = generate_config.get("stream", False) if generate_config else False
+
+        if stream:
+            it = self._generate_stream(messages, generate_config)
+            return self._to_chat_completion_chunks(it)
+        else:
+            c = self._generate(messages, generate_config)
+            return c
+
+    def _transform_messages(
+        self,
+        messages: Union[List[ChatCompletionMessage], List[dict]],
+    ):
+        messages = super()._transform_messages(messages)
+        if messages[0]["role"] != "system":
+            messages.insert(
+                0,
+                {
+                    "role": "system",
+                    "content": [{"type": "text", "text": DEFAULT_SYSTEM_PROMPT}],  # type: ignore
+                },
+            )
+        else:
+            logger.debug("Force to set system prompt")
+            messages[0]["content"] = [{"type": "text", "text": DEFAULT_SYSTEM_PROMPT}]  # type: ignore
+        return messages
+
+    def _generate(
+        self, messages: List, config: PytorchGenerateConfig = {}
+    ) -> ChatCompletion:
+        import soundfile as sf
+        from qwen_omni_utils import process_mm_info
+
+        use_audio_in_video = config.get("use_audio_in_video", True)
+        voice = config.get("voice", "Chelsie")
+
+        text = self._processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        audios, images, videos = process_mm_info(
+            messages, use_audio_in_video=use_audio_in_video
+        )
+        logger.debug(
+            "Text, audio, image, video: %s, %s, %s, %s", text, audios, images, videos
+        )
+        inputs = self._processor(
+            text=text,
+            images=images,
+            audio=audios,
+            videos=videos,
+            padding=True,
+            return_tensors="pt",
+            use_audio_in_video=use_audio_in_video,
+        )
+        inputs = inputs.to(self._device)
+
+        # Inference: Generation of the output
+        generated_ids, audio = self._model.generate(
+            **inputs,
+            speaker=voice,
+            max_new_tokens=config.get("max_tokens", 512),
+            temperature=config.get("temperature", 1),
+            use_audio_in_video=use_audio_in_video,
+        )
+        generated_ids_trimmed = [
+            out_ids[len(in_ids) :]
+            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+        ]
+        output_text = self._processor.batch_decode(
+            generated_ids_trimmed,
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=False,
+        )[0]
+
+        wav_io = io.BytesIO()
+        sf.write(
+            wav_io,
+            audio.reshape(-1).detach().cpu().numpy(),
+            samplerate=24000,
+            format="WAV",
+        )
+        wav_bytes = wav_io.getvalue()
+        audio_content = base64.b64encode(wav_bytes).decode()
+
+        return ChatCompletion(
+            id="chat" + str(uuid.uuid1()),
+            object="chat.completion",
+            created=int(time.time()),
+            model=self.model_uid,
+            choices=[
+                ChatCompletionChoice(
+                    index=0,
+                    message={
+                        "role": "assistant",
+                        "content": output_text,
+                        "audio": ChatCompletionAudio(
+                            id="audio" + str(uuid.uuid1()),
+                            data=audio_content,
+                            expires_at=int(time.time()),
+                            transcript="",
+                        ),
+                    },
+                    finish_reason="stop",
+                )
+            ],
+            usage=CompletionUsage(
+                prompt_tokens=-1, completion_tokens=-1, total_tokens=-1
+            ),
+        )
+
+    def _generate_stream(
+        self, messages: List, config: PytorchGenerateConfig = {}
+    ) -> Iterator[CompletionChunk]:
+        from threading import Thread
+
+        from qwen_omni_utils import process_mm_info
+        from transformers import TextIteratorStreamer
+
+        use_audio_in_video = config.get("use_audio_in_video", True)
+        voice = config.get("voice", "Chelsie")
+
+        text = self._processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        audios, images, videos = process_mm_info(
+            messages, use_audio_in_video=use_audio_in_video
+        )
+        logger.debug(
+            "Text, audio, image, video: %s, %s, %s, %s", text, audios, images, videos
+        )
+        inputs = self._processor(
+            text=text,
+            images=images,
+            audio=audios,
+            videos=videos,
+            padding=True,
+            return_tensors="pt",
+            use_audio_in_video=use_audio_in_video,
+        )
+        inputs = inputs.to(self._device)
+
+        tokenizer = self._tokenizer
+        streamer = TextIteratorStreamer(
+            tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True
+        )
+
+        # TODO(xuye): Cannot find a way to streaming output,
+        # will implement it when it's supported
+
+        gen_kwargs = {
+            "max_new_tokens": config.get("max_tokens", 512),
+            "temperature": config.get("temperature", 1),
+            "streamer": streamer,
+            "speaker": voice,
+            **inputs,
+        }
+        error = None
+
+        def model_generate():
+            try:
+                return self._model.generate(**gen_kwargs)
+            except Exception:
+                nonlocal error
+                error = sys.exc_info()
+                streamer.end()
+                raise
+
+        thread = Thread(target=model_generate)
+        thread.start()
+
+        completion_id = str(uuid.uuid1())
+        for new_text in streamer:
+            yield generate_completion_chunk(
+                chunk_text=new_text,
+                finish_reason=None,
+                chunk_id=completion_id,
+                model_uid=self.model_uid,
+                prompt_tokens=-1,
+                completion_tokens=-1,
+                total_tokens=-1,
+                has_choice=True,
+                has_content=True,
+            )
+
+        if error:
+            _, err, tb = error  # type: ignore
+            raise err.with_traceback(tb)
+
+        yield generate_completion_chunk(
+            chunk_text=None,
+            finish_reason="stop",
+            chunk_id=completion_id,
+            model_uid=self.model_uid,
+            prompt_tokens=-1,
+            completion_tokens=-1,
+            total_tokens=-1,
+            has_choice=True,
+            has_content=False,
+        )
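The new `qwen-omni.py` model returns generated speech as a base64-encoded WAV inside `ChatCompletionAudio.data`, alongside the usual text content. A minimal client-side sketch for saving that audio, assuming a locally running endpoint and that extra `generate_config` keys such as `voice` are forwarded to the model as the code above expects (endpoint URL and model UID are placeholders):

import base64

from xinference.client import Client

client = Client("http://localhost:9997")   # placeholder endpoint
model = client.get_model("qwen2.5-omni")   # placeholder model UID

response = model.chat(
    messages=[{"role": "user", "content": "Introduce yourself in one sentence."}],
    generate_config={"voice": "Chelsie", "max_tokens": 256},
)

print(response["choices"][0]["message"]["content"])
audio = response["choices"][0]["message"].get("audio")
if audio:
    # decode the base64 WAV produced by _generate() above
    with open("reply.wav", "wb") as f:
        f.write(base64.b64decode(audio["data"]))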
xinference/model/llm/transformers/qwen2_vl.py
CHANGED

@@ -24,15 +24,18 @@ from ....types import (
     ChatCompletionChunk,
     ChatCompletionMessage,
     CompletionChunk,
+    PytorchModelConfig,
 )
-from ..llm_family import LLMFamilyV1, LLMSpecV1
+from ..llm_family import LLMFamilyV1, LLMSpecV1, register_transformer
 from ..utils import generate_chat_completion, generate_completion_chunk
-from .core import PytorchChatModel, PytorchGenerateConfig
+from .core import PytorchChatModel, PytorchGenerateConfig, register_non_default_model
 from .utils import cache_clean

 logger = logging.getLogger(__name__)


+@register_transformer
+@register_non_default_model("qwen2-vl-instruct", "qwen2.5-vl-instruct")
 class Qwen2VLChatModel(PytorchChatModel):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

@@ -41,6 +44,15 @@ class Qwen2VLChatModel(PytorchChatModel):
         self._device = None
         self._processor = None

+    def _sanitize_model_config(
+        self, pytorch_model_config: Optional[PytorchModelConfig]
+    ) -> PytorchModelConfig:
+        pytorch_model_config = super()._sanitize_model_config(pytorch_model_config)
+        assert pytorch_model_config is not None
+        pytorch_model_config.setdefault("min_pixels", 256 * 28 * 28)
+        pytorch_model_config.setdefault("max_pixels", 1280 * 28 * 28)
+        return pytorch_model_config
+
     @classmethod
     def match(
         cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str

@@ -69,9 +81,13 @@ class Qwen2VLChatModel(PytorchChatModel):
         self._device = device
         # for multiple GPU, set back to auto to make multiple devices work
         device = "auto" if device == "cuda" else device
-
+        min_pixels = self._pytorch_model_config.get("min_pixels")
+        max_pixels = self._pytorch_model_config.get("max_pixels")
         self._processor = AutoProcessor.from_pretrained(
-            self.model_path,
+            self.model_path,
+            trust_remote_code=True,
+            min_pixels=min_pixels,
+            max_pixels=max_pixels,
         )
         self._tokenizer = self._processor.tokenizer
         flash_attn_installed = importlib.util.find_spec("flash_attn") is not None
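Both `minicpmv26.py` and `qwen2_vl.py` now default the processor's pixel budget via `_sanitize_model_config`: `min_pixels = 256 * 28 * 28` and `max_pixels = 1280 * 28 * 28`. In Qwen2-VL-style processors one visual token corresponds to a 28×28 pixel block after resizing, so these defaults bound an image to roughly 256-1280 visual tokens; a quick check of the arithmetic (the token estimate is an approximation, not code from this diff):

# Default pixel budget added in _sanitize_model_config above.
MIN_PIXELS = 256 * 28 * 28    # 200_704 pixels   -> ~256 visual tokens
MAX_PIXELS = 1280 * 28 * 28   # 1_003_520 pixels -> ~1280 visual tokens

def approx_visual_tokens(width: int, height: int) -> int:
    # Assumes the Qwen2-VL convention of one token per 28x28 patch after the
    # image is resized into the [MIN_PIXELS, MAX_PIXELS] range.
    pixels = min(max(width * height, MIN_PIXELS), MAX_PIXELS)
    return pixels // (28 * 28)

print(approx_visual_tokens(1920, 1080))  # clamped to the max budget -> 1280

Because the values live in `pytorch_model_config`, they can be overridden at launch time instead of being hard-coded in the `AutoProcessor.from_pretrained` call.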
xinference/model/llm/utils.py
CHANGED
@@ -31,6 +31,7 @@ from typing import (
     List,
     Optional,
     Tuple,
+    Union,
     cast,
 )


@@ -762,7 +763,7 @@ class ChatModelMixin:

     def _transform_messages(
         self,
-        messages: List[ChatCompletionMessage],
+        messages: Union[List[ChatCompletionMessage], List[dict]],
     ):
         transformed_messages = []
         for msg in messages:

@@ -783,6 +784,15 @@ class ChatModelMixin:
                     new_content.append(
                         {"type": "video", "video": item["video_url"]["url"]}
                     )
+                elif "audio_url" in item:
+                    new_content.append(
+                        {"type": "audio", "audio": item["audio_url"]["url"]}
+                    )
+                else:
+                    logger.warning(
+                        "Unknown message type, message: %s, this message may be ignored",
+                        messages,
+                    )
             new_message = {"role": role, "content": new_content}
             transformed_messages.append(new_message)

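With the `audio_url` branch added to `_transform_messages`, an OpenAI-style content item carrying audio is rewritten into the `{"type": "audio", ...}` form the Qwen processors expect, and unrecognized items now log a warning noting they may be ignored. A small illustration of the mapping (the URL is a placeholder):

# Incoming OpenAI-style message accepted by the chat endpoint:
incoming = {
    "role": "user",
    "content": [
        {"type": "text", "text": "What is said in this clip?"},
        {"type": "audio_url", "audio_url": {"url": "https://example.com/clip.wav"}},
    ],
}

# After _transform_messages, the audio item becomes:
# {"type": "audio", "audio": "https://example.com/clip.wav"}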
xinference/model/llm/vllm/core.py
CHANGED

@@ -37,6 +37,7 @@ from typing import (
 )

 import xoscar as xo
+from typing_extensions import NotRequired

 from ....types import (
     ChatCompletion,

@@ -81,6 +82,9 @@ class VLLMModelConfig(TypedDict, total=False):
     scheduling_policy: Optional[str]
     reasoning_content: bool
     model_quantization: Optional[str]
+    mm_processor_kwargs: NotRequired[dict[str, Any]]
+    min_pixels: NotRequired[int]
+    max_pixels: NotRequired[int]


 class VLLMGenerateConfig(TypedDict, total=False):

@@ -170,6 +174,8 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.3.0":
     VLLM_SUPPORTED_CHAT_MODELS.append("marco-o1")
     VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-r1-distill-qwen")
     VLLM_SUPPORTED_CHAT_MODELS.append("fin-r1")
+    VLLM_SUPPORTED_CHAT_MODELS.append("seallms-v3")
+    VLLM_SUPPORTED_CHAT_MODELS.append("skywork-or1-preview")

 if VLLM_INSTALLED and vllm.__version__ >= "0.3.2":
     VLLM_SUPPORTED_CHAT_MODELS.append("gemma-it")

@@ -205,6 +211,7 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.6.1":
     VLLM_SUPPORTED_VISION_MODEL_LIST.append("internvl2")
     VLLM_SUPPORTED_VISION_MODEL_LIST.append("InternVL2.5")
     VLLM_SUPPORTED_VISION_MODEL_LIST.append("InternVL2.5-MPO")
+    VLLM_SUPPORTED_VISION_MODEL_LIST.append("InternVL3")

 if VLLM_INSTALLED and vllm.__version__ >= "0.6.2":
     VLLM_SUPPORTED_CHAT_MODELS.append("minicpm3-4b")

@@ -229,6 +236,9 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.8.0":
     VLLM_SUPPORTED_CHAT_MODELS.append("gemma-3-1b-it")
     VLLM_SUPPORTED_VISION_MODEL_LIST.append("gemma-3-it")

+if VLLM_INSTALLED and vllm.__version__ >= "0.8.4":
+    VLLM_SUPPORTED_CHAT_MODELS.append("glm4-0414")
+

 class VLLMModel(LLM):
     def __init__(

@@ -531,6 +541,31 @@ class VLLMModel(LLM):
         # Add scheduling policy if vLLM version is 0.6.3 or higher
         if vllm.__version__ >= "0.6.3":
             model_config.setdefault("scheduling_policy", "fcfs")
+        # init mm_processor_kwargs params
+        mm_processor_kwargs = model_config.get("mm_processor_kwargs", {})
+        if isinstance(mm_processor_kwargs, str):
+            try:
+                mm_processor_kwargs = json.loads(mm_processor_kwargs)
+            except json.JSONDecodeError:
+                logger.warning(
+                    "Failed to parse mm_processor_kwargs as JSON, using default empty dict"
+                )
+                mm_processor_kwargs = {}
+            except Exception as e:
+                logger.warning(
+                    f"Unexpected error parsing mm_processor_kwargs: {e}, using default empty dict"
+                )
+                mm_processor_kwargs = {}
+        pixel_params: Dict[str, int] = {}
+        if "min_pixels" in model_config:
+            pixel_params["min_pixels"] = model_config.pop("min_pixels")
+        if "max_pixels" in model_config:
+            pixel_params["max_pixels"] = model_config.pop("max_pixels")
+        if pixel_params or mm_processor_kwargs:
+            model_config["mm_processor_kwargs"] = {
+                **mm_processor_kwargs,
+                **pixel_params,
+            }
         return model_config

     @staticmethod
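The new `mm_processor_kwargs` handling in vLLM's `_sanitize_model_config` accepts either a dict or a JSON string and folds any top-level `min_pixels` / `max_pixels` into it before the config reaches vLLM. A minimal illustration of the resulting config under that logic (the input values are placeholders):

# Placeholder user config: a JSON-string mm_processor_kwargs plus standalone pixel bounds.
model_config = {
    "mm_processor_kwargs": "{}",   # string form is parsed with json.loads
    "min_pixels": 256 * 28 * 28,
    "max_pixels": 1280 * 28 * 28,
}

# After the sanitization block above, the standalone keys are popped and merged:
# model_config["mm_processor_kwargs"] == {"min_pixels": 200704, "max_pixels": 1003520}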
xinference/model/llm/vllm/distributed_executor.py
CHANGED

@@ -84,7 +84,7 @@ class WorkerWrapper:
         return await self._worker_actor_ref.execute_method(method, *args, **kwargs)

     def kill(self):
-        coro = xo.
+        coro = xo.destroy_actor(self._worker_actor_ref)
         return asyncio.run_coroutine_threadsafe(coro, self._loop)


@@ -108,6 +108,7 @@ class XinferenceDistributedExecutor(DistributedExecutorBase):
         self._pool_addresses = pool_addresses
         self._loop = loop
         self._n_worker = n_worker
+        self._is_shutdown = False
         super().__init__(vllm_config, *args, **kwargs)

     def _init_executor(self) -> None:

@@ -247,11 +248,16 @@ class XinferenceDistributedExecutor(DistributedExecutorBase):
             return

     def shutdown(self) -> None:
+        if self._is_shutdown:
+            return
+
         try:
+            self._is_shutdown = True
             futs = [worker.kill() for worker in self.workers]
             _ = [fut.result() for fut in futs]
-        except (RuntimeError, ConnectionError):
+        except (RuntimeError, ConnectionError, xo.ActorNotExist):
             # event loop closed already, ignore
+            # or actor already removed
             pass

     def __del__(self):
xinference/model/rerank/core.py
CHANGED
@@ -29,7 +29,7 @@ import torch.nn as nn
 from ...constants import XINFERENCE_CACHE_DIR
 from ...device_utils import empty_cache
 from ...types import Document, DocumentObj, Rerank, RerankTokens
-from ..core import CacheableModelSpec, ModelDescription
+from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
 from ..utils import is_model_cached

 logger = logging.getLogger(__name__)

@@ -56,6 +56,7 @@ class RerankModelSpec(CacheableModelSpec):
     model_id: str
     model_revision: Optional[str]
     model_hub: str = "huggingface"
+    virtualenv: Optional[VirtualEnvSettings]


 class RerankModelDescription(ModelDescription):

@@ -69,6 +70,10 @@ class RerankModelDescription(ModelDescription):
         super().__init__(address, devices, model_path=model_path)
         self._model_spec = model_spec

+    @property
+    def spec(self):
+        return self._model_spec
+
     def to_dict(self):
         return {
             "model_type": "rerank",