PyPI - xinference - Versions diffs - 0.16.2__py3-none-any.whl → 0.16.3__py3-none-any.whl - Mend

xinference 0.16.2__py3-none-any.whl → 0.16.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (20) hide show

xinference/_version.py +3 -3
xinference/conftest.py +0 -8
xinference/constants.py +1 -0
xinference/core/model.py +34 -2
xinference/core/supervisor.py +5 -5
xinference/core/utils.py +9 -10
xinference/core/worker.py +5 -4
xinference/deploy/cmdline.py +5 -0
xinference/deploy/utils.py +7 -4
xinference/model/audio/model_spec.json +1 -1
xinference/model/llm/core.py +1 -3
xinference/model/llm/llm_family.json +87 -0
xinference/model/llm/llm_family_modelscope.json +91 -0
xinference/model/llm/vllm/core.py +2 -1
{xinference-0.16.2.dist-info → xinference-0.16.3.dist-info}/METADATA +4 -3
{xinference-0.16.2.dist-info → xinference-0.16.3.dist-info}/RECORD +20 -20
{xinference-0.16.2.dist-info → xinference-0.16.3.dist-info}/LICENSE +0 -0
{xinference-0.16.2.dist-info → xinference-0.16.3.dist-info}/WHEEL +0 -0
{xinference-0.16.2.dist-info → xinference-0.16.3.dist-info}/entry_points.txt +0 -0
{xinference-0.16.2.dist-info → xinference-0.16.3.dist-info}/top_level.txt +0 -0

xinference/_version.py CHANGED Viewed

@@ -8,11 +8,11 @@ import json
 version_json = '''
 {
- "date": "2024-11-01T17:56:47+0800",
+ "date": "2024-11-07T16:55:36+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "67e97ab485b539dc7a208825bee0504acc37044e",
- "version": "0.16.2"
+ "full-revisionid": "85ab86bf1c0967e45fbec995534cd5a0c9a9c439",
+ "version": "0.16.3"
 }
 '''  # END VERSION_JSON

xinference/conftest.py CHANGED Viewed

@@ -58,10 +58,6 @@ TEST_LOGGING_CONF = {
             "propagate": False,
         }
     },
-    "root": {
-        "level": "WARN",
-        "handlers": ["stream_handler"],
-    },
 }
 TEST_LOG_FILE_PATH = get_log_file(f"test_{get_timestamp_ms()}")
@@ -102,10 +98,6 @@ TEST_FILE_LOGGING_CONF = {
             "propagate": False,
         }
     },
-    "root": {
-        "level": "WARN",
-        "handlers": ["stream_handler", "file_handler"],
-    },
 }

xinference/constants.py CHANGED Viewed

@@ -87,3 +87,4 @@ XINFERENCE_DOWNLOAD_MAX_ATTEMPTS = int(
 XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE = os.environ.get(
     XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE, None
 )
+XINFERENCE_LAUNCH_MODEL_RETRY = 3

xinference/core/model.py CHANGED Viewed

@@ -40,7 +40,10 @@ from typing import (
 import sse_starlette.sse
 import xoscar as xo
-from ..constants import XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE
+from ..constants import (
+    XINFERENCE_LAUNCH_MODEL_RETRY,
+    XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE,
+)
 if TYPE_CHECKING:
     from .progress_tracker import ProgressTrackerActor
@@ -134,6 +137,8 @@ def oom_check(fn):
 class ModelActor(xo.StatelessActor):
+    _replica_model_uid: Optional[str]
     @classmethod
     def gen_uid(cls, model: "LLM"):
         return f"{model.__class__}-model-actor"
@@ -192,6 +197,7 @@ class ModelActor(xo.StatelessActor):
         supervisor_address: str,
         worker_address: str,
         model: "LLM",
+        replica_model_uid: str,
         model_description: Optional["ModelDescription"] = None,
         request_limits: Optional[int] = None,
     ):
@@ -203,6 +209,7 @@ class ModelActor(xo.StatelessActor):
         self._supervisor_address = supervisor_address
         self._worker_address = worker_address
+        self._replica_model_uid = replica_model_uid
         self._model = model
         self._model_description = (
             model_description.to_dict() if model_description else {}
@@ -257,6 +264,9 @@ class ModelActor(xo.StatelessActor):
                 uid=FluxBatchSchedulerActor.gen_uid(self.model_uid()),
             )
+    def __repr__(self) -> str:
+        return f"ModelActor({self._replica_model_uid})"
     async def _record_completion_metrics(
         self, duration, completion_tokens, prompt_tokens
     ):
@@ -374,7 +384,28 @@ class ModelActor(xo.StatelessActor):
         return condition
     async def load(self):
-        self._model.load()
+        try:
+            # Change process title for model
+            import setproctitle
+            setproctitle.setproctitle(f"Model: {self._replica_model_uid}")
+        except ImportError:
+            pass
+        i = 0
+        while True:
+            i += 1
+            try:
+                self._model.load()
+                break
+            except Exception as e:
+                if (
+                    i < XINFERENCE_LAUNCH_MODEL_RETRY
+                    and str(e).find("busy or unavailable") >= 0
+                ):
+                    await asyncio.sleep(5)
+                    logger.warning("Retry to load model {model_uid}: %d times", i)
+                    continue
+                raise
         if self.allow_batching():
             await self._scheduler_ref.set_model(self._model)
             logger.debug(
@@ -385,6 +416,7 @@ class ModelActor(xo.StatelessActor):
             logger.debug(
                 f"Batching enabled for model: {self.model_uid()}, max_num_images: {self._model.get_max_num_images_for_batching()}"
             )
+        logger.info(f"{self} loaded")
     def model_uid(self):
         return (

xinference/core/supervisor.py CHANGED Viewed

@@ -970,7 +970,7 @@ class SupervisorActor(xo.StatelessActor):
                 raise ValueError(
                     f"Model is already in the model list, uid: {_replica_model_uid}"
                 )
-            replica_gpu_idx = assign_replica_gpu(_replica_model_uid, gpu_idx)
+            replica_gpu_idx = assign_replica_gpu(_replica_model_uid, replica, gpu_idx)
             nonlocal model_type
             worker_ref = (
@@ -1084,7 +1084,7 @@ class SupervisorActor(xo.StatelessActor):
                             dead_models,
                         )
                         for replica_model_uid in dead_models:
-                            model_uid, _, _ = parse_replica_model_uid(replica_model_uid)
+                            model_uid, _ = parse_replica_model_uid(replica_model_uid)
                             self._model_uid_to_replica_info.pop(model_uid, None)
                             self._replica_model_uid_to_worker.pop(
                                 replica_model_uid, None
@@ -1137,7 +1137,7 @@ class SupervisorActor(xo.StatelessActor):
             raise ValueError(f"Model not found in the model list, uid: {model_uid}")
         replica_model_uid = build_replica_model_uid(
-            model_uid, replica_info.replica, next(replica_info.scheduler)
+            model_uid, next(replica_info.scheduler)
         )
         worker_ref = self._replica_model_uid_to_worker.get(replica_model_uid, None)
@@ -1154,7 +1154,7 @@ class SupervisorActor(xo.StatelessActor):
             raise ValueError(f"Model not found in the model list, uid: {model_uid}")
         # Use rep id 0 to instead of next(replica_info.scheduler) to avoid
         # consuming the generator.
-        replica_model_uid = build_replica_model_uid(model_uid, replica_info.replica, 0)
+        replica_model_uid = build_replica_model_uid(model_uid, 0)
         worker_ref = self._replica_model_uid_to_worker.get(replica_model_uid, None)
         if worker_ref is None:
             raise ValueError(
@@ -1260,7 +1260,7 @@ class SupervisorActor(xo.StatelessActor):
                 uids_to_remove.append(model_uid)
         for replica_model_uid in uids_to_remove:
-            model_uid, _, _ = parse_replica_model_uid(replica_model_uid)
+            model_uid, _ = parse_replica_model_uid(replica_model_uid)
             self._model_uid_to_replica_info.pop(model_uid, None)
             self._replica_model_uid_to_worker.pop(replica_model_uid, None)

xinference/core/utils.py CHANGED Viewed

@@ -146,27 +146,26 @@ def iter_replica_model_uid(model_uid: str, replica: int) -> Generator[str, None,
     """
     replica = int(replica)
     for rep_id in range(replica):
-        yield f"{model_uid}-{replica}-{rep_id}"
+        yield f"{model_uid}-{rep_id}"
-def build_replica_model_uid(model_uid: str, replica: int, rep_id: int) -> str:
+def build_replica_model_uid(model_uid: str, rep_id: int) -> str:
     """
     Build a replica model uid.
     """
-    return f"{model_uid}-{replica}-{rep_id}"
+    return f"{model_uid}-{rep_id}"
-def parse_replica_model_uid(replica_model_uid: str) -> Tuple[str, int, int]:
+def parse_replica_model_uid(replica_model_uid: str) -> Tuple[str, int]:
     """
-    Parse replica model uid to model uid, replica and rep id.
+    Parse replica model uid to model uid and rep id.
     """
     parts = replica_model_uid.split("-")
     if len(parts) == 1:
-        return replica_model_uid, -1, -1
+        return replica_model_uid, -1
     rep_id = int(parts.pop())
-    replica = int(parts.pop())
     model_uid = "-".join(parts)
-    return model_uid, replica, rep_id
+    return model_uid, rep_id
 def is_valid_model_uid(model_uid: str) -> bool:
@@ -261,9 +260,9 @@ def get_nvidia_gpu_info() -> Dict:
 def assign_replica_gpu(
-    _replica_model_uid: str, gpu_idx: Union[int, List[int]]
+    _replica_model_uid: str, replica: int, gpu_idx: Union[int, List[int]]
 ) -> List[int]:
-    model_uid, replica, rep_id = parse_replica_model_uid(_replica_model_uid)
+    model_uid, rep_id = parse_replica_model_uid(_replica_model_uid)
     rep_id, replica = int(rep_id), int(replica)
     if isinstance(gpu_idx, int):
         gpu_idx = [gpu_idx]

xinference/core/worker.py CHANGED Viewed

@@ -157,7 +157,7 @@ class WorkerActor(xo.StatelessActor):
                                 model_uid,
                                 recover_count - 1,
                             )
-                            event_model_uid, _, __ = parse_replica_model_uid(model_uid)
+                            event_model_uid, _ = parse_replica_model_uid(model_uid)
                             try:
                                 if self._event_collector_ref is not None:
                                     await self._event_collector_ref.report_event(
@@ -377,7 +377,7 @@ class WorkerActor(xo.StatelessActor):
         return len(self._model_uid_to_model)
     async def is_model_vllm_backend(self, model_uid: str) -> bool:
-        _model_uid, _, _ = parse_replica_model_uid(model_uid)
+        _model_uid, _ = parse_replica_model_uid(model_uid)
         supervisor_ref = await self.get_supervisor_ref()
         model_ref = await supervisor_ref.get_model(_model_uid)
         return await model_ref.is_vllm_backend()
@@ -800,7 +800,7 @@ class WorkerActor(xo.StatelessActor):
         launch_args.update(kwargs)
         try:
-            origin_uid, _, _ = parse_replica_model_uid(model_uid)
+            origin_uid, _ = parse_replica_model_uid(model_uid)
         except Exception as e:
             logger.exception(e)
             raise
@@ -889,6 +889,7 @@ class WorkerActor(xo.StatelessActor):
                     uid=model_uid,
                     supervisor_address=self._supervisor_address,
                     worker_address=self.address,
+                    replica_model_uid=model_uid,
                     model=model,
                     model_description=model_description,
                     request_limits=request_limits,
@@ -926,7 +927,7 @@ class WorkerActor(xo.StatelessActor):
         # Terminate model while its launching is not allow
         if model_uid in self._model_uid_launching_guard:
             raise ValueError(f"{model_uid} is launching")
-        origin_uid, _, __ = parse_replica_model_uid(model_uid)
+        origin_uid, _ = parse_replica_model_uid(model_uid)
         try:
             _ = await self.get_supervisor_ref()
             if self._event_collector_ref is not None:

xinference/deploy/cmdline.py CHANGED Viewed

@@ -43,6 +43,7 @@ from .utils import (
     get_log_file,
     get_timestamp_ms,
     handle_click_args_type,
+    set_envs,
 )
 try:
@@ -106,6 +107,8 @@ def start_local_cluster(
         XINFERENCE_LOG_MAX_BYTES,
     )
     logging.config.dictConfig(dict_config)  # type: ignore
+    # refer to https://huggingface.co/docs/transformers/main_classes/logging
+    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())
     main(
         host=host,
@@ -280,6 +283,7 @@ def supervisor(
         XINFERENCE_LOG_MAX_BYTES,
     )
     logging.config.dictConfig(dict_config)  # type: ignore
+    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())
     main(
         host=host,
@@ -342,6 +346,7 @@ def worker(
         XINFERENCE_LOG_MAX_BYTES,
     )
     logging.config.dictConfig(dict_config)  # type: ignore
+    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())
     endpoint = get_endpoint(endpoint)

xinference/deploy/utils.py CHANGED Viewed

@@ -134,10 +134,6 @@ def get_config_dict(
                 "propagate": False,
             },
         },
-        "root": {
-            "level": "WARN",
-            "handlers": ["stream_handler", "file_handler"],
-        },
     }
     return config_dict
@@ -220,3 +216,10 @@ def handle_click_args_type(arg: str) -> Any:
         pass
     return arg
+def set_envs(key: str, value: str):
+    """
+    Environment variables are set by the parent process and inherited by child processes
+    """
+    os.environ[key] = value

xinference/model/audio/model_spec.json CHANGED Viewed

@@ -127,7 +127,7 @@
     "model_name": "ChatTTS",
     "model_family": "ChatTTS",
     "model_id": "2Noise/ChatTTS",
-    "model_revision": "3b34118f6d25850440b8901cef3e71c6ef8619c8",
+    "model_revision": "1a3c04a8b0651689bd9242fbb55b1f4b5a9aef84",
     "model_ability": "text-to-audio",
     "multilingual": true
   },

xinference/model/llm/core.py CHANGED Viewed

@@ -52,9 +52,7 @@ class LLM(abc.ABC):
         *args,
         **kwargs,
     ):
-        self.model_uid, self.replica, self.rep_id = parse_replica_model_uid(
-            replica_model_uid
-        )
+        self.model_uid, self.rep_id = parse_replica_model_uid(replica_model_uid)
         self.model_family = model_family
         self.model_spec = model_spec
         self.quantization = quantization

xinference/model/llm/llm_family.json CHANGED Viewed

@@ -1312,6 +1312,93 @@
       "<|eom_id|>"
     ]
   },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "llama-3.2-vision-instruct",
+    "model_lang": [
+      "en",
+      "de",
+      "fr",
+      "it",
+      "pt",
+      "hi",
+      "es",
+      "th"
+    ],
+    "model_ability": [
+	"chat",
+	"vision"
+    ],
+    "model_description": "Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 11,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "meta-llama/Llama-3.2-11B-Vision-Instruct"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 90,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "meta-llama/Llama-3.2-90B-Vision-Instruct"
+      }
+    ],
+    "chat_template": "{% for message in messages %}{% if loop.index0 == 0 %}{{ bos_token }}{% endif %}{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<|image|>' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{ '<|eot_id|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+    "stop_token_ids": [
+	128001,
+	128008,
+	128009
+    ],
+    "stop": [
+      "<|end_of_text|>",
+	"<|eot_id|>",
+	"<|eom_id|>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "llama-3.2-vision",
+    "model_lang": [
+      "en",
+      "de",
+      "fr",
+      "it",
+      "pt",
+      "hi",
+      "es",
+      "th"
+    ],
+    "model_ability": [
+	"generate",
+	"vision"
+    ],
+    "model_description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 11,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "meta-llama/Meta-Llama-3.2-11B-Vision"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 90,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "meta-llama/Meta-Llama-3.2-90B-Vision"
+      }
+    ]
+  },
   {
     "version": 1,
     "context_length": 2048,

xinference/model/llm/llm_family_modelscope.json CHANGED Viewed

@@ -363,6 +363,97 @@
       "<|eom_id|>"
     ]
   },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "llama-3.2-vision-instruct",
+    "model_lang": [
+      "en",
+      "de",
+      "fr",
+      "it",
+      "pt",
+      "hi",
+      "es",
+      "th"
+    ],
+    "model_ability": [
+	"chat",
+	"vision"
+    ],
+    "model_description": "Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 11,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "LLM-Research/Llama-3.2-11B-Vision-Instruct",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 90,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "LLM-Research/Llama-3.2-90B-Vision-Instruct",
+        "model_hub": "modelscope"
+      }
+    ],
+    "chat_template": "{% for message in messages %}{% if loop.index0 == 0 %}{{ bos_token }}{% endif %}{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<|image|>' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{ '<|eot_id|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+    "stop_token_ids": [
+	128001,
+	128008,
+	128009
+    ],
+    "stop": [
+      "<|end_of_text|>",
+	"<|eot_id|>",
+	"<|eom_id|>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "llama-3.2-vision",
+    "model_lang": [
+      "en",
+      "de",
+      "fr",
+      "it",
+      "pt",
+      "hi",
+      "es",
+      "th"
+    ],
+    "model_ability": [
+	"generate",
+	"vision"
+    ],
+    "model_description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 11,
+        "quantizations": [
+          "none"
+        ],
+          "model_id": "LLM-Research/Llama-3.2-11B-Vision",
+	  "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 90,
+        "quantizations": [
+          "none"
+        ],
+          "model_id": "LLM-Research/Llama-3.2-90B-Vision",
+	  "model_hub": "modelscope"
+      }
+    ]
+  },
   {
     "version": 1,
     "context_length": 2048,

xinference/model/llm/vllm/core.py CHANGED Viewed

@@ -163,7 +163,6 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.5.1":
     VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2-chat-0628")
     VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2.5")
 if VLLM_INSTALLED and vllm.__version__ >= "0.5.3":
     VLLM_SUPPORTED_CHAT_MODELS.append("gemma-2-it")
     VLLM_SUPPORTED_CHAT_MODELS.append("mistral-nemo-instruct")
@@ -177,6 +176,8 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.6.1":
     VLLM_SUPPORTED_VISION_MODEL_LIST.append("internvl2")
 if VLLM_INSTALLED and vllm.__version__ >= "0.6.3":
+    VLLM_SUPPORTED_MODELS.append("llama-3.2-vision")
+    VLLM_SUPPORTED_VISION_MODEL_LIST.append("llama-3.2-vision-instruct")
     VLLM_SUPPORTED_VISION_MODEL_LIST.append("qwen2-vl-instruct")

{xinference-0.16.2.dist-info → xinference-0.16.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: xinference
-Version: 0.16.2
+Version: 0.16.3
 Summary: Model Serving Made Easy
 Home-page: https://github.com/xorbitsai/inference
 Author: Qin Xuye
@@ -42,6 +42,7 @@ Requires-Dist: nvidia-ml-py
 Requires-Dist: async-timeout
 Requires-Dist: peft
 Requires-Dist: timm
+Requires-Dist: setproctitle
 Provides-Extra: all
 Requires-Dist: llama-cpp-python!=0.2.58,>=0.2.25; extra == "all"
 Requires-Dist: transformers>=4.43.2; extra == "all"
@@ -71,7 +72,7 @@ Requires-Dist: WeTextProcessing<1.0.4; extra == "all"
 Requires-Dist: librosa; extra == "all"
 Requires-Dist: xxhash; extra == "all"
 Requires-Dist: torchaudio; extra == "all"
-Requires-Dist: ChatTTS>=0.2; extra == "all"
+Requires-Dist: ChatTTS>=0.2.1; extra == "all"
 Requires-Dist: lightning>=2.0.0; extra == "all"
 Requires-Dist: hydra-core>=1.3.2; extra == "all"
 Requires-Dist: inflect; extra == "all"
@@ -105,7 +106,7 @@ Requires-Dist: WeTextProcessing<1.0.4; extra == "audio"
 Requires-Dist: librosa; extra == "audio"
 Requires-Dist: xxhash; extra == "audio"
 Requires-Dist: torchaudio; extra == "audio"
-Requires-Dist: ChatTTS>=0.2; extra == "audio"
+Requires-Dist: ChatTTS>=0.2.1; extra == "audio"
 Requires-Dist: tiktoken; extra == "audio"
 Requires-Dist: torch>=2.0.0; extra == "audio"
 Requires-Dist: lightning>=2.0.0; extra == "audio"

{xinference-0.16.2.dist-info → xinference-0.16.3.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,8 @@
 xinference/__init__.py,sha256=nmTTrYbIpj964ZF6ojtgOM7E85JBOj1EyQbmYjbj1jw,915
 xinference/_compat.py,sha256=xFztCfyrq3O_4bssL_ygghYkfxicv_ZhiX2YDDWHf-k,3571
-xinference/_version.py,sha256=LLdtJFZnTO6_OxxmoVVh6cxLwAakRJfTVLW8tmmb1Fs,498
-xinference/conftest.py,sha256=56HYQjsAJcQrpZSmskniPqH9dLoW-i3Oud6NVTtc4io,9752
-xinference/constants.py,sha256=l_aIN20C_NwitSEHFvrIqFvcW8Kg9SPX6NFEaPBu0VQ,3825
+xinference/_version.py,sha256=ZJMSF8nqOAMuCeAs35nQ2pCDZSaWMd6E2vS-3qLZTSc,498
+xinference/conftest.py,sha256=vETDpRBVIlWbWi7OTwf7og89U25KyYGyI7yPIB3O8N8,9564
+xinference/constants.py,sha256=VMj62qQ4h36Jt-AmH5g6hmJJteSlKrA3r47K7bGWEPc,3859
 xinference/device_utils.py,sha256=zswJiws3VyTIaNO8z-MOcsJH_UiPoePPiKK5zoNrjTA,3285
 xinference/fields.py,sha256=0UtBFaDNzn1n9MRjyTkNrolsIML-TpZfudWOejqjni8,5245
 xinference/isolation.py,sha256=uhkzVyL3fSYZSuFexkG6Jm-tRTC5I607uNg000BXAnE,1949
@@ -25,19 +25,19 @@ xinference/core/chat_interface.py,sha256=Kiqs1XOXgYBlP7DOXLEXaFjbVuS0yC1-dXJyxrx
 xinference/core/event.py,sha256=42F38H2WOl6aPxp2oxX6WNxHRRxbnvYRmbt4Ar7NP4U,1640
 xinference/core/image_interface.py,sha256=5Iuoiw3g2TvgOYi3gRIAGApve2nNzfMPduRrBHvd1NY,13755
 xinference/core/metrics.py,sha256=ScmTG15Uq3h_ob72ybZSMWdnk8P4sUZFcm60f4ikSXc,2631
-xinference/core/model.py,sha256=GEOqKDllyZBAPOUk7ujt-c88AfCgE7-bgYplzl3XOCk,38613
+xinference/core/model.py,sha256=7BWvhZmLN2joYCCBWTqiVCMX0moGttz8Fyl15OY9hT8,39587
 xinference/core/progress_tracker.py,sha256=LIF6CLIlnEoSBkuDCraJktDOzZ31mQ4HOo6EVr3KpQM,6453
 xinference/core/resource.py,sha256=FQ0aRt3T4ZQo0P6CZZf5QUKHiCsr5llBvKb1f7wfnxg,1611
 xinference/core/scheduler.py,sha256=gdj3SyP_jelJ86vTRrgnFynhxz5JSwLRsQgx8PTtBi8,15671
 xinference/core/status_guard.py,sha256=4an1KjUOhCStgRQUw1VSzXcycXUtvhxwiMREKKcl1UI,2828
-xinference/core/supervisor.py,sha256=Wkjhk1tfRuhyQmcVNrHZApWO09MDA5-Uu4u2p1GBj3I,52964
-xinference/core/utils.py,sha256=pFggqUjfsB9ME6V0VqsppN7KAHNrqpxMuJsIUPNkwoM,8745
-xinference/core/worker.py,sha256=MmGZuPZlI-DrC3VahkSZjGhpw9S9ISVGsxWhBlKNQMk,46367
+xinference/core/supervisor.py,sha256=Z7cY28M0OeY27-z-OhB9f7BDGs_TVvbSsez1rEJjpdo,52923
+xinference/core/utils.py,sha256=iY9Oog3M-k3OoUJFUfIbcWUQ94Yq0T9iIG_b2iPudP0,8658
+xinference/core/worker.py,sha256=YIlaQosBRj_VStfZGPfWnT2ie13GW8K4NNEP5qz28lI,46402
 xinference/deploy/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/deploy/cmdline.py,sha256=YNXbPIT9zJIp5EQzl_rH5KwDDYqBd2CbaOVF8hA0lws,48120
+xinference/deploy/cmdline.py,sha256=yQI6KuRUzih0rs_fInp2Lr3rwkOjBOM0eydPaF7VKDQ,48385
 xinference/deploy/local.py,sha256=gcH6WfTxfhjvNkxxKZH3tcGtXV48BEPoaLWYztZHaeo,3954
 xinference/deploy/supervisor.py,sha256=68rB2Ey5KFeF6zto9YGbw3P8QLZmF_KSh1NwH_pNP4w,2986
-xinference/deploy/utils.py,sha256=71xnPSjjF3XDZIYmlJ59Fbr7mswWERtNdjfdYGwyT_I,6703
+xinference/deploy/utils.py,sha256=jdL7i2WV6u_BZ8IiE1d3YktvCARcB3ntzMQ5rHGD5DM,6756
 xinference/deploy/worker.py,sha256=VQ71ClWpeGsyFgDmcOes2ub1cil10cBjhFLHYeuVwC4,2974
 xinference/deploy/test/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/deploy/test/test_cmdline.py,sha256=m8xDzjtDuAJy0QkvYVJIZDuTB29cmYBV0d231JyRCPU,7714
@@ -51,7 +51,7 @@ xinference/model/audio/cosyvoice.py,sha256=Enur1Y4Xa-mpr7wwnoXWwhyh7PUAjrHZ8DV91
 xinference/model/audio/custom.py,sha256=8GXBRmTtR-GY03-E91nlRGTIuabCRzlt20ecU6Un6Y8,4985
 xinference/model/audio/fish_speech.py,sha256=v2WVEV-BLWnbiDvqrx8WTGE_YNKmd9QoAF1LZBXWxn0,7310
 xinference/model/audio/funasr.py,sha256=65z7U7_F14CCP-jg6BpeY3_49FK7Y5OCRSzrhhsklCg,4075
-xinference/model/audio/model_spec.json,sha256=JLgT4fKZuD5jz5cBO_KIFkSm_6a6UEW6z0YVrfQJJkI,5120
+xinference/model/audio/model_spec.json,sha256=dHk9t-wBpQ7eso_6_csEO0LwTOoVucq_dAN9PxVjv5M,5120
 xinference/model/audio/model_spec_modelscope.json,sha256=U82E5vZahi4si6kpCjdp2FAG2lCpQ7s7w_1t6lj2ysI,2038
 xinference/model/audio/utils.py,sha256=pwo5cHh8nvhyBa9f-17QaVpXMSjmbpGbPYKwBBtEhGM,717
 xinference/model/audio/whisper.py,sha256=PQL7rebGC7WlIOItuDtjdEtSJtlhxFkolot-Fj-8uDU,7982
@@ -82,11 +82,11 @@ xinference/model/image/stable_diffusion/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17k
 xinference/model/image/stable_diffusion/core.py,sha256=qqMjFcM7KpjQc79irWhTpweIVfenEcsSi6g_WDK7CFM,22982
 xinference/model/image/stable_diffusion/mlx.py,sha256=GZsozzGB04NfHAdU9MI6gwWE1t_A-s_Ddn_ic8DlkKQ,7476
 xinference/model/llm/__init__.py,sha256=9g9dFG2XuNDCTLE5vuJ6kCT-rqe9MfN56aEapyXaJ5M,13938
-xinference/model/llm/core.py,sha256=fBKIi3zJ-37v7o1ON8_YyDF-44uJ34jYgUktVJOeQO0,8187
-xinference/model/llm/llm_family.json,sha256=BiJwRSTFjKUErru4Mqek-P6JAlbFA1eGT9xzabfwBRc,290116
+xinference/model/llm/core.py,sha256=g-luuAjZizrPunhyFE9IRjn57l0g6FY_1xUwtlRegbs,8151
+xinference/model/llm/llm_family.json,sha256=9pnfZbFv7XnsiW6vR3g8VpcIhdi4wjSZSCcRLnl5zuc,292604
 xinference/model/llm/llm_family.py,sha256=tI2wPefd7v-PWcVhUO2qy6iGob_ioeNCwAQQzal-2o4,39549
 xinference/model/llm/llm_family_csghub.json,sha256=zMKWbihsxQNVB1u5iKJbZUkbOfQ4IPNq1KQ-8IDPQQA,8759
-xinference/model/llm/llm_family_modelscope.json,sha256=5sOuthTDH9NisEw_3V22WdQgA_lQY5fl9vv-XYYwfVY,219124
+xinference/model/llm/llm_family_modelscope.json,sha256=DFKSCauDGx0nHZuyFRBpp4Kau0I5q-Aqf0Lrl_B69u4,221744
 xinference/model/llm/llm_family_openmind_hub.json,sha256=jl9pfbe5DztoxgEwKBxDk1Wd7TziTiJ48_Ie_lJdYjA,67872
 xinference/model/llm/memory.py,sha256=NEIMw6wWaF9S_bnBYq-EyuDhVbUEEeceQhwE1iwsrhI,10207
 xinference/model/llm/utils.py,sha256=DUC6jPr1-kPNsgc4J5MXNSMVgDlPLfQiitLGfdJxVxM,23596
@@ -120,7 +120,7 @@ xinference/model/llm/transformers/tensorizer_utils.py,sha256=VXSYbPZtCbd8lVvsnjD
 xinference/model/llm/transformers/utils.py,sha256=Ej9Tu2yVAotfXMFsl30QlYXLZTODU6Pv_UppsGGUiSw,19185
 xinference/model/llm/transformers/yi_vl.py,sha256=iCdRLw-wizbU-qXXc8CT4DhC0Pt-uYg0vFwXEhAZjQg,8961
 xinference/model/llm/vllm/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/model/llm/vllm/core.py,sha256=5_aClQ1m37KpT6pyvSm8Kt0744mMqYrCkLC43BLHhI8,31391
+xinference/model/llm/vllm/core.py,sha256=gflboRHy4JvhDG6G2bjPgidgNFTU2dDepbTZBmeDGlY,31516
 xinference/model/llm/vllm/utils.py,sha256=LKOmwfFRrlSecawxT-uE39tC2RQbf1UIiSH9Uz90X6w,1313
 xinference/model/rerank/__init__.py,sha256=wRpf1bOMfmAsuEKEGczMTB5fWEvuqltlJbIbRb-x8Ko,3483
 xinference/model/rerank/core.py,sha256=1ef4Nb7z9z6-7-_Rcjw7VLm2AJvMlmXeIZd2Ap8VSQg,14405
@@ -15529,9 +15529,9 @@ xinference/web/ui/node_modules/yargs-parser/package.json,sha256=BSwbOzgetKXMK4u0
 xinference/web/ui/node_modules/yocto-queue/package.json,sha256=6U1XHQPGXJTqsiFvT953ORihUtXTblZy4fXBWP9qxC0,725
 xinference/web/ui/node_modules/yup/package.json,sha256=xRFSROB9NKxqSWHEVFvSTsPs9Ll074uo8OS1zEw0qhA,1206
 xinference/web/ui/node_modules/yup/node_modules/type-fest/package.json,sha256=JTv2zTTVgxQ2H82m1-6qEpdMv08lHjFx4Puf_MsbB_Q,1134
-xinference-0.16.2.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
-xinference-0.16.2.dist-info/METADATA,sha256=JD_uweW_grkKlqWWMtszZpXILC_LoQM7WLh1Y1RjvyE,21010
-xinference-0.16.2.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-xinference-0.16.2.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
-xinference-0.16.2.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
-xinference-0.16.2.dist-info/RECORD,,
+xinference-0.16.3.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+xinference-0.16.3.dist-info/METADATA,sha256=7X0n1tJuGmaammNKKtZ16nV03LBJb6HcQSFsPkFVKy8,21042
+xinference-0.16.3.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+xinference-0.16.3.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
+xinference-0.16.3.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
+xinference-0.16.3.dist-info/RECORD,,

{xinference-0.16.2.dist-info → xinference-0.16.3.dist-info}/LICENSE RENAMED Viewed

File without changes

{xinference-0.16.2.dist-info → xinference-0.16.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{xinference-0.16.2.dist-info → xinference-0.16.3.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{xinference-0.16.2.dist-info → xinference-0.16.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

xinference 0.16.2__py3-none-any.whl → 0.16.3__py3-none-any.whl

Potentially problematic release.

xinference 0.16.2__py3-none-any.whl → 0.16.3__py3-none-any.whl