xinference 0.10.2.post1__py3-none-any.whl → 0.10.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference has been flagged as possibly problematic.
- xinference/_version.py +3 -3
- xinference/core/worker.py +8 -4
- xinference/deploy/cmdline.py +9 -5
- xinference/model/audio/model_spec.json +8 -1
- xinference/model/embedding/core.py +13 -0
- xinference/model/llm/llm_family.json +143 -1
- xinference/model/llm/llm_family_modelscope.json +91 -1
- xinference/model/llm/utils.py +16 -0
- xinference/model/llm/vllm/core.py +2 -0
- xinference/model/rerank/core.py +48 -20
- {xinference-0.10.2.post1.dist-info → xinference-0.10.3.dist-info}/METADATA +4 -3
- {xinference-0.10.2.post1.dist-info → xinference-0.10.3.dist-info}/RECORD +16 -16
- {xinference-0.10.2.post1.dist-info → xinference-0.10.3.dist-info}/LICENSE +0 -0
- {xinference-0.10.2.post1.dist-info → xinference-0.10.3.dist-info}/WHEEL +0 -0
- {xinference-0.10.2.post1.dist-info → xinference-0.10.3.dist-info}/entry_points.txt +0 -0
- {xinference-0.10.2.post1.dist-info → xinference-0.10.3.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2024-04-…",
+ "date": "2024-04-24T10:45:37+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "…",
- "version": "0.10.2.post1"
+ "full-revisionid": "2ba72b0ed55c2dbff12491485ffacee7996d3490",
+ "version": "0.10.3"
 }
 '''  # END VERSION_JSON
xinference/core/worker.py
CHANGED
@@ -612,6 +612,14 @@ class WorkerActor(xo.StatelessActor):
         gpu_idx: Optional[Union[int, List[int]]] = None,
         **kwargs,
     ):
+        # !!! Note that The following code must be placed at the very beginning of this function,
+        # or there will be problems with auto-recovery.
+        # Because `locals()` will collect all the local parameters of this function and pass to this function again.
+        launch_args = locals()
+        launch_args.pop("self")
+        launch_args.pop("kwargs")
+        launch_args.update(kwargs)
+
         event_model_uid, _, __ = parse_replica_model_uid(model_uid)
         await self._event_collector_ref.report_event(
             event_model_uid,
@@ -621,10 +629,6 @@ class WorkerActor(xo.StatelessActor):
                 event_content="Launch model",
             ),
         )
-        launch_args = locals()
-        launch_args.pop("self")
-        launch_args.pop("kwargs")
-        launch_args.update(kwargs)
 
         if gpu_idx is not None:
             logger.info(
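The reordering matters because `locals()` snapshots every name bound in the frame at the moment it is called. Taken after the event-reporting block, the snapshot would also contain incidental locals such as `event_model_uid`, and replaying the launch with those extra keys during auto-recovery would fail. A minimal standalone sketch of the hazard (hypothetical function, not the worker's real signature):

```python
def launch_bad(model_uid, n_gpu=1, **kwargs):
    temp = model_uid.upper()  # incidental local created before the snapshot
    launch_args = locals()    # snapshot now contains "temp" as well
    launch_args.pop("kwargs")
    launch_args.update(kwargs)
    return launch_args

def launch_good(model_uid, n_gpu=1, **kwargs):
    launch_args = locals()    # only the parameters exist in the frame here
    launch_args.pop("kwargs")
    launch_args.update(kwargs)
    temp = model_uid.upper()  # later locals no longer pollute the snapshot
    return launch_args

assert "temp" in launch_bad("m")  # replaying launch_bad(**args) would crash
assert launch_good("m", n_gpu=2) == {"model_uid": "m", "n_gpu": 2}
```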
xinference/deploy/cmdline.py
CHANGED
@@ -736,11 +736,15 @@ def model_launch(
         else []
     )
 
-    peft_model_config = …
-
-
-
-
+    peft_model_config = (
+        {
+            "image_lora_load_kwargs": image_lora_load_params,
+            "image_lora_fuse_kwargs": image_lora_fuse_params,
+            "lora_list": lora_list,
+        }
+        if lora_list or image_lora_load_params or image_lora_fuse_params
+        else None
+    )
 
     _gpu_idx: Optional[List[int]] = (
         None if gpu_idx is None else [int(idx) for idx in gpu_idx.split(",")]
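The rewritten block builds the PEFT config only when at least one LoRA-related option was actually supplied on the command line, forwarding `None` otherwise. The conditional-expression pattern in isolation (option values below are made up; in cmdline.py they come from click options):

```python
# Hypothetical option values standing in for the click-parsed CLI arguments.
lora_list: list = []
image_lora_load_params = None
image_lora_fuse_params = None

peft_model_config = (
    {
        "image_lora_load_kwargs": image_lora_load_params,
        "image_lora_fuse_kwargs": image_lora_fuse_params,
        "lora_list": lora_list,
    }
    if lora_list or image_lora_load_params or image_lora_fuse_params
    else None
)
assert peft_model_config is None  # nothing supplied -> nothing forwarded
```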
xinference/model/audio/model_spec.json
CHANGED
@@ -75,5 +75,12 @@
         "model_id": "BELLE-2/Belle-whisper-large-v2-zh",
         "model_revision": "ec5bd5d78598545b7585814edde86dac2002b5b9",
         "multilingual": false
+    },
+    {
+        "model_name": "Belle-whisper-large-v3-zh",
+        "model_family": "whisper",
+        "model_id": "BELLE-2/Belle-whisper-large-v3-zh",
+        "model_revision": "3bebc7247696b39f5ab9ed22db426943ac33f600",
+        "multilingual": false
     }
-]
+]
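With the spec registered, Belle-whisper-large-v3-zh becomes launchable as a built-in audio model. A hedged usage sketch via the xinference Python client (the endpoint, file name, and exact handle methods are assumptions, not taken from this diff):

```python
from xinference.client import Client

client = Client("http://127.0.0.1:9997")  # hypothetical local endpoint
uid = client.launch_model(
    model_name="Belle-whisper-large-v3-zh", model_type="audio"
)
model = client.get_model(uid)

# The spec marks the model "multilingual": false, i.e. a Chinese-only
# Whisper fine-tune, so feed it Chinese speech.
with open("sample_zh.wav", "rb") as f:
    print(model.transcriptions(f.read()))
```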
xinference/model/embedding/core.py
CHANGED
@@ -12,12 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import gc
 import logging
+import os
 from collections import defaultdict
 from typing import Dict, List, Optional, Tuple, Union, no_type_check
 
 import numpy as np
 
+from ...device_utils import empty_cache
 from ...types import Embedding, EmbeddingData, EmbeddingUsage
 from ..core import CacheableModelSpec, ModelDescription
 from ..utils import get_cache_dir, is_model_cached
@@ -28,6 +31,10 @@ logger = logging.getLogger(__name__)
 # Init when registering all the builtin models.
 MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
 EMBEDDING_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)
+EMBEDDING_EMPTY_CACHE_COUNT = int(
+    os.getenv("XINFERENCE_EMBEDDING_EMPTY_CACHE_COUNT", "10")
+)
+assert EMBEDDING_EMPTY_CACHE_COUNT > 0
 
 
 def get_embedding_model_descriptions():
@@ -116,6 +123,7 @@ class EmbeddingModel:
         self._model_path = model_path
         self._device = device
         self._model = None
+        self._counter = 0
 
     def load(self):
         try:
@@ -134,6 +142,11 @@ class EmbeddingModel:
         self._model = SentenceTransformer(self._model_path, device=self._device)
 
     def create_embedding(self, sentences: Union[str, List[str]], **kwargs):
+        self._counter += 1
+        if self._counter % EMBEDDING_EMPTY_CACHE_COUNT == 0:
+            logger.debug("Empty embedding cache.")
+            gc.collect()
+            empty_cache()
         from sentence_transformers import SentenceTransformer
 
         kwargs.setdefault("normalize_embeddings", True)
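The pattern is a simple modulo counter: every N-th `create_embedding` call runs a full garbage-collection pass and asks the accelerator backend to release cached allocator blocks, with N configurable through `XINFERENCE_EMBEDDING_EMPTY_CACHE_COUNT`. A standalone sketch of the same idea (`flush_device_cache` below is a stand-in for xinference's `device_utils.empty_cache`, which dispatches per device type):

```python
import gc
import os

EMPTY_CACHE_COUNT = int(os.getenv("XINFERENCE_EMBEDDING_EMPTY_CACHE_COUNT", "10"))

def flush_device_cache() -> None:
    # Stand-in for device_utils.empty_cache(); on CUDA it boils down to
    # torch.cuda.empty_cache().
    try:
        import torch
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    except ImportError:
        pass

class PeriodicFlusher:
    """Run a GC pass plus a device-cache flush on every N-th call."""

    def __init__(self, every: int = EMPTY_CACHE_COUNT):
        assert every > 0
        self._every = every
        self._counter = 0

    def tick(self) -> None:
        self._counter += 1
        if self._counter % self._every == 0:
            gc.collect()          # drop Python-level garbage first
            flush_device_cache()  # then return cached GPU blocks to the driver
```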
xinference/model/llm/llm_family.json
CHANGED
@@ -1220,6 +1220,148 @@
             }
         ]
     },
+    {
+        "version": 1,
+        "context_length": 8192,
+        "model_name": "llama-3",
+        "model_lang": [
+            "en"
+        ],
+        "model_ability": [
+            "generate"
+        ],
+        "model_description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "meta-llama/Meta-Llama-3-8B"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "Q2_K",
+                    "Q3_K_L",
+                    "Q3_K_M",
+                    "Q3_K_S",
+                    "Q4_0",
+                    "Q4_1",
+                    "Q4_K_M",
+                    "Q4_K_S",
+                    "Q5_0",
+                    "Q5_1",
+                    "Q5_K_M",
+                    "Q5_K_S",
+                    "Q6_K",
+                    "Q8_0"
+                ],
+                "model_id": "QuantFactory/Meta-Llama-3-8B-GGUF",
+                "model_file_name_template": "Meta-Llama-3-8B.{quantization}.gguf"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 70,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "meta-llama/Meta-Llama-3-70B"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 70,
+                "quantizations": [
+                    "Q4_K_M",
+                    "Q5_K_M"
+                ],
+                "model_id": "NousResearch/Meta-Llama-3-70B-GGUF",
+                "model_file_name_template": "Meta-Llama-3-70B-{quantization}.gguf"
+            }
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 8192,
+        "model_name": "llama-3-instruct",
+        "model_lang": [
+            "en"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks..",
+        "model_specs": [
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "IQ3_M",
+                    "Q4_K_M",
+                    "Q5_K_M",
+                    "Q6_K",
+                    "Q8_0"
+                ],
+                "model_id": "lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF",
+                "model_file_name_template": "Meta-Llama-3-8B-Instruct-{quantization}.gguf"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "meta-llama/Meta-Llama-3-8B-Instruct"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 70,
+                "quantizations": [
+                    "IQ1_M",
+                    "IQ2_XS",
+                    "Q4_K_M"
+                ],
+                "model_id": "lmstudio-community/Meta-Llama-3-70B-Instruct-GGUF",
+                "model_file_name_template": "Meta-Llama-3-8B-Instruct-{quantization}.gguf"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 70,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "meta-llama/Meta-Llama-3-70B-Instruct"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "LLAMA3",
+            "system_prompt": "You are a helpful assistant.",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "intra_message_sep": "\n\n",
+            "inter_message_sep": "<|eot_id|>",
+            "stop_token_ids": [
+                128001,
+                128009
+            ],
+            "stop": [
+                "<|end_of_text|>",
+                "<|eot_id|>"
+            ]
+        }
+    },
     {
         "version": 1,
         "context_length": 2048,
@@ -1932,7 +2074,7 @@
     },
     {
         "version": 1,
-        "context_length": …,
+        "context_length": 32768,
         "model_name": "codeqwen1.5-chat",
         "model_lang": [
             "en",
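With these entries registered, Llama 3 can be launched by name like any other built-in model. A hedged sketch using the xinference Python client (the endpoint and the choice of format and quantization are illustrative):

```python
from xinference.client import Client

client = Client("http://127.0.0.1:9997")  # hypothetical local endpoint
uid = client.launch_model(
    model_name="llama-3-instruct",
    model_size_in_billions=8,
    model_format="ggufv2",
    quantization="Q4_K_M",
)
model = client.get_model(uid)
# The LLAMA3 prompt style above registers <|eot_id|> and <|end_of_text|> as
# stop sequences, so chat responses terminate at the proper token.
print(model.chat("Write one sentence about GPUs."))
```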
xinference/model/llm/llm_family_modelscope.json
CHANGED
@@ -84,6 +84,96 @@
             ]
         }
     },
+    {
+        "version": 1,
+        "context_length": 8192,
+        "model_name": "llama-3",
+        "model_lang": [
+            "en"
+        ],
+        "model_ability": [
+            "generate"
+        ],
+        "model_description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "LLM-Research/Meta-Llama-3-8B",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 70,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "LLM-Research/Meta-Llama-3-70B",
+                "model_hub": "modelscope"
+            }
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 8192,
+        "model_name": "llama-3-instruct",
+        "model_lang": [
+            "en"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks..",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "LLM-Research/Meta-Llama-3-8B-Instruct",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 70,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "LLM-Research/Meta-Llama-3-70B-Instruct",
+                "model_hub": "modelscope"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "LLAMA3",
+            "system_prompt": "You are a helpful assistant.",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "intra_message_sep": "\n\n",
+            "inter_message_sep": "<|eot_id|>",
+            "stop_token_ids": [
+                128001,
+                128009
+            ],
+            "stop": [
+                "<|end_of_text|>",
+                "<|eot_id|>"
+            ]
+        }
+    },
     {
         "version": 1,
         "context_length": 2048,
@@ -2177,7 +2267,7 @@
     },
     {
         "version": 1,
-        "context_length": …,
+        "context_length": 32768,
         "model_name": "codeqwen1.5-chat",
         "model_lang": [
             "en",
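These entries mirror the Hugging Face ones but pull from the `LLM-Research` ModelScope repositories (`"model_hub": "modelscope"`), which is what users behind restricted networks need. As far as I know, xinference selects the download hub via the `XINFERENCE_MODEL_SRC` environment variable; treat the exact semantics as an assumption:

```python
import os

# Must be set before the xinference supervisor/worker starts so llama-3
# downloads resolve to the LLM-Research/Meta-Llama-3-* repos listed above.
os.environ["XINFERENCE_MODEL_SRC"] = "modelscope"
```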
xinference/model/llm/utils.py
CHANGED
@@ -114,6 +114,22 @@ class ChatModelMixin:
                 else:
                     ret += role
             return ret
+        elif prompt_style.style_name == "LLAMA3":
+            ret = (
+                f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>"
+                f"{prompt_style.intra_message_sep}{prompt_style.system_prompt}{prompt_style.inter_message_sep}"
+            )
+            for i, message in enumerate(chat_history):
+                role = get_role(message["role"])
+                content = message["content"]
+                if content:
+                    ret += (
+                        f"<|start_header_id|>{role}<|end_header_id|>"
+                        f"{prompt_style.intra_message_sep}{content}{prompt_style.inter_message_sep}"
+                    )
+                else:
+                    ret += f"<|start_header_id|>{role}<|end_header_id|>{prompt_style.intra_message_sep}"
+            return ret
         elif prompt_style.style_name == "FALCON":
             ret = prompt_style.system_prompt
             for message in chat_history:
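For a concrete picture of the template, here is the string the LLAMA3 branch produces for a single user turn followed by the customary empty assistant message, with the separators registered in the prompt_style above (hand-expanded for illustration, not a call into xinference):

```python
system_prompt = "You are a helpful assistant."
expected = (
    "<|begin_of_text|><|start_header_id|>system<|end_header_id|>"
    f"\n\n{system_prompt}<|eot_id|>"                            # system block
    "<|start_header_id|>user<|end_header_id|>\n\nHi!<|eot_id|>"  # user turn
    "<|start_header_id|>assistant<|end_header_id|>\n\n"          # empty content: cue to generate
)
print(expected)
```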
xinference/model/llm/vllm/core.py
CHANGED
@@ -85,6 +85,7 @@ except ImportError:
 
 VLLM_SUPPORTED_MODELS = [
     "llama-2",
+    "llama-3",
     "baichuan",
     "internlm-16k",
     "mistral-v0.1",
@@ -94,6 +95,7 @@ VLLM_SUPPORTED_MODELS = [
 ]
 VLLM_SUPPORTED_CHAT_MODELS = [
     "llama-2-chat",
+    "llama-3-instruct",
     "vicuna-v1.3",
     "vicuna-v1.5",
     "baichuan-chat",
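The two list entries are all it takes for Llama 3 to be routed to vLLM when it is available: engine selection is, at its core, a membership test against these allowlists. A simplified sketch of that gating (hypothetical helper, not xinference's actual matching code):

```python
VLLM_SUPPORTED_CHAT_MODELS = ["llama-2-chat", "llama-3-instruct"]

def should_use_vllm(model_name: str, model_format: str, has_cuda: bool) -> bool:
    # vLLM only serves unquantized pytorch weights on CUDA devices.
    return (
        has_cuda
        and model_format == "pytorch"
        and model_name in VLLM_SUPPORTED_CHAT_MODELS
    )

assert should_use_vllm("llama-3-instruct", "pytorch", has_cuda=True)
assert not should_use_vllm("llama-3-instruct", "ggufv2", has_cuda=True)
```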
xinference/model/rerank/core.py
CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import gc
 import logging
 import os
 import uuid
@@ -21,6 +22,7 @@ from typing import Dict, List, Optional, Tuple
 import numpy as np
 
 from ...constants import XINFERENCE_CACHE_DIR
+from ...device_utils import empty_cache
 from ...types import Document, DocumentObj, Rerank
 from ..core import CacheableModelSpec, ModelDescription
 from ..utils import is_model_cached
@@ -31,6 +33,8 @@ logger = logging.getLogger(__name__)
 # Init when registering all the builtin models.
 MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
 RERANK_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)
+RERANK_EMPTY_CACHE_COUNT = int(os.getenv("XINFERENCE_RERANK_EMPTY_CACHE_COUNT", "10"))
+assert RERANK_EMPTY_CACHE_COUNT > 0
 
 
 def get_rerank_model_descriptions():
@@ -113,28 +117,44 @@ class RerankModel:
         self._model_config = model_config or dict()
         self._use_fp16 = use_fp16
         self._model = None
+        self._counter = 0
 
     def load(self):
-        …
-        from …
-        …
-        )
-        …
+        if self._model_spec.type == "normal":
+            try:
+                from sentence_transformers.cross_encoder import CrossEncoder
+            except ImportError:
+                error_message = "Failed to import module 'sentence-transformers'"
+                installation_guide = [
+                    "Please make sure 'sentence-transformers' is installed. ",
+                    "You can install it by `pip install sentence-transformers`\n",
+                ]
+
+                raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+            self._model = CrossEncoder(
+                self._model_path, device=self._device, **self._model_config
+            )
+            if self._use_fp16:
+                self._model.model.half()
+        else:
+            try:
+                if self._model_spec.type == "LLM-based":
+                    from FlagEmbedding import FlagLLMReranker as FlagReranker
+                elif self._model_spec.type == "LLM-based layerwise":
+                    from FlagEmbedding import LayerWiseFlagLLMReranker as FlagReranker
+                else:
+                    raise RuntimeError(
+                        f"Unsupported Rank model type: {self._model_spec.type}"
+                    )
+            except ImportError:
+                error_message = "Failed to import module 'FlagEmbedding'"
+                installation_guide = [
+                    "Please make sure 'FlagEmbedding' is installed. ",
+                    "You can install it by `pip install FlagEmbedding`\n",
+                ]
 
-        …
-        …
+                raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+            self._model = FlagReranker(self._model_path, use_fp16=self._use_fp16)
 
     def rerank(
         self,
@@ -145,13 +165,21 @@ class RerankModel:
         return_documents: Optional[bool],
         **kwargs,
     ) -> Rerank:
+        self._counter += 1
+        if self._counter % RERANK_EMPTY_CACHE_COUNT == 0:
+            logger.debug("Empty rerank cache.")
+            gc.collect()
+            empty_cache()
         assert self._model is not None
         if kwargs:
             raise ValueError("rerank hasn't support extra parameter.")
        if max_chunks_per_doc is not None:
             raise ValueError("rerank hasn't support `max_chunks_per_doc` parameter.")
         sentence_combinations = [[query, doc] for doc in documents]
-        similarity_scores = self._model.predict(sentence_combinations)
+        if self._model_spec.type == "normal":
+            similarity_scores = self._model.predict(sentence_combinations)
+        else:
+            similarity_scores = self._model.compute_score(sentence_combinations)
         sim_scores_argsort = list(reversed(np.argsort(similarity_scores)))
         if top_n is not None:
             sim_scores_argsort = sim_scores_argsort[:top_n]
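The loader now dispatches on the spec's `type` field: `"normal"` keeps the sentence-transformers `CrossEncoder` path (scored with `predict`), while the LLM-based variants go through FlagEmbedding rerankers (scored with `compute_score`). Client usage is unchanged either way; a hedged sketch (the model name, endpoint, and handle methods are assumptions):

```python
from xinference.client import Client

client = Client("http://127.0.0.1:9997")  # hypothetical local endpoint
uid = client.launch_model(model_name="bge-reranker-base", model_type="rerank")
model = client.get_model(uid)

result = model.rerank(
    documents=[
        "Paris is the capital of France.",
        "Berlin is the capital of Germany.",
    ],
    query="What is the capital of France?",
    top_n=1,
)
print(result["results"][0])  # best-scoring document first
```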
{xinference-0.10.2.post1.dist-info → xinference-0.10.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: xinference
-Version: 0.10.2.post1
+Version: 0.10.3
 Summary: Model Serving Made Easy
 Home-page: https://github.com/xorbitsai/inference
 Author: Qin Xuye
@@ -176,13 +176,14 @@ potential of cutting-edge AI models.
 - Docker image: [#855](https://github.com/xorbitsai/inference/pull/855)
 - Support multimodal: [#829](https://github.com/xorbitsai/inference/pull/829)
 ### New Models
+- Built-in support for [Llama 3](https://github.com/meta-llama/llama3): [#1332](https://github.com/xorbitsai/inference/pull/1332)
+- Built-in support for [Command-R](https://huggingface.co/CohereForAI/c4ai-command-r-v01): [#1310](https://github.com/xorbitsai/inference/pull/1310)
 - Built-in support for [Qwen1.5 MOE](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B-Chat): [#1263](https://github.com/xorbitsai/inference/pull/1263)
 - Built-in support for [Qwen1.5 32B](https://huggingface.co/Qwen/Qwen1.5-32B-Chat): [#1249](https://github.com/xorbitsai/inference/pull/1249)
 - Built-in support for [OmniLMM](https://github.com/OpenBMB/OmniLMM): [#1171](https://github.com/xorbitsai/inference/pull/1171)
 - Built-in support for [Gemma](https://github.com/google-deepmind/gemma): [#1024](https://github.com/xorbitsai/inference/pull/1024)
-- Built-in support for [Qwen1.5](https://github.com/QwenLM/Qwen1.5): [#994](https://github.com/xorbitsai/inference/pull/994)
-- Built-in support for [Yi-VL](https://github.com/01-ai/Yi): [#946](https://github.com/xorbitsai/inference/pull/946)
 ### Integrations
+- [FastGPT](https://github.com/labring/FastGPT): a knowledge-based platform built on the LLM, offers out-of-the-box data processing and model invocation capabilities, allows for workflow orchestration through Flow visualization.
 - [Dify](https://docs.dify.ai/advanced/model-configuration/xinference): an LLMOps platform that enables developers (and even non-developers) to quickly build useful applications based on large language models, ensuring they are visual, operable, and improvable.
 - [Chatbox](https://chatboxai.app/): a desktop client for multiple cutting-edge LLM models, available on Windows, Mac and Linux.
 
{xinference-0.10.2.post1.dist-info → xinference-0.10.3.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
 xinference/__init__.py,sha256=0LgIveLP6CXxoIaSrxhlFyOh0lOqPgJBVcBe0tkWJjc,987
 xinference/_compat.py,sha256=SQAjZMGxtBIce45qtW7ob7RWzA0zhv2yB3AxT0rb0uU,1778
-xinference/_version.py,sha256=…
+xinference/_version.py,sha256=AQ6rrRceWHquLfKWGWzSVXI8bGhcAlO5_Q3_EWaZt1Q,498
 xinference/conftest.py,sha256=RffV9htxwo6iDEGZwmcj0A_O_XBQM2RRUea4q6XTeGQ,9742
 xinference/constants.py,sha256=Bu_fOJUGAvvqF_6FY5OzOHl7fQ1Nomek3LY17xr9oz4,2882
 xinference/device_utils.py,sha256=WNKDD4Eni3Io3AehiyonsuoJaukT77Bc76Es7vNGvjc,2615
@@ -32,9 +32,9 @@ xinference/core/resource.py,sha256=FQ0aRt3T4ZQo0P6CZZf5QUKHiCsr5llBvKb1f7wfnxg,1
 xinference/core/status_guard.py,sha256=ScmTFb3NPTp-RzufdHFpBh5TZHPc2bu907JA8l0gywE,2804
 xinference/core/supervisor.py,sha256=salJ3vIjkQblexxLYl7Mi46iiWIKhpsY9W8DRXxoHrA,41212
 xinference/core/utils.py,sha256=tUpUJUQv1zkE9i7fw1pAFfFdcB3PC6DvKJn4Bmmq75E,6008
-xinference/core/worker.py,sha256=…
+xinference/core/worker.py,sha256=zfbxO3EJl3zJ7JKhXLEQ7EK3sd9yXSW8iUsn1dq5e00,33784
 xinference/deploy/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/deploy/cmdline.py,sha256=…
+xinference/deploy/cmdline.py,sha256=bpc6g8V6FwVFx-DOGU7n8XRPSZFrXqFRmRH6atD98DE,35647
 xinference/deploy/local.py,sha256=vlAvhcl8utP1DjW4MJpBgD4JLHQV-1Xebmdd8j9M8IM,3946
 xinference/deploy/supervisor.py,sha256=fMHeEGigQ72PD9JEFmZ5Xudn25Uj4DhD2OVIlAu_YpA,2978
 xinference/deploy/utils.py,sha256=_g4U6GJVzHnEHzF-KSMm-tffba2mtLNnxoEwnC8jmj8,5361
@@ -50,11 +50,11 @@ xinference/model/utils.py,sha256=qqCaje-dJvSarVzeGgmwKnq85e82JCLPVq2yCfAFZlo,145
 xinference/model/audio/__init__.py,sha256=0EVzX6b4pcOO63NAcNpYWTVYVa7w7yG5cPpGxOY9MXw,2347
 xinference/model/audio/core.py,sha256=ypbIvbueTFKeulYt7aJX7FfU4y3Hn3DzxkhhjKO6Dxw,4373
 xinference/model/audio/custom.py,sha256=Li6VpTmpZ17YXk_bwN2-tUKRAJwNcW-O4OwrJefzC2o,4966
-xinference/model/audio/model_spec.json,sha256=…
+xinference/model/audio/model_spec.json,sha256=dQUgG7HT9Ge4-0TBie7GcyXbPHz4lH_2HttVTm560Dg,2595
 xinference/model/audio/utils.py,sha256=pwo5cHh8nvhyBa9f-17QaVpXMSjmbpGbPYKwBBtEhGM,717
 xinference/model/audio/whisper.py,sha256=vWUn5huqER_g8ttxzHFNz6UNyDn2CnF7OzS_4PQjjKE,4599
 xinference/model/embedding/__init__.py,sha256=0FLzOZyOuMctxFvhobkLXRUepwHck6RPbtjCct1eMI8,2854
-xinference/model/embedding/core.py,sha256=…
+xinference/model/embedding/core.py,sha256=VJ1b7zUwkm5VtmtQx3-bYpJuETiKb4345dYP6P4oRM4,13023
 xinference/model/embedding/custom.py,sha256=iE3-iWVzxarXdeTdw5e6rxv6HQRXVbPHp65wwhT2IL8,3919
 xinference/model/embedding/model_spec.json,sha256=hpM2_FhH6gSqmrgu2MMu4u94XMEw6r9A6aKUQObsCK0,6652
 xinference/model/embedding/model_spec_modelscope.json,sha256=No71OUu5OoALs6amJ0UiRU6JH9DkYRQvdvSgCf3IIHs,5814
@@ -68,10 +68,10 @@ xinference/model/image/stable_diffusion/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17k
 xinference/model/image/stable_diffusion/core.py,sha256=ib_ZeSg7hzynmRqSnhjtrVuhoLOgZPrR1ZH2LjBmH2E,6063
 xinference/model/llm/__init__.py,sha256=op1aUvEPtQ5KeWYvbP-skptyMC8osQphWKs7EbgNJ1c,6555
 xinference/model/llm/core.py,sha256=FeZv1UiA7zPdmDcAQpmFL9Bslj6grqOSRvqsqkVtBHg,9572
-xinference/model/llm/llm_family.json,sha256=…
+xinference/model/llm/llm_family.json,sha256=Few9frWihmqwN_c_Q0B5S1esZ8DPhGdnNRvxAGEQIOE,127493
 xinference/model/llm/llm_family.py,sha256=pryVjq7WZ84x9kwzXQgXFgE5UxIqBn_LTudeXnDX5RE,34615
-xinference/model/llm/llm_family_modelscope.json,sha256=…
-xinference/model/llm/utils.py,sha256=…
+xinference/model/llm/llm_family_modelscope.json,sha256=PJlaTLjcYdaqR95U8GYaSJsbxxCZ_Q-8k6Di4ciGZ_k,79795
+xinference/model/llm/utils.py,sha256=gNuRa1VIk5Dv0rrkuCCNQJCFQ7iqwEKIjiej4Cfo8eY,29706
 xinference/model/llm/ggml/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/model/llm/ggml/chatglm.py,sha256=Zrzw8K2EroI5v2JlwOAJ08tNFs871n86zRtBxuK97Z8,13044
 xinference/model/llm/ggml/llamacpp.py,sha256=HLjcMOOrMoriaTx39jDOufyfY5lXdO84cCWZORjCc8U,11426
@@ -97,9 +97,9 @@ xinference/model/llm/pytorch/yi_vl.py,sha256=aZkMQPlIb522Ue1K62DAMclq1n9HVw4OQNu
 xinference/model/llm/sglang/__init__.py,sha256=-sjSIQ4K6w-TEzx49kVaWeWC443fnZqODU91GCQ_JNo,581
 xinference/model/llm/sglang/core.py,sha256=eqAczZfGJInC_jihXVeKiWQ79Llk3reHDBkdShQlH-0,12915
 xinference/model/llm/vllm/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/model/llm/vllm/core.py,sha256=…
+xinference/model/llm/vllm/core.py,sha256=sV67VKfViYzX_IziSYKlwzO1rw7OUyZEJSCOnxRQSKY,18078
 xinference/model/rerank/__init__.py,sha256=BXIL1uu3ZpZHX9bODhW9lxKUXudZE7-OkXFmmM5rpMU,2817
-xinference/model/rerank/core.py,sha256=…
+xinference/model/rerank/core.py,sha256=UVfue73hHE9UL5c-X7OajZfTR_mLTv673RLFWZAfWV4,9665
 xinference/model/rerank/custom.py,sha256=NKk7jA7p4xkuwS5WoOs2SY2wdnoAVpyCjBTvv317bBw,3917
 xinference/model/rerank/model_spec.json,sha256=LCiiCdNz4NYt9vKVnHffk3ZpwvgzzHxe4zsaxOqxL18,1367
 xinference/model/rerank/model_spec_modelscope.json,sha256=vSSC0aWy_DHnNDzzBcMWr2pqdISDmPS95FtD_YfMmn4,1275
@@ -15400,9 +15400,9 @@ xinference/web/ui/node_modules/yargs-parser/package.json,sha256=BSwbOzgetKXMK4u0
 xinference/web/ui/node_modules/yocto-queue/package.json,sha256=6U1XHQPGXJTqsiFvT953ORihUtXTblZy4fXBWP9qxC0,725
 xinference/web/ui/node_modules/yup/package.json,sha256=xRFSROB9NKxqSWHEVFvSTsPs9Ll074uo8OS1zEw0qhA,1206
 xinference/web/ui/node_modules/yup/node_modules/type-fest/package.json,sha256=JTv2zTTVgxQ2H82m1-6qEpdMv08lHjFx4Puf_MsbB_Q,1134
-xinference-0.10.…
-xinference-0.10.…
-xinference-0.10.…
-xinference-0.10.…
-xinference-0.10.…
-xinference-0.10.…
+xinference-0.10.3.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+xinference-0.10.3.dist-info/METADATA,sha256=CjQ70PUW3asgEheRVmH7_P6AZMUglcWaUkIo1VHvcz8,15256
+xinference-0.10.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+xinference-0.10.3.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
+xinference-0.10.3.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
+xinference-0.10.3.dist-info/RECORD,,
{xinference-0.10.2.post1.dist-info → xinference-0.10.3.dist-info}/LICENSE
File without changes
{xinference-0.10.2.post1.dist-info → xinference-0.10.3.dist-info}/WHEEL
File without changes
{xinference-0.10.2.post1.dist-info → xinference-0.10.3.dist-info}/entry_points.txt
File without changes
{xinference-0.10.2.post1.dist-info → xinference-0.10.3.dist-info}/top_level.txt
File without changes