xinference 0.8.2__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +22 -7
  3. xinference/client/restful/restful_client.py +10 -0
  4. xinference/constants.py +14 -4
  5. xinference/core/chat_interface.py +8 -1
  6. xinference/core/resource.py +19 -12
  7. xinference/core/supervisor.py +94 -30
  8. xinference/core/utils.py +29 -1
  9. xinference/core/worker.py +18 -3
  10. xinference/deploy/local.py +2 -2
  11. xinference/deploy/supervisor.py +2 -2
  12. xinference/model/audio/model_spec.json +29 -1
  13. xinference/model/embedding/model_spec.json +24 -0
  14. xinference/model/embedding/model_spec_modelscope.json +24 -0
  15. xinference/model/llm/__init__.py +2 -0
  16. xinference/model/llm/core.py +2 -0
  17. xinference/model/llm/ggml/chatglm.py +15 -6
  18. xinference/model/llm/llm_family.json +56 -0
  19. xinference/model/llm/llm_family_modelscope.json +56 -0
  20. xinference/model/llm/pytorch/chatglm.py +3 -3
  21. xinference/model/llm/pytorch/core.py +1 -0
  22. xinference/model/llm/pytorch/utils.py +21 -9
  23. xinference/model/llm/pytorch/yi_vl.py +246 -0
  24. xinference/model/rerank/core.py +1 -1
  25. xinference/model/rerank/model_spec.json +6 -0
  26. xinference/model/rerank/model_spec_modelscope.json +7 -0
  27. xinference/thirdparty/__init__.py +0 -0
  28. xinference/thirdparty/llava/__init__.py +1 -0
  29. xinference/thirdparty/llava/conversation.py +205 -0
  30. xinference/thirdparty/llava/mm_utils.py +122 -0
  31. xinference/thirdparty/llava/model/__init__.py +1 -0
  32. xinference/thirdparty/llava/model/clip_encoder/__init__.py +0 -0
  33. xinference/thirdparty/llava/model/clip_encoder/builder.py +11 -0
  34. xinference/thirdparty/llava/model/clip_encoder/clip_encoder.py +86 -0
  35. xinference/thirdparty/llava/model/constants.py +6 -0
  36. xinference/thirdparty/llava/model/llava_arch.py +385 -0
  37. xinference/thirdparty/llava/model/llava_llama.py +163 -0
  38. xinference/thirdparty/llava/model/multimodal_projector/__init__.py +0 -0
  39. xinference/thirdparty/llava/model/multimodal_projector/builder.py +64 -0
  40. xinference/types.py +1 -1
  41. xinference/web/ui/build/asset-manifest.json +3 -3
  42. xinference/web/ui/build/index.html +1 -1
  43. xinference/web/ui/build/static/js/{main.abedc3c9.js → main.15822aeb.js} +3 -3
  44. xinference/web/ui/build/static/js/{main.abedc3c9.js.map → main.15822aeb.js.map} +1 -1
  45. xinference/web/ui/node_modules/.cache/babel-loader/65ca3ba225b8c8dac907210545b51f2fcdb2591f0feeb7195f1c037f2bc956a0.json +1 -0
  46. {xinference-0.8.2.dist-info → xinference-0.8.3.dist-info}/METADATA +21 -18
  47. {xinference-0.8.2.dist-info → xinference-0.8.3.dist-info}/RECORD +52 -38
  48. xinference/web/ui/node_modules/.cache/babel-loader/c157e34990b23834b7ad4c13c42962209942c60f8130978c1514f3d085cfaea0.json +0 -1
  49. /xinference/web/ui/build/static/js/{main.abedc3c9.js.LICENSE.txt → main.15822aeb.js.LICENSE.txt} +0 -0
  50. {xinference-0.8.2.dist-info → xinference-0.8.3.dist-info}/LICENSE +0 -0
  51. {xinference-0.8.2.dist-info → xinference-0.8.3.dist-info}/WHEEL +0 -0
  52. {xinference-0.8.2.dist-info → xinference-0.8.3.dist-info}/entry_points.txt +0 -0
  53. {xinference-0.8.2.dist-info → xinference-0.8.3.dist-info}/top_level.txt +0 -0
@@ -58,6 +58,7 @@ def _install():
     from .pytorch.llama_2 import LlamaPytorchChatModel, LlamaPytorchModel
     from .pytorch.qwen_vl import QwenVLChatModel
     from .pytorch.vicuna import VicunaPytorchChatModel
+    from .pytorch.yi_vl import YiVLChatModel
     from .vllm.core import VLLMChatModel, VLLMModel

     # register llm classes.
@@ -90,6 +91,7 @@ def _install():
             FalconPytorchModel,
             Internlm2PytorchChatModel,
             QwenVLChatModel,
+            YiVLChatModel,
             PytorchModel,
         ]
     )
@@ -135,6 +135,8 @@ class LLMDescription(ModelDescription):
             "model_description": self._llm_family.model_description,
             "model_format": self._llm_spec.model_format,
             "model_size_in_billions": self._llm_spec.model_size_in_billions,
+            "model_family": self._llm_family.model_family
+            or self._llm_family.model_name,
             "quantization": self._quantization,
             "model_hub": self._llm_spec.model_hub,
             "revision": self._llm_spec.model_revision,
@@ -230,20 +230,28 @@ class ChatglmCppChatModel(LLM):
             ),
         }

+    @staticmethod
+    def _to_chatglm_chat_messages(history_list: List[Any]):
+        from chatglm_cpp import ChatMessage
+
+        return [ChatMessage(role=v["role"], content=v["content"]) for v in history_list]
+
     def chat(
         self,
         prompt: str,
+        system_prompt: Optional[str] = None,
         chat_history: Optional[List[ChatCompletionMessage]] = None,
         generate_config: Optional[ChatglmCppGenerateConfig] = None,
     ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
+        chat_history_list = []
+        if system_prompt is not None:
+            chat_history_list.append({"role": "system", "content": system_prompt})
         if chat_history is not None:
-            chat_history_list = chat_history
-        else:
-            chat_history_list = []
+            chat_history_list.extend(chat_history)  # type: ignore

         tool_message = self._handle_tools(generate_config)
         if tool_message is not None:
-            chat_history_list.insert(0, tool_message)
+            chat_history_list.insert(0, tool_message)  # type: ignore

         # We drop the message which contains tool calls to walkaround the issue:
         # https://github.com/li-plus/chatglm.cpp/issues/231
@@ -276,17 +284,18 @@ class ChatglmCppChatModel(LLM):
         params = {k: v for k, v in params.items() if v is not None}

         assert self._llm is not None
+        chat_history_messages = self._to_chatglm_chat_messages(chat_history_list)

         if generate_config["stream"]:
             it = self._llm.chat(
-                chat_history_list,
+                chat_history_messages,
                 **params,
             )
             assert not isinstance(it, str)
             return self._convert_raw_text_chunks_to_chat(it, self.model_uid)
         else:
             c = self._llm.chat(
-                chat_history_list,
+                chat_history_messages,
                 **params,
             )
             assert not isinstance(c, Iterator)
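
The chat() change above separates message preparation from the chatglm.cpp call: the optional system prompt and the OpenAI-style history dicts are collected first, then converted into chatglm_cpp.ChatMessage objects by the new _to_chatglm_chat_messages helper. A minimal sketch of that conversion, assuming chatglm_cpp is installed (the sample prompts are illustrative only):

    from chatglm_cpp import ChatMessage

    system_prompt = "You are a helpful assistant."
    chat_history = [{"role": "user", "content": "What is ChatGLM?"}]

    history_list = []
    if system_prompt is not None:
        history_list.append({"role": "system", "content": system_prompt})
    history_list.extend(chat_history)

    # Equivalent of ChatglmCppChatModel._to_chatglm_chat_messages
    messages = [ChatMessage(role=v["role"], content=v["content"]) for v in history_list]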
@@ -3346,5 +3346,61 @@
                 "<unk>"
             ]
         }
+    },
+    {
+        "version": 1,
+        "context_length": 204800,
+        "model_name": "yi-vl-chat",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "Yi Vision Language (Yi-VL) model is the open-source, multimodal version of the Yi Large Language Model (LLM) series, enabling content comprehension, recognition, and multi-round conversations about images.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 6,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "01-ai/Yi-VL-6B",
+                "model_revision": "897c938da1ec860330e2ba2d425ab3004495ba38"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 34,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "01-ai/Yi-VL-34B",
+                "model_revision": "ea29a9a430f27893e780366dae81d4ca5ebab561"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "CHATML",
+            "system_prompt": "",
+            "roles": [
+                "<|im_start|>user",
+                "<|im_start|>assistant"
+            ],
+            "intra_message_sep": "<|im_end|>",
+            "inter_message_sep": "",
+            "stop_token_ids": [
+                2,
+                6,
+                7,
+                8
+            ],
+            "stop": [
+                "<|endoftext|>",
+                "<|im_start|>",
+                "<|im_end|>",
+                "<|im_sep|>"
+            ]
+        }
     }
 ]
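
The entry above registers yi-vl-chat as a built-in chat+vision model with 6B and 34B PyTorch specs. A hedged sketch of launching it through the Python client follows; the Client endpoint and the launch_model/get_model calls are assumptions based on the existing xinference RESTful client, while the model name, format, size, and quantization values come from the spec above:

    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")  # assumed local endpoint
    model_uid = client.launch_model(
        model_name="yi-vl-chat",
        model_format="pytorch",
        model_size_in_billions=6,
        quantization="none",
    )
    model = client.get_model(model_uid)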
@@ -1957,5 +1957,61 @@
                 "<unk>"
             ]
         }
+    },
+    {
+        "version": 1,
+        "context_length": 204800,
+        "model_name": "yi-vl-chat",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "Yi Vision Language (Yi-VL) model is the open-source, multimodal version of the Yi Large Language Model (LLM) series, enabling content comprehension, recognition, and multi-round conversations about images.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 6,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "01ai/Yi-VL-6B"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 34,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "01ai/Yi-VL-34B"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "CHATML",
+            "system_prompt": "",
+            "roles": [
+                "<|im_start|>user",
+                "<|im_start|>assistant"
+            ],
+            "intra_message_sep": "<|im_end|>",
+            "inter_message_sep": "",
+            "stop_token_ids": [
+                2,
+                6,
+                7,
+                8
+            ],
+            "stop": [
+                "<|endoftext|>",
+                "<|im_start|>",
+                "<|im_end|>",
+                "<|im_sep|>"
+            ]
+        }
     }
 ]
@@ -120,9 +120,9 @@ class ChatglmPytorchChatModel(PytorchChatModel):
         top_p = generate_config.get("top_p")
         if top_p is not None:
             kwargs["top_p"] = float(top_p)
-        max_length = generate_config.get("max_tokens")
-        if max_length is not None:
-            kwargs["max_length"] = int(max_length)
+        max_new_tokens = generate_config.get("max_tokens")
+        if max_new_tokens is not None:
+            kwargs["max_new_tokens"] = int(max_new_tokens)
         # Tool calls only works for non stream, so we call chat directly.
         if prompt == SPECIAL_TOOL_PROMPT and chat_history:
             tool_message = chat_history.pop()
@@ -423,6 +423,7 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):
             "llama-2-chat",
             "internlm2-chat",
             "qwen-vl-chat",
+            "yi-vl-chat",
         ]:
             return False
         if "chat" not in llm_family.model_ability:
@@ -29,7 +29,12 @@ from transformers.generation.logits_process import (
     TopPLogitsWarper,
 )

-from ....types import CompletionChoice, CompletionChunk, CompletionUsage
+from ....types import (
+    CompletionChoice,
+    CompletionChunk,
+    CompletionUsage,
+    max_tokens_field,
+)

 logger = logging.getLogger(__name__)

@@ -54,16 +59,21 @@ def get_context_length(config):
         hasattr(config, "max_sequence_length")
         and config.max_sequence_length is not None
     ):
-        return config.max_sequence_length
-    elif hasattr(config, "seq_length") and config.seq_length is not None:
-        return config.seq_length
-    elif (
+        max_sequence_length = config.max_sequence_length
+    else:
+        max_sequence_length = 2048
+    if hasattr(config, "seq_length") and config.seq_length is not None:
+        seq_length = config.seq_length
+    else:
+        seq_length = 2048
+    if (
         hasattr(config, "max_position_embeddings")
         and config.max_position_embeddings is not None
     ):
-        return config.max_position_embeddings
+        max_position_embeddings = config.max_position_embeddings
     else:
-        return 2048
+        max_position_embeddings = 2048
+    return max(max_sequence_length, seq_length, max_position_embeddings)


 def prepare_logits_processor(
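
Condensed restatement of the revised get_context_length behaviour: rather than returning the first attribute found, it now evaluates all three candidate fields (each defaulting to 2048 when absent) and returns their maximum. The sketch below is standalone and uses SimpleNamespace as a stand-in for a transformers config object:

    from types import SimpleNamespace

    def context_length(config) -> int:
        # Each candidate falls back to 2048 when missing; the largest one wins.
        candidates = []
        for attr in ("max_sequence_length", "seq_length", "max_position_embeddings"):
            value = getattr(config, attr, None)
            candidates.append(value if value is not None else 2048)
        return max(candidates)

    cfg = SimpleNamespace(seq_length=2048, max_position_embeddings=32768)
    assert context_length(cfg) == 32768
    # The old early-return logic stopped at seq_length and reported 2048 here.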
@@ -102,7 +112,7 @@ def generate_stream(
     repetition_penalty = float(generate_config.get("repetition_penalty", 1.0))
     top_p = float(generate_config.get("top_p", 1.0))
     top_k = int(generate_config.get("top_k", -1))  # -1 means disable
-    max_new_tokens = int(generate_config.get("max_tokens", 256))
+    max_new_tokens = int(generate_config.get("max_tokens", max_tokens_field.default))
     echo = bool(generate_config.get("echo", False))
     stop_str = generate_config.get("stop", None)
     stop_token_ids = generate_config.get("stop_token_ids", None) or []
@@ -123,6 +133,8 @@ def generate_stream(
         max_src_len = context_len
     else:
         max_src_len = context_len - max_new_tokens - 8
+        if max_src_len < 0:
+            raise ValueError("Max tokens exceeds model's max length")

     input_ids = input_ids[-max_src_len:]
     input_echo_len = len(input_ids)
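
A worked instance of the new guard, with illustrative numbers: when the requested max_tokens leaves no room for the prompt inside the context window, max_src_len goes negative and the request is now rejected instead of silently slicing input_ids with a negative index.

    context_len = 4096      # model context window
    max_new_tokens = 5000   # requested max_tokens from generate_config
    max_src_len = context_len - max_new_tokens - 8  # -912
    if max_src_len < 0:
        raise ValueError("Max tokens exceeds model's max length")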
@@ -346,7 +358,7 @@ def generate_stream_falcon(
     repetition_penalty = float(generate_config.get("repetition_penalty", 1.0))
     top_p = float(generate_config.get("top_p", 1.0))
     top_k = int(generate_config.get("top_k", 50))  # -1 means disable
-    max_new_tokens = int(generate_config.get("max_tokens", 256))
+    max_new_tokens = int(generate_config.get("max_tokens", max_tokens_field.default))
     echo = bool(generate_config.get("echo", False))
     stop_str = generate_config.get("stop", None)
     stop_token_ids = generate_config.get("stop_token_ids", None) or []
@@ -0,0 +1,246 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import base64
+import logging
+import time
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+from io import BytesIO
+from threading import Thread
+from typing import Dict, Iterator, List, Optional, Union
+
+import requests
+import torch
+from PIL import Image
+
+from ....model.utils import select_device
+from ....types import (
+    ChatCompletion,
+    ChatCompletionChoice,
+    ChatCompletionChunk,
+    ChatCompletionMessage,
+    CompletionUsage,
+)
+from ..llm_family import LLMFamilyV1, LLMSpecV1
+from .core import PytorchChatModel, PytorchGenerateConfig
+
+logger = logging.getLogger(__name__)
+
+
+class YiVLChatModel(PytorchChatModel):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._tokenizer = None
+        self._model = None
+        self._image_processor = None
+
+    @classmethod
+    def match(
+        cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
+    ) -> bool:
+        if "yi" in model_family.model_name:
+            return True
+        return False
+
+    def load(self):
+        from ....thirdparty.llava.mm_utils import load_pretrained_model
+        from ....thirdparty.llava.model.constants import key_info
+
+        device = self._pytorch_model_config.get("device", "auto")
+        device = select_device(device)
+
+        key_info["model_path"] = self.model_path
+        (
+            self._tokenizer,
+            self._model,
+            self._image_processor,
+            _,
+        ) = load_pretrained_model(self.model_path, device_map=device)
+
+    @staticmethod
+    def _message_content_to_yi(content) -> Union[str, tuple]:
+        def _load_image(_url):
+            if _url.startswith("data:"):
+                logging.info("Parse url by base64 decoder.")
+                # https://platform.openai.com/docs/guides/vision/uploading-base-64-encoded-images
+                # e.g. f"data:image/jpeg;base64,{base64_image}"
+                _type, data = _url.split(";")
+                _, ext = _type.split("/")
+                data = data[len("base64,") :]
+                data = base64.b64decode(data.encode("utf-8"))
+
+                return Image.open(BytesIO(data))
+            else:
+                try:
+                    response = requests.get(_url)
+                except requests.exceptions.MissingSchema:
+                    return Image.open(_url)
+                else:
+                    return Image.open(BytesIO(response.content))
+
+        if not isinstance(content, str):
+            from ....thirdparty.llava.model.constants import DEFAULT_IMAGE_TOKEN
+
+            texts = []
+            image_urls = []
+            for c in content:
+                c_type = c.get("type")
+                if c_type == "text":
+                    texts.append(c["text"])
+                elif c_type == "image_url":
+                    image_urls.append(c["image_url"]["url"])
+            image_futures = []
+            with ThreadPoolExecutor() as executor:
+                for image_url in image_urls:
+                    fut = executor.submit(_load_image, image_url)
+                    image_futures.append(fut)
+            images = [fut.result() for fut in image_futures]
+            text = " ".join(texts)
+            if DEFAULT_IMAGE_TOKEN not in text:
+                text = DEFAULT_IMAGE_TOKEN + "\n" + text
+            if len(images) == 0:
+                return text
+            elif len(images) == 1:
+                return text, images[0], "Pad"
+            else:
+                raise RuntimeError("Only one image per message is supported by Yi VL.")
+        return content
+
+    @staticmethod
+    def _parse_text(text):
+        lines = text.split("\n")
+        lines = [line for line in lines if line != ""]
+        count = 0
+        for i, line in enumerate(lines):
+            if "```" in line:
+                count += 1
+                items = line.split("`")
+                if count % 2 == 1:
+                    lines[i] = f'<pre><code class="language-{items[-1]}">'
+                else:
+                    lines[i] = f"<br></code></pre>"
+            else:
+                if i > 0:
+                    if count % 2 == 1:
+                        line = line.replace("`", r"\`")
+                        line = line.replace("<", "&lt;")
+                        line = line.replace(">", "&gt;")
+                        line = line.replace(" ", "&nbsp;")
+                        line = line.replace("*", "&ast;")
+                        line = line.replace("_", "&lowbar;")
+                        line = line.replace("-", "&#45;")
+                        line = line.replace(".", "&#46;")
+                        line = line.replace("!", "&#33;")
+                        line = line.replace("(", "&#40;")
+                        line = line.replace(")", "&#41;")
+                        line = line.replace("$", "&#36;")
+                    lines[i] = "<br>" + line
+        text = "".join(lines)
+        return text
+
+    def chat(
+        self,
+        prompt: Union[str, List[Dict]],
+        system_prompt: Optional[str] = None,
+        chat_history: Optional[List[ChatCompletionMessage]] = None,
+        generate_config: Optional[PytorchGenerateConfig] = None,
+    ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
+        from transformers import TextIteratorStreamer
+
+        # TODO(codingl2k1): implement stream mode.
+        if generate_config and generate_config.get("stream"):
+            raise Exception(
+                f"Chat with model {self.model_family.model_name} does not support stream."
+            )
+        if not generate_config:
+            generate_config = {}
+        from ....thirdparty.llava.conversation import conv_templates
+        from ....thirdparty.llava.mm_utils import (
+            KeywordsStoppingCriteria,
+            tokenizer_image_token,
+        )
+        from ....thirdparty.llava.model.constants import IMAGE_TOKEN_INDEX
+
+        # Convert chat history to llava state
+        state = conv_templates["mm_default"].copy()
+        for message in chat_history or []:
+            content = self._message_content_to_yi(message["content"])
+            state.append_message(message["role"], content)
+        state.append_message(state.roles[0], self._message_content_to_yi(prompt))
+        state.append_message(state.roles[1], None)
+
+        prompt = state.get_prompt()
+
+        input_ids = (
+            tokenizer_image_token(
+                prompt, self._tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
+            )
+            .unsqueeze(0)
+            .cuda()
+        )
+
+        images = state.get_images(return_pil=True)
+        image = images[0]
+
+        image_tensor = self._image_processor.preprocess(image, return_tensors="pt")[
+            "pixel_values"
+        ][0]
+
+        stop_str = state.sep
+        keywords = [stop_str]
+        stopping_criteria = KeywordsStoppingCriteria(
+            keywords, self._tokenizer, input_ids
+        )
+        streamer = TextIteratorStreamer(
+            self._tokenizer, timeout=60, skip_prompt=True, skip_special_tokens=True
+        )
+        top_p = generate_config.get("top_p", 0.7)
+        temperature = generate_config.get("temperature", 0.2)
+        max_new_tokens = generate_config.get("max_tokens", 512)
+        generate_kwargs = {
+            "input_ids": input_ids,
+            "images": image_tensor.unsqueeze(0).to(dtype=torch.bfloat16).cuda(),
+            "streamer": streamer,
+            "do_sample": True,
+            "top_p": float(top_p),
+            "temperature": float(temperature),
+            "stopping_criteria": [stopping_criteria],
+            "use_cache": True,
+            "max_new_tokens": min(int(max_new_tokens), 1536),
+        }
+        t = Thread(target=self._model.generate, kwargs=generate_kwargs)
+        t.start()
+
+        generated_text = ""
+        for new_text in streamer:
+            generated_text += new_text
+            if generated_text.endswith(stop_str):
+                generated_text = generated_text[: -len(stop_str)]
+        r = self._parse_text(generated_text)
+        return ChatCompletion(
+            id="chat" + str(uuid.uuid1()),
+            object="chat.completion",
+            created=int(time.time()),
+            model=self.model_uid,
+            choices=[
+                ChatCompletionChoice(
+                    index=0,
+                    message={"role": "assistant", "content": r},
+                    finish_reason="stop",
+                )
+            ],
+            usage=CompletionUsage(
+                prompt_tokens=-1, completion_tokens=-1, total_tokens=-1
+            ),
+        )
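
Hedged example of the multimodal prompt that YiVLChatModel.chat accepts and _message_content_to_yi parses: OpenAI-style content parts mixing "text" and "image_url" entries, where the URL may be an HTTP(S) URL, a local path, or a base64 data URL. The model handle and image URL below are placeholders; model is assumed to be the handle returned by the client-side launch sketched earlier.

    prompt = [
        {"type": "text", "text": "What is shown in this picture?"},
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}},
        # Base64 data URLs are also handled by _load_image:
        # {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64_image}"}},
    ]
    completion = model.chat(prompt=prompt)
    print(completion["choices"][0]["message"]["content"])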
@@ -128,7 +128,7 @@ class RerankModel:

             raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
         self._model = CrossEncoder(
-            self._model_path, device=self._device, automodel_args=self._model_config
+            self._model_path, device=self._device, **self._model_config
         )
         if self._use_fp16:
             self._model.model.half()
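
The rerank change forwards model_config entries to CrossEncoder as top-level keyword arguments instead of nesting them under automodel_args, so keys must now match the CrossEncoder constructor. A hedged sketch, assuming sentence-transformers is installed; max_length is one example of such a kwarg, and which keys are accepted depends on the installed version:

    from sentence_transformers import CrossEncoder

    model_config = {"max_length": 512}  # forwarded directly, no longer wrapped in automodel_args
    model = CrossEncoder("BAAI/bge-reranker-base", device="cpu", **model_config)
    scores = model.predict([["what is a panda?", "The giant panda is a bear species endemic to China."]])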
@@ -10,5 +10,11 @@
         "language": ["en", "zh"],
         "model_id": "BAAI/bge-reranker-base",
         "model_revision": "465b4b7ddf2be0a020c8ad6e525b9bb1dbb708ae"
+    },
+    {
+        "model_name": "bce-reranker-base_v1",
+        "language": ["en", "zh"],
+        "model_id": "maidalun1020/bce-reranker-base_v1",
+        "model_revision": "eaa31a577a0574e87a08959bd229ca14ce1b5496"
     }
 ]
@@ -12,5 +12,12 @@
         "model_id": "Xorbits/bge-reranker-large",
         "model_revision": "v0.0.1",
         "model_hub": "modelscope"
+    },
+    {
+        "model_name": "bce-reranker-base_v1",
+        "language": ["en", "zh"],
+        "model_id": "maidalun/bce-reranker-base_v1",
+        "model_revision": "v0.0.1",
+        "model_hub": "modelscope"
     }
 ]
File without changes
@@ -0,0 +1 @@
+from .model import LlavaLlamaForCausalLM