xinference 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference has been flagged as potentially problematic by the registry.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +35 -1
- xinference/client/oscar/actor_client.py +2 -2
- xinference/client/restful/restful_client.py +2 -2
- xinference/conftest.py +5 -1
- xinference/core/metrics.py +83 -0
- xinference/core/model.py +148 -8
- xinference/core/status_guard.py +86 -0
- xinference/core/supervisor.py +57 -7
- xinference/core/worker.py +132 -13
- xinference/deploy/cmdline.py +57 -4
- xinference/deploy/local.py +32 -6
- xinference/deploy/worker.py +33 -5
- xinference/fields.py +4 -1
- xinference/model/llm/__init__.py +7 -0
- xinference/model/llm/ggml/llamacpp.py +3 -2
- xinference/model/llm/llm_family.json +70 -3
- xinference/model/llm/llm_family.py +11 -1
- xinference/model/llm/llm_family_modelscope.json +72 -3
- xinference/model/llm/pytorch/chatglm.py +70 -28
- xinference/model/llm/pytorch/core.py +11 -30
- xinference/model/llm/pytorch/internlm2.py +155 -0
- xinference/model/llm/pytorch/utils.py +0 -153
- xinference/model/llm/utils.py +37 -8
- xinference/model/llm/vllm/core.py +15 -3
- xinference/model/multimodal/__init__.py +15 -8
- xinference/model/multimodal/model_spec_modelscope.json +45 -0
- xinference/model/utils.py +7 -2
- xinference/types.py +2 -0
- {xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/METADATA +2 -1
- {xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/RECORD +35 -31
- {xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/LICENSE +0 -0
- {xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/WHEEL +0 -0
- {xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/pytorch/utils.py CHANGED
@@ -14,7 +14,6 @@
 
 import gc
 import logging
-import re
 import time
 import uuid
 from threading import Thread
@@ -23,7 +22,6 @@ from typing import Iterable, Iterator, Tuple
 import torch
 from transformers import GenerationConfig, TextIteratorStreamer
 from transformers.generation.logits_process import (
-    LogitsProcessor,
     LogitsProcessorList,
     RepetitionPenaltyLogitsProcessor,
     TemperatureLogitsWarper,
@@ -480,154 +478,3 @@ def generate_stream_falcon(
     # clean
     gc.collect()
     torch.cuda.empty_cache()
-
-
-class InvalidScoreLogitsProcessor(LogitsProcessor):
-    def __call__(
-        self, input_ids: torch.LongTensor, scores: torch.FloatTensor
-    ) -> torch.FloatTensor:
-        if torch.isnan(scores).any() or torch.isinf(scores).any():
-            scores.zero_()
-            scores[..., 5] = 5e4
-        return scores
-
-
-invalid_score_processor = InvalidScoreLogitsProcessor()
-
-
-def process_response(response):
-    response = response.strip()
-    response = response.replace("[[训练时间]]", "2023年")
-    punkts = [
-        [",", ","],
-        ["!", "!"],
-        [":", ":"],
-        [";", ";"],
-        ["\\?", "?"],
-    ]
-    for item in punkts:
-        response = re.sub(r"([\u4e00-\u9fff])%s" % item[0], r"\1%s" % item[1], response)
-        response = re.sub(r"%s([\u4e00-\u9fff])" % item[0], r"%s\1" % item[1], response)
-    return response
-
-
-@torch.inference_mode()
-def generate_stream_chatglm(
-    model_uid,
-    model,
-    tokenizer,
-    prompt,
-    device,
-    generate_config,
-    judge_sent_end=False,
-):
-    stream = generate_config.get("stream", False)
-    temperature = float(generate_config.get("temperature", 1.0))
-    repetition_penalty = float(generate_config.get("repetition_penalty", 1.0))
-    top_p = float(generate_config.get("top_p", 1.0))
-    max_new_tokens = int(generate_config.get("max_tokens", 256))
-    echo = generate_config.get("echo", False)
-    stop_str = generate_config.get("stop", None)
-    eos_token_id = generate_config.get("stop_token_ids", [])
-    eos_token_id.append(tokenizer.eos_token_id)
-
-    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
-    input_echo_len = len(inputs["input_ids"][0])
-    gen_kwargs = {
-        "max_length": max_new_tokens + input_echo_len,
-        "do_sample": True if temperature > 1e-5 else False,
-        "top_p": top_p,
-        "repetition_penalty": repetition_penalty,
-        "logits_processor": [invalid_score_processor],
-    }
-    if temperature > 1e-5:
-        gen_kwargs["temperature"] = temperature
-
-    total_len = 0
-    last_response_length = 0
-    for total_ids in model.stream_generate(
-        **inputs, eos_token_id=eos_token_id, **gen_kwargs
-    ):
-        total_ids = total_ids.tolist()[0]
-        total_len = len(total_ids)
-        if echo:
-            output_ids = total_ids
-        else:
-            output_ids = total_ids[input_echo_len:]
-        response = tokenizer.decode(output_ids)
-        response = process_response(response)
-
-        partially_stopped = False
-        stopped = False
-        if stop_str:
-            if isinstance(stop_str, str):
-                pos = response.rfind(stop_str, 0)
-                if pos != -1:
-                    response = response[:pos]
-                    stopped = True
-                else:
-                    partially_stopped = is_partial_stop(response, stop_str)
-            elif isinstance(stop_str, Iterable):
-                for each_stop in stop_str:
-                    pos = response.rfind(each_stop, 0)
-                    if pos != -1:
-                        response = response[:pos]
-                        stopped = True
-                        break
-                    else:
-                        partially_stopped = is_partial_stop(response, each_stop)
-                        if partially_stopped:
-                            break
-            else:
-                raise ValueError("Invalid stop field type.")
-
-        if stream:
-            response = response.strip("�")
-            tmp_response_length = len(response)
-            response = response[last_response_length:]
-            last_response_length = tmp_response_length
-
-        if not partially_stopped:
-            completion_choice = CompletionChoice(
-                text=response, index=0, logprobs=None, finish_reason=None
-            )
-            completion_chunk = CompletionChunk(
-                id=str(uuid.uuid1()),
-                object="text_completion",
-                created=int(time.time()),
-                model=model_uid,
-                choices=[completion_choice],
-            )
-            completion_usage = CompletionUsage(
-                prompt_tokens=input_echo_len,
-                completion_tokens=(total_len - input_echo_len),
-                total_tokens=total_len,
-            )
-
-            yield completion_chunk, completion_usage
-
-        if stopped:
-            break
-
-    if total_len - input_echo_len == max_new_tokens - 1:
-        finish_reason = "length"
-    else:
-        finish_reason = "stop"
-
-    completion_choice = CompletionChoice(
-        text=response, index=0, logprobs=None, finish_reason=finish_reason
-    )
-    completion_chunk = CompletionChunk(
-        id=str(uuid.uuid1()),
-        object="text_completion",
-        created=int(time.time()),
-        model=model_uid,
-        choices=[completion_choice],
-    )
-    completion_usage = CompletionUsage(
-        prompt_tokens=input_echo_len,
-        completion_tokens=(total_len - input_echo_len),
-        total_tokens=total_len,
-    )
-
-    yield completion_chunk, completion_usage
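For context on what was dropped above: the removed process_response helper normalized ASCII punctuation adjacent to CJK characters into full-width punctuation before each streamed ChatGLM chunk. A minimal standalone sketch of that behavior, kept only for illustration since the helper no longer exists in 0.8.1:

import re

# Standalone re-creation of the removed process_response punctuation pass
# (illustrative only, not part of xinference 0.8.1).
def normalize_cjk_punctuation(response: str) -> str:
    response = response.strip()
    punkts = [
        [",", ","],
        ["!", "!"],
        [":", ":"],
        [";", ";"],
        ["\\?", "?"],
    ]
    for ascii_pat, fullwidth in punkts:
        # Replace ASCII punctuation that directly follows or precedes a CJK character.
        response = re.sub(r"([\u4e00-\u9fff])%s" % ascii_pat, r"\1%s" % fullwidth, response)
        response = re.sub(r"%s([\u4e00-\u9fff])" % ascii_pat, r"%s\1" % fullwidth, response)
    return response

print(normalize_cjk_punctuation("你好,世界!"))  # -> 你好,世界!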
xinference/model/llm/utils.py CHANGED
@@ -16,7 +16,7 @@ import json
 import logging
 import time
 import uuid
-from typing import AsyncGenerator, Dict, Iterator, List, Optional
+from typing import AsyncGenerator, Dict, Iterator, List, Optional, cast
 
 from xinference.model.llm.llm_family import PromptStyleV1
 
@@ -299,6 +299,24 @@ Begin!"""
            )
            ret += chat_history[-1]["role"] + ":"
            return ret
+        elif prompt_style.style_name == "INTERNLM2":
+            ret = (
+                "<s>"
+                if prompt_style.system_prompt == ""
+                else "<s>[UNUSED_TOKEN_146]system\n"
+                + prompt_style.system_prompt
+                + prompt_style.intra_message_sep
+                + "\n"
+            )
+            for message in chat_history:
+                role = message["role"]
+                content = message["content"]
+
+                if content:
+                    ret += role + "\n" + content + prompt_style.intra_message_sep + "\n"
+                else:
+                    ret += role + "\n"
+            return ret
         elif prompt_style.style_name == "ADD_COLON_SINGLE_COT":
             ret = prompt_style.system_prompt + prompt_style.intra_message_sep
             for message in chat_history:
@@ -360,7 +378,7 @@ Begin!"""
 
     @classmethod
     def _to_chat_completion_chunk(cls, chunk: CompletionChunk) -> ChatCompletionChunk:
-
+        chat_chunk = {
             "id": "chat" + chunk["id"],
             "model": chunk["model"],
             "created": chunk["created"],
@@ -376,12 +394,16 @@ Begin!"""
                 for i, choice in enumerate(chunk["choices"])
             ],
         }
+        usage = chunk.get("usage")
+        if usage is not None:
+            chat_chunk["usage"] = usage
+        return cast(ChatCompletionChunk, chat_chunk)
 
     @classmethod
     def _get_first_chat_completion_chunk(
         cls, chunk: CompletionChunk
     ) -> ChatCompletionChunk:
-
+        chat_chunk = {
             "id": "chat" + chunk["id"],
             "model": chunk["model"],
             "created": chunk["created"],
@@ -397,6 +419,10 @@ Begin!"""
                 for i, choice in enumerate(chunk["choices"])
             ],
         }
+        usage = chunk.get("usage")
+        if usage is not None:
+            chat_chunk["usage"] = usage
+        return cast(ChatCompletionChunk, chat_chunk)
 
     @classmethod
     def _to_chat_completion_chunks(
@@ -494,16 +520,19 @@ Begin!"""
         return text, None, None
 
     @classmethod
-    def _tool_calls_completion(cls,
+    def _tool_calls_completion(cls, model_family, model_uid, c, tools):
         _id = str(uuid.uuid4())
-
+        family = model_family.model_family or model_family.model_name
+        if "gorilla-openfunctions-v1" == family:
             content, func, args = cls._eval_gorilla_openfunctions_arguments(c, tools)
-        elif
+        elif "chatglm3" == family:
             content, func, args = cls._eval_chatglm3_arguments(c, tools)
-        elif
+        elif "qwen-chat" == family:
             content, func, args = cls._eval_qwen_chat_arguments(c, tools)
         else:
-            raise Exception(
+            raise Exception(
+                f"Model {model_family.model_name} is not support tool calls."
+            )
         logger.debug("Tool call content: %s, func: %s, args: %s", content, func, args)
 
         if content:
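The INTERNLM2 branch added above assembles the prompt from a <s> prefix, an optional [UNUSED_TOKEN_146]system block, and role/content pairs joined by the family's intra_message_sep. A minimal sketch of that string construction follows; the separator value and role tokens used here are illustrative assumptions, since the real values come from the model's prompt_style configuration in llm_family.json:

from typing import Dict, List

def build_internlm2_prompt(
    system_prompt: str,
    chat_history: List[Dict[str, str]],
    intra_message_sep: str = "[UNUSED_TOKEN_145]",  # assumed value; the real one comes from prompt_style
) -> str:
    # Mirrors the INTERNLM2 branch shown in the diff above (illustrative only).
    ret = (
        "<s>"
        if system_prompt == ""
        else "<s>[UNUSED_TOKEN_146]system\n" + system_prompt + intra_message_sep + "\n"
    )
    for message in chat_history:
        role = message["role"]
        content = message["content"]
        if content:
            ret += role + "\n" + content + intra_message_sep + "\n"
        else:
            ret += role + "\n"
    return ret

# Hypothetical role strings; in xinference they are taken from prompt_style.roles.
history = [
    {"role": "[UNUSED_TOKEN_146]user", "content": "Hello"},
    {"role": "[UNUSED_TOKEN_146]assistant", "content": ""},
]
print(build_internlm2_prompt("You are a helpful assistant.", history))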
xinference/model/llm/vllm/core.py CHANGED
@@ -94,6 +94,7 @@ VLLM_SUPPORTED_CHAT_MODELS = [
     "code-llama-python",
     "code-llama-instruct",
     "mistral-instruct-v0.1",
+    "mistral-instruct-v0.2",
     "chatglm3",
 ]
 
@@ -170,7 +171,7 @@ class VLLMModel(LLM):
         )
         sanitized.setdefault("temperature", generate_config.get("temperature", 1.0))
         sanitized.setdefault("top_p", generate_config.get("top_p", 1.0))
-        sanitized.setdefault("max_tokens", generate_config.get("max_tokens",
+        sanitized.setdefault("max_tokens", generate_config.get("max_tokens", 1024))
         sanitized.setdefault("stop", generate_config.get("stop", None))
         sanitized.setdefault(
             "stop_token_ids", generate_config.get("stop_token_ids", None)
@@ -303,6 +304,16 @@ class VLLMModel(LLM):
                 delta = choice["text"][len(previous_texts[i]) :]
                 previous_texts[i] = choice["text"]
                 choice["text"] = delta
+            prompt_tokens = len(_request_output.prompt_token_ids)
+            completion_tokens = sum(
+                len(output.token_ids) for output in _request_output.outputs
+            )
+            total_tokens = prompt_tokens + completion_tokens
+            chunk["usage"] = CompletionUsage(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=total_tokens,
+            )
             yield chunk
 
         if stream:
@@ -379,7 +390,8 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
 
         generate_config = self._sanitize_chat_config(generate_config)
         # TODO(codingl2k1): qwen hacky to set stop for function call.
-
+        model_family = self.model_family.model_family or self.model_family.model_name
+        if tools and "qwen-chat" == model_family:
             stop = generate_config.get("stop")
             if isinstance(stop, str):
                 generate_config["stop"] = [stop, "Observation:"]
@@ -400,6 +412,6 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         assert not isinstance(c, AsyncGenerator)
         if tools:
             return self._tool_calls_completion(
-                self.model_family
+                self.model_family, self.model_uid, c, tools
            )
         return self._to_chat_completion(c)
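The usage accounting added to the vLLM streaming path counts prompt tokens once and sums token ids across all outputs of the request. A standalone sketch of the same arithmetic against a mock object; the mock classes below are hypothetical stand-ins reduced to the attributes the diff reads:

from dataclasses import dataclass, field
from typing import Dict, List

# Hypothetical stand-ins for vLLM's RequestOutput/CompletionOutput.
@dataclass
class MockCompletionOutput:
    token_ids: List[int] = field(default_factory=list)

@dataclass
class MockRequestOutput:
    prompt_token_ids: List[int] = field(default_factory=list)
    outputs: List[MockCompletionOutput] = field(default_factory=list)

def usage_from_request_output(request_output: MockRequestOutput) -> Dict[str, int]:
    # Same arithmetic as the chunk["usage"] block added in the diff above.
    prompt_tokens = len(request_output.prompt_token_ids)
    completion_tokens = sum(len(o.token_ids) for o in request_output.outputs)
    return {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
    }

demo = MockRequestOutput(prompt_token_ids=[1, 2, 3], outputs=[MockCompletionOutput([4, 5])])
print(usage_from_request_output(demo))  # {'prompt_tokens': 3, 'completion_tokens': 2, 'total_tokens': 5}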
xinference/model/multimodal/__init__.py CHANGED
@@ -30,16 +30,23 @@ MODEL_CLASSES.append(QwenVLChat)
 
 
 def _install():
-
+    json_path_huggingface = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "model_spec.json"
     )
-
-
-
-
-
-
-
+    json_path_modelscope = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "model_spec_modelscope.json"
+    )
+    for builtin_family, json_path in [
+        (BUILTIN_LVLM_FAMILIES, json_path_huggingface),
+        (BUILTIN_MODELSCOPE_LVLM_FAMILIES, json_path_modelscope),
+    ]:
+        for json_obj in json.load(codecs.open(json_path, "r", encoding="utf-8")):
+            model_family = LVLMFamilyV1.parse_obj(json_obj)
+            builtin_family.append(model_family)
+            for model_spec in model_family.model_specs:
+                MODEL_NAME_TO_REVISION[model_family.model_name].append(
+                    model_spec.model_revision
+                )
 
 
 _install()
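The rewritten _install above now walks a (family-registry, json-path) pair per hub instead of loading only the Hugging Face spec. A minimal sketch of that registration pattern, with hypothetical stand-ins for the registries and without the pydantic validation the real code performs via LVLMFamilyV1.parse_obj:

import codecs
import json
import os
from collections import defaultdict

# Hypothetical stand-ins for the real registries.
BUILTIN_LVLM_FAMILIES: list = []
BUILTIN_MODELSCOPE_LVLM_FAMILIES: list = []
MODEL_NAME_TO_REVISION = defaultdict(list)

def install_from_specs(spec_dir: str) -> None:
    json_path_huggingface = os.path.join(spec_dir, "model_spec.json")
    json_path_modelscope = os.path.join(spec_dir, "model_spec_modelscope.json")
    for builtin_family, json_path in [
        (BUILTIN_LVLM_FAMILIES, json_path_huggingface),
        (BUILTIN_MODELSCOPE_LVLM_FAMILIES, json_path_modelscope),
    ]:
        # Each spec file is a JSON list of model-family objects.
        for json_obj in json.load(codecs.open(json_path, "r", encoding="utf-8")):
            builtin_family.append(json_obj)
            for model_spec in json_obj.get("model_specs", []):
                MODEL_NAME_TO_REVISION[json_obj["model_name"]].append(
                    model_spec.get("model_revision")
                )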
xinference/model/multimodal/model_spec_modelscope.json ADDED
@@ -0,0 +1,45 @@
+[
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "qwen-vl-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Qwen-VL-Chat supports more flexible interaction, such as multiple image inputs, multi-round question answering, and creative capabilities.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "Qwen/Qwen-VL-Chat",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "Qwen/Qwen-VL-Chat-{quantization}",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "QWEN",
+      "system_prompt": "You are a helpful assistant.",
+      "roles": [
+        "user",
+        "assistant"
+      ]
+    }
+  }
+]
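The gptq spec's model_id carries a {quantization} placeholder; the usual convention (an assumption here, shown only to clarify why the placeholder appears) is that it gets substituted with the selected quantization when the hub repo is resolved:

# Hypothetical illustration of how a templated model_id would resolve.
model_id_template = "Qwen/Qwen-VL-Chat-{quantization}"
print(model_id_template.format(quantization="Int4"))  # Qwen/Qwen-VL-Chat-Int4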
xinference/model/utils.py CHANGED
@@ -153,8 +153,13 @@ def is_model_cached(model_spec: Any, name_to_revisions_mapping: Dict):
 
 
 def is_valid_model_name(model_name: str) -> bool:
-
-
+    import re
+
+    if len(model_name) == 0:
+        return False
+
+    # check if contains +/?%#&=\s
+    return re.match(r"^[^+\/?%#&=\s]*$", model_name) is not None
 
 
 def parse_uri(uri: str) -> Tuple[str, str]:
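The new is_valid_model_name rejects empty names and any name containing +, /, ?, %, #, &, = or whitespace. A quick self-contained check of the same regex against a few sample names:

import re

def is_valid_model_name(model_name: str) -> bool:
    # Same logic as the function added in xinference/model/utils.py above.
    if len(model_name) == 0:
        return False
    return re.match(r"^[^+\/?%#&=\s]*$", model_name) is not None

print(is_valid_model_name("my-custom-llama"))  # True
print(is_valid_model_name("bad/name"))         # False (contains '/')
print(is_valid_model_name("bad name"))         # False (contains whitespace)
print(is_valid_model_name(""))                 # False (empty)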
xinference/types.py CHANGED
@@ -110,6 +110,7 @@ class CompletionChunk(TypedDict):
     created: int
     model: str
     choices: List[CompletionChoice]
+    usage: NotRequired[CompletionUsage]
 
 
 class Completion(TypedDict):
@@ -160,6 +161,7 @@ class ChatCompletionChunk(TypedDict):
     object: Literal["chat.completion.chunk"]
     created: int
     choices: List[ChatCompletionChunkChoice]
+    usage: NotRequired[CompletionUsage]
 
 
 class ChatglmCppModelConfig(TypedDict, total=False):
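Because usage is declared NotRequired, existing chunk producers that omit it still type-check, and consumers must treat the key as optional. A small illustration, with the type definitions reduced to what the snippet needs (not the full xinference types):

from typing import List
from typing_extensions import NotRequired, TypedDict

# Reduced stand-ins for the xinference types touched by this change.
class CompletionUsage(TypedDict):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int

class CompletionChunk(TypedDict):
    id: str
    object: str
    created: int
    model: str
    choices: List[dict]
    usage: NotRequired[CompletionUsage]  # new in 0.8.1: may be absent

chunk_without_usage: CompletionChunk = {
    "id": "cmpl-1", "object": "text_completion", "created": 0, "model": "demo", "choices": [],
}
chunk_with_usage: CompletionChunk = {
    "id": "cmpl-2", "object": "text_completion", "created": 0, "model": "demo", "choices": [],
    "usage": {"prompt_tokens": 3, "completion_tokens": 2, "total_tokens": 5},
}
usage = chunk_with_usage.get("usage")  # consumers should handle a missing key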
{xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: xinference
-Version: 0.8.
+Version: 0.8.1
 Summary: Model Serving Made Easy
 Home-page: https://github.com/xorbitsai/inference
 Author: Qin Xuye
@@ -39,6 +39,7 @@ Requires-Dist: sse-starlette >=1.6.5
 Requires-Dist: openai >1
 Requires-Dist: python-jose[cryptography]
 Requires-Dist: passlib[bcrypt]
+Requires-Dist: aioprometheus[starlette]
 Provides-Extra: all
 Requires-Dist: chatglm-cpp >=0.3.0 ; extra == 'all'
 Requires-Dist: ctransformers ; extra == 'all'
{xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/RECORD CHANGED
@@ -1,13 +1,13 @@
 xinference/__init__.py,sha256=jv7PR7ali6n5TpvGjB3hKugwB9Tq-eSTyc_xl2gFnZ0,910
-xinference/_version.py,sha256
-xinference/conftest.py,sha256=
+xinference/_version.py,sha256=6jc3tqkig6J9BPzwIKDs3G9C6KyubOgPgzkt_BI8zeg,497
+xinference/conftest.py,sha256=mGhwGBCmu4SNXx-akGw6rzYO2wfB9gTwjewC--geSSw,9315
 xinference/constants.py,sha256=JfcCKl28iyuDAnfue9FrvK34KEfg0dKyoo1-2hzQTJ4,2343
-xinference/fields.py,sha256=
+xinference/fields.py,sha256=xRpDiZXVORKoC9rG3eqwxT-BFuAojhJlxJTsAQHzJ24,5075
 xinference/isolation.py,sha256=NstVRcO3dG4umHExICXAHlzVKwH8ch8MBwKwE-KFkE0,1826
-xinference/types.py,sha256=
+xinference/types.py,sha256=dhWPyZR-YX6AGaPhPTxxem_Q0fWYN9dHQILeWFvW4yI,11704
 xinference/utils.py,sha256=Z6PPDGmX4EW8OD3OfA2Wa37ZM9OdRTnR00ITMDTu4qE,716
 xinference/api/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/api/restful_api.py,sha256=
+xinference/api/restful_api.py,sha256=gCx4tRD124-x61zNnFHLQMBotDcp8en0y34kYE7T2co,42684
 xinference/api/oauth2/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/api/oauth2/common.py,sha256=YqEUHgyUnsL3SJbTMuS8Prd7l_AI0y9HHhSEdfJ-iSI,613
 xinference/api/oauth2/core.py,sha256=1m8IDdZ9FH5SZvlurSKPKGbrucG47_04Re0PmOJRTg8,3121
@@ -17,22 +17,24 @@ xinference/client/__init__.py,sha256=Gc4HOzAy_1cic5kXlso7hahYgw89CKvZSJDicEU461k
 xinference/client/common.py,sha256=wk-3j1tJPNa60tHO8YZ5z7iBIeI6cBEqomKIatyMQx8,1554
 xinference/client/handlers.py,sha256=nFQQMRHiu_c2nsqVWMA792-QfXXq6wIW3dPR5Q7H-f0,545
 xinference/client/oscar/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/client/oscar/actor_client.py,sha256=
+xinference/client/oscar/actor_client.py,sha256=fWGbqCaJHp33CSgtznPosryTD88KWSjQLQebHkGvQCI,21545
 xinference/client/restful/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/client/restful/restful_client.py,sha256=
+xinference/client/restful/restful_client.py,sha256=jGWMPJNtI1m-rZ3GmYwDDkZqWpyOXMiaSX-PnaF13vI,37703
 xinference/core/__init__.py,sha256=Fe5tYCHDbYJ7PhxJhQ68VbfgKgOsAuslNPr4wPhFMJM,612
 xinference/core/chat_interface.py,sha256=ec9fVV-5m88PoXrfvr4C-m74L_p35nSVHTf-fupW1jc,17114
-xinference/core/
+xinference/core/metrics.py,sha256=ScmTG15Uq3h_ob72ybZSMWdnk8P4sUZFcm60f4ikSXc,2631
+xinference/core/model.py,sha256=vQ_Q65GEFb5y9RuhhqNJNN5fUy7Mou3pO_lpvAwZzD0,16258
 xinference/core/resource.py,sha256=oG44ZpXQL7uz4pbowtHGdpHarNJ52CK6KBLJRMcySpk,1516
-xinference/core/
+xinference/core/status_guard.py,sha256=YtQUU_I_9fn74e4xg2xqaLdmww6F4Jx0N96jHsfjR88,2644
+xinference/core/supervisor.py,sha256=afrGeR0yKUjJZCb4gSBWS1EAcc9DDQp0vwV_IdtZjEU,28970
 xinference/core/utils.py,sha256=oVM_bIbbI0w4n3lFIytQ7wMmlIBrxPstrUu7Neahvyw,3528
-xinference/core/worker.py,sha256=
+xinference/core/worker.py,sha256=SvHPX66YSPo1WrNSpTMMhTfNC-fmXQzLFcQrNriBjdo,23029
 xinference/deploy/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/deploy/cmdline.py,sha256=
-xinference/deploy/local.py,sha256
+xinference/deploy/cmdline.py,sha256=FcxjI24_8-0CzHZlN-lGvFxGsfN76y0lgDDzuGEpF_0,28376
+xinference/deploy/local.py,sha256=99xpzEx9yFj6xPyplzseF64ZCcVOp3oDKZRTBnJF8Io,3933
 xinference/deploy/supervisor.py,sha256=N2EnjN0_lNT92ygZ6qiip6BeodK7dazT2sWWj_wRU_4,2965
 xinference/deploy/utils.py,sha256=wR8dUZud1k7gSmd4M7l6Rq2rtrhTn5qLGmM1XB1IHr4,4941
-xinference/deploy/worker.py,sha256=
+xinference/deploy/worker.py,sha256=IzmENJE-g0bujAReC5I_iZE6epdgRwaobALKPIk7csA,2915
 xinference/deploy/test/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/deploy/test/test_cmdline.py,sha256=bEQ4-y2IG-ntOA6tP3_EP22WQelMAiJ9BQkuBmRzZAs,6736
 xinference/locale/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
@@ -40,7 +42,7 @@ xinference/locale/utils.py,sha256=w-G1DAJGw1UUQVVtq6khOZn7ZjobUmTw6qwHMm2eWIs,13
 xinference/locale/zh_CN.json,sha256=YA55G9s1p05Bt5RBoDo5SV12dd-CMJI0ABap6RpCp4M,1097
 xinference/model/__init__.py,sha256=IRC3ojiqYkVLIK_xsIxYeKypEeeTTdrovnVzK_4L4eg,663
 xinference/model/core.py,sha256=D1EJ9ZJ8VkD0I8NcHcUBA4U9cbm-rcaHkmHL-tq8INY,3041
-xinference/model/utils.py,sha256=
+xinference/model/utils.py,sha256=V2Ei0QKkQS3hW_TpfEmHsl1nAIdBrOZH7sMTTWHp7Mg,9727
 xinference/model/embedding/__init__.py,sha256=uU_fNnMbEpl6LxgzN_FC9U3DisjQwCwIvKv0gfjOFkI,2119
 xinference/model/embedding/core.py,sha256=ANZCSv6rCYpzaPcqcgmzwgSrilinXoBLAAPbzpsyh2A,16138
 xinference/model/embedding/custom.py,sha256=iE3-iWVzxarXdeTdw5e6rxv6HQRXVbPHp65wwhT2IL8,3919
@@ -51,36 +53,38 @@ xinference/model/image/core.py,sha256=ectdISnWjTvxxbj-ty9fgqNzLCBo7uX1q_iA6CbHs1
 xinference/model/image/model_spec.json,sha256=VBo3jTq93UtwD9fB1oqrpIJVaZbyYNiougZuY81pt8g,2965
 xinference/model/image/stable_diffusion/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/model/image/stable_diffusion/core.py,sha256=BdslgN_T7f2nzmxtRz0fT2S0P6tTN9zJ_UE6XO2bLLs,5152
-xinference/model/llm/__init__.py,sha256=
+xinference/model/llm/__init__.py,sha256=x_f2h_D6aUWtGbkmVSZyzvDA-DcKT3mts3EXxFjWlr4,5244
 xinference/model/llm/core.py,sha256=YDUB-MGEu9CtHxtUKFlu9gNitFO-JvLb6xhaqOYdbZM,7761
-xinference/model/llm/llm_family.json,sha256=
-xinference/model/llm/llm_family.py,sha256=
-xinference/model/llm/llm_family_modelscope.json,sha256
-xinference/model/llm/utils.py,sha256=
+xinference/model/llm/llm_family.json,sha256=b4PAz9EbD5RjIbC3CR--FaPmDuupViK_ecJeDDJTpjg,80701
+xinference/model/llm/llm_family.py,sha256=6cqVSQsilvZIITwB0kUYIrkj1h8orH1uV1rO7XSYCf0,30989
+xinference/model/llm/llm_family_modelscope.json,sha256=JkaOILdduYXaAS_knca2lO075mUwx4kaOvYN9XCR250,45570
+xinference/model/llm/utils.py,sha256=QyJLOcnibkCNvJwxZhxbAJPWB_F3T9-90OQaOBDT39I,23028
 xinference/model/llm/ggml/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/model/llm/ggml/chatglm.py,sha256=XJ6lyr4Kl6-u65ZxFtEjPbIcd0tTdudKXYoEWFoNXig,12555
 xinference/model/llm/ggml/ctransformers.py,sha256=n8dTItZe97cF79NkmsVPirqpBcrZiGAQfd2GRpz9-3I,9917
 xinference/model/llm/ggml/ctransformers_util.py,sha256=WozFJgJZlbuEDPQLhy31YmwGp-oJoUYsnd9HjuGraIE,5271
-xinference/model/llm/ggml/llamacpp.py,sha256=
+xinference/model/llm/ggml/llamacpp.py,sha256=aK4EU25ryGgCbY9HRFfAdFusg385MB_UA1OT-p8fFQs,12241
 xinference/model/llm/ggml/tools/__init__.py,sha256=6a6P2VPKE06xKxJ-dTqp4TRO2IEDWvtcVP6gHutAR0M,624
 xinference/model/llm/ggml/tools/convert_ggml_to_gguf.py,sha256=ULvaoAKGH-L6RuRLFOtAOVitKLVPdpd5QyXrLL14gG0,17959
 xinference/model/llm/ggml/tools/gguf.py,sha256=Hv2haR-UN7NdB1N8YId32hFoEPd-JX6_aUNWRJhyJZc,30277
 xinference/model/llm/pytorch/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/model/llm/pytorch/baichuan.py,sha256=6d9UQw5Ox1FFdPG-cO3aKPcUmio5bQ4j1X5B0eCg5TU,2703
-xinference/model/llm/pytorch/chatglm.py,sha256=
+xinference/model/llm/pytorch/chatglm.py,sha256=2YCZL86ABFOjjoVXGxBwK9V80j71gSTfHAYEUveskyY,6991
 xinference/model/llm/pytorch/compression.py,sha256=rN8z0yt2xV9ASLv1muYStyBdvZjGCQL-OYaEga4FwIQ,8081
-xinference/model/llm/pytorch/core.py,sha256=
+xinference/model/llm/pytorch/core.py,sha256=fE2Faags4qg3dwuDHAYShPYNeB-DyH6KbHmlDDdrF0A,17920
 xinference/model/llm/pytorch/falcon.py,sha256=wC3_ILUtoimtOWox2_Sr94OcEkMYoB7Mf84R4LYHMGI,4298
+xinference/model/llm/pytorch/internlm2.py,sha256=J9byaXpgPQ28zp8EEqz2hggX2d7bdmY5-e2Mp6SIeTU,5569
 xinference/model/llm/pytorch/llama_2.py,sha256=JQI9R_ZrNhTXg_MbS7el6P-ou49iMKq5vnr9QlQ_o70,3509
 xinference/model/llm/pytorch/spec_decoding_utils.py,sha256=t059oJ0kvcXMA1pKizN1HDzs0LMf5Jpb5MM7aMNKnzQ,18750
 xinference/model/llm/pytorch/spec_model.py,sha256=2rmSYaliu0nwOQzc3rnYFtBLFBayKZ9Xe2hkJUbe7mo,6810
-xinference/model/llm/pytorch/utils.py,sha256=
+xinference/model/llm/pytorch/utils.py,sha256=zKpyeF7JV0sMv4jmwiYQGCxpk6VsbXtgOmnyksHUs8M,16870
 xinference/model/llm/pytorch/vicuna.py,sha256=eJ2HVHg-HbhVHRdA5g4N9I2_cCti4K9y3Y4WjMMzAIs,2217
 xinference/model/llm/vllm/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/model/llm/vllm/core.py,sha256=
-xinference/model/multimodal/__init__.py,sha256=
+xinference/model/llm/vllm/core.py,sha256=GwQ9eBlEV0NHsrxERx7j4usR3qZEElFkPoAgBIL1VnU,14966
+xinference/model/multimodal/__init__.py,sha256=EjP1dK0s-N5GyePkX4vauris7CfwsoT7Sb2E8OFoysM,1687
 xinference/model/multimodal/core.py,sha256=OupAMjRRc4UjhxWapVaXpymSl7gz1Ss4YTL_lTBO3wI,15429
 xinference/model/multimodal/model_spec.json,sha256=jPDB7Yhn13-e4h2lBm2yF0xQXIgv1cI6Jpto3AjCnto,1071
+xinference/model/multimodal/model_spec_modelscope.json,sha256=yf6naWPy28mVWanW8BMWlwL8gBmzFsMYCVLg2XhzYh8,1073
 xinference/model/multimodal/qwen_vl.py,sha256=DCk_AzDiM5eX7gSRCv1Y3IBRDCwAXEMcM-1jq_HUEEE,5490
 xinference/model/rerank/__init__.py,sha256=FEFQSLCNihIgUb28EAMxutVvpDTPmaf9o7Ey97ryAyA,2135
 xinference/model/rerank/core.py,sha256=M01V1Im2SGb0x8cff8LcB6sbM1mdwcfjxQtGXuHuH6E,11110
@@ -15291,9 +15295,9 @@ xinference/web/ui/node_modules/yargs-parser/package.json,sha256=BSwbOzgetKXMK4u0
 xinference/web/ui/node_modules/yocto-queue/package.json,sha256=6U1XHQPGXJTqsiFvT953ORihUtXTblZy4fXBWP9qxC0,725
 xinference/web/ui/node_modules/yup/package.json,sha256=xRFSROB9NKxqSWHEVFvSTsPs9Ll074uo8OS1zEw0qhA,1206
 xinference/web/ui/node_modules/yup/node_modules/type-fest/package.json,sha256=JTv2zTTVgxQ2H82m1-6qEpdMv08lHjFx4Puf_MsbB_Q,1134
-xinference-0.8.
-xinference-0.8.
-xinference-0.8.
-xinference-0.8.
-xinference-0.8.
-xinference-0.8.
+xinference-0.8.1.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+xinference-0.8.1.dist-info/METADATA,sha256=ArHMcySJgMVpoXrhiu5nfdcLvV0uscZcC7kq7c_Ky9c,13420
+xinference-0.8.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+xinference-0.8.1.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
+xinference-0.8.1.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
+xinference-0.8.1.dist-info/RECORD,,
{xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/LICENSE: file without changes
{xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/WHEEL: file without changes
{xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/entry_points.txt: file without changes
{xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/top_level.txt: file without changes