xinference 0.11.2.post1__py3-none-any.whl → 0.11.3__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in those public registries.


@@ -73,7 +73,8 @@ class BaichuanPytorchChatModel(PytorchChatModel):
     ) -> bool:
         if llm_spec.model_format != "pytorch":
             return False
-        if llm_family.model_name not in ["baichuan-chat", "baichuan-2-chat"]:
+        model_family = llm_family.model_family or llm_family.model_name
+        if model_family not in ["baichuan-chat", "baichuan-2-chat"]:
             return False
         if "chat" not in llm_family.model_ability:
             return False
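
The effect of this change: a user-registered model whose model_family points at one of the built-in Baichuan chat families is now matched by this class even when its model_name is something else. A minimal sketch of the new matching logic (not part of the diff; the names below are hypothetical):

    # Hypothetical custom registration: the name is new, but the family is built-in.
    class FakeLLMFamily:
        model_name = "my-finetuned-baichuan"   # not in the hard-coded list
        model_family = "baichuan-2-chat"       # built-in family it derives from

    llm_family = FakeLLMFamily()
    model_family = llm_family.model_family or llm_family.model_name
    print(model_family in ["baichuan-chat", "baichuan-2-chat"])  # True with the new code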
@@ -0,0 +1,257 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import base64
+import logging
+import time
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+from io import BytesIO
+from typing import Dict, Iterator, List, Optional, Tuple, Union
+
+import requests
+import torch
+from PIL import Image
+
+from ....model.utils import select_device
+from ....types import (
+    ChatCompletion,
+    ChatCompletionChunk,
+    ChatCompletionMessage,
+    Completion,
+    CompletionChoice,
+    CompletionUsage,
+)
+from ..llm_family import LLMFamilyV1, LLMSpecV1
+from .core import PytorchChatModel, PytorchGenerateConfig
+
+logger = logging.getLogger(__name__)
+
+IMAGENET_MEAN = (0.485, 0.456, 0.406)
+IMAGENET_STD = (0.229, 0.224, 0.225)
+
+
+class CogVLM2Model(PytorchChatModel):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._torch_type = None
+        self._device = None
+        self._tokenizer = None
+        self._model = None
+
+    @classmethod
+    def match(
+        cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
+    ) -> bool:
+        family = model_family.model_family or model_family.model_name
+        if "cogvlm" in family.lower():
+            return True
+        return False
+
+    def load(self, **kwargs):
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+        from transformers.generation import GenerationConfig
+
+        device = self._pytorch_model_config.get("device", "auto")
+        self._device = select_device(device)
+        self._torch_type = (
+            torch.bfloat16
+            if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
+            else torch.float16
+        )
+
+        self._tokenizer = AutoTokenizer.from_pretrained(
+            self.model_path,
+            trust_remote_code=True,
+        )
+
+        self._model = AutoModelForCausalLM.from_pretrained(
+            self.model_path,
+            torch_dtype=self._torch_type,
+            trust_remote_code=True,
+            low_cpu_mem_usage=True,
+            device_map="auto",
+        ).eval()
+
+        # Specify hyperparameters for generation
+        self._model.generation_config = GenerationConfig.from_pretrained(
+            self.model_path,
+            trust_remote_code=True,
+        )
+
+    def _message_content_to_cogvlm2(self, content):
+        def _load_image(_url):
+            if _url.startswith("data:"):
+                logging.info("Parse url by base64 decoder.")
+                # https://platform.openai.com/docs/guides/vision/uploading-base-64-encoded-images
+                # e.g. f"data:image/jpeg;base64,{base64_image}"
+                _type, data = _url.split(";")
+                _, ext = _type.split("/")
+                data = data[len("base64,") :]
+                data = base64.b64decode(data.encode("utf-8"))
+                return Image.open(BytesIO(data)).convert("RGB")
+            else:
+                try:
+                    response = requests.get(_url)
+                except requests.exceptions.MissingSchema:
+                    return Image.open(_url).convert("RGB")
+                else:
+                    return Image.open(BytesIO(response.content)).convert("RGB")
+
+        if not isinstance(content, str):
+            texts = []
+            image_urls = []
+            for c in content:
+                c_type = c.get("type")
+                if c_type == "text":
+                    texts.append(c["text"])
+                elif c_type == "image_url":
+                    image_urls.append(c["image_url"]["url"])
+            image_futures = []
+            with ThreadPoolExecutor() as executor:
+                for image_url in image_urls:
+                    fut = executor.submit(_load_image, image_url)
+                    image_futures.append(fut)
+            images = [fut.result() for fut in image_futures]
+            text = " ".join(texts)
+            if len(images) == 0:
+                return text, None
+            elif len(images) == 1:
+                return text, images
+            else:
+                raise RuntimeError(
+                    "Only one image per message is supported by CogVLM2."
+                )
+        return content, None
+
+    def _history_content_to_cogvlm2(
+        self, system_prompt: str, chat_history: List[ChatCompletionMessage]
+    ):
+        def _image_to_piexl_values(image):
+            if image.startswith("data:"):
+                logging.info("Parse url by base64 decoder.")
+                # https://platform.openai.com/docs/guides/vision/uploading-base-64-encoded-images
+                # e.g. f"data:image/jpeg;base64,{base64_image}"
+                _type, data = image.split(";")
+                _, ext = _type.split("/")
+                data = data[len("base64,") :]
+                data = base64.b64decode(data.encode("utf-8"))
+                return Image.open(BytesIO(data)).convert("RGB")
+            else:
+                try:
+                    response = requests.get(image)
+                except requests.exceptions.MissingSchema:
+                    return Image.open(image).convert("RGB")
+                else:
+                    return Image.open(BytesIO(response.content)).convert("RGB")
+
+        query = system_prompt
+        history: List[Tuple] = []
+        pixel_values = None
+        for i in range(0, len(chat_history), 2):
+            user = chat_history[i]["content"]
+            if isinstance(user, List):
+                for content in user:
+                    c_type = content.get("type")
+                    if c_type == "text":
+                        user = content["text"]
+                    elif c_type == "image_url" and not pixel_values:
+                        pixel_values = _image_to_piexl_values(
+                            content["image_url"]["url"]
+                        )
+            assistant = chat_history[i + 1]["content"]
+            query = query + f" USER: {user} ASSISTANT:"
+            history.append((query, assistant))
+            query = query + f" {assistant}"
+        return query, history, [pixel_values]
+
+    def chat(
+        self,
+        prompt: Union[str, List[Dict]],
+        system_prompt: Optional[str] = None,
+        chat_history: Optional[List[ChatCompletionMessage]] = None,
+        generate_config: Optional[PytorchGenerateConfig] = None,
+    ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
+        system_prompt = system_prompt if system_prompt else ""
+        if generate_config and generate_config.get("stream"):
+            raise Exception(
+                f"Chat with model {self.model_family.model_name} does not support stream."
+            )
+
+        sanitized_config = {
+            "pad_token_id": 128002,
+            "max_new_tokens": generate_config.get("max_tokens", 512)
+            if generate_config
+            else 512,
+        }
+
+        content, image = self._message_content_to_cogvlm2(prompt)
+
+        history = []
+        query = ""
+        history_image = None
+        if chat_history:
+            query, history, history_image = self._history_content_to_cogvlm2(
+                system_prompt, chat_history
+            )
+
+        if image and history_image:
+            history = []
+            query = system_prompt + f" USER: {content} ASSISTANT:"
+        else:
+            image = image if image else history_image
+            query = query + f" USER: {content} ASSISTANT:"
+
+        input_by_model = self._model.build_conversation_input_ids(
+            self._tokenizer,
+            query=query,
+            history=history,
+            images=image,
+            template_version="chat",
+        )
+
+        inputs = {
+            "input_ids": input_by_model["input_ids"].unsqueeze(0).to(self._device),
+            "token_type_ids": input_by_model["token_type_ids"]
+            .unsqueeze(0)
+            .to(self._device),
+            "attention_mask": input_by_model["attention_mask"]
+            .unsqueeze(0)
+            .to(self._device),
+            "images": [
+                [input_by_model["images"][0].to(self._device).to(self._torch_type)]
+            ]
+            if image is not None
+            else None,
+        }
+        with torch.no_grad():
+            outputs = self._model.generate(**inputs, **sanitized_config)
+            outputs = outputs[:, inputs["input_ids"].shape[1] :]
+            response = self._tokenizer.decode(outputs[0])
+            response = response.split("<|end_of_text|>")[0]
+
+        chunk = Completion(
+            id=str(uuid.uuid1()),
+            object="text_completion",
+            created=int(time.time()),
+            model=self.model_uid,
+            choices=[
+                CompletionChoice(
+                    index=0, text=response, finish_reason="stop", logprobs=None
+                )
+            ],
+            usage=CompletionUsage(
+                prompt_tokens=-1, completion_tokens=-1, total_tokens=-1
+            ),
+        )
+        return self._to_chat_completion(chunk)
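
For context, the new module above (listed in RECORD as xinference/model/llm/pytorch/cogvlm2.py) parses OpenAI-style vision messages in _message_content_to_cogvlm2. A minimal sketch of the prompt shape it accepts (not part of the diff; the URL is a placeholder):

    prompt = [
        {"type": "text", "text": "What is shown in this picture?"},
        {
            "type": "image_url",
            # _load_image accepts an http(s) URL, a local file path, or a data URI
            # such as f"data:image/jpeg;base64,{base64_image}".
            "image_url": {"url": "https://example.com/cat.jpg"},  # placeholder
        },
    ]

Supplying more than one image_url part in a single message raises the RuntimeError shown above, and streaming requests are rejected in chat().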
@@ -62,6 +62,7 @@ NON_DEFAULT_MODEL_LIST: List[str] = [
     "deepseek-vl-chat",
     "internvl-chat",
     "mini-internvl-chat",
+    "cogvlm2",
 ]


@@ -77,19 +77,14 @@ class InternVLChatModel(PytorchChatModel):
             "device_map": device,
         }

-        if "Int8" in self.model_spec.quantizations:
-            kwargs.update(
-                {
-                    "load_in_8bit": True,
-                    "device_map": device,
-                }
-            )
-        elif "mini" in self.model_family.model_name:
+        if "int8" in self.quantization.lower():
+            kwargs["load_in_8bit"] = True
+        elif 2 == self.model_spec.model_size_in_billions:
             kwargs.pop("device_map")

         self._model = AutoModel.from_pretrained(self.model_path, **kwargs).eval()

-        if "Int8" not in self.model_spec.quantizations:
+        if "int8" not in self.quantization.lower():
             self._model.cuda()

         # Specify hyperparameters for generation
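
The quantization check above now inspects the quantization actually selected for this launch (self.quantization) rather than the list of quantizations the spec offers. Reading the old code, load_in_8bit was enabled whenever an Int8 variant merely existed in self.model_spec.quantizations. A small sketch of the distinction (not part of the diff; the values are hypothetical):

    spec_quantizations = ["none", "Int8"]   # everything the spec offers (hypothetical)
    chosen_quantization = "none"            # what was actually requested

    old_kwargs = {"load_in_8bit": True} if "Int8" in spec_quantizations else {}
    new_kwargs = {"load_in_8bit": True} if "int8" in chosen_quantization.lower() else {}
    print(old_kwargs)  # {'load_in_8bit': True} even for an unquantized launch
    print(new_kwargs)  # {} unless an Int8 quantization was chosen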
@@ -298,7 +293,7 @@ class InternVLChatModel(PytorchChatModel):
         chat_history: Optional[List[ChatCompletionMessage]] = None,
         generate_config: Optional[PytorchGenerateConfig] = None,
     ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
-        if generate_config and generate_config.pop("stream"):
+        if generate_config and generate_config.get("stream"):
             raise Exception(
                 f"Chat with model {self.model_family.model_name} does not support stream."
             )
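
The pop-to-get switch above matters because dict.pop with a single argument raises KeyError when the key is missing and removes it when present, while get does neither. A minimal sketch (not part of the diff):

    generate_config = {"max_tokens": 256}        # no "stream" key supplied

    print(generate_config.get("stream"))         # None; config left untouched
    try:
        generate_config.pop("stream")            # old behaviour
    except KeyError:
        print("pop without a default raises KeyError for configs that omit 'stream'")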
@@ -114,6 +114,7 @@ VLLM_SUPPORTED_CHAT_MODELS = [
     "qwen-chat",
     "Yi-chat",
     "Yi-1.5-chat",
+    "Yi-1.5-chat-16k",
     "code-llama-instruct",
     "mistral-instruct-v0.1",
     "mistral-instruct-v0.2",
@@ -140,7 +141,6 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.3.3":
 if VLLM_INSTALLED and vllm.__version__ >= "0.4.0":
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen1.5-moe-chat")
     VLLM_SUPPORTED_CHAT_MODELS.append("c4ai-command-r-v01")
-    VLLM_SUPPORTED_CHAT_MODELS.append("c4ai-command-r-v01-4bit")


 class VLLMModel(LLM):
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: xinference
-Version: 0.11.2.post1
+Version: 0.11.3
 Summary: Model Serving Made Easy
 Home-page: https://github.com/xorbitsai/inference
 Author: Qin Xuye
@@ -177,12 +177,12 @@ potential of cutting-edge AI models.
 - Docker image: [#855](https://github.com/xorbitsai/inference/pull/855)
 - Support multimodal: [#829](https://github.com/xorbitsai/inference/pull/829)
 ### New Models
+- Built-in support for [CogVLM2](https://github.com/THUDM/CogVLM2): [#1551](https://github.com/xorbitsai/inference/pull/1551)
+- Built-in support for [InternVL-Chat-V1-5](https://github.com/OpenGVLab/InternVL): [#1536](https://github.com/xorbitsai/inference/pull/1536)
+- Built-in support for [Yi-1.5](https://github.com/01-ai/Yi-1.5): [#1489](https://github.com/xorbitsai/inference/pull/1489)
 - Built-in support for [Llama 3](https://github.com/meta-llama/llama3): [#1332](https://github.com/xorbitsai/inference/pull/1332)
 - Built-in support for [Qwen1.5 110B](https://huggingface.co/Qwen/Qwen1.5-110B-Chat): [#1388](https://github.com/xorbitsai/inference/pull/1388)
 - Built-in support for [Mixtral-8x22B-instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1): [#1340](https://github.com/xorbitsai/inference/pull/1340)
-- Built-in support for [Command-R](https://huggingface.co/CohereForAI/c4ai-command-r-v01): [#1310](https://github.com/xorbitsai/inference/pull/1310)
-- Built-in support for [Qwen1.5 MOE](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B-Chat): [#1263](https://github.com/xorbitsai/inference/pull/1263)
-- Built-in support for [Qwen1.5 32B](https://huggingface.co/Qwen/Qwen1.5-32B-Chat): [#1249](https://github.com/xorbitsai/inference/pull/1249)
 ### Integrations
 - [Dify](https://docs.dify.ai/advanced/model-configuration/xinference): an LLMOps platform that enables developers (and even non-developers) to quickly build useful applications based on large language models, ensuring they are visual, operable, and improvable.
 - [FastGPT](https://github.com/labring/FastGPT): a knowledge-based platform built on the LLM, offers out-of-the-box data processing and model invocation capabilities, allows for workflow orchestration through Flow visualization.
@@ -1,15 +1,15 @@
 xinference/__init__.py,sha256=0LgIveLP6CXxoIaSrxhlFyOh0lOqPgJBVcBe0tkWJjc,987
 xinference/_compat.py,sha256=SQAjZMGxtBIce45qtW7ob7RWzA0zhv2yB3AxT0rb0uU,1778
-xinference/_version.py,sha256=mDJV0U5DkMVMiU2azZlEJ4w5LINeGsZFYONO9K3d4d0,504
+xinference/_version.py,sha256=3PbaSmy-dqQ13cZaaCK5ch6akdnc3CpE14f0GcuqBi8,498
 xinference/conftest.py,sha256=Qus4KWCeaKS7c5UgNCTpPNucD2bjV8P7u1_qRosgGno,9743
-xinference/constants.py,sha256=Bu_fOJUGAvvqF_6FY5OzOHl7fQ1Nomek3LY17xr9oz4,2882
+xinference/constants.py,sha256=SIUOiSDpDIcZeuOE8kpF4NgUpCYGrHFIFjv4ogi5SAE,3040
 xinference/device_utils.py,sha256=zswJiws3VyTIaNO8z-MOcsJH_UiPoePPiKK5zoNrjTA,3285
 xinference/fields.py,sha256=0UtBFaDNzn1n9MRjyTkNrolsIML-TpZfudWOejqjni8,5245
 xinference/isolation.py,sha256=NstVRcO3dG4umHExICXAHlzVKwH8ch8MBwKwE-KFkE0,1826
 xinference/types.py,sha256=BFKUGHb0jKkAA1dczSf8pPlFutRE7-JtRp6C3oVSJ7Q,13626
 xinference/utils.py,sha256=VSOJMFd9H7kce98OtJZbcDjjpfzRpHAFs8WU0xXPBM8,717
 xinference/api/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/api/restful_api.py,sha256=Wj1LWFTQiwH7FEheaG1pA4wpVOCtWHnbTdRHUa6Qcek,60530
+xinference/api/restful_api.py,sha256=iuyxeNxwJHjEH5eSLCjoi1bZRmJFsG0j7pFI2ytQWjw,60830
 xinference/api/oauth2/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/api/oauth2/auth_service.py,sha256=74JzB42fbbmBu4Q1dW3A9Fp_N7167KgRGB42Z0NHjAM,6119
 xinference/api/oauth2/types.py,sha256=K923sv_XySIUtM2Eozl9IG082IJcDOS5SFLrPZ5ELBg,996
@@ -19,18 +19,18 @@ xinference/client/common.py,sha256=iciZRs5YjM2gYsXnwACPMaiBZp4_XpawWwfym0Iyu40,1
 xinference/client/handlers.py,sha256=3gd9C7u4URbcVdR6Eyv8cpEZ175Ll4q_jGL07CnEIpg,648
 xinference/client/restful/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/client/restful/restful_client.py,sha256=zqfZHliUeiDHvKKo1SrXH3rAvyxp0o_TaMwuCjV2pjQ,43589
-xinference/core/__init__.py,sha256=Fe5tYCHDbYJ7PhxJhQ68VbfgKgOsAuslNPr4wPhFMJM,612
-xinference/core/cache_tracker.py,sha256=OMY_0HZkb-R5EhHHWppIGqPhC3v9Iwdjp9oema_6eFc,4798
-xinference/core/chat_interface.py,sha256=B-qG7RF7HOquhKaPAJSnHA3Dqov5QZQA2yO-txt1IPs,17380
+xinference/core/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
+xinference/core/cache_tracker.py,sha256=xlhHwWsSLIo743r5U5l69b_rti_7Way-kGYzsunzs9s,5846
+xinference/core/chat_interface.py,sha256=7SOm6Qi-iFh1otycHpn6CpISq2wTLlJzEUngJtOwMIk,19558
 xinference/core/event.py,sha256=dTXv-zg-sAqlY1rFLvyor9D8WEXZvnUH7NigegpziO8,1648
 xinference/core/image_interface.py,sha256=G2iK24auEN4MrLkPlu1CAA_gf-BQrGQTjazi_FYqIxE,8825
 xinference/core/metrics.py,sha256=ScmTG15Uq3h_ob72ybZSMWdnk8P4sUZFcm60f4ikSXc,2631
 xinference/core/model.py,sha256=KAD9a2fYY97MfByNRnmia2oZR-UK7B-zW8gWl_tUJgs,18121
 xinference/core/resource.py,sha256=FQ0aRt3T4ZQo0P6CZZf5QUKHiCsr5llBvKb1f7wfnxg,1611
 xinference/core/status_guard.py,sha256=fF5hisvfn6es9DV6Z6RRD6V_S_uLcb8lHM6PArGgb04,2820
-xinference/core/supervisor.py,sha256=bREFDEeO6rF9hyWOuf9mA6IHo6Ur2BDGgPmnuysaN9o,41969
+xinference/core/supervisor.py,sha256=4UoXuzrDNk5P1eteUjOlXwdm8W5JFgUTn8kwSmxlROw,42076
 xinference/core/utils.py,sha256=LqPrez5dGELRQDSwOD5EP8XHb-aUKAdyszS-QpNouuw,6401
-xinference/core/worker.py,sha256=oGsqHQ5yJ4Ry-tAL06x6jkLxkQKbj0LxWtvvOy-SRdM,32780
+xinference/core/worker.py,sha256=bf5FtsVWMxQmQNmK0hTsC27CLU6ByGC02eCyrnJg4og,32999
 xinference/deploy/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/deploy/cmdline.py,sha256=J1NSfnQ-iJ87OPb_kNOfONgMZNqCow1Klm91uLS4_Lk,44582
 xinference/deploy/local.py,sha256=vlAvhcl8utP1DjW4MJpBgD4JLHQV-1Xebmdd8j9M8IM,3946
@@ -65,11 +65,11 @@ xinference/model/image/model_spec_modelscope.json,sha256=KMI-2YOJoLUtN93LZiqqETW
 xinference/model/image/utils.py,sha256=gxg8jJ2nYaDknzCcSC53WCy1slbB5aWU14AbJbfm6Z4,906
 xinference/model/image/stable_diffusion/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/model/image/stable_diffusion/core.py,sha256=ib_ZeSg7hzynmRqSnhjtrVuhoLOgZPrR1ZH2LjBmH2E,6063
-xinference/model/llm/__init__.py,sha256=kl2z63rNpgkoFQcg536SUt4TKub2wuDB8c7weYKeeoU,9874
+xinference/model/llm/__init__.py,sha256=AR36qg9QKRb3HtGauEGeFveFpg9xgOY0vJvJVHcJxkw,9946
 xinference/model/llm/core.py,sha256=CZJrbW3HaV8vJj5mxSnBD8ssABALG_xyXyPS0hHVBPY,7982
-xinference/model/llm/llm_family.json,sha256=otMgqAExkRqPC0yV_si0yc6sNhPEIxQjDrSg31Er9F8,152586
+xinference/model/llm/llm_family.json,sha256=ej30g6j2_jYHTFMj5_hi2ERbs-d5KUdSW47lKU0tjMo,160208
 xinference/model/llm/llm_family.py,sha256=aQZPE2gj5YfII2mhgbaSs1MfllKnUXd8xbflKr_YdFs,37357
-xinference/model/llm/llm_family_modelscope.json,sha256=5cyM-y0HtG_fBfC289IU97iasfjZZgUefn5UmVmg7TY,91282
+xinference/model/llm/llm_family_modelscope.json,sha256=f72zwrQfBJzijQwTD1yIy6KPInuyddKE44Iy8arMo-A,98156
 xinference/model/llm/memory.py,sha256=PTD8m6TCZVU1zrwc9wepX9cUjCqAXBENj6X7tjua0to,10207
 xinference/model/llm/utils.py,sha256=0SindpTW6dUWn17E-Ne9scnSfPOovb53sIuc9zxIFfo,31653
 xinference/model/llm/ggml/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
@@ -79,13 +79,14 @@ xinference/model/llm/ggml/tools/__init__.py,sha256=6a6P2VPKE06xKxJ-dTqp4TRO2IEDW
 xinference/model/llm/ggml/tools/convert_ggml_to_gguf.py,sha256=92To8eoVQBkDZD52_aWBNda2K1Ob6YaHlcfS-8_aOuw,17991
 xinference/model/llm/ggml/tools/gguf.py,sha256=Hv2haR-UN7NdB1N8YId32hFoEPd-JX6_aUNWRJhyJZc,30277
 xinference/model/llm/pytorch/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/model/llm/pytorch/baichuan.py,sha256=JBHldfdjUlSV44HB_td6402CwryDm61rlzw4D_flKHI,2820
+xinference/model/llm/pytorch/baichuan.py,sha256=TpCfi37Ou87VA8S5siGRUK5JWC5SjGbZYkZoTe9VSA8,2883
 xinference/model/llm/pytorch/chatglm.py,sha256=meFb4P1MXw63IhoDQwmFGczTXa3vEpv1L1Eo9XIifG8,9624
+xinference/model/llm/pytorch/cogvlm2.py,sha256=hRFsNQIgvNa7Dfhh08Z2K3QuKzXop_fkkXtbXwUaKWg,9472
 xinference/model/llm/pytorch/compression.py,sha256=U0vMJ-JaBt4oC2LffgWg6HbPj1CeUi_YdwVbjDd0mRA,8112
-xinference/model/llm/pytorch/core.py,sha256=VGJfZ92e3SftQXrOwxiYMUSF1YnWSUU8Z8WHr6yzpJ8,19984
+xinference/model/llm/pytorch/core.py,sha256=qyAd2UdWq1PFJgFBksMGDouFxbksdr7AmM2bW8TsWEU,19999
 xinference/model/llm/pytorch/deepseek_vl.py,sha256=T9DKP4cULvRaHSiU08lOWd_j6mt8b3ZIBByneZ0jY8U,11498
 xinference/model/llm/pytorch/falcon.py,sha256=POSP7vzRJaM5PjvX8dh60jNDXgnCwktwSmeZ7kypQU0,4499
-xinference/model/llm/pytorch/intern_vl.py,sha256=MlP7vcp0qu7ehSg3Z7_qe18aiepi3KKjN9N9P-qVTwM,13166
+xinference/model/llm/pytorch/intern_vl.py,sha256=_7IxqtTODTOTfR_4BDQ1hSkgE_qLjAhdyNt7fz_AD20,13044
 xinference/model/llm/pytorch/internlm2.py,sha256=vjspoc2VHbuD1JaUtjt0sOq9MwvRr2OD3_tKQhBVUPc,7244
 xinference/model/llm/pytorch/llama_2.py,sha256=HMhUmn4oYW2maeSMIr1yY7jlAOMD0OVAxnF0dnRWmio,3710
 xinference/model/llm/pytorch/omnilmm.py,sha256=4r6pipch1LU1FPA80sOCE7Z0k3TO_J8CIT7pmVmWKEM,5664
@@ -96,7 +97,7 @@ xinference/model/llm/pytorch/yi_vl.py,sha256=MljT7tpgFIhL6n5rdoS3hmq_u0rtHRE6cxX
 xinference/model/llm/sglang/__init__.py,sha256=-sjSIQ4K6w-TEzx49kVaWeWC443fnZqODU91GCQ_JNo,581
 xinference/model/llm/sglang/core.py,sha256=RGHy6t9n0c4zL6Uha8P7t-qPvisPyulFVHw-8Aq8CJ0,14046
 xinference/model/llm/vllm/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/model/llm/vllm/core.py,sha256=_cM-NEnDtpGVHMgHxr-uxV0sA-xXUjDsL5i__cpHQEU,21639
+xinference/model/llm/vllm/core.py,sha256=a__R1e3V_PwEOtky5NxVIMKWLo4eVX8S4BnSVWM5uAs,21597
 xinference/model/rerank/__init__.py,sha256=BXIL1uu3ZpZHX9bODhW9lxKUXudZE7-OkXFmmM5rpMU,2817
 xinference/model/rerank/core.py,sha256=BLIIStjxUFghSFoxCimet88ghqGwmVaskOYdVRxKdpI,10572
 xinference/model/rerank/custom.py,sha256=NKk7jA7p4xkuwS5WoOs2SY2wdnoAVpyCjBTvv317bBw,3917
@@ -15423,9 +15424,9 @@ xinference/web/ui/node_modules/yargs-parser/package.json,sha256=BSwbOzgetKXMK4u0
 xinference/web/ui/node_modules/yocto-queue/package.json,sha256=6U1XHQPGXJTqsiFvT953ORihUtXTblZy4fXBWP9qxC0,725
 xinference/web/ui/node_modules/yup/package.json,sha256=xRFSROB9NKxqSWHEVFvSTsPs9Ll074uo8OS1zEw0qhA,1206
 xinference/web/ui/node_modules/yup/node_modules/type-fest/package.json,sha256=JTv2zTTVgxQ2H82m1-6qEpdMv08lHjFx4Puf_MsbB_Q,1134
-xinference-0.11.2.post1.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
-xinference-0.11.2.post1.dist-info/METADATA,sha256=gwgZIL72lulJsk-4NasdYTQc5vxs5ibTFRjrf9TMgJc,15548
-xinference-0.11.2.post1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-xinference-0.11.2.post1.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
-xinference-0.11.2.post1.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
-xinference-0.11.2.post1.dist-info/RECORD,,
+xinference-0.11.3.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+xinference-0.11.3.dist-info/METADATA,sha256=Hp7uQ0rgqOkdwWD1T7U3KHMijVdeISct2OvSYNSq4EY,15495
+xinference-0.11.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+xinference-0.11.3.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
+xinference-0.11.3.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
+xinference-0.11.3.dist-info/RECORD,,