xinference 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

@@ -0,0 +1,230 @@
1
+ # Copyright 2022-2023 XProbe Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import logging
15
+ import uuid
16
+ from concurrent.futures import ThreadPoolExecutor
17
+ from threading import Thread
18
+ from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
19
+
20
+ import torch
21
+
22
+ from ....types import ChatCompletion, ChatCompletionChunk, CompletionChunk
23
+ from ...utils import select_device
24
+ from ..llm_family import LLMFamilyV1, LLMSpecV1
25
+ from ..utils import (
26
+ _decode_image_without_rgb,
27
+ generate_chat_completion,
28
+ generate_completion_chunk,
29
+ )
30
+ from .core import PytorchChatModel, PytorchGenerateConfig
31
+ from .utils import cache_clean
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
class GlmEdgeVModel(PytorchChatModel):
    """Chat model wrapper for the GLM-Edge-V vision-language family.

    ``load`` pulls the image processor, causal LM and tokenizer through
    ``transformers`` (all with ``trust_remote_code=True``); ``chat`` serves
    OpenAI-style messages that may carry at most one image per message.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # All four attributes are populated by load().
        self._device = None
        self._tokenizer = None
        self._model = None
        self._processor = None

    @classmethod
    def match(
        cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
    ) -> bool:
        """Return True when the family name (or model name) contains "glm-edge-v".

        ``model_spec`` and ``quantization`` are accepted but not inspected.
        """
        family = model_family.model_family or model_family.model_name
        return "glm-edge-v" in family.lower()

    def load(self):
        """Load the image processor, model and tokenizer from ``self.model_path``.

        Raises:
            ValueError: if the requested quantization is not one of "none",
                "4-bit" or "8-bit", or if anything other than "8-bit" is
                requested when not running on CUDA under Linux.
        """
        from transformers import AutoImageProcessor, AutoModelForCausalLM, AutoTokenizer

        device = self._pytorch_model_config.get("device", "auto")
        self._device = select_device(device)

        # Flags forwarded to from_pretrained so that a "4-bit"/"8-bit"
        # request actually loads quantized weights.
        quantization_kwargs: Dict[str, Any] = {}
        quantization = self.quantization

        # referenced from PytorchModel.load
        if quantization != "none":
            if self._device == "cuda" and self._is_linux():
                if quantization == "4-bit":
                    quantization_kwargs["load_in_4bit"] = True
                elif quantization == "8-bit":
                    quantization_kwargs["load_in_8bit"] = True
                else:
                    raise ValueError(
                        f"Quantization {quantization} is not supported in temporary"
                    )
            elif quantization != "8-bit":
                raise ValueError(
                    "Only 8-bit quantization is supported if it is not linux system or cuda device"
                )
            # NOTE(review): "8-bit" off CUDA/Linux passes validation, but no
            # quantization step is applied in this method -- confirm intent.

        self._processor = AutoImageProcessor.from_pretrained(
            self.model_path, trust_remote_code=True
        )

        self._model = AutoModelForCausalLM.from_pretrained(
            self.model_path,
            trust_remote_code=True,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            **quantization_kwargs,
        )

        self._tokenizer = AutoTokenizer.from_pretrained(
            self.model_path, trust_remote_code=True
        )

    @staticmethod
    def _get_processed_msgs(
        messages: List[Dict],
    ) -> Tuple[List[Dict[str, Any]], List[Any]]:
        """Convert OpenAI-style messages into chat-template messages plus images.

        Args:
            messages: dicts with "role" and "content"; "content" is either a
                string or a list of parts typed "text" or "image_url".

        Returns:
            ``(msgs, images)``. A message with an image becomes
            ``{"role", "content": [{"type": "image"}, {"type": "text", ...}]}``;
            any other message becomes ``{"role", "content": <text>}``.
            ``images`` holds the decoded images in message order.

        Raises:
            AssertionError: a content part is neither "text" nor "image_url".
            RuntimeError: a single message carries more than one image.
        """
        res = []
        img = []
        for message in messages:
            role = message["role"]
            content = message["content"]
            if isinstance(content, str):
                res.append({"role": role, "content": content})
                continue

            texts = []
            image_urls = []
            for c in content:
                c_type = c.get("type")
                if c_type == "text":
                    texts.append(c["text"])
                else:
                    assert (
                        c_type == "image_url"
                    ), "Please follow the image input of the OpenAI API."
                    image_urls.append(c["image_url"]["url"])
            if len(image_urls) > 1:
                raise RuntimeError("Only one image per message is supported")

            with ThreadPoolExecutor() as executor:
                image_futures = [
                    executor.submit(_decode_image_without_rgb, image_url)
                    for image_url in image_urls
                ]
            images = [fut.result() for fut in image_futures]

            text = " ".join(texts)
            img.extend(images)
            if images:
                res.append(
                    {
                        "role": role,
                        "content": [
                            {"type": "image"},
                            {"type": "text", "text": text},
                        ],
                    }
                )
            else:
                res.append({"role": role, "content": text})
        return res, img

    @cache_clean
    def chat(
        self,
        messages: List[Dict],
        generate_config: Optional[PytorchGenerateConfig] = None,
    ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
        """Run one chat turn, streaming or not.

        Args:
            messages: OpenAI-style chat messages (see ``_get_processed_msgs``).
            generate_config: only the "stream" key is read here.

        Returns:
            An iterator of chat completion chunks when ``stream`` is truthy,
            otherwise a single chat completion.
        """
        from transformers import TextIteratorStreamer

        if not generate_config:
            generate_config = {}

        stream = generate_config.get("stream", False)
        msgs, imgs = self._get_processed_msgs(messages)

        inputs = self._tokenizer.apply_chat_template(
            msgs,
            add_generation_prompt=True,
            tokenize=True,
            return_tensors="pt",
            return_dict=True,
        )  # chat mode
        inputs = inputs.to(self._model.device)

        generate_kwargs = {**inputs}
        if imgs:
            # Only the most recently supplied image is passed to the model.
            generate_kwargs["pixel_values"] = torch.tensor(
                self._processor(imgs[-1]).pixel_values
            ).to(self._model.device)
        stop_str = "<|endoftext|>"

        if stream:
            streamer = TextIteratorStreamer(
                tokenizer=self._tokenizer,
                timeout=60,
                skip_prompt=True,
                skip_special_tokens=True,
            )
            generate_kwargs = {**generate_kwargs, "streamer": streamer}
            # Generation runs in a background thread and feeds the streamer,
            # which chat_stream consumes.
            t = Thread(target=self._model.generate, kwargs=generate_kwargs)
            t.start()

            it = self.chat_stream(streamer, stop_str)
            return self._to_chat_completion_chunks(it)

        with torch.no_grad():
            outputs = self._model.generate(**generate_kwargs)
            # Skip the first len(input_ids) tokens of the output
            # (presumably the echoed prompt -- confirm against the model).
            outputs = outputs[0][len(inputs["input_ids"][0]) :]
        response = self._tokenizer.decode(outputs)
        if response.endswith(stop_str):
            response = response[: -len(stop_str)]
        return generate_chat_completion(self.model_uid, response)

    def chat_stream(self, streamer, stop_str) -> Iterator[CompletionChunk]:
        """Turn streamed text pieces into completion chunks.

        Yields one chunk per piece of text from ``streamer`` and then a final
        chunk with ``finish_reason="stop"``. Token counts are reported as -1.
        """
        completion_id = str(uuid.uuid1())
        for new_text in streamer:
            # Trim a trailing stop marker rather than dropping the whole
            # piece, so text arriving together with the marker is delivered.
            if stop_str and new_text.endswith(stop_str):
                new_text = new_text[: -len(stop_str)]
                if not new_text:
                    continue
            yield generate_completion_chunk(
                chunk_text=new_text,
                finish_reason=None,
                chunk_id=completion_id,
                model_uid=self.model_uid,
                prompt_tokens=-1,
                completion_tokens=-1,
                total_tokens=-1,
                has_choice=True,
                has_content=True,
            )

        yield generate_completion_chunk(
            chunk_text=None,
            finish_reason="stop",
            chunk_id=completion_id,
            model_uid=self.model_uid,
            prompt_tokens=-1,
            completion_tokens=-1,
            total_tokens=-1,
            has_choice=True,
            has_content=False,
        )
@@ -569,6 +569,25 @@ def _decode_image(_url):
569
569
  return Image.open(BytesIO(response.content)).convert("RGB")
570
570
 
571
571
 
572
def _decode_image_without_rgb(_url):
    """Open an image from a base64 ``data:`` URL, a remote URL or a local path.

    The image is returned exactly as ``Image.open`` yields it; no
    ``.convert("RGB")`` is applied.

    Args:
        _url: a ``data:<mediatype>;base64,<payload>`` URL, a URL fetchable
            with ``requests.get``, or (when it has no URL scheme) a file path.

    Returns:
        The opened image.

    Raises:
        ValueError: a ``data:`` URL has no "," separating header and payload.
    """
    if _url.startswith("data:"):
        logging.info("Parse url by base64 decoder.")
        # https://platform.openai.com/docs/guides/vision/uploading-base-64-encoded-images
        # e.g. f"data:image/jpeg;base64,{base64_image}"
        # The payload is everything after the first comma. Splitting there,
        # rather than on ";", tolerates extra media-type parameters such as
        # "data:image/png;charset=utf-8;base64,...".
        _, separator, payload = _url.partition(",")
        if not separator:
            raise ValueError("Invalid data URL: missing ',' before the payload")
        data = base64.b64decode(payload.encode("utf-8"))
        return Image.open(BytesIO(data))

    try:
        response = requests.get(_url)
    except requests.exceptions.MissingSchema:
        # No URL scheme: treat the value as a local file path.
        return Image.open(_url)
    return Image.open(BytesIO(response.content))
589
+
590
+
572
591
  @typing.no_type_check
573
592
  def generate_completion_chunk(
574
593
  chunk_text: Optional[str],
@@ -69,6 +69,7 @@ class VLLMModelConfig(TypedDict, total=False):
69
69
  quantization: Optional[str]
70
70
  max_model_len: Optional[int]
71
71
  limit_mm_per_prompt: Optional[Dict[str, int]]
72
+ guided_decoding_backend: Optional[str]
72
73
 
73
74
 
74
75
  class VLLMGenerateConfig(TypedDict, total=False):
@@ -85,6 +86,14 @@ class VLLMGenerateConfig(TypedDict, total=False):
85
86
  stop: Optional[Union[str, List[str]]]
86
87
  stream: bool # non-sampling param, should not be passed to the engine.
87
88
  stream_options: Optional[Union[dict, None]]
89
+ response_format: Optional[dict]
90
+ guided_json: Optional[Union[str, dict]]
91
+ guided_regex: Optional[str]
92
+ guided_choice: Optional[List[str]]
93
+ guided_grammar: Optional[str]
94
+ guided_json_object: Optional[bool]
95
+ guided_decoding_backend: Optional[str]
96
+ guided_whitespace_pattern: Optional[str]
88
97
 
89
98
 
90
99
  try:
@@ -144,6 +153,7 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.3.0":
144
153
  VLLM_SUPPORTED_CHAT_MODELS.append("qwen2.5-instruct")
145
154
  VLLM_SUPPORTED_MODELS.append("qwen2.5-coder")
146
155
  VLLM_SUPPORTED_CHAT_MODELS.append("qwen2.5-coder-instruct")
156
+ VLLM_SUPPORTED_CHAT_MODELS.append("QwQ-32B-Preview")
147
157
 
148
158
 
149
159
  if VLLM_INSTALLED and vllm.__version__ >= "0.3.2":
@@ -314,6 +324,7 @@ class VLLMModel(LLM):
314
324
  model_config.setdefault("max_num_seqs", 256)
315
325
  model_config.setdefault("quantization", None)
316
326
  model_config.setdefault("max_model_len", None)
327
+ model_config.setdefault("guided_decoding_backend", "outlines")
317
328
 
318
329
  return model_config
319
330
 
@@ -325,6 +336,22 @@ class VLLMModel(LLM):
325
336
  generate_config = {}
326
337
 
327
338
  sanitized = VLLMGenerateConfig()
339
+
340
+ response_format = generate_config.pop("response_format", None)
341
+ guided_decoding_backend = generate_config.get("guided_decoding_backend", None)
342
+ guided_json_object = None
343
+ guided_json = None
344
+
345
+ if response_format is not None:
346
+ if response_format.get("type") == "json_object":
347
+ guided_json_object = True
348
+ elif response_format.get("type") == "json_schema":
349
+ json_schema = response_format.get("json_schema")
350
+ assert json_schema is not None
351
+ guided_json = json_schema.get("json_schema")
352
+ if guided_decoding_backend is None:
353
+ guided_decoding_backend = "outlines"
354
+
328
355
  sanitized.setdefault("lora_name", generate_config.get("lora_name", None))
329
356
  sanitized.setdefault("n", generate_config.get("n", 1))
330
357
  sanitized.setdefault("best_of", generate_config.get("best_of", None))
@@ -346,6 +373,28 @@ class VLLMModel(LLM):
346
373
  sanitized.setdefault(
347
374
  "stream_options", generate_config.get("stream_options", None)
348
375
  )
376
+ sanitized.setdefault(
377
+ "guided_json", generate_config.get("guided_json", guided_json)
378
+ )
379
+ sanitized.setdefault("guided_regex", generate_config.get("guided_regex", None))
380
+ sanitized.setdefault(
381
+ "guided_choice", generate_config.get("guided_choice", None)
382
+ )
383
+ sanitized.setdefault(
384
+ "guided_grammar", generate_config.get("guided_grammar", None)
385
+ )
386
+ sanitized.setdefault(
387
+ "guided_whitespace_pattern",
388
+ generate_config.get("guided_whitespace_pattern", None),
389
+ )
390
+ sanitized.setdefault(
391
+ "guided_json_object",
392
+ generate_config.get("guided_json_object", guided_json_object),
393
+ )
394
+ sanitized.setdefault(
395
+ "guided_decoding_backend",
396
+ generate_config.get("guided_decoding_backend", guided_decoding_backend),
397
+ )
349
398
 
350
399
  return sanitized
351
400
 
@@ -483,13 +532,46 @@ class VLLMModel(LLM):
483
532
  if isinstance(stream_options, dict)
484
533
  else False
485
534
  )
486
- sampling_params = SamplingParams(**sanitized_generate_config)
535
+
536
+ if VLLM_INSTALLED and vllm.__version__ >= "0.6.3":
537
+ # guided decoding only available for vllm >= 0.6.3
538
+ from vllm.sampling_params import GuidedDecodingParams
539
+
540
+ guided_options = GuidedDecodingParams.from_optional(
541
+ json=sanitized_generate_config.pop("guided_json", None),
542
+ regex=sanitized_generate_config.pop("guided_regex", None),
543
+ choice=sanitized_generate_config.pop("guided_choice", None),
544
+ grammar=sanitized_generate_config.pop("guided_grammar", None),
545
+ json_object=sanitized_generate_config.pop("guided_json_object", None),
546
+ backend=sanitized_generate_config.pop("guided_decoding_backend", None),
547
+ whitespace_pattern=sanitized_generate_config.pop(
548
+ "guided_whitespace_pattern", None
549
+ ),
550
+ )
551
+
552
+ sampling_params = SamplingParams(
553
+ guided_decoding=guided_options, **sanitized_generate_config
554
+ )
555
+ else:
556
+ # ignore generate configs
557
+ sanitized_generate_config.pop("guided_json", None)
558
+ sanitized_generate_config.pop("guided_regex", None)
559
+ sanitized_generate_config.pop("guided_choice", None)
560
+ sanitized_generate_config.pop("guided_grammar", None)
561
+ sanitized_generate_config.pop("guided_json_object", None)
562
+ sanitized_generate_config.pop("guided_decoding_backend", None)
563
+ sanitized_generate_config.pop("guided_whitespace_pattern", None)
564
+ sampling_params = SamplingParams(**sanitized_generate_config)
565
+
487
566
  if not request_id:
488
567
  request_id = str(uuid.uuid1())
489
568
 
490
569
  assert self._engine is not None
491
570
  results_generator = self._engine.generate(
492
- prompt, sampling_params, request_id, lora_request=lora_request
571
+ prompt,
572
+ sampling_params,
573
+ request_id,
574
+ lora_request,
493
575
  )
494
576
 
495
577
  async def stream_results() -> AsyncGenerator[CompletionChunk, None]:
xinference/types.py CHANGED
@@ -71,7 +71,8 @@ class EmbeddingUsage(TypedDict):
71
71
  class EmbeddingData(TypedDict):
72
72
  index: int
73
73
  object: str
74
- embedding: List[float]
74
+ # support sparse embedding
75
+ embedding: Union[List[float], Dict[str, float]]
75
76
 
76
77
 
77
78
  class Embedding(TypedDict):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xinference
3
- Version: 1.0.0
3
+ Version: 1.0.1
4
4
  Summary: Model Serving Made Easy
5
5
  Home-page: https://github.com/xorbitsai/inference
6
6
  Author: Qin Xuye
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: Implementation :: CPython
19
19
  Classifier: Topic :: Software Development :: Libraries
20
20
  Description-Content-Type: text/markdown
21
21
  License-File: LICENSE
22
- Requires-Dist: xoscar>=0.3.0
22
+ Requires-Dist: xoscar<0.4.2,>=0.3.0
23
23
  Requires-Dist: torch
24
24
  Requires-Dist: gradio
25
25
  Requires-Dist: pillow
@@ -34,7 +34,7 @@ Requires-Dist: huggingface-hub>=0.19.4
34
34
  Requires-Dist: typing-extensions
35
35
  Requires-Dist: modelscope>=1.10.0
36
36
  Requires-Dist: sse-starlette>=1.6.5
37
- Requires-Dist: openai>1
37
+ Requires-Dist: openai>=1.40.0
38
38
  Requires-Dist: python-jose[cryptography]
39
39
  Requires-Dist: passlib[bcrypt]
40
40
  Requires-Dist: aioprometheus[starlette]>=23.12.0
@@ -80,7 +80,7 @@ Requires-Dist: conformer; extra == "all"
80
80
  Requires-Dist: gdown; extra == "all"
81
81
  Requires-Dist: pyarrow; extra == "all"
82
82
  Requires-Dist: HyperPyYAML; extra == "all"
83
- Requires-Dist: onnxruntime==1.16.0; extra == "all"
83
+ Requires-Dist: onnxruntime>=1.16.0; extra == "all"
84
84
  Requires-Dist: boto3<1.28.65,>=1.28.55; extra == "all"
85
85
  Requires-Dist: tensorizer~=2.9.0; extra == "all"
86
86
  Requires-Dist: eva-decord; extra == "all"
@@ -98,6 +98,7 @@ Requires-Dist: verovio>=4.3.1; extra == "all"
98
98
  Requires-Dist: auto-gptq; sys_platform != "darwin" and extra == "all"
99
99
  Requires-Dist: autoawq<0.2.6; sys_platform != "darwin" and extra == "all"
100
100
  Requires-Dist: mlx-lm; (sys_platform == "darwin" and platform_machine == "arm64") and extra == "all"
101
+ Requires-Dist: mlx-whisper; (sys_platform == "darwin" and platform_machine == "arm64") and extra == "all"
101
102
  Requires-Dist: vllm>=0.2.6; sys_platform == "linux" and extra == "all"
102
103
  Requires-Dist: sglang>=0.2.7; sys_platform == "linux" and extra == "all"
103
104
  Provides-Extra: audio
@@ -119,7 +120,7 @@ Requires-Dist: diffusers>=0.30.0; extra == "audio"
119
120
  Requires-Dist: gdown; extra == "audio"
120
121
  Requires-Dist: pyarrow; extra == "audio"
121
122
  Requires-Dist: HyperPyYAML; extra == "audio"
122
- Requires-Dist: onnxruntime==1.16.0; extra == "audio"
123
+ Requires-Dist: onnxruntime>=1.16.0; extra == "audio"
123
124
  Requires-Dist: loguru; extra == "audio"
124
125
  Requires-Dist: natsort; extra == "audio"
125
126
  Requires-Dist: loralib; extra == "audio"
@@ -143,7 +144,7 @@ Requires-Dist: sphinx-intl>=0.9.9; extra == "dev"
143
144
  Requires-Dist: jieba>=0.42.0; extra == "dev"
144
145
  Requires-Dist: flake8>=3.8.0; extra == "dev"
145
146
  Requires-Dist: black; extra == "dev"
146
- Requires-Dist: openai>1; extra == "dev"
147
+ Requires-Dist: openai>=1.40.0; extra == "dev"
147
148
  Requires-Dist: langchain; extra == "dev"
148
149
  Requires-Dist: langchain-community; extra == "dev"
149
150
  Requires-Dist: orjson; extra == "dev"
@@ -177,6 +178,7 @@ Provides-Extra: llama_cpp
177
178
  Requires-Dist: llama-cpp-python!=0.2.58,>=0.2.25; extra == "llama-cpp"
178
179
  Provides-Extra: mlx
179
180
  Requires-Dist: mlx-lm; extra == "mlx"
181
+ Requires-Dist: mlx-whisper; extra == "mlx"
180
182
  Provides-Extra: rerank
181
183
  Requires-Dist: FlagEmbedding; extra == "rerank"
182
184
  Provides-Extra: sglang
@@ -1,15 +1,15 @@
1
1
  xinference/__init__.py,sha256=nmTTrYbIpj964ZF6ojtgOM7E85JBOj1EyQbmYjbj1jw,915
2
- xinference/_compat.py,sha256=xFztCfyrq3O_4bssL_ygghYkfxicv_ZhiX2YDDWHf-k,3571
3
- xinference/_version.py,sha256=fpV50OH7B6n2pfTzqkYhtyB658xgOLxDdVxTQumkplE,497
2
+ xinference/_compat.py,sha256=vpf_M9Ou6d9qaq-hG5isJ-C8e8UdPZPqoWcPhabfNko,4135
3
+ xinference/_version.py,sha256=ENOsRw9OHQL_a2m0T4MVfI2yL84k11_7guNq3F53rsg,497
4
4
  xinference/conftest.py,sha256=vETDpRBVIlWbWi7OTwf7og89U25KyYGyI7yPIB3O8N8,9564
5
5
  xinference/constants.py,sha256=mEW4HDzjXtDXN61Mt6TtJrJ4ljbB6VUkh97e3oDbNx4,3905
6
6
  xinference/device_utils.py,sha256=zswJiws3VyTIaNO8z-MOcsJH_UiPoePPiKK5zoNrjTA,3285
7
7
  xinference/fields.py,sha256=0UtBFaDNzn1n9MRjyTkNrolsIML-TpZfudWOejqjni8,5245
8
8
  xinference/isolation.py,sha256=uhkzVyL3fSYZSuFexkG6Jm-tRTC5I607uNg000BXAnE,1949
9
- xinference/types.py,sha256=LHTbNLf0zI-FLruxRuBt2KMpk2P4eKpYdFvh2qzNTGI,12458
9
+ xinference/types.py,sha256=t9SIU06Y1Y_lmXMfQmYAHmP8K6vTnD5Ly32z4KqriZE,12514
10
10
  xinference/utils.py,sha256=zYgf9bCvfbybRt3gEog6r5WJCpj0czZCf0qgRdYjkN8,720
11
11
  xinference/api/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
12
- xinference/api/restful_api.py,sha256=eWw6tQ9zVOYAB_P9Zr_0bO3_ng7wyyGFIlA7MnV7q_k,88589
12
+ xinference/api/restful_api.py,sha256=4tEu4YiT5iQU9ZVPI5gpQzwe-pEXiM05o_7S7MnJHBg,92140
13
13
  xinference/api/oauth2/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
14
14
  xinference/api/oauth2/auth_service.py,sha256=74JzB42fbbmBu4Q1dW3A9Fp_N7167KgRGB42Z0NHjAM,6119
15
15
  xinference/api/oauth2/types.py,sha256=K923sv_XySIUtM2Eozl9IG082IJcDOS5SFLrPZ5ELBg,996
@@ -18,14 +18,14 @@ xinference/client/__init__.py,sha256=Gc4HOzAy_1cic5kXlso7hahYgw89CKvZSJDicEU461k
18
18
  xinference/client/common.py,sha256=iciZRs5YjM2gYsXnwACPMaiBZp4_XpawWwfym0Iyu40,1617
19
19
  xinference/client/handlers.py,sha256=OKl_i5FA341wsQf_0onSOPbbW6V861WJrSP7ghtDc8c,527
20
20
  xinference/client/restful/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
21
- xinference/client/restful/restful_client.py,sha256=nz4Gz9PpX1seB20xmO5Pc6vliqB44gqJOfcIvdVuVVQ,52276
21
+ xinference/client/restful/restful_client.py,sha256=JwzP7etUZBR0mmU7y3dUOEWN_D7ol_2hXN9KMAzKZaw,53601
22
22
  xinference/core/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
23
23
  xinference/core/cache_tracker.py,sha256=3ubjYCU5aZToSp2GEuzedECVrg-PR4kThTefrFUkb9g,6971
24
24
  xinference/core/chat_interface.py,sha256=Kiqs1XOXgYBlP7DOXLEXaFjbVuS0yC1-dXJyxrxiRNE,20785
25
25
  xinference/core/event.py,sha256=42F38H2WOl6aPxp2oxX6WNxHRRxbnvYRmbt4Ar7NP4U,1640
26
26
  xinference/core/image_interface.py,sha256=5Iuoiw3g2TvgOYi3gRIAGApve2nNzfMPduRrBHvd1NY,13755
27
27
  xinference/core/metrics.py,sha256=ScmTG15Uq3h_ob72ybZSMWdnk8P4sUZFcm60f4ikSXc,2631
28
- xinference/core/model.py,sha256=fVE-b7vLgMgFmY4yJpFVnV_4Pw1Bde-ykBERZJVAjsM,39873
28
+ xinference/core/model.py,sha256=k5hMtDUhZZjytz1aniTq4F9Bl2Kx-7fPv8cd_yLbCUw,40646
29
29
  xinference/core/progress_tracker.py,sha256=LIF6CLIlnEoSBkuDCraJktDOzZ31mQ4HOo6EVr3KpQM,6453
30
30
  xinference/core/resource.py,sha256=FQ0aRt3T4ZQo0P6CZZf5QUKHiCsr5llBvKb1f7wfnxg,1611
31
31
  xinference/core/scheduler.py,sha256=gdj3SyP_jelJ86vTRrgnFynhxz5JSwLRsQgx8PTtBi8,15671
@@ -44,22 +44,23 @@ xinference/deploy/test/test_cmdline.py,sha256=m8xDzjtDuAJy0QkvYVJIZDuTB29cmYBV0d
44
44
  xinference/model/__init__.py,sha256=bCEwvKjoazBW8QZ5C5Hpfd5v_VTbWpXvo8IB9d4Qs70,1127
45
45
  xinference/model/core.py,sha256=_NEH4wkjjJgRDdLHNVY_hL3V0kT67CvTay89uIzx1Ns,4736
46
46
  xinference/model/utils.py,sha256=_yJ5h4RUzt7Kjs2WdjSzbVM3FTWEkX0ycOnXANZ9KVg,11394
47
- xinference/model/audio/__init__.py,sha256=G4n-MyzdarFVOndPRkEyZZrCwqFIG8yIsky6_5dife0,3433
47
+ xinference/model/audio/__init__.py,sha256=KasWsaNPeij6sGpHKqXaUc_bxUw1yYbD7-fwxkcoAVE,3731
48
48
  xinference/model/audio/chattts.py,sha256=ny3DZTCTt2MzdkLw994_QHZ_4qIEUZcNexNJkCejCyo,4998
49
- xinference/model/audio/core.py,sha256=73KojfhE8QFEVkWoqC8FohioQg3TALz1hKCzuDOArgM,6554
49
+ xinference/model/audio/core.py,sha256=bdStvxdYJC2R8YAs4W58P97MrcREq70Ta9JX1ROxynM,6894
50
50
  xinference/model/audio/cosyvoice.py,sha256=Enur1Y4Xa-mpr7wwnoXWwhyh7PUAjrHZ8DV91tTrpjE,6426
51
51
  xinference/model/audio/custom.py,sha256=8GXBRmTtR-GY03-E91nlRGTIuabCRzlt20ecU6Un6Y8,4985
52
- xinference/model/audio/fish_speech.py,sha256=v2WVEV-BLWnbiDvqrx8WTGE_YNKmd9QoAF1LZBXWxn0,7310
52
+ xinference/model/audio/fish_speech.py,sha256=rdt73VK512WocFhL6WqcqoxOkMYtB00sMu40umt9RuQ,8856
53
53
  xinference/model/audio/funasr.py,sha256=65z7U7_F14CCP-jg6BpeY3_49FK7Y5OCRSzrhhsklCg,4075
54
- xinference/model/audio/model_spec.json,sha256=iw0kE-0j2JaLBwIc3KKra9NERbxKNHIhXuhbwnhGXbA,5120
54
+ xinference/model/audio/model_spec.json,sha256=Z_Y7Fv6fxLGAX8em5pgGVR2ZxBxtCsy0ANsKrjy9vA0,7300
55
55
  xinference/model/audio/model_spec_modelscope.json,sha256=U82E5vZahi4si6kpCjdp2FAG2lCpQ7s7w_1t6lj2ysI,2038
56
56
  xinference/model/audio/utils.py,sha256=pwo5cHh8nvhyBa9f-17QaVpXMSjmbpGbPYKwBBtEhGM,717
57
57
  xinference/model/audio/whisper.py,sha256=PQL7rebGC7WlIOItuDtjdEtSJtlhxFkolot-Fj-8uDU,7982
58
+ xinference/model/audio/whisper_mlx.py,sha256=zBuCd7GUlsN9FC_-J11gqIkOCicihfbqxoabiXTvH2Q,7237
58
59
  xinference/model/embedding/__init__.py,sha256=1GmvQsbeeVUT-VRaRGetf8UT4RQgLWIzfp5kfX5jw-k,3567
59
- xinference/model/embedding/core.py,sha256=PP9Hpv_jK9x3cB2oSa1Q7SnzzPn7TnfVyF6uzx216OU,18762
60
+ xinference/model/embedding/core.py,sha256=kGMxfKQ5s0G2fJcW-zVxKThDFbOWpSQD8vkCgnhFnoU,29593
60
61
  xinference/model/embedding/custom.py,sha256=757KncqhsOWVypHZFtuhBP_xy-UTNaFdy0BuZRfuIV8,3848
61
- xinference/model/embedding/model_spec.json,sha256=dn1XZDiB-HED_IqZO3iYdfhpqA1EO3wHcylXd1O9WK8,7060
62
- xinference/model/embedding/model_spec_modelscope.json,sha256=1qyMQR-JrcSZ_WB5gEtNI9IEzE9orSM41rjSXS6rMs4,6210
62
+ xinference/model/embedding/model_spec.json,sha256=cVYzB4VLYw2FoP7bfLntTuMVDEigHTGDSyBtcj8F6gc,7060
63
+ xinference/model/embedding/model_spec_modelscope.json,sha256=kWPY9tILL69X_6iDuVoh6Y_VODLJ7EFsD0CGkbuQlGo,6210
63
64
  xinference/model/embedding/utils.py,sha256=t_y-7TrYenJKzX3p8e8KWXyC66u7Kd7lMvSzEOolnZw,790
64
65
  xinference/model/flexible/__init__.py,sha256=DUAfq9vaiXOfUJfP2WRfqDmGfMtKMqRE-wLETaJw4_w,1718
65
66
  xinference/model/flexible/core.py,sha256=3REGHL9uUTgwgEEr6qv5uNMq-j-7by4bAco7QNwwxx4,7231
@@ -81,32 +82,33 @@ xinference/model/image/scheduler/flux.py,sha256=GHlpPfU5UxsiQWNyvNe9SaVZceNg_2Ci
81
82
  xinference/model/image/stable_diffusion/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
82
83
  xinference/model/image/stable_diffusion/core.py,sha256=M_sYFsY_q91l0D6O7AqQVd_h-RIgXcmaydyCvASyNsI,23055
83
84
  xinference/model/image/stable_diffusion/mlx.py,sha256=GZsozzGB04NfHAdU9MI6gwWE1t_A-s_Ddn_ic8DlkKQ,7476
84
- xinference/model/llm/__init__.py,sha256=9g9dFG2XuNDCTLE5vuJ6kCT-rqe9MfN56aEapyXaJ5M,13938
85
+ xinference/model/llm/__init__.py,sha256=WqNnI4ePhe1XJ_gzFOk-wpM9fjBjUpDwGlAW9CuzlNM,14020
85
86
  xinference/model/llm/core.py,sha256=g-luuAjZizrPunhyFE9IRjn57l0g6FY_1xUwtlRegbs,8151
86
- xinference/model/llm/llm_family.json,sha256=H1jJvTot6DsoJ_hyTMqJbcHI95tkuEwwYeljQRmPW_8,296753
87
+ xinference/model/llm/llm_family.json,sha256=mlqXYbBkjBeB3xWRpaYpScpoDM5DIT1ESxI8IE7lDoI,307998
87
88
  xinference/model/llm/llm_family.py,sha256=tI2wPefd7v-PWcVhUO2qy6iGob_ioeNCwAQQzal-2o4,39549
88
89
  xinference/model/llm/llm_family_csghub.json,sha256=zMKWbihsxQNVB1u5iKJbZUkbOfQ4IPNq1KQ-8IDPQQA,8759
89
- xinference/model/llm/llm_family_modelscope.json,sha256=iNnH9DRTVc-EmX0YS2790LKRjJ945aLbtK97uoTY6_k,227335
90
+ xinference/model/llm/llm_family_modelscope.json,sha256=nySjsEQk6V6PK0IX4zpjUyTQLBVGYv77cxwdHlagvps,238793
90
91
  xinference/model/llm/llm_family_openmind_hub.json,sha256=jl9pfbe5DztoxgEwKBxDk1Wd7TziTiJ48_Ie_lJdYjA,67872
91
92
  xinference/model/llm/memory.py,sha256=NEIMw6wWaF9S_bnBYq-EyuDhVbUEEeceQhwE1iwsrhI,10207
92
- xinference/model/llm/utils.py,sha256=DUC6jPr1-kPNsgc4J5MXNSMVgDlPLfQiitLGfdJxVxM,23596
93
+ xinference/model/llm/utils.py,sha256=uHLhiaxPNgrIOzGvVJJlwE2S7eQtFis4uvfOjFSVm4Q,24294
93
94
  xinference/model/llm/llama_cpp/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
94
95
  xinference/model/llm/llama_cpp/core.py,sha256=vjuTapwbn-ZjUX-8WA0nFyicE4UGUSehU_csSetvcZw,10928
95
96
  xinference/model/llm/lmdeploy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
96
97
  xinference/model/llm/lmdeploy/core.py,sha256=WvSP3x6t-HBv6hKh1qWZatFAzlcZCyyKqvc3ua8yPTI,19835
97
98
  xinference/model/llm/mlx/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
98
- xinference/model/llm/mlx/core.py,sha256=IsSFqkAK27yTafcWODw958jSNyxniiTsn9QR1Z6IhWk,15367
99
+ xinference/model/llm/mlx/core.py,sha256=ojemIUVXt4ApI5LgjJ2BBXF_gw49Cu4EtixurLnSH1s,15318
99
100
  xinference/model/llm/sglang/__init__.py,sha256=-sjSIQ4K6w-TEzx49kVaWeWC443fnZqODU91GCQ_JNo,581
100
- xinference/model/llm/sglang/core.py,sha256=ft4QlDw36gwoic8lyjtSx2ai6KTW84CPVbYr8grqGMI,16698
101
+ xinference/model/llm/sglang/core.py,sha256=1xXOyn3oWYYl-AYvAGwsQkxsaEf0kpbdKQlfORgU7Dk,16721
101
102
  xinference/model/llm/transformers/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
102
103
  xinference/model/llm/transformers/chatglm.py,sha256=tr6nEfHAg_mFRDlqXLMvbSq6U47851jOpqTz_vkWNJM,22266
103
104
  xinference/model/llm/transformers/cogvlm2.py,sha256=I5Ftm0VYjbTAv5ZARZCo32Ggpw58PJfHs5B_nX_BIlU,15972
104
105
  xinference/model/llm/transformers/cogvlm2_video.py,sha256=ZGkpC4x2uEtjwoMrLSODmAUYTjOeSNYxZi9VpQrpnhU,11857
105
106
  xinference/model/llm/transformers/compression.py,sha256=U0vMJ-JaBt4oC2LffgWg6HbPj1CeUi_YdwVbjDd0mRA,8112
106
- xinference/model/llm/transformers/core.py,sha256=rgRrqykyd4fpKNvMfmjAIApWjqvYO3HYB1wJ7cmB9S0,28229
107
+ xinference/model/llm/transformers/core.py,sha256=8-BKAXWhBAQOnk1tDHwNQzq5x74-y1P9kdxO2D3ZhjQ,28247
107
108
  xinference/model/llm/transformers/deepseek_v2.py,sha256=-RKlI3mhja730md4evQ2vfIxBnZD5vWyrgmg_3eovms,4096
108
109
  xinference/model/llm/transformers/deepseek_vl.py,sha256=pB6i6DW5oyfHdqTgKpi2DkIKVGlPLGIDR_Op0sB1uKA,10445
109
110
  xinference/model/llm/transformers/glm4v.py,sha256=goph2HhpV8gUm2t8-T1P-jTF2r_kPeH6QNe64lmlm0g,13871
111
+ xinference/model/llm/transformers/glm_edge_v.py,sha256=sedHB4iRd37UC__1MeXs33NLMQQKFYBIFf3A71rMEZU,8159
110
112
  xinference/model/llm/transformers/intern_vl.py,sha256=0pbze1eo3HvNQ0nW-mVJcJuJ4GrEyBBqQAYIdXnAn6c,18270
111
113
  xinference/model/llm/transformers/internlm2.py,sha256=3mjRgqU0RgCFF0F46ieVH0NO2JCKGsQkmkoVlWJrh8E,3221
112
114
  xinference/model/llm/transformers/minicpmv25.py,sha256=mr80-OlSlK_opSuAO3cz_QlkqujLr6V-OsTP0ebwpE8,6814
@@ -120,7 +122,7 @@ xinference/model/llm/transformers/tensorizer_utils.py,sha256=VXSYbPZtCbd8lVvsnjD
120
122
  xinference/model/llm/transformers/utils.py,sha256=Ej9Tu2yVAotfXMFsl30QlYXLZTODU6Pv_UppsGGUiSw,19185
121
123
  xinference/model/llm/transformers/yi_vl.py,sha256=iCdRLw-wizbU-qXXc8CT4DhC0Pt-uYg0vFwXEhAZjQg,8961
122
124
  xinference/model/llm/vllm/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
123
- xinference/model/llm/vllm/core.py,sha256=gflboRHy4JvhDG6G2bjPgidgNFTU2dDepbTZBmeDGlY,31516
125
+ xinference/model/llm/vllm/core.py,sha256=ZPg4oJql8mhHMs5CpHo83Fm6peeKvB3EBXwaFNQQ3Cw,35185
124
126
  xinference/model/llm/vllm/utils.py,sha256=LKOmwfFRrlSecawxT-uE39tC2RQbf1UIiSH9Uz90X6w,1313
125
127
  xinference/model/rerank/__init__.py,sha256=wRpf1bOMfmAsuEKEGczMTB5fWEvuqltlJbIbRb-x8Ko,3483
126
128
  xinference/model/rerank/core.py,sha256=e-QoFgVk-6LOQPM5zqbEj095J-1bkuyd9c5zRI5DlF8,14560
@@ -15522,9 +15524,9 @@ xinference/web/ui/node_modules/yargs-parser/package.json,sha256=BSwbOzgetKXMK4u0
15522
15524
  xinference/web/ui/node_modules/yocto-queue/package.json,sha256=6U1XHQPGXJTqsiFvT953ORihUtXTblZy4fXBWP9qxC0,725
15523
15525
  xinference/web/ui/node_modules/yup/package.json,sha256=xRFSROB9NKxqSWHEVFvSTsPs9Ll074uo8OS1zEw0qhA,1206
15524
15526
  xinference/web/ui/node_modules/yup/node_modules/type-fest/package.json,sha256=JTv2zTTVgxQ2H82m1-6qEpdMv08lHjFx4Puf_MsbB_Q,1134
15525
- xinference-1.0.0.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
15526
- xinference-1.0.0.dist-info/METADATA,sha256=H-KYRsy1S1Q2Wk_zY6Ei7xiBmaGfcOr4TqaC1NfBsnM,21824
15527
- xinference-1.0.0.dist-info/WHEEL,sha256=bFJAMchF8aTQGUgMZzHJyDDMPTO3ToJ7x23SLJa1SVo,92
15528
- xinference-1.0.0.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
15529
- xinference-1.0.0.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
15530
- xinference-1.0.0.dist-info/RECORD,,
15527
+ xinference-1.0.1.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
15528
+ xinference-1.0.1.dist-info/METADATA,sha256=pLzWAyI3EYcEd7FbPEOnh6qtrxo0LqjCv2EWxREy8ZU,21992
15529
+ xinference-1.0.1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
15530
+ xinference-1.0.1.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
15531
+ xinference-1.0.1.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
15532
+ xinference-1.0.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.45.0)
2
+ Generator: bdist_wheel (0.45.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5