xinference 0.15.1__py3-none-any.whl → 0.15.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/core/model.py +2 -2
- xinference/model/audio/cosyvoice.py +3 -3
- xinference/model/embedding/core.py +14 -5
- xinference/model/embedding/model_spec.json +7 -0
- xinference/model/embedding/model_spec_modelscope.json +9 -1
- xinference/model/image/stable_diffusion/core.py +42 -19
- xinference/model/llm/__init__.py +1 -1
- xinference/model/llm/llm_family.json +862 -26
- xinference/model/llm/llm_family_modelscope.json +895 -10
- xinference/model/llm/sglang/core.py +4 -0
- xinference/model/llm/utils.py +14 -3
- xinference/model/llm/vllm/core.py +27 -6
- xinference/model/llm/vllm/utils.py +42 -0
- xinference/model/rerank/core.py +19 -0
- xinference/model/rerank/model_spec.json +8 -0
- xinference/model/rerank/model_spec_modelscope.json +8 -0
- xinference/model/utils.py +0 -25
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.754740c0.js → main.e51a356d.js} +3 -3
- xinference/web/ui/build/static/js/main.e51a356d.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4385c1095eefbff0a8ec3b2964ba6e5a66a05ab31be721483ca2f43e2a91f6ff.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +1 -0
- {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/METADATA +8 -7
- {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/RECORD +31 -30
- xinference/web/ui/build/static/js/main.754740c0.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/68bede6d95bb5ef0b35bbb3ec5b8c937eaf6862c6cdbddb5ef222a7776aaf336.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/cd90b08d177025dfe84209596fc51878f8a86bcaa6a240848a3d2e5fd4c7ff24.json +0 -1
- /xinference/web/ui/build/static/js/{main.754740c0.js.LICENSE.txt → main.e51a356d.js.LICENSE.txt} +0 -0
- {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/LICENSE +0 -0
- {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/WHEEL +0 -0
- {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/entry_points.txt +0 -0
- {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json

 version_json = '''
 {
-    "date": "2024-09-
+    "date": "2024-09-30T20:17:26+0800",
     "dirty": false,
     "error": null,
-    "full-revisionid": "
-    "version": "0.15.1"
+    "full-revisionid": "00a9ee15279a60a6d75393c4720d8da5cbbf5796",
+    "version": "0.15.3"
 }
 ''' # END VERSION_JSON
xinference/core/model.py
CHANGED
@@ -769,7 +769,7 @@ class ModelActor(xo.StatelessActor):
         self,
         image: "PIL.Image",
         prompt: str,
-        negative_prompt: str,
+        negative_prompt: Optional[str] = None,
         n: int = 1,
         size: Optional[str] = None,
         response_format: str = "url",
@@ -777,12 +777,12 @@ class ModelActor(xo.StatelessActor):
         **kwargs,
     ):
         kwargs.pop("request_id", None)
+        kwargs["negative_prompt"] = negative_prompt
         if hasattr(self._model, "image_to_image"):
             return await self._call_wrapper_json(
                 self._model.image_to_image,
                 image,
                 prompt,
-                negative_prompt,
                 n,
                 size,
                 response_format,
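Note on the change: `negative_prompt` becomes optional and is folded into `kwargs` before dispatch, so the image backend's kwarg filter (see the stable-diffusion changes below) can decide whether the underlying pipeline accepts it. A minimal sketch of the pattern, with a hypothetical wrapper in place of the actor plumbing:

```python
# Hypothetical wrapper, not the xinference API: folding an optional argument
# into kwargs lets one downstream filter decide whether to forward it.
async def image_to_image(model, image, prompt, negative_prompt=None, **kwargs):
    kwargs["negative_prompt"] = negative_prompt  # travels with everything else
    return await model.image_to_image(image, prompt, **kwargs)
```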
xinference/model/audio/cosyvoice.py
CHANGED

@@ -122,10 +122,10 @@ class CosyVoiceModel:
             last_pos = new_last_pos

         def _generator_block():
-
-
+            chunks = [o["tts_speech"] for o in output]
+            t = torch.cat(chunks, dim=1)
             with BytesIO() as out:
-                torchaudio.save(out,
+                torchaudio.save(out, t, 22050, format=response_format)
                 return out.getvalue()

         return _generator_stream() if stream else _generator_block()
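The blocking path now concatenates the generated chunks along the time axis before encoding once. A self-contained sketch of the same steps, with zero tensors standing in for the `o["tts_speech"]` chunks:

```python
from io import BytesIO

import torch
import torchaudio

# CosyVoice chunks are [channels, samples] tensors, so time is dim=1.
chunks = [torch.zeros(1, 2205), torch.zeros(1, 2205)]  # stand-ins for o["tts_speech"]
t = torch.cat(chunks, dim=1)
with BytesIO() as out:
    torchaudio.save(out, t, 22050, format="wav")  # encode once, in memory
    data = out.getvalue()
```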
xinference/model/embedding/core.py
CHANGED

@@ -141,7 +141,15 @@ class EmbeddingModel:

     def load(self):
         try:
+            import sentence_transformers
             from sentence_transformers import SentenceTransformer
+
+            if sentence_transformers.__version__ < "3.1.0":
+                raise ValueError(
+                    "The sentence_transformers version must be greater than 3.1.0. "
+                    "Please upgrade your version via `pip install -U sentence_transformers` or refer to "
+                    "https://github.com/UKPLab/sentence-transformers"
+                )
         except ImportError:
             error_message = "Failed to import module 'SentenceTransformer'"
             installation_guide = [
@@ -173,9 +181,6 @@ class EmbeddingModel:
             )
             torch_dtype = torch.float32

-        from ..utils import patch_trust_remote_code
-
-        patch_trust_remote_code()
         if (
             "gte" in self._model_spec.model_name.lower()
             and "qwen2" in self._model_spec.model_name.lower()
@@ -191,7 +196,10 @@ class EmbeddingModel:
         else:
             model_kwargs = {"torch_dtype": torch_dtype} if torch_dtype else None
         self._model = SentenceTransformer(
-            self._model_path,
+            self._model_path,
+            device=self._device,
+            model_kwargs=model_kwargs,
+            trust_remote_code=True,
         )

     def create_embedding(self, sentences: Union[str, List[str]], **kwargs):
@@ -213,6 +221,7 @@ class EmbeddingModel:
         convert_to_tensor: bool = False,
         device: str = None,
         normalize_embeddings: bool = False,
+        **kwargs,
     ):
         """
         Computes sentence embeddings
@@ -317,7 +326,7 @@ class EmbeddingModel:
                 all_token_nums += features["attention_mask"].sum().item()

                 with torch.no_grad():
-                    out_features = model.forward(features)
+                    out_features = model.forward(features, **kwargs)

                 if output_value == "token_embeddings":
                     embeddings = []
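One caveat with the new guard: comparing `__version__` strings with `<` is lexicographic, so for example `"3.10.0" < "3.2.0"` evaluates to `True`. A more robust sketch (assuming the `packaging` package is available) parses the versions first:

```python
from packaging import version

import sentence_transformers

# Semantic comparison instead of string comparison.
if version.parse(sentence_transformers.__version__) < version.parse("3.1.0"):
    raise ValueError(
        "sentence_transformers>=3.1.0 is required; "
        "upgrade via `pip install -U sentence-transformers`."
    )
```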
xinference/model/embedding/model_spec.json
CHANGED

@@ -238,5 +238,12 @@
     "language": ["zh", "en"],
     "model_id": "Alibaba-NLP/gte-Qwen2-7B-instruct",
     "model_revision": "e26182b2122f4435e8b3ebecbf363990f409b45b"
+  },
+  {
+    "model_name": "jina-embeddings-v3",
+    "dimensions": 1024,
+    "max_tokens": 8192,
+    "language": ["zh", "en"],
+    "model_id": "jinaai/jina-embeddings-v3"
   }
 ]
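With the spec entry registered, the new model should be usable like any other built-in embedding model. A hedged sketch with the Python client (the endpoint and port below are assumptions):

```python
from xinference.client import Client

client = Client("http://localhost:9997")
uid = client.launch_model(model_name="jina-embeddings-v3", model_type="embedding")
model = client.get_model(uid)
print(model.create_embedding("Hello, world!"))  # 1024-dimensional vectors per the spec
```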
xinference/model/embedding/model_spec_modelscope.json
CHANGED

@@ -233,12 +233,20 @@
     "model_id": "AI-ModelScope/m3e-large",
     "model_hub": "modelscope"
   },
-
+  {
     "model_name": "gte-Qwen2",
     "dimensions": 4096,
     "max_tokens": 32000,
     "language": ["zh", "en"],
     "model_id": "iic/gte_Qwen2-7B-instruct",
     "model_hub": "modelscope"
+  },
+  {
+    "model_name": "jina-embeddings-v3",
+    "dimensions": 1024,
+    "max_tokens": 8192,
+    "language": ["zh", "en"],
+    "model_id": "jinaai/jina-embeddings-v3",
+    "model_hub": "modelscope"
   }
 ]
xinference/model/image/stable_diffusion/core.py
CHANGED

@@ -21,6 +21,7 @@ import re
 import sys
 import time
 import uuid
+import warnings
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial
 from io import BytesIO
@@ -31,7 +32,7 @@ import torch
 from PIL import ImageOps

 from ....constants import XINFERENCE_IMAGE_DIR
-from ....device_utils import move_model_to_available_device
+from ....device_utils import get_available_device, move_model_to_available_device
 from ....types import Image, ImageList, LoRA
 from ..sdapi import SDAPIDiffusionModelMixin
@@ -60,6 +61,23 @@ SAMPLING_METHODS = [
 ]


+def model_accept_param(params: Union[str, List[str]], model: Any) -> bool:
+    params = [params] if isinstance(params, str) else params
+    # model is diffusers Pipeline
+    parameters = inspect.signature(model.__call__).parameters  # type: ignore
+    allow_params = False
+    for param in parameters.values():
+        if param.kind == inspect.Parameter.VAR_KEYWORD:
+            # the __call__ can accept **kwargs,
+            # we treat it as it can accept any parameters
+            allow_params = True
+            break
+    if not allow_params:
+        if all(param in parameters for param in params):
+            allow_params = True
+    return allow_params
+
+
 class DiffusionModel(SDAPIDiffusionModelMixin):
     def __init__(
         self,
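The new helper can be exercised against toy pipelines to see both acceptance paths, explicit parameter names versus a `**kwargs` catch-all; the classes below are illustrative stand-ins, not diffusers types:

```python
class PipeA:
    def __call__(self, prompt, width, height): ...

class PipeB:
    def __call__(self, prompt, **kwargs): ...

print(model_accept_param(["width", "height"], PipeA()))  # True: named explicitly
print(model_accept_param("anything_at_all", PipeB()))    # True: **kwargs catch-all
print(model_accept_param("negative_prompt", PipeA()))    # False: not in signature
```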
@@ -175,6 +193,18 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
             self._model_path,
             **self._kwargs,
         )
+        if self._kwargs.get("deepcache", True):
+            # NOTE: DeepCache should be loaded first before cpu_offloading
+            try:
+                from DeepCache import DeepCacheSDHelper
+
+                helper = DeepCacheSDHelper(pipe=self._model)
+                helper.set_params(cache_interval=3, cache_branch_id=0)
+                helper.enable()
+            except ImportError:
+                logger.debug("deepcache is not installed")
+                pass
+
         if self._kwargs.get("cpu_offload", False):
             logger.debug("CPU offloading model")
             self._model.enable_model_cpu_offload()
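DeepCache reuses high-level UNet features across nearby denoising steps to speed up diffusion inference, and since the flag defaults to `True` here, passing `deepcache=False` in the model's launch kwargs opts out. For reference, the same wiring outside the class, as a sketch against the DeepCache API used above (`pipe` is assumed to be a loaded diffusers pipeline):

```python
from DeepCache import DeepCacheSDHelper

helper = DeepCacheSDHelper(pipe=pipe)
helper.set_params(cache_interval=3, cache_branch_id=0)  # reuse features every 3 steps
helper.enable()
images = pipe(prompt="a photo of a cat").images  # faster denoising loop
helper.disable()  # restore the vanilla forward pass
```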
@@ -187,7 +217,7 @@ class DiffusionModel(SDAPIDiffusionModelMixin):

     @staticmethod
     def _get_scheduler(model: Any, sampler_name: str):
-        if not sampler_name:
+        if not sampler_name or sampler_name == "default":
             return

         assert model is not None
@@ -283,13 +313,14 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         origin_size = kwargs.pop("origin_size", None)
         seed = kwargs.pop("seed", None)
         if seed is not None:
-            kwargs["generator"] = generator = torch.Generator(device=
+            kwargs["generator"] = generator = torch.Generator(device=get_available_device())  # type: ignore
             if seed != -1:
                 kwargs["generator"] = generator.manual_seed(seed)
         sampler_name = kwargs.pop("sampler_name", None)
         assert callable(model)
         with self._reset_when_done(model, sampler_name):
             logger.debug("stable diffusion args: %s, model: %s", kwargs, model)
+            self._filter_kwargs(model, kwargs)
             images = model(**kwargs).images

         # revert padding if padded
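A device-local `torch.Generator` keeps seeded runs reproducible on whatever backend `get_available_device()` selects. A minimal CPU illustration of the seed semantics:

```python
import torch

gen = torch.Generator(device="cpu").manual_seed(42)
a = torch.randn(4, generator=gen)
gen = torch.Generator(device="cpu").manual_seed(42)
b = torch.randn(4, generator=gen)
assert torch.equal(a, b)  # same seed, same draw
```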
@@ -328,11 +359,17 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
             raise ValueError(f"Unsupported response format: {response_format}")

     @classmethod
-    def _filter_kwargs(cls, kwargs: dict):
+    def _filter_kwargs(cls, model, kwargs: dict):
         for arg in ["negative_prompt", "num_inference_steps"]:
             if not kwargs.get(arg):
                 kwargs.pop(arg, None)

+        for key in list(kwargs):
+            allow_key = model_accept_param(key, model)
+            if not allow_key:
+                warnings.warn(f"{type(model)} cannot accept `{key}`, will ignore it")
+                kwargs.pop(key)
+
     def text_to_image(
         self,
         prompt: str,
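Net effect: generation kwargs the pipeline cannot accept are now warned about and dropped, instead of surfacing later as a `TypeError` from the pipeline's `__call__`. A small self-contained illustration (`ToyPipe` is a stand-in; `DiffusionModel` as patched above):

```python
class ToyPipe:
    def __call__(self, prompt, width, height): ...

kwargs = {"prompt": "a cat", "width": 512, "bogus_flag": 1}
DiffusionModel._filter_kwargs(ToyPipe(), kwargs)  # warns that bogus_flag is ignored
print(kwargs)  # {'prompt': 'a cat', 'width': 512}
```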
@@ -346,7 +383,6 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         width, height = map(int, re.split(r"[^\d]+", size))
         generate_kwargs = self._model_spec.default_generate_config.copy()  # type: ignore
         generate_kwargs.update({k: v for k, v in kwargs.items() if v is not None})
-        self._filter_kwargs(generate_kwargs)
         return self._call_model(
             prompt=prompt,
             height=height,
@@ -368,7 +404,6 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         self,
         image: PIL.Image,
         prompt: Optional[Union[str, List[str]]] = None,
-        negative_prompt: Optional[Union[str, List[str]]] = None,
         n: int = 1,
         size: Optional[str] = None,
         response_format: str = "url",
@@ -404,19 +439,10 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
             kwargs["height"] = height
         else:
             # SD3 image2image cannot accept width and height
-
-            allow_width_height = False
-            for param in parameters.values():
-                if param.kind == inspect.Parameter.VAR_KEYWORD:
-                    allow_width_height = True
-                    break
-            if "width" in parameters or "height" in parameters:
-                allow_width_height = True
+            allow_width_height = model_accept_param(["width", "height"], model)
         if allow_width_height:
             kwargs["width"], kwargs["height"] = image.size

-        kwargs["negative_prompt"] = negative_prompt
-        self._filter_kwargs(kwargs)
         return self._call_model(
             image=image,
             prompt=prompt,
@@ -431,7 +457,6 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         image: PIL.Image,
         mask_image: PIL.Image,
         prompt: Optional[Union[str, List[str]]] = None,
-        negative_prompt: Optional[Union[str, List[str]]] = None,
         n: int = 1,
         size: str = "1024*1024",
         response_format: str = "url",
@@ -469,8 +494,6 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         # calculate actual image size after padding
         width, height = image.size

-        kwargs["negative_prompt"] = negative_prompt
-        self._filter_kwargs(kwargs)
         return self._call_model(
             image=image,
             mask_image=mask_image,
xinference/model/llm/__init__.py
CHANGED
@@ -121,7 +121,7 @@ def register_custom_model():
                 with codecs.open(
                     os.path.join(user_defined_llm_dir, f), encoding="utf-8"
                 ) as fd:
-                    user_defined_llm_family = CustomLLMFamilyV1.
+                    user_defined_llm_family = CustomLLMFamilyV1.parse_raw(fd.read())
                     register_llm(user_defined_llm_family, persist=False)
             except Exception as e:
                 warnings.warn(f"{user_defined_llm_dir}/{f} has error, {e}")
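`parse_raw` is pydantic's (v1-style) JSON deserializer, so each user-defined family file is read and validated in one step. The pattern in isolation, with a hypothetical stand-in model:

```python
from pydantic import BaseModel

class FamilySketch(BaseModel):  # hypothetical stand-in for CustomLLMFamilyV1
    model_name: str

fam = FamilySketch.parse_raw('{"model_name": "my-llm"}')
print(fam.model_name)  # my-llm
```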