xinference 0.15.1__py3-none-any.whl → 0.15.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (34)
  1. xinference/_version.py +3 -3
  2. xinference/core/model.py +2 -2
  3. xinference/model/audio/cosyvoice.py +3 -3
  4. xinference/model/embedding/core.py +14 -5
  5. xinference/model/embedding/model_spec.json +7 -0
  6. xinference/model/embedding/model_spec_modelscope.json +9 -1
  7. xinference/model/image/stable_diffusion/core.py +42 -19
  8. xinference/model/llm/__init__.py +1 -1
  9. xinference/model/llm/llm_family.json +862 -26
  10. xinference/model/llm/llm_family_modelscope.json +895 -10
  11. xinference/model/llm/sglang/core.py +4 -0
  12. xinference/model/llm/utils.py +14 -3
  13. xinference/model/llm/vllm/core.py +27 -6
  14. xinference/model/llm/vllm/utils.py +42 -0
  15. xinference/model/rerank/core.py +19 -0
  16. xinference/model/rerank/model_spec.json +8 -0
  17. xinference/model/rerank/model_spec_modelscope.json +8 -0
  18. xinference/model/utils.py +0 -25
  19. xinference/web/ui/build/asset-manifest.json +3 -3
  20. xinference/web/ui/build/index.html +1 -1
  21. xinference/web/ui/build/static/js/{main.754740c0.js → main.e51a356d.js} +3 -3
  22. xinference/web/ui/build/static/js/main.e51a356d.js.map +1 -0
  23. xinference/web/ui/node_modules/.cache/babel-loader/4385c1095eefbff0a8ec3b2964ba6e5a66a05ab31be721483ca2f43e2a91f6ff.json +1 -0
  24. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +1 -0
  25. {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/METADATA +8 -7
  26. {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/RECORD +31 -30
  27. xinference/web/ui/build/static/js/main.754740c0.js.map +0 -1
  28. xinference/web/ui/node_modules/.cache/babel-loader/68bede6d95bb5ef0b35bbb3ec5b8c937eaf6862c6cdbddb5ef222a7776aaf336.json +0 -1
  29. xinference/web/ui/node_modules/.cache/babel-loader/cd90b08d177025dfe84209596fc51878f8a86bcaa6a240848a3d2e5fd4c7ff24.json +0 -1
  30. /xinference/web/ui/build/static/js/{main.754740c0.js.LICENSE.txt → main.e51a356d.js.LICENSE.txt} +0 -0
  31. {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/LICENSE +0 -0
  32. {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/WHEEL +0 -0
  33. {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/entry_points.txt +0 -0
  34. {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/top_level.txt +0 -0
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2024-09-14T13:22:13+0800",
+ "date": "2024-09-30T20:17:26+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "961d355102007e3cd7963a353105b2422a31d4fd",
- "version": "0.15.1"
+ "full-revisionid": "00a9ee15279a60a6d75393c4720d8da5cbbf5796",
+ "version": "0.15.3"
 }
 ''' # END VERSION_JSON
 
xinference/core/model.py CHANGED
@@ -769,7 +769,7 @@ class ModelActor(xo.StatelessActor):
         self,
         image: "PIL.Image",
         prompt: str,
-        negative_prompt: str,
+        negative_prompt: Optional[str] = None,
         n: int = 1,
         size: Optional[str] = None,
         response_format: str = "url",
@@ -777,12 +777,12 @@
         **kwargs,
     ):
         kwargs.pop("request_id", None)
+        kwargs["negative_prompt"] = negative_prompt
         if hasattr(self._model, "image_to_image"):
             return await self._call_wrapper_json(
                 self._model.image_to_image,
                 image,
                 prompt,
-                negative_prompt,
                 n,
                 size,
                 response_format,
xinference/model/audio/cosyvoice.py CHANGED
@@ -122,10 +122,10 @@ class CosyVoiceModel:
                 last_pos = new_last_pos
 
         def _generator_block():
-            chunk = next(output)
-            assert isinstance(chunk, dict), "Expected data to be of type dict"
+            chunks = [o["tts_speech"] for o in output]
+            t = torch.cat(chunks, dim=1)
             with BytesIO() as out:
-                torchaudio.save(out, chunk["tts_speech"], 22050, format=response_format)
+                torchaudio.save(out, t, 22050, format=response_format)
                 return out.getvalue()
 
         return _generator_stream() if stream else _generator_block()
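
The non-streaming branch now drains every chunk from the CosyVoice generator and concatenates them before encoding, instead of saving only the first chunk. A standalone sketch of that concatenate-then-encode pattern (the dummy `output` list and zero tensors below are made up; the 22050 Hz rate follows the diff):

    from io import BytesIO

    import torch
    import torchaudio

    # stand-in for the CosyVoice inference generator: each item carries a
    # [1, num_samples] waveform under the "tts_speech" key (fabricated data)
    output = [{"tts_speech": torch.zeros(1, 22050)}, {"tts_speech": torch.zeros(1, 11025)}]

    chunks = [o["tts_speech"] for o in output]  # gather all chunks, not just next(output)
    waveform = torch.cat(chunks, dim=1)         # join along the time dimension

    with BytesIO() as buf:
        torchaudio.save(buf, waveform, 22050, format="wav")
        audio_bytes = buf.getvalue()            # the whole utterance as encoded bytes

    print(len(audio_bytes), "bytes,", waveform.shape[1], "samples")
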
xinference/model/embedding/core.py CHANGED
@@ -141,7 +141,15 @@ class EmbeddingModel:
 
     def load(self):
         try:
+            import sentence_transformers
             from sentence_transformers import SentenceTransformer
+
+            if sentence_transformers.__version__ < "3.1.0":
+                raise ValueError(
+                    "The sentence_transformers version must be greater than 3.1.0. "
+                    "Please upgrade your version via `pip install -U sentence_transformers` or refer to "
+                    "https://github.com/UKPLab/sentence-transformers"
+                )
         except ImportError:
             error_message = "Failed to import module 'SentenceTransformer'"
             installation_guide = [
@@ -173,9 +181,6 @@ class EmbeddingModel:
                 )
                 torch_dtype = torch.float32
 
-        from ..utils import patch_trust_remote_code
-
-        patch_trust_remote_code()
         if (
             "gte" in self._model_spec.model_name.lower()
             and "qwen2" in self._model_spec.model_name.lower()
@@ -191,7 +196,10 @@ class EmbeddingModel:
         else:
             model_kwargs = {"torch_dtype": torch_dtype} if torch_dtype else None
             self._model = SentenceTransformer(
-                self._model_path, device=self._device, model_kwargs=model_kwargs
+                self._model_path,
+                device=self._device,
+                model_kwargs=model_kwargs,
+                trust_remote_code=True,
             )
 
     def create_embedding(self, sentences: Union[str, List[str]], **kwargs):
@@ -213,6 +221,7 @@ class EmbeddingModel:
         convert_to_tensor: bool = False,
         device: str = None,
         normalize_embeddings: bool = False,
+        **kwargs,
     ):
         """
         Computes sentence embeddings
@@ -317,7 +326,7 @@ class EmbeddingModel:
                 all_token_nums += features["attention_mask"].sum().item()
 
                 with torch.no_grad():
-                    out_features = model.forward(features)
+                    out_features = model.forward(features, **kwargs)
 
                 if output_value == "token_embeddings":
                     embeddings = []
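
The new guard in `load()` compares `sentence_transformers.__version__` against "3.1.0" as plain strings. For reference only (this is not part of the diff), a parsed comparison with `packaging` avoids lexicographic pitfalls such as "3.10.0" sorting before "3.2.0":

    import sentence_transformers
    from packaging import version  # assumes the `packaging` distribution is installed

    # sketch of the same gate with parsed versions instead of raw string comparison
    if version.parse(sentence_transformers.__version__) < version.parse("3.1.0"):
        raise ValueError(
            "sentence_transformers >= 3.1.0 is required; "
            "upgrade via `pip install -U sentence-transformers`"
        )
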
xinference/model/embedding/model_spec.json CHANGED
@@ -238,5 +238,12 @@
     "language": ["zh", "en"],
     "model_id": "Alibaba-NLP/gte-Qwen2-7B-instruct",
     "model_revision": "e26182b2122f4435e8b3ebecbf363990f409b45b"
+  },
+  {
+    "model_name": "jina-embeddings-v3",
+    "dimensions": 1024,
+    "max_tokens": 8192,
+    "language": ["zh", "en"],
+    "model_id": "jinaai/jina-embeddings-v3"
   }
 ]
xinference/model/embedding/model_spec_modelscope.json CHANGED
@@ -233,12 +233,20 @@
     "model_id": "AI-ModelScope/m3e-large",
     "model_hub": "modelscope"
   },
-  {
+  {
     "model_name": "gte-Qwen2",
     "dimensions": 4096,
     "max_tokens": 32000,
     "language": ["zh", "en"],
     "model_id": "iic/gte_Qwen2-7B-instruct",
     "model_hub": "modelscope"
+  },
+  {
+    "model_name": "jina-embeddings-v3",
+    "dimensions": 1024,
+    "max_tokens": 8192,
+    "language": ["zh", "en"],
+    "model_id": "jinaai/jina-embeddings-v3",
+    "model_hub": "modelscope"
   }
 ]
xinference/model/image/stable_diffusion/core.py CHANGED
@@ -21,6 +21,7 @@ import re
 import sys
 import time
 import uuid
+import warnings
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial
 from io import BytesIO
@@ -31,7 +32,7 @@ import torch
 from PIL import ImageOps
 
 from ....constants import XINFERENCE_IMAGE_DIR
-from ....device_utils import move_model_to_available_device
+from ....device_utils import get_available_device, move_model_to_available_device
 from ....types import Image, ImageList, LoRA
 from ..sdapi import SDAPIDiffusionModelMixin
 
@@ -60,6 +61,23 @@ SAMPLING_METHODS = [
 ]
 
 
+def model_accept_param(params: Union[str, List[str]], model: Any) -> bool:
+    params = [params] if isinstance(params, str) else params
+    # model is diffusers Pipeline
+    parameters = inspect.signature(model.__call__).parameters  # type: ignore
+    allow_params = False
+    for param in parameters.values():
+        if param.kind == inspect.Parameter.VAR_KEYWORD:
+            # the __call__ can accept **kwargs,
+            # we treat it as it can accept any parameters
+            allow_params = True
+            break
+    if not allow_params:
+        if all(param in parameters for param in params):
+            allow_params = True
+    return allow_params
+
+
 class DiffusionModel(SDAPIDiffusionModelMixin):
     def __init__(
         self,
@@ -175,6 +193,18 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
                 self._model_path,
                 **self._kwargs,
             )
+        if self._kwargs.get("deepcache", True):
+            # NOTE: DeepCache should be loaded first before cpu_offloading
+            try:
+                from DeepCache import DeepCacheSDHelper
+
+                helper = DeepCacheSDHelper(pipe=self._model)
+                helper.set_params(cache_interval=3, cache_branch_id=0)
+                helper.enable()
+            except ImportError:
+                logger.debug("deepcache is not installed")
+                pass
+
         if self._kwargs.get("cpu_offload", False):
             logger.debug("CPU offloading model")
             self._model.enable_model_cpu_offload()
@@ -187,7 +217,7 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
 
     @staticmethod
     def _get_scheduler(model: Any, sampler_name: str):
-        if not sampler_name:
+        if not sampler_name or sampler_name == "default":
             return
 
         assert model is not None
@@ -283,13 +313,14 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         origin_size = kwargs.pop("origin_size", None)
        seed = kwargs.pop("seed", None)
         if seed is not None:
-            kwargs["generator"] = generator = torch.Generator(device=self._model.device)  # type: ignore
+            kwargs["generator"] = generator = torch.Generator(device=get_available_device())  # type: ignore
             if seed != -1:
                 kwargs["generator"] = generator.manual_seed(seed)
         sampler_name = kwargs.pop("sampler_name", None)
         assert callable(model)
         with self._reset_when_done(model, sampler_name):
             logger.debug("stable diffusion args: %s, model: %s", kwargs, model)
+            self._filter_kwargs(model, kwargs)
             images = model(**kwargs).images
 
         # revert padding if padded
@@ -328,11 +359,17 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
             raise ValueError(f"Unsupported response format: {response_format}")
 
     @classmethod
-    def _filter_kwargs(cls, kwargs: dict):
+    def _filter_kwargs(cls, model, kwargs: dict):
         for arg in ["negative_prompt", "num_inference_steps"]:
             if not kwargs.get(arg):
                 kwargs.pop(arg, None)
 
+        for key in list(kwargs):
+            allow_key = model_accept_param(key, model)
+            if not allow_key:
+                warnings.warn(f"{type(model)} cannot accept `{key}`, will ignore it")
+                kwargs.pop(key)
+
     def text_to_image(
         self,
         prompt: str,
@@ -346,7 +383,6 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         width, height = map(int, re.split(r"[^\d]+", size))
         generate_kwargs = self._model_spec.default_generate_config.copy()  # type: ignore
         generate_kwargs.update({k: v for k, v in kwargs.items() if v is not None})
-        self._filter_kwargs(generate_kwargs)
         return self._call_model(
             prompt=prompt,
             height=height,
@@ -368,7 +404,6 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         self,
         image: PIL.Image,
         prompt: Optional[Union[str, List[str]]] = None,
-        negative_prompt: Optional[Union[str, List[str]]] = None,
         n: int = 1,
         size: Optional[str] = None,
         response_format: str = "url",
@@ -404,19 +439,10 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
                 kwargs["height"] = height
         else:
             # SD3 image2image cannot accept width and height
-            parameters = inspect.signature(model.__call__).parameters  # type: ignore
-            allow_width_height = False
-            for param in parameters.values():
-                if param.kind == inspect.Parameter.VAR_KEYWORD:
-                    allow_width_height = True
-                    break
-            if "width" in parameters or "height" in parameters:
-                allow_width_height = True
+            allow_width_height = model_accept_param(["width", "height"], model)
             if allow_width_height:
                 kwargs["width"], kwargs["height"] = image.size
 
-        kwargs["negative_prompt"] = negative_prompt
-        self._filter_kwargs(kwargs)
         return self._call_model(
             image=image,
             prompt=prompt,
@@ -431,7 +457,6 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         image: PIL.Image,
         mask_image: PIL.Image,
         prompt: Optional[Union[str, List[str]]] = None,
-        negative_prompt: Optional[Union[str, List[str]]] = None,
         n: int = 1,
         size: str = "1024*1024",
         response_format: str = "url",
@@ -469,8 +494,6 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         # calculate actual image size after padding
         width, height = image.size
 
-        kwargs["negative_prompt"] = negative_prompt
-        self._filter_kwargs(kwargs)
         return self._call_model(
             image=image,
             mask_image=mask_image,
xinference/model/llm/__init__.py CHANGED
@@ -121,7 +121,7 @@ def register_custom_model():
             with codecs.open(
                 os.path.join(user_defined_llm_dir, f), encoding="utf-8"
             ) as fd:
-                user_defined_llm_family = CustomLLMFamilyV1.parse_obj(json.load(fd))
+                user_defined_llm_family = CustomLLMFamilyV1.parse_raw(fd.read())
                 register_llm(user_defined_llm_family, persist=False)
         except Exception as e:
             warnings.warn(f"{user_defined_llm_dir}/{f} has error, {e}")
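
`register_custom_model` now hands the raw JSON text to `CustomLLMFamilyV1.parse_raw` instead of decoding it first and calling `parse_obj`. The generic pydantic v1-style distinction, shown with a hypothetical `ToyFamily` model standing in for `CustomLLMFamilyV1` (which may layer extra handling on top):

    import json

    from pydantic import BaseModel  # pydantic v1-style API


    class ToyFamily(BaseModel):  # hypothetical stand-in for CustomLLMFamilyV1
        model_name: str
        context_length: int


    raw = '{"model_name": "my-llm", "context_length": 4096}'

    # parse_raw: hand over the JSON string; pydantic parses and validates in one step
    family = ToyFamily.parse_raw(raw)

    # parse_obj: expects an already-decoded dict, so the caller must json.loads first
    same_family = ToyFamily.parse_obj(json.loads(raw))

    assert family == same_family
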