xinference 0.15.3__py3-none-any.whl → 0.15.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (43)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +29 -2
  3. xinference/client/restful/restful_client.py +10 -0
  4. xinference/constants.py +4 -0
  5. xinference/core/image_interface.py +76 -23
  6. xinference/core/model.py +80 -39
  7. xinference/core/progress_tracker.py +187 -0
  8. xinference/core/supervisor.py +11 -0
  9. xinference/core/worker.py +1 -0
  10. xinference/model/audio/chattts.py +2 -1
  11. xinference/model/audio/core.py +0 -2
  12. xinference/model/audio/model_spec.json +8 -0
  13. xinference/model/audio/model_spec_modelscope.json +9 -0
  14. xinference/model/image/core.py +6 -7
  15. xinference/model/image/sdapi.py +35 -4
  16. xinference/model/image/stable_diffusion/core.py +208 -78
  17. xinference/model/llm/llm_family.json +16 -16
  18. xinference/model/llm/llm_family_modelscope.json +16 -12
  19. xinference/model/llm/transformers/cogvlm2.py +2 -1
  20. xinference/model/llm/transformers/cogvlm2_video.py +2 -0
  21. xinference/model/llm/transformers/core.py +6 -2
  22. xinference/model/llm/transformers/deepseek_vl.py +2 -0
  23. xinference/model/llm/transformers/glm4v.py +2 -1
  24. xinference/model/llm/transformers/intern_vl.py +2 -0
  25. xinference/model/llm/transformers/minicpmv25.py +2 -0
  26. xinference/model/llm/transformers/minicpmv26.py +2 -0
  27. xinference/model/llm/transformers/omnilmm.py +2 -0
  28. xinference/model/llm/transformers/qwen2_audio.py +11 -4
  29. xinference/model/llm/transformers/qwen2_vl.py +2 -28
  30. xinference/model/llm/transformers/qwen_vl.py +2 -1
  31. xinference/model/llm/transformers/utils.py +35 -2
  32. xinference/model/llm/transformers/yi_vl.py +2 -0
  33. xinference/model/llm/utils.py +58 -14
  34. xinference/model/llm/vllm/core.py +52 -8
  35. xinference/model/llm/vllm/utils.py +0 -1
  36. xinference/model/utils.py +7 -4
  37. xinference/model/video/core.py +0 -2
  38. {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/METADATA +3 -3
  39. {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/RECORD +43 -42
  40. {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/LICENSE +0 -0
  41. {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/WHEEL +0 -0
  42. {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/entry_points.txt +0 -0
  43. {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/top_level.txt +0 -0
@@ -29,6 +29,7 @@ from ...types import (
29
29
  ChatCompletion,
30
30
  ChatCompletionChoice,
31
31
  ChatCompletionChunk,
32
+ ChatCompletionMessage,
32
33
  Completion,
33
34
  CompletionChoice,
34
35
  CompletionChunk,
@@ -50,6 +51,7 @@ QWEN_TOOL_CALL_FAMILY = [
50
51
  "qwen1.5-moe-chat",
51
52
  "qwen2-instruct",
52
53
  "qwen2-moe-instruct",
54
+ "qwen2.5-instruct",
53
55
  ]
54
56
 
55
57
  GLM4_TOOL_CALL_FAMILY = [
@@ -57,6 +59,10 @@ GLM4_TOOL_CALL_FAMILY = [
57
59
  "glm4-chat-1m",
58
60
  ]
59
61
 
62
+ LLAMA3_TOOL_CALL_FAMILY = [
63
+ "llama-3.1-instruct",
64
+ ]
65
+
60
66
  QWEN_TOOL_CALL_SYMBOLS = ["<tool_call>", "</tool_call>"]
61
67
 
62
68
 
@@ -113,7 +119,7 @@ class ChatModelMixin:
113
119
  return self._build_from_raw_template(messages, chat_template, **kwargs)
114
120
 
115
121
  @staticmethod
116
- def get_specific_prompt(model_family: str, messages: List[Dict]):
122
+ def get_specific_prompt(model_family: str, messages: List[ChatCompletionMessage]):
117
123
  """
118
124
  Inspired by FastChat. Format chat history into a prompt according to the prompty style of
119
125
  different models.
@@ -129,7 +135,7 @@ class ChatModelMixin:
129
135
  ret = (
130
136
  "<s>"
131
137
  if system_prompt == ""
132
- else "<s><|im_start|>system\n"
138
+ else "<s><|im_start|>system\n" # type: ignore
133
139
  + system_prompt
134
140
  + intra_message_sep
135
141
  + "\n"
@@ -333,8 +339,9 @@ class ChatModelMixin:
333
339
  for content in contents:
334
340
  content = content.strip()
335
341
  if content:
336
- if content.startswith(QWEN_TOOL_CALL_SYMBOLS[0]):
337
- content = content[len(QWEN_TOOL_CALL_SYMBOLS[0]) :]
342
+ pos = content.find(QWEN_TOOL_CALL_SYMBOLS[0])
343
+ if pos != -1:
344
+ content = content[pos + len(QWEN_TOOL_CALL_SYMBOLS[0]) :]
338
345
  content = content.strip()
339
346
  try:
340
347
  res = json.loads(content)
@@ -353,6 +360,15 @@ class ChatModelMixin:
353
360
  text = c["choices"][0]["text"]
354
361
  return cls._handle_qwen_tool_result(text)
355
362
 
363
+ @classmethod
364
+ def _eval_llama3_chat_arguments(cls, c) -> List[Tuple]:
365
+ text = c["choices"][0]["text"]
366
+ try:
367
+ data = eval(text, {}, {})
368
+ return [(None, data["name"], data["parameters"])]
369
+ except Exception:
370
+ return [(text, None, None)]
371
+
356
372
  @classmethod
357
373
  def _eval_tool_arguments(cls, model_family, c):
358
374
  family = model_family.model_family or model_family.model_name
@@ -360,6 +376,8 @@ class ChatModelMixin:
360
376
  result = cls._eval_glm_chat_arguments(c)
361
377
  elif family in QWEN_TOOL_CALL_FAMILY:
362
378
  result = cls._eval_qwen_chat_arguments(c)
379
+ elif family in LLAMA3_TOOL_CALL_FAMILY:
380
+ result = cls._eval_llama3_chat_arguments(c)
363
381
  else:
364
382
  raise Exception(
365
383
  f"Model {model_family.model_name} is not support tool calls."
@@ -376,16 +394,14 @@ class ChatModelMixin:
376
394
  for content, func, args in tool_result:
377
395
  if func:
378
396
  tool_calls.append(
379
- [
380
- {
381
- "id": f"call_{_id}",
382
- "type": "function",
383
- "function": {
384
- "name": func,
385
- "arguments": json.dumps(args, ensure_ascii=False),
386
- },
387
- }
388
- ]
397
+ {
398
+ "id": f"call_{_id}",
399
+ "type": "function",
400
+ "function": {
401
+ "name": func,
402
+ "arguments": json.dumps(args, ensure_ascii=False),
403
+ },
404
+ }
389
405
  )
390
406
  else:
391
407
  failed_contents.append(content)
@@ -471,6 +487,34 @@ class ChatModelMixin:
471
487
  "usage": usage,
472
488
  }
473
489
 
490
+ def _transform_messages(
491
+ self,
492
+ messages: List[ChatCompletionMessage],
493
+ ):
494
+ transformed_messages = []
495
+ for msg in messages:
496
+ new_content = []
497
+ role = msg["role"]
498
+ content = msg["content"]
499
+ if isinstance(content, str):
500
+ new_content.append({"type": "text", "text": content})
501
+ elif isinstance(content, List):
502
+ for item in content: # type: ignore
503
+ if "text" in item:
504
+ new_content.append({"type": "text", "text": item["text"]})
505
+ elif "image_url" in item:
506
+ new_content.append(
507
+ {"type": "image", "image": item["image_url"]["url"]}
508
+ )
509
+ elif "video_url" in item:
510
+ new_content.append(
511
+ {"type": "video", "video": item["video_url"]["url"]}
512
+ )
513
+ new_message = {"role": role, "content": new_content}
514
+ transformed_messages.append(new_message)
515
+
516
+ return transformed_messages
517
+
474
518
 
475
519
  def get_file_location(
476
520
  llm_family: LLMFamilyV1, spec: LLMSpecV1, quantization: str
@@ -34,6 +34,7 @@ from typing import (
34
34
  from ....types import (
35
35
  ChatCompletion,
36
36
  ChatCompletionChunk,
37
+ ChatCompletionMessage,
37
38
  Completion,
38
39
  CompletionChoice,
39
40
  CompletionChunk,
@@ -175,6 +176,9 @@ if VLLM_INSTALLED and vllm.__version__ > "0.5.3":
175
176
  if VLLM_INSTALLED and vllm.__version__ >= "0.6.1":
176
177
  VLLM_SUPPORTED_VISION_MODEL_LIST.append("internvl2")
177
178
 
179
+ if VLLM_INSTALLED and vllm.__version__ >= "0.6.3":
180
+ VLLM_SUPPORTED_VISION_MODEL_LIST.append("qwen2-vl-instruct")
181
+
178
182
 
179
183
  class VLLMModel(LLM):
180
184
  def __init__(
@@ -309,11 +313,6 @@ class VLLMModel(LLM):
309
313
  model_config.setdefault("max_num_seqs", 256)
310
314
  model_config.setdefault("quantization", None)
311
315
  model_config.setdefault("max_model_len", None)
312
- model_config["limit_mm_per_prompt"] = (
313
- json.loads(model_config.get("limit_mm_per_prompt")) # type: ignore
314
- if model_config.get("limit_mm_per_prompt")
315
- else None
316
- )
317
316
 
318
317
  return model_config
319
318
 
@@ -733,6 +732,33 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
733
732
  return False
734
733
  return VLLM_INSTALLED
735
734
 
735
+ def _sanitize_model_config(
736
+ self, model_config: Optional[VLLMModelConfig]
737
+ ) -> VLLMModelConfig:
738
+ if model_config is None:
739
+ model_config = VLLMModelConfig()
740
+
741
+ cuda_count = self._get_cuda_count()
742
+
743
+ model_config.setdefault("tokenizer_mode", "auto")
744
+ model_config.setdefault("trust_remote_code", True)
745
+ model_config.setdefault("tensor_parallel_size", cuda_count)
746
+ model_config.setdefault("block_size", 16)
747
+ model_config.setdefault("swap_space", 4)
748
+ model_config.setdefault("gpu_memory_utilization", 0.90)
749
+ model_config.setdefault("max_num_seqs", 256)
750
+ model_config.setdefault("quantization", None)
751
+ model_config.setdefault("max_model_len", None)
752
+ model_config["limit_mm_per_prompt"] = (
753
+ json.loads(model_config.get("limit_mm_per_prompt")) # type: ignore
754
+ if model_config.get("limit_mm_per_prompt")
755
+ else {
756
+ "image": 2, # default 2 images all chat
757
+ }
758
+ )
759
+
760
+ return model_config
761
+
736
762
  def _sanitize_chat_config(
737
763
  self,
738
764
  generate_config: Optional[Dict] = None,
@@ -755,14 +781,32 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
755
781
  @vllm_check
756
782
  async def async_chat(
757
783
  self,
758
- messages: List[Dict],
784
+ messages: List[ChatCompletionMessage], # type: ignore
759
785
  generate_config: Optional[Dict] = None,
760
786
  request_id: Optional[str] = None,
761
787
  ) -> Union[ChatCompletion, AsyncGenerator[ChatCompletionChunk, None]]:
788
+ messages = self._transform_messages(messages)
789
+ tools = generate_config.pop("tools", []) if generate_config else None
790
+
762
791
  model_family = self.model_family.model_family or self.model_family.model_name
763
- prompt, images = self.get_specific_prompt(model_family, messages)
764
792
 
765
- if len(images) == 0:
793
+ if "internvl2" not in model_family.lower():
794
+ from qwen_vl_utils import process_vision_info
795
+
796
+ full_context_kwargs = {}
797
+ if tools and model_family in QWEN_TOOL_CALL_FAMILY:
798
+ full_context_kwargs["tools"] = tools
799
+ assert self.model_family.chat_template is not None
800
+ prompt = self.get_full_context(
801
+ messages, self.model_family.chat_template, **full_context_kwargs
802
+ )
803
+ images, video_inputs = process_vision_info(messages)
804
+ if video_inputs:
805
+ raise ValueError("Not support video input now.")
806
+ else:
807
+ prompt, images = self.get_specific_prompt(model_family, messages)
808
+
809
+ if not images:
766
810
  inputs = {
767
811
  "prompt": prompt,
768
812
  }
@@ -26,7 +26,6 @@ def vllm_check(fn):
26
26
 
27
27
  @functools.wraps(fn)
28
28
  async def _async_wrapper(self, *args, **kwargs):
29
- logger.info("vllm_check")
30
29
  try:
31
30
  return await fn(self, *args, **kwargs)
32
31
  except AsyncEngineDeadError:
xinference/model/utils.py CHANGED
@@ -23,12 +23,15 @@ import huggingface_hub
23
23
  import numpy as np
24
24
  import torch
25
25
 
26
- from ..constants import XINFERENCE_CACHE_DIR, XINFERENCE_ENV_MODEL_SRC
26
+ from ..constants import (
27
+ XINFERENCE_CACHE_DIR,
28
+ XINFERENCE_DOWNLOAD_MAX_ATTEMPTS,
29
+ XINFERENCE_ENV_MODEL_SRC,
30
+ )
27
31
  from ..device_utils import get_available_device, is_device_available
28
32
  from .core import CacheableModelSpec
29
33
 
30
34
  logger = logging.getLogger(__name__)
31
- MAX_ATTEMPTS = 3
32
35
  IS_NEW_HUGGINGFACE_HUB: bool = huggingface_hub.__version__ >= "0.23.0"
33
36
 
34
37
 
@@ -100,11 +103,11 @@ def retry_download(
100
103
  **kwargs,
101
104
  ):
102
105
  last_ex = None
103
- for current_attempt in range(1, MAX_ATTEMPTS + 1):
106
+ for current_attempt in range(1, XINFERENCE_DOWNLOAD_MAX_ATTEMPTS + 1):
104
107
  try:
105
108
  return download_func(*args, **kwargs)
106
109
  except Exception as e:
107
- remaining_attempts = MAX_ATTEMPTS - current_attempt
110
+ remaining_attempts = XINFERENCE_DOWNLOAD_MAX_ATTEMPTS - current_attempt
108
111
  last_ex = e
109
112
  logger.debug(
110
113
  "Download failed: %s, download func: %s, download args: %s, kwargs: %s",
@@ -21,8 +21,6 @@ from ..core import CacheableModelSpec, ModelDescription
21
21
  from ..utils import valid_model_revision
22
22
  from .diffusers import DiffUsersVideoModel
23
23
 
24
- MAX_ATTEMPTS = 3
25
-
26
24
  logger = logging.getLogger(__name__)
27
25
 
28
26
  MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xinference
3
- Version: 0.15.3
3
+ Version: 0.15.4
4
4
  Summary: Model Serving Made Easy
5
5
  Home-page: https://github.com/xorbitsai/inference
6
6
  Author: Qin Xuye
@@ -71,7 +71,7 @@ Requires-Dist: WeTextProcessing<1.0.4; extra == "all"
71
71
  Requires-Dist: librosa; extra == "all"
72
72
  Requires-Dist: xxhash; extra == "all"
73
73
  Requires-Dist: torchaudio; extra == "all"
74
- Requires-Dist: ChatTTS>0.1; extra == "all"
74
+ Requires-Dist: ChatTTS<0.2,>0.1; extra == "all"
75
75
  Requires-Dist: lightning>=2.0.0; extra == "all"
76
76
  Requires-Dist: hydra-core>=1.3.2; extra == "all"
77
77
  Requires-Dist: inflect; extra == "all"
@@ -104,7 +104,7 @@ Requires-Dist: WeTextProcessing<1.0.4; extra == "audio"
104
104
  Requires-Dist: librosa; extra == "audio"
105
105
  Requires-Dist: xxhash; extra == "audio"
106
106
  Requires-Dist: torchaudio; extra == "audio"
107
- Requires-Dist: ChatTTS>0.1; extra == "audio"
107
+ Requires-Dist: ChatTTS<0.2,>0.1; extra == "audio"
108
108
  Requires-Dist: tiktoken; extra == "audio"
109
109
  Requires-Dist: torch>=2.0.0; extra == "audio"
110
110
  Requires-Dist: lightning>=2.0.0; extra == "audio"
@@ -1,15 +1,15 @@
1
1
  xinference/__init__.py,sha256=muQ9V9y11BcIqlZhhc06oDf193H7bwDIa8e_wSoDKI8,986
2
2
  xinference/_compat.py,sha256=xFztCfyrq3O_4bssL_ygghYkfxicv_ZhiX2YDDWHf-k,3571
3
- xinference/_version.py,sha256=RGjl0KY7iZC63yTyDPQaCtunSUGRo4ApaNUopMSdDP8,498
3
+ xinference/_version.py,sha256=rsb6h82zrpecXkwoQPZlv48UaKLjrNi7qg2qXrXoSZE,498
4
4
  xinference/conftest.py,sha256=56HYQjsAJcQrpZSmskniPqH9dLoW-i3Oud6NVTtc4io,9752
5
- xinference/constants.py,sha256=f8RxXrnnhoEYSwhiDSp8nKeUMF-KE4GyerMg-pa3Vv4,3582
5
+ xinference/constants.py,sha256=QsYxf86vqJVbn5L2SXH5hdyCdiZy7hRD-Qx5Sse4DqE,3758
6
6
  xinference/device_utils.py,sha256=zswJiws3VyTIaNO8z-MOcsJH_UiPoePPiKK5zoNrjTA,3285
7
7
  xinference/fields.py,sha256=0UtBFaDNzn1n9MRjyTkNrolsIML-TpZfudWOejqjni8,5245
8
8
  xinference/isolation.py,sha256=uhkzVyL3fSYZSuFexkG6Jm-tRTC5I607uNg000BXAnE,1949
9
9
  xinference/types.py,sha256=LHTbNLf0zI-FLruxRuBt2KMpk2P4eKpYdFvh2qzNTGI,12458
10
10
  xinference/utils.py,sha256=VSOJMFd9H7kce98OtJZbcDjjpfzRpHAFs8WU0xXPBM8,717
11
11
  xinference/api/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
12
- xinference/api/restful_api.py,sha256=5UR9RVbrxOJmBwwepxOrRmH49JGVzkp6Tku1os-zzLU,83540
12
+ xinference/api/restful_api.py,sha256=Gp_1fGYLximhr9yTqxvBv9O84HO47-tnTwA5h7o8Ff4,84506
13
13
  xinference/api/oauth2/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
14
14
  xinference/api/oauth2/auth_service.py,sha256=74JzB42fbbmBu4Q1dW3A9Fp_N7167KgRGB42Z0NHjAM,6119
15
15
  xinference/api/oauth2/types.py,sha256=K923sv_XySIUtM2Eozl9IG082IJcDOS5SFLrPZ5ELBg,996
@@ -18,20 +18,21 @@ xinference/client/__init__.py,sha256=Gc4HOzAy_1cic5kXlso7hahYgw89CKvZSJDicEU461k
18
18
  xinference/client/common.py,sha256=iciZRs5YjM2gYsXnwACPMaiBZp4_XpawWwfym0Iyu40,1617
19
19
  xinference/client/handlers.py,sha256=OKl_i5FA341wsQf_0onSOPbbW6V861WJrSP7ghtDc8c,527
20
20
  xinference/client/restful/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
21
- xinference/client/restful/restful_client.py,sha256=rz3d5n1sTluZG6zj0B8jsM40LLNtUAlmPzDdBDrDvFY,50780
21
+ xinference/client/restful/restful_client.py,sha256=eTZf9M0GG6ZaShWhpY7O-yG0BH3ceKZZ-d-DuVDg55g,51189
22
22
  xinference/core/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
23
23
  xinference/core/cache_tracker.py,sha256=3ubjYCU5aZToSp2GEuzedECVrg-PR4kThTefrFUkb9g,6971
24
24
  xinference/core/chat_interface.py,sha256=tM4hQPZ0UVcmE4_-auXWkq2z0rWmZBwwXNwbbl5zvGQ,20666
25
25
  xinference/core/event.py,sha256=42F38H2WOl6aPxp2oxX6WNxHRRxbnvYRmbt4Ar7NP4U,1640
26
- xinference/core/image_interface.py,sha256=-elEvAYVga8KXbl9uc1A8oV0YWK0QbKDu5RPofkxxXs,11837
26
+ xinference/core/image_interface.py,sha256=WsJjrcJG3itQJb-qiuZKWtK4_XLPPnxkEy9VCyZcQmw,13636
27
27
  xinference/core/metrics.py,sha256=ScmTG15Uq3h_ob72ybZSMWdnk8P4sUZFcm60f4ikSXc,2631
28
- xinference/core/model.py,sha256=_X0aPIcTtdy886tMxuoto_nwXqCrm3S8IMalv3Kq5QU,30354
28
+ xinference/core/model.py,sha256=bwmiqRctnXbJlsHhS3O3JA5G0xRNNRd_HqlAGyRrzVo,32086
29
+ xinference/core/progress_tracker.py,sha256=LIF6CLIlnEoSBkuDCraJktDOzZ31mQ4HOo6EVr3KpQM,6453
29
30
  xinference/core/resource.py,sha256=FQ0aRt3T4ZQo0P6CZZf5QUKHiCsr5llBvKb1f7wfnxg,1611
30
31
  xinference/core/scheduler.py,sha256=qONNFqAlnYDcmmzPO5jfU-r0aZ1Lhhpn1oSaA5CAGTE,15485
31
32
  xinference/core/status_guard.py,sha256=4an1KjUOhCStgRQUw1VSzXcycXUtvhxwiMREKKcl1UI,2828
32
- xinference/core/supervisor.py,sha256=bNMyGM-cqHwSqhYxHlR6oePEKqt9D4tcrBFMAb6-oV0,52510
33
+ xinference/core/supervisor.py,sha256=Wkjhk1tfRuhyQmcVNrHZApWO09MDA5-Uu4u2p1GBj3I,52964
33
34
  xinference/core/utils.py,sha256=p3ptQMdzKu9WxdUJ2EdDTXvPDl53BGwiNuVWuhaE4EU,8536
34
- xinference/core/worker.py,sha256=IvcagHkXpMKjTvZl9svXko5hRuKN3czhbi5phGv-6No,46264
35
+ xinference/core/worker.py,sha256=QhxVhpeKl-QYKA_77kUXTj5-rhodHAXlOhgtvqZiiRI,46329
35
36
  xinference/deploy/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
36
37
  xinference/deploy/cmdline.py,sha256=YNXbPIT9zJIp5EQzl_rH5KwDDYqBd2CbaOVF8hA0lws,48120
37
38
  xinference/deploy/local.py,sha256=gcH6WfTxfhjvNkxxKZH3tcGtXV48BEPoaLWYztZHaeo,3954
@@ -42,16 +43,16 @@ xinference/deploy/test/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQ
42
43
  xinference/deploy/test/test_cmdline.py,sha256=m8xDzjtDuAJy0QkvYVJIZDuTB29cmYBV0d231JyRCPU,7714
43
44
  xinference/model/__init__.py,sha256=J7cdxnDxbr-4c8JREXWEIZ8MkB_VokXVaEOsI7ycjho,1101
44
45
  xinference/model/core.py,sha256=WQakLJgxrJMbTGn9AVaw-Tas7QL5M8cJsuYpGgH-io8,4706
45
- xinference/model/utils.py,sha256=j4qdq_R8dwDqwD9YLQkbFSHTmXKxWdZbr5MSvoL8aAw,11122
46
+ xinference/model/utils.py,sha256=-axJ9I1IZ0li5Y2qzfFrsz8nMOGnoXXhmULu_e4aIKc,11196
46
47
  xinference/model/audio/__init__.py,sha256=G4n-MyzdarFVOndPRkEyZZrCwqFIG8yIsky6_5dife0,3433
47
- xinference/model/audio/chattts.py,sha256=rMH6-9M8boZdpUSgxaAge-LraE79nRs6mVc0nPLHd5A,4585
48
- xinference/model/audio/core.py,sha256=2QexrIh3hDoaNeWh5rOMas9q4zDCZTaazsLRdQ7D5Zw,6512
48
+ xinference/model/audio/chattts.py,sha256=EXAfNATwblcilMjU3ff1dzTDJkjwQMXBo6zZ_517Jvo,4659
49
+ xinference/model/audio/core.py,sha256=8rIyw0PLW2Py2-V7xsij4uQGGY39D_uq6uICuICobO8,6494
49
50
  xinference/model/audio/cosyvoice.py,sha256=Enur1Y4Xa-mpr7wwnoXWwhyh7PUAjrHZ8DV91tTrpjE,6426
50
51
  xinference/model/audio/custom.py,sha256=8GXBRmTtR-GY03-E91nlRGTIuabCRzlt20ecU6Un6Y8,4985
51
52
  xinference/model/audio/fish_speech.py,sha256=v2WVEV-BLWnbiDvqrx8WTGE_YNKmd9QoAF1LZBXWxn0,7310
52
53
  xinference/model/audio/funasr.py,sha256=65z7U7_F14CCP-jg6BpeY3_49FK7Y5OCRSzrhhsklCg,4075
53
- xinference/model/audio/model_spec.json,sha256=Ixo-15HVY2vu3_J5lElLL6texoJ41YwH-TBDB139NP8,4858
54
- xinference/model/audio/model_spec_modelscope.json,sha256=club_Pb1BdFPu5EOR5oVktsi2SiSrKYc7lHKsERjpds,1765
54
+ xinference/model/audio/model_spec.json,sha256=rBfDYgiZNI0d1t01emx_UosEqap4JxD1OUJoocmlEMI,5120
55
+ xinference/model/audio/model_spec_modelscope.json,sha256=Ul7_zy49N5zvio8-1WZEBFzZXS5_ueT49frWepMF8KY,2031
55
56
  xinference/model/audio/utils.py,sha256=pwo5cHh8nvhyBa9f-17QaVpXMSjmbpGbPYKwBBtEhGM,717
56
57
  xinference/model/audio/whisper.py,sha256=PQL7rebGC7WlIOItuDtjdEtSJtlhxFkolot-Fj-8uDU,7982
57
58
  xinference/model/embedding/__init__.py,sha256=1GmvQsbeeVUT-VRaRGetf8UT4RQgLWIzfp5kfX5jw-k,3567
@@ -67,22 +68,22 @@ xinference/model/flexible/launchers/__init__.py,sha256=X8w_2hKuQ9H3f90XYK7H_AQU4
67
68
  xinference/model/flexible/launchers/image_process_launcher.py,sha256=APbbHls0N9DpLFL6_qTexuc5o6bQAvdgJEAZWU4clyw,2510
68
69
  xinference/model/flexible/launchers/transformers_launcher.py,sha256=OZeeogDfopRUGhulP4PRJ4fZEJ2D9cfv7lcC2qJBoDE,2012
69
70
  xinference/model/image/__init__.py,sha256=80HBIbKh6lh-BgNaTo6k0TxxKjdG30bwHAdCiwVk6wk,3198
70
- xinference/model/image/core.py,sha256=ir1ns0qlUIlKnd0JS2cAJUppeEeczWYOnf6ecUCaLhM,8907
71
+ xinference/model/image/core.py,sha256=qdqFMpPa2OSi0d5a4_iASEjL8s5vxxl1IRTJjmxfwO0,8959
71
72
  xinference/model/image/custom.py,sha256=5gjujQpJVTJ-pVB5LzBo4-bWKKOHzFlRaoRKJ_CuIUg,3769
72
73
  xinference/model/image/model_spec.json,sha256=JyXU-v4ysRT4yqwkmXgISY3uVWjeSiBLyH8fS7XO1_g,5368
73
74
  xinference/model/image/model_spec_modelscope.json,sha256=r3_m9XZo1QZgmASg5navOPs0ivlft5wVPF1SpbAVNBg,4266
74
- xinference/model/image/sdapi.py,sha256=XhSIfEQY8giC0KC04CoMBJea9dZSFO4Ci8fQlAlxk54,4685
75
+ xinference/model/image/sdapi.py,sha256=Xgdtnvw4Xwj1Nc0cBoDo_ogH6E2mFJqLvX0jSxxgdnA,5936
75
76
  xinference/model/image/utils.py,sha256=gxg8jJ2nYaDknzCcSC53WCy1slbB5aWU14AbJbfm6Z4,906
76
77
  xinference/model/image/stable_diffusion/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
77
- xinference/model/image/stable_diffusion/core.py,sha256=jObFn-DmB210OrzDj38H5GtC7yLXY8tJ37lyUlRELLo,19316
78
+ xinference/model/image/stable_diffusion/core.py,sha256=NeCfkKjRdebgOOGMmg8DGVPR29D9vgX3V72qETgwPp0,24230
78
79
  xinference/model/llm/__init__.py,sha256=elINGzzDXmSissG32UMG7BfMqNgXwSM7USBaTorGWSA,12428
79
80
  xinference/model/llm/core.py,sha256=f4nKVPTAseivij6mbL2yXEKxrzllKm-_i2ttSqckTCg,8157
80
- xinference/model/llm/llm_family.json,sha256=qGr8F8T4deKsDGHN9LQxz7HM4CliPqsvh9guJf3yY2M,284592
81
+ xinference/model/llm/llm_family.json,sha256=9peVrsgESrC-HPsIXS7wDcTsz_oNcGwrsw-Jh_yDSLU,285589
81
82
  xinference/model/llm/llm_family.py,sha256=eqeaHwLeS2TDB_ATf_h6YkH6OiyyF_4cSF_bOq3pTws,37432
82
83
  xinference/model/llm/llm_family_csghub.json,sha256=zMKWbihsxQNVB1u5iKJbZUkbOfQ4IPNq1KQ-8IDPQQA,8759
83
- xinference/model/llm/llm_family_modelscope.json,sha256=EanDUOC0GNWNDGOT0TQtk9TPjalpAfsUkNShM4qjZLs,214921
84
+ xinference/model/llm/llm_family_modelscope.json,sha256=IcnRScrHkM4b6-rrpto4hCZ541rkUpdkaUa8wgSw8No,215985
84
85
  xinference/model/llm/memory.py,sha256=NEIMw6wWaF9S_bnBYq-EyuDhVbUEEeceQhwE1iwsrhI,10207
85
- xinference/model/llm/utils.py,sha256=boK0xMGbWFRX5qUQqPm1z1IfTZgBvFKOnWnqC-gcw7c,21909
86
+ xinference/model/llm/utils.py,sha256=_FHJHZ9d1tYj4NwiG4TYftEp9L5vah6slUkqHKnn21U,23543
86
87
  xinference/model/llm/llama_cpp/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
87
88
  xinference/model/llm/llama_cpp/core.py,sha256=vjuTapwbn-ZjUX-8WA0nFyicE4UGUSehU_csSetvcZw,10928
88
89
  xinference/model/llm/lmdeploy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -93,27 +94,27 @@ xinference/model/llm/sglang/__init__.py,sha256=-sjSIQ4K6w-TEzx49kVaWeWC443fnZqOD
93
94
  xinference/model/llm/sglang/core.py,sha256=ft4QlDw36gwoic8lyjtSx2ai6KTW84CPVbYr8grqGMI,16698
94
95
  xinference/model/llm/transformers/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
95
96
  xinference/model/llm/transformers/chatglm.py,sha256=pCJgoc0Ikny5BL85iHXl9M6zKgIzAHBsd81NAZ31yRI,17834
96
- xinference/model/llm/transformers/cogvlm2.py,sha256=wqWM6AhVQssgkUOcVX2N7RU7kjnAu55r6ZlUjh9iJro,15942
97
- xinference/model/llm/transformers/cogvlm2_video.py,sha256=dupPGQur8xGie5roA1ibpTIIZNoO-KMynvr7303pyl0,11809
97
+ xinference/model/llm/transformers/cogvlm2.py,sha256=I5Ftm0VYjbTAv5ZARZCo32Ggpw58PJfHs5B_nX_BIlU,15972
98
+ xinference/model/llm/transformers/cogvlm2_video.py,sha256=ZGkpC4x2uEtjwoMrLSODmAUYTjOeSNYxZi9VpQrpnhU,11857
98
99
  xinference/model/llm/transformers/compression.py,sha256=U0vMJ-JaBt4oC2LffgWg6HbPj1CeUi_YdwVbjDd0mRA,8112
99
- xinference/model/llm/transformers/core.py,sha256=-YmBzM5WbK-B3YIv7rWPFewDEx5xFDWyPO8YqFr8Sv8,31012
100
+ xinference/model/llm/transformers/core.py,sha256=fyM7WXsVTEs38LAUPe-CJf45czInrNf1ifsWWexZ5nM,31128
100
101
  xinference/model/llm/transformers/deepseek_v2.py,sha256=HSddUBm5sKpHTNtPbTao9r3Yif-_xRAJrAtfCyELnhw,12975
101
- xinference/model/llm/transformers/deepseek_vl.py,sha256=eb-UH6g1Vr-jaZnGSkn_Ud4WYBzDuYR24CV_tAvo9iM,10397
102
- xinference/model/llm/transformers/glm4v.py,sha256=Uz7y2A_cSDeVVfe4D31h9LGqtUJ51plckt6jmnm_z7c,13841
103
- xinference/model/llm/transformers/intern_vl.py,sha256=3K0_2ng4zBgsnobzV7AfDEq7NzZu676JfNM54oE3AXQ,18222
102
+ xinference/model/llm/transformers/deepseek_vl.py,sha256=pB6i6DW5oyfHdqTgKpi2DkIKVGlPLGIDR_Op0sB1uKA,10445
103
+ xinference/model/llm/transformers/glm4v.py,sha256=goph2HhpV8gUm2t8-T1P-jTF2r_kPeH6QNe64lmlm0g,13871
104
+ xinference/model/llm/transformers/intern_vl.py,sha256=0pbze1eo3HvNQ0nW-mVJcJuJ4GrEyBBqQAYIdXnAn6c,18270
104
105
  xinference/model/llm/transformers/internlm2.py,sha256=nRrmbH9bJl_wLcCH4zSy0EeKeP_ht-b8bVvbG2pMgV0,7228
105
- xinference/model/llm/transformers/minicpmv25.py,sha256=8fcmQo5VAst0vniV2-N6109Nq0sA56O2vWVxMvdZWxo,6766
106
- xinference/model/llm/transformers/minicpmv26.py,sha256=QRO5gnxuFMiJDNZ-v3os1A_4bn4fzrDvYjAOhdPQ4Lw,13392
107
- xinference/model/llm/transformers/omnilmm.py,sha256=MBsh-qaDnjtrtTRrAR7ArgHyupfpowwntuTuOj7xGkA,5124
108
- xinference/model/llm/transformers/qwen2_audio.py,sha256=tkLL523jdn1rVDfHV9RfLldDIISuMiukTJYt-h-dJ4o,5987
109
- xinference/model/llm/transformers/qwen2_vl.py,sha256=IMEdVJiHm3JccZg-vpSKCIElv8XtMrUPD3wT6yHel0A,8419
110
- xinference/model/llm/transformers/qwen_vl.py,sha256=JfMuiEqYuRIlDv5cIiRbLCd4DJQRgwCFoxc0JTJTGgs,14028
106
+ xinference/model/llm/transformers/minicpmv25.py,sha256=mr80-OlSlK_opSuAO3cz_QlkqujLr6V-OsTP0ebwpE8,6814
107
+ xinference/model/llm/transformers/minicpmv26.py,sha256=_e2C4vmyKIzKt7S7AvKgiqhDOhGiBXa6Xoiix4UaYtI,13440
108
+ xinference/model/llm/transformers/omnilmm.py,sha256=2ZLW979ETqDDKo9CaTNwi9uLBZ2d6itHAYqjUA4jdro,5172
109
+ xinference/model/llm/transformers/qwen2_audio.py,sha256=1XmlawVF-Xh2pgGoLDX7kOYIiF_bDUR3doSOnM59QbQ,6107
110
+ xinference/model/llm/transformers/qwen2_vl.py,sha256=i8mypQwaPaaGQ0OIS55H8yuUX6gH87ubPuPQHHAD9fw,7304
111
+ xinference/model/llm/transformers/qwen_vl.py,sha256=LG19qJW30bFiZOS-t9OM3JP6K1KCLj_Sv3nKSCLvyts,14058
111
112
  xinference/model/llm/transformers/tensorizer_utils.py,sha256=VXSYbPZtCbd8lVvsnjDLPZjfCMil67Pkywd_Ze4dTx4,11362
112
- xinference/model/llm/transformers/utils.py,sha256=qob4wDMN98LKzYdDcQe8rFVA5_mX4i5XeVgm3HSq9iI,28505
113
- xinference/model/llm/transformers/yi_vl.py,sha256=w4EpUHpmT9P1u5yEv1Pm3Ico92nqZZv3fO4NEKXteK4,8913
113
+ xinference/model/llm/transformers/utils.py,sha256=kTaNK65igHoWRUe00FD-Bs7nBv_OYre0KXjbmstlleU,29228
114
+ xinference/model/llm/transformers/yi_vl.py,sha256=iCdRLw-wizbU-qXXc8CT4DhC0Pt-uYg0vFwXEhAZjQg,8961
114
115
  xinference/model/llm/vllm/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
115
- xinference/model/llm/vllm/core.py,sha256=L9jAZ2mb3vq5f9ZkrQz9k2oc8mBb6kdIUmHYwofO2d4,28936
116
- xinference/model/llm/vllm/utils.py,sha256=JyztCDV7FT39QVraacg6T-JpmaSyfNwp9StRUBbvKJw,1347
116
+ xinference/model/llm/vllm/core.py,sha256=FhwRaRY29imMS4Aldda7qBQg4tCtUqG8adr1zfUF7jw,30729
117
+ xinference/model/llm/vllm/utils.py,sha256=LKOmwfFRrlSecawxT-uE39tC2RQbf1UIiSH9Uz90X6w,1313
117
118
  xinference/model/rerank/__init__.py,sha256=wRpf1bOMfmAsuEKEGczMTB5fWEvuqltlJbIbRb-x8Ko,3483
118
119
  xinference/model/rerank/core.py,sha256=ZOlbtK1x8CgcAAg0Y-5AF9ItYbhxuGtf7C_Sf0D9Kww,14122
119
120
  xinference/model/rerank/custom.py,sha256=wPKF3bHbGap9dHz9yYvXMXhozh4hRzS78RQijqvaRq8,3846
@@ -121,7 +122,7 @@ xinference/model/rerank/model_spec.json,sha256=xUuJgJ8Ad4l2v8gEHxAdF_xoaSkA8j8AX
121
122
  xinference/model/rerank/model_spec_modelscope.json,sha256=pf5hX4g0HdVjk2-ibHTl_mXHgQSSPYMTBOIwvnwMMkk,1616
122
123
  xinference/model/rerank/utils.py,sha256=MJAFL47G3r3zLVGXKoi0QLTgU3Xr4Ffv72Ipn--psew,713
123
124
  xinference/model/video/__init__.py,sha256=mRhOhzMxzcPFdA5j4niAxH_j9dXLtT9HmchuICrdET8,2160
124
- xinference/model/video/core.py,sha256=PMqyWhhBWO77VjpEvTC7EQrGmyLWxJ_-Mm1VRqb2dNY,6031
125
+ xinference/model/video/core.py,sha256=QEdVbVBDQebSWxqkL483Q2Y9Y1GGc2an0gi2QBPUH9I,6013
125
126
  xinference/model/video/diffusers.py,sha256=kSEBRf0vtWyo0IrwoiEpr_ROu7SwDAVBZ4leqkcPycM,6244
126
127
  xinference/model/video/model_spec.json,sha256=yQcLSU3vRJys-ACdHGtTNdz2pX1O9QDQ5rGHQd9LdFY,817
127
128
  xinference/model/video/model_spec_modelscope.json,sha256=U8p6IqNLbY5Safxwpa6dCfnGbyvOC4FtYIf2ucr8TvM,815
@@ -15507,9 +15508,9 @@ xinference/web/ui/node_modules/yargs-parser/package.json,sha256=BSwbOzgetKXMK4u0
15507
15508
  xinference/web/ui/node_modules/yocto-queue/package.json,sha256=6U1XHQPGXJTqsiFvT953ORihUtXTblZy4fXBWP9qxC0,725
15508
15509
  xinference/web/ui/node_modules/yup/package.json,sha256=xRFSROB9NKxqSWHEVFvSTsPs9Ll074uo8OS1zEw0qhA,1206
15509
15510
  xinference/web/ui/node_modules/yup/node_modules/type-fest/package.json,sha256=JTv2zTTVgxQ2H82m1-6qEpdMv08lHjFx4Puf_MsbB_Q,1134
15510
- xinference-0.15.3.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
15511
- xinference-0.15.3.dist-info/METADATA,sha256=NSmJnHU0w8BD5kyJ0JusOYOdX0sfwerPDyb1PPutadc,19126
15512
- xinference-0.15.3.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
15513
- xinference-0.15.3.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
15514
- xinference-0.15.3.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
15515
- xinference-0.15.3.dist-info/RECORD,,
15511
+ xinference-0.15.4.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
15512
+ xinference-0.15.4.dist-info/METADATA,sha256=PANLzbQqqFtpqMFBVCP9JcY30fhoC63e5dG3Y5hbbr4,19136
15513
+ xinference-0.15.4.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
15514
+ xinference-0.15.4.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
15515
+ xinference-0.15.4.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
15516
+ xinference-0.15.4.dist-info/RECORD,,