xinference 0.15.2__py3-none-any.whl → 0.15.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (57)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +29 -2
  3. xinference/client/restful/restful_client.py +10 -0
  4. xinference/constants.py +4 -0
  5. xinference/core/image_interface.py +76 -23
  6. xinference/core/model.py +80 -39
  7. xinference/core/progress_tracker.py +187 -0
  8. xinference/core/supervisor.py +11 -0
  9. xinference/core/worker.py +1 -0
  10. xinference/model/audio/chattts.py +2 -1
  11. xinference/model/audio/core.py +0 -2
  12. xinference/model/audio/model_spec.json +8 -0
  13. xinference/model/audio/model_spec_modelscope.json +9 -0
  14. xinference/model/embedding/core.py +14 -5
  15. xinference/model/embedding/model_spec.json +7 -0
  16. xinference/model/embedding/model_spec_modelscope.json +9 -1
  17. xinference/model/image/core.py +6 -7
  18. xinference/model/image/sdapi.py +35 -4
  19. xinference/model/image/stable_diffusion/core.py +212 -70
  20. xinference/model/llm/llm_family.json +28 -40
  21. xinference/model/llm/llm_family_modelscope.json +18 -22
  22. xinference/model/llm/transformers/cogvlm2.py +2 -1
  23. xinference/model/llm/transformers/cogvlm2_video.py +2 -0
  24. xinference/model/llm/transformers/core.py +6 -2
  25. xinference/model/llm/transformers/deepseek_vl.py +2 -0
  26. xinference/model/llm/transformers/glm4v.py +2 -1
  27. xinference/model/llm/transformers/intern_vl.py +2 -0
  28. xinference/model/llm/transformers/minicpmv25.py +2 -0
  29. xinference/model/llm/transformers/minicpmv26.py +2 -0
  30. xinference/model/llm/transformers/omnilmm.py +2 -0
  31. xinference/model/llm/transformers/qwen2_audio.py +11 -4
  32. xinference/model/llm/transformers/qwen2_vl.py +2 -28
  33. xinference/model/llm/transformers/qwen_vl.py +2 -1
  34. xinference/model/llm/transformers/utils.py +35 -2
  35. xinference/model/llm/transformers/yi_vl.py +2 -0
  36. xinference/model/llm/utils.py +72 -17
  37. xinference/model/llm/vllm/core.py +69 -9
  38. xinference/model/llm/vllm/utils.py +41 -0
  39. xinference/model/rerank/core.py +19 -0
  40. xinference/model/rerank/model_spec.json +8 -0
  41. xinference/model/rerank/model_spec_modelscope.json +8 -0
  42. xinference/model/utils.py +7 -29
  43. xinference/model/video/core.py +0 -2
  44. xinference/web/ui/build/asset-manifest.json +3 -3
  45. xinference/web/ui/build/index.html +1 -1
  46. xinference/web/ui/build/static/js/{main.29578905.js → main.e51a356d.js} +3 -3
  47. xinference/web/ui/build/static/js/main.e51a356d.js.map +1 -0
  48. xinference/web/ui/node_modules/.cache/babel-loader/4385c1095eefbff0a8ec3b2964ba6e5a66a05ab31be721483ca2f43e2a91f6ff.json +1 -0
  49. {xinference-0.15.2.dist-info → xinference-0.15.4.dist-info}/METADATA +6 -5
  50. {xinference-0.15.2.dist-info → xinference-0.15.4.dist-info}/RECORD +55 -53
  51. xinference/web/ui/build/static/js/main.29578905.js.map +0 -1
  52. xinference/web/ui/node_modules/.cache/babel-loader/68bede6d95bb5ef0b35bbb3ec5b8c937eaf6862c6cdbddb5ef222a7776aaf336.json +0 -1
  53. /xinference/web/ui/build/static/js/{main.29578905.js.LICENSE.txt → main.e51a356d.js.LICENSE.txt} +0 -0
  54. {xinference-0.15.2.dist-info → xinference-0.15.4.dist-info}/LICENSE +0 -0
  55. {xinference-0.15.2.dist-info → xinference-0.15.4.dist-info}/WHEEL +0 -0
  56. {xinference-0.15.2.dist-info → xinference-0.15.4.dist-info}/entry_points.txt +0 -0
  57. {xinference-0.15.2.dist-info → xinference-0.15.4.dist-info}/top_level.txt +0 -0

xinference/model/llm/transformers/yi_vl.py CHANGED
@@ -29,6 +29,7 @@ from ..utils import (
     parse_messages,
 )
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean
 
 logger = logging.getLogger(__name__)
 
@@ -99,6 +100,7 @@ class YiVLChatModel(PytorchChatModel):
             raise RuntimeError("Only one image per message is supported by Yi VL.")
         return content
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
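
The body of cache_clean is not shown in this diff (it is added to xinference/model/llm/transformers/utils.py, item 34 above). A minimal, hypothetical sketch of what a decorator like this typically does, namely releasing cached GPU memory once each chat call returns:

    # Hypothetical sketch only -- the real cache_clean lives in
    # xinference/model/llm/transformers/utils.py and is not part of this hunk.
    import functools
    import gc

    def cache_clean(fn):
        """Free Python and CUDA caches once the wrapped chat call returns."""

        @functools.wraps(fn)
        def _wrapper(self, *args, **kwargs):
            try:
                return fn(self, *args, **kwargs)
            finally:
                gc.collect()
                try:
                    import torch

                    if torch.cuda.is_available():
                        torch.cuda.empty_cache()
                except ImportError:
                    pass

        return _wrapper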

xinference/model/llm/utils.py CHANGED
@@ -29,6 +29,7 @@ from ...types import (
     ChatCompletion,
     ChatCompletionChoice,
     ChatCompletionChunk,
+    ChatCompletionMessage,
     Completion,
     CompletionChoice,
     CompletionChunk,
@@ -50,6 +51,7 @@ QWEN_TOOL_CALL_FAMILY = [
     "qwen1.5-moe-chat",
     "qwen2-instruct",
     "qwen2-moe-instruct",
+    "qwen2.5-instruct",
 ]
 
 GLM4_TOOL_CALL_FAMILY = [
@@ -57,6 +59,10 @@ GLM4_TOOL_CALL_FAMILY = [
     "glm4-chat-1m",
 ]
 
+LLAMA3_TOOL_CALL_FAMILY = [
+    "llama-3.1-instruct",
+]
+
 QWEN_TOOL_CALL_SYMBOLS = ["<tool_call>", "</tool_call>"]
 
 
@@ -113,7 +119,7 @@ class ChatModelMixin:
         return self._build_from_raw_template(messages, chat_template, **kwargs)
 
     @staticmethod
-    def get_specific_prompt(model_family: str, messages: List[Dict]):
+    def get_specific_prompt(model_family: str, messages: List[ChatCompletionMessage]):
         """
         Inspired by FastChat. Format chat history into a prompt according to the prompty style of
         different models.
@@ -129,7 +135,7 @@ class ChatModelMixin:
             ret = (
                 "<s>"
                 if system_prompt == ""
-                else "<s><|im_start|>system\n"
+                else "<s><|im_start|>system\n"  # type: ignore
                 + system_prompt
                 + intra_message_sep
                 + "\n"
@@ -159,14 +165,25 @@ class ChatModelMixin:
                 for image_url in image_urls:
                     fut = executor.submit(_decode_image, image_url)
                     image_futures.append(fut)
-                images = [fut.result() for fut in image_futures]
+                images.extend([fut.result() for fut in image_futures])
                 if len(image_futures) == 0:
                     ret += role + "\n" + text + intra_message_sep + "\n"
                 else:
+                    placeholders = "\n".join(
+                        f"Image-{i+1}: <image>\n"
+                        for i in range(
+                            len(images) - len(image_futures), len(images)
+                        )
+                    )
                     ret += (
-                        role + "\n" + f"<image>\n{text}" + intra_message_sep + "\n"
+                        role
+                        + "\n"
+                        + f"{placeholders}\n{text}"
+                        + intra_message_sep
+                        + "\n"
                     )
-
+            if len(images) == 1:
+                ret = ret.replace("Image-1: <image>\n", "<image>\n")
             return ret, images
         else:
             raise ValueError(f"Invalid model family: {model_family}")
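
The new placeholder logic numbers every image attached to the current message, then falls back to the old bare <image> tag when the conversation contains only one image. A standalone reproduction of just that expression (variable values are illustrative):

    # Mirrors the placeholder expression added above; values are illustrative.
    images = ["img_a", "img_b"]            # all images decoded so far
    image_futures = [object(), object()]   # futures resolved for this message
    placeholders = "\n".join(
        f"Image-{i+1}: <image>\n"
        for i in range(len(images) - len(image_futures), len(images))
    )
    print(repr(placeholders))  # 'Image-1: <image>\n\nImage-2: <image>\n'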
@@ -322,8 +339,9 @@ class ChatModelMixin:
         for content in contents:
             content = content.strip()
             if content:
-                if content.startswith(QWEN_TOOL_CALL_SYMBOLS[0]):
-                    content = content[len(QWEN_TOOL_CALL_SYMBOLS[0]) :]
+                pos = content.find(QWEN_TOOL_CALL_SYMBOLS[0])
+                if pos != -1:
+                    content = content[pos + len(QWEN_TOOL_CALL_SYMBOLS[0]) :]
                 content = content.strip()
                 try:
                     res = json.loads(content)
@@ -342,6 +360,15 @@ class ChatModelMixin:
         text = c["choices"][0]["text"]
         return cls._handle_qwen_tool_result(text)
 
+    @classmethod
+    def _eval_llama3_chat_arguments(cls, c) -> List[Tuple]:
+        text = c["choices"][0]["text"]
+        try:
+            data = eval(text, {}, {})
+            return [(None, data["name"], data["parameters"])]
+        except Exception:
+            return [(text, None, None)]
+
     @classmethod
     def _eval_tool_arguments(cls, model_family, c):
         family = model_family.model_family or model_family.model_name
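
_eval_llama3_chat_arguments expects the completion text to be a dict literal with "name" and "parameters" keys; text that fails to evaluate is passed through as plain content. An illustrative input/output pair:

    # Illustrative only: the llama-3.1 tool-call text the new parser accepts.
    text = '{"name": "get_current_weather", "parameters": {"location": "Beijing"}}'
    data = eval(text, {}, {})  # same evaluation as in the hunk above
    print((None, data["name"], data["parameters"]))
    # (None, 'get_current_weather', {'location': 'Beijing'})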
@@ -349,6 +376,8 @@
             result = cls._eval_glm_chat_arguments(c)
         elif family in QWEN_TOOL_CALL_FAMILY:
             result = cls._eval_qwen_chat_arguments(c)
+        elif family in LLAMA3_TOOL_CALL_FAMILY:
+            result = cls._eval_llama3_chat_arguments(c)
         else:
             raise Exception(
                 f"Model {model_family.model_name} is not support tool calls."
@@ -365,16 +394,14 @@
         for content, func, args in tool_result:
             if func:
                 tool_calls.append(
-                    [
-                        {
-                            "id": f"call_{_id}",
-                            "type": "function",
-                            "function": {
-                                "name": func,
-                                "arguments": json.dumps(args, ensure_ascii=False),
-                            },
-                        }
-                    ]
+                    {
+                        "id": f"call_{_id}",
+                        "type": "function",
+                        "function": {
+                            "name": func,
+                            "arguments": json.dumps(args, ensure_ascii=False),
+                        },
+                    }
                 )
             else:
                 failed_contents.append(content)
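
After this change each entry appended to tool_calls is a plain dict in the OpenAI tool-call shape, no longer wrapped in an extra single-element list. An illustrative resulting entry:

    # Illustrative only: shape of one tool_calls entry after the change.
    tool_call = {
        "id": "call_0",
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "arguments": '{"location": "Beijing"}',
        },
    }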
@@ -460,6 +487,34 @@
             "usage": usage,
         }
 
+    def _transform_messages(
+        self,
+        messages: List[ChatCompletionMessage],
+    ):
+        transformed_messages = []
+        for msg in messages:
+            new_content = []
+            role = msg["role"]
+            content = msg["content"]
+            if isinstance(content, str):
+                new_content.append({"type": "text", "text": content})
+            elif isinstance(content, List):
+                for item in content:  # type: ignore
+                    if "text" in item:
+                        new_content.append({"type": "text", "text": item["text"]})
+                    elif "image_url" in item:
+                        new_content.append(
+                            {"type": "image", "image": item["image_url"]["url"]}
+                        )
+                    elif "video_url" in item:
+                        new_content.append(
+                            {"type": "video", "video": item["video_url"]["url"]}
+                        )
+            new_message = {"role": role, "content": new_content}
+            transformed_messages.append(new_message)
+
+        return transformed_messages
+
 
 def get_file_location(
     llm_family: LLMFamilyV1, spec: LLMSpecV1, quantization: str
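
For reference, _transform_messages normalizes OpenAI-style content parts (image_url / video_url) into the flat {"type": ..., ...} layout consumed downstream, e.g. by qwen_vl_utils in the vLLM vision path below. An illustrative before/after (URLs are placeholders):

    # Illustrative only: input and output of _transform_messages.
    openai_style = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is in this picture?"},
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
            ],
        }
    ]
    # becomes
    transformed = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is in this picture?"},
                {"type": "image", "image": "https://example.com/cat.png"},
            ],
        }
    ]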

xinference/model/llm/vllm/core.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import asyncio
+import json
 import logging
 import multiprocessing
 import os
@@ -33,6 +34,7 @@ from typing import (
 from ....types import (
     ChatCompletion,
     ChatCompletionChunk,
+    ChatCompletionMessage,
     Completion,
     CompletionChoice,
     CompletionChunk,
@@ -47,6 +49,7 @@ from ..utils import (
     ChatModelMixin,
     generate_completion_chunk,
 )
+from .utils import vllm_check
 
 logger = logging.getLogger(__name__)
 
@@ -65,6 +68,7 @@ class VLLMModelConfig(TypedDict, total=False):
     max_num_seqs: int
     quantization: Optional[str]
     max_model_len: Optional[int]
+    limit_mm_per_prompt: Optional[Dict[str, int]]
 
 
 class VLLMGenerateConfig(TypedDict, total=False):
@@ -90,9 +94,7 @@ try:
 except ImportError:
     VLLM_INSTALLED = False
 
-VLLM_SUPPORTED_VISION_MODEL_LIST: List[str] = [
-    "internvl2",
-]
+VLLM_SUPPORTED_VISION_MODEL_LIST: List[str] = []
 VLLM_SUPPORTED_MODELS = [
     "llama-2",
     "llama-3",
@@ -171,6 +173,12 @@ if VLLM_INSTALLED and vllm.__version__ > "0.5.3":
     VLLM_SUPPORTED_MODELS.append("llama-3.1")
     VLLM_SUPPORTED_CHAT_MODELS.append("llama-3.1-instruct")
 
+if VLLM_INSTALLED and vllm.__version__ >= "0.6.1":
+    VLLM_SUPPORTED_VISION_MODEL_LIST.append("internvl2")
+
+if VLLM_INSTALLED and vllm.__version__ >= "0.6.3":
+    VLLM_SUPPORTED_VISION_MODEL_LIST.append("qwen2-vl-instruct")
+
 
 class VLLMModel(LLM):
     def __init__(
@@ -304,7 +312,7 @@ class VLLMModel(LLM):
         model_config.setdefault("gpu_memory_utilization", 0.90)
         model_config.setdefault("max_num_seqs", 256)
         model_config.setdefault("quantization", None)
-        model_config.setdefault("max_model_len", 4096)
+        model_config.setdefault("max_model_len", None)
 
         return model_config
 
@@ -434,6 +442,7 @@
             usage=usage,
         )
 
+    @vllm_check
     async def async_generate(
         self,
         prompt: Union[str, Dict[str, Any]],
@@ -665,6 +674,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
             yield self._to_chat_completion_chunk(chunk)
             i += 1
 
+    @vllm_check
     async def async_chat(
         self,
         messages: List[Dict],
@@ -722,6 +732,33 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
             return False
         return VLLM_INSTALLED
 
+    def _sanitize_model_config(
+        self, model_config: Optional[VLLMModelConfig]
+    ) -> VLLMModelConfig:
+        if model_config is None:
+            model_config = VLLMModelConfig()
+
+        cuda_count = self._get_cuda_count()
+
+        model_config.setdefault("tokenizer_mode", "auto")
+        model_config.setdefault("trust_remote_code", True)
+        model_config.setdefault("tensor_parallel_size", cuda_count)
+        model_config.setdefault("block_size", 16)
+        model_config.setdefault("swap_space", 4)
+        model_config.setdefault("gpu_memory_utilization", 0.90)
+        model_config.setdefault("max_num_seqs", 256)
+        model_config.setdefault("quantization", None)
+        model_config.setdefault("max_model_len", None)
+        model_config["limit_mm_per_prompt"] = (
+            json.loads(model_config.get("limit_mm_per_prompt"))  # type: ignore
+            if model_config.get("limit_mm_per_prompt")
+            else {
+                "image": 2,  # default 2 images all chat
+            }
+        )
+
+        return model_config
+
     def _sanitize_chat_config(
         self,
         generate_config: Optional[Dict] = None,
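
limit_mm_per_prompt can arrive from the launch request as a JSON string; the sanitizer parses it and otherwise defaults to two images per prompt. A standalone reproduction of that branch (the raw value is an example):

    import json

    # Mirrors the parse/default branch added above; the raw value is an example.
    raw = '{"image": 4}'
    limit_mm_per_prompt = json.loads(raw) if raw else {"image": 2}
    print(limit_mm_per_prompt)  # {'image': 4}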
@@ -741,25 +778,48 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
         )
         return generate_config
 
+    @vllm_check
     async def async_chat(
         self,
-        messages: List[Dict],
+        messages: List[ChatCompletionMessage],  # type: ignore
         generate_config: Optional[Dict] = None,
         request_id: Optional[str] = None,
     ) -> Union[ChatCompletion, AsyncGenerator[ChatCompletionChunk, None]]:
-        # only support single image, waiting vllm support multi images
+        messages = self._transform_messages(messages)
+        tools = generate_config.pop("tools", []) if generate_config else None
+
         model_family = self.model_family.model_family or self.model_family.model_name
-        prompt, images = self.get_specific_prompt(model_family, messages)
 
-        if len(images) == 0:
+        if "internvl2" not in model_family.lower():
+            from qwen_vl_utils import process_vision_info
+
+            full_context_kwargs = {}
+            if tools and model_family in QWEN_TOOL_CALL_FAMILY:
+                full_context_kwargs["tools"] = tools
+            assert self.model_family.chat_template is not None
+            prompt = self.get_full_context(
+                messages, self.model_family.chat_template, **full_context_kwargs
+            )
+            images, video_inputs = process_vision_info(messages)
+            if video_inputs:
+                raise ValueError("Not support video input now.")
+        else:
+            prompt, images = self.get_specific_prompt(model_family, messages)
+
+        if not images:
             inputs = {
                 "prompt": prompt,
             }
-        else:
+        elif len(images) == 1:
             inputs = {
                 "prompt": prompt,
                 "multi_modal_data": {"image": images[-1]},  # type: ignore
             }
+        else:
+            inputs = {
+                "prompt": prompt,
+                "multi_modal_data": {"image": images},  # type: ignore
+            }
         generate_config = self._sanitize_chat_config(generate_config)
 
         stream = generate_config.get("stream", None)
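
For non-InternVL2 families the vision inputs are now extracted with qwen_vl_utils.process_vision_info from the transformed messages. A minimal usage sketch, assuming the qwen-vl-utils package is installed (the URL is a placeholder):

    from qwen_vl_utils import process_vision_info

    # Messages in the normalized layout produced by _transform_messages.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": "https://example.com/cat.png"},
                {"type": "text", "text": "Describe the image."},
            ],
        }
    ]
    image_inputs, video_inputs = process_vision_info(messages)
    # image_inputs is a list of decoded images; video_inputs stays empty here,
    # and the hunk above rejects any video input for now.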

xinference/model/llm/vllm/utils.py ADDED
@@ -0,0 +1,41 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import functools
+import logging
+import os
+
+logger = logging.getLogger(__name__)
+
+
+def vllm_check(fn):
+    try:
+        from vllm.engine.async_llm_engine import AsyncEngineDeadError
+    except:
+        return fn
+
+    @functools.wraps(fn)
+    async def _async_wrapper(self, *args, **kwargs):
+        try:
+            return await fn(self, *args, **kwargs)
+        except AsyncEngineDeadError:
+            logger.info("Detecting vLLM is not health, prepare to quit the process")
+            try:
+                self.stop()
+            except:
+                # ignore error when stop
+                pass
+            # Just kill the process and let xinference auto-recover the model
+            os._exit(1)
+
+    return _async_wrapper

xinference/model/rerank/core.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import gc
+import importlib
 import logging
 import os
 import threading
@@ -178,9 +179,27 @@ class RerankModel:
         return rerank_type
 
     def load(self):
+        flash_attn_installed = importlib.util.find_spec("flash_attn") is not None
+        if (
+            self._auto_detect_type(self._model_path) != "normal"
+            and flash_attn_installed
+        ):
+            logger.warning(
+                "flash_attn can only support fp16 and bf16, "
+                "will force set `use_fp16` to True"
+            )
+            self._use_fp16 = True
         if self._model_spec.type == "normal":
             try:
+                import sentence_transformers
                 from sentence_transformers.cross_encoder import CrossEncoder
+
+                if sentence_transformers.__version__ < "3.1.0":
+                    raise ValueError(
+                        "The sentence_transformers version must be greater than 3.1.0. "
+                        "Please upgrade your version via `pip install -U sentence_transformers` or refer to "
+                        "https://github.com/UKPLab/sentence-transformers"
+                    )
             except ImportError:
                 error_message = "Failed to import module 'sentence-transformers'"
                 installation_guide = [

xinference/model/rerank/model_spec.json CHANGED
@@ -54,5 +54,13 @@
     "max_tokens": 1024,
     "model_id": "jinaai/jina-reranker-v2-base-multilingual",
     "model_revision": "298e48cada4a9318650d7fbd795f63827f884087"
+  },
+  {
+    "model_name": "minicpm-reranker",
+    "type": "normal",
+    "language": ["en", "zh"],
+    "max_tokens": 1024,
+    "model_id": "openbmb/MiniCPM-Reranker",
+    "model_revision": "5d2fd7345b6444c89d4c0fa59c92272888f3f2d0"
   }
 ]

xinference/model/rerank/model_spec_modelscope.json CHANGED
@@ -49,5 +49,13 @@
     "max_tokens": 2048,
     "model_id": "mirror013/bge-reranker-v2-minicpm-layerwise",
     "model_hub": "modelscope"
+  },
+  {
+    "model_name": "minicpm-reranker",
+    "type": "normal",
+    "language": ["en", "zh"],
+    "max_tokens": 1024,
+    "model_id": "OpenBMB/MiniCPM-Reranker",
+    "model_hub": "modelscope"
   }
 ]
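
With these two spec entries the new minicpm-reranker can be launched like any other rerank model. A hedged usage sketch via the RESTful client (endpoint, documents and query are placeholders):

    from xinference.client import Client

    # Usage sketch only; adjust the endpoint and inputs to your deployment.
    client = Client("http://localhost:9997")
    model_uid = client.launch_model(model_name="minicpm-reranker", model_type="rerank")
    model = client.get_model(model_uid)
    result = model.rerank(
        documents=["Xinference serves LLMs.", "Bananas are yellow."],
        query="What does Xinference do?",
    )
    print(result["results"][0])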

xinference/model/utils.py CHANGED
@@ -23,12 +23,15 @@ import huggingface_hub
 import numpy as np
 import torch
 
-from ..constants import XINFERENCE_CACHE_DIR, XINFERENCE_ENV_MODEL_SRC
+from ..constants import (
+    XINFERENCE_CACHE_DIR,
+    XINFERENCE_DOWNLOAD_MAX_ATTEMPTS,
+    XINFERENCE_ENV_MODEL_SRC,
+)
 from ..device_utils import get_available_device, is_device_available
 from .core import CacheableModelSpec
 
 logger = logging.getLogger(__name__)
-MAX_ATTEMPTS = 3
 IS_NEW_HUGGINGFACE_HUB: bool = huggingface_hub.__version__ >= "0.23.0"
 
 
@@ -100,11 +103,11 @@ def retry_download(
     **kwargs,
 ):
     last_ex = None
-    for current_attempt in range(1, MAX_ATTEMPTS + 1):
+    for current_attempt in range(1, XINFERENCE_DOWNLOAD_MAX_ATTEMPTS + 1):
         try:
             return download_func(*args, **kwargs)
         except Exception as e:
-            remaining_attempts = MAX_ATTEMPTS - current_attempt
+            remaining_attempts = XINFERENCE_DOWNLOAD_MAX_ATTEMPTS - current_attempt
             last_ex = e
             logger.debug(
                 "Download failed: %s, download func: %s, download args: %s, kwargs: %s",
@@ -300,31 +303,6 @@ def cache(model_spec: CacheableModelSpec, model_description_type: type):
     return cache_dir
 
 
-def patch_trust_remote_code():
-    """sentence-transformers calls transformers without the trust_remote_code=True, some embedding
-    models will fail to load, e.g. jina-embeddings-v2-base-en
-
-    :return:
-    """
-    try:
-        from transformers.dynamic_module_utils import resolve_trust_remote_code
-    except ImportError:
-        logger.error("Patch transformers trust_remote_code failed.")
-    else:
-
-        def _patched_resolve_trust_remote_code(*args, **kwargs):
-            logger.info("Patched resolve_trust_remote_code: %s %s", args, kwargs)
-            return True
-
-        if (
-            resolve_trust_remote_code.__code__
-            != _patched_resolve_trust_remote_code.__code__
-        ):
-            resolve_trust_remote_code.__code__ = (
-                _patched_resolve_trust_remote_code.__code__
-            )
-
-
 def select_device(device):
     try:
         import torch  # noqa: F401

xinference/model/video/core.py CHANGED
@@ -21,8 +21,6 @@ from ..core import CacheableModelSpec, ModelDescription
 from ..utils import valid_model_revision
 from .diffusers import DiffUsersVideoModel
 
-MAX_ATTEMPTS = 3
-
 logger = logging.getLogger(__name__)
 
 MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)

xinference/web/ui/build/asset-manifest.json CHANGED
@@ -1,14 +1,14 @@
 {
   "files": {
     "main.css": "./static/css/main.5061c4c3.css",
-    "main.js": "./static/js/main.29578905.js",
+    "main.js": "./static/js/main.e51a356d.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
     "main.5061c4c3.css.map": "./static/css/main.5061c4c3.css.map",
-    "main.29578905.js.map": "./static/js/main.29578905.js.map"
+    "main.e51a356d.js.map": "./static/js/main.e51a356d.js.map"
   },
   "entrypoints": [
     "static/css/main.5061c4c3.css",
-    "static/js/main.29578905.js"
+    "static/js/main.e51a356d.js"
   ]
 }

xinference/web/ui/build/index.html CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.29578905.js"></script><link href="./static/css/main.5061c4c3.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.e51a356d.js"></script><link href="./static/css/main.5061c4c3.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>