xinference 0.15.2__py3-none-any.whl → 0.15.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic.

Files changed (28)
  1. xinference/_version.py +3 -3
  2. xinference/model/embedding/core.py +14 -5
  3. xinference/model/embedding/model_spec.json +7 -0
  4. xinference/model/embedding/model_spec_modelscope.json +9 -1
  5. xinference/model/image/stable_diffusion/core.py +12 -0
  6. xinference/model/llm/llm_family.json +12 -24
  7. xinference/model/llm/llm_family_modelscope.json +2 -10
  8. xinference/model/llm/utils.py +14 -3
  9. xinference/model/llm/vllm/core.py +22 -6
  10. xinference/model/llm/vllm/utils.py +42 -0
  11. xinference/model/rerank/core.py +19 -0
  12. xinference/model/rerank/model_spec.json +8 -0
  13. xinference/model/rerank/model_spec_modelscope.json +8 -0
  14. xinference/model/utils.py +0 -25
  15. xinference/web/ui/build/asset-manifest.json +3 -3
  16. xinference/web/ui/build/index.html +1 -1
  17. xinference/web/ui/build/static/js/{main.29578905.js → main.e51a356d.js} +3 -3
  18. xinference/web/ui/build/static/js/main.e51a356d.js.map +1 -0
  19. xinference/web/ui/node_modules/.cache/babel-loader/4385c1095eefbff0a8ec3b2964ba6e5a66a05ab31be721483ca2f43e2a91f6ff.json +1 -0
  20. {xinference-0.15.2.dist-info → xinference-0.15.3.dist-info}/METADATA +4 -3
  21. {xinference-0.15.2.dist-info → xinference-0.15.3.dist-info}/RECORD +26 -25
  22. xinference/web/ui/build/static/js/main.29578905.js.map +0 -1
  23. xinference/web/ui/node_modules/.cache/babel-loader/68bede6d95bb5ef0b35bbb3ec5b8c937eaf6862c6cdbddb5ef222a7776aaf336.json +0 -1
  24. /xinference/web/ui/build/static/js/{main.29578905.js.LICENSE.txt → main.e51a356d.js.LICENSE.txt} +0 -0
  25. {xinference-0.15.2.dist-info → xinference-0.15.3.dist-info}/LICENSE +0 -0
  26. {xinference-0.15.2.dist-info → xinference-0.15.3.dist-info}/WHEEL +0 -0
  27. {xinference-0.15.2.dist-info → xinference-0.15.3.dist-info}/entry_points.txt +0 -0
  28. {xinference-0.15.2.dist-info → xinference-0.15.3.dist-info}/top_level.txt +0 -0
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2024-09-20T16:58:06+0800",
+ "date": "2024-09-30T20:17:26+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "5de46e94c23785fa7e17e3e1d00c3afb6cb1c919",
- "version": "0.15.2"
+ "full-revisionid": "00a9ee15279a60a6d75393c4720d8da5cbbf5796",
+ "version": "0.15.3"
 }
 ''' # END VERSION_JSON
 
xinference/model/embedding/core.py CHANGED
@@ -141,7 +141,15 @@ class EmbeddingModel:
 
     def load(self):
         try:
+            import sentence_transformers
             from sentence_transformers import SentenceTransformer
+
+            if sentence_transformers.__version__ < "3.1.0":
+                raise ValueError(
+                    "The sentence_transformers version must be greater than 3.1.0. "
+                    "Please upgrade your version via `pip install -U sentence_transformers` or refer to "
+                    "https://github.com/UKPLab/sentence-transformers"
+                )
         except ImportError:
             error_message = "Failed to import module 'SentenceTransformer'"
             installation_guide = [
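
As an aside on the version gate introduced above: comparing __version__ strings lexicographically can misorder releases (for example "10.0.0" sorts before "3.1.0"), and the message says "greater than" while the condition actually requires 3.1.0 or newer. A minimal sketch of a more robust check, assuming the packaging library is available (not part of this diff):

    from packaging.version import Version

    import sentence_transformers

    # Version() parses "3.1.0", "3.1.0.dev0", "10.0.0", ... into comparable objects
    if Version(sentence_transformers.__version__) < Version("3.1.0"):
        raise ValueError(
            "sentence-transformers>=3.1.0 is required; "
            "upgrade with `pip install -U sentence-transformers`."
        )
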
@@ -173,9 +181,6 @@ class EmbeddingModel:
             )
             torch_dtype = torch.float32
 
-        from ..utils import patch_trust_remote_code
-
-        patch_trust_remote_code()
         if (
             "gte" in self._model_spec.model_name.lower()
             and "qwen2" in self._model_spec.model_name.lower()
@@ -191,7 +196,10 @@
         else:
             model_kwargs = {"torch_dtype": torch_dtype} if torch_dtype else None
         self._model = SentenceTransformer(
-            self._model_path, device=self._device, model_kwargs=model_kwargs
+            self._model_path,
+            device=self._device,
+            model_kwargs=model_kwargs,
+            trust_remote_code=True,
         )
 
     def create_embedding(self, sentences: Union[str, List[str]], **kwargs):
@@ -213,6 +221,7 @@
             convert_to_tensor: bool = False,
             device: str = None,
             normalize_embeddings: bool = False,
+            **kwargs,
         ):
             """
             Computes sentence embeddings
@@ -317,7 +326,7 @@
                 all_token_nums += features["attention_mask"].sum().item()
 
                 with torch.no_grad():
-                    out_features = model.forward(features)
+                    out_features = model.forward(features, **kwargs)
 
                     if output_value == "token_embeddings":
                         embeddings = []
xinference/model/embedding/model_spec.json CHANGED
@@ -238,5 +238,12 @@
     "language": ["zh", "en"],
     "model_id": "Alibaba-NLP/gte-Qwen2-7B-instruct",
     "model_revision": "e26182b2122f4435e8b3ebecbf363990f409b45b"
+  },
+  {
+    "model_name": "jina-embeddings-v3",
+    "dimensions": 1024,
+    "max_tokens": 8192,
+    "language": ["zh", "en"],
+    "model_id": "jinaai/jina-embeddings-v3"
   }
 ]
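
With the jina-embeddings-v3 spec registered above, the model can be served like any other built-in embedding model. A hedged usage sketch, assuming a local Xinference server is already running at the default address:

    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")
    model_uid = client.launch_model(
        model_name="jina-embeddings-v3",
        model_type="embedding",
    )
    model = client.get_model(model_uid)
    result = model.create_embedding("Xinference now ships jina-embeddings-v3")
    # per the spec above, each vector should have 1024 dimensions
    print(len(result["data"][0]["embedding"]))
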
xinference/model/embedding/model_spec_modelscope.json CHANGED
@@ -233,12 +233,20 @@
     "model_id": "AI-ModelScope/m3e-large",
     "model_hub": "modelscope"
   },
-  {
+  {
     "model_name": "gte-Qwen2",
     "dimensions": 4096,
     "max_tokens": 32000,
     "language": ["zh", "en"],
     "model_id": "iic/gte_Qwen2-7B-instruct",
     "model_hub": "modelscope"
+  },
+  {
+    "model_name": "jina-embeddings-v3",
+    "dimensions": 1024,
+    "max_tokens": 8192,
+    "language": ["zh", "en"],
+    "model_id": "jinaai/jina-embeddings-v3",
+    "model_hub": "modelscope"
   }
 ]
xinference/model/image/stable_diffusion/core.py CHANGED
@@ -193,6 +193,18 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
             self._model_path,
             **self._kwargs,
         )
+        if self._kwargs.get("deepcache", True):
+            # NOTE: DeepCache should be loaded first before cpu_offloading
+            try:
+                from DeepCache import DeepCacheSDHelper
+
+                helper = DeepCacheSDHelper(pipe=self._model)
+                helper.set_params(cache_interval=3, cache_branch_id=0)
+                helper.enable()
+            except ImportError:
+                logger.debug("deepcache is not installed")
+                pass
+
         if self._kwargs.get("cpu_offload", False):
             logger.debug("CPU offloading model")
             self._model.enable_model_cpu_offload()
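
The DeepCache block above reuses cached UNet features across adjacent denoising steps to speed up image generation. A minimal standalone sketch of the same helper applied to a plain diffusers pipeline (model id, device, and prompt are illustrative assumptions):

    import torch
    from diffusers import StableDiffusionPipeline
    from DeepCache import DeepCacheSDHelper

    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
    ).to("cuda")

    helper = DeepCacheSDHelper(pipe=pipe)
    helper.set_params(cache_interval=3, cache_branch_id=0)  # defaults used in the diff
    helper.enable()

    image = pipe("a watercolor painting of a lighthouse").images[0]
    image.save("lighthouse.png")
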
xinference/model/llm/llm_family.json CHANGED
@@ -6483,8 +6483,7 @@
         "8-bit",
         "none"
       ],
-      "model_id": "OpenGVLab/InternVL2-1B",
-      "model_revision": "a9fc14aea824b6ea1d44f8778cad6b35512c4ce1"
+      "model_id": "OpenGVLab/InternVL2-1B"
     },
     {
       "model_format": "pytorch",
@@ -6494,8 +6493,7 @@
         "8-bit",
         "none"
       ],
-      "model_id": "OpenGVLab/InternVL2-2B",
-      "model_revision": "422ad7c6335917bfb514958233955512338485a6"
+      "model_id": "OpenGVLab/InternVL2-2B"
     },
     {
       "model_format": "awq",
@@ -6503,8 +6501,7 @@
       "quantizations": [
         "Int4"
       ],
-      "model_id": "OpenGVLab/InternVL2-2B-AWQ",
-      "model_revision": "701bc3fc098a8a3b686b3b4135cfb77202be89e0"
+      "model_id": "OpenGVLab/InternVL2-2B-AWQ"
     },
     {
       "model_format": "pytorch",
@@ -6514,8 +6511,7 @@
         "8-bit",
         "none"
       ],
-      "model_id": "OpenGVLab/InternVL2-4B",
-      "model_revision": "b50544dafada6c41e80bfde2f57cc9b0140fc21c"
+      "model_id": "OpenGVLab/InternVL2-4B"
     },
     {
       "model_format": "pytorch",
@@ -6525,8 +6521,7 @@
         "8-bit",
         "none"
       ],
-      "model_id": "OpenGVLab/InternVL2-8B",
-      "model_revision": "3bfd3664dea4f3da628785f5125d30f889701253"
+      "model_id": "OpenGVLab/InternVL2-8B"
     },
     {
       "model_format": "awq",
@@ -6534,8 +6529,7 @@
       "quantizations": [
         "Int4"
       ],
-      "model_id": "OpenGVLab/InternVL2-8B-AWQ",
-      "model_revision": "9f1a4756b7ae18eb26d8a22b618dfc283e8193b3"
+      "model_id": "OpenGVLab/InternVL2-8B-AWQ"
     },
     {
       "model_format": "pytorch",
@@ -6545,8 +6539,7 @@
         "8-bit",
         "none"
       ],
-      "model_id": "OpenGVLab/InternVL2-26B",
-      "model_revision": "b9f3c7e6d575b0115e076a3ffc46fd20b7586899"
+      "model_id": "OpenGVLab/InternVL2-26B"
     },
     {
       "model_format": "awq",
@@ -6554,8 +6547,7 @@
       "quantizations": [
         "Int4"
       ],
-      "model_id": "OpenGVLab/InternVL2-26B-AWQ",
-      "model_revision": "469e0019ffd251e22ff6501a5c2321964e86ef0d"
+      "model_id": "OpenGVLab/InternVL2-26B-AWQ"
     },
     {
       "model_format": "pytorch",
@@ -6565,8 +6557,7 @@
         "8-bit",
         "none"
       ],
-      "model_id": "OpenGVLab/InternVL2-40B",
-      "model_revision": "725a12063bb855c966e30a0617d0ccd9e870d772"
+      "model_id": "OpenGVLab/InternVL2-40B"
     },
     {
       "model_format": "awq",
@@ -6574,8 +6565,7 @@
       "quantizations": [
         "Int4"
       ],
-      "model_id": "OpenGVLab/InternVL2-40B-AWQ",
-      "model_revision": "d92e140f6dfe8ea9679924c6a31898f42c4e1846"
+      "model_id": "OpenGVLab/InternVL2-40B-AWQ"
     },
     {
       "model_format": "pytorch",
@@ -6585,8 +6575,7 @@
         "8-bit",
         "none"
       ],
-      "model_id": "OpenGVLab/InternVL2-Llama3-76B",
-      "model_revision": "cf7914905f78e9e3560ddbd6f5dfc39becac494f"
+      "model_id": "OpenGVLab/InternVL2-Llama3-76B"
     },
     {
       "model_format": "awq",
@@ -6594,8 +6583,7 @@
       "quantizations": [
         "Int4"
       ],
-      "model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ",
-      "model_revision": "1bc796bf80f2ebc7d6a14c15f55217a4600d50a4"
+      "model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ"
     }
   ],
   "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
xinference/model/llm/llm_family_modelscope.json CHANGED
@@ -4334,16 +4334,8 @@
     }
   ],
   "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
-  "stop_token_ids": [
-    151643,
-    151644,
-    151645
-  ],
-  "stop": [
-    "<|endoftext|>",
-    "<|im_start|>",
-    "<|im_end|>"
-  ]
+  "stop_token_ids": [],
+  "stop": []
 },
 {
   "version": 1,
xinference/model/llm/utils.py CHANGED
@@ -159,14 +159,25 @@ class ChatModelMixin:
                 for image_url in image_urls:
                     fut = executor.submit(_decode_image, image_url)
                     image_futures.append(fut)
-                images = [fut.result() for fut in image_futures]
+                images.extend([fut.result() for fut in image_futures])
                 if len(image_futures) == 0:
                     ret += role + "\n" + text + intra_message_sep + "\n"
                 else:
+                    placeholders = "\n".join(
+                        f"Image-{i+1}: <image>\n"
+                        for i in range(
+                            len(images) - len(image_futures), len(images)
+                        )
+                    )
                     ret += (
-                        role + "\n" + f"<image>\n{text}" + intra_message_sep + "\n"
+                        role
+                        + "\n"
+                        + f"{placeholders}\n{text}"
+                        + intra_message_sep
+                        + "\n"
                     )
-
+            if len(images) == 1:
+                ret = ret.replace("Image-1: <image>\n", "<image>\n")
             return ret, images
         else:
             raise ValueError(f"Invalid model family: {model_family}")
xinference/model/llm/vllm/core.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import asyncio
+import json
 import logging
 import multiprocessing
 import os
@@ -47,6 +48,7 @@ from ..utils import (
     ChatModelMixin,
     generate_completion_chunk,
 )
+from .utils import vllm_check
 
 logger = logging.getLogger(__name__)
 
@@ -65,6 +67,7 @@ class VLLMModelConfig(TypedDict, total=False):
     max_num_seqs: int
     quantization: Optional[str]
     max_model_len: Optional[int]
+    limit_mm_per_prompt: Optional[Dict[str, int]]
 
 
 class VLLMGenerateConfig(TypedDict, total=False):
@@ -90,9 +93,7 @@ try:
 except ImportError:
     VLLM_INSTALLED = False
 
-VLLM_SUPPORTED_VISION_MODEL_LIST: List[str] = [
-    "internvl2",
-]
+VLLM_SUPPORTED_VISION_MODEL_LIST: List[str] = []
 VLLM_SUPPORTED_MODELS = [
     "llama-2",
     "llama-3",
@@ -171,6 +172,9 @@ if VLLM_INSTALLED and vllm.__version__ > "0.5.3":
     VLLM_SUPPORTED_MODELS.append("llama-3.1")
     VLLM_SUPPORTED_CHAT_MODELS.append("llama-3.1-instruct")
 
+if VLLM_INSTALLED and vllm.__version__ >= "0.6.1":
+    VLLM_SUPPORTED_VISION_MODEL_LIST.append("internvl2")
+
 
 class VLLMModel(LLM):
     def __init__(
@@ -304,7 +308,12 @@ class VLLMModel(LLM):
         model_config.setdefault("gpu_memory_utilization", 0.90)
         model_config.setdefault("max_num_seqs", 256)
         model_config.setdefault("quantization", None)
-        model_config.setdefault("max_model_len", 4096)
+        model_config.setdefault("max_model_len", None)
+        model_config["limit_mm_per_prompt"] = (
+            json.loads(model_config.get("limit_mm_per_prompt"))  # type: ignore
+            if model_config.get("limit_mm_per_prompt")
+            else None
+        )
 
         return model_config
 
@@ -434,6 +443,7 @@ class VLLMModel(LLM):
             usage=usage,
         )
 
+    @vllm_check
     async def async_generate(
         self,
         prompt: Union[str, Dict[str, Any]],
@@ -665,6 +675,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
                 yield self._to_chat_completion_chunk(chunk)
                 i += 1
 
+    @vllm_check
     async def async_chat(
         self,
         messages: List[Dict],
@@ -741,13 +752,13 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
         )
         return generate_config
 
+    @vllm_check
     async def async_chat(
         self,
         messages: List[Dict],
        generate_config: Optional[Dict] = None,
         request_id: Optional[str] = None,
     ) -> Union[ChatCompletion, AsyncGenerator[ChatCompletionChunk, None]]:
-        # only support single image, waiting vllm support multi images
         model_family = self.model_family.model_family or self.model_family.model_name
         prompt, images = self.get_specific_prompt(model_family, messages)
 
@@ -755,11 +766,16 @@
             inputs = {
                 "prompt": prompt,
             }
-        else:
+        elif len(images) == 1:
             inputs = {
                 "prompt": prompt,
                 "multi_modal_data": {"image": images[-1]},  # type: ignore
             }
+        else:
+            inputs = {
+                "prompt": prompt,
+                "multi_modal_data": {"image": images},  # type: ignore
+            }
         generate_config = self._sanitize_chat_config(generate_config)
 
         stream = generate_config.get("stream", None)
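
The config changes above stop forcing max_model_len to 4096 and accept a new limit_mm_per_prompt option that arrives as a JSON string and is decoded into a dict before reaching vLLM. A hedged sketch of how such a value would be supplied and parsed (the commented launch_model call is illustrative, not a documented signature):

    import json

    raw = '{"image": 4}'  # e.g. allow up to 4 images per prompt
    limit_mm_per_prompt = json.loads(raw) if raw else None
    print(limit_mm_per_prompt)  # {'image': 4}

    # Through the client this would look roughly like:
    # client.launch_model(
    #     model_name="internvl2",
    #     model_engine="vllm",
    #     limit_mm_per_prompt='{"image": 4}',
    # )
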
xinference/model/llm/vllm/utils.py ADDED
@@ -0,0 +1,42 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import functools
+import logging
+import os
+
+logger = logging.getLogger(__name__)
+
+
+def vllm_check(fn):
+    try:
+        from vllm.engine.async_llm_engine import AsyncEngineDeadError
+    except:
+        return fn
+
+    @functools.wraps(fn)
+    async def _async_wrapper(self, *args, **kwargs):
+        logger.info("vllm_check")
+        try:
+            return await fn(self, *args, **kwargs)
+        except AsyncEngineDeadError:
+            logger.info("Detecting vLLM is not health, prepare to quit the process")
+            try:
+                self.stop()
+            except:
+                # ignore error when stop
+                pass
+            # Just kill the process and let xinference auto-recover the model
+            os._exit(1)
+
+    return _async_wrapper
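
The decorator added above degrades gracefully: when vLLM (and therefore AsyncEngineDeadError) cannot be imported, it returns the wrapped coroutine unchanged, so non-vLLM deployments see no behavior change. A hedged illustration with a dummy class (not part of this diff):

    import asyncio

    from xinference.model.llm.vllm.utils import vllm_check


    class DummyModel:
        def stop(self):
            pass

        @vllm_check
        async def async_generate(self, prompt):
            return f"echo: {prompt}"


    print(asyncio.run(DummyModel().async_generate("hi")))  # echo: hi
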
xinference/model/rerank/core.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import gc
+import importlib
 import logging
 import os
 import threading
@@ -178,9 +179,27 @@ class RerankModel:
         return rerank_type
 
     def load(self):
+        flash_attn_installed = importlib.util.find_spec("flash_attn") is not None
+        if (
+            self._auto_detect_type(self._model_path) != "normal"
+            and flash_attn_installed
+        ):
+            logger.warning(
+                "flash_attn can only support fp16 and bf16, "
+                "will force set `use_fp16` to True"
+            )
+            self._use_fp16 = True
         if self._model_spec.type == "normal":
             try:
+                import sentence_transformers
                 from sentence_transformers.cross_encoder import CrossEncoder
+
+                if sentence_transformers.__version__ < "3.1.0":
+                    raise ValueError(
+                        "The sentence_transformers version must be greater than 3.1.0. "
+                        "Please upgrade your version via `pip install -U sentence_transformers` or refer to "
+                        "https://github.com/UKPLab/sentence-transformers"
+                    )
             except ImportError:
                 error_message = "Failed to import module 'sentence-transformers'"
                 installation_guide = [
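
The flash_attn detection above relies on importlib.util.find_spec, which reports whether a package is importable without actually importing it, so flash_attn's import-time side effects are avoided. A minimal sketch of the same pattern in isolation:

    import importlib.util

    flash_attn_installed = importlib.util.find_spec("flash_attn") is not None
    print("flash_attn available:", flash_attn_installed)
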
xinference/model/rerank/model_spec.json CHANGED
@@ -54,5 +54,13 @@
     "max_tokens": 1024,
     "model_id": "jinaai/jina-reranker-v2-base-multilingual",
     "model_revision": "298e48cada4a9318650d7fbd795f63827f884087"
+  },
+  {
+    "model_name": "minicpm-reranker",
+    "type": "normal",
+    "language": ["en", "zh"],
+    "max_tokens": 1024,
+    "model_id": "openbmb/MiniCPM-Reranker",
+    "model_revision": "5d2fd7345b6444c89d4c0fa59c92272888f3f2d0"
   }
 ]
xinference/model/rerank/model_spec_modelscope.json CHANGED
@@ -49,5 +49,13 @@
     "max_tokens": 2048,
     "model_id": "mirror013/bge-reranker-v2-minicpm-layerwise",
     "model_hub": "modelscope"
+  },
+  {
+    "model_name": "minicpm-reranker",
+    "type": "normal",
+    "language": ["en", "zh"],
+    "max_tokens": 1024,
+    "model_id": "OpenBMB/MiniCPM-Reranker",
+    "model_hub": "modelscope"
   }
 ]
xinference/model/utils.py CHANGED
@@ -300,31 +300,6 @@ def cache(model_spec: CacheableModelSpec, model_description_type: type):
     return cache_dir
 
 
-def patch_trust_remote_code():
-    """sentence-transformers calls transformers without the trust_remote_code=True, some embedding
-    models will fail to load, e.g. jina-embeddings-v2-base-en
-
-    :return:
-    """
-    try:
-        from transformers.dynamic_module_utils import resolve_trust_remote_code
-    except ImportError:
-        logger.error("Patch transformers trust_remote_code failed.")
-    else:
-
-        def _patched_resolve_trust_remote_code(*args, **kwargs):
-            logger.info("Patched resolve_trust_remote_code: %s %s", args, kwargs)
-            return True
-
-        if (
-            resolve_trust_remote_code.__code__
-            != _patched_resolve_trust_remote_code.__code__
-        ):
-            resolve_trust_remote_code.__code__ = (
-                _patched_resolve_trust_remote_code.__code__
-            )
-
-
 def select_device(device):
     try:
         import torch  # noqa: F401
xinference/web/ui/build/asset-manifest.json CHANGED
@@ -1,14 +1,14 @@
 {
   "files": {
     "main.css": "./static/css/main.5061c4c3.css",
-    "main.js": "./static/js/main.29578905.js",
+    "main.js": "./static/js/main.e51a356d.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
     "main.5061c4c3.css.map": "./static/css/main.5061c4c3.css.map",
-    "main.29578905.js.map": "./static/js/main.29578905.js.map"
+    "main.e51a356d.js.map": "./static/js/main.e51a356d.js.map"
   },
   "entrypoints": [
     "static/css/main.5061c4c3.css",
-    "static/js/main.29578905.js"
+    "static/js/main.e51a356d.js"
   ]
 }
xinference/web/ui/build/index.html CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.29578905.js"></script><link href="./static/css/main.5061c4c3.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.e51a356d.js"></script><link href="./static/css/main.5061c4c3.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>