xinference-0.13.3-py3-none-any.whl → xinference-0.13.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (48)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +4 -1
  3. xinference/client/restful/restful_client.py +2 -2
  4. xinference/constants.py +0 -4
  5. xinference/core/image_interface.py +6 -3
  6. xinference/core/model.py +1 -1
  7. xinference/core/supervisor.py +2 -0
  8. xinference/core/worker.py +7 -0
  9. xinference/deploy/utils.py +6 -0
  10. xinference/model/audio/core.py +4 -2
  11. xinference/model/core.py +25 -4
  12. xinference/model/embedding/core.py +88 -13
  13. xinference/model/embedding/model_spec.json +8 -0
  14. xinference/model/embedding/model_spec_modelscope.json +8 -0
  15. xinference/model/flexible/core.py +8 -2
  16. xinference/model/image/core.py +8 -5
  17. xinference/model/image/model_spec.json +30 -6
  18. xinference/model/image/model_spec_modelscope.json +21 -3
  19. xinference/model/image/stable_diffusion/core.py +30 -27
  20. xinference/model/llm/core.py +6 -4
  21. xinference/model/llm/ggml/llamacpp.py +7 -5
  22. xinference/model/llm/llm_family.py +6 -6
  23. xinference/model/llm/mlx/core.py +7 -0
  24. xinference/model/llm/pytorch/chatglm.py +4 -1
  25. xinference/model/llm/pytorch/deepseek_vl.py +2 -1
  26. xinference/model/llm/pytorch/falcon.py +2 -1
  27. xinference/model/llm/pytorch/llama_2.py +4 -2
  28. xinference/model/llm/pytorch/omnilmm.py +2 -1
  29. xinference/model/llm/pytorch/qwen_vl.py +2 -1
  30. xinference/model/llm/pytorch/vicuna.py +2 -1
  31. xinference/model/llm/pytorch/yi_vl.py +2 -1
  32. xinference/model/llm/sglang/core.py +12 -6
  33. xinference/model/llm/vllm/core.py +1 -5
  34. xinference/model/rerank/core.py +4 -3
  35. xinference/web/ui/build/asset-manifest.json +3 -3
  36. xinference/web/ui/build/index.html +1 -1
  37. xinference/web/ui/build/static/js/{main.2ef0cfaf.js → main.af906659.js} +3 -3
  38. xinference/web/ui/build/static/js/main.af906659.js.map +1 -0
  39. xinference/web/ui/node_modules/.cache/babel-loader/2cd5e4279ad7e13a1f41d486e9fca7756295bfad5bd77d90992f4ac3e10b496d.json +1 -0
  40. {xinference-0.13.3.dist-info → xinference-0.13.4.dist-info}/METADATA +24 -4
  41. {xinference-0.13.3.dist-info → xinference-0.13.4.dist-info}/RECORD +46 -46
  42. xinference/web/ui/build/static/js/main.2ef0cfaf.js.map +0 -1
  43. xinference/web/ui/node_modules/.cache/babel-loader/b6807ecc0c231fea699533518a0eb2a2bf68a081ce00d452be40600dbffa17a7.json +0 -1
  44. /xinference/web/ui/build/static/js/{main.2ef0cfaf.js.LICENSE.txt → main.af906659.js.LICENSE.txt} +0 -0
  45. {xinference-0.13.3.dist-info → xinference-0.13.4.dist-info}/LICENSE +0 -0
  46. {xinference-0.13.3.dist-info → xinference-0.13.4.dist-info}/WHEEL +0 -0
  47. {xinference-0.13.3.dist-info → xinference-0.13.4.dist-info}/entry_points.txt +0 -0
  48. {xinference-0.13.3.dist-info → xinference-0.13.4.dist-info}/top_level.txt +0 -0
xinference/model/image/model_spec_modelscope.json

@@ -4,21 +4,31 @@
         "model_family": "stable_diffusion",
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/stable-diffusion-3-medium-diffusers",
-        "model_revision": "master"
+        "model_revision": "master",
+        "abilities": [
+            "text2iamge",
+            "image2image"
+        ]
     },
     {
         "model_name": "sd-turbo",
         "model_family": "stable_diffusion",
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/sd-turbo",
-        "model_revision": "master"
+        "model_revision": "master",
+        "abilities": [
+            "text2iamge"
+        ]
     },
     {
         "model_name": "sdxl-turbo",
         "model_family": "stable_diffusion",
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/sdxl-turbo",
-        "model_revision": "master"
+        "model_revision": "master",
+        "abilities": [
+            "text2iamge"
+        ]
     },
     {
         "model_name": "stable-diffusion-v1.5",
@@ -26,6 +36,10 @@
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/stable-diffusion-v1-5",
         "model_revision": "master",
+        "abilities": [
+            "text2iamge",
+            "image2image"
+        ],
         "controlnet": [
             {
                 "model_name":"canny",
@@ -77,6 +91,10 @@
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/stable-diffusion-xl-base-1.0",
         "model_revision": "master",
+        "abilities": [
+            "text2iamge",
+            "image2image"
+        ],
         "controlnet": [
             {
                 "model_name":"canny",
xinference/model/image/stable_diffusion/core.py

@@ -35,22 +35,23 @@ class DiffusionModel:
     def __init__(
         self,
         model_uid: str,
-        model_path: str,
+        model_path: Optional[str] = None,
         device: Optional[str] = None,
         lora_model: Optional[List[LoRA]] = None,
         lora_load_kwargs: Optional[Dict] = None,
         lora_fuse_kwargs: Optional[Dict] = None,
-        ability: Optional[str] = None,
+        abilities: Optional[List[str]] = None,
         **kwargs,
     ):
         self._model_uid = model_uid
         self._model_path = model_path
         self._device = device
         self._model = None
+        self._i2i_model = None  # image to image model
         self._lora_model = lora_model
         self._lora_load_kwargs = lora_load_kwargs or {}
         self._lora_fuse_kwargs = lora_fuse_kwargs or {}
-        self._ability = ability
+        self._abilities = abilities
         self._kwargs = kwargs

     def _apply_lora(self):
@@ -69,12 +70,12 @@ class DiffusionModel:
     def load(self):
         import torch

-        if self._ability in [None, "text2image", "image2image"]:
+        if "text2image" in self._abilities or "image2image" in self._abilities:
             from diffusers import AutoPipelineForText2Image as AutoPipelineModel
-        elif self._ability == "inpainting":
+        elif "inpainting" in self._abilities:
             from diffusers import AutoPipelineForInpainting as AutoPipelineModel
         else:
-            raise ValueError(f"Unknown ability: {self._ability}")
+            raise ValueError(f"Unknown ability: {self._abilities}")

         controlnet = self._kwargs.get("controlnet")
         if controlnet is not None:
@@ -106,28 +107,17 @@ class DiffusionModel:

     def _call_model(
         self,
-        height: int,
-        width: int,
-        num_images_per_prompt: int,
         response_format: str,
+        model=None,
         **kwargs,
     ):
         logger.debug(
             "stable diffusion args: %s",
-            dict(
-                kwargs,
-                height=height,
-                width=width,
-                num_images_per_prompt=num_images_per_prompt,
-            ),
+            kwargs,
         )
-        assert callable(self._model)
-        images = self._model(
-            height=height,
-            width=width,
-            num_images_per_prompt=num_images_per_prompt,
-            **kwargs,
-        ).images
+        model = model if model is not None else self._model
+        assert callable(model)
+        images = model(**kwargs).images
         if response_format == "url":
             os.makedirs(XINFERENCE_IMAGE_DIR, exist_ok=True)
             image_list = []
@@ -145,7 +135,7 @@ class DiffusionModel:
                 return base64.b64encode(buffered.getvalue()).decode()

             with ThreadPoolExecutor() as executor:
-                results = list(map(partial(executor.submit, _gen_base64_image), images))
+                results = list(map(partial(executor.submit, _gen_base64_image), images))  # type: ignore
                 image_list = [Image(url=None, b64_json=s.result()) for s in results]
             return ImageList(created=int(time.time()), data=image_list)
         else:
@@ -177,19 +167,32 @@ class DiffusionModel:
         prompt: Optional[Union[str, List[str]]] = None,
         negative_prompt: Optional[Union[str, List[str]]] = None,
         n: int = 1,
-        size: str = "1024*1024",
+        size: Optional[str] = None,
         response_format: str = "url",
         **kwargs,
     ):
-        width, height = map(int, re.split(r"[^\d]+", size))
+        if "controlnet" in self._kwargs:
+            model = self._model
+        else:
+            if self._i2i_model is not None:
+                model = self._i2i_model
+            else:
+                from diffusers import AutoPipelineForImage2Image
+
+                self._i2i_model = model = AutoPipelineForImage2Image.from_pipe(
+                    self._model
+                )
+        if size:
+            width, height = map(int, re.split(r"[^\d]+", size))
+            kwargs["width"] = width
+            kwargs["height"] = height
         return self._call_model(
             image=image,
             prompt=prompt,
             negative_prompt=negative_prompt,
-            height=height,
-            width=width,
             num_images_per_prompt=n,
             response_format=response_format,
+            model=model,
             **kwargs,
         )
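For reference, the reworked image-to-image path above derives an image-to-image pipeline lazily from the already-loaded text-to-image pipeline via AutoPipelineForImage2Image.from_pipe, and only injects width/height when a size string is supplied. A minimal standalone sketch of the same pattern with diffusers (the checkpoint name and size value are illustrative, not taken from this diff):

import re

from diffusers import AutoPipelineForImage2Image, AutoPipelineForText2Image

# Load a text-to-image pipeline once (illustrative checkpoint).
t2i = AutoPipelineForText2Image.from_pretrained("stabilityai/sd-turbo")

# Reuse its weights for image-to-image instead of loading a second copy,
# mirroring the from_pipe call added in this release.
i2i = AutoPipelineForImage2Image.from_pipe(t2i)

# Parse an xinference-style size string such as "512*512" into width/height,
# as the new `if size:` branch does.
width, height = map(int, re.split(r"[^\d]+", "512*512"))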
 
xinference/model/llm/core.py

@@ -194,6 +194,7 @@ def create_llm_model_instance(
     quantization: Optional[str] = None,
     peft_model_config: Optional[PeftModelConfig] = None,
     download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[LLM, LLMDescription]:
     from .llm_family import cache, check_engine_by_spec_parameters, match_llm
@@ -221,7 +222,8 @@
     )
     logger.debug(f"Launching {model_uid} with {llm_cls.__name__}")

-    save_path = cache(llm_family, llm_spec, quantization)
+    if not model_path:
+        model_path = cache(llm_family, llm_spec, quantization)

     peft_model = peft_model_config.peft_model if peft_model_config else None
     if peft_model is not None:
@@ -231,7 +233,7 @@
             llm_family,
             llm_spec,
             quantization,
-            save_path,
+            model_path,
             kwargs,
             peft_model,
         )
@@ -241,11 +243,11 @@
             f"Load this without lora."
         )
         model = llm_cls(
-            model_uid, llm_family, llm_spec, quantization, save_path, kwargs
+            model_uid, llm_family, llm_spec, quantization, model_path, kwargs
         )
     else:
         model = llm_cls(
-            model_uid, llm_family, llm_spec, quantization, save_path, kwargs
+            model_uid, llm_family, llm_spec, quantization, model_path, kwargs
         )
     return model, LLMDescription(
         subpool_addr, devices, llm_family, llm_spec, quantization
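
The new optional model_path argument lets create_llm_model_instance skip the download/cache step entirely when the model files already exist on disk; cache() is only consulted when no path is supplied. At the client level this would surface roughly as below (a hedged sketch: how launch_model forwards model_path through the REST API is assumed from the restful_api.py/worker.py changes listed above, and the path is hypothetical):

from xinference.client import Client

client = Client("http://127.0.0.1:9997")

# Assumed usage: point the launch at an already-downloaded model directory so
# the worker calls create_llm_model_instance(..., model_path=...) and skips
# cache(llm_family, llm_spec, quantization).
model_uid = client.launch_model(
    model_name="llama-3-instruct",
    model_engine="transformers",
    model_path="/data/models/llama-3-8b-instruct",  # hypothetical local path
)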
xinference/model/llm/ggml/llamacpp.py

@@ -155,11 +155,13 @@ class LlamaCppModel(LLM):
             raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")

         # handle legacy cache.
-        model_path = os.path.join(
-            self.model_path,
-            self.model_spec.model_file_name_template.format(
-                quantization=self.quantization
-            ),
+        model_path = os.path.realpath(
+            os.path.join(
+                self.model_path,
+                self.model_spec.model_file_name_template.format(
+                    quantization=self.quantization
+                ),
+            )
         )
         legacy_model_file_path = os.path.join(self.model_path, "model.bin")
         if os.path.exists(legacy_model_file_path):
xinference/model/llm/llm_family.py

@@ -699,12 +699,12 @@ def _generate_model_file_names(
 def _merge_cached_files(
     cache_dir: str, input_file_names: List[str], output_file_name: str
 ):
-    with open(os.path.join(cache_dir, output_file_name), "wb") as output_file:
-        for file_name in input_file_names:
-            logger.info(f"Merging file {file_name} into {output_file_name} ...")
-
-            with open(os.path.join(cache_dir, file_name), "rb") as input_file:
-                shutil.copyfileobj(input_file, output_file)
+    # now llama.cpp can find the gguf parts automatically
+    # we only need to provide the first part
+    # thus we create the symlink to the first part
+    symlink_local_file(
+        os.path.join(cache_dir, input_file_names[0]), cache_dir, output_file_name
+    )

     logger.info(f"Merge complete.")
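
_merge_cached_files no longer concatenates the split GGUF parts; it links the expected output name to the first part and lets llama.cpp discover the remaining parts next to it. symlink_local_file itself is not shown in this diff, so the sketch below is only a rough equivalent of the intended behaviour (the helper name and copy fallback are assumptions):

import os
import shutil


def link_first_gguf_part(first_part: str, cache_dir: str, output_file_name: str) -> str:
    # Hypothetical stand-in for symlink_local_file: expose the first GGUF part
    # under the expected output name; llama.cpp resolves the sibling parts.
    link_path = os.path.join(cache_dir, output_file_name)
    if os.path.lexists(link_path):
        os.remove(link_path)
    try:
        os.symlink(os.path.abspath(first_part), link_path)
    except OSError:
        # e.g. filesystems without symlink support: fall back to copying
        shutil.copyfile(first_part, link_path)
    return link_path

This also ties in with the os.path.realpath change in llamacpp.py above, which presumably resolves the symlink so llama.cpp sees the real first part and can locate its siblings.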
 
xinference/model/llm/mlx/core.py

@@ -101,6 +101,7 @@ class MLXModel(LLM):

     def _load_model(self, **kwargs):
         try:
+            import mlx.core as mx
             from mlx_lm import load
         except ImportError:
             error_message = "Failed to import module 'mlx_lm'"
@@ -122,6 +123,11 @@ class MLXModel(LLM):
             self._model_config,
         )

+        cache_limit_gb = kwargs.get("cache_limit_gb", None)
+        if cache_limit_gb:
+            logger.debug(f"Setting cache limit to {cache_limit_gb} GB")
+            mx.metal.set_cache_limit(cache_limit_gb * 1024 * 1024 * 1024)
+
         return load(
             self.model_path,
             tokenizer_config=tokenizer_config,
@@ -134,6 +140,7 @@ class MLXModel(LLM):
             "revision", self.model_spec.model_revision
         )
         kwargs["trust_remote_code"] = self._model_config.get("trust_remote_code")
+        kwargs["cache_limit_gb"] = self._model_config.pop("cache_limit_gb", None)

         self._model, self._tokenizer = self._load_model(**kwargs)
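
cache_limit_gb is popped from the model config and converted from gigabytes to bytes before being handed to MLX's Metal allocator. A minimal sketch of the same call outside xinference (the 8 GB figure is just an example):

import mlx.core as mx

cache_limit_gb = 8  # example value; supplied as `cache_limit_gb` in the model config
# mx.metal.set_cache_limit expects bytes, hence the GB-to-bytes conversion
# mirrored from _load_model above.
mx.metal.set_cache_limit(cache_limit_gb * 1024 * 1024 * 1024)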
 
xinference/model/llm/pytorch/chatglm.py

@@ -430,7 +430,10 @@ class ChatglmPytorchChatModel(PytorchChatModel):
         outputs = self._model.generate(**kwargs)
         outputs = outputs[:, kwargs["input_ids"].shape[1] :]
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return self._process_response(response, history, tools, end=True)
+        if tools:
+            return self._process_response(response, history, tools, end=True)
+        else:
+            return self._process_response(response, history, tools)

     def chat(
         self,
xinference/model/llm/pytorch/deepseek_vl.py

@@ -52,7 +52,8 @@ class DeepSeekVLChatModel(PytorchChatModel):
     def match(
         cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
     ) -> bool:
-        if "deepseek" in model_family.model_name:
+        llm_family = model_family.model_family or model_family.model_name
+        if "deepseek-vl" in llm_family:
            return True
         return False
xinference/model/llm/pytorch/falcon.py

@@ -71,7 +71,8 @@ class FalconPytorchModel(PytorchModel):
     ) -> bool:
         if llm_spec.model_format != "pytorch":
             return False
-        if "falcon" not in llm_family.model_name:
+        model_family = llm_family.model_family or llm_family.model_name
+        if "falcon" not in model_family:
             return False
         if "generate" not in llm_family.model_ability:
             return False
xinference/model/llm/pytorch/llama_2.py

@@ -55,7 +55,8 @@ class LlamaPytorchModel(PytorchModel):
     ) -> bool:
         if llm_spec.model_format != "pytorch":
             return False
-        if "llama-2" not in llm_family.model_name:
+        model_family = llm_family.model_family or llm_family.model_name
+        if "llama-2" not in model_family:
             return False
         if "generate" not in llm_family.model_ability:
             return False
@@ -99,7 +100,8 @@ class LlamaPytorchChatModel(PytorchChatModel):
     ) -> bool:
         if llm_spec.model_format != "pytorch":
             return False
-        if "llama-2" not in llm_family.model_name:
+        model_family = llm_family.model_family or llm_family.model_name
+        if "llama-2" not in model_family:
             return False
         if "chat" not in llm_family.model_ability:
             return False
xinference/model/llm/pytorch/omnilmm.py

@@ -44,7 +44,8 @@ class OmniLMMModel(PytorchChatModel):
     def match(
         cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
     ) -> bool:
-        if "OmniLMM" in model_family.model_name:
+        llm_family = model_family.model_family or model_family.model_name
+        if "OmniLMM" in llm_family:
             return True
         return False
xinference/model/llm/pytorch/qwen_vl.py

@@ -52,7 +52,8 @@ class QwenVLChatModel(PytorchChatModel):
     def match(
         cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
     ) -> bool:
-        if "qwen" in model_family.model_name and "vision" in model_family.model_ability:
+        llm_family = model_family.model_family or model_family.model_name
+        if "qwen" in llm_family and "vision" in model_family.model_ability:
             return True
         return False
xinference/model/llm/pytorch/vicuna.py

@@ -61,7 +61,8 @@ class VicunaPytorchChatModel(PytorchChatModel):
     ) -> bool:
         if llm_spec.model_format != "pytorch":
             return False
-        if "vicuna" not in llm_family.model_name:
+        model_family = llm_family.model_family or llm_family.model_name
+        if "vicuna" not in model_family:
             return False
         if "chat" not in llm_family.model_ability:
             return False
xinference/model/llm/pytorch/yi_vl.py

@@ -51,7 +51,8 @@ class YiVLChatModel(PytorchChatModel):
     def match(
         cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
     ) -> bool:
-        if "yi" in model_family.model_name:
+        llm_family = model_family.model_family or model_family.model_name
+        if "yi-vl" in llm_family:
             return True
         return False
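
All of the match() changes above apply the same idiom: prefer the spec's model_family (which stays meaningful for user-registered models) and fall back to model_name, then test against a more specific family string such as "deepseek-vl" or "yi-vl". A condensed, self-contained sketch of that idiom (the Spec class is illustrative, not from the codebase):

from dataclasses import dataclass
from typing import Optional


@dataclass
class Spec:  # illustrative stand-in for LLMFamilyV1
    model_name: str
    model_family: Optional[str] = None


def matches_family(spec: Spec, keyword: str) -> bool:
    family = spec.model_family or spec.model_name
    return keyword in family


# A custom model registered under its own name still matches via model_family,
# while an unrelated "deepseek" text model no longer trips the vision matcher.
assert matches_family(Spec("deepseek-vl-7b-chat"), "deepseek-vl")
assert matches_family(Spec("my-finetune", model_family="deepseek-vl"), "deepseek-vl")
assert not matches_family(Spec("deepseek-llm-7b-chat"), "deepseek-vl")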
 
xinference/model/llm/sglang/core.py

@@ -17,7 +17,6 @@ import time
 import uuid
 from typing import AsyncGenerator, Dict, List, Optional, TypedDict, Union

-from ....constants import XINFERENCE_ENABLE_SGLANG
 from ....types import (
     ChatCompletion,
     ChatCompletionChunk,
@@ -63,15 +62,26 @@ try:
 except ImportError:
     SGLANG_INSTALLED = False

-SGLANG_SUPPORTED_MODELS = ["llama-2", "mistral-v0.1", "mixtral-v0.1"]
+SGLANG_SUPPORTED_MODELS = [
+    "llama-2",
+    "llama-3",
+    "llama-3.1",
+    "mistral-v0.1",
+    "mixtral-v0.1",
+]
 SGLANG_SUPPORTED_CHAT_MODELS = [
     "llama-2-chat",
+    "llama-3-instruct",
+    "llama-3.1-instruct",
     "qwen-chat",
     "qwen1.5-chat",
+    "qwen2-instruct",
+    "qwen2-moe-instruct",
     "mistral-instruct-v0.1",
     "mistral-instruct-v0.2",
     "mixtral-instruct-v0.1",
     "gemma-it",
+    "gemma-2-it",
 ]


@@ -168,8 +178,6 @@ class SGLANGModel(LLM):
     def match(
         cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
     ) -> bool:
-        if not XINFERENCE_ENABLE_SGLANG:
-            return False
         if not cls._has_cuda_device():
             return False
         if not cls._is_linux():
@@ -332,8 +340,6 @@ class SGLANGChatModel(SGLANGModel, ChatModelMixin):
     def match(
         cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
     ) -> bool:
-        if not XINFERENCE_ENABLE_SGLANG:
-            return False
         if llm_spec.model_format not in ["pytorch", "gptq", "awq"]:
             return False
         if llm_spec.model_format == "pytorch":
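
With the XINFERENCE_ENABLE_SGLANG gate removed, the SGLang backend is considered whenever sglang is installed and the model family is in the lists above, and it can still be selected explicitly. A hedged client-side example (size and quantization values are illustrative):

from xinference.client import Client

client = Client("http://127.0.0.1:9997")

# Explicitly request the SGLang engine for one of the newly listed families;
# previously this also required setting XINFERENCE_ENABLE_SGLANG.
model_uid = client.launch_model(
    model_name="qwen2-instruct",
    model_engine="sglang",
    model_format="pytorch",
    model_size_in_billions=7,
    quantization="none",
)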
xinference/model/llm/vllm/core.py

@@ -28,7 +28,6 @@ from typing import (
     Union,
 )

-from ....constants import XINFERENCE_DISABLE_VLLM
 from ....types import (
     ChatCompletion,
     ChatCompletionChunk,
@@ -152,6 +151,7 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.4.0":
     VLLM_SUPPORTED_CHAT_MODELS.append("c4ai-command-r-v01")

 if VLLM_INSTALLED and vllm.__version__ >= "0.5.3":
+    VLLM_SUPPORTED_CHAT_MODELS.append("gemma-2-it")
     VLLM_SUPPORTED_CHAT_MODELS.append("mistral-nemo-instruct")
     VLLM_SUPPORTED_CHAT_MODELS.append("mistral-large-instruct")

@@ -296,8 +296,6 @@ class VLLMModel(LLM):
     def match(
         cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
     ) -> bool:
-        if XINFERENCE_DISABLE_VLLM:
-            return False
         if not cls._has_cuda_device():
             return False
         if not cls._is_linux():
@@ -522,8 +520,6 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
     def match(
         cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
     ) -> bool:
-        if XINFERENCE_DISABLE_VLLM:
-            return False
         if llm_spec.model_format not in ["pytorch", "gptq", "awq"]:
             return False
         if llm_spec.model_format == "pytorch":
xinference/model/rerank/core.py

@@ -107,7 +107,7 @@ class RerankModel:
         self,
         model_spec: RerankModelSpec,
         model_uid: str,
-        model_path: str,
+        model_path: Optional[str] = None,
         device: Optional[str] = None,
         use_fp16: bool = False,
         model_config: Optional[Dict] = None,
@@ -290,6 +290,7 @@ def create_rerank_model_instance(
     model_uid: str,
     model_name: str,
     download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[RerankModel, RerankModelDescription]:
     from ..utils import download_from_modelscope
@@ -321,8 +322,8 @@
             f"Huggingface: {BUILTIN_RERANK_MODELS.keys()}"
             f"ModelScope: {MODELSCOPE_RERANK_MODELS.keys()}"
         )
-
-    model_path = cache(model_spec)
+    if not model_path:
+        model_path = cache(model_spec)
     use_fp16 = kwargs.pop("use_fp16", False)
     model = RerankModel(
         model_spec, model_uid, model_path, use_fp16=use_fp16, model_config=kwargs
xinference/web/ui/build/asset-manifest.json

@@ -1,14 +1,14 @@
 {
   "files": {
     "main.css": "./static/css/main.4bafd904.css",
-    "main.js": "./static/js/main.2ef0cfaf.js",
+    "main.js": "./static/js/main.af906659.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
     "main.4bafd904.css.map": "./static/css/main.4bafd904.css.map",
-    "main.2ef0cfaf.js.map": "./static/js/main.2ef0cfaf.js.map"
+    "main.af906659.js.map": "./static/js/main.af906659.js.map"
   },
   "entrypoints": [
     "static/css/main.4bafd904.css",
-    "static/js/main.2ef0cfaf.js"
+    "static/js/main.af906659.js"
   ]
 }
xinference/web/ui/build/index.html

@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.2ef0cfaf.js"></script><link href="./static/css/main.4bafd904.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.af906659.js"></script><link href="./static/css/main.4bafd904.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>