xinference 1.7.1.post1__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (136) hide show
  1. xinference/_version.py +3 -3
  2. xinference/client/restful/async_restful_client.py +8 -13
  3. xinference/client/restful/restful_client.py +6 -2
  4. xinference/core/chat_interface.py +6 -4
  5. xinference/core/media_interface.py +5 -0
  6. xinference/core/model.py +1 -5
  7. xinference/core/supervisor.py +117 -68
  8. xinference/core/worker.py +49 -37
  9. xinference/deploy/test/test_cmdline.py +2 -6
  10. xinference/model/audio/__init__.py +26 -23
  11. xinference/model/audio/chattts.py +3 -2
  12. xinference/model/audio/core.py +49 -98
  13. xinference/model/audio/cosyvoice.py +3 -2
  14. xinference/model/audio/custom.py +28 -73
  15. xinference/model/audio/f5tts.py +3 -2
  16. xinference/model/audio/f5tts_mlx.py +3 -2
  17. xinference/model/audio/fish_speech.py +3 -2
  18. xinference/model/audio/funasr.py +17 -4
  19. xinference/model/audio/kokoro.py +3 -2
  20. xinference/model/audio/megatts.py +3 -2
  21. xinference/model/audio/melotts.py +3 -2
  22. xinference/model/audio/model_spec.json +572 -171
  23. xinference/model/audio/utils.py +0 -6
  24. xinference/model/audio/whisper.py +3 -2
  25. xinference/model/audio/whisper_mlx.py +3 -2
  26. xinference/model/cache_manager.py +141 -0
  27. xinference/model/core.py +6 -49
  28. xinference/model/custom.py +174 -0
  29. xinference/model/embedding/__init__.py +67 -56
  30. xinference/model/embedding/cache_manager.py +35 -0
  31. xinference/model/embedding/core.py +104 -84
  32. xinference/model/embedding/custom.py +55 -78
  33. xinference/model/embedding/embed_family.py +80 -31
  34. xinference/model/embedding/flag/core.py +21 -5
  35. xinference/model/embedding/llama_cpp/__init__.py +0 -0
  36. xinference/model/embedding/llama_cpp/core.py +234 -0
  37. xinference/model/embedding/model_spec.json +968 -103
  38. xinference/model/embedding/sentence_transformers/core.py +30 -20
  39. xinference/model/embedding/vllm/core.py +11 -5
  40. xinference/model/flexible/__init__.py +8 -2
  41. xinference/model/flexible/core.py +26 -119
  42. xinference/model/flexible/custom.py +69 -0
  43. xinference/model/flexible/launchers/image_process_launcher.py +1 -0
  44. xinference/model/flexible/launchers/modelscope_launcher.py +5 -1
  45. xinference/model/flexible/launchers/transformers_launcher.py +15 -3
  46. xinference/model/flexible/launchers/yolo_launcher.py +5 -1
  47. xinference/model/image/__init__.py +20 -20
  48. xinference/model/image/cache_manager.py +62 -0
  49. xinference/model/image/core.py +70 -182
  50. xinference/model/image/custom.py +28 -72
  51. xinference/model/image/model_spec.json +402 -119
  52. xinference/model/image/ocr/got_ocr2.py +3 -2
  53. xinference/model/image/stable_diffusion/core.py +22 -7
  54. xinference/model/image/stable_diffusion/mlx.py +6 -6
  55. xinference/model/image/utils.py +2 -2
  56. xinference/model/llm/__init__.py +71 -94
  57. xinference/model/llm/cache_manager.py +292 -0
  58. xinference/model/llm/core.py +37 -111
  59. xinference/model/llm/custom.py +88 -0
  60. xinference/model/llm/llama_cpp/core.py +5 -7
  61. xinference/model/llm/llm_family.json +16260 -8151
  62. xinference/model/llm/llm_family.py +138 -839
  63. xinference/model/llm/lmdeploy/core.py +5 -7
  64. xinference/model/llm/memory.py +3 -4
  65. xinference/model/llm/mlx/core.py +6 -8
  66. xinference/model/llm/reasoning_parser.py +3 -1
  67. xinference/model/llm/sglang/core.py +32 -14
  68. xinference/model/llm/transformers/chatglm.py +3 -7
  69. xinference/model/llm/transformers/core.py +49 -27
  70. xinference/model/llm/transformers/deepseek_v2.py +2 -2
  71. xinference/model/llm/transformers/gemma3.py +2 -2
  72. xinference/model/llm/transformers/multimodal/cogagent.py +2 -2
  73. xinference/model/llm/transformers/multimodal/deepseek_vl2.py +2 -2
  74. xinference/model/llm/transformers/multimodal/gemma3.py +2 -2
  75. xinference/model/llm/transformers/multimodal/glm4_1v.py +167 -0
  76. xinference/model/llm/transformers/multimodal/glm4v.py +2 -2
  77. xinference/model/llm/transformers/multimodal/intern_vl.py +2 -2
  78. xinference/model/llm/transformers/multimodal/minicpmv26.py +3 -3
  79. xinference/model/llm/transformers/multimodal/ovis2.py +2 -2
  80. xinference/model/llm/transformers/multimodal/qwen-omni.py +2 -2
  81. xinference/model/llm/transformers/multimodal/qwen2_audio.py +2 -2
  82. xinference/model/llm/transformers/multimodal/qwen2_vl.py +2 -2
  83. xinference/model/llm/transformers/opt.py +3 -7
  84. xinference/model/llm/utils.py +34 -49
  85. xinference/model/llm/vllm/core.py +77 -27
  86. xinference/model/llm/vllm/xavier/engine.py +5 -3
  87. xinference/model/llm/vllm/xavier/scheduler.py +10 -6
  88. xinference/model/llm/vllm/xavier/transfer.py +1 -1
  89. xinference/model/rerank/__init__.py +26 -25
  90. xinference/model/rerank/core.py +47 -87
  91. xinference/model/rerank/custom.py +25 -71
  92. xinference/model/rerank/model_spec.json +158 -33
  93. xinference/model/rerank/utils.py +2 -2
  94. xinference/model/utils.py +115 -54
  95. xinference/model/video/__init__.py +13 -17
  96. xinference/model/video/core.py +44 -102
  97. xinference/model/video/diffusers.py +4 -3
  98. xinference/model/video/model_spec.json +90 -21
  99. xinference/types.py +5 -3
  100. xinference/web/ui/build/asset-manifest.json +3 -3
  101. xinference/web/ui/build/index.html +1 -1
  102. xinference/web/ui/build/static/js/main.7d24df53.js +3 -0
  103. xinference/web/ui/build/static/js/main.7d24df53.js.map +1 -0
  104. xinference/web/ui/node_modules/.cache/babel-loader/2704ff66a5f73ca78b341eb3edec60154369df9d87fbc8c6dd60121abc5e1b0a.json +1 -0
  105. xinference/web/ui/node_modules/.cache/babel-loader/607dfef23d33e6b594518c0c6434567639f24f356b877c80c60575184ec50ed0.json +1 -0
  106. xinference/web/ui/node_modules/.cache/babel-loader/9be3d56173aacc3efd0b497bcb13c4f6365de30069176ee9403b40e717542326.json +1 -0
  107. xinference/web/ui/node_modules/.cache/babel-loader/9f9dd6c32c78a222d07da5987ae902effe16bcf20aac00774acdccc4de3c9ff2.json +1 -0
  108. xinference/web/ui/node_modules/.cache/babel-loader/b2ab5ee972c60d15eb9abf5845705f8ab7e1d125d324d9a9b1bcae5d6fd7ffb2.json +1 -0
  109. xinference/web/ui/src/locales/en.json +0 -1
  110. xinference/web/ui/src/locales/ja.json +0 -1
  111. xinference/web/ui/src/locales/ko.json +0 -1
  112. xinference/web/ui/src/locales/zh.json +0 -1
  113. {xinference-1.7.1.post1.dist-info → xinference-1.8.0.dist-info}/METADATA +9 -11
  114. {xinference-1.7.1.post1.dist-info → xinference-1.8.0.dist-info}/RECORD +119 -119
  115. xinference/model/audio/model_spec_modelscope.json +0 -231
  116. xinference/model/embedding/model_spec_modelscope.json +0 -293
  117. xinference/model/embedding/utils.py +0 -18
  118. xinference/model/image/model_spec_modelscope.json +0 -375
  119. xinference/model/llm/llama_cpp/memory.py +0 -457
  120. xinference/model/llm/llm_family_csghub.json +0 -56
  121. xinference/model/llm/llm_family_modelscope.json +0 -8700
  122. xinference/model/llm/llm_family_openmind_hub.json +0 -1019
  123. xinference/model/rerank/model_spec_modelscope.json +0 -85
  124. xinference/model/video/model_spec_modelscope.json +0 -184
  125. xinference/web/ui/build/static/js/main.9b12b7f9.js +0 -3
  126. xinference/web/ui/build/static/js/main.9b12b7f9.js.map +0 -1
  127. xinference/web/ui/node_modules/.cache/babel-loader/1460361af6975e63576708039f1cb732faf9c672d97c494d4055fc6331460be0.json +0 -1
  128. xinference/web/ui/node_modules/.cache/babel-loader/4efd8dda58fda83ed9546bf2f587df67f8d98e639117bee2d9326a9a1d9bebb2.json +0 -1
  129. xinference/web/ui/node_modules/.cache/babel-loader/55b9fb40b57fa926e8f05f31c2f96467e76e5ad62f033dca97c03f9e8c4eb4fe.json +0 -1
  130. xinference/web/ui/node_modules/.cache/babel-loader/5b2dafe5aa9e1105e0244a2b6751807342fa86aa0144b4e84d947a1686102715.json +0 -1
  131. xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +0 -1
  132. /xinference/web/ui/build/static/js/{main.9b12b7f9.js.LICENSE.txt → main.7d24df53.js.LICENSE.txt} +0 -0
  133. {xinference-1.7.1.post1.dist-info → xinference-1.8.0.dist-info}/WHEEL +0 -0
  134. {xinference-1.7.1.post1.dist-info → xinference-1.8.0.dist-info}/entry_points.txt +0 -0
  135. {xinference-1.7.1.post1.dist-info → xinference-1.8.0.dist-info}/licenses/LICENSE +0 -0
  136. {xinference-1.7.1.post1.dist-info → xinference-1.8.0.dist-info}/top_level.txt +0 -0
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-06-30T19:23:32+0800",
11
+ "date": "2025-07-20T15:28:25+0800",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "84f10dcf287df295396d5988016452ad8db98dfa",
15
- "version": "1.7.1.post1"
14
+ "full-revisionid": "abc42ca3105e3e0fd6f7861fb155ff807c1777a5",
15
+ "version": "1.8.0"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
xinference/client/restful/async_restful_client.py CHANGED
@@ -86,12 +86,8 @@ class AsyncRESTfulModelHandle:
86
86
 
87
87
  def __del__(self):
88
88
  if self.session:
89
- try:
90
- loop = asyncio.get_event_loop()
91
- except RuntimeError:
92
- loop = asyncio.new_event_loop()
93
- asyncio.set_event_loop(loop)
94
- loop.run_until_complete(self.close())
89
+ loop = asyncio.get_event_loop()
90
+ loop.create_task(self.close())
95
91
 
96
92
 
97
93
  class AsyncRESTfulEmbeddingModelHandle(AsyncRESTfulModelHandle):
@@ -418,7 +414,10 @@ class AsyncRESTfulImageModelHandle(AsyncRESTfulModelHandle):
418
414
  files.append((key, (None, value)))
419
415
  files.append(("image", ("image", image, "application/octet-stream")))
420
416
  files.append(
421
- ("mask_image", ("mask_image", mask_image, "application/octet-stream"))
417
+ (
418
+ "mask_image",
419
+ ("mask_image", mask_image, "application/octet-stream"),
420
+ )
422
421
  )
423
422
  response = await self.session.post(url, files=files, headers=self.auth_headers)
424
423
  if response.status != 200:
@@ -986,12 +985,8 @@ class AsyncClient:
986
985
 
987
986
  def __del__(self):
988
987
  if self.session:
989
- try:
990
- loop = asyncio.get_event_loop()
991
- except RuntimeError:
992
- loop = asyncio.new_event_loop()
993
- asyncio.set_event_loop(loop)
994
- loop.run_until_complete(self.close())
988
+ loop = asyncio.get_event_loop()
989
+ loop.create_task(self.close())
995
990
 
996
991
  def _set_token(self, token: Optional[str]):
997
992
  if not self._cluster_authed or token is None:
xinference/client/restful/restful_client.py CHANGED
@@ -1397,7 +1397,9 @@ class Client:
1397
1397
  response_data = response.json()
1398
1398
  return response_data
1399
1399
 
1400
- def list_model_registrations(self, model_type: str) -> List[Dict[str, Any]]:
1400
+ def list_model_registrations(
1401
+ self, model_type: str, detailed: bool = False
1402
+ ) -> List[Dict[str, Any]]:
1401
1403
  """
1402
1404
  List models registered on the server.
1403
1405
 
@@ -1405,6 +1407,8 @@ class Client:
1405
1407
  ----------
1406
1408
  model_type: str
1407
1409
  The type of the model.
1410
+ detailed: bool
1411
+ Whether to display detailed information.
1408
1412
 
1409
1413
  Returns
1410
1414
  -------
@@ -1417,7 +1421,7 @@ class Client:
1417
1421
  Report failure to list model registration. Provide details of failure through error message.
1418
1422
 
1419
1423
  """
1420
- url = f"{self.base_url}/v1/model_registrations/{model_type}"
1424
+ url = f"{self.base_url}/v1/model_registrations/{model_type}?detailed={'true' if detailed else 'false'}"
1421
1425
  response = self.session.get(url, headers=self._headers)
1422
1426
  if response.status_code != 200:
1423
1427
  raise RuntimeError(
xinference/core/chat_interface.py CHANGED
@@ -292,9 +292,11 @@ class GradioInterface:
292
292
  max_tokens = gr.Slider(
293
293
  minimum=1,
294
294
  maximum=self.context_length,
295
- value=512
296
- if "reasoning" not in self.model_ability
297
- else self.context_length // 2,
295
+ value=(
296
+ 512
297
+ if "reasoning" not in self.model_ability
298
+ else self.context_length // 2
299
+ ),
298
300
  step=1,
299
301
  label="Max Tokens",
300
302
  )
@@ -357,7 +359,7 @@ class GradioInterface:
357
359
  if "content" not in delta:
358
360
  continue
359
361
  else:
360
- response_content += delta["content"]
362
+ response_content += html.escape(delta["content"])
361
363
  bot[-1][1] = response_content
362
364
  yield history, bot
363
365
  history.append(
xinference/core/media_interface.py CHANGED
@@ -221,6 +221,7 @@ class MediaInterface:
221
221
  n: int,
222
222
  size_width: int,
223
223
  size_height: int,
224
+ guidance_scale: int,
224
225
  num_inference_steps: int,
225
226
  padding_image_to_multiple: int,
226
227
  sampler_name: Optional[str] = None,
@@ -237,6 +238,7 @@ class MediaInterface:
237
238
  size = f"{int(size_width)}*{int(size_height)}"
238
239
  else:
239
240
  size = None
241
+ guidance_scale = None if guidance_scale == -1 else guidance_scale # type: ignore
240
242
  num_inference_steps = (
241
243
  None if num_inference_steps == -1 else num_inference_steps # type: ignore
242
244
  )
@@ -262,6 +264,7 @@ class MediaInterface:
262
264
  size=size,
263
265
  response_format="b64_json",
264
266
  num_inference_steps=num_inference_steps,
267
+ guidance_scale=guidance_scale,
265
268
  padding_image_to_multiple=padding_image_to_multiple,
266
269
  sampler_name=sampler_name,
267
270
  )
@@ -314,6 +317,7 @@ class MediaInterface:
314
317
  size_height = gr.Number(label="Height", value=-1)
315
318
 
316
319
  with gr.Row():
320
+ guidance_scale = gr.Number(label="Guidance scale", value=-1)
317
321
  num_inference_steps = gr.Number(
318
322
  label="Inference Step Number", value=-1
319
323
  )
@@ -341,6 +345,7 @@ class MediaInterface:
341
345
  n,
342
346
  size_width,
343
347
  size_height,
348
+ guidance_scale,
344
349
  num_inference_steps,
345
350
  padding_image_to_multiple,
346
351
  sampler_name,
xinference/core/model.py CHANGED
@@ -51,7 +51,6 @@ if TYPE_CHECKING:
51
51
  from .progress_tracker import ProgressTrackerActor
52
52
  from .worker import WorkerActor
53
53
  from ..model.llm.core import LLM
54
- from ..model.core import ModelDescription
55
54
  import PIL
56
55
 
57
56
  import logging
@@ -225,7 +224,6 @@ class ModelActor(xo.StatelessActor, CancelMixin):
225
224
  worker_address: str,
226
225
  model: "LLM",
227
226
  replica_model_uid: str,
228
- model_description: Optional["ModelDescription"] = None,
229
227
  request_limits: Optional[int] = None,
230
228
  xavier_config: Optional[Dict] = None,
231
229
  n_worker: Optional[int] = 1,
@@ -244,9 +242,7 @@ class ModelActor(xo.StatelessActor, CancelMixin):
244
242
  self._worker_address = worker_address
245
243
  self._replica_model_uid = replica_model_uid
246
244
  self._model = model
247
- self._model_description = (
248
- model_description.to_dict() if model_description else {}
249
- )
245
+ self._model_description = self._model.model_family.to_description()
250
246
  self._request_limits = (
251
247
  float("inf") if request_limits is None else request_limits
252
248
  )
xinference/core/supervisor.py CHANGED
@@ -62,13 +62,13 @@ from .utils import (
62
62
  )
63
63
 
64
64
  if TYPE_CHECKING:
65
- from ..model.audio import AudioModelFamilyV1
66
- from ..model.embedding import EmbeddingModelSpec
65
+ from ..model.audio import AudioModelFamilyV2
66
+ from ..model.embedding import EmbeddingModelFamilyV2
67
67
  from ..model.flexible import FlexibleModelSpec
68
- from ..model.image import ImageModelFamilyV1
69
- from ..model.llm import LLMFamilyV1
70
- from ..model.rerank import RerankModelSpec
71
- from ..model.video import VideoModelFamilyV1
68
+ from ..model.image import ImageModelFamilyV2
69
+ from ..model.llm import LLMFamilyV2
70
+ from ..model.rerank import RerankModelFamilyV2
71
+ from ..model.video import VideoModelFamilyV2
72
72
  from .worker import WorkerActor
73
73
 
74
74
 
@@ -94,9 +94,9 @@ class WorkerStatus:
94
94
  class ReplicaInfo:
95
95
  replica: int
96
96
  scheduler: Iterator
97
- replica_to_worker_refs: DefaultDict[
98
- int, List[xo.ActorRefType["WorkerActor"]]
99
- ] = field(default_factory=lambda: defaultdict(list))
97
+ replica_to_worker_refs: DefaultDict[int, List[xo.ActorRefType["WorkerActor"]]] = (
98
+ field(default_factory=lambda: defaultdict(list))
99
+ )
100
100
 
101
101
 
102
102
  class SupervisorActor(xo.StatelessActor):
@@ -144,10 +144,12 @@ class SupervisorActor(xo.StatelessActor):
144
144
  from .progress_tracker import ProgressTrackerActor
145
145
  from .status_guard import StatusGuardActor
146
146
 
147
- self._status_guard_ref: xo.ActorRefType[ # type: ignore
148
- "StatusGuardActor"
149
- ] = await xo.create_actor(
150
- StatusGuardActor, address=self.address, uid=StatusGuardActor.default_uid()
147
+ self._status_guard_ref: xo.ActorRefType["StatusGuardActor"] = ( # type: ignore
148
+ await xo.create_actor(
149
+ StatusGuardActor,
150
+ address=self.address,
151
+ uid=StatusGuardActor.default_uid(),
152
+ )
151
153
  )
152
154
  self._cache_tracker_ref: xo.ActorRefType[ # type: ignore
153
155
  "CacheTrackerActor"
@@ -173,14 +175,14 @@ class SupervisorActor(xo.StatelessActor):
173
175
  )
174
176
 
175
177
  from ..model.audio import (
176
- CustomAudioModelFamilyV1,
178
+ CustomAudioModelFamilyV2,
177
179
  generate_audio_description,
178
180
  get_audio_model_descriptions,
179
181
  register_audio,
180
182
  unregister_audio,
181
183
  )
182
184
  from ..model.embedding import (
183
- CustomEmbeddingModelSpec,
185
+ CustomEmbeddingModelFamilyV2,
184
186
  generate_embedding_description,
185
187
  get_embedding_model_descriptions,
186
188
  register_embedding,
@@ -194,21 +196,21 @@ class SupervisorActor(xo.StatelessActor):
194
196
  unregister_flexible_model,
195
197
  )
196
198
  from ..model.image import (
197
- CustomImageModelFamilyV1,
199
+ CustomImageModelFamilyV2,
198
200
  generate_image_description,
199
201
  get_image_model_descriptions,
200
202
  register_image,
201
203
  unregister_image,
202
204
  )
203
205
  from ..model.llm import (
204
- CustomLLMFamilyV1,
205
- generate_llm_description,
206
- get_llm_model_descriptions,
206
+ CustomLLMFamilyV2,
207
+ generate_llm_version_info,
208
+ get_llm_version_infos,
207
209
  register_llm,
208
210
  unregister_llm,
209
211
  )
210
212
  from ..model.rerank import (
211
- CustomRerankModelSpec,
213
+ CustomRerankModelFamilyV2,
212
214
  generate_rerank_description,
213
215
  get_rerank_model_descriptions,
214
216
  register_rerank,
@@ -217,31 +219,31 @@ class SupervisorActor(xo.StatelessActor):
217
219
 
218
220
  self._custom_register_type_to_cls: Dict[str, Tuple] = { # type: ignore
219
221
  "LLM": (
220
- CustomLLMFamilyV1,
222
+ CustomLLMFamilyV2,
221
223
  register_llm,
222
224
  unregister_llm,
223
- generate_llm_description,
225
+ generate_llm_version_info,
224
226
  ),
225
227
  "embedding": (
226
- CustomEmbeddingModelSpec,
228
+ CustomEmbeddingModelFamilyV2,
227
229
  register_embedding,
228
230
  unregister_embedding,
229
231
  generate_embedding_description,
230
232
  ),
231
233
  "rerank": (
232
- CustomRerankModelSpec,
234
+ CustomRerankModelFamilyV2,
233
235
  register_rerank,
234
236
  unregister_rerank,
235
237
  generate_rerank_description,
236
238
  ),
237
239
  "image": (
238
- CustomImageModelFamilyV1,
240
+ CustomImageModelFamilyV2,
239
241
  register_image,
240
242
  unregister_image,
241
243
  generate_image_description,
242
244
  ),
243
245
  "audio": (
244
- CustomAudioModelFamilyV1,
246
+ CustomAudioModelFamilyV2,
245
247
  register_audio,
246
248
  unregister_audio,
247
249
  generate_audio_description,
@@ -256,7 +258,7 @@ class SupervisorActor(xo.StatelessActor):
256
258
 
257
259
  # record model version
258
260
  model_version_infos: Dict[str, List[Dict]] = {} # type: ignore
259
- model_version_infos.update(get_llm_model_descriptions())
261
+ model_version_infos.update(get_llm_version_infos())
260
262
  model_version_infos.update(get_embedding_model_descriptions())
261
263
  model_version_infos.update(get_rerank_model_descriptions())
262
264
  model_version_infos.update(get_image_model_descriptions())
@@ -405,9 +407,9 @@ class SupervisorActor(xo.StatelessActor):
405
407
  }
406
408
 
407
409
  async def _to_llm_reg(
408
- self, llm_family: "LLMFamilyV1", is_builtin: bool
410
+ self, llm_family: "LLMFamilyV2", is_builtin: bool
409
411
  ) -> Dict[str, Any]:
410
- from ..model.llm import get_cache_status
412
+ from ..model.llm.cache_manager import LLMCacheManager
411
413
 
412
414
  instance_cnt = await self.get_instance_count(llm_family.model_name)
413
415
  version_cnt = await self.get_model_version_count(llm_family.model_name)
@@ -415,9 +417,17 @@ class SupervisorActor(xo.StatelessActor):
415
417
  if self.is_local_deployment():
416
418
  specs = []
417
419
  # TODO: does not work when the supervisor and worker are running on separate nodes.
418
- for spec in llm_family.model_specs:
419
- cache_status = get_cache_status(llm_family, spec)
420
- specs.append({**spec.dict(), "cache_status": cache_status})
420
+ _llm_family = llm_family.copy()
421
+ for spec in [
422
+ _spec
423
+ for _spec in llm_family.model_specs
424
+ if _spec.model_hub == "huggingface"
425
+ ]:
426
+ _llm_family.model_specs = [spec]
427
+ cache_manager = LLMCacheManager(_llm_family)
428
+ specs.append(
429
+ {**spec.dict(), "cache_status": cache_manager.get_cache_status()}
430
+ )
421
431
  res = {**llm_family.dict(), "is_builtin": is_builtin, "model_specs": specs}
422
432
  else:
423
433
  res = {**llm_family.dict(), "is_builtin": is_builtin}
@@ -426,24 +436,37 @@ class SupervisorActor(xo.StatelessActor):
426
436
  return res
427
437
 
428
438
  async def _to_embedding_model_reg(
429
- self, model_spec: "EmbeddingModelSpec", is_builtin: bool
439
+ self, model_family: "EmbeddingModelFamilyV2", is_builtin: bool
430
440
  ) -> Dict[str, Any]:
431
- from ..model.embedding import get_cache_status
441
+ from ..model.embedding.cache_manager import EmbeddingCacheManager
432
442
 
433
- instance_cnt = await self.get_instance_count(model_spec.model_name)
434
- version_cnt = await self.get_model_version_count(model_spec.model_name)
443
+ instance_cnt = await self.get_instance_count(model_family.model_name)
444
+ version_cnt = await self.get_model_version_count(model_family.model_name)
435
445
 
436
446
  if self.is_local_deployment():
447
+ _family = model_family.copy()
448
+ specs = []
437
449
  # TODO: does not work when the supervisor and worker are running on separate nodes.
438
- cache_status = get_cache_status(model_spec)
450
+ for spec in [
451
+ x for x in model_family.model_specs if x.model_hub == "huggingface"
452
+ ]:
453
+ _family.model_specs = [spec]
454
+ specs.append(
455
+ {
456
+ **spec.dict(),
457
+ "cache_status": EmbeddingCacheManager(
458
+ _family
459
+ ).get_cache_status(),
460
+ }
461
+ )
439
462
  res = {
440
- **model_spec.dict(),
441
- "cache_status": cache_status,
463
+ **model_family.dict(),
442
464
  "is_builtin": is_builtin,
465
+ "model_specs": specs,
443
466
  }
444
467
  else:
445
468
  res = {
446
- **model_spec.dict(),
469
+ **model_family.dict(),
447
470
  "is_builtin": is_builtin,
448
471
  }
449
472
  res["model_version_count"] = version_cnt
@@ -451,16 +474,17 @@ class SupervisorActor(xo.StatelessActor):
451
474
  return res
452
475
 
453
476
  async def _to_rerank_model_reg(
454
- self, model_spec: "RerankModelSpec", is_builtin: bool
477
+ self, model_spec: "RerankModelFamilyV2", is_builtin: bool
455
478
  ) -> Dict[str, Any]:
456
- from ..model.rerank import get_cache_status
479
+ from ..model.cache_manager import CacheManager
457
480
 
458
481
  instance_cnt = await self.get_instance_count(model_spec.model_name)
459
482
  version_cnt = await self.get_model_version_count(model_spec.model_name)
483
+ cache_manager = CacheManager(model_spec)
460
484
 
461
485
  if self.is_local_deployment():
462
486
  # TODO: does not work when the supervisor and worker are running on separate nodes.
463
- cache_status = get_cache_status(model_spec)
487
+ cache_status = cache_manager.get_cache_status()
464
488
  res = {
465
489
  **model_spec.dict(),
466
490
  "cache_status": cache_status,
@@ -476,19 +500,19 @@ class SupervisorActor(xo.StatelessActor):
476
500
  return res
477
501
 
478
502
  async def _to_image_model_reg(
479
- self, model_family: "ImageModelFamilyV1", is_builtin: bool
503
+ self, model_family: "ImageModelFamilyV2", is_builtin: bool
480
504
  ) -> Dict[str, Any]:
481
- from ..model.image import get_cache_status
505
+ from ..model.image.cache_manager import ImageCacheManager
482
506
 
483
507
  instance_cnt = await self.get_instance_count(model_family.model_name)
484
508
  version_cnt = await self.get_model_version_count(model_family.model_name)
485
509
 
486
510
  if self.is_local_deployment():
487
511
  # TODO: does not work when the supervisor and worker are running on separate nodes.
488
- cache_status = get_cache_status(model_family)
512
+ cache_manager = ImageCacheManager(model_family)
489
513
  res = {
490
514
  **model_family.dict(),
491
- "cache_status": cache_status,
515
+ "cache_status": cache_manager.get_cache_status(),
492
516
  "is_builtin": is_builtin,
493
517
  }
494
518
  else:
@@ -501,19 +525,19 @@ class SupervisorActor(xo.StatelessActor):
501
525
  return res
502
526
 
503
527
  async def _to_audio_model_reg(
504
- self, model_family: "AudioModelFamilyV1", is_builtin: bool
528
+ self, model_family: "AudioModelFamilyV2", is_builtin: bool
505
529
  ) -> Dict[str, Any]:
506
- from ..model.audio import get_cache_status
530
+ from ..model.cache_manager import CacheManager
507
531
 
508
532
  instance_cnt = await self.get_instance_count(model_family.model_name)
509
533
  version_cnt = await self.get_model_version_count(model_family.model_name)
534
+ cache_manager = CacheManager(model_family)
510
535
 
511
536
  if self.is_local_deployment():
512
537
  # TODO: does not work when the supervisor and worker are running on separate nodes.
513
- cache_status = get_cache_status(model_family)
514
538
  res = {
515
539
  **model_family.dict(),
516
- "cache_status": cache_status,
540
+ "cache_status": cache_manager.get_cache_status(),
517
541
  "is_builtin": is_builtin,
518
542
  }
519
543
  else:
@@ -526,19 +550,19 @@ class SupervisorActor(xo.StatelessActor):
526
550
  return res
527
551
 
528
552
  async def _to_video_model_reg(
529
- self, model_family: "VideoModelFamilyV1", is_builtin: bool
553
+ self, model_family: "VideoModelFamilyV2", is_builtin: bool
530
554
  ) -> Dict[str, Any]:
531
- from ..model.video import get_cache_status
555
+ from ..model.cache_manager import CacheManager
532
556
 
533
557
  instance_cnt = await self.get_instance_count(model_family.model_name)
534
558
  version_cnt = await self.get_model_version_count(model_family.model_name)
559
+ cache_manager = CacheManager(model_family)
535
560
 
536
561
  if self.is_local_deployment():
537
562
  # TODO: does not work when the supervisor and worker are running on separate nodes.
538
- cache_status = get_cache_status(model_family)
539
563
  res = {
540
564
  **model_family.dict(),
541
- "cache_status": cache_status,
565
+ "cache_status": cache_manager.get_cache_status(),
542
566
  "is_builtin": is_builtin,
543
567
  }
544
568
  else:
@@ -630,8 +654,9 @@ class SupervisorActor(xo.StatelessActor):
630
654
  from ..model.image import BUILTIN_IMAGE_MODELS
631
655
  from ..model.image.custom import get_user_defined_images
632
656
 
633
- for model_name, family in BUILTIN_IMAGE_MODELS.items():
657
+ for model_name, families in BUILTIN_IMAGE_MODELS.items():
634
658
  if detailed:
659
+ family = [x for x in families if x.model_hub == "huggingface"][0]
635
660
  ret.append(await self._to_image_model_reg(family, is_builtin=True))
636
661
  else:
637
662
  ret.append({"model_name": model_name, "is_builtin": True})
@@ -652,8 +677,9 @@ class SupervisorActor(xo.StatelessActor):
652
677
  from ..model.audio import BUILTIN_AUDIO_MODELS
653
678
  from ..model.audio.custom import get_user_defined_audios
654
679
 
655
- for model_name, family in BUILTIN_AUDIO_MODELS.items():
680
+ for model_name, families in BUILTIN_AUDIO_MODELS.items():
656
681
  if detailed:
682
+ family = [x for x in families if x.model_hub == "huggingface"][0]
657
683
  ret.append(await self._to_audio_model_reg(family, is_builtin=True))
658
684
  else:
659
685
  ret.append({"model_name": model_name, "is_builtin": True})
@@ -673,8 +699,9 @@ class SupervisorActor(xo.StatelessActor):
673
699
  elif model_type == "video":
674
700
  from ..model.video import BUILTIN_VIDEO_MODELS
675
701
 
676
- for model_name, family in BUILTIN_VIDEO_MODELS.items():
702
+ for model_name, families in BUILTIN_VIDEO_MODELS.items():
677
703
  if detailed:
704
+ family = [x for x in families if x.model_hub == "huggingface"][0]
678
705
  ret.append(await self._to_video_model_reg(family, is_builtin=True))
679
706
  else:
680
707
  ret.append({"model_name": model_name, "is_builtin": True})
@@ -685,8 +712,9 @@ class SupervisorActor(xo.StatelessActor):
685
712
  from ..model.rerank import BUILTIN_RERANK_MODELS
686
713
  from ..model.rerank.custom import get_user_defined_reranks
687
714
 
688
- for model_name, family in BUILTIN_RERANK_MODELS.items():
715
+ for model_name, families in BUILTIN_RERANK_MODELS.items():
689
716
  if detailed:
717
+ family = [x for x in families if x.model_hub == "huggingface"][0]
690
718
  ret.append(await self._to_rerank_model_reg(family, is_builtin=True))
691
719
  else:
692
720
  ret.append({"model_name": model_name, "is_builtin": True})
@@ -755,25 +783,46 @@ class SupervisorActor(xo.StatelessActor):
755
783
  from ..model.image import BUILTIN_IMAGE_MODELS
756
784
  from ..model.image.custom import get_user_defined_images
757
785
 
758
- for f in list(BUILTIN_IMAGE_MODELS.values()) + get_user_defined_images():
759
- if f.model_name == model_name:
760
- return f
786
+ if model_name in BUILTIN_IMAGE_MODELS:
787
+ return [
788
+ x
789
+ for x in BUILTIN_IMAGE_MODELS[model_name]
790
+ if x.model_hub == "huggingface"
791
+ ][0]
792
+ else:
793
+ for f in get_user_defined_images():
794
+ if f.model_name == model_name:
795
+ return f
761
796
  raise ValueError(f"Model {model_name} not found")
762
797
  elif model_type == "audio":
763
798
  from ..model.audio import BUILTIN_AUDIO_MODELS
764
799
  from ..model.audio.custom import get_user_defined_audios
765
800
 
766
- for f in list(BUILTIN_AUDIO_MODELS.values()) + get_user_defined_audios():
767
- if f.model_name == model_name:
768
- return f
801
+ if model_name in BUILTIN_AUDIO_MODELS:
802
+ return [
803
+ x
804
+ for x in BUILTIN_AUDIO_MODELS[model_name]
805
+ if x.model_hub == "huggingface"
806
+ ][0]
807
+ else:
808
+ for f in get_user_defined_audios():
809
+ if f.model_name == model_name:
810
+ return f
769
811
  raise ValueError(f"Model {model_name} not found")
770
812
  elif model_type == "rerank":
771
813
  from ..model.rerank import BUILTIN_RERANK_MODELS
772
814
  from ..model.rerank.custom import get_user_defined_reranks
773
815
 
774
- for f in list(BUILTIN_RERANK_MODELS.values()) + get_user_defined_reranks():
775
- if f.model_name == model_name:
776
- return f
816
+ if model_name in BUILTIN_RERANK_MODELS:
817
+ return [
818
+ x
819
+ for x in BUILTIN_RERANK_MODELS[model_name]
820
+ if x.model_hub == "huggingface"
821
+ ][0]
822
+ else:
823
+ for f in get_user_defined_reranks():
824
+ if f.model_name == model_name:
825
+ return f
777
826
  raise ValueError(f"Model {model_name} not found")
778
827
  elif model_type == "flexible":
779
828
  from ..model.flexible import get_flexible_models