xinference 1.7.1.post1__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/client/restful/async_restful_client.py +8 -13
- xinference/client/restful/restful_client.py +6 -2
- xinference/core/chat_interface.py +6 -4
- xinference/core/media_interface.py +5 -0
- xinference/core/model.py +1 -5
- xinference/core/supervisor.py +117 -68
- xinference/core/worker.py +49 -37
- xinference/deploy/test/test_cmdline.py +2 -6
- xinference/model/audio/__init__.py +26 -23
- xinference/model/audio/chattts.py +3 -2
- xinference/model/audio/core.py +49 -98
- xinference/model/audio/cosyvoice.py +3 -2
- xinference/model/audio/custom.py +28 -73
- xinference/model/audio/f5tts.py +3 -2
- xinference/model/audio/f5tts_mlx.py +3 -2
- xinference/model/audio/fish_speech.py +3 -2
- xinference/model/audio/funasr.py +17 -4
- xinference/model/audio/kokoro.py +3 -2
- xinference/model/audio/megatts.py +3 -2
- xinference/model/audio/melotts.py +3 -2
- xinference/model/audio/model_spec.json +572 -171
- xinference/model/audio/utils.py +0 -6
- xinference/model/audio/whisper.py +3 -2
- xinference/model/audio/whisper_mlx.py +3 -2
- xinference/model/cache_manager.py +141 -0
- xinference/model/core.py +6 -49
- xinference/model/custom.py +174 -0
- xinference/model/embedding/__init__.py +67 -56
- xinference/model/embedding/cache_manager.py +35 -0
- xinference/model/embedding/core.py +104 -84
- xinference/model/embedding/custom.py +55 -78
- xinference/model/embedding/embed_family.py +80 -31
- xinference/model/embedding/flag/core.py +21 -5
- xinference/model/embedding/llama_cpp/__init__.py +0 -0
- xinference/model/embedding/llama_cpp/core.py +234 -0
- xinference/model/embedding/model_spec.json +968 -103
- xinference/model/embedding/sentence_transformers/core.py +30 -20
- xinference/model/embedding/vllm/core.py +11 -5
- xinference/model/flexible/__init__.py +8 -2
- xinference/model/flexible/core.py +26 -119
- xinference/model/flexible/custom.py +69 -0
- xinference/model/flexible/launchers/image_process_launcher.py +1 -0
- xinference/model/flexible/launchers/modelscope_launcher.py +5 -1
- xinference/model/flexible/launchers/transformers_launcher.py +15 -3
- xinference/model/flexible/launchers/yolo_launcher.py +5 -1
- xinference/model/image/__init__.py +20 -20
- xinference/model/image/cache_manager.py +62 -0
- xinference/model/image/core.py +70 -182
- xinference/model/image/custom.py +28 -72
- xinference/model/image/model_spec.json +402 -119
- xinference/model/image/ocr/got_ocr2.py +3 -2
- xinference/model/image/stable_diffusion/core.py +22 -7
- xinference/model/image/stable_diffusion/mlx.py +6 -6
- xinference/model/image/utils.py +2 -2
- xinference/model/llm/__init__.py +71 -94
- xinference/model/llm/cache_manager.py +292 -0
- xinference/model/llm/core.py +37 -111
- xinference/model/llm/custom.py +88 -0
- xinference/model/llm/llama_cpp/core.py +5 -7
- xinference/model/llm/llm_family.json +16260 -8151
- xinference/model/llm/llm_family.py +138 -839
- xinference/model/llm/lmdeploy/core.py +5 -7
- xinference/model/llm/memory.py +3 -4
- xinference/model/llm/mlx/core.py +6 -8
- xinference/model/llm/reasoning_parser.py +3 -1
- xinference/model/llm/sglang/core.py +32 -14
- xinference/model/llm/transformers/chatglm.py +3 -7
- xinference/model/llm/transformers/core.py +49 -27
- xinference/model/llm/transformers/deepseek_v2.py +2 -2
- xinference/model/llm/transformers/gemma3.py +2 -2
- xinference/model/llm/transformers/multimodal/cogagent.py +2 -2
- xinference/model/llm/transformers/multimodal/deepseek_vl2.py +2 -2
- xinference/model/llm/transformers/multimodal/gemma3.py +2 -2
- xinference/model/llm/transformers/multimodal/glm4_1v.py +167 -0
- xinference/model/llm/transformers/multimodal/glm4v.py +2 -2
- xinference/model/llm/transformers/multimodal/intern_vl.py +2 -2
- xinference/model/llm/transformers/multimodal/minicpmv26.py +3 -3
- xinference/model/llm/transformers/multimodal/ovis2.py +2 -2
- xinference/model/llm/transformers/multimodal/qwen-omni.py +2 -2
- xinference/model/llm/transformers/multimodal/qwen2_audio.py +2 -2
- xinference/model/llm/transformers/multimodal/qwen2_vl.py +2 -2
- xinference/model/llm/transformers/opt.py +3 -7
- xinference/model/llm/utils.py +34 -49
- xinference/model/llm/vllm/core.py +77 -27
- xinference/model/llm/vllm/xavier/engine.py +5 -3
- xinference/model/llm/vllm/xavier/scheduler.py +10 -6
- xinference/model/llm/vllm/xavier/transfer.py +1 -1
- xinference/model/rerank/__init__.py +26 -25
- xinference/model/rerank/core.py +47 -87
- xinference/model/rerank/custom.py +25 -71
- xinference/model/rerank/model_spec.json +158 -33
- xinference/model/rerank/utils.py +2 -2
- xinference/model/utils.py +115 -54
- xinference/model/video/__init__.py +13 -17
- xinference/model/video/core.py +44 -102
- xinference/model/video/diffusers.py +4 -3
- xinference/model/video/model_spec.json +90 -21
- xinference/types.py +5 -3
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.7d24df53.js +3 -0
- xinference/web/ui/build/static/js/main.7d24df53.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2704ff66a5f73ca78b341eb3edec60154369df9d87fbc8c6dd60121abc5e1b0a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/607dfef23d33e6b594518c0c6434567639f24f356b877c80c60575184ec50ed0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9be3d56173aacc3efd0b497bcb13c4f6365de30069176ee9403b40e717542326.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9f9dd6c32c78a222d07da5987ae902effe16bcf20aac00774acdccc4de3c9ff2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b2ab5ee972c60d15eb9abf5845705f8ab7e1d125d324d9a9b1bcae5d6fd7ffb2.json +1 -0
- xinference/web/ui/src/locales/en.json +0 -1
- xinference/web/ui/src/locales/ja.json +0 -1
- xinference/web/ui/src/locales/ko.json +0 -1
- xinference/web/ui/src/locales/zh.json +0 -1
- {xinference-1.7.1.post1.dist-info → xinference-1.8.0.dist-info}/METADATA +9 -11
- {xinference-1.7.1.post1.dist-info → xinference-1.8.0.dist-info}/RECORD +119 -119
- xinference/model/audio/model_spec_modelscope.json +0 -231
- xinference/model/embedding/model_spec_modelscope.json +0 -293
- xinference/model/embedding/utils.py +0 -18
- xinference/model/image/model_spec_modelscope.json +0 -375
- xinference/model/llm/llama_cpp/memory.py +0 -457
- xinference/model/llm/llm_family_csghub.json +0 -56
- xinference/model/llm/llm_family_modelscope.json +0 -8700
- xinference/model/llm/llm_family_openmind_hub.json +0 -1019
- xinference/model/rerank/model_spec_modelscope.json +0 -85
- xinference/model/video/model_spec_modelscope.json +0 -184
- xinference/web/ui/build/static/js/main.9b12b7f9.js +0 -3
- xinference/web/ui/build/static/js/main.9b12b7f9.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1460361af6975e63576708039f1cb732faf9c672d97c494d4055fc6331460be0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4efd8dda58fda83ed9546bf2f587df67f8d98e639117bee2d9326a9a1d9bebb2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/55b9fb40b57fa926e8f05f31c2f96467e76e5ad62f033dca97c03f9e8c4eb4fe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5b2dafe5aa9e1105e0244a2b6751807342fa86aa0144b4e84d947a1686102715.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +0 -1
- /xinference/web/ui/build/static/js/{main.9b12b7f9.js.LICENSE.txt → main.7d24df53.js.LICENSE.txt} +0 -0
- {xinference-1.7.1.post1.dist-info → xinference-1.8.0.dist-info}/WHEEL +0 -0
- {xinference-1.7.1.post1.dist-info → xinference-1.8.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.7.1.post1.dist-info → xinference-1.8.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.7.1.post1.dist-info → xinference-1.8.0.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-
|
|
11
|
+
"date": "2025-07-20T15:28:25+0800",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "1.
|
|
14
|
+
"full-revisionid": "abc42ca3105e3e0fd6f7861fb155ff807c1777a5",
|
|
15
|
+
"version": "1.8.0"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -86,12 +86,8 @@ class AsyncRESTfulModelHandle:
|
|
|
86
86
|
|
|
87
87
|
def __del__(self):
|
|
88
88
|
if self.session:
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
except RuntimeError:
|
|
92
|
-
loop = asyncio.new_event_loop()
|
|
93
|
-
asyncio.set_event_loop(loop)
|
|
94
|
-
loop.run_until_complete(self.close())
|
|
89
|
+
loop = asyncio.get_event_loop()
|
|
90
|
+
loop.create_task(self.close())
|
|
95
91
|
|
|
96
92
|
|
|
97
93
|
class AsyncRESTfulEmbeddingModelHandle(AsyncRESTfulModelHandle):
|
|
@@ -418,7 +414,10 @@ class AsyncRESTfulImageModelHandle(AsyncRESTfulModelHandle):
|
|
|
418
414
|
files.append((key, (None, value)))
|
|
419
415
|
files.append(("image", ("image", image, "application/octet-stream")))
|
|
420
416
|
files.append(
|
|
421
|
-
(
|
|
417
|
+
(
|
|
418
|
+
"mask_image",
|
|
419
|
+
("mask_image", mask_image, "application/octet-stream"),
|
|
420
|
+
)
|
|
422
421
|
)
|
|
423
422
|
response = await self.session.post(url, files=files, headers=self.auth_headers)
|
|
424
423
|
if response.status != 200:
|
|
@@ -986,12 +985,8 @@ class AsyncClient:
|
|
|
986
985
|
|
|
987
986
|
def __del__(self):
|
|
988
987
|
if self.session:
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
except RuntimeError:
|
|
992
|
-
loop = asyncio.new_event_loop()
|
|
993
|
-
asyncio.set_event_loop(loop)
|
|
994
|
-
loop.run_until_complete(self.close())
|
|
988
|
+
loop = asyncio.get_event_loop()
|
|
989
|
+
loop.create_task(self.close())
|
|
995
990
|
|
|
996
991
|
def _set_token(self, token: Optional[str]):
|
|
997
992
|
if not self._cluster_authed or token is None:
|
|
@@ -1397,7 +1397,9 @@ class Client:
|
|
|
1397
1397
|
response_data = response.json()
|
|
1398
1398
|
return response_data
|
|
1399
1399
|
|
|
1400
|
-
def list_model_registrations(
|
|
1400
|
+
def list_model_registrations(
|
|
1401
|
+
self, model_type: str, detailed: bool = False
|
|
1402
|
+
) -> List[Dict[str, Any]]:
|
|
1401
1403
|
"""
|
|
1402
1404
|
List models registered on the server.
|
|
1403
1405
|
|
|
@@ -1405,6 +1407,8 @@ class Client:
|
|
|
1405
1407
|
----------
|
|
1406
1408
|
model_type: str
|
|
1407
1409
|
The type of the model.
|
|
1410
|
+
detailed: bool
|
|
1411
|
+
Whether to display detailed information.
|
|
1408
1412
|
|
|
1409
1413
|
Returns
|
|
1410
1414
|
-------
|
|
@@ -1417,7 +1421,7 @@ class Client:
|
|
|
1417
1421
|
Report failure to list model registration. Provide details of failure through error message.
|
|
1418
1422
|
|
|
1419
1423
|
"""
|
|
1420
|
-
url = f"{self.base_url}/v1/model_registrations/{model_type}"
|
|
1424
|
+
url = f"{self.base_url}/v1/model_registrations/{model_type}?detailed={'true' if detailed else 'false'}"
|
|
1421
1425
|
response = self.session.get(url, headers=self._headers)
|
|
1422
1426
|
if response.status_code != 200:
|
|
1423
1427
|
raise RuntimeError(
|
|
@@ -292,9 +292,11 @@ class GradioInterface:
|
|
|
292
292
|
max_tokens = gr.Slider(
|
|
293
293
|
minimum=1,
|
|
294
294
|
maximum=self.context_length,
|
|
295
|
-
value=
|
|
296
|
-
|
|
297
|
-
|
|
295
|
+
value=(
|
|
296
|
+
512
|
|
297
|
+
if "reasoning" not in self.model_ability
|
|
298
|
+
else self.context_length // 2
|
|
299
|
+
),
|
|
298
300
|
step=1,
|
|
299
301
|
label="Max Tokens",
|
|
300
302
|
)
|
|
@@ -357,7 +359,7 @@ class GradioInterface:
|
|
|
357
359
|
if "content" not in delta:
|
|
358
360
|
continue
|
|
359
361
|
else:
|
|
360
|
-
response_content += delta["content"]
|
|
362
|
+
response_content += html.escape(delta["content"])
|
|
361
363
|
bot[-1][1] = response_content
|
|
362
364
|
yield history, bot
|
|
363
365
|
history.append(
|
|
@@ -221,6 +221,7 @@ class MediaInterface:
|
|
|
221
221
|
n: int,
|
|
222
222
|
size_width: int,
|
|
223
223
|
size_height: int,
|
|
224
|
+
guidance_scale: int,
|
|
224
225
|
num_inference_steps: int,
|
|
225
226
|
padding_image_to_multiple: int,
|
|
226
227
|
sampler_name: Optional[str] = None,
|
|
@@ -237,6 +238,7 @@ class MediaInterface:
|
|
|
237
238
|
size = f"{int(size_width)}*{int(size_height)}"
|
|
238
239
|
else:
|
|
239
240
|
size = None
|
|
241
|
+
guidance_scale = None if guidance_scale == -1 else guidance_scale # type: ignore
|
|
240
242
|
num_inference_steps = (
|
|
241
243
|
None if num_inference_steps == -1 else num_inference_steps # type: ignore
|
|
242
244
|
)
|
|
@@ -262,6 +264,7 @@ class MediaInterface:
|
|
|
262
264
|
size=size,
|
|
263
265
|
response_format="b64_json",
|
|
264
266
|
num_inference_steps=num_inference_steps,
|
|
267
|
+
guidance_scale=guidance_scale,
|
|
265
268
|
padding_image_to_multiple=padding_image_to_multiple,
|
|
266
269
|
sampler_name=sampler_name,
|
|
267
270
|
)
|
|
@@ -314,6 +317,7 @@ class MediaInterface:
|
|
|
314
317
|
size_height = gr.Number(label="Height", value=-1)
|
|
315
318
|
|
|
316
319
|
with gr.Row():
|
|
320
|
+
guidance_scale = gr.Number(label="Guidance scale", value=-1)
|
|
317
321
|
num_inference_steps = gr.Number(
|
|
318
322
|
label="Inference Step Number", value=-1
|
|
319
323
|
)
|
|
@@ -341,6 +345,7 @@ class MediaInterface:
|
|
|
341
345
|
n,
|
|
342
346
|
size_width,
|
|
343
347
|
size_height,
|
|
348
|
+
guidance_scale,
|
|
344
349
|
num_inference_steps,
|
|
345
350
|
padding_image_to_multiple,
|
|
346
351
|
sampler_name,
|
xinference/core/model.py
CHANGED
|
@@ -51,7 +51,6 @@ if TYPE_CHECKING:
|
|
|
51
51
|
from .progress_tracker import ProgressTrackerActor
|
|
52
52
|
from .worker import WorkerActor
|
|
53
53
|
from ..model.llm.core import LLM
|
|
54
|
-
from ..model.core import ModelDescription
|
|
55
54
|
import PIL
|
|
56
55
|
|
|
57
56
|
import logging
|
|
@@ -225,7 +224,6 @@ class ModelActor(xo.StatelessActor, CancelMixin):
|
|
|
225
224
|
worker_address: str,
|
|
226
225
|
model: "LLM",
|
|
227
226
|
replica_model_uid: str,
|
|
228
|
-
model_description: Optional["ModelDescription"] = None,
|
|
229
227
|
request_limits: Optional[int] = None,
|
|
230
228
|
xavier_config: Optional[Dict] = None,
|
|
231
229
|
n_worker: Optional[int] = 1,
|
|
@@ -244,9 +242,7 @@ class ModelActor(xo.StatelessActor, CancelMixin):
|
|
|
244
242
|
self._worker_address = worker_address
|
|
245
243
|
self._replica_model_uid = replica_model_uid
|
|
246
244
|
self._model = model
|
|
247
|
-
self._model_description = (
|
|
248
|
-
model_description.to_dict() if model_description else {}
|
|
249
|
-
)
|
|
245
|
+
self._model_description = self._model.model_family.to_description()
|
|
250
246
|
self._request_limits = (
|
|
251
247
|
float("inf") if request_limits is None else request_limits
|
|
252
248
|
)
|
xinference/core/supervisor.py
CHANGED
|
@@ -62,13 +62,13 @@ from .utils import (
|
|
|
62
62
|
)
|
|
63
63
|
|
|
64
64
|
if TYPE_CHECKING:
|
|
65
|
-
from ..model.audio import
|
|
66
|
-
from ..model.embedding import
|
|
65
|
+
from ..model.audio import AudioModelFamilyV2
|
|
66
|
+
from ..model.embedding import EmbeddingModelFamilyV2
|
|
67
67
|
from ..model.flexible import FlexibleModelSpec
|
|
68
|
-
from ..model.image import
|
|
69
|
-
from ..model.llm import
|
|
70
|
-
from ..model.rerank import
|
|
71
|
-
from ..model.video import
|
|
68
|
+
from ..model.image import ImageModelFamilyV2
|
|
69
|
+
from ..model.llm import LLMFamilyV2
|
|
70
|
+
from ..model.rerank import RerankModelFamilyV2
|
|
71
|
+
from ..model.video import VideoModelFamilyV2
|
|
72
72
|
from .worker import WorkerActor
|
|
73
73
|
|
|
74
74
|
|
|
@@ -94,9 +94,9 @@ class WorkerStatus:
|
|
|
94
94
|
class ReplicaInfo:
|
|
95
95
|
replica: int
|
|
96
96
|
scheduler: Iterator
|
|
97
|
-
replica_to_worker_refs: DefaultDict[
|
|
98
|
-
|
|
99
|
-
|
|
97
|
+
replica_to_worker_refs: DefaultDict[int, List[xo.ActorRefType["WorkerActor"]]] = (
|
|
98
|
+
field(default_factory=lambda: defaultdict(list))
|
|
99
|
+
)
|
|
100
100
|
|
|
101
101
|
|
|
102
102
|
class SupervisorActor(xo.StatelessActor):
|
|
@@ -144,10 +144,12 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
144
144
|
from .progress_tracker import ProgressTrackerActor
|
|
145
145
|
from .status_guard import StatusGuardActor
|
|
146
146
|
|
|
147
|
-
self._status_guard_ref: xo.ActorRefType[ # type: ignore
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
147
|
+
self._status_guard_ref: xo.ActorRefType["StatusGuardActor"] = ( # type: ignore
|
|
148
|
+
await xo.create_actor(
|
|
149
|
+
StatusGuardActor,
|
|
150
|
+
address=self.address,
|
|
151
|
+
uid=StatusGuardActor.default_uid(),
|
|
152
|
+
)
|
|
151
153
|
)
|
|
152
154
|
self._cache_tracker_ref: xo.ActorRefType[ # type: ignore
|
|
153
155
|
"CacheTrackerActor"
|
|
@@ -173,14 +175,14 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
173
175
|
)
|
|
174
176
|
|
|
175
177
|
from ..model.audio import (
|
|
176
|
-
|
|
178
|
+
CustomAudioModelFamilyV2,
|
|
177
179
|
generate_audio_description,
|
|
178
180
|
get_audio_model_descriptions,
|
|
179
181
|
register_audio,
|
|
180
182
|
unregister_audio,
|
|
181
183
|
)
|
|
182
184
|
from ..model.embedding import (
|
|
183
|
-
|
|
185
|
+
CustomEmbeddingModelFamilyV2,
|
|
184
186
|
generate_embedding_description,
|
|
185
187
|
get_embedding_model_descriptions,
|
|
186
188
|
register_embedding,
|
|
@@ -194,21 +196,21 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
194
196
|
unregister_flexible_model,
|
|
195
197
|
)
|
|
196
198
|
from ..model.image import (
|
|
197
|
-
|
|
199
|
+
CustomImageModelFamilyV2,
|
|
198
200
|
generate_image_description,
|
|
199
201
|
get_image_model_descriptions,
|
|
200
202
|
register_image,
|
|
201
203
|
unregister_image,
|
|
202
204
|
)
|
|
203
205
|
from ..model.llm import (
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
206
|
+
CustomLLMFamilyV2,
|
|
207
|
+
generate_llm_version_info,
|
|
208
|
+
get_llm_version_infos,
|
|
207
209
|
register_llm,
|
|
208
210
|
unregister_llm,
|
|
209
211
|
)
|
|
210
212
|
from ..model.rerank import (
|
|
211
|
-
|
|
213
|
+
CustomRerankModelFamilyV2,
|
|
212
214
|
generate_rerank_description,
|
|
213
215
|
get_rerank_model_descriptions,
|
|
214
216
|
register_rerank,
|
|
@@ -217,31 +219,31 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
217
219
|
|
|
218
220
|
self._custom_register_type_to_cls: Dict[str, Tuple] = { # type: ignore
|
|
219
221
|
"LLM": (
|
|
220
|
-
|
|
222
|
+
CustomLLMFamilyV2,
|
|
221
223
|
register_llm,
|
|
222
224
|
unregister_llm,
|
|
223
|
-
|
|
225
|
+
generate_llm_version_info,
|
|
224
226
|
),
|
|
225
227
|
"embedding": (
|
|
226
|
-
|
|
228
|
+
CustomEmbeddingModelFamilyV2,
|
|
227
229
|
register_embedding,
|
|
228
230
|
unregister_embedding,
|
|
229
231
|
generate_embedding_description,
|
|
230
232
|
),
|
|
231
233
|
"rerank": (
|
|
232
|
-
|
|
234
|
+
CustomRerankModelFamilyV2,
|
|
233
235
|
register_rerank,
|
|
234
236
|
unregister_rerank,
|
|
235
237
|
generate_rerank_description,
|
|
236
238
|
),
|
|
237
239
|
"image": (
|
|
238
|
-
|
|
240
|
+
CustomImageModelFamilyV2,
|
|
239
241
|
register_image,
|
|
240
242
|
unregister_image,
|
|
241
243
|
generate_image_description,
|
|
242
244
|
),
|
|
243
245
|
"audio": (
|
|
244
|
-
|
|
246
|
+
CustomAudioModelFamilyV2,
|
|
245
247
|
register_audio,
|
|
246
248
|
unregister_audio,
|
|
247
249
|
generate_audio_description,
|
|
@@ -256,7 +258,7 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
256
258
|
|
|
257
259
|
# record model version
|
|
258
260
|
model_version_infos: Dict[str, List[Dict]] = {} # type: ignore
|
|
259
|
-
model_version_infos.update(
|
|
261
|
+
model_version_infos.update(get_llm_version_infos())
|
|
260
262
|
model_version_infos.update(get_embedding_model_descriptions())
|
|
261
263
|
model_version_infos.update(get_rerank_model_descriptions())
|
|
262
264
|
model_version_infos.update(get_image_model_descriptions())
|
|
@@ -405,9 +407,9 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
405
407
|
}
|
|
406
408
|
|
|
407
409
|
async def _to_llm_reg(
|
|
408
|
-
self, llm_family: "
|
|
410
|
+
self, llm_family: "LLMFamilyV2", is_builtin: bool
|
|
409
411
|
) -> Dict[str, Any]:
|
|
410
|
-
from ..model.llm import
|
|
412
|
+
from ..model.llm.cache_manager import LLMCacheManager
|
|
411
413
|
|
|
412
414
|
instance_cnt = await self.get_instance_count(llm_family.model_name)
|
|
413
415
|
version_cnt = await self.get_model_version_count(llm_family.model_name)
|
|
@@ -415,9 +417,17 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
415
417
|
if self.is_local_deployment():
|
|
416
418
|
specs = []
|
|
417
419
|
# TODO: does not work when the supervisor and worker are running on separate nodes.
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
420
|
+
_llm_family = llm_family.copy()
|
|
421
|
+
for spec in [
|
|
422
|
+
_spec
|
|
423
|
+
for _spec in llm_family.model_specs
|
|
424
|
+
if _spec.model_hub == "huggingface"
|
|
425
|
+
]:
|
|
426
|
+
_llm_family.model_specs = [spec]
|
|
427
|
+
cache_manager = LLMCacheManager(_llm_family)
|
|
428
|
+
specs.append(
|
|
429
|
+
{**spec.dict(), "cache_status": cache_manager.get_cache_status()}
|
|
430
|
+
)
|
|
421
431
|
res = {**llm_family.dict(), "is_builtin": is_builtin, "model_specs": specs}
|
|
422
432
|
else:
|
|
423
433
|
res = {**llm_family.dict(), "is_builtin": is_builtin}
|
|
@@ -426,24 +436,37 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
426
436
|
return res
|
|
427
437
|
|
|
428
438
|
async def _to_embedding_model_reg(
|
|
429
|
-
self,
|
|
439
|
+
self, model_family: "EmbeddingModelFamilyV2", is_builtin: bool
|
|
430
440
|
) -> Dict[str, Any]:
|
|
431
|
-
from ..model.embedding import
|
|
441
|
+
from ..model.embedding.cache_manager import EmbeddingCacheManager
|
|
432
442
|
|
|
433
|
-
instance_cnt = await self.get_instance_count(
|
|
434
|
-
version_cnt = await self.get_model_version_count(
|
|
443
|
+
instance_cnt = await self.get_instance_count(model_family.model_name)
|
|
444
|
+
version_cnt = await self.get_model_version_count(model_family.model_name)
|
|
435
445
|
|
|
436
446
|
if self.is_local_deployment():
|
|
447
|
+
_family = model_family.copy()
|
|
448
|
+
specs = []
|
|
437
449
|
# TODO: does not work when the supervisor and worker are running on separate nodes.
|
|
438
|
-
|
|
450
|
+
for spec in [
|
|
451
|
+
x for x in model_family.model_specs if x.model_hub == "huggingface"
|
|
452
|
+
]:
|
|
453
|
+
_family.model_specs = [spec]
|
|
454
|
+
specs.append(
|
|
455
|
+
{
|
|
456
|
+
**spec.dict(),
|
|
457
|
+
"cache_status": EmbeddingCacheManager(
|
|
458
|
+
_family
|
|
459
|
+
).get_cache_status(),
|
|
460
|
+
}
|
|
461
|
+
)
|
|
439
462
|
res = {
|
|
440
|
-
**
|
|
441
|
-
"cache_status": cache_status,
|
|
463
|
+
**model_family.dict(),
|
|
442
464
|
"is_builtin": is_builtin,
|
|
465
|
+
"model_specs": specs,
|
|
443
466
|
}
|
|
444
467
|
else:
|
|
445
468
|
res = {
|
|
446
|
-
**
|
|
469
|
+
**model_family.dict(),
|
|
447
470
|
"is_builtin": is_builtin,
|
|
448
471
|
}
|
|
449
472
|
res["model_version_count"] = version_cnt
|
|
@@ -451,16 +474,17 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
451
474
|
return res
|
|
452
475
|
|
|
453
476
|
async def _to_rerank_model_reg(
|
|
454
|
-
self, model_spec: "
|
|
477
|
+
self, model_spec: "RerankModelFamilyV2", is_builtin: bool
|
|
455
478
|
) -> Dict[str, Any]:
|
|
456
|
-
from ..model.
|
|
479
|
+
from ..model.cache_manager import CacheManager
|
|
457
480
|
|
|
458
481
|
instance_cnt = await self.get_instance_count(model_spec.model_name)
|
|
459
482
|
version_cnt = await self.get_model_version_count(model_spec.model_name)
|
|
483
|
+
cache_manager = CacheManager(model_spec)
|
|
460
484
|
|
|
461
485
|
if self.is_local_deployment():
|
|
462
486
|
# TODO: does not work when the supervisor and worker are running on separate nodes.
|
|
463
|
-
cache_status = get_cache_status(
|
|
487
|
+
cache_status = cache_manager.get_cache_status()
|
|
464
488
|
res = {
|
|
465
489
|
**model_spec.dict(),
|
|
466
490
|
"cache_status": cache_status,
|
|
@@ -476,19 +500,19 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
476
500
|
return res
|
|
477
501
|
|
|
478
502
|
async def _to_image_model_reg(
|
|
479
|
-
self, model_family: "
|
|
503
|
+
self, model_family: "ImageModelFamilyV2", is_builtin: bool
|
|
480
504
|
) -> Dict[str, Any]:
|
|
481
|
-
from ..model.image import
|
|
505
|
+
from ..model.image.cache_manager import ImageCacheManager
|
|
482
506
|
|
|
483
507
|
instance_cnt = await self.get_instance_count(model_family.model_name)
|
|
484
508
|
version_cnt = await self.get_model_version_count(model_family.model_name)
|
|
485
509
|
|
|
486
510
|
if self.is_local_deployment():
|
|
487
511
|
# TODO: does not work when the supervisor and worker are running on separate nodes.
|
|
488
|
-
|
|
512
|
+
cache_manager = ImageCacheManager(model_family)
|
|
489
513
|
res = {
|
|
490
514
|
**model_family.dict(),
|
|
491
|
-
"cache_status":
|
|
515
|
+
"cache_status": cache_manager.get_cache_status(),
|
|
492
516
|
"is_builtin": is_builtin,
|
|
493
517
|
}
|
|
494
518
|
else:
|
|
@@ -501,19 +525,19 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
501
525
|
return res
|
|
502
526
|
|
|
503
527
|
async def _to_audio_model_reg(
|
|
504
|
-
self, model_family: "
|
|
528
|
+
self, model_family: "AudioModelFamilyV2", is_builtin: bool
|
|
505
529
|
) -> Dict[str, Any]:
|
|
506
|
-
from ..model.
|
|
530
|
+
from ..model.cache_manager import CacheManager
|
|
507
531
|
|
|
508
532
|
instance_cnt = await self.get_instance_count(model_family.model_name)
|
|
509
533
|
version_cnt = await self.get_model_version_count(model_family.model_name)
|
|
534
|
+
cache_manager = CacheManager(model_family)
|
|
510
535
|
|
|
511
536
|
if self.is_local_deployment():
|
|
512
537
|
# TODO: does not work when the supervisor and worker are running on separate nodes.
|
|
513
|
-
cache_status = get_cache_status(model_family)
|
|
514
538
|
res = {
|
|
515
539
|
**model_family.dict(),
|
|
516
|
-
"cache_status":
|
|
540
|
+
"cache_status": cache_manager.get_cache_status(),
|
|
517
541
|
"is_builtin": is_builtin,
|
|
518
542
|
}
|
|
519
543
|
else:
|
|
@@ -526,19 +550,19 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
526
550
|
return res
|
|
527
551
|
|
|
528
552
|
async def _to_video_model_reg(
|
|
529
|
-
self, model_family: "
|
|
553
|
+
self, model_family: "VideoModelFamilyV2", is_builtin: bool
|
|
530
554
|
) -> Dict[str, Any]:
|
|
531
|
-
from ..model.
|
|
555
|
+
from ..model.cache_manager import CacheManager
|
|
532
556
|
|
|
533
557
|
instance_cnt = await self.get_instance_count(model_family.model_name)
|
|
534
558
|
version_cnt = await self.get_model_version_count(model_family.model_name)
|
|
559
|
+
cache_manager = CacheManager(model_family)
|
|
535
560
|
|
|
536
561
|
if self.is_local_deployment():
|
|
537
562
|
# TODO: does not work when the supervisor and worker are running on separate nodes.
|
|
538
|
-
cache_status = get_cache_status(model_family)
|
|
539
563
|
res = {
|
|
540
564
|
**model_family.dict(),
|
|
541
|
-
"cache_status":
|
|
565
|
+
"cache_status": cache_manager.get_cache_status(),
|
|
542
566
|
"is_builtin": is_builtin,
|
|
543
567
|
}
|
|
544
568
|
else:
|
|
@@ -630,8 +654,9 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
630
654
|
from ..model.image import BUILTIN_IMAGE_MODELS
|
|
631
655
|
from ..model.image.custom import get_user_defined_images
|
|
632
656
|
|
|
633
|
-
for model_name,
|
|
657
|
+
for model_name, families in BUILTIN_IMAGE_MODELS.items():
|
|
634
658
|
if detailed:
|
|
659
|
+
family = [x for x in families if x.model_hub == "huggingface"][0]
|
|
635
660
|
ret.append(await self._to_image_model_reg(family, is_builtin=True))
|
|
636
661
|
else:
|
|
637
662
|
ret.append({"model_name": model_name, "is_builtin": True})
|
|
@@ -652,8 +677,9 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
652
677
|
from ..model.audio import BUILTIN_AUDIO_MODELS
|
|
653
678
|
from ..model.audio.custom import get_user_defined_audios
|
|
654
679
|
|
|
655
|
-
for model_name,
|
|
680
|
+
for model_name, families in BUILTIN_AUDIO_MODELS.items():
|
|
656
681
|
if detailed:
|
|
682
|
+
family = [x for x in families if x.model_hub == "huggingface"][0]
|
|
657
683
|
ret.append(await self._to_audio_model_reg(family, is_builtin=True))
|
|
658
684
|
else:
|
|
659
685
|
ret.append({"model_name": model_name, "is_builtin": True})
|
|
@@ -673,8 +699,9 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
673
699
|
elif model_type == "video":
|
|
674
700
|
from ..model.video import BUILTIN_VIDEO_MODELS
|
|
675
701
|
|
|
676
|
-
for model_name,
|
|
702
|
+
for model_name, families in BUILTIN_VIDEO_MODELS.items():
|
|
677
703
|
if detailed:
|
|
704
|
+
family = [x for x in families if x.model_hub == "huggingface"][0]
|
|
678
705
|
ret.append(await self._to_video_model_reg(family, is_builtin=True))
|
|
679
706
|
else:
|
|
680
707
|
ret.append({"model_name": model_name, "is_builtin": True})
|
|
@@ -685,8 +712,9 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
685
712
|
from ..model.rerank import BUILTIN_RERANK_MODELS
|
|
686
713
|
from ..model.rerank.custom import get_user_defined_reranks
|
|
687
714
|
|
|
688
|
-
for model_name,
|
|
715
|
+
for model_name, families in BUILTIN_RERANK_MODELS.items():
|
|
689
716
|
if detailed:
|
|
717
|
+
family = [x for x in families if x.model_hub == "huggingface"][0]
|
|
690
718
|
ret.append(await self._to_rerank_model_reg(family, is_builtin=True))
|
|
691
719
|
else:
|
|
692
720
|
ret.append({"model_name": model_name, "is_builtin": True})
|
|
@@ -755,25 +783,46 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
755
783
|
from ..model.image import BUILTIN_IMAGE_MODELS
|
|
756
784
|
from ..model.image.custom import get_user_defined_images
|
|
757
785
|
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
786
|
+
if model_name in BUILTIN_IMAGE_MODELS:
|
|
787
|
+
return [
|
|
788
|
+
x
|
|
789
|
+
for x in BUILTIN_IMAGE_MODELS[model_name]
|
|
790
|
+
if x.model_hub == "huggingface"
|
|
791
|
+
][0]
|
|
792
|
+
else:
|
|
793
|
+
for f in get_user_defined_images():
|
|
794
|
+
if f.model_name == model_name:
|
|
795
|
+
return f
|
|
761
796
|
raise ValueError(f"Model {model_name} not found")
|
|
762
797
|
elif model_type == "audio":
|
|
763
798
|
from ..model.audio import BUILTIN_AUDIO_MODELS
|
|
764
799
|
from ..model.audio.custom import get_user_defined_audios
|
|
765
800
|
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
801
|
+
if model_name in BUILTIN_AUDIO_MODELS:
|
|
802
|
+
return [
|
|
803
|
+
x
|
|
804
|
+
for x in BUILTIN_AUDIO_MODELS[model_name]
|
|
805
|
+
if x.model_hub == "huggingface"
|
|
806
|
+
][0]
|
|
807
|
+
else:
|
|
808
|
+
for f in get_user_defined_audios():
|
|
809
|
+
if f.model_name == model_name:
|
|
810
|
+
return f
|
|
769
811
|
raise ValueError(f"Model {model_name} not found")
|
|
770
812
|
elif model_type == "rerank":
|
|
771
813
|
from ..model.rerank import BUILTIN_RERANK_MODELS
|
|
772
814
|
from ..model.rerank.custom import get_user_defined_reranks
|
|
773
815
|
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
816
|
+
if model_name in BUILTIN_RERANK_MODELS:
|
|
817
|
+
return [
|
|
818
|
+
x
|
|
819
|
+
for x in BUILTIN_RERANK_MODELS[model_name]
|
|
820
|
+
if x.model_hub == "huggingface"
|
|
821
|
+
][0]
|
|
822
|
+
else:
|
|
823
|
+
for f in get_user_defined_reranks():
|
|
824
|
+
if f.model_name == model_name:
|
|
825
|
+
return f
|
|
777
826
|
raise ValueError(f"Model {model_name} not found")
|
|
778
827
|
elif model_type == "flexible":
|
|
779
828
|
from ..model.flexible import get_flexible_models
|