xinference 0.12.3__py3-none-any.whl → 0.13.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +56 -8
- xinference/client/restful/restful_client.py +49 -4
- xinference/core/model.py +36 -4
- xinference/core/scheduler.py +2 -0
- xinference/core/supervisor.py +132 -15
- xinference/core/worker.py +239 -53
- xinference/deploy/cmdline.py +5 -0
- xinference/deploy/utils.py +33 -2
- xinference/model/audio/chattts.py +6 -6
- xinference/model/audio/core.py +23 -15
- xinference/model/core.py +12 -3
- xinference/model/embedding/core.py +25 -16
- xinference/model/flexible/__init__.py +40 -0
- xinference/model/flexible/core.py +228 -0
- xinference/model/flexible/launchers/__init__.py +15 -0
- xinference/model/flexible/launchers/transformers_launcher.py +63 -0
- xinference/model/flexible/utils.py +33 -0
- xinference/model/image/core.py +18 -14
- xinference/model/image/custom.py +1 -1
- xinference/model/llm/__init__.py +5 -2
- xinference/model/llm/core.py +3 -2
- xinference/model/llm/ggml/llamacpp.py +1 -10
- xinference/model/llm/llm_family.json +292 -36
- xinference/model/llm/llm_family.py +102 -53
- xinference/model/llm/llm_family_modelscope.json +247 -27
- xinference/model/llm/mlx/__init__.py +13 -0
- xinference/model/llm/mlx/core.py +408 -0
- xinference/model/llm/pytorch/chatglm.py +2 -9
- xinference/model/llm/pytorch/cogvlm2.py +206 -21
- xinference/model/llm/pytorch/core.py +213 -120
- xinference/model/llm/pytorch/glm4v.py +171 -15
- xinference/model/llm/pytorch/qwen_vl.py +168 -7
- xinference/model/llm/pytorch/utils.py +53 -62
- xinference/model/llm/utils.py +28 -7
- xinference/model/rerank/core.py +29 -25
- xinference/thirdparty/deepseek_vl/serve/__init__.py +13 -0
- xinference/thirdparty/deepseek_vl/serve/app_deepseek.py +510 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/__init__.py +13 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/gradio_utils.py +94 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/overwrites.py +81 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/presets.py +96 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/utils.py +229 -0
- xinference/thirdparty/deepseek_vl/serve/inference.py +170 -0
- xinference/types.py +0 -1
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.95c1d652.js +3 -0
- xinference/web/ui/build/static/js/main.95c1d652.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1444c41a4d04494f1cbc2d8c1537df107b451cb569cb2c1fbf5159f3a4841a5f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2c63090c842376cdd368c3ded88a333ef40d94785747651343040a6f7872a223.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6450605fac003812485f6251b9f0caafbf2e5bfc3bbe2f000050d9e2fdb8dcd3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/709711edada3f1596b309d571285fd31f1c364d66f4425bc28723d0088cc351a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/70fa8c07463a5fe57c68bf92502910105a8f647371836fe8c3a7408246ca7ba0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8a9742ddd8ba8546ef42dc14caca443f2b4524fabed7bf269e0eff3b7b64ee7d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d93730e2b5d7e8c957b4d0965d2ed1dac9045a649adbd47c220d11f255d4b1e0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e656dc00b4d8b387f0a81ba8fc558767df1601c66369e2eb86a5ef27cf080572.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +1 -0
- {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/METADATA +10 -11
- {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/RECORD +71 -69
- xinference/model/llm/ggml/chatglm.py +0 -457
- xinference/thirdparty/ChatTTS/__init__.py +0 -1
- xinference/thirdparty/ChatTTS/core.py +0 -200
- xinference/thirdparty/ChatTTS/experimental/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/experimental/llm.py +0 -40
- xinference/thirdparty/ChatTTS/infer/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/infer/api.py +0 -125
- xinference/thirdparty/ChatTTS/model/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/model/dvae.py +0 -155
- xinference/thirdparty/ChatTTS/model/gpt.py +0 -265
- xinference/thirdparty/ChatTTS/utils/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/utils/gpu_utils.py +0 -23
- xinference/thirdparty/ChatTTS/utils/infer_utils.py +0 -141
- xinference/thirdparty/ChatTTS/utils/io_utils.py +0 -14
- xinference/web/ui/build/static/js/main.77dd47c3.js +0 -3
- xinference/web/ui/build/static/js/main.77dd47c3.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0cd591866aa345566e0b63fb51ff2043e163a770af6fdc2f3bad395d046353e2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/37c1476717199863bbba1530e3513a9368f8f73001b75b4a85c2075956308027.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3da7d55e87882a4af923e187b1351160e34ca102f589086439c15131a227fb6e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/46edc1fe657dfedb2e673148332bb442c6eb98f09f2592c389209e376510afa5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/72bcecc71c5267250edeb89608859d449b586f13ff9923a5e70e7172976ec403.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/82db357f3fd5b32215d747ee593f69ff06c95ad6cde37f71a96c8290aaab64c0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/935efd2867664c58230378fdf2ff1ea85e58d853b7214014e20dfbca8dab7b05.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bc6da27195ec4607bb472bf61f97c928ad4966fa64e4c2247661bedb7400abba.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c2abe75f04ad82fba68f35ed9cbe2e287762c876684fddccccfa73f739489b65.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f118f99c22b713c678c1209c4e1dd43fe86e3f6e801a4c0c35d3bbf41fd05fe6.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f51bf63ddaa7afd125ef2254a105789333eecc1c94fdf5157a9b88ef7ad0a5bd.json +0 -1
- /xinference/web/ui/build/static/js/{main.77dd47c3.js.LICENSE.txt → main.95c1d652.js.LICENSE.txt} +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/LICENSE +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/WHEEL +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/top_level.txt +0 -0
xinference/core/worker.py
CHANGED
|
@@ -22,7 +22,7 @@ import threading
|
|
|
22
22
|
import time
|
|
23
23
|
from collections import defaultdict
|
|
24
24
|
from logging import getLogger
|
|
25
|
-
from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
|
25
|
+
from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union
|
|
26
26
|
|
|
27
27
|
import xoscar as xo
|
|
28
28
|
from async_timeout import timeout
|
|
@@ -73,6 +73,9 @@ class WorkerActor(xo.StatelessActor):
|
|
|
73
73
|
self._main_pool.recover_sub_pool = self.recover_sub_pool
|
|
74
74
|
|
|
75
75
|
# internal states.
|
|
76
|
+
# temporary placeholder during model launch process:
|
|
77
|
+
self._model_uid_launching_guard: Dict[str, bool] = {}
|
|
78
|
+
# attributes maintained after model launched:
|
|
76
79
|
self._model_uid_to_model: Dict[str, xo.ActorRefType["ModelActor"]] = {}
|
|
77
80
|
self._model_uid_to_model_spec: Dict[str, ModelDescription] = {}
|
|
78
81
|
self._gpu_to_model_uid: Dict[int, str] = {}
|
|
@@ -209,48 +212,81 @@ class WorkerActor(xo.StatelessActor):
|
|
|
209
212
|
|
|
210
213
|
from ..model.audio import (
|
|
211
214
|
CustomAudioModelFamilyV1,
|
|
215
|
+
generate_audio_description,
|
|
212
216
|
get_audio_model_descriptions,
|
|
213
217
|
register_audio,
|
|
214
218
|
unregister_audio,
|
|
215
219
|
)
|
|
216
220
|
from ..model.embedding import (
|
|
217
221
|
CustomEmbeddingModelSpec,
|
|
222
|
+
generate_embedding_description,
|
|
218
223
|
get_embedding_model_descriptions,
|
|
219
224
|
register_embedding,
|
|
220
225
|
unregister_embedding,
|
|
221
226
|
)
|
|
227
|
+
from ..model.flexible import (
|
|
228
|
+
FlexibleModelSpec,
|
|
229
|
+
get_flexible_model_descriptions,
|
|
230
|
+
register_flexible_model,
|
|
231
|
+
unregister_flexible_model,
|
|
232
|
+
)
|
|
222
233
|
from ..model.image import (
|
|
223
234
|
CustomImageModelFamilyV1,
|
|
235
|
+
generate_image_description,
|
|
224
236
|
get_image_model_descriptions,
|
|
225
237
|
register_image,
|
|
226
238
|
unregister_image,
|
|
227
239
|
)
|
|
228
240
|
from ..model.llm import (
|
|
229
241
|
CustomLLMFamilyV1,
|
|
242
|
+
generate_llm_description,
|
|
230
243
|
get_llm_model_descriptions,
|
|
231
244
|
register_llm,
|
|
232
245
|
unregister_llm,
|
|
233
246
|
)
|
|
234
247
|
from ..model.rerank import (
|
|
235
248
|
CustomRerankModelSpec,
|
|
249
|
+
generate_rerank_description,
|
|
236
250
|
get_rerank_model_descriptions,
|
|
237
251
|
register_rerank,
|
|
238
252
|
unregister_rerank,
|
|
239
253
|
)
|
|
240
254
|
|
|
241
255
|
self._custom_register_type_to_cls: Dict[str, Tuple] = { # type: ignore
|
|
242
|
-
"LLM": (
|
|
256
|
+
"LLM": (
|
|
257
|
+
CustomLLMFamilyV1,
|
|
258
|
+
register_llm,
|
|
259
|
+
unregister_llm,
|
|
260
|
+
generate_llm_description,
|
|
261
|
+
),
|
|
243
262
|
"embedding": (
|
|
244
263
|
CustomEmbeddingModelSpec,
|
|
245
264
|
register_embedding,
|
|
246
265
|
unregister_embedding,
|
|
266
|
+
generate_embedding_description,
|
|
267
|
+
),
|
|
268
|
+
"rerank": (
|
|
269
|
+
CustomRerankModelSpec,
|
|
270
|
+
register_rerank,
|
|
271
|
+
unregister_rerank,
|
|
272
|
+
generate_rerank_description,
|
|
247
273
|
),
|
|
248
|
-
"rerank": (CustomRerankModelSpec, register_rerank, unregister_rerank),
|
|
249
|
-
"audio": (CustomAudioModelFamilyV1, register_audio, unregister_audio),
|
|
250
274
|
"image": (
|
|
251
275
|
CustomImageModelFamilyV1,
|
|
252
276
|
register_image,
|
|
253
277
|
unregister_image,
|
|
278
|
+
generate_image_description,
|
|
279
|
+
),
|
|
280
|
+
"audio": (
|
|
281
|
+
CustomAudioModelFamilyV1,
|
|
282
|
+
register_audio,
|
|
283
|
+
unregister_audio,
|
|
284
|
+
generate_audio_description,
|
|
285
|
+
),
|
|
286
|
+
"flexible": (
|
|
287
|
+
FlexibleModelSpec,
|
|
288
|
+
register_flexible_model,
|
|
289
|
+
unregister_flexible_model,
|
|
254
290
|
),
|
|
255
291
|
}
|
|
256
292
|
|
|
@@ -261,6 +297,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
261
297
|
model_version_infos.update(get_rerank_model_descriptions())
|
|
262
298
|
model_version_infos.update(get_image_model_descriptions())
|
|
263
299
|
model_version_infos.update(get_audio_model_descriptions())
|
|
300
|
+
model_version_infos.update(get_flexible_model_descriptions())
|
|
264
301
|
await self._cache_tracker_ref.record_model_version(
|
|
265
302
|
model_version_infos, self.address
|
|
266
303
|
)
|
|
@@ -511,17 +548,23 @@ class WorkerActor(xo.StatelessActor):
|
|
|
511
548
|
raise ValueError(f"{model_name} model can't run on Darwin system.")
|
|
512
549
|
|
|
513
550
|
@log_sync(logger=logger)
|
|
514
|
-
def register_model(self, model_type: str, model: str, persist: bool):
|
|
551
|
+
async def register_model(self, model_type: str, model: str, persist: bool):
|
|
515
552
|
# TODO: centralized model registrations
|
|
516
553
|
if model_type in self._custom_register_type_to_cls:
|
|
517
554
|
(
|
|
518
555
|
model_spec_cls,
|
|
519
556
|
register_fn,
|
|
520
557
|
unregister_fn,
|
|
558
|
+
generate_fn,
|
|
521
559
|
) = self._custom_register_type_to_cls[model_type]
|
|
522
560
|
model_spec = model_spec_cls.parse_raw(model)
|
|
523
561
|
try:
|
|
524
562
|
register_fn(model_spec, persist)
|
|
563
|
+
await self._cache_tracker_ref.record_model_version(
|
|
564
|
+
generate_fn(model_spec), self.address
|
|
565
|
+
)
|
|
566
|
+
except ValueError as e:
|
|
567
|
+
raise e
|
|
525
568
|
except Exception as e:
|
|
526
569
|
unregister_fn(model_spec.model_name, raise_error=False)
|
|
527
570
|
raise e
|
|
@@ -529,14 +572,127 @@ class WorkerActor(xo.StatelessActor):
|
|
|
529
572
|
raise ValueError(f"Unsupported model type: {model_type}")
|
|
530
573
|
|
|
531
574
|
@log_sync(logger=logger)
|
|
532
|
-
def unregister_model(self, model_type: str, model_name: str):
|
|
575
|
+
async def unregister_model(self, model_type: str, model_name: str):
|
|
533
576
|
# TODO: centralized model registrations
|
|
534
577
|
if model_type in self._custom_register_type_to_cls:
|
|
535
|
-
_, _, unregister_fn = self._custom_register_type_to_cls[model_type]
|
|
536
|
-
unregister_fn(model_name)
|
|
578
|
+
_, _, unregister_fn, _ = self._custom_register_type_to_cls[model_type]
|
|
579
|
+
unregister_fn(model_name, False)
|
|
580
|
+
else:
|
|
581
|
+
raise ValueError(f"Unsupported model type: {model_type}")
|
|
582
|
+
|
|
583
|
+
@log_async(logger=logger)
|
|
584
|
+
async def list_model_registrations(
|
|
585
|
+
self, model_type: str, detailed: bool = False
|
|
586
|
+
) -> List[Dict[str, Any]]:
|
|
587
|
+
def sort_helper(item):
|
|
588
|
+
assert isinstance(item["model_name"], str)
|
|
589
|
+
return item.get("model_name").lower()
|
|
590
|
+
|
|
591
|
+
if model_type == "LLM":
|
|
592
|
+
from ..model.llm import get_user_defined_llm_families
|
|
593
|
+
|
|
594
|
+
ret = []
|
|
595
|
+
|
|
596
|
+
for family in get_user_defined_llm_families():
|
|
597
|
+
ret.append({"model_name": family.model_name, "is_builtin": False})
|
|
598
|
+
|
|
599
|
+
ret.sort(key=sort_helper)
|
|
600
|
+
return ret
|
|
601
|
+
elif model_type == "embedding":
|
|
602
|
+
from ..model.embedding.custom import get_user_defined_embeddings
|
|
603
|
+
|
|
604
|
+
ret = []
|
|
605
|
+
|
|
606
|
+
for model_spec in get_user_defined_embeddings():
|
|
607
|
+
ret.append({"model_name": model_spec.model_name, "is_builtin": False})
|
|
608
|
+
|
|
609
|
+
ret.sort(key=sort_helper)
|
|
610
|
+
return ret
|
|
611
|
+
elif model_type == "image":
|
|
612
|
+
from ..model.image.custom import get_user_defined_images
|
|
613
|
+
|
|
614
|
+
ret = []
|
|
615
|
+
|
|
616
|
+
for model_spec in get_user_defined_images():
|
|
617
|
+
ret.append({"model_name": model_spec.model_name, "is_builtin": False})
|
|
618
|
+
|
|
619
|
+
ret.sort(key=sort_helper)
|
|
620
|
+
return ret
|
|
621
|
+
elif model_type == "audio":
|
|
622
|
+
from ..model.audio.custom import get_user_defined_audios
|
|
623
|
+
|
|
624
|
+
ret = []
|
|
625
|
+
|
|
626
|
+
for model_spec in get_user_defined_audios():
|
|
627
|
+
ret.append({"model_name": model_spec.model_name, "is_builtin": False})
|
|
628
|
+
|
|
629
|
+
ret.sort(key=sort_helper)
|
|
630
|
+
return ret
|
|
631
|
+
elif model_type == "rerank":
|
|
632
|
+
from ..model.rerank.custom import get_user_defined_reranks
|
|
633
|
+
|
|
634
|
+
ret = []
|
|
635
|
+
|
|
636
|
+
for model_spec in get_user_defined_reranks():
|
|
637
|
+
ret.append({"model_name": model_spec.model_name, "is_builtin": False})
|
|
638
|
+
|
|
639
|
+
ret.sort(key=sort_helper)
|
|
640
|
+
return ret
|
|
537
641
|
else:
|
|
538
642
|
raise ValueError(f"Unsupported model type: {model_type}")
|
|
539
643
|
|
|
644
|
+
@log_sync(logger=logger)
|
|
645
|
+
async def get_model_registration(self, model_type: str, model_name: str) -> Any:
|
|
646
|
+
if model_type == "LLM":
|
|
647
|
+
from ..model.llm import get_user_defined_llm_families
|
|
648
|
+
|
|
649
|
+
for f in get_user_defined_llm_families():
|
|
650
|
+
if f.model_name == model_name:
|
|
651
|
+
return f
|
|
652
|
+
elif model_type == "embedding":
|
|
653
|
+
from ..model.embedding.custom import get_user_defined_embeddings
|
|
654
|
+
|
|
655
|
+
for f in get_user_defined_embeddings():
|
|
656
|
+
if f.model_name == model_name:
|
|
657
|
+
return f
|
|
658
|
+
elif model_type == "image":
|
|
659
|
+
from ..model.image.custom import get_user_defined_images
|
|
660
|
+
|
|
661
|
+
for f in get_user_defined_images():
|
|
662
|
+
if f.model_name == model_name:
|
|
663
|
+
return f
|
|
664
|
+
elif model_type == "audio":
|
|
665
|
+
from ..model.audio.custom import get_user_defined_audios
|
|
666
|
+
|
|
667
|
+
for f in get_user_defined_audios():
|
|
668
|
+
if f.model_name == model_name:
|
|
669
|
+
return f
|
|
670
|
+
elif model_type == "rerank":
|
|
671
|
+
from ..model.rerank.custom import get_user_defined_reranks
|
|
672
|
+
|
|
673
|
+
for f in get_user_defined_reranks():
|
|
674
|
+
if f.model_name == model_name:
|
|
675
|
+
return f
|
|
676
|
+
return None
|
|
677
|
+
|
|
678
|
+
@log_async(logger=logger)
|
|
679
|
+
async def query_engines_by_model_name(self, model_name: str):
|
|
680
|
+
from copy import deepcopy
|
|
681
|
+
|
|
682
|
+
from ..model.llm.llm_family import LLM_ENGINES
|
|
683
|
+
|
|
684
|
+
if model_name not in LLM_ENGINES:
|
|
685
|
+
return None
|
|
686
|
+
|
|
687
|
+
# filter llm_class
|
|
688
|
+
engine_params = deepcopy(LLM_ENGINES[model_name])
|
|
689
|
+
for engine in engine_params:
|
|
690
|
+
params = engine_params[engine]
|
|
691
|
+
for param in params:
|
|
692
|
+
del param["llm_class"]
|
|
693
|
+
|
|
694
|
+
return engine_params
|
|
695
|
+
|
|
540
696
|
async def _get_model_ability(self, model: Any, model_type: str) -> List[str]:
|
|
541
697
|
from ..model.llm.core import LLM
|
|
542
698
|
|
|
@@ -548,6 +704,8 @@ class WorkerActor(xo.StatelessActor):
|
|
|
548
704
|
return ["text_to_image"]
|
|
549
705
|
elif model_type == "audio":
|
|
550
706
|
return ["audio_to_text"]
|
|
707
|
+
elif model_type == "flexible":
|
|
708
|
+
return ["flexible"]
|
|
551
709
|
else:
|
|
552
710
|
assert model_type == "LLM"
|
|
553
711
|
assert isinstance(model, LLM)
|
|
@@ -584,6 +742,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
584
742
|
peft_model_config: Optional[PeftModelConfig] = None,
|
|
585
743
|
request_limits: Optional[int] = None,
|
|
586
744
|
gpu_idx: Optional[Union[int, List[int]]] = None,
|
|
745
|
+
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
|
|
587
746
|
**kwargs,
|
|
588
747
|
):
|
|
589
748
|
# !!! Note that The following code must be placed at the very beginning of this function,
|
|
@@ -594,10 +753,14 @@ class WorkerActor(xo.StatelessActor):
|
|
|
594
753
|
launch_args.pop("kwargs")
|
|
595
754
|
launch_args.update(kwargs)
|
|
596
755
|
|
|
597
|
-
|
|
756
|
+
try:
|
|
757
|
+
origin_uid, _, _ = parse_replica_model_uid(model_uid)
|
|
758
|
+
except Exception as e:
|
|
759
|
+
logger.exception(e)
|
|
760
|
+
raise
|
|
598
761
|
try:
|
|
599
762
|
await self._event_collector_ref.report_event(
|
|
600
|
-
|
|
763
|
+
origin_uid,
|
|
601
764
|
Event(
|
|
602
765
|
event_type=EventType.INFO,
|
|
603
766
|
event_ts=int(time.time()),
|
|
@@ -640,50 +803,56 @@ class WorkerActor(xo.StatelessActor):
|
|
|
640
803
|
assert model_uid not in self._model_uid_to_model
|
|
641
804
|
self._check_model_is_valid(model_name, model_format)
|
|
642
805
|
|
|
643
|
-
|
|
644
|
-
model_uid
|
|
645
|
-
)
|
|
806
|
+
if self.get_model_launch_status(model_uid) is not None:
|
|
807
|
+
raise ValueError(f"{model_uid} is running")
|
|
646
808
|
|
|
647
809
|
try:
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
subpool_address,
|
|
652
|
-
devices,
|
|
653
|
-
model_uid,
|
|
654
|
-
model_type,
|
|
655
|
-
model_name,
|
|
656
|
-
model_engine,
|
|
657
|
-
model_format,
|
|
658
|
-
model_size_in_billions,
|
|
659
|
-
quantization,
|
|
660
|
-
peft_model_config,
|
|
661
|
-
**kwargs,
|
|
662
|
-
)
|
|
663
|
-
await self.update_cache_status(model_name, model_description)
|
|
664
|
-
model_ref = await xo.create_actor(
|
|
665
|
-
ModelActor,
|
|
666
|
-
address=subpool_address,
|
|
667
|
-
uid=model_uid,
|
|
668
|
-
worker_address=self.address,
|
|
669
|
-
model=model,
|
|
670
|
-
model_description=model_description,
|
|
671
|
-
request_limits=request_limits,
|
|
810
|
+
self._model_uid_launching_guard[model_uid] = True
|
|
811
|
+
subpool_address, devices = await self._create_subpool(
|
|
812
|
+
model_uid, model_type, n_gpu=n_gpu, gpu_idx=gpu_idx
|
|
672
813
|
)
|
|
673
|
-
await model_ref.load()
|
|
674
|
-
except:
|
|
675
|
-
logger.error(f"Failed to load model {model_uid}", exc_info=True)
|
|
676
|
-
self.release_devices(model_uid=model_uid)
|
|
677
|
-
await self._main_pool.remove_sub_pool(subpool_address)
|
|
678
|
-
raise
|
|
679
814
|
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
815
|
+
try:
|
|
816
|
+
model, model_description = await asyncio.to_thread(
|
|
817
|
+
create_model_instance,
|
|
818
|
+
subpool_address,
|
|
819
|
+
devices,
|
|
820
|
+
model_uid,
|
|
821
|
+
model_type,
|
|
822
|
+
model_name,
|
|
823
|
+
model_engine,
|
|
824
|
+
model_format,
|
|
825
|
+
model_size_in_billions,
|
|
826
|
+
quantization,
|
|
827
|
+
peft_model_config,
|
|
828
|
+
download_hub,
|
|
829
|
+
**kwargs,
|
|
830
|
+
)
|
|
831
|
+
await self.update_cache_status(model_name, model_description)
|
|
832
|
+
model_ref = await xo.create_actor(
|
|
833
|
+
ModelActor,
|
|
834
|
+
address=subpool_address,
|
|
835
|
+
uid=model_uid,
|
|
836
|
+
worker_address=self.address,
|
|
837
|
+
model=model,
|
|
838
|
+
model_description=model_description,
|
|
839
|
+
request_limits=request_limits,
|
|
840
|
+
)
|
|
841
|
+
await model_ref.load()
|
|
842
|
+
except:
|
|
843
|
+
logger.error(f"Failed to load model {model_uid}", exc_info=True)
|
|
844
|
+
self.release_devices(model_uid=model_uid)
|
|
845
|
+
await self._main_pool.remove_sub_pool(subpool_address)
|
|
846
|
+
raise
|
|
847
|
+
self._model_uid_to_model[model_uid] = model_ref
|
|
848
|
+
self._model_uid_to_model_spec[model_uid] = model_description
|
|
849
|
+
self._model_uid_to_addr[model_uid] = subpool_address
|
|
850
|
+
self._model_uid_to_recover_count.setdefault(
|
|
851
|
+
model_uid, MODEL_ACTOR_AUTO_RECOVER_LIMIT
|
|
852
|
+
)
|
|
853
|
+
self._model_uid_to_launch_args[model_uid] = launch_args
|
|
854
|
+
finally:
|
|
855
|
+
del self._model_uid_launching_guard[model_uid]
|
|
687
856
|
|
|
688
857
|
# update status to READY
|
|
689
858
|
abilities = await self._get_model_ability(model, model_type)
|
|
@@ -694,10 +863,13 @@ class WorkerActor(xo.StatelessActor):
|
|
|
694
863
|
|
|
695
864
|
@log_async(logger=logger)
|
|
696
865
|
async def terminate_model(self, model_uid: str):
|
|
697
|
-
|
|
866
|
+
# Terminate model while its launching is not allow
|
|
867
|
+
if model_uid in self._model_uid_launching_guard:
|
|
868
|
+
raise ValueError(f"{model_uid} is launching")
|
|
869
|
+
origin_uid, _, __ = parse_replica_model_uid(model_uid)
|
|
698
870
|
try:
|
|
699
871
|
await self._event_collector_ref.report_event(
|
|
700
|
-
|
|
872
|
+
origin_uid,
|
|
701
873
|
Event(
|
|
702
874
|
event_type=EventType.INFO,
|
|
703
875
|
event_ts=int(time.time()),
|
|
@@ -708,7 +880,6 @@ class WorkerActor(xo.StatelessActor):
|
|
|
708
880
|
# Report callback error can be log and ignore, should not interrupt the Process
|
|
709
881
|
logger.error("report_event error: %s" % (e))
|
|
710
882
|
|
|
711
|
-
origin_uid, _, _ = parse_replica_model_uid(model_uid)
|
|
712
883
|
await self._status_guard_ref.update_instance_info(
|
|
713
884
|
origin_uid, {"status": LaunchStatus.TERMINATING.name}
|
|
714
885
|
)
|
|
@@ -740,6 +911,21 @@ class WorkerActor(xo.StatelessActor):
|
|
|
740
911
|
origin_uid, {"status": LaunchStatus.TERMINATED.name}
|
|
741
912
|
)
|
|
742
913
|
|
|
914
|
+
# Provide an interface for future version of supervisor to call
|
|
915
|
+
def get_model_launch_status(self, model_uid: str) -> Optional[str]:
|
|
916
|
+
"""
|
|
917
|
+
returns:
|
|
918
|
+
CREATING: model is launching
|
|
919
|
+
RREADY: model is running
|
|
920
|
+
None: model is not running (launch error might have happened)
|
|
921
|
+
"""
|
|
922
|
+
|
|
923
|
+
if model_uid in self._model_uid_launching_guard:
|
|
924
|
+
return LaunchStatus.CREATING.name
|
|
925
|
+
if model_uid in self._model_uid_to_model:
|
|
926
|
+
return LaunchStatus.READY.name
|
|
927
|
+
return None
|
|
928
|
+
|
|
743
929
|
@log_async(logger=logger)
|
|
744
930
|
async def list_models(self) -> Dict[str, Dict[str, Any]]:
|
|
745
931
|
ret = {}
|
xinference/deploy/cmdline.py
CHANGED
|
@@ -370,6 +370,9 @@ def worker(
|
|
|
370
370
|
help="Type of model to register (default is 'LLM').",
|
|
371
371
|
)
|
|
372
372
|
@click.option("--file", "-f", type=str, help="Path to the model configuration file.")
|
|
373
|
+
@click.option(
|
|
374
|
+
"--worker-ip", "-w", type=str, help="Specify the ip address of the worker."
|
|
375
|
+
)
|
|
373
376
|
@click.option(
|
|
374
377
|
"--persist",
|
|
375
378
|
"-p",
|
|
@@ -387,6 +390,7 @@ def register_model(
|
|
|
387
390
|
endpoint: Optional[str],
|
|
388
391
|
model_type: str,
|
|
389
392
|
file: str,
|
|
393
|
+
worker_ip: str,
|
|
390
394
|
persist: bool,
|
|
391
395
|
api_key: Optional[str],
|
|
392
396
|
):
|
|
@@ -400,6 +404,7 @@ def register_model(
|
|
|
400
404
|
client.register_model(
|
|
401
405
|
model_type=model_type,
|
|
402
406
|
model=model,
|
|
407
|
+
worker_ip=worker_ip,
|
|
403
408
|
persist=persist,
|
|
404
409
|
)
|
|
405
410
|
|
xinference/deploy/utils.py
CHANGED
|
@@ -79,6 +79,12 @@ def get_config_dict(
|
|
|
79
79
|
"stream": "ext://sys.stderr",
|
|
80
80
|
"filters": ["logger_name_filter"],
|
|
81
81
|
},
|
|
82
|
+
"console_handler": {
|
|
83
|
+
"class": "logging.StreamHandler",
|
|
84
|
+
"formatter": "formatter",
|
|
85
|
+
"level": log_level,
|
|
86
|
+
"stream": "ext://sys.stderr",
|
|
87
|
+
},
|
|
82
88
|
"file_handler": {
|
|
83
89
|
"class": "logging.handlers.RotatingFileHandler",
|
|
84
90
|
"formatter": "formatter",
|
|
@@ -95,7 +101,32 @@ def get_config_dict(
|
|
|
95
101
|
"handlers": ["stream_handler", "file_handler"],
|
|
96
102
|
"level": log_level,
|
|
97
103
|
"propagate": False,
|
|
98
|
-
}
|
|
104
|
+
},
|
|
105
|
+
"uvicorn": {
|
|
106
|
+
"handlers": ["stream_handler", "file_handler"],
|
|
107
|
+
"level": log_level,
|
|
108
|
+
"propagate": False,
|
|
109
|
+
},
|
|
110
|
+
"uvicorn.error": {
|
|
111
|
+
"handlers": ["stream_handler", "file_handler"],
|
|
112
|
+
"level": log_level,
|
|
113
|
+
"propagate": False,
|
|
114
|
+
},
|
|
115
|
+
"uvicorn.access": {
|
|
116
|
+
"handlers": ["stream_handler", "file_handler"],
|
|
117
|
+
"level": log_level,
|
|
118
|
+
"propagate": False,
|
|
119
|
+
},
|
|
120
|
+
"transformers": {
|
|
121
|
+
"handlers": ["console_handler", "file_handler"],
|
|
122
|
+
"level": log_level,
|
|
123
|
+
"propagate": False,
|
|
124
|
+
},
|
|
125
|
+
"vllm": {
|
|
126
|
+
"handlers": ["console_handler", "file_handler"],
|
|
127
|
+
"level": log_level,
|
|
128
|
+
"propagate": False,
|
|
129
|
+
},
|
|
99
130
|
},
|
|
100
131
|
"root": {
|
|
101
132
|
"level": "WARN",
|
|
@@ -127,7 +158,7 @@ def health_check(address: str, max_attempts: int, sleep_interval: int = 3) -> bo
|
|
|
127
158
|
while attempts < max_attempts:
|
|
128
159
|
time.sleep(sleep_interval)
|
|
129
160
|
try:
|
|
130
|
-
from
|
|
161
|
+
from ..core.supervisor import SupervisorActor
|
|
131
162
|
|
|
132
163
|
supervisor_ref: xo.ActorRefType[SupervisorActor] = await xo.actor_ref( # type: ignore
|
|
133
164
|
address=address, uid=SupervisorActor.uid()
|
xinference/model/audio/chattts.py
CHANGED
|
@@ -38,21 +38,19 @@ class ChatTTSModel:
|
|
|
38
38
|
self._kwargs = kwargs
|
|
39
39
|
|
|
40
40
|
def load(self):
|
|
41
|
+
import ChatTTS
|
|
41
42
|
import torch
|
|
42
43
|
|
|
43
|
-
from xinference.thirdparty import ChatTTS
|
|
44
|
-
|
|
45
44
|
torch._dynamo.config.cache_size_limit = 64
|
|
46
45
|
torch._dynamo.config.suppress_errors = True
|
|
47
46
|
torch.set_float32_matmul_precision("high")
|
|
48
47
|
self._model = ChatTTS.Chat()
|
|
49
|
-
self._model.
|
|
50
|
-
source="local", local_path=self._model_path, compile=True
|
|
51
|
-
)
|
|
48
|
+
self._model.load(source="custom", custom_path=self._model_path, compile=True)
|
|
52
49
|
|
|
53
50
|
def speech(
|
|
54
51
|
self, input: str, voice: str, response_format: str = "mp3", speed: float = 1.0
|
|
55
52
|
):
|
|
53
|
+
import ChatTTS
|
|
56
54
|
import numpy as np
|
|
57
55
|
import torch
|
|
58
56
|
import torchaudio
|
|
@@ -71,7 +69,9 @@ class ChatTTSModel:
|
|
|
71
69
|
|
|
72
70
|
default = 5
|
|
73
71
|
infer_speed = int(default * speed)
|
|
74
|
-
params_infer_code =
|
|
72
|
+
params_infer_code = ChatTTS.Chat.InferCodeParams(
|
|
73
|
+
prompt=f"[speed_{infer_speed}]", spk_emb=rnd_spk_emb
|
|
74
|
+
)
|
|
75
75
|
|
|
76
76
|
assert self._model is not None
|
|
77
77
|
wavs = self._model.infer([input], params_infer_code=params_infer_code)
|
xinference/model/audio/core.py
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
import logging
|
|
15
15
|
import os
|
|
16
16
|
from collections import defaultdict
|
|
17
|
-
from typing import Dict, List, Optional, Tuple, Union
|
|
17
|
+
from typing import Dict, List, Literal, Optional, Tuple, Union
|
|
18
18
|
|
|
19
19
|
from ...constants import XINFERENCE_CACHE_DIR
|
|
20
20
|
from ..core import CacheableModelSpec, ModelDescription
|
|
@@ -94,7 +94,10 @@ def generate_audio_description(
|
|
|
94
94
|
return res
|
|
95
95
|
|
|
96
96
|
|
|
97
|
-
def match_audio(
|
|
97
|
+
def match_audio(
|
|
98
|
+
model_name: str,
|
|
99
|
+
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
|
|
100
|
+
) -> AudioModelFamilyV1:
|
|
98
101
|
from ..utils import download_from_modelscope
|
|
99
102
|
from . import BUILTIN_AUDIO_MODELS, MODELSCOPE_AUDIO_MODELS
|
|
100
103
|
from .custom import get_user_defined_audios
|
|
@@ -103,17 +106,17 @@ def match_audio(model_name: str) -> AudioModelFamilyV1:
|
|
|
103
106
|
if model_spec.model_name == model_name:
|
|
104
107
|
return model_spec
|
|
105
108
|
|
|
106
|
-
if
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
109
|
+
if download_hub == "huggingface" and model_name in BUILTIN_AUDIO_MODELS:
|
|
110
|
+
logger.debug(f"Audio model {model_name} found in huggingface.")
|
|
111
|
+
return BUILTIN_AUDIO_MODELS[model_name]
|
|
112
|
+
elif download_hub == "modelscope" and model_name in MODELSCOPE_AUDIO_MODELS:
|
|
113
|
+
logger.debug(f"Audio model {model_name} found in ModelScope.")
|
|
114
|
+
return MODELSCOPE_AUDIO_MODELS[model_name]
|
|
115
|
+
elif download_from_modelscope() and model_name in MODELSCOPE_AUDIO_MODELS:
|
|
116
|
+
logger.debug(f"Audio model {model_name} found in ModelScope.")
|
|
117
|
+
return MODELSCOPE_AUDIO_MODELS[model_name]
|
|
118
|
+
elif model_name in BUILTIN_AUDIO_MODELS:
|
|
119
|
+
logger.debug(f"Audio model {model_name} found in huggingface.")
|
|
117
120
|
return BUILTIN_AUDIO_MODELS[model_name]
|
|
118
121
|
else:
|
|
119
122
|
raise ValueError(
|
|
@@ -141,9 +144,14 @@ def get_cache_status(
|
|
|
141
144
|
|
|
142
145
|
|
|
143
146
|
def create_audio_model_instance(
|
|
144
|
-
subpool_addr: str,
|
|
147
|
+
subpool_addr: str,
|
|
148
|
+
devices: List[str],
|
|
149
|
+
model_uid: str,
|
|
150
|
+
model_name: str,
|
|
151
|
+
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
|
|
152
|
+
**kwargs,
|
|
145
153
|
) -> Tuple[Union[WhisperModel, ChatTTSModel], AudioModelDescription]:
|
|
146
|
-
model_spec = match_audio(model_name)
|
|
154
|
+
model_spec = match_audio(model_name, download_hub)
|
|
147
155
|
model_path = cache(model_spec)
|
|
148
156
|
model: Union[WhisperModel, ChatTTSModel]
|
|
149
157
|
if model_spec.model_family == "whisper":
|
xinference/model/core.py
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from abc import ABC, abstractmethod
|
|
16
|
-
from typing import Any, List, Optional, Tuple, Union
|
|
16
|
+
from typing import Any, List, Literal, Optional, Tuple, Union
|
|
17
17
|
|
|
18
18
|
from .._compat import BaseModel
|
|
19
19
|
from ..types import PeftModelConfig
|
|
@@ -55,10 +55,12 @@ def create_model_instance(
|
|
|
55
55
|
model_size_in_billions: Optional[Union[int, str]] = None,
|
|
56
56
|
quantization: Optional[str] = None,
|
|
57
57
|
peft_model_config: Optional[PeftModelConfig] = None,
|
|
58
|
+
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
|
|
58
59
|
**kwargs,
|
|
59
60
|
) -> Tuple[Any, ModelDescription]:
|
|
60
61
|
from .audio.core import create_audio_model_instance
|
|
61
62
|
from .embedding.core import create_embedding_model_instance
|
|
63
|
+
from .flexible.core import create_flexible_model_instance
|
|
62
64
|
from .image.core import create_image_model_instance
|
|
63
65
|
from .llm.core import create_llm_model_instance
|
|
64
66
|
from .rerank.core import create_rerank_model_instance
|
|
@@ -74,13 +76,14 @@ def create_model_instance(
|
|
|
74
76
|
model_size_in_billions,
|
|
75
77
|
quantization,
|
|
76
78
|
peft_model_config,
|
|
79
|
+
download_hub,
|
|
77
80
|
**kwargs,
|
|
78
81
|
)
|
|
79
82
|
elif model_type == "embedding":
|
|
80
83
|
# embedding model doesn't accept trust_remote_code
|
|
81
84
|
kwargs.pop("trust_remote_code", None)
|
|
82
85
|
return create_embedding_model_instance(
|
|
83
|
-
subpool_addr, devices, model_uid, model_name, **kwargs
|
|
86
|
+
subpool_addr, devices, model_uid, model_name, download_hub, **kwargs
|
|
84
87
|
)
|
|
85
88
|
elif model_type == "image":
|
|
86
89
|
kwargs.pop("trust_remote_code", None)
|
|
@@ -90,16 +93,22 @@ def create_model_instance(
|
|
|
90
93
|
model_uid,
|
|
91
94
|
model_name,
|
|
92
95
|
peft_model_config,
|
|
96
|
+
download_hub,
|
|
93
97
|
**kwargs,
|
|
94
98
|
)
|
|
95
99
|
elif model_type == "rerank":
|
|
96
100
|
kwargs.pop("trust_remote_code", None)
|
|
97
101
|
return create_rerank_model_instance(
|
|
98
|
-
subpool_addr, devices, model_uid, model_name, **kwargs
|
|
102
|
+
subpool_addr, devices, model_uid, model_name, download_hub, **kwargs
|
|
99
103
|
)
|
|
100
104
|
elif model_type == "audio":
|
|
101
105
|
kwargs.pop("trust_remote_code", None)
|
|
102
106
|
return create_audio_model_instance(
|
|
107
|
+
subpool_addr, devices, model_uid, model_name, download_hub, **kwargs
|
|
108
|
+
)
|
|
109
|
+
elif model_type == "flexible":
|
|
110
|
+
kwargs.pop("trust_remote_code", None)
|
|
111
|
+
return create_flexible_model_instance(
|
|
103
112
|
subpool_addr, devices, model_uid, model_name, **kwargs
|
|
104
113
|
)
|
|
105
114
|
else:
|