xinference 0.13.0__py3-none-any.whl → 0.13.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +50 -2
- xinference/client/restful/restful_client.py +49 -2
- xinference/core/model.py +15 -0
- xinference/core/supervisor.py +132 -15
- xinference/core/worker.py +165 -8
- xinference/deploy/cmdline.py +5 -0
- xinference/model/audio/chattts.py +6 -6
- xinference/model/audio/core.py +23 -15
- xinference/model/core.py +12 -3
- xinference/model/embedding/core.py +25 -16
- xinference/model/flexible/__init__.py +40 -0
- xinference/model/flexible/core.py +228 -0
- xinference/model/flexible/launchers/__init__.py +15 -0
- xinference/model/flexible/launchers/transformers_launcher.py +63 -0
- xinference/model/flexible/utils.py +33 -0
- xinference/model/image/core.py +18 -14
- xinference/model/image/custom.py +1 -1
- xinference/model/llm/__init__.py +0 -2
- xinference/model/llm/core.py +3 -2
- xinference/model/llm/ggml/llamacpp.py +1 -10
- xinference/model/llm/llm_family.json +52 -35
- xinference/model/llm/llm_family.py +71 -46
- xinference/model/llm/llm_family_modelscope.json +55 -27
- xinference/model/llm/pytorch/core.py +0 -80
- xinference/model/llm/utils.py +4 -2
- xinference/model/rerank/core.py +24 -25
- xinference/types.py +0 -1
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.0fb6f3ab.js → main.95c1d652.js} +3 -3
- xinference/web/ui/build/static/js/main.95c1d652.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/709711edada3f1596b309d571285fd31f1c364d66f4425bc28723d0088cc351a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/70fa8c07463a5fe57c68bf92502910105a8f647371836fe8c3a7408246ca7ba0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +1 -0
- {xinference-0.13.0.dist-info → xinference-0.13.1.dist-info}/METADATA +7 -11
- {xinference-0.13.0.dist-info → xinference-0.13.1.dist-info}/RECORD +45 -54
- xinference/model/llm/ggml/chatglm.py +0 -457
- xinference/thirdparty/ChatTTS/__init__.py +0 -1
- xinference/thirdparty/ChatTTS/core.py +0 -200
- xinference/thirdparty/ChatTTS/experimental/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/experimental/llm.py +0 -40
- xinference/thirdparty/ChatTTS/infer/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/infer/api.py +0 -125
- xinference/thirdparty/ChatTTS/model/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/model/dvae.py +0 -155
- xinference/thirdparty/ChatTTS/model/gpt.py +0 -265
- xinference/thirdparty/ChatTTS/utils/__init__.py +0 -0
- xinference/thirdparty/ChatTTS/utils/gpu_utils.py +0 -23
- xinference/thirdparty/ChatTTS/utils/infer_utils.py +0 -141
- xinference/thirdparty/ChatTTS/utils/io_utils.py +0 -14
- xinference/web/ui/build/static/js/main.0fb6f3ab.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f6b391abec76271137faad13a3793fe7acc1024e8cd2269c147b653ecd3a73b.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/30a0c79d8025d6441eb75b2df5bc2750a14f30119c869ef02570d294dff65c2f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/40486e655c3c5801f087e2cf206c0b5511aaa0dfdba78046b7181bf9c17e54c5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b5507cd57f16a3a230aa0128e39fe103e928de139ea29e2679e4c64dcbba3b3a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d779b915f83f9c7b5a72515b6932fdd114f1822cef90ae01cc0d12bca59abc2d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d87824cb266194447a9c0c69ebab2d507bfc3e3148976173760d18c035e9dd26.json +0 -1
- /xinference/web/ui/build/static/js/{main.0fb6f3ab.js.LICENSE.txt → main.95c1d652.js.LICENSE.txt} +0 -0
- {xinference-0.13.0.dist-info → xinference-0.13.1.dist-info}/LICENSE +0 -0
- {xinference-0.13.0.dist-info → xinference-0.13.1.dist-info}/WHEEL +0 -0
- {xinference-0.13.0.dist-info → xinference-0.13.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.13.0.dist-info → xinference-0.13.1.dist-info}/top_level.txt +0 -0
xinference/core/worker.py
CHANGED
|
@@ -22,7 +22,7 @@ import threading
|
|
|
22
22
|
import time
|
|
23
23
|
from collections import defaultdict
|
|
24
24
|
from logging import getLogger
|
|
25
|
-
from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
|
25
|
+
from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union
|
|
26
26
|
|
|
27
27
|
import xoscar as xo
|
|
28
28
|
from async_timeout import timeout
|
|
@@ -212,48 +212,81 @@ class WorkerActor(xo.StatelessActor):
|
|
|
212
212
|
|
|
213
213
|
from ..model.audio import (
|
|
214
214
|
CustomAudioModelFamilyV1,
|
|
215
|
+
generate_audio_description,
|
|
215
216
|
get_audio_model_descriptions,
|
|
216
217
|
register_audio,
|
|
217
218
|
unregister_audio,
|
|
218
219
|
)
|
|
219
220
|
from ..model.embedding import (
|
|
220
221
|
CustomEmbeddingModelSpec,
|
|
222
|
+
generate_embedding_description,
|
|
221
223
|
get_embedding_model_descriptions,
|
|
222
224
|
register_embedding,
|
|
223
225
|
unregister_embedding,
|
|
224
226
|
)
|
|
227
|
+
from ..model.flexible import (
|
|
228
|
+
FlexibleModelSpec,
|
|
229
|
+
get_flexible_model_descriptions,
|
|
230
|
+
register_flexible_model,
|
|
231
|
+
unregister_flexible_model,
|
|
232
|
+
)
|
|
225
233
|
from ..model.image import (
|
|
226
234
|
CustomImageModelFamilyV1,
|
|
235
|
+
generate_image_description,
|
|
227
236
|
get_image_model_descriptions,
|
|
228
237
|
register_image,
|
|
229
238
|
unregister_image,
|
|
230
239
|
)
|
|
231
240
|
from ..model.llm import (
|
|
232
241
|
CustomLLMFamilyV1,
|
|
242
|
+
generate_llm_description,
|
|
233
243
|
get_llm_model_descriptions,
|
|
234
244
|
register_llm,
|
|
235
245
|
unregister_llm,
|
|
236
246
|
)
|
|
237
247
|
from ..model.rerank import (
|
|
238
248
|
CustomRerankModelSpec,
|
|
249
|
+
generate_rerank_description,
|
|
239
250
|
get_rerank_model_descriptions,
|
|
240
251
|
register_rerank,
|
|
241
252
|
unregister_rerank,
|
|
242
253
|
)
|
|
243
254
|
|
|
244
255
|
self._custom_register_type_to_cls: Dict[str, Tuple] = { # type: ignore
|
|
245
|
-
"LLM": (
|
|
256
|
+
"LLM": (
|
|
257
|
+
CustomLLMFamilyV1,
|
|
258
|
+
register_llm,
|
|
259
|
+
unregister_llm,
|
|
260
|
+
generate_llm_description,
|
|
261
|
+
),
|
|
246
262
|
"embedding": (
|
|
247
263
|
CustomEmbeddingModelSpec,
|
|
248
264
|
register_embedding,
|
|
249
265
|
unregister_embedding,
|
|
266
|
+
generate_embedding_description,
|
|
267
|
+
),
|
|
268
|
+
"rerank": (
|
|
269
|
+
CustomRerankModelSpec,
|
|
270
|
+
register_rerank,
|
|
271
|
+
unregister_rerank,
|
|
272
|
+
generate_rerank_description,
|
|
250
273
|
),
|
|
251
|
-
"rerank": (CustomRerankModelSpec, register_rerank, unregister_rerank),
|
|
252
|
-
"audio": (CustomAudioModelFamilyV1, register_audio, unregister_audio),
|
|
253
274
|
"image": (
|
|
254
275
|
CustomImageModelFamilyV1,
|
|
255
276
|
register_image,
|
|
256
277
|
unregister_image,
|
|
278
|
+
generate_image_description,
|
|
279
|
+
),
|
|
280
|
+
"audio": (
|
|
281
|
+
CustomAudioModelFamilyV1,
|
|
282
|
+
register_audio,
|
|
283
|
+
unregister_audio,
|
|
284
|
+
generate_audio_description,
|
|
285
|
+
),
|
|
286
|
+
"flexible": (
|
|
287
|
+
FlexibleModelSpec,
|
|
288
|
+
register_flexible_model,
|
|
289
|
+
unregister_flexible_model,
|
|
257
290
|
),
|
|
258
291
|
}
|
|
259
292
|
|
|
@@ -264,6 +297,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
264
297
|
model_version_infos.update(get_rerank_model_descriptions())
|
|
265
298
|
model_version_infos.update(get_image_model_descriptions())
|
|
266
299
|
model_version_infos.update(get_audio_model_descriptions())
|
|
300
|
+
model_version_infos.update(get_flexible_model_descriptions())
|
|
267
301
|
await self._cache_tracker_ref.record_model_version(
|
|
268
302
|
model_version_infos, self.address
|
|
269
303
|
)
|
|
@@ -514,17 +548,23 @@ class WorkerActor(xo.StatelessActor):
|
|
|
514
548
|
raise ValueError(f"{model_name} model can't run on Darwin system.")
|
|
515
549
|
|
|
516
550
|
@log_sync(logger=logger)
|
|
517
|
-
def register_model(self, model_type: str, model: str, persist: bool):
|
|
551
|
+
async def register_model(self, model_type: str, model: str, persist: bool):
|
|
518
552
|
# TODO: centralized model registrations
|
|
519
553
|
if model_type in self._custom_register_type_to_cls:
|
|
520
554
|
(
|
|
521
555
|
model_spec_cls,
|
|
522
556
|
register_fn,
|
|
523
557
|
unregister_fn,
|
|
558
|
+
generate_fn,
|
|
524
559
|
) = self._custom_register_type_to_cls[model_type]
|
|
525
560
|
model_spec = model_spec_cls.parse_raw(model)
|
|
526
561
|
try:
|
|
527
562
|
register_fn(model_spec, persist)
|
|
563
|
+
await self._cache_tracker_ref.record_model_version(
|
|
564
|
+
generate_fn(model_spec), self.address
|
|
565
|
+
)
|
|
566
|
+
except ValueError as e:
|
|
567
|
+
raise e
|
|
528
568
|
except Exception as e:
|
|
529
569
|
unregister_fn(model_spec.model_name, raise_error=False)
|
|
530
570
|
raise e
|
|
@@ -532,14 +572,127 @@ class WorkerActor(xo.StatelessActor):
|
|
|
532
572
|
raise ValueError(f"Unsupported model type: {model_type}")
|
|
533
573
|
|
|
534
574
|
@log_sync(logger=logger)
|
|
535
|
-
def unregister_model(self, model_type: str, model_name: str):
|
|
575
|
+
async def unregister_model(self, model_type: str, model_name: str):
|
|
536
576
|
# TODO: centralized model registrations
|
|
537
577
|
if model_type in self._custom_register_type_to_cls:
|
|
538
|
-
_, _, unregister_fn = self._custom_register_type_to_cls[model_type]
|
|
539
|
-
unregister_fn(model_name)
|
|
578
|
+
_, _, unregister_fn, _ = self._custom_register_type_to_cls[model_type]
|
|
579
|
+
unregister_fn(model_name, False)
|
|
540
580
|
else:
|
|
541
581
|
raise ValueError(f"Unsupported model type: {model_type}")
|
|
542
582
|
|
|
583
|
+
@log_async(logger=logger)
|
|
584
|
+
async def list_model_registrations(
|
|
585
|
+
self, model_type: str, detailed: bool = False
|
|
586
|
+
) -> List[Dict[str, Any]]:
|
|
587
|
+
def sort_helper(item):
|
|
588
|
+
assert isinstance(item["model_name"], str)
|
|
589
|
+
return item.get("model_name").lower()
|
|
590
|
+
|
|
591
|
+
if model_type == "LLM":
|
|
592
|
+
from ..model.llm import get_user_defined_llm_families
|
|
593
|
+
|
|
594
|
+
ret = []
|
|
595
|
+
|
|
596
|
+
for family in get_user_defined_llm_families():
|
|
597
|
+
ret.append({"model_name": family.model_name, "is_builtin": False})
|
|
598
|
+
|
|
599
|
+
ret.sort(key=sort_helper)
|
|
600
|
+
return ret
|
|
601
|
+
elif model_type == "embedding":
|
|
602
|
+
from ..model.embedding.custom import get_user_defined_embeddings
|
|
603
|
+
|
|
604
|
+
ret = []
|
|
605
|
+
|
|
606
|
+
for model_spec in get_user_defined_embeddings():
|
|
607
|
+
ret.append({"model_name": model_spec.model_name, "is_builtin": False})
|
|
608
|
+
|
|
609
|
+
ret.sort(key=sort_helper)
|
|
610
|
+
return ret
|
|
611
|
+
elif model_type == "image":
|
|
612
|
+
from ..model.image.custom import get_user_defined_images
|
|
613
|
+
|
|
614
|
+
ret = []
|
|
615
|
+
|
|
616
|
+
for model_spec in get_user_defined_images():
|
|
617
|
+
ret.append({"model_name": model_spec.model_name, "is_builtin": False})
|
|
618
|
+
|
|
619
|
+
ret.sort(key=sort_helper)
|
|
620
|
+
return ret
|
|
621
|
+
elif model_type == "audio":
|
|
622
|
+
from ..model.audio.custom import get_user_defined_audios
|
|
623
|
+
|
|
624
|
+
ret = []
|
|
625
|
+
|
|
626
|
+
for model_spec in get_user_defined_audios():
|
|
627
|
+
ret.append({"model_name": model_spec.model_name, "is_builtin": False})
|
|
628
|
+
|
|
629
|
+
ret.sort(key=sort_helper)
|
|
630
|
+
return ret
|
|
631
|
+
elif model_type == "rerank":
|
|
632
|
+
from ..model.rerank.custom import get_user_defined_reranks
|
|
633
|
+
|
|
634
|
+
ret = []
|
|
635
|
+
|
|
636
|
+
for model_spec in get_user_defined_reranks():
|
|
637
|
+
ret.append({"model_name": model_spec.model_name, "is_builtin": False})
|
|
638
|
+
|
|
639
|
+
ret.sort(key=sort_helper)
|
|
640
|
+
return ret
|
|
641
|
+
else:
|
|
642
|
+
raise ValueError(f"Unsupported model type: {model_type}")
|
|
643
|
+
|
|
644
|
+
@log_sync(logger=logger)
|
|
645
|
+
async def get_model_registration(self, model_type: str, model_name: str) -> Any:
|
|
646
|
+
if model_type == "LLM":
|
|
647
|
+
from ..model.llm import get_user_defined_llm_families
|
|
648
|
+
|
|
649
|
+
for f in get_user_defined_llm_families():
|
|
650
|
+
if f.model_name == model_name:
|
|
651
|
+
return f
|
|
652
|
+
elif model_type == "embedding":
|
|
653
|
+
from ..model.embedding.custom import get_user_defined_embeddings
|
|
654
|
+
|
|
655
|
+
for f in get_user_defined_embeddings():
|
|
656
|
+
if f.model_name == model_name:
|
|
657
|
+
return f
|
|
658
|
+
elif model_type == "image":
|
|
659
|
+
from ..model.image.custom import get_user_defined_images
|
|
660
|
+
|
|
661
|
+
for f in get_user_defined_images():
|
|
662
|
+
if f.model_name == model_name:
|
|
663
|
+
return f
|
|
664
|
+
elif model_type == "audio":
|
|
665
|
+
from ..model.audio.custom import get_user_defined_audios
|
|
666
|
+
|
|
667
|
+
for f in get_user_defined_audios():
|
|
668
|
+
if f.model_name == model_name:
|
|
669
|
+
return f
|
|
670
|
+
elif model_type == "rerank":
|
|
671
|
+
from ..model.rerank.custom import get_user_defined_reranks
|
|
672
|
+
|
|
673
|
+
for f in get_user_defined_reranks():
|
|
674
|
+
if f.model_name == model_name:
|
|
675
|
+
return f
|
|
676
|
+
return None
|
|
677
|
+
|
|
678
|
+
@log_async(logger=logger)
|
|
679
|
+
async def query_engines_by_model_name(self, model_name: str):
|
|
680
|
+
from copy import deepcopy
|
|
681
|
+
|
|
682
|
+
from ..model.llm.llm_family import LLM_ENGINES
|
|
683
|
+
|
|
684
|
+
if model_name not in LLM_ENGINES:
|
|
685
|
+
return None
|
|
686
|
+
|
|
687
|
+
# filter llm_class
|
|
688
|
+
engine_params = deepcopy(LLM_ENGINES[model_name])
|
|
689
|
+
for engine in engine_params:
|
|
690
|
+
params = engine_params[engine]
|
|
691
|
+
for param in params:
|
|
692
|
+
del param["llm_class"]
|
|
693
|
+
|
|
694
|
+
return engine_params
|
|
695
|
+
|
|
543
696
|
async def _get_model_ability(self, model: Any, model_type: str) -> List[str]:
|
|
544
697
|
from ..model.llm.core import LLM
|
|
545
698
|
|
|
@@ -551,6 +704,8 @@ class WorkerActor(xo.StatelessActor):
|
|
|
551
704
|
return ["text_to_image"]
|
|
552
705
|
elif model_type == "audio":
|
|
553
706
|
return ["audio_to_text"]
|
|
707
|
+
elif model_type == "flexible":
|
|
708
|
+
return ["flexible"]
|
|
554
709
|
else:
|
|
555
710
|
assert model_type == "LLM"
|
|
556
711
|
assert isinstance(model, LLM)
|
|
@@ -587,6 +742,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
587
742
|
peft_model_config: Optional[PeftModelConfig] = None,
|
|
588
743
|
request_limits: Optional[int] = None,
|
|
589
744
|
gpu_idx: Optional[Union[int, List[int]]] = None,
|
|
745
|
+
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
|
|
590
746
|
**kwargs,
|
|
591
747
|
):
|
|
592
748
|
# !!! Note that The following code must be placed at the very beginning of this function,
|
|
@@ -669,6 +825,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
669
825
|
model_size_in_billions,
|
|
670
826
|
quantization,
|
|
671
827
|
peft_model_config,
|
|
828
|
+
download_hub,
|
|
672
829
|
**kwargs,
|
|
673
830
|
)
|
|
674
831
|
await self.update_cache_status(model_name, model_description)
|
xinference/deploy/cmdline.py
CHANGED
|
@@ -370,6 +370,9 @@ def worker(
|
|
|
370
370
|
help="Type of model to register (default is 'LLM').",
|
|
371
371
|
)
|
|
372
372
|
@click.option("--file", "-f", type=str, help="Path to the model configuration file.")
|
|
373
|
+
@click.option(
|
|
374
|
+
"--worker-ip", "-w", type=str, help="Specify the ip address of the worker."
|
|
375
|
+
)
|
|
373
376
|
@click.option(
|
|
374
377
|
"--persist",
|
|
375
378
|
"-p",
|
|
@@ -387,6 +390,7 @@ def register_model(
|
|
|
387
390
|
endpoint: Optional[str],
|
|
388
391
|
model_type: str,
|
|
389
392
|
file: str,
|
|
393
|
+
worker_ip: str,
|
|
390
394
|
persist: bool,
|
|
391
395
|
api_key: Optional[str],
|
|
392
396
|
):
|
|
@@ -400,6 +404,7 @@ def register_model(
|
|
|
400
404
|
client.register_model(
|
|
401
405
|
model_type=model_type,
|
|
402
406
|
model=model,
|
|
407
|
+
worker_ip=worker_ip,
|
|
403
408
|
persist=persist,
|
|
404
409
|
)
|
|
405
410
|
|
|
xinference/model/audio/chattts.py
CHANGED
|
@@ -38,21 +38,19 @@ class ChatTTSModel:
|
|
|
38
38
|
self._kwargs = kwargs
|
|
39
39
|
|
|
40
40
|
def load(self):
|
|
41
|
+
import ChatTTS
|
|
41
42
|
import torch
|
|
42
43
|
|
|
43
|
-
from xinference.thirdparty import ChatTTS
|
|
44
|
-
|
|
45
44
|
torch._dynamo.config.cache_size_limit = 64
|
|
46
45
|
torch._dynamo.config.suppress_errors = True
|
|
47
46
|
torch.set_float32_matmul_precision("high")
|
|
48
47
|
self._model = ChatTTS.Chat()
|
|
49
|
-
self._model.
|
|
50
|
-
source="local", local_path=self._model_path, compile=True
|
|
51
|
-
)
|
|
48
|
+
self._model.load(source="custom", custom_path=self._model_path, compile=True)
|
|
52
49
|
|
|
53
50
|
def speech(
|
|
54
51
|
self, input: str, voice: str, response_format: str = "mp3", speed: float = 1.0
|
|
55
52
|
):
|
|
53
|
+
import ChatTTS
|
|
56
54
|
import numpy as np
|
|
57
55
|
import torch
|
|
58
56
|
import torchaudio
|
|
@@ -71,7 +69,9 @@ class ChatTTSModel:
|
|
|
71
69
|
|
|
72
70
|
default = 5
|
|
73
71
|
infer_speed = int(default * speed)
|
|
74
|
-
params_infer_code =
|
|
72
|
+
params_infer_code = ChatTTS.Chat.InferCodeParams(
|
|
73
|
+
prompt=f"[speed_{infer_speed}]", spk_emb=rnd_spk_emb
|
|
74
|
+
)
|
|
75
75
|
|
|
76
76
|
assert self._model is not None
|
|
77
77
|
wavs = self._model.infer([input], params_infer_code=params_infer_code)
|
xinference/model/audio/core.py
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
import logging
|
|
15
15
|
import os
|
|
16
16
|
from collections import defaultdict
|
|
17
|
-
from typing import Dict, List, Optional, Tuple, Union
|
|
17
|
+
from typing import Dict, List, Literal, Optional, Tuple, Union
|
|
18
18
|
|
|
19
19
|
from ...constants import XINFERENCE_CACHE_DIR
|
|
20
20
|
from ..core import CacheableModelSpec, ModelDescription
|
|
@@ -94,7 +94,10 @@ def generate_audio_description(
|
|
|
94
94
|
return res
|
|
95
95
|
|
|
96
96
|
|
|
97
|
-
def match_audio(
|
|
97
|
+
def match_audio(
|
|
98
|
+
model_name: str,
|
|
99
|
+
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
|
|
100
|
+
) -> AudioModelFamilyV1:
|
|
98
101
|
from ..utils import download_from_modelscope
|
|
99
102
|
from . import BUILTIN_AUDIO_MODELS, MODELSCOPE_AUDIO_MODELS
|
|
100
103
|
from .custom import get_user_defined_audios
|
|
@@ -103,17 +106,17 @@ def match_audio(model_name: str) -> AudioModelFamilyV1:
|
|
|
103
106
|
if model_spec.model_name == model_name:
|
|
104
107
|
return model_spec
|
|
105
108
|
|
|
106
|
-
if
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
109
|
+
if download_hub == "huggingface" and model_name in BUILTIN_AUDIO_MODELS:
|
|
110
|
+
logger.debug(f"Audio model {model_name} found in huggingface.")
|
|
111
|
+
return BUILTIN_AUDIO_MODELS[model_name]
|
|
112
|
+
elif download_hub == "modelscope" and model_name in MODELSCOPE_AUDIO_MODELS:
|
|
113
|
+
logger.debug(f"Audio model {model_name} found in ModelScope.")
|
|
114
|
+
return MODELSCOPE_AUDIO_MODELS[model_name]
|
|
115
|
+
elif download_from_modelscope() and model_name in MODELSCOPE_AUDIO_MODELS:
|
|
116
|
+
logger.debug(f"Audio model {model_name} found in ModelScope.")
|
|
117
|
+
return MODELSCOPE_AUDIO_MODELS[model_name]
|
|
118
|
+
elif model_name in BUILTIN_AUDIO_MODELS:
|
|
119
|
+
logger.debug(f"Audio model {model_name} found in huggingface.")
|
|
117
120
|
return BUILTIN_AUDIO_MODELS[model_name]
|
|
118
121
|
else:
|
|
119
122
|
raise ValueError(
|
|
@@ -141,9 +144,14 @@ def get_cache_status(
|
|
|
141
144
|
|
|
142
145
|
|
|
143
146
|
def create_audio_model_instance(
|
|
144
|
-
subpool_addr: str,
|
|
147
|
+
subpool_addr: str,
|
|
148
|
+
devices: List[str],
|
|
149
|
+
model_uid: str,
|
|
150
|
+
model_name: str,
|
|
151
|
+
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
|
|
152
|
+
**kwargs,
|
|
145
153
|
) -> Tuple[Union[WhisperModel, ChatTTSModel], AudioModelDescription]:
|
|
146
|
-
model_spec = match_audio(model_name)
|
|
154
|
+
model_spec = match_audio(model_name, download_hub)
|
|
147
155
|
model_path = cache(model_spec)
|
|
148
156
|
model: Union[WhisperModel, ChatTTSModel]
|
|
149
157
|
if model_spec.model_family == "whisper":
|
xinference/model/core.py
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from abc import ABC, abstractmethod
|
|
16
|
-
from typing import Any, List, Optional, Tuple, Union
|
|
16
|
+
from typing import Any, List, Literal, Optional, Tuple, Union
|
|
17
17
|
|
|
18
18
|
from .._compat import BaseModel
|
|
19
19
|
from ..types import PeftModelConfig
|
|
@@ -55,10 +55,12 @@ def create_model_instance(
|
|
|
55
55
|
model_size_in_billions: Optional[Union[int, str]] = None,
|
|
56
56
|
quantization: Optional[str] = None,
|
|
57
57
|
peft_model_config: Optional[PeftModelConfig] = None,
|
|
58
|
+
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
|
|
58
59
|
**kwargs,
|
|
59
60
|
) -> Tuple[Any, ModelDescription]:
|
|
60
61
|
from .audio.core import create_audio_model_instance
|
|
61
62
|
from .embedding.core import create_embedding_model_instance
|
|
63
|
+
from .flexible.core import create_flexible_model_instance
|
|
62
64
|
from .image.core import create_image_model_instance
|
|
63
65
|
from .llm.core import create_llm_model_instance
|
|
64
66
|
from .rerank.core import create_rerank_model_instance
|
|
@@ -74,13 +76,14 @@ def create_model_instance(
|
|
|
74
76
|
model_size_in_billions,
|
|
75
77
|
quantization,
|
|
76
78
|
peft_model_config,
|
|
79
|
+
download_hub,
|
|
77
80
|
**kwargs,
|
|
78
81
|
)
|
|
79
82
|
elif model_type == "embedding":
|
|
80
83
|
# embedding model doesn't accept trust_remote_code
|
|
81
84
|
kwargs.pop("trust_remote_code", None)
|
|
82
85
|
return create_embedding_model_instance(
|
|
83
|
-
subpool_addr, devices, model_uid, model_name, **kwargs
|
|
86
|
+
subpool_addr, devices, model_uid, model_name, download_hub, **kwargs
|
|
84
87
|
)
|
|
85
88
|
elif model_type == "image":
|
|
86
89
|
kwargs.pop("trust_remote_code", None)
|
|
@@ -90,16 +93,22 @@ def create_model_instance(
|
|
|
90
93
|
model_uid,
|
|
91
94
|
model_name,
|
|
92
95
|
peft_model_config,
|
|
96
|
+
download_hub,
|
|
93
97
|
**kwargs,
|
|
94
98
|
)
|
|
95
99
|
elif model_type == "rerank":
|
|
96
100
|
kwargs.pop("trust_remote_code", None)
|
|
97
101
|
return create_rerank_model_instance(
|
|
98
|
-
subpool_addr, devices, model_uid, model_name, **kwargs
|
|
102
|
+
subpool_addr, devices, model_uid, model_name, download_hub, **kwargs
|
|
99
103
|
)
|
|
100
104
|
elif model_type == "audio":
|
|
101
105
|
kwargs.pop("trust_remote_code", None)
|
|
102
106
|
return create_audio_model_instance(
|
|
107
|
+
subpool_addr, devices, model_uid, model_name, download_hub, **kwargs
|
|
108
|
+
)
|
|
109
|
+
elif model_type == "flexible":
|
|
110
|
+
kwargs.pop("trust_remote_code", None)
|
|
111
|
+
return create_flexible_model_instance(
|
|
103
112
|
subpool_addr, devices, model_uid, model_name, **kwargs
|
|
104
113
|
)
|
|
105
114
|
else:
|
|
xinference/model/embedding/core.py
CHANGED
|
@@ -16,7 +16,7 @@ import gc
|
|
|
16
16
|
import logging
|
|
17
17
|
import os
|
|
18
18
|
from collections import defaultdict
|
|
19
|
-
from typing import Dict, List, Optional, Tuple, Union, no_type_check
|
|
19
|
+
from typing import Dict, List, Literal, Optional, Tuple, Union, no_type_check
|
|
20
20
|
|
|
21
21
|
import numpy as np
|
|
22
22
|
|
|
@@ -305,7 +305,10 @@ class EmbeddingModel:
|
|
|
305
305
|
)
|
|
306
306
|
|
|
307
307
|
|
|
308
|
-
def match_embedding(
|
|
308
|
+
def match_embedding(
|
|
309
|
+
model_name: str,
|
|
310
|
+
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
|
|
311
|
+
) -> EmbeddingModelSpec:
|
|
309
312
|
from ..utils import download_from_modelscope
|
|
310
313
|
from . import BUILTIN_EMBEDDING_MODELS, MODELSCOPE_EMBEDDING_MODELS
|
|
311
314
|
from .custom import get_user_defined_embeddings
|
|
@@ -315,29 +318,35 @@ def match_embedding(model_name: str) -> EmbeddingModelSpec:
|
|
|
315
318
|
if model_name == model_spec.model_name:
|
|
316
319
|
return model_spec
|
|
317
320
|
|
|
318
|
-
if
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
321
|
+
if download_hub == "modelscope" and model_name in MODELSCOPE_EMBEDDING_MODELS:
|
|
322
|
+
logger.debug(f"Embedding model {model_name} found in ModelScope.")
|
|
323
|
+
return MODELSCOPE_EMBEDDING_MODELS[model_name]
|
|
324
|
+
elif download_hub == "huggingface" and model_name in BUILTIN_EMBEDDING_MODELS:
|
|
325
|
+
logger.debug(f"Embedding model {model_name} found in Huggingface.")
|
|
326
|
+
return BUILTIN_EMBEDDING_MODELS[model_name]
|
|
327
|
+
elif download_from_modelscope() and model_name in MODELSCOPE_EMBEDDING_MODELS:
|
|
328
|
+
logger.debug(f"Embedding model {model_name} found in ModelScope.")
|
|
329
|
+
return MODELSCOPE_EMBEDDING_MODELS[model_name]
|
|
330
|
+
elif model_name in BUILTIN_EMBEDDING_MODELS:
|
|
331
|
+
logger.debug(f"Embedding model {model_name} found in Huggingface.")
|
|
329
332
|
return BUILTIN_EMBEDDING_MODELS[model_name]
|
|
330
333
|
else:
|
|
331
334
|
raise ValueError(
|
|
332
335
|
f"Embedding model {model_name} not found, available"
|
|
333
|
-
f"
|
|
336
|
+
f"Huggingface: {BUILTIN_EMBEDDING_MODELS.keys()}"
|
|
337
|
+
f"ModelScope: {MODELSCOPE_EMBEDDING_MODELS.keys()}"
|
|
334
338
|
)
|
|
335
339
|
|
|
336
340
|
|
|
337
341
|
def create_embedding_model_instance(
|
|
338
|
-
subpool_addr: str,
|
|
342
|
+
subpool_addr: str,
|
|
343
|
+
devices: List[str],
|
|
344
|
+
model_uid: str,
|
|
345
|
+
model_name: str,
|
|
346
|
+
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
|
|
347
|
+
**kwargs,
|
|
339
348
|
) -> Tuple[EmbeddingModel, EmbeddingModelDescription]:
|
|
340
|
-
model_spec = match_embedding(model_name)
|
|
349
|
+
model_spec = match_embedding(model_name, download_hub)
|
|
341
350
|
model_path = cache(model_spec)
|
|
342
351
|
model = EmbeddingModel(model_uid, model_path, **kwargs)
|
|
343
352
|
model_description = EmbeddingModelDescription(
|
|
xinference/model/flexible/__init__.py
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Copyright 2022-2024 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import codecs
|
|
16
|
+
import json
|
|
17
|
+
import os
|
|
18
|
+
|
|
19
|
+
from ...constants import XINFERENCE_MODEL_DIR
|
|
20
|
+
from .core import (
|
|
21
|
+
FLEXIBLE_MODEL_DESCRIPTIONS,
|
|
22
|
+
FlexibleModel,
|
|
23
|
+
FlexibleModelSpec,
|
|
24
|
+
generate_flexible_model_description,
|
|
25
|
+
get_flexible_model_descriptions,
|
|
26
|
+
get_flexible_models,
|
|
27
|
+
register_flexible_model,
|
|
28
|
+
unregister_flexible_model,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
model_dir = os.path.join(XINFERENCE_MODEL_DIR, "flexible")
|
|
32
|
+
if os.path.isdir(model_dir):
|
|
33
|
+
for f in os.listdir(model_dir):
|
|
34
|
+
with codecs.open(os.path.join(model_dir, f), encoding="utf-8") as fd:
|
|
35
|
+
model_spec = FlexibleModelSpec.parse_obj(json.load(fd))
|
|
36
|
+
register_flexible_model(model_spec, persist=False)
|
|
37
|
+
|
|
38
|
+
# register model description
|
|
39
|
+
for model in get_flexible_models():
|
|
40
|
+
FLEXIBLE_MODEL_DESCRIPTIONS.update(generate_flexible_model_description(model))
|