xinference 0.10.0__py3-none-any.whl → 0.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +34 -15
- xinference/client/oscar/actor_client.py +4 -3
- xinference/client/restful/restful_client.py +40 -18
- xinference/core/supervisor.py +48 -9
- xinference/core/worker.py +13 -8
- xinference/deploy/cmdline.py +22 -9
- xinference/model/audio/__init__.py +40 -1
- xinference/model/audio/core.py +25 -45
- xinference/model/audio/custom.py +148 -0
- xinference/model/core.py +6 -9
- xinference/model/embedding/core.py +1 -2
- xinference/model/embedding/model_spec.json +24 -0
- xinference/model/embedding/model_spec_modelscope.json +24 -0
- xinference/model/image/core.py +12 -4
- xinference/model/image/stable_diffusion/core.py +8 -7
- xinference/model/llm/__init__.py +0 -6
- xinference/model/llm/core.py +9 -14
- xinference/model/llm/ggml/llamacpp.py +2 -10
- xinference/model/llm/llm_family.json +507 -7
- xinference/model/llm/llm_family.py +41 -4
- xinference/model/llm/llm_family_modelscope.json +260 -0
- xinference/model/llm/pytorch/baichuan.py +4 -3
- xinference/model/llm/pytorch/chatglm.py +5 -2
- xinference/model/llm/pytorch/core.py +37 -41
- xinference/model/llm/pytorch/falcon.py +6 -5
- xinference/model/llm/pytorch/internlm2.py +5 -2
- xinference/model/llm/pytorch/llama_2.py +6 -5
- xinference/model/llm/pytorch/qwen_vl.py +2 -0
- xinference/model/llm/pytorch/vicuna.py +4 -3
- xinference/model/llm/pytorch/yi_vl.py +4 -2
- xinference/model/llm/utils.py +42 -4
- xinference/model/llm/vllm/core.py +54 -6
- xinference/model/rerank/core.py +26 -12
- xinference/model/rerank/model_spec.json +24 -0
- xinference/model/rerank/model_spec_modelscope.json +25 -1
- xinference/model/utils.py +12 -1
- xinference/thirdparty/omnilmm/chat.py +1 -1
- xinference/types.py +70 -19
- xinference/utils.py +1 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.26fdbfbe.js +3 -0
- xinference/web/ui/build/static/js/main.26fdbfbe.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/15e2cf8cd8d0989719b6349428ff576f9009ff4c2dcc52378be0bd938e82495e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1870cd6f7054d04e049e363c0a85526584fe25519378609d2838e28d7492bbf1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1e86938a0cdf706d21e99b21f5d868fa247c0c88b26807047e26dcdc4d9a9db3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3c2f277c93c5f1638e08db38df0d0fb4e58d1c5571aea03241a5c04ff4094704.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5393569d846332075b93b55656716a34f50e0a8c970be789502d7e6c49755fd7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/59ce49eae0f486af4c5034d4d2f9ca77c3ec3a32ecc560085caf5ef482b5f4c9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63a4c48f0326d071c7772c46598215c006ae41fd3d4ff3577fe717de66ad6e89.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b9cbcb6d77ba21b22c6950b6fb5b305d23c19cf747f99f7d48b6b046f8f7b1b0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/de0299226173b0662b573f49e3992220f6611947073bd66ac079728a8bc8837d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e6eccc9aa641e7da833492e27846dc965f9750281420977dc84654ca6ed221e4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e9b52d171223bb59fb918316297a051cdfd42dd453e8260fd918e90bc0a4ebdf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f4d5d1a41892a754c1ee0237450d804b20612d1b657945b59e564161ea47aa7a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f9290c0738db50065492ceedc6a4af25083fe18399b7c44d942273349ad9e643.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/fad4cd70de36ef6e6d5f8fd74a10ded58d964a8a91ef7681693fbb8376552da7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +1 -0
- {xinference-0.10.0.dist-info → xinference-0.10.2.dist-info}/METADATA +13 -10
- {xinference-0.10.0.dist-info → xinference-0.10.2.dist-info}/RECORD +71 -74
- xinference/model/llm/ggml/ctransformers.py +0 -281
- xinference/model/llm/ggml/ctransformers_util.py +0 -161
- xinference/web/ui/build/static/js/main.98516614.js +0 -3
- xinference/web/ui/build/static/js/main.98516614.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0bd70b1ecf307e2681318e864f4692305b6350c8683863007f4caf2f9ac33b6e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/139969fd25258eb7decc9505f30b779089bba50c402bb5c663008477c7bff73b.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/18e5d5422e2464abf4a3e6d38164570e2e426e0a921e9a2628bbae81b18da353.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3e055de705e397e1d413d7f429589b1a98dd78ef378b97f0cdb462c5f2487d5e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3f357ab57b8e7fade54c667f0e0ebf2787566f72bfdca0fea14e395b5c203753.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/52aa27272b4b9968f62666262b47661cb1992336a2aff3b13994cc36877b3ec3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/60c4b98d8ea7479fb0c94cfd19c8128f17bd7e27a1e73e6dd9adf6e9d88d18eb.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/7e094845f611802b024b57439cbf911038169d06cdf6c34a72a7277f35aa71a4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/98b7ef307f436affe13d75a4f265b27e828ccc2b10ffae6513abe2681bc11971.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9d7c49815d97539207e5aab2fb967591b5fed7791218a0762539efc9491f36af.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b400cfc9db57fa6c70cd2bad055b73c5079fde0ed37974009d898083f6af8cd8.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d0d0b591d9adaf42b83ad6633f8b7c118541a4b80ea957c303d3bf9b86fbad0a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e1d9b2ae4e1248658704bc6bfc5d6160dcd1a9e771ea4ae8c1fed0aaddeedd29.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +0 -1
- /xinference/web/ui/build/static/js/{main.98516614.js.LICENSE.txt → main.26fdbfbe.js.LICENSE.txt} +0 -0
- {xinference-0.10.0.dist-info → xinference-0.10.2.dist-info}/LICENSE +0 -0
- {xinference-0.10.0.dist-info → xinference-0.10.2.dist-info}/WHEEL +0 -0
- {xinference-0.10.0.dist-info → xinference-0.10.2.dist-info}/entry_points.txt +0 -0
- {xinference-0.10.0.dist-info → xinference-0.10.2.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json

 version_json = '''
 {
- "date": "2024-
+ "date": "2024-04-19T11:39:12+0800",
 "dirty": false,
 "error": null,
- "full-revisionid": "
- "version": "0.10.0"
+ "full-revisionid": "f19e85be09bce966e0c0b3e01bc5690eb6016398",
+ "version": "0.10.2"
 }
 '''  # END VERSION_JSON

xinference/api/restful_api.py
CHANGED
@@ -64,6 +64,7 @@ from ..types import (
     CreateChatCompletion,
     CreateCompletion,
     ImageList,
+    PeftModelConfig,
     max_tokens_field,
 )
 from .oauth2.auth_service import AuthService
@@ -692,9 +693,7 @@ class RESTfulAPI:
         replica = payload.get("replica", 1)
         n_gpu = payload.get("n_gpu", "auto")
         request_limits = payload.get("request_limits", None)
-        peft_model_path = payload.get("peft_model_path", None)
-        image_lora_load_kwargs = payload.get("image_lora_load_kwargs", None)
-        image_lora_fuse_kwargs = payload.get("image_lora_fuse_kwargs", None)
+        peft_model_config = payload.get("peft_model_config", None)
         worker_ip = payload.get("worker_ip", None)
         gpu_idx = payload.get("gpu_idx", None)

@@ -708,9 +707,7 @@ class RESTfulAPI:
             "replica",
             "n_gpu",
             "request_limits",
-            "peft_model_path",
-            "image_lora_load_kwargs",
-            "image_lora_fuse_kwargs",
+            "peft_model_config",
             "worker_ip",
             "gpu_idx",
         }
@@ -725,6 +722,11 @@ class RESTfulAPI:
                 detail="Invalid input. Please specify the model name",
             )

+        if peft_model_config is not None:
+            peft_model_config = PeftModelConfig.from_dict(peft_model_config)
+        else:
+            peft_model_config = None
+
         try:
             model_uid = await (await self._get_supervisor_ref()).launch_builtin_model(
                 model_uid=model_uid,
@@ -737,9 +739,7 @@ class RESTfulAPI:
                 n_gpu=n_gpu,
                 request_limits=request_limits,
                 wait_ready=wait_ready,
-                peft_model_path=peft_model_path,
-                image_lora_load_kwargs=image_lora_load_kwargs,
-                image_lora_fuse_kwargs=image_lora_fuse_kwargs,
+                peft_model_config=peft_model_config,
                 worker_ip=worker_ip,
                 gpu_idx=gpu_idx,
                 **kwargs,
@@ -1007,8 +1007,16 @@ class RESTfulAPI:
             raise HTTPException(status_code=500, detail=str(e))

     async def create_embedding(self, request: Request) -> Response:
-        body = CreateEmbeddingRequest.parse_obj(await request.json())
+        payload = await request.json()
+        body = CreateEmbeddingRequest.parse_obj(payload)
         model_uid = body.model
+        exclude = {
+            "model",
+            "input",
+            "user",
+            "encoding_format",
+        }
+        kwargs = {key: value for key, value in payload.items() if key not in exclude}

         try:
             model = await (await self._get_supervisor_ref()).get_model(model_uid)
@@ -1022,7 +1030,7 @@ class RESTfulAPI:
             raise HTTPException(status_code=500, detail=str(e))

         try:
-            embedding = await model.create_embedding(body.input)
+            embedding = await model.create_embedding(body.input, **kwargs)
             return Response(embedding, media_type="application/json")
         except RuntimeError as re:
             logger.error(re, exc_info=True)
@@ -1035,8 +1043,15 @@ class RESTfulAPI:
             raise HTTPException(status_code=500, detail=str(e))

     async def rerank(self, request: Request) -> Response:
-        body = RerankRequest.parse_obj(await request.json())
+        payload = await request.json()
+        body = RerankRequest.parse_obj(payload)
         model_uid = body.model
+        kwargs = {
+            key: value
+            for key, value in payload.items()
+            if key not in RerankRequest.__annotations__.keys()
+        }
+
         try:
             model = await (await self._get_supervisor_ref()).get_model(model_uid)
         except ValueError as ve:
@@ -1055,6 +1070,7 @@ class RESTfulAPI:
                 top_n=body.top_n,
                 max_chunks_per_doc=body.max_chunks_per_doc,
                 return_documents=body.return_documents,
+                **kwargs,
             )
             return Response(scores, media_type="application/json")
         except RuntimeError as re:
@@ -1345,9 +1361,12 @@ class RESTfulAPI:
                 detail=f"Only {function_call_models} support tool messages",
             )
         if body.tools and body.stream:
-            raise HTTPException(
-                status_code=400, detail="Tool calls does not support stream"
-            )
+            is_vllm = await model.is_vllm_backend()
+            if not is_vllm or model_family not in ["qwen-chat", "qwen1.5-chat"]:
+                raise HTTPException(
+                    status_code=400,
+                    detail="Streaming support for tool calls is available only when using vLLM backend and Qwen models.",
+                )

         if body.stream:

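Taken together, the embedding and rerank hunks above change the HTTP contract: JSON fields beyond the declared schema (model, input, user, encoding_format for embeddings; the annotated RerankRequest fields for rerank) are now forwarded to the model as keyword arguments. A minimal sketch of exercising this against a running server; the endpoint address and model uid are placeholders, and "normalize" is only an illustrative extra field, not a documented parameter:

    import requests

    base_url = "http://127.0.0.1:9997"  # assumed local xinference endpoint

    # Fields other than model/input/user/encoding_format are forwarded to the
    # embedding model as **kwargs by the new create_embedding handler.
    resp = requests.post(
        f"{base_url}/v1/embeddings",
        json={
            "model": "my-embedding-model",  # placeholder model uid
            "input": ["hello", "world"],
            "normalize": True,  # illustrative extra kwarg, passed through verbatim
        },
    )
    print(resp.json())

The same passthrough applies to /v1/rerank: anything not declared on RerankRequest ends up in the model's rerank(**kwargs).
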
xinference/client/oscar/actor_client.py
CHANGED
@@ -111,7 +111,7 @@ class ClientIteratorWrapper(AsyncIterator):


 class EmbeddingModelHandle(ModelHandle):
-    def create_embedding(self, input: Union[str, List[str]]) -> bytes:
+    def create_embedding(self, input: Union[str, List[str]], **kwargs) -> bytes:
         """
         Creates an embedding vector representing the input text.

@@ -128,7 +128,7 @@ class EmbeddingModelHandle(ModelHandle):
             machine learning models and algorithms.
         """

-        coro = self._model_ref.create_embedding(input)
+        coro = self._model_ref.create_embedding(input, **kwargs)
         return orjson.loads(self._isolation.call(coro))


@@ -140,6 +140,7 @@ class RerankModelHandle(ModelHandle):
         top_n: Optional[int],
         max_chunks_per_doc: Optional[int],
         return_documents: Optional[bool],
+        **kwargs,
     ):
         """
         Returns an ordered list of documents ordered by their relevance to the provided query.
@@ -163,7 +164,7 @@ class RerankModelHandle(ModelHandle):

         """
         coro = self._model_ref.rerank(
-            documents, query, top_n, max_chunks_per_doc, return_documents
+            documents, query, top_n, max_chunks_per_doc, return_documents, **kwargs
         )
         results = orjson.loads(self._isolation.call(coro))
         for r in results["results"]:
xinference/client/restful/restful_client.py
CHANGED
@@ -18,6 +18,8 @@ from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Union

 import requests

+from ...model.utils import convert_float_to_int_or_str
+from ...types import LoRA, PeftModelConfig
 from ..common import streaming_response_iterator

 if TYPE_CHECKING:
@@ -80,7 +82,7 @@ class RESTfulModelHandle:


 class RESTfulEmbeddingModelHandle(RESTfulModelHandle):
-    def create_embedding(self, input: Union[str, List[str]]) -> "Embedding":
+    def create_embedding(self, input: Union[str, List[str]], **kwargs) -> "Embedding":
         """
         Create an Embedding from user input via RESTful APIs.

@@ -102,7 +104,11 @@ class RESTfulEmbeddingModelHandle(RESTfulModelHandle):

         """
         url = f"{self._base_url}/v1/embeddings"
-        request_body = {"model": self._model_uid, "input": input}
+        request_body = {
+            "model": self._model_uid,
+            "input": input,
+        }
+        request_body.update(kwargs)
         response = requests.post(url, json=request_body, headers=self.auth_headers)
         if response.status_code != 200:
             raise RuntimeError(
@@ -121,6 +127,7 @@ class RESTfulRerankModelHandle(RESTfulModelHandle):
         top_n: Optional[int] = None,
         max_chunks_per_doc: Optional[int] = None,
         return_documents: Optional[bool] = None,
+        **kwargs,
     ):
         """
         Returns an ordered list of documents ordered by their relevance to the provided query.
@@ -156,6 +163,7 @@ class RESTfulRerankModelHandle(RESTfulModelHandle):
             "max_chunks_per_doc": max_chunks_per_doc,
             "return_documents": return_documents,
         }
+        request_body.update(kwargs)
         response = requests.post(url, json=request_body, headers=self.auth_headers)
         if response.status_code != 200:
             raise RuntimeError(
@@ -740,7 +748,7 @@ class Client:
     def launch_speculative_llm(
         self,
         model_name: str,
-        model_size_in_billions: Optional[int],
+        model_size_in_billions: Optional[Union[int, str, float]],
         quantization: Optional[str],
         draft_model_name: str,
         draft_model_size_in_billions: Optional[int],
@@ -761,6 +769,10 @@ class Client:
             "`launch_speculative_llm` is an experimental feature and the API may change in the future."
         )

+        # convert float to int or string since the RESTful API does not accept float.
+        if isinstance(model_size_in_billions, float):
+            model_size_in_billions = convert_float_to_int_or_str(model_size_in_billions)
+
         payload = {
             "model_uid": None,
             "model_name": model_name,
@@ -788,15 +800,13 @@ class Client:
         model_name: str,
         model_type: str = "LLM",
         model_uid: Optional[str] = None,
-        model_size_in_billions: Optional[Union[int, str]] = None,
+        model_size_in_billions: Optional[Union[int, str, float]] = None,
         model_format: Optional[str] = None,
         quantization: Optional[str] = None,
         replica: int = 1,
         n_gpu: Optional[Union[int, str]] = "auto",
+        peft_model_config: Optional[Dict] = None,
         request_limits: Optional[int] = None,
-        peft_model_path: Optional[str] = None,
-        image_lora_load_kwargs: Optional[Dict] = None,
-        image_lora_fuse_kwargs: Optional[Dict] = None,
         worker_ip: Optional[str] = None,
         gpu_idx: Optional[Union[int, List[int]]] = None,
         **kwargs,
@@ -812,7 +822,7 @@ class Client:
             type of model.
         model_uid: str
             UID of model, auto generate a UUID if is None.
-        model_size_in_billions: Optional[int]
+        model_size_in_billions: Optional[Union[int, str, float]]
             The size (in billions) of the model.
         model_format: Optional[str]
             The format of the model.
@@ -823,15 +833,13 @@ class Client:
         n_gpu: Optional[Union[int, str]],
             The number of GPUs used by the model, default is "auto".
             ``n_gpu=None`` means cpu only, ``n_gpu=auto`` lets the system automatically determine the best number of GPUs to use.
+        peft_model_config: Optional[Dict]
+            - "lora_list": A List of PEFT (Parameter-Efficient Fine-Tuning) model and path.
+            - "image_lora_load_kwargs": A Dict of lora load parameters for image model
+            - "image_lora_fuse_kwargs": A Dict of lora fuse parameters for image model
         request_limits: Optional[int]
-            The number of request limits for this model
+            The number of request limits for this model, default is None.
             ``request_limits=None`` means no limits for this model.
-        peft_model_path: Optional[str]
-            PEFT (Parameter-Efficient Fine-Tuning) model path.
-        image_lora_load_kwargs: Optional[Dict]
-            lora load parameters for image model
-        image_lora_fuse_kwargs: Optional[Dict]
-            lora fuse parameters for image model
         worker_ip: Optional[str]
             Specify the worker ip where the model is located in a distributed scenario.
         gpu_idx: Optional[Union[int, List[int]]]
@@ -848,9 +856,26 @@ class Client:

         url = f"{self.base_url}/v1/models"

+        if peft_model_config is not None:
+            lora_list = [
+                LoRA.from_dict(model) for model in peft_model_config["lora_list"]
+            ]
+            peft_model = PeftModelConfig(
+                lora_list,
+                peft_model_config["image_lora_load_kwargs"],
+                peft_model_config["image_lora_fuse_kwargs"],
+            )
+        else:
+            peft_model = None
+
+        # convert float to int or string since the RESTful API does not accept float.
+        if isinstance(model_size_in_billions, float):
+            model_size_in_billions = convert_float_to_int_or_str(model_size_in_billions)
+
         payload = {
             "model_uid": model_uid,
             "model_name": model_name,
+            "peft_model_config": peft_model.to_dict() if peft_model else None,
             "model_type": model_type,
             "model_size_in_billions": model_size_in_billions,
             "model_format": model_format,
@@ -858,9 +883,6 @@ class Client:
             "replica": replica,
             "n_gpu": n_gpu,
             "request_limits": request_limits,
-            "peft_model_path": peft_model_path,
-            "image_lora_load_kwargs": image_lora_load_kwargs,
-            "image_lora_fuse_kwargs": image_lora_fuse_kwargs,
             "worker_ip": worker_ip,
             "gpu_idx": gpu_idx,
         }
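With the new signature, a LoRA adaptor is specified through a single peft_model_config dict instead of the removed peft_model_path / image_lora_load_kwargs / image_lora_fuse_kwargs arguments; note that Client.launch_model indexes all three keys directly, so each must be present. A sketch with placeholder model name and paths:

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")  # assumed local endpoint

    model_uid = client.launch_model(
        model_name="llama-2-chat",  # placeholder built-in model name
        model_size_in_billions=7,
        model_format="pytorch",
        quantization="none",
        peft_model_config={
            # each entry is turned into a LoRA object via LoRA.from_dict
            "lora_list": [{"lora_name": "my_adaptor", "local_path": "/path/to/lora"}],
            "image_lora_load_kwargs": None,
            "image_lora_fuse_kwargs": None,
        },
    )
    print(model_uid)
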
xinference/core/supervisor.py
CHANGED
@@ -30,6 +30,7 @@ from ..constants import (
 )
 from ..core import ModelActor
 from ..core.status_guard import InstanceInfo, LaunchStatus
+from ..types import PeftModelConfig
 from .metrics import record_metrics
 from .resource import GPUStatus, ResourceStatus
 from .utils import (
@@ -135,6 +136,13 @@ class SupervisorActor(xo.StatelessActor):
             EventCollectorActor, address=self.address, uid=EventCollectorActor.uid()
         )

+        from ..model.audio import (
+            CustomAudioModelFamilyV1,
+            generate_audio_description,
+            get_audio_model_descriptions,
+            register_audio,
+            unregister_audio,
+        )
         from ..model.embedding import (
             CustomEmbeddingModelSpec,
             generate_embedding_description,
@@ -177,6 +185,12 @@ class SupervisorActor(xo.StatelessActor):
                 unregister_rerank,
                 generate_rerank_description,
             ),
+            "audio": (
+                CustomAudioModelFamilyV1,
+                register_audio,
+                unregister_audio,
+                generate_audio_description,
+            ),
         }

         # record model version
@@ -185,6 +199,7 @@ class SupervisorActor(xo.StatelessActor):
         model_version_infos.update(get_embedding_model_descriptions())
         model_version_infos.update(get_rerank_model_descriptions())
         model_version_infos.update(get_image_model_descriptions())
+        model_version_infos.update(get_audio_model_descriptions())
         await self._cache_tracker_ref.record_model_version(
             model_version_infos, self.address
         )
@@ -483,6 +498,7 @@ class SupervisorActor(xo.StatelessActor):
             return ret
         elif model_type == "audio":
             from ..model.audio import BUILTIN_AUDIO_MODELS
+            from ..model.audio.custom import get_user_defined_audios

             ret = []
             for model_name, family in BUILTIN_AUDIO_MODELS.items():
@@ -491,6 +507,16 @@ class SupervisorActor(xo.StatelessActor):
                 else:
                     ret.append({"model_name": model_name, "is_builtin": True})

+            for model_spec in get_user_defined_audios():
+                if detailed:
+                    ret.append(
+                        await self._to_audio_model_reg(model_spec, is_builtin=False)
+                    )
+                else:
+                    ret.append(
+                        {"model_name": model_spec.model_name, "is_builtin": False}
+                    )
+
             ret.sort(key=sort_helper)
             return ret
         elif model_type == "rerank":
@@ -548,8 +574,9 @@ class SupervisorActor(xo.StatelessActor):
             raise ValueError(f"Model {model_name} not found")
         elif model_type == "audio":
             from ..model.audio import BUILTIN_AUDIO_MODELS
+            from ..model.audio.custom import get_user_defined_audios

-            for f in BUILTIN_AUDIO_MODELS.values():
+            for f in list(BUILTIN_AUDIO_MODELS.values()) + get_user_defined_audios():
                 if f.model_name == model_name:
                     return f
             raise ValueError(f"Model {model_name} not found")
@@ -654,7 +681,7 @@ class SupervisorActor(xo.StatelessActor):
         self,
         model_uid: Optional[str],
         model_name: str,
-        model_size_in_billions: Optional[int],
+        model_size_in_billions: Optional[Union[int, str]],
         quantization: Optional[str],
         draft_model_name: str,
         draft_model_size_in_billions: Optional[int],
@@ -714,7 +741,7 @@ class SupervisorActor(xo.StatelessActor):
         self,
         model_uid: Optional[str],
         model_name: str,
-        model_size_in_billions: Optional[int],
+        model_size_in_billions: Optional[Union[int, str]],
         model_format: Optional[str],
         quantization: Optional[str],
         model_type: Optional[str],
@@ -723,9 +750,7 @@ class SupervisorActor(xo.StatelessActor):
         request_limits: Optional[int] = None,
         wait_ready: bool = True,
         model_version: Optional[str] = None,
-        peft_model_path: Optional[str] = None,
-        image_lora_load_kwargs: Optional[Dict] = None,
-        image_lora_fuse_kwargs: Optional[Dict] = None,
+        peft_model_config: Optional[PeftModelConfig] = None,
         worker_ip: Optional[str] = None,
         gpu_idx: Optional[Union[int, List[int]]] = None,
         **kwargs,
@@ -777,9 +802,7 @@ class SupervisorActor(xo.StatelessActor):
             model_type=model_type,
             n_gpu=n_gpu,
             request_limits=request_limits,
-            peft_model_path=peft_model_path,
-            image_lora_load_kwargs=image_lora_load_kwargs,
-            image_lora_fuse_kwargs=image_lora_fuse_kwargs,
+            peft_model_config=peft_model_config,
             gpu_idx=gpu_idx,
             **kwargs,
         )
@@ -870,6 +893,12 @@ class SupervisorActor(xo.StatelessActor):
                     address,
                     dead_models,
                 )
+                for replica_model_uid in dead_models:
+                    model_uid, _, _ = parse_replica_model_uid(replica_model_uid)
+                    self._model_uid_to_replica_info.pop(model_uid, None)
+                    self._replica_model_uid_to_worker.pop(
+                        replica_model_uid, None
+                    )
                 dead_nodes.append(address)
             elif (
                 status.failure_remaining_count
@@ -979,6 +1008,16 @@ class SupervisorActor(xo.StatelessActor):

     @log_async(logger=logger)
     async def remove_worker(self, worker_address: str):
+        uids_to_remove = []
+        for model_uid in self._replica_model_uid_to_worker:
+            if self._replica_model_uid_to_worker[model_uid].address == worker_address:
+                uids_to_remove.append(model_uid)
+
+        for replica_model_uid in uids_to_remove:
+            model_uid, _, _ = parse_replica_model_uid(replica_model_uid)
+            self._model_uid_to_replica_info.pop(model_uid, None)
+            self._replica_model_uid_to_worker.pop(replica_model_uid, None)
+
         if worker_address in self._worker_address_to_worker:
             del self._worker_address_to_worker[worker_address]
             logger.debug("Worker %s has been removed successfully", worker_address)
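The two supervisor additions apply the same bookkeeping rule: when a worker dies or is removed, every replica model uid it hosted must be dropped from _replica_model_uid_to_worker, and the parent entry from _model_uid_to_replica_info. A standalone sketch of that invariant using plain dicts; the replica-uid layout here is an assumption about what parse_replica_model_uid extracts:

    from typing import Dict, List, Tuple

    def parse_replica_model_uid(replica_model_uid: str) -> Tuple[str, int, int]:
        # Assumed layout "<model_uid>-<replica>-<rep_id>"; the real helper lives
        # in xinference.core.utils and may differ in detail.
        model_uid, replica, rep_id = replica_model_uid.rsplit("-", 2)
        return model_uid, int(replica), int(rep_id)

    def remove_worker(
        worker_address: str,
        replica_uid_to_worker_addr: Dict[str, str],
        model_uid_to_replica_info: Dict[str, dict],
    ) -> None:
        # Collect first, then pop: the diff builds uids_to_remove before mutating,
        # since deleting from a dict while iterating over it raises RuntimeError.
        uids_to_remove: List[str] = [
            uid
            for uid, addr in replica_uid_to_worker_addr.items()
            if addr == worker_address
        ]
        for replica_model_uid in uids_to_remove:
            model_uid, _, _ = parse_replica_model_uid(replica_model_uid)
            model_uid_to_replica_info.pop(model_uid, None)
            replica_uid_to_worker_addr.pop(replica_model_uid, None)
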
xinference/core/worker.py
CHANGED
@@ -36,6 +36,7 @@ from ..core import ModelActor
 from ..core.status_guard import LaunchStatus
 from ..device_utils import gpu_count
 from ..model.core import ModelDescription, create_model_instance
+from ..types import PeftModelConfig
 from .event import Event, EventCollectorActor, EventType
 from .metrics import launch_metrics_export_server, record_metrics
 from .resource import gather_node_info
@@ -195,6 +196,12 @@ class WorkerActor(xo.StatelessActor):
             logger.info("Purge cache directory: %s", XINFERENCE_CACHE_DIR)
             purge_dir(XINFERENCE_CACHE_DIR)

+        from ..model.audio import (
+            CustomAudioModelFamilyV1,
+            get_audio_model_descriptions,
+            register_audio,
+            unregister_audio,
+        )
         from ..model.embedding import (
             CustomEmbeddingModelSpec,
             get_embedding_model_descriptions,
@@ -223,6 +230,7 @@ class WorkerActor(xo.StatelessActor):
                 unregister_embedding,
             ),
             "rerank": (CustomRerankModelSpec, register_rerank, unregister_rerank),
+            "audio": (CustomAudioModelFamilyV1, register_audio, unregister_audio),
         }

         # record model version
@@ -231,6 +239,7 @@ class WorkerActor(xo.StatelessActor):
         model_version_infos.update(get_embedding_model_descriptions())
         model_version_infos.update(get_rerank_model_descriptions())
         model_version_infos.update(get_image_model_descriptions())
+        model_version_infos.update(get_audio_model_descriptions())
         await self._cache_tracker_ref.record_model_version(
             model_version_infos, self.address
         )
@@ -593,14 +602,12 @@ class WorkerActor(xo.StatelessActor):
         self,
         model_uid: str,
         model_name: str,
-        model_size_in_billions: Optional[int],
+        model_size_in_billions: Optional[Union[int, str]],
         model_format: Optional[str],
         quantization: Optional[str],
         model_type: str = "LLM",
         n_gpu: Optional[Union[int, str]] = "auto",
-        peft_model_path: Optional[str] = None,
-        image_lora_load_kwargs: Optional[Dict] = None,
-        image_lora_fuse_kwargs: Optional[Dict] = None,
+        peft_model_config: Optional[PeftModelConfig] = None,
         request_limits: Optional[int] = None,
         gpu_idx: Optional[Union[int, List[int]]] = None,
         **kwargs,
@@ -638,7 +645,7 @@ class WorkerActor(xo.StatelessActor):
         if isinstance(n_gpu, str) and n_gpu != "auto":
             raise ValueError("Currently `n_gpu` only supports `auto`.")

-        if peft_model_path is not None:
+        if peft_model_config is not None:
             if model_type in ("embedding", "rerank"):
                 raise ValueError(
                     f"PEFT adaptors cannot be applied to embedding or rerank models."
@@ -669,9 +676,7 @@ class WorkerActor(xo.StatelessActor):
             model_format,
             model_size_in_billions,
             quantization,
-            peft_model_path,
-            image_lora_load_kwargs,
-            image_lora_fuse_kwargs,
+            peft_model_config,
             is_local_deployment,
             **kwargs,
         )
xinference/deploy/cmdline.py
CHANGED
@@ -640,10 +640,11 @@ def list_model_registrations(
     help='The number of GPUs used by the model, default is "auto".',
 )
 @click.option(
-    "--peft-model-path",
-    default=None,
-    type=str,
-    help="PEFT model path.",
+    "--lora-modules",
+    "-lm",
+    multiple=True,
+    type=(str, str),
+    help="LoRA module configurations in the format name=path. Multiple modules can be specified.",
 )
 @click.option(
     "--image-lora-load-kwargs",
@@ -696,7 +697,7 @@ def model_launch(
     quantization: str,
     replica: int,
     n_gpu: str,
-    peft_model_path: Optional[str],
+    lora_modules: Optional[Tuple],
     image_lora_load_kwargs: Optional[Tuple],
     image_lora_fuse_kwargs: Optional[Tuple],
     worker_ip: Optional[str],
@@ -729,6 +730,18 @@ def model_launch(
         else None
     )

+    lora_list = (
+        [{"lora_name": k, "local_path": v} for k, v in dict(lora_modules).items()]
+        if lora_modules
+        else []
+    )
+
+    peft_model_config = {
+        "image_lora_load_kwargs": image_lora_load_params,
+        "image_lora_fuse_kwargs": image_lora_fuse_params,
+        "lora_list": lora_list,
+    }
+
     _gpu_idx: Optional[List[int]] = (
         None if gpu_idx is None else [int(idx) for idx in gpu_idx.split(",")]
     )
@@ -736,7 +749,9 @@ def model_launch(
     endpoint = get_endpoint(endpoint)
     model_size: Optional[Union[str, int]] = (
         size_in_billions
-        if size_in_billions is None
+        if size_in_billions is None
+        or "_" in size_in_billions
+        or "." in size_in_billions
         else int(size_in_billions)
     )
     client = RESTfulClient(base_url=endpoint, api_key=api_key)
@@ -752,9 +767,7 @@ def model_launch(
         quantization=quantization,
         replica=replica,
         n_gpu=_n_gpu,
-        peft_model_path=peft_model_path,
-        image_lora_load_kwargs=image_lora_load_params,
-        image_lora_fuse_kwargs=image_lora_fuse_params,
+        peft_model_config=peft_model_config,
         worker_ip=worker_ip,
         gpu_idx=_gpu_idx,
         trust_remote_code=trust_remote_code,
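The --peft-model-path option is gone; LoRA adaptors are now passed with the repeatable --lora-modules option, and the relaxed size handling lets sizes like 1_8 or 1.5 pass through as strings instead of failing int(). A sketch of a launch command under those assumptions; model name, adaptor name, and path are placeholders, and note that click's type=(str, str) means two space-separated values per occurrence despite the "name=path" wording in the help text:

    # hypothetical invocation; verify flag names with `xinference launch --help`
    xinference launch \
      --model-name qwen1.5-chat \
      --size-in-billions 1_8 \
      --model-format pytorch \
      --lora-modules my_adaptor /path/to/lora/adaptor
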
xinference/model/audio/__init__.py
CHANGED
@@ -16,12 +16,51 @@ import codecs
 import json
 import os

-from .core import
+from .core import (
+    AUDIO_MODEL_DESCRIPTIONS,
+    MODEL_NAME_TO_REVISION,
+    AudioModelFamilyV1,
+    generate_audio_description,
+    get_audio_model_descriptions,
+    get_cache_status,
+)
+from .custom import (
+    CustomAudioModelFamilyV1,
+    get_user_defined_audios,
+    register_audio,
+    unregister_audio,
+)

 _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
 BUILTIN_AUDIO_MODELS = dict(
     (spec["model_name"], AudioModelFamilyV1(**spec))
     for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
 )
+for model_name, model_spec in BUILTIN_AUDIO_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
+
+# register model description after recording model revision
+for model_spec_info in [BUILTIN_AUDIO_MODELS]:
+    for model_name, model_spec in model_spec_info.items():
+        if model_spec.model_name not in AUDIO_MODEL_DESCRIPTIONS:
+            AUDIO_MODEL_DESCRIPTIONS.update(generate_audio_description(model_spec))
+
+from ...constants import XINFERENCE_MODEL_DIR
+
+# if persist=True, load them when init
+user_defined_audio_dir = os.path.join(XINFERENCE_MODEL_DIR, "audio")
+if os.path.isdir(user_defined_audio_dir):
+    for f in os.listdir(user_defined_audio_dir):
+        with codecs.open(
+            os.path.join(user_defined_audio_dir, f), encoding="utf-8"
+        ) as fd:
+            user_defined_audio_family = CustomAudioModelFamilyV1.parse_obj(
+                json.load(fd)
+            )
+            register_audio(user_defined_audio_family, persist=False)
+
+# register model description
+for ud_audio in get_user_defined_audios():
+    AUDIO_MODEL_DESCRIPTIONS.update(generate_audio_description(ud_audio))

 del _model_spec_json