xinference 1.5.0.post2__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +10 -3
- xinference/constants.py +5 -1
- xinference/core/supervisor.py +1 -1
- xinference/core/utils.py +1 -1
- xinference/core/worker.py +2 -2
- xinference/deploy/cmdline.py +17 -0
- xinference/model/audio/core.py +1 -1
- xinference/model/audio/model_spec.json +43 -43
- xinference/model/audio/model_spec_modelscope.json +13 -13
- xinference/model/llm/__init__.py +3 -5
- xinference/model/llm/core.py +14 -0
- xinference/model/llm/llama_cpp/core.py +15 -4
- xinference/model/llm/llm_family.json +3251 -4304
- xinference/model/llm/llm_family.py +62 -6
- xinference/model/llm/llm_family_csghub.json +0 -32
- xinference/model/llm/llm_family_modelscope.json +1161 -1789
- xinference/model/llm/llm_family_openmind_hub.json +19 -325
- xinference/model/llm/lmdeploy/core.py +7 -2
- xinference/model/llm/mlx/core.py +19 -6
- xinference/model/llm/sglang/core.py +25 -10
- xinference/model/llm/transformers/chatglm.py +8 -1
- xinference/model/llm/transformers/cogagent.py +10 -12
- xinference/model/llm/transformers/cogvlm2.py +6 -3
- xinference/model/llm/transformers/cogvlm2_video.py +3 -6
- xinference/model/llm/transformers/core.py +50 -58
- xinference/model/llm/transformers/deepseek_v2.py +4 -2
- xinference/model/llm/transformers/deepseek_vl.py +10 -4
- xinference/model/llm/transformers/deepseek_vl2.py +9 -4
- xinference/model/llm/transformers/gemma3.py +4 -5
- xinference/model/llm/transformers/glm4v.py +2 -20
- xinference/model/llm/transformers/glm_edge_v.py +3 -20
- xinference/model/llm/transformers/intern_vl.py +3 -6
- xinference/model/llm/transformers/internlm2.py +1 -1
- xinference/model/llm/transformers/minicpmv25.py +4 -2
- xinference/model/llm/transformers/minicpmv26.py +5 -3
- xinference/model/llm/transformers/omnilmm.py +1 -1
- xinference/model/llm/transformers/opt.py +1 -1
- xinference/model/llm/transformers/ovis2.py +302 -0
- xinference/model/llm/transformers/qwen-omni.py +2 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +5 -1
- xinference/model/llm/transformers/qwen_vl.py +5 -2
- xinference/model/llm/utils.py +28 -0
- xinference/model/llm/vllm/core.py +73 -9
- xinference/model/llm/vllm/distributed_executor.py +8 -7
- xinference/model/llm/vllm/xavier/allocator.py +1 -1
- xinference/model/llm/vllm/xavier/block_manager.py +1 -1
- xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
- xinference/model/llm/vllm/xavier/executor.py +1 -1
- xinference/model/llm/vllm/xavier/test/test_xavier.py +1 -1
- xinference/model/video/diffusers.py +30 -3
- xinference/model/video/model_spec.json +46 -0
- xinference/model/video/model_spec_modelscope.json +48 -0
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
- xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
- xinference/web/ui/build/static/js/main.91e77b5c.js +3 -0
- xinference/web/ui/build/static/js/main.91e77b5c.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5e6edb0fb87e3798f142e9abf8dd2dc46bab33a60d31dff525797c0c99887097.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6087820be1bd5c02c42dff797e7df365448ef35ab26dd5d6bd33e967e05cbfd4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
- xinference/web/ui/src/locales/en.json +1 -0
- xinference/web/ui/src/locales/zh.json +1 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/METADATA +1 -1
- {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/RECORD +77 -78
- {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/WHEEL +1 -1
- xinference/model/llm/transformers/compression.py +0 -258
- xinference/model/llm/transformers/yi_vl.py +0 -239
- xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
- xinference/web/ui/build/static/js/main.4b67a723.js +0 -3
- xinference/web/ui/build/static/js/main.4b67a723.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e4ba658c6b3b0490910acdae0c535a892257efb61539a24adf8038fc653bd22f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
- xinference/web/ui/build/static/js/{main.4b67a723.js.LICENSE.txt → main.91e77b5c.js.LICENSE.txt} +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-04-
|
|
11
|
+
"date": "2025-04-30T21:28:49+0800",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "1.5.
|
|
14
|
+
"full-revisionid": "1c11c609971e5a5095ce8be73f0e1bba04a3132f",
|
|
15
|
+
"version": "1.5.1"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
xinference/api/restful_api.py
CHANGED
|
@@ -56,6 +56,7 @@ from ..constants import (
|
|
|
56
56
|
XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
|
|
57
57
|
XINFERENCE_DEFAULT_ENDPOINT_PORT,
|
|
58
58
|
XINFERENCE_DISABLE_METRICS,
|
|
59
|
+
XINFERENCE_SSE_PING_ATTEMPTS_SECONDS,
|
|
59
60
|
)
|
|
60
61
|
from ..core.event import Event, EventCollectorActor, EventType
|
|
61
62
|
from ..core.supervisor import SupervisorActor
|
|
@@ -1338,7 +1339,9 @@ class RESTfulAPI(CancelMixin):
|
|
|
1338
1339
|
finally:
|
|
1339
1340
|
await model.decrease_serve_count()
|
|
1340
1341
|
|
|
1341
|
-
return EventSourceResponse(
|
|
1342
|
+
return EventSourceResponse(
|
|
1343
|
+
stream_results(), ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS
|
|
1344
|
+
)
|
|
1342
1345
|
else:
|
|
1343
1346
|
try:
|
|
1344
1347
|
data = await model.generate(body.prompt, kwargs, raw_params=raw_kwargs)
|
|
@@ -1606,7 +1609,9 @@ class RESTfulAPI(CancelMixin):
|
|
|
1606
1609
|
await model.decrease_serve_count()
|
|
1607
1610
|
|
|
1608
1611
|
return EventSourceResponse(
|
|
1609
|
-
media_type="application/octet-stream",
|
|
1612
|
+
media_type="application/octet-stream",
|
|
1613
|
+
content=stream_results(),
|
|
1614
|
+
ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS,
|
|
1610
1615
|
)
|
|
1611
1616
|
else:
|
|
1612
1617
|
return Response(media_type="application/octet-stream", content=out)
|
|
@@ -2122,7 +2127,9 @@ class RESTfulAPI(CancelMixin):
|
|
|
2122
2127
|
finally:
|
|
2123
2128
|
await model.decrease_serve_count()
|
|
2124
2129
|
|
|
2125
|
-
return EventSourceResponse(
|
|
2130
|
+
return EventSourceResponse(
|
|
2131
|
+
stream_results(), ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS
|
|
2132
|
+
)
|
|
2126
2133
|
else:
|
|
2127
2134
|
try:
|
|
2128
2135
|
data = await model.chat(
|
xinference/constants.py
CHANGED
|
@@ -29,7 +29,8 @@ XINFERENCE_ENV_DISABLE_HEALTH_CHECK = "XINFERENCE_DISABLE_HEALTH_CHECK"
|
|
|
29
29
|
XINFERENCE_ENV_DISABLE_METRICS = "XINFERENCE_DISABLE_METRICS"
|
|
30
30
|
XINFERENCE_ENV_DOWNLOAD_MAX_ATTEMPTS = "XINFERENCE_DOWNLOAD_MAX_ATTEMPTS"
|
|
31
31
|
XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE = "XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE"
|
|
32
|
-
XINFERENCE_ENV_VIRTUAL_ENV = "
|
|
32
|
+
XINFERENCE_ENV_VIRTUAL_ENV = "XINFERENCE_ENABLE_VIRTUAL_ENV"
|
|
33
|
+
XINFERENCE_ENV_SSE_PING_ATTEMPTS_SECONDS = "XINFERENCE_SSE_PING_ATTEMPTS_SECONDS"
|
|
33
34
|
|
|
34
35
|
|
|
35
36
|
def get_xinference_home() -> str:
|
|
@@ -89,6 +90,9 @@ XINFERENCE_DOWNLOAD_MAX_ATTEMPTS = int(
|
|
|
89
90
|
XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE = os.environ.get(
|
|
90
91
|
XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE, None
|
|
91
92
|
)
|
|
93
|
+
XINFERENCE_SSE_PING_ATTEMPTS_SECONDS = int(
|
|
94
|
+
os.environ.get(XINFERENCE_ENV_SSE_PING_ATTEMPTS_SECONDS, 600)
|
|
95
|
+
)
|
|
92
96
|
XINFERENCE_LAUNCH_MODEL_RETRY = 3
|
|
93
97
|
XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION = 30
|
|
94
98
|
XINFERENCE_ENABLE_VIRTUAL_ENV = bool(int(os.getenv(XINFERENCE_ENV_VIRTUAL_ENV, "0")))
|
xinference/core/supervisor.py
CHANGED
|
@@ -1102,8 +1102,8 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
1102
1102
|
xavier_config=xavier_config,
|
|
1103
1103
|
**kwargs,
|
|
1104
1104
|
)
|
|
1105
|
-
await worker_ref.wait_for_load(_replica_model_uid)
|
|
1106
1105
|
self._replica_model_uid_to_worker[_replica_model_uid] = worker_ref
|
|
1106
|
+
await worker_ref.wait_for_load(_replica_model_uid)
|
|
1107
1107
|
return subpool_address
|
|
1108
1108
|
|
|
1109
1109
|
async def _launch_model():
|
xinference/core/utils.py
CHANGED
|
@@ -263,7 +263,7 @@ class CancelMixin:
|
|
|
263
263
|
_CANCEL_TASK_NAME = "abort_block"
|
|
264
264
|
|
|
265
265
|
def __init__(self):
|
|
266
|
-
self._running_tasks: weakref.WeakValueDictionary[
|
|
266
|
+
self._running_tasks: weakref.WeakValueDictionary[ # type: ignore
|
|
267
267
|
str, asyncio.Task
|
|
268
268
|
] = weakref.WeakValueDictionary()
|
|
269
269
|
|
xinference/core/worker.py
CHANGED
|
@@ -787,9 +787,9 @@ class WorkerActor(xo.StatelessActor):
|
|
|
787
787
|
elif model_type == "image":
|
|
788
788
|
return model.model_ability
|
|
789
789
|
elif model_type == "audio":
|
|
790
|
-
return
|
|
790
|
+
return model.model_ability
|
|
791
791
|
elif model_type == "video":
|
|
792
|
-
return
|
|
792
|
+
return model.model_ability
|
|
793
793
|
elif model_type == "flexible":
|
|
794
794
|
return ["flexible"]
|
|
795
795
|
else:
|
xinference/deploy/cmdline.py
CHANGED
|
@@ -805,6 +805,14 @@ def remove_cache(
|
|
|
805
805
|
type=(str, str),
|
|
806
806
|
multiple=True,
|
|
807
807
|
)
|
|
808
|
+
@click.option(
|
|
809
|
+
"--quantization-config",
|
|
810
|
+
"-qc",
|
|
811
|
+
"quantization_config",
|
|
812
|
+
type=(str, str),
|
|
813
|
+
multiple=True,
|
|
814
|
+
help="bnb quantization config for `transformers` engine.",
|
|
815
|
+
)
|
|
808
816
|
@click.option(
|
|
809
817
|
"--worker-ip",
|
|
810
818
|
default=None,
|
|
@@ -853,6 +861,7 @@ def model_launch(
|
|
|
853
861
|
trust_remote_code: bool,
|
|
854
862
|
api_key: Optional[str],
|
|
855
863
|
model_path: Optional[str],
|
|
864
|
+
quantization_config: Optional[Tuple],
|
|
856
865
|
):
|
|
857
866
|
kwargs = {}
|
|
858
867
|
for i in range(0, len(ctx.args), 2):
|
|
@@ -884,6 +893,12 @@ def model_launch(
|
|
|
884
893
|
else:
|
|
885
894
|
_n_gpu = int(n_gpu)
|
|
886
895
|
|
|
896
|
+
bnb_quantization_config = (
|
|
897
|
+
{k: handle_click_args_type(v) for k, v in dict(quantization_config).items()}
|
|
898
|
+
if quantization_config
|
|
899
|
+
else None
|
|
900
|
+
)
|
|
901
|
+
|
|
887
902
|
image_lora_load_params = (
|
|
888
903
|
{k: handle_click_args_type(v) for k, v in dict(image_lora_load_kwargs).items()}
|
|
889
904
|
if image_lora_load_kwargs
|
|
@@ -929,6 +944,8 @@ def model_launch(
|
|
|
929
944
|
|
|
930
945
|
# do not wait for launching.
|
|
931
946
|
kwargs["wait_ready"] = False
|
|
947
|
+
if bnb_quantization_config:
|
|
948
|
+
kwargs["quantization_config"] = {**bnb_quantization_config}
|
|
932
949
|
|
|
933
950
|
model_uid = client.launch_model(
|
|
934
951
|
model_name=model_name,
|
xinference/model/audio/core.py
CHANGED
|
@@ -52,7 +52,7 @@ class AudioModelFamilyV1(CacheableModelSpec):
|
|
|
52
52
|
model_revision: Optional[str]
|
|
53
53
|
multilingual: bool
|
|
54
54
|
language: Optional[str]
|
|
55
|
-
model_ability: Optional[str]
|
|
55
|
+
model_ability: Optional[List[str]]
|
|
56
56
|
default_model_config: Optional[Dict[str, Any]]
|
|
57
57
|
default_transcription_config: Optional[Dict[str, Any]]
|
|
58
58
|
engine: Optional[str]
|
|
xinference/model/audio/model_spec.json
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"model_family": "whisper",
|
|
5
5
|
"model_id": "openai/whisper-tiny",
|
|
6
6
|
"model_revision": "167c219b21f11ef214220b8fdb7536b8a88c2475",
|
|
7
|
-
"model_ability": "
|
|
7
|
+
"model_ability": ["audio2text"],
|
|
8
8
|
"multilingual": true
|
|
9
9
|
},
|
|
10
10
|
{
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
"model_family": "whisper",
|
|
13
13
|
"model_id": "openai/whisper-tiny.en",
|
|
14
14
|
"model_revision": "87c7102498dcde7456f24cfd30239ca606ed9063",
|
|
15
|
-
"model_ability": "
|
|
15
|
+
"model_ability": ["audio2text"],
|
|
16
16
|
"multilingual": false
|
|
17
17
|
},
|
|
18
18
|
{
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
"model_family": "whisper",
|
|
21
21
|
"model_id": "openai/whisper-base",
|
|
22
22
|
"model_revision": "8c1db9b51951100007a96a525d83a8ec81b3c237",
|
|
23
|
-
"model_ability": "
|
|
23
|
+
"model_ability": ["audio2text"],
|
|
24
24
|
"multilingual": true
|
|
25
25
|
},
|
|
26
26
|
{
|
|
@@ -28,7 +28,7 @@
|
|
|
28
28
|
"model_family": "whisper",
|
|
29
29
|
"model_id": "openai/whisper-base.en",
|
|
30
30
|
"model_revision": "911407f4214e0e1d82085af863093ec0b66f9cd6",
|
|
31
|
-
"model_ability": "
|
|
31
|
+
"model_ability": ["audio2text"],
|
|
32
32
|
"multilingual": false
|
|
33
33
|
},
|
|
34
34
|
{
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
"model_family": "whisper",
|
|
37
37
|
"model_id": "openai/whisper-small",
|
|
38
38
|
"model_revision": "998cb1a777c20db53d6033a61b977ed4c3792cac",
|
|
39
|
-
"model_ability": "
|
|
39
|
+
"model_ability": ["audio2text"],
|
|
40
40
|
"multilingual": true
|
|
41
41
|
},
|
|
42
42
|
{
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
"model_family": "whisper",
|
|
45
45
|
"model_id": "openai/whisper-small.en",
|
|
46
46
|
"model_revision": "e8727524f962ee844a7319d92be39ac1bd25655a",
|
|
47
|
-
"model_ability": "
|
|
47
|
+
"model_ability": ["audio2text"],
|
|
48
48
|
"multilingual": false
|
|
49
49
|
},
|
|
50
50
|
{
|
|
@@ -52,7 +52,7 @@
|
|
|
52
52
|
"model_family": "whisper",
|
|
53
53
|
"model_id": "openai/whisper-medium",
|
|
54
54
|
"model_revision": "16688beb1294bedd0a6f5cd86fe7eec57bce41ed",
|
|
55
|
-
"model_ability": "
|
|
55
|
+
"model_ability": ["audio2text"],
|
|
56
56
|
"multilingual": true
|
|
57
57
|
},
|
|
58
58
|
{
|
|
@@ -60,7 +60,7 @@
|
|
|
60
60
|
"model_family": "whisper",
|
|
61
61
|
"model_id": "openai/whisper-medium.en",
|
|
62
62
|
"model_revision": "2e98eb6279edf5095af0c8dedb36bdec0acd172b",
|
|
63
|
-
"model_ability": "
|
|
63
|
+
"model_ability": ["audio2text"],
|
|
64
64
|
"multilingual": false
|
|
65
65
|
},
|
|
66
66
|
{
|
|
@@ -68,7 +68,7 @@
|
|
|
68
68
|
"model_family": "whisper",
|
|
69
69
|
"model_id": "openai/whisper-large-v3",
|
|
70
70
|
"model_revision": "6cdf07a7e3ec3806e5d55f787915b85d4cd020b1",
|
|
71
|
-
"model_ability": "
|
|
71
|
+
"model_ability": ["audio2text"],
|
|
72
72
|
"multilingual": true
|
|
73
73
|
},
|
|
74
74
|
{
|
|
@@ -76,7 +76,7 @@
|
|
|
76
76
|
"model_family": "whisper",
|
|
77
77
|
"model_id": "openai/whisper-large-v3-turbo",
|
|
78
78
|
"model_revision": "41f01f3fe87f28c78e2fbf8b568835947dd65ed9",
|
|
79
|
-
"model_ability": "
|
|
79
|
+
"model_ability": ["audio2text"],
|
|
80
80
|
"multilingual": true
|
|
81
81
|
},
|
|
82
82
|
{
|
|
@@ -84,7 +84,7 @@
|
|
|
84
84
|
"model_family": "whisper",
|
|
85
85
|
"model_id": "BELLE-2/Belle-distilwhisper-large-v2-zh",
|
|
86
86
|
"model_revision": "ed25d13498fa5bac758b2fc479435b698532dfe8",
|
|
87
|
-
"model_ability": "
|
|
87
|
+
"model_ability": ["audio2text"],
|
|
88
88
|
"multilingual": false
|
|
89
89
|
},
|
|
90
90
|
{
|
|
@@ -92,7 +92,7 @@
|
|
|
92
92
|
"model_family": "whisper",
|
|
93
93
|
"model_id": "BELLE-2/Belle-whisper-large-v2-zh",
|
|
94
94
|
"model_revision": "ec5bd5d78598545b7585814edde86dac2002b5b9",
|
|
95
|
-
"model_ability": "
|
|
95
|
+
"model_ability": ["audio2text"],
|
|
96
96
|
"multilingual": false
|
|
97
97
|
},
|
|
98
98
|
{
|
|
@@ -100,14 +100,14 @@
|
|
|
100
100
|
"model_family": "whisper",
|
|
101
101
|
"model_id": "BELLE-2/Belle-whisper-large-v3-zh",
|
|
102
102
|
"model_revision": "3bebc7247696b39f5ab9ed22db426943ac33f600",
|
|
103
|
-
"model_ability": "
|
|
103
|
+
"model_ability": ["audio2text"],
|
|
104
104
|
"multilingual": false
|
|
105
105
|
},
|
|
106
106
|
{
|
|
107
107
|
"model_name": "whisper-tiny-mlx",
|
|
108
108
|
"model_family": "whisper",
|
|
109
109
|
"model_id": "mlx-community/whisper-tiny",
|
|
110
|
-
"model_ability": "
|
|
110
|
+
"model_ability": ["audio2text"],
|
|
111
111
|
"multilingual": true,
|
|
112
112
|
"engine": "mlx"
|
|
113
113
|
},
|
|
@@ -115,7 +115,7 @@
|
|
|
115
115
|
"model_name": "whisper-tiny.en-mlx",
|
|
116
116
|
"model_family": "whisper",
|
|
117
117
|
"model_id": "mlx-community/whisper-tiny.en-mlx",
|
|
118
|
-
"model_ability": "
|
|
118
|
+
"model_ability": ["audio2text"],
|
|
119
119
|
"multilingual": false,
|
|
120
120
|
"engine": "mlx"
|
|
121
121
|
},
|
|
@@ -123,7 +123,7 @@
|
|
|
123
123
|
"model_name": "whisper-base-mlx",
|
|
124
124
|
"model_family": "whisper",
|
|
125
125
|
"model_id": "mlx-community/whisper-base-mlx",
|
|
126
|
-
"model_ability": "
|
|
126
|
+
"model_ability": ["audio2text"],
|
|
127
127
|
"multilingual": true,
|
|
128
128
|
"engine": "mlx"
|
|
129
129
|
},
|
|
@@ -131,7 +131,7 @@
|
|
|
131
131
|
"model_name": "whisper-base.en-mlx",
|
|
132
132
|
"model_family": "whisper",
|
|
133
133
|
"model_id": "mlx-community/whisper-base.en-mlx",
|
|
134
|
-
"model_ability": "
|
|
134
|
+
"model_ability": ["audio2text"],
|
|
135
135
|
"multilingual": false,
|
|
136
136
|
"engine": "mlx"
|
|
137
137
|
},
|
|
@@ -139,7 +139,7 @@
|
|
|
139
139
|
"model_name": "whisper-small-mlx",
|
|
140
140
|
"model_family": "whisper",
|
|
141
141
|
"model_id": "mlx-community/whisper-small-mlx",
|
|
142
|
-
"model_ability": "
|
|
142
|
+
"model_ability": ["audio2text"],
|
|
143
143
|
"multilingual": true,
|
|
144
144
|
"engine": "mlx"
|
|
145
145
|
},
|
|
@@ -147,7 +147,7 @@
|
|
|
147
147
|
"model_name": "whisper-small.en-mlx",
|
|
148
148
|
"model_family": "whisper",
|
|
149
149
|
"model_id": "mlx-community/whisper-small.en-mlx",
|
|
150
|
-
"model_ability": "
|
|
150
|
+
"model_ability": ["audio2text"],
|
|
151
151
|
"multilingual": false,
|
|
152
152
|
"engine": "mlx"
|
|
153
153
|
},
|
|
@@ -155,7 +155,7 @@
|
|
|
155
155
|
"model_name": "whisper-medium-mlx",
|
|
156
156
|
"model_family": "whisper",
|
|
157
157
|
"model_id": "mlx-community/whisper-medium-mlx",
|
|
158
|
-
"model_ability": "
|
|
158
|
+
"model_ability": ["audio2text"],
|
|
159
159
|
"multilingual": true,
|
|
160
160
|
"engine": "mlx"
|
|
161
161
|
},
|
|
@@ -163,7 +163,7 @@
|
|
|
163
163
|
"model_name": "whisper-medium.en-mlx",
|
|
164
164
|
"model_family": "whisper",
|
|
165
165
|
"model_id": "mlx-community/whisper-medium.en-mlx",
|
|
166
|
-
"model_ability": "
|
|
166
|
+
"model_ability": ["audio2text"],
|
|
167
167
|
"multilingual": false,
|
|
168
168
|
"engine": "mlx"
|
|
169
169
|
},
|
|
@@ -171,7 +171,7 @@
|
|
|
171
171
|
"model_name": "whisper-large-v3-mlx",
|
|
172
172
|
"model_family": "whisper",
|
|
173
173
|
"model_id": "mlx-community/whisper-large-v3-mlx",
|
|
174
|
-
"model_ability": "
|
|
174
|
+
"model_ability": ["audio2text"],
|
|
175
175
|
"multilingual": true,
|
|
176
176
|
"engine": "mlx"
|
|
177
177
|
},
|
|
@@ -179,7 +179,7 @@
|
|
|
179
179
|
"model_name": "whisper-large-v3-turbo-mlx",
|
|
180
180
|
"model_family": "whisper",
|
|
181
181
|
"model_id": "mlx-community/whisper-large-v3-turbo",
|
|
182
|
-
"model_ability": "
|
|
182
|
+
"model_ability": ["audio2text"],
|
|
183
183
|
"multilingual": true,
|
|
184
184
|
"engine": "mlx"
|
|
185
185
|
},
|
|
@@ -188,7 +188,7 @@
|
|
|
188
188
|
"model_family": "funasr",
|
|
189
189
|
"model_id": "FunAudioLLM/SenseVoiceSmall",
|
|
190
190
|
"model_revision": "3eb3b4eeffc2f2dde6051b853983753db33e35c3",
|
|
191
|
-
"model_ability": "
|
|
191
|
+
"model_ability": ["audio2text"],
|
|
192
192
|
"multilingual": true,
|
|
193
193
|
"default_model_config": {
|
|
194
194
|
"vad_model": "fsmn-vad",
|
|
@@ -208,7 +208,7 @@
|
|
|
208
208
|
"model_family": "funasr",
|
|
209
209
|
"model_id": "funasr/paraformer-zh",
|
|
210
210
|
"model_revision": "5ed094cdfc8f6a9b6b022bd08bc904ef862bc79e",
|
|
211
|
-
"model_ability": "
|
|
211
|
+
"model_ability": ["audio2text"],
|
|
212
212
|
"multilingual": false,
|
|
213
213
|
"default_model_config": {
|
|
214
214
|
"vad_model": "fsmn-vad",
|
|
@@ -223,7 +223,7 @@
|
|
|
223
223
|
"model_family": "ChatTTS",
|
|
224
224
|
"model_id": "2Noise/ChatTTS",
|
|
225
225
|
"model_revision": "1a3c04a8b0651689bd9242fbb55b1f4b5a9aef84",
|
|
226
|
-
"model_ability": "
|
|
226
|
+
"model_ability": ["text2audio"],
|
|
227
227
|
"multilingual": true
|
|
228
228
|
},
|
|
229
229
|
{
|
|
@@ -231,7 +231,7 @@
|
|
|
231
231
|
"model_family": "CosyVoice",
|
|
232
232
|
"model_id": "FunAudioLLM/CosyVoice-300M",
|
|
233
233
|
"model_revision": "39c4e13d46bd4dfb840d214547623e5fcd2428e2",
|
|
234
|
-
"model_ability": "
|
|
234
|
+
"model_ability": ["text2audio"],
|
|
235
235
|
"multilingual": true
|
|
236
236
|
},
|
|
237
237
|
{
|
|
@@ -239,7 +239,7 @@
|
|
|
239
239
|
"model_family": "CosyVoice",
|
|
240
240
|
"model_id": "FunAudioLLM/CosyVoice-300M-SFT",
|
|
241
241
|
"model_revision": "096a5cff8d497fabb3dec2756a200f3688457a1b",
|
|
242
|
-
"model_ability": "
|
|
242
|
+
"model_ability": ["text2audio"],
|
|
243
243
|
"multilingual": true
|
|
244
244
|
},
|
|
245
245
|
{
|
|
@@ -247,7 +247,7 @@
|
|
|
247
247
|
"model_family": "CosyVoice",
|
|
248
248
|
"model_id": "FunAudioLLM/CosyVoice-300M-Instruct",
|
|
249
249
|
"model_revision": "ba5265d9a3169c1fedce145122c9dd4bc24e062c",
|
|
250
|
-
"model_ability": "
|
|
250
|
+
"model_ability": ["text2audio"],
|
|
251
251
|
"multilingual": true
|
|
252
252
|
},
|
|
253
253
|
{
|
|
@@ -255,7 +255,7 @@
|
|
|
255
255
|
"model_family": "CosyVoice",
|
|
256
256
|
"model_id": "mrfakename/CosyVoice2-0.5B",
|
|
257
257
|
"model_revision": "5676baabc8a76dc93ef60a88bbd2420deaa2f644",
|
|
258
|
-
"model_ability": "
|
|
258
|
+
"model_ability": ["text2audio"],
|
|
259
259
|
"multilingual": true
|
|
260
260
|
},
|
|
261
261
|
{
|
|
@@ -263,7 +263,7 @@
|
|
|
263
263
|
"model_family": "FishAudio",
|
|
264
264
|
"model_id": "fishaudio/fish-speech-1.5",
|
|
265
265
|
"model_revision": "268b6ec86243dd683bc78dab7e9a6cedf9191f2a",
|
|
266
|
-
"model_ability": "
|
|
266
|
+
"model_ability": ["text2audio"],
|
|
267
267
|
"multilingual": true
|
|
268
268
|
},
|
|
269
269
|
{
|
|
@@ -271,7 +271,7 @@
|
|
|
271
271
|
"model_family": "F5-TTS",
|
|
272
272
|
"model_id": "SWivid/F5-TTS",
|
|
273
273
|
"model_revision": "4dcc16f297f2ff98a17b3726b16f5de5a5e45672",
|
|
274
|
-
"model_ability": "
|
|
274
|
+
"model_ability": ["text2audio"],
|
|
275
275
|
"multilingual": true
|
|
276
276
|
},
|
|
277
277
|
{
|
|
@@ -279,7 +279,7 @@
|
|
|
279
279
|
"model_family": "F5-TTS-MLX",
|
|
280
280
|
"model_id": "lucasnewman/f5-tts-mlx",
|
|
281
281
|
"model_revision": "7642bb232e3fcacf92c51c786edebb8624da6b93",
|
|
282
|
-
"model_ability": "
|
|
282
|
+
"model_ability": ["text2audio"],
|
|
283
283
|
"multilingual": true
|
|
284
284
|
},
|
|
285
285
|
{
|
|
@@ -287,7 +287,7 @@
|
|
|
287
287
|
"model_family": "MeloTTS",
|
|
288
288
|
"model_id": "myshell-ai/MeloTTS-English",
|
|
289
289
|
"model_revision": "bb4fb7346d566d277ba8c8c7dbfdf6786139b8ef",
|
|
290
|
-
"model_ability": "
|
|
290
|
+
"model_ability": ["text2audio"],
|
|
291
291
|
"multilingual": false,
|
|
292
292
|
"language": "EN"
|
|
293
293
|
},
|
|
@@ -296,7 +296,7 @@
|
|
|
296
296
|
"model_family": "MeloTTS",
|
|
297
297
|
"model_id": "myshell-ai/MeloTTS-English-v2",
|
|
298
298
|
"model_revision": "a53e3509c4ee4ff16d79272feb2474ff864e18f3",
|
|
299
|
-
"model_ability": "
|
|
299
|
+
"model_ability": ["text2audio"],
|
|
300
300
|
"multilingual": false,
|
|
301
301
|
"language": "EN"
|
|
302
302
|
},
|
|
@@ -305,7 +305,7 @@
|
|
|
305
305
|
"model_family": "MeloTTS",
|
|
306
306
|
"model_id": "myshell-ai/MeloTTS-English-v3",
|
|
307
307
|
"model_revision": "f7c4a35392c0e9be24a755f1edb4c3f63040f759",
|
|
308
|
-
"model_ability": "
|
|
308
|
+
"model_ability": ["text2audio"],
|
|
309
309
|
"multilingual": false,
|
|
310
310
|
"language": "EN"
|
|
311
311
|
},
|
|
@@ -314,7 +314,7 @@
|
|
|
314
314
|
"model_family": "MeloTTS",
|
|
315
315
|
"model_id": "myshell-ai/MeloTTS-French",
|
|
316
316
|
"model_revision": "1e9bf590262392d8bffb679b0a3b0c16b0f9fdaf",
|
|
317
|
-
"model_ability": "
|
|
317
|
+
"model_ability": ["text2audio"],
|
|
318
318
|
"multilingual": false,
|
|
319
319
|
"language": "FR"
|
|
320
320
|
},
|
|
@@ -323,7 +323,7 @@
|
|
|
323
323
|
"model_family": "MeloTTS",
|
|
324
324
|
"model_id": "myshell-ai/MeloTTS-Japanese",
|
|
325
325
|
"model_revision": "367f8795464b531b4e97c1515bddfc1243e60891",
|
|
326
|
-
"model_ability": "
|
|
326
|
+
"model_ability": ["text2audio"],
|
|
327
327
|
"multilingual": false,
|
|
328
328
|
"language": "JP"
|
|
329
329
|
},
|
|
@@ -332,7 +332,7 @@
|
|
|
332
332
|
"model_family": "MeloTTS",
|
|
333
333
|
"model_id": "myshell-ai/MeloTTS-Spanish",
|
|
334
334
|
"model_revision": "dbb5496df39d11a66c1d5f5a9ca357c3c9fb95fb",
|
|
335
|
-
"model_ability": "
|
|
335
|
+
"model_ability": ["text2audio"],
|
|
336
336
|
"multilingual": false,
|
|
337
337
|
"language": "ES"
|
|
338
338
|
},
|
|
@@ -341,7 +341,7 @@
|
|
|
341
341
|
"model_family": "MeloTTS",
|
|
342
342
|
"model_id": "myshell-ai/MeloTTS-Chinese",
|
|
343
343
|
"model_revision": "af5d207a364ea4208c6f589c89f57f88414bdd16",
|
|
344
|
-
"model_ability": "
|
|
344
|
+
"model_ability": ["text2audio"],
|
|
345
345
|
"multilingual": false,
|
|
346
346
|
"language": "ZH"
|
|
347
347
|
},
|
|
@@ -350,7 +350,7 @@
|
|
|
350
350
|
"model_family": "MeloTTS",
|
|
351
351
|
"model_id": "myshell-ai/MeloTTS-Korean",
|
|
352
352
|
"model_revision": "0207e5adfc90129a51b6b03d89be6d84360ed323",
|
|
353
|
-
"model_ability": "
|
|
353
|
+
"model_ability": ["text2audio"],
|
|
354
354
|
"multilingual": false,
|
|
355
355
|
"language": "KR"
|
|
356
356
|
},
|
|
@@ -359,7 +359,7 @@
|
|
|
359
359
|
"model_family": "Kokoro",
|
|
360
360
|
"model_id": "hexgrad/Kokoro-82M",
|
|
361
361
|
"model_revision": "7884269d6fd3f9beabc271b6f1308e5699281fa9",
|
|
362
|
-
"model_ability": "
|
|
362
|
+
"model_ability": ["text2audio"],
|
|
363
363
|
"multilingual": true
|
|
364
364
|
},
|
|
365
365
|
{
|
|
@@ -367,7 +367,7 @@
|
|
|
367
367
|
"model_family": "MegaTTS",
|
|
368
368
|
"model_id": "ByteDance/MegaTTS3",
|
|
369
369
|
"model_revision": "409a7002b006d80f0730fca6f80441b08c10e738",
|
|
370
|
-
"model_ability": "
|
|
370
|
+
"model_ability": ["text2audio"],
|
|
371
371
|
"multilingual": true
|
|
372
372
|
}
|
|
373
373
|
]
|
|
xinference/model/audio/model_spec_modelscope.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"model_hub": "modelscope",
|
|
6
6
|
"model_id": "AI-ModelScope/whisper-large-v3",
|
|
7
7
|
"model_revision": "master",
|
|
8
|
-
"model_ability": "
|
|
8
|
+
"model_ability": ["audio2text"],
|
|
9
9
|
"multilingual": true
|
|
10
10
|
},
|
|
11
11
|
{
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
"model_hub": "modelscope",
|
|
15
15
|
"model_id": "AI-ModelScope/whisper-large-v3-turbo",
|
|
16
16
|
"model_revision": "master",
|
|
17
|
-
"model_ability": "
|
|
17
|
+
"model_ability": ["audio2text"],
|
|
18
18
|
"multilingual": true
|
|
19
19
|
},
|
|
20
20
|
{
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
"model_hub": "modelscope",
|
|
24
24
|
"model_id": "Xorbits/Belle-whisper-large-v3-zh",
|
|
25
25
|
"model_revision": "master",
|
|
26
|
-
"model_ability": "
|
|
26
|
+
"model_ability": ["audio2text"],
|
|
27
27
|
"multilingual": false
|
|
28
28
|
},
|
|
29
29
|
{
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
"model_hub": "modelscope",
|
|
33
33
|
"model_id": "iic/SenseVoiceSmall",
|
|
34
34
|
"model_revision": "master",
|
|
35
|
-
"model_ability": "
|
|
35
|
+
"model_ability": ["audio2text"],
|
|
36
36
|
"multilingual": true,
|
|
37
37
|
"default_model_config": {
|
|
38
38
|
"vad_model": "fsmn-vad",
|
|
@@ -53,7 +53,7 @@
|
|
|
53
53
|
"model_hub": "modelscope",
|
|
54
54
|
"model_id": "iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn",
|
|
55
55
|
"model_revision": "master",
|
|
56
|
-
"model_ability": "
|
|
56
|
+
"model_ability": ["audio2text"],
|
|
57
57
|
"multilingual": false,
|
|
58
58
|
"default_model_config": {
|
|
59
59
|
"vad_model": "fsmn-vad",
|
|
@@ -69,7 +69,7 @@
|
|
|
69
69
|
"model_hub": "modelscope",
|
|
70
70
|
"model_id": "AI-ModelScope/ChatTTS",
|
|
71
71
|
"model_revision": "master",
|
|
72
|
-
"model_ability": "
|
|
72
|
+
"model_ability": ["text2audio"],
|
|
73
73
|
"multilingual": true
|
|
74
74
|
},
|
|
75
75
|
{
|
|
@@ -78,7 +78,7 @@
|
|
|
78
78
|
"model_hub": "modelscope",
|
|
79
79
|
"model_id": "iic/CosyVoice-300M",
|
|
80
80
|
"model_revision": "master",
|
|
81
|
-
"model_ability": "
|
|
81
|
+
"model_ability": ["text2audio"],
|
|
82
82
|
"multilingual": true
|
|
83
83
|
},
|
|
84
84
|
{
|
|
@@ -87,7 +87,7 @@
|
|
|
87
87
|
"model_hub": "modelscope",
|
|
88
88
|
"model_id": "iic/CosyVoice-300M-SFT",
|
|
89
89
|
"model_revision": "master",
|
|
90
|
-
"model_ability": "
|
|
90
|
+
"model_ability": ["text2audio"],
|
|
91
91
|
"multilingual": true
|
|
92
92
|
},
|
|
93
93
|
{
|
|
@@ -96,7 +96,7 @@
|
|
|
96
96
|
"model_hub": "modelscope",
|
|
97
97
|
"model_id": "iic/CosyVoice-300M-Instruct",
|
|
98
98
|
"model_revision": "master",
|
|
99
|
-
"model_ability": "
|
|
99
|
+
"model_ability": ["text2audio"],
|
|
100
100
|
"multilingual": true
|
|
101
101
|
},
|
|
102
102
|
{
|
|
@@ -105,7 +105,7 @@
|
|
|
105
105
|
"model_hub": "modelscope",
|
|
106
106
|
"model_id": "iic/CosyVoice2-0.5B",
|
|
107
107
|
"model_revision": "master",
|
|
108
|
-
"model_ability": "
|
|
108
|
+
"model_ability": ["text2audio"],
|
|
109
109
|
"multilingual": true
|
|
110
110
|
},
|
|
111
111
|
{
|
|
@@ -114,7 +114,7 @@
|
|
|
114
114
|
"model_hub": "modelscope",
|
|
115
115
|
"model_id": "SWivid/F5-TTS_Emilia-ZH-EN",
|
|
116
116
|
"model_revision": "master",
|
|
117
|
-
"model_ability": "
|
|
117
|
+
"model_ability": ["text2audio"],
|
|
118
118
|
"multilingual": true
|
|
119
119
|
},
|
|
120
120
|
{
|
|
@@ -123,7 +123,7 @@
|
|
|
123
123
|
"model_hub": "modelscope",
|
|
124
124
|
"model_id": "AI-ModelScope/Kokoro-82M",
|
|
125
125
|
"model_revision": "master",
|
|
126
|
-
"model_ability": "
|
|
126
|
+
"model_ability": ["text2audio"],
|
|
127
127
|
"multilingual": true
|
|
128
128
|
},
|
|
129
129
|
{
|
|
@@ -132,7 +132,7 @@
|
|
|
132
132
|
"model_hub": "modelscope",
|
|
133
133
|
"model_id": "ByteDance/MegaTTS3",
|
|
134
134
|
"model_revision": "master",
|
|
135
|
-
"model_ability": "
|
|
135
|
+
"model_ability": ["text2audio"],
|
|
136
136
|
"multilingual": true
|
|
137
137
|
}
|
|
138
138
|
]
|