xinference 1.5.0.post2__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +107 -11
- xinference/client/restful/restful_client.py +51 -11
- xinference/constants.py +5 -1
- xinference/core/media_interface.py +758 -0
- xinference/core/model.py +49 -9
- xinference/core/supervisor.py +1 -1
- xinference/core/utils.py +1 -1
- xinference/core/worker.py +33 -39
- xinference/deploy/cmdline.py +17 -0
- xinference/deploy/utils.py +0 -3
- xinference/model/audio/__init__.py +16 -27
- xinference/model/audio/core.py +2 -1
- xinference/model/audio/cosyvoice.py +4 -2
- xinference/model/audio/model_spec.json +63 -46
- xinference/model/audio/model_spec_modelscope.json +31 -14
- xinference/model/embedding/__init__.py +16 -24
- xinference/model/image/__init__.py +15 -25
- xinference/model/llm/__init__.py +40 -115
- xinference/model/llm/core.py +29 -6
- xinference/model/llm/llama_cpp/core.py +30 -347
- xinference/model/llm/llm_family.json +1674 -2203
- xinference/model/llm/llm_family.py +71 -7
- xinference/model/llm/llm_family_csghub.json +0 -32
- xinference/model/llm/llm_family_modelscope.json +1838 -2016
- xinference/model/llm/llm_family_openmind_hub.json +19 -325
- xinference/model/llm/lmdeploy/core.py +7 -2
- xinference/model/llm/mlx/core.py +23 -7
- xinference/model/llm/reasoning_parser.py +281 -5
- xinference/model/llm/sglang/core.py +39 -11
- xinference/model/llm/transformers/chatglm.py +9 -2
- xinference/model/llm/transformers/cogagent.py +10 -12
- xinference/model/llm/transformers/cogvlm2.py +6 -3
- xinference/model/llm/transformers/cogvlm2_video.py +3 -6
- xinference/model/llm/transformers/core.py +58 -60
- xinference/model/llm/transformers/deepseek_v2.py +4 -2
- xinference/model/llm/transformers/deepseek_vl.py +10 -4
- xinference/model/llm/transformers/deepseek_vl2.py +9 -4
- xinference/model/llm/transformers/gemma3.py +4 -5
- xinference/model/llm/transformers/glm4v.py +3 -21
- xinference/model/llm/transformers/glm_edge_v.py +3 -20
- xinference/model/llm/transformers/intern_vl.py +3 -6
- xinference/model/llm/transformers/internlm2.py +1 -1
- xinference/model/llm/transformers/minicpmv25.py +4 -2
- xinference/model/llm/transformers/minicpmv26.py +5 -3
- xinference/model/llm/transformers/omnilmm.py +1 -1
- xinference/model/llm/transformers/opt.py +1 -1
- xinference/model/llm/transformers/ovis2.py +302 -0
- xinference/model/llm/transformers/qwen-omni.py +8 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +5 -1
- xinference/model/llm/transformers/qwen_vl.py +5 -2
- xinference/model/llm/utils.py +96 -45
- xinference/model/llm/vllm/core.py +108 -24
- xinference/model/llm/vllm/distributed_executor.py +8 -7
- xinference/model/llm/vllm/xavier/allocator.py +1 -1
- xinference/model/llm/vllm/xavier/block_manager.py +1 -1
- xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
- xinference/model/llm/vllm/xavier/executor.py +1 -1
- xinference/model/llm/vllm/xavier/test/test_xavier.py +2 -11
- xinference/model/rerank/__init__.py +13 -24
- xinference/model/video/__init__.py +15 -25
- xinference/model/video/core.py +3 -3
- xinference/model/video/diffusers.py +157 -13
- xinference/model/video/model_spec.json +100 -0
- xinference/model/video/model_spec_modelscope.json +104 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +5 -4
- xinference/thirdparty/cosyvoice/bin/export_jit.py +50 -20
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +136 -51
- xinference/thirdparty/cosyvoice/bin/inference.py +15 -5
- xinference/thirdparty/cosyvoice/bin/train.py +7 -2
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +72 -52
- xinference/thirdparty/cosyvoice/cli/frontend.py +58 -58
- xinference/thirdparty/cosyvoice/cli/model.py +140 -155
- xinference/thirdparty/cosyvoice/dataset/processor.py +9 -5
- xinference/thirdparty/cosyvoice/flow/decoder.py +656 -54
- xinference/thirdparty/cosyvoice/flow/flow.py +69 -11
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +167 -63
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +1 -0
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +91 -1
- xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/generator.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +2 -2
- xinference/thirdparty/cosyvoice/llm/llm.py +198 -18
- xinference/thirdparty/cosyvoice/transformer/embedding.py +12 -4
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +124 -21
- xinference/thirdparty/cosyvoice/utils/class_utils.py +13 -0
- xinference/thirdparty/cosyvoice/utils/common.py +1 -1
- xinference/thirdparty/cosyvoice/utils/file_utils.py +40 -2
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +7 -0
- xinference/thirdparty/cosyvoice/utils/mask.py +4 -0
- xinference/thirdparty/cosyvoice/utils/train_utils.py +5 -1
- xinference/thirdparty/matcha/hifigan/xutils.py +3 -3
- xinference/types.py +2 -71
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
- xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
- xinference/web/ui/build/static/js/main.ae579a97.js +3 -0
- xinference/web/ui/build/static/js/main.ae579a97.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0196a4b09e3264614e54360d5f832c46b31d964ec58296765ebff191ace6adbf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/18fa271456b31cded36c05c4c71c6b2b1cf4e4128c1e32f0e45d8b9f21764397.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
- xinference/web/ui/src/locales/en.json +7 -4
- xinference/web/ui/src/locales/zh.json +7 -4
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/METADATA +56 -36
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/RECORD +120 -121
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/WHEEL +1 -1
- xinference/core/image_interface.py +0 -377
- xinference/model/llm/transformers/compression.py +0 -258
- xinference/model/llm/transformers/yi_vl.py +0 -239
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +0 -9
- xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
- xinference/web/ui/build/static/js/main.4b67a723.js +0 -3
- xinference/web/ui/build/static/js/main.4b67a723.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e4ba658c6b3b0490910acdae0c535a892257efb61539a24adf8038fc653bd22f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
- xinference/web/ui/build/static/js/{main.4b67a723.js.LICENSE.txt → main.ae579a97.js.LICENSE.txt} +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/top_level.txt +0 -0
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"model_family": "whisper",
|
|
5
5
|
"model_id": "openai/whisper-tiny",
|
|
6
6
|
"model_revision": "167c219b21f11ef214220b8fdb7536b8a88c2475",
|
|
7
|
-
"model_ability": "
|
|
7
|
+
"model_ability": ["audio2text"],
|
|
8
8
|
"multilingual": true
|
|
9
9
|
},
|
|
10
10
|
{
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
"model_family": "whisper",
|
|
13
13
|
"model_id": "openai/whisper-tiny.en",
|
|
14
14
|
"model_revision": "87c7102498dcde7456f24cfd30239ca606ed9063",
|
|
15
|
-
"model_ability": "
|
|
15
|
+
"model_ability": ["audio2text"],
|
|
16
16
|
"multilingual": false
|
|
17
17
|
},
|
|
18
18
|
{
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
"model_family": "whisper",
|
|
21
21
|
"model_id": "openai/whisper-base",
|
|
22
22
|
"model_revision": "8c1db9b51951100007a96a525d83a8ec81b3c237",
|
|
23
|
-
"model_ability": "
|
|
23
|
+
"model_ability": ["audio2text"],
|
|
24
24
|
"multilingual": true
|
|
25
25
|
},
|
|
26
26
|
{
|
|
@@ -28,7 +28,7 @@
|
|
|
28
28
|
"model_family": "whisper",
|
|
29
29
|
"model_id": "openai/whisper-base.en",
|
|
30
30
|
"model_revision": "911407f4214e0e1d82085af863093ec0b66f9cd6",
|
|
31
|
-
"model_ability": "
|
|
31
|
+
"model_ability": ["audio2text"],
|
|
32
32
|
"multilingual": false
|
|
33
33
|
},
|
|
34
34
|
{
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
"model_family": "whisper",
|
|
37
37
|
"model_id": "openai/whisper-small",
|
|
38
38
|
"model_revision": "998cb1a777c20db53d6033a61b977ed4c3792cac",
|
|
39
|
-
"model_ability": "
|
|
39
|
+
"model_ability": ["audio2text"],
|
|
40
40
|
"multilingual": true
|
|
41
41
|
},
|
|
42
42
|
{
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
"model_family": "whisper",
|
|
45
45
|
"model_id": "openai/whisper-small.en",
|
|
46
46
|
"model_revision": "e8727524f962ee844a7319d92be39ac1bd25655a",
|
|
47
|
-
"model_ability": "
|
|
47
|
+
"model_ability": ["audio2text"],
|
|
48
48
|
"multilingual": false
|
|
49
49
|
},
|
|
50
50
|
{
|
|
@@ -52,7 +52,7 @@
|
|
|
52
52
|
"model_family": "whisper",
|
|
53
53
|
"model_id": "openai/whisper-medium",
|
|
54
54
|
"model_revision": "16688beb1294bedd0a6f5cd86fe7eec57bce41ed",
|
|
55
|
-
"model_ability": "
|
|
55
|
+
"model_ability": ["audio2text"],
|
|
56
56
|
"multilingual": true
|
|
57
57
|
},
|
|
58
58
|
{
|
|
@@ -60,7 +60,7 @@
|
|
|
60
60
|
"model_family": "whisper",
|
|
61
61
|
"model_id": "openai/whisper-medium.en",
|
|
62
62
|
"model_revision": "2e98eb6279edf5095af0c8dedb36bdec0acd172b",
|
|
63
|
-
"model_ability": "
|
|
63
|
+
"model_ability": ["audio2text"],
|
|
64
64
|
"multilingual": false
|
|
65
65
|
},
|
|
66
66
|
{
|
|
@@ -68,7 +68,7 @@
|
|
|
68
68
|
"model_family": "whisper",
|
|
69
69
|
"model_id": "openai/whisper-large-v3",
|
|
70
70
|
"model_revision": "6cdf07a7e3ec3806e5d55f787915b85d4cd020b1",
|
|
71
|
-
"model_ability": "
|
|
71
|
+
"model_ability": ["audio2text"],
|
|
72
72
|
"multilingual": true
|
|
73
73
|
},
|
|
74
74
|
{
|
|
@@ -76,7 +76,7 @@
|
|
|
76
76
|
"model_family": "whisper",
|
|
77
77
|
"model_id": "openai/whisper-large-v3-turbo",
|
|
78
78
|
"model_revision": "41f01f3fe87f28c78e2fbf8b568835947dd65ed9",
|
|
79
|
-
"model_ability": "
|
|
79
|
+
"model_ability": ["audio2text"],
|
|
80
80
|
"multilingual": true
|
|
81
81
|
},
|
|
82
82
|
{
|
|
@@ -84,7 +84,7 @@
|
|
|
84
84
|
"model_family": "whisper",
|
|
85
85
|
"model_id": "BELLE-2/Belle-distilwhisper-large-v2-zh",
|
|
86
86
|
"model_revision": "ed25d13498fa5bac758b2fc479435b698532dfe8",
|
|
87
|
-
"model_ability": "
|
|
87
|
+
"model_ability": ["audio2text"],
|
|
88
88
|
"multilingual": false
|
|
89
89
|
},
|
|
90
90
|
{
|
|
@@ -92,7 +92,7 @@
|
|
|
92
92
|
"model_family": "whisper",
|
|
93
93
|
"model_id": "BELLE-2/Belle-whisper-large-v2-zh",
|
|
94
94
|
"model_revision": "ec5bd5d78598545b7585814edde86dac2002b5b9",
|
|
95
|
-
"model_ability": "
|
|
95
|
+
"model_ability": ["audio2text"],
|
|
96
96
|
"multilingual": false
|
|
97
97
|
},
|
|
98
98
|
{
|
|
@@ -100,14 +100,14 @@
|
|
|
100
100
|
"model_family": "whisper",
|
|
101
101
|
"model_id": "BELLE-2/Belle-whisper-large-v3-zh",
|
|
102
102
|
"model_revision": "3bebc7247696b39f5ab9ed22db426943ac33f600",
|
|
103
|
-
"model_ability": "
|
|
103
|
+
"model_ability": ["audio2text"],
|
|
104
104
|
"multilingual": false
|
|
105
105
|
},
|
|
106
106
|
{
|
|
107
107
|
"model_name": "whisper-tiny-mlx",
|
|
108
108
|
"model_family": "whisper",
|
|
109
109
|
"model_id": "mlx-community/whisper-tiny",
|
|
110
|
-
"model_ability": "
|
|
110
|
+
"model_ability": ["audio2text"],
|
|
111
111
|
"multilingual": true,
|
|
112
112
|
"engine": "mlx"
|
|
113
113
|
},
|
|
@@ -115,7 +115,7 @@
|
|
|
115
115
|
"model_name": "whisper-tiny.en-mlx",
|
|
116
116
|
"model_family": "whisper",
|
|
117
117
|
"model_id": "mlx-community/whisper-tiny.en-mlx",
|
|
118
|
-
"model_ability": "
|
|
118
|
+
"model_ability": ["audio2text"],
|
|
119
119
|
"multilingual": false,
|
|
120
120
|
"engine": "mlx"
|
|
121
121
|
},
|
|
@@ -123,7 +123,7 @@
|
|
|
123
123
|
"model_name": "whisper-base-mlx",
|
|
124
124
|
"model_family": "whisper",
|
|
125
125
|
"model_id": "mlx-community/whisper-base-mlx",
|
|
126
|
-
"model_ability": "
|
|
126
|
+
"model_ability": ["audio2text"],
|
|
127
127
|
"multilingual": true,
|
|
128
128
|
"engine": "mlx"
|
|
129
129
|
},
|
|
@@ -131,7 +131,7 @@
|
|
|
131
131
|
"model_name": "whisper-base.en-mlx",
|
|
132
132
|
"model_family": "whisper",
|
|
133
133
|
"model_id": "mlx-community/whisper-base.en-mlx",
|
|
134
|
-
"model_ability": "
|
|
134
|
+
"model_ability": ["audio2text"],
|
|
135
135
|
"multilingual": false,
|
|
136
136
|
"engine": "mlx"
|
|
137
137
|
},
|
|
@@ -139,7 +139,7 @@
|
|
|
139
139
|
"model_name": "whisper-small-mlx",
|
|
140
140
|
"model_family": "whisper",
|
|
141
141
|
"model_id": "mlx-community/whisper-small-mlx",
|
|
142
|
-
"model_ability": "
|
|
142
|
+
"model_ability": ["audio2text"],
|
|
143
143
|
"multilingual": true,
|
|
144
144
|
"engine": "mlx"
|
|
145
145
|
},
|
|
@@ -147,7 +147,7 @@
|
|
|
147
147
|
"model_name": "whisper-small.en-mlx",
|
|
148
148
|
"model_family": "whisper",
|
|
149
149
|
"model_id": "mlx-community/whisper-small.en-mlx",
|
|
150
|
-
"model_ability": "
|
|
150
|
+
"model_ability": ["audio2text"],
|
|
151
151
|
"multilingual": false,
|
|
152
152
|
"engine": "mlx"
|
|
153
153
|
},
|
|
@@ -155,7 +155,7 @@
|
|
|
155
155
|
"model_name": "whisper-medium-mlx",
|
|
156
156
|
"model_family": "whisper",
|
|
157
157
|
"model_id": "mlx-community/whisper-medium-mlx",
|
|
158
|
-
"model_ability": "
|
|
158
|
+
"model_ability": ["audio2text"],
|
|
159
159
|
"multilingual": true,
|
|
160
160
|
"engine": "mlx"
|
|
161
161
|
},
|
|
@@ -163,7 +163,7 @@
|
|
|
163
163
|
"model_name": "whisper-medium.en-mlx",
|
|
164
164
|
"model_family": "whisper",
|
|
165
165
|
"model_id": "mlx-community/whisper-medium.en-mlx",
|
|
166
|
-
"model_ability": "
|
|
166
|
+
"model_ability": ["audio2text"],
|
|
167
167
|
"multilingual": false,
|
|
168
168
|
"engine": "mlx"
|
|
169
169
|
},
|
|
@@ -171,7 +171,7 @@
|
|
|
171
171
|
"model_name": "whisper-large-v3-mlx",
|
|
172
172
|
"model_family": "whisper",
|
|
173
173
|
"model_id": "mlx-community/whisper-large-v3-mlx",
|
|
174
|
-
"model_ability": "
|
|
174
|
+
"model_ability": ["audio2text"],
|
|
175
175
|
"multilingual": true,
|
|
176
176
|
"engine": "mlx"
|
|
177
177
|
},
|
|
@@ -179,7 +179,7 @@
|
|
|
179
179
|
"model_name": "whisper-large-v3-turbo-mlx",
|
|
180
180
|
"model_family": "whisper",
|
|
181
181
|
"model_id": "mlx-community/whisper-large-v3-turbo",
|
|
182
|
-
"model_ability": "
|
|
182
|
+
"model_ability": ["audio2text"],
|
|
183
183
|
"multilingual": true,
|
|
184
184
|
"engine": "mlx"
|
|
185
185
|
},
|
|
@@ -188,7 +188,7 @@
|
|
|
188
188
|
"model_family": "funasr",
|
|
189
189
|
"model_id": "FunAudioLLM/SenseVoiceSmall",
|
|
190
190
|
"model_revision": "3eb3b4eeffc2f2dde6051b853983753db33e35c3",
|
|
191
|
-
"model_ability": "
|
|
191
|
+
"model_ability": ["audio2text"],
|
|
192
192
|
"multilingual": true,
|
|
193
193
|
"default_model_config": {
|
|
194
194
|
"vad_model": "fsmn-vad",
|
|
@@ -208,7 +208,7 @@
|
|
|
208
208
|
"model_family": "funasr",
|
|
209
209
|
"model_id": "funasr/paraformer-zh",
|
|
210
210
|
"model_revision": "5ed094cdfc8f6a9b6b022bd08bc904ef862bc79e",
|
|
211
|
-
"model_ability": "
|
|
211
|
+
"model_ability": ["audio2text"],
|
|
212
212
|
"multilingual": false,
|
|
213
213
|
"default_model_config": {
|
|
214
214
|
"vad_model": "fsmn-vad",
|
|
@@ -223,7 +223,7 @@
|
|
|
223
223
|
"model_family": "ChatTTS",
|
|
224
224
|
"model_id": "2Noise/ChatTTS",
|
|
225
225
|
"model_revision": "1a3c04a8b0651689bd9242fbb55b1f4b5a9aef84",
|
|
226
|
-
"model_ability": "
|
|
226
|
+
"model_ability": ["text2audio"],
|
|
227
227
|
"multilingual": true
|
|
228
228
|
},
|
|
229
229
|
{
|
|
@@ -231,7 +231,7 @@
|
|
|
231
231
|
"model_family": "CosyVoice",
|
|
232
232
|
"model_id": "FunAudioLLM/CosyVoice-300M",
|
|
233
233
|
"model_revision": "39c4e13d46bd4dfb840d214547623e5fcd2428e2",
|
|
234
|
-
"model_ability": "
|
|
234
|
+
"model_ability": ["text2audio"],
|
|
235
235
|
"multilingual": true
|
|
236
236
|
},
|
|
237
237
|
{
|
|
@@ -239,7 +239,7 @@
|
|
|
239
239
|
"model_family": "CosyVoice",
|
|
240
240
|
"model_id": "FunAudioLLM/CosyVoice-300M-SFT",
|
|
241
241
|
"model_revision": "096a5cff8d497fabb3dec2756a200f3688457a1b",
|
|
242
|
-
"model_ability": "
|
|
242
|
+
"model_ability": ["text2audio"],
|
|
243
243
|
"multilingual": true
|
|
244
244
|
},
|
|
245
245
|
{
|
|
@@ -247,23 +247,40 @@
|
|
|
247
247
|
"model_family": "CosyVoice",
|
|
248
248
|
"model_id": "FunAudioLLM/CosyVoice-300M-Instruct",
|
|
249
249
|
"model_revision": "ba5265d9a3169c1fedce145122c9dd4bc24e062c",
|
|
250
|
-
"model_ability": "
|
|
250
|
+
"model_ability": ["text2audio"],
|
|
251
251
|
"multilingual": true
|
|
252
252
|
},
|
|
253
253
|
{
|
|
254
254
|
"model_name": "CosyVoice2-0.5B",
|
|
255
255
|
"model_family": "CosyVoice",
|
|
256
|
-
"model_id": "
|
|
257
|
-
"model_revision": "
|
|
258
|
-
"model_ability": "
|
|
259
|
-
"multilingual": true
|
|
256
|
+
"model_id": "JunHowie/CosyVoice2-0.5B",
|
|
257
|
+
"model_revision": "7ac9e9a026aec35efe48cde1196eaad6a00ad5f2",
|
|
258
|
+
"model_ability": ["text2audio"],
|
|
259
|
+
"multilingual": true,
|
|
260
|
+
"virtualenv": {
|
|
261
|
+
"packages": [
|
|
262
|
+
"tiktoken",
|
|
263
|
+
"lightning>=2.0.0",
|
|
264
|
+
"hydra-core>=1.3.2",
|
|
265
|
+
"inflect",
|
|
266
|
+
"conformer",
|
|
267
|
+
"diffusers==0.29.0",
|
|
268
|
+
"gdown",
|
|
269
|
+
"pyarrow",
|
|
270
|
+
"HyperPyYAML",
|
|
271
|
+
"onnxruntime>=1.16.0",
|
|
272
|
+
"pyworld>=0.3.4",
|
|
273
|
+
"numpy==1.26.4",
|
|
274
|
+
"#system_torch#"
|
|
275
|
+
]
|
|
276
|
+
}
|
|
260
277
|
},
|
|
261
278
|
{
|
|
262
279
|
"model_name": "FishSpeech-1.5",
|
|
263
280
|
"model_family": "FishAudio",
|
|
264
281
|
"model_id": "fishaudio/fish-speech-1.5",
|
|
265
282
|
"model_revision": "268b6ec86243dd683bc78dab7e9a6cedf9191f2a",
|
|
266
|
-
"model_ability": "
|
|
283
|
+
"model_ability": ["text2audio"],
|
|
267
284
|
"multilingual": true
|
|
268
285
|
},
|
|
269
286
|
{
|
|
@@ -271,7 +288,7 @@
|
|
|
271
288
|
"model_family": "F5-TTS",
|
|
272
289
|
"model_id": "SWivid/F5-TTS",
|
|
273
290
|
"model_revision": "4dcc16f297f2ff98a17b3726b16f5de5a5e45672",
|
|
274
|
-
"model_ability": "
|
|
291
|
+
"model_ability": ["text2audio"],
|
|
275
292
|
"multilingual": true
|
|
276
293
|
},
|
|
277
294
|
{
|
|
@@ -279,7 +296,7 @@
|
|
|
279
296
|
"model_family": "F5-TTS-MLX",
|
|
280
297
|
"model_id": "lucasnewman/f5-tts-mlx",
|
|
281
298
|
"model_revision": "7642bb232e3fcacf92c51c786edebb8624da6b93",
|
|
282
|
-
"model_ability": "
|
|
299
|
+
"model_ability": ["text2audio"],
|
|
283
300
|
"multilingual": true
|
|
284
301
|
},
|
|
285
302
|
{
|
|
@@ -287,7 +304,7 @@
|
|
|
287
304
|
"model_family": "MeloTTS",
|
|
288
305
|
"model_id": "myshell-ai/MeloTTS-English",
|
|
289
306
|
"model_revision": "bb4fb7346d566d277ba8c8c7dbfdf6786139b8ef",
|
|
290
|
-
"model_ability": "
|
|
307
|
+
"model_ability": ["text2audio"],
|
|
291
308
|
"multilingual": false,
|
|
292
309
|
"language": "EN"
|
|
293
310
|
},
|
|
@@ -296,7 +313,7 @@
|
|
|
296
313
|
"model_family": "MeloTTS",
|
|
297
314
|
"model_id": "myshell-ai/MeloTTS-English-v2",
|
|
298
315
|
"model_revision": "a53e3509c4ee4ff16d79272feb2474ff864e18f3",
|
|
299
|
-
"model_ability": "
|
|
316
|
+
"model_ability": ["text2audio"],
|
|
300
317
|
"multilingual": false,
|
|
301
318
|
"language": "EN"
|
|
302
319
|
},
|
|
@@ -305,7 +322,7 @@
|
|
|
305
322
|
"model_family": "MeloTTS",
|
|
306
323
|
"model_id": "myshell-ai/MeloTTS-English-v3",
|
|
307
324
|
"model_revision": "f7c4a35392c0e9be24a755f1edb4c3f63040f759",
|
|
308
|
-
"model_ability": "
|
|
325
|
+
"model_ability": ["text2audio"],
|
|
309
326
|
"multilingual": false,
|
|
310
327
|
"language": "EN"
|
|
311
328
|
},
|
|
@@ -314,7 +331,7 @@
|
|
|
314
331
|
"model_family": "MeloTTS",
|
|
315
332
|
"model_id": "myshell-ai/MeloTTS-French",
|
|
316
333
|
"model_revision": "1e9bf590262392d8bffb679b0a3b0c16b0f9fdaf",
|
|
317
|
-
"model_ability": "
|
|
334
|
+
"model_ability": ["text2audio"],
|
|
318
335
|
"multilingual": false,
|
|
319
336
|
"language": "FR"
|
|
320
337
|
},
|
|
@@ -323,7 +340,7 @@
|
|
|
323
340
|
"model_family": "MeloTTS",
|
|
324
341
|
"model_id": "myshell-ai/MeloTTS-Japanese",
|
|
325
342
|
"model_revision": "367f8795464b531b4e97c1515bddfc1243e60891",
|
|
326
|
-
"model_ability": "
|
|
343
|
+
"model_ability": ["text2audio"],
|
|
327
344
|
"multilingual": false,
|
|
328
345
|
"language": "JP"
|
|
329
346
|
},
|
|
@@ -332,7 +349,7 @@
|
|
|
332
349
|
"model_family": "MeloTTS",
|
|
333
350
|
"model_id": "myshell-ai/MeloTTS-Spanish",
|
|
334
351
|
"model_revision": "dbb5496df39d11a66c1d5f5a9ca357c3c9fb95fb",
|
|
335
|
-
"model_ability": "
|
|
352
|
+
"model_ability": ["text2audio"],
|
|
336
353
|
"multilingual": false,
|
|
337
354
|
"language": "ES"
|
|
338
355
|
},
|
|
@@ -341,7 +358,7 @@
|
|
|
341
358
|
"model_family": "MeloTTS",
|
|
342
359
|
"model_id": "myshell-ai/MeloTTS-Chinese",
|
|
343
360
|
"model_revision": "af5d207a364ea4208c6f589c89f57f88414bdd16",
|
|
344
|
-
"model_ability": "
|
|
361
|
+
"model_ability": ["text2audio"],
|
|
345
362
|
"multilingual": false,
|
|
346
363
|
"language": "ZH"
|
|
347
364
|
},
|
|
@@ -350,7 +367,7 @@
|
|
|
350
367
|
"model_family": "MeloTTS",
|
|
351
368
|
"model_id": "myshell-ai/MeloTTS-Korean",
|
|
352
369
|
"model_revision": "0207e5adfc90129a51b6b03d89be6d84360ed323",
|
|
353
|
-
"model_ability": "
|
|
370
|
+
"model_ability": ["text2audio"],
|
|
354
371
|
"multilingual": false,
|
|
355
372
|
"language": "KR"
|
|
356
373
|
},
|
|
@@ -359,7 +376,7 @@
|
|
|
359
376
|
"model_family": "Kokoro",
|
|
360
377
|
"model_id": "hexgrad/Kokoro-82M",
|
|
361
378
|
"model_revision": "7884269d6fd3f9beabc271b6f1308e5699281fa9",
|
|
362
|
-
"model_ability": "
|
|
379
|
+
"model_ability": ["text2audio"],
|
|
363
380
|
"multilingual": true
|
|
364
381
|
},
|
|
365
382
|
{
|
|
@@ -367,7 +384,7 @@
|
|
|
367
384
|
"model_family": "MegaTTS",
|
|
368
385
|
"model_id": "ByteDance/MegaTTS3",
|
|
369
386
|
"model_revision": "409a7002b006d80f0730fca6f80441b08c10e738",
|
|
370
|
-
"model_ability": "
|
|
387
|
+
"model_ability": ["text2audio"],
|
|
371
388
|
"multilingual": true
|
|
372
389
|
}
|
|
373
390
|
]
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"model_hub": "modelscope",
|
|
6
6
|
"model_id": "AI-ModelScope/whisper-large-v3",
|
|
7
7
|
"model_revision": "master",
|
|
8
|
-
"model_ability": "
|
|
8
|
+
"model_ability": ["audio2text"],
|
|
9
9
|
"multilingual": true
|
|
10
10
|
},
|
|
11
11
|
{
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
"model_hub": "modelscope",
|
|
15
15
|
"model_id": "AI-ModelScope/whisper-large-v3-turbo",
|
|
16
16
|
"model_revision": "master",
|
|
17
|
-
"model_ability": "
|
|
17
|
+
"model_ability": ["audio2text"],
|
|
18
18
|
"multilingual": true
|
|
19
19
|
},
|
|
20
20
|
{
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
"model_hub": "modelscope",
|
|
24
24
|
"model_id": "Xorbits/Belle-whisper-large-v3-zh",
|
|
25
25
|
"model_revision": "master",
|
|
26
|
-
"model_ability": "
|
|
26
|
+
"model_ability": ["audio2text"],
|
|
27
27
|
"multilingual": false
|
|
28
28
|
},
|
|
29
29
|
{
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
"model_hub": "modelscope",
|
|
33
33
|
"model_id": "iic/SenseVoiceSmall",
|
|
34
34
|
"model_revision": "master",
|
|
35
|
-
"model_ability": "
|
|
35
|
+
"model_ability": ["audio2text"],
|
|
36
36
|
"multilingual": true,
|
|
37
37
|
"default_model_config": {
|
|
38
38
|
"vad_model": "fsmn-vad",
|
|
@@ -53,7 +53,7 @@
|
|
|
53
53
|
"model_hub": "modelscope",
|
|
54
54
|
"model_id": "iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn",
|
|
55
55
|
"model_revision": "master",
|
|
56
|
-
"model_ability": "
|
|
56
|
+
"model_ability": ["audio2text"],
|
|
57
57
|
"multilingual": false,
|
|
58
58
|
"default_model_config": {
|
|
59
59
|
"vad_model": "fsmn-vad",
|
|
@@ -69,7 +69,7 @@
|
|
|
69
69
|
"model_hub": "modelscope",
|
|
70
70
|
"model_id": "AI-ModelScope/ChatTTS",
|
|
71
71
|
"model_revision": "master",
|
|
72
|
-
"model_ability": "
|
|
72
|
+
"model_ability": ["text2audio"],
|
|
73
73
|
"multilingual": true
|
|
74
74
|
},
|
|
75
75
|
{
|
|
@@ -78,7 +78,7 @@
|
|
|
78
78
|
"model_hub": "modelscope",
|
|
79
79
|
"model_id": "iic/CosyVoice-300M",
|
|
80
80
|
"model_revision": "master",
|
|
81
|
-
"model_ability": "
|
|
81
|
+
"model_ability": ["text2audio"],
|
|
82
82
|
"multilingual": true
|
|
83
83
|
},
|
|
84
84
|
{
|
|
@@ -87,7 +87,7 @@
|
|
|
87
87
|
"model_hub": "modelscope",
|
|
88
88
|
"model_id": "iic/CosyVoice-300M-SFT",
|
|
89
89
|
"model_revision": "master",
|
|
90
|
-
"model_ability": "
|
|
90
|
+
"model_ability": ["text2audio"],
|
|
91
91
|
"multilingual": true
|
|
92
92
|
},
|
|
93
93
|
{
|
|
@@ -96,7 +96,7 @@
|
|
|
96
96
|
"model_hub": "modelscope",
|
|
97
97
|
"model_id": "iic/CosyVoice-300M-Instruct",
|
|
98
98
|
"model_revision": "master",
|
|
99
|
-
"model_ability": "
|
|
99
|
+
"model_ability": ["text2audio"],
|
|
100
100
|
"multilingual": true
|
|
101
101
|
},
|
|
102
102
|
{
|
|
@@ -105,8 +105,25 @@
|
|
|
105
105
|
"model_hub": "modelscope",
|
|
106
106
|
"model_id": "iic/CosyVoice2-0.5B",
|
|
107
107
|
"model_revision": "master",
|
|
108
|
-
"model_ability": "
|
|
109
|
-
"multilingual": true
|
|
108
|
+
"model_ability": ["text2audio"],
|
|
109
|
+
"multilingual": true,
|
|
110
|
+
"virtualenv": {
|
|
111
|
+
"packages": [
|
|
112
|
+
"tiktoken",
|
|
113
|
+
"lightning>=2.0.0",
|
|
114
|
+
"hydra-core>=1.3.2",
|
|
115
|
+
"inflect",
|
|
116
|
+
"conformer",
|
|
117
|
+
"diffusers==0.29.0",
|
|
118
|
+
"gdown",
|
|
119
|
+
"pyarrow",
|
|
120
|
+
"HyperPyYAML",
|
|
121
|
+
"onnxruntime>=1.16.0",
|
|
122
|
+
"pyworld>=0.3.4",
|
|
123
|
+
"numpy==1.26.4",
|
|
124
|
+
"#system_torch#"
|
|
125
|
+
]
|
|
126
|
+
}
|
|
110
127
|
},
|
|
111
128
|
{
|
|
112
129
|
"model_name": "F5-TTS",
|
|
@@ -114,7 +131,7 @@
|
|
|
114
131
|
"model_hub": "modelscope",
|
|
115
132
|
"model_id": "SWivid/F5-TTS_Emilia-ZH-EN",
|
|
116
133
|
"model_revision": "master",
|
|
117
|
-
"model_ability": "
|
|
134
|
+
"model_ability": ["text2audio"],
|
|
118
135
|
"multilingual": true
|
|
119
136
|
},
|
|
120
137
|
{
|
|
@@ -123,7 +140,7 @@
|
|
|
123
140
|
"model_hub": "modelscope",
|
|
124
141
|
"model_id": "AI-ModelScope/Kokoro-82M",
|
|
125
142
|
"model_revision": "master",
|
|
126
|
-
"model_ability": "
|
|
143
|
+
"model_ability": ["text2audio"],
|
|
127
144
|
"multilingual": true
|
|
128
145
|
},
|
|
129
146
|
{
|
|
@@ -132,7 +149,7 @@
|
|
|
132
149
|
"model_hub": "modelscope",
|
|
133
150
|
"model_id": "ByteDance/MegaTTS3",
|
|
134
151
|
"model_revision": "master",
|
|
135
|
-
"model_ability": "
|
|
152
|
+
"model_ability": ["text2audio"],
|
|
136
153
|
"multilingual": true
|
|
137
154
|
}
|
|
138
155
|
]
|
|
@@ -56,29 +56,10 @@ def register_custom_model():
|
|
|
56
56
|
|
|
57
57
|
|
|
58
58
|
def _install():
|
|
59
|
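-
_model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
|
|
60
|
-
_model_spec_modelscope_json = os.path.join(
|
|
61
|
-
os.path.dirname(__file__), "model_spec_modelscope.json"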
|
|
59
|
+
load_model_family_from_json("model_spec.json", BUILTIN_EMBEDDING_MODELS)
|
|
60
|
+
load_model_family_from_json(
|
|
61
|
+
"model_spec_modelscope.json", MODELSCOPE_EMBEDDING_MODELS
|
|
62
62
|
)
|
|
63
|
-
BUILTIN_EMBEDDING_MODELS.update(
|
|
64
|
-
dict(
|
|
65
|
-
(spec["model_name"], EmbeddingModelSpec(**spec))
|
|
66
|
-
for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
|
|
67
|
-
)
|
|
68
|
-
)
|
|
69
|
-
for model_name, model_spec in BUILTIN_EMBEDDING_MODELS.items():
|
|
70
|
-
MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
|
|
71
|
-
|
|
72
|
-
MODELSCOPE_EMBEDDING_MODELS.update(
|
|
73
|
-
dict(
|
|
74
|
-
(spec["model_name"], EmbeddingModelSpec(**spec))
|
|
75
|
-
for spec in json.load(
|
|
76
|
-
codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
|
|
77
|
-
)
|
|
78
|
-
)
|
|
79
|
-
)
|
|
80
|
-
for model_name, model_spec in MODELSCOPE_EMBEDDING_MODELS.items():
|
|
81
|
-
MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
|
|
82
63
|
|
|
83
64
|
# register model description after recording model revision
|
|
84
65
|
for model_spec_info in [BUILTIN_EMBEDDING_MODELS, MODELSCOPE_EMBEDDING_MODELS]:
|
|
@@ -96,5 +77,16 @@ def _install():
|
|
|
96
77
|
generate_embedding_description(ud_embedding)
|
|
97
78
|
)
|
|
98
79
|
|
|
99
|
-
del _model_spec_json
|
|
100
|
-
del _model_spec_modelscope_json
|
|
80
|
+
|
|
81
|
+
def load_model_family_from_json(json_filename, target_families):
|
|
82
|
+
json_path = os.path.join(os.path.dirname(__file__), json_filename)
|
|
83
|
+
target_families.update(
|
|
84
|
+
dict(
|
|
85
|
+
(spec["model_name"], EmbeddingModelSpec(**spec))
|
|
86
|
+
for spec in json.load(codecs.open(json_path, "r", encoding="utf-8"))
|
|
87
|
+
)
|
|
88
|
+
)
|
|
89
|
+
for model_name, model_spec in target_families.items():
|
|
90
|
+
MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
|
|
91
|
+
|
|
92
|
+
del json_path
|
|
@@ -55,29 +55,8 @@ def register_custom_model():
|
|
|
55
55
|
|
|
56
56
|
|
|
57
57
|
def _install():
|
|
58
|
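-
_model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
|
|
59
|
-
_model_spec_modelscope_json = os.path.join(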
|
|
60
|
-
os.path.dirname(__file__), "model_spec_modelscope.json"
|
|
61
|
-
)
|
|
62
|
-
BUILTIN_IMAGE_MODELS.update(
|
|
63
|
-
dict(
|
|
64
|
-
(spec["model_name"], ImageModelFamilyV1(**spec))
|
|
65
|
-
for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
|
|
66
|
-
)
|
|
67
|
-
)
|
|
68
|
-
for model_name, model_spec in BUILTIN_IMAGE_MODELS.items():
|
|
69
|
-
MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
|
|
70
|
-
|
|
71
|
-
MODELSCOPE_IMAGE_MODELS.update(
|
|
72
|
-
dict(
|
|
73
|
-
(spec["model_name"], ImageModelFamilyV1(**spec))
|
|
74
|
-
for spec in json.load(
|
|
75
|
-
codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
|
|
76
|
-
)
|
|
77
|
-
)
|
|
78
|
-
)
|
|
79
|
-
for model_name, model_spec in MODELSCOPE_IMAGE_MODELS.items():
|
|
80
|
-
MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
|
|
58
|
+
load_model_family_from_json("model_spec.json", BUILTIN_IMAGE_MODELS)
|
|
59
|
+
load_model_family_from_json("model_spec_modelscope.json", MODELSCOPE_IMAGE_MODELS)
|
|
81
60
|
|
|
82
61
|
# register model description
|
|
83
62
|
for model_name, model_spec in chain(
|
|
@@ -90,5 +69,16 @@ def _install():
|
|
|
90
69
|
for ud_image in get_user_defined_images():
|
|
91
70
|
IMAGE_MODEL_DESCRIPTIONS.update(generate_image_description(ud_image))
|
|
92
71
|
|
|
93
|
-
del _model_spec_json
|
|
94
|
-
del _model_spec_modelscope_json
|
|
72
|
+
|
|
73
|
+
def load_model_family_from_json(json_filename, target_families):
|
|
74
|
+
json_path = os.path.join(os.path.dirname(__file__), json_filename)
|
|
75
|
+
target_families.update(
|
|
76
|
+
dict(
|
|
77
|
+
(spec["model_name"], ImageModelFamilyV1(**spec))
|
|
78
|
+
for spec in json.load(codecs.open(json_path, "r", encoding="utf-8"))
|
|
79
|
+
)
|
|
80
|
+
)
|
|
81
|
+
for model_name, model_spec in target_families.items():
|
|
82
|
+
MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
|
|
83
|
+
|
|
84
|
+
del json_path
|