xinference 1.5.0.post2__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (137) hide show
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +107 -11
  3. xinference/client/restful/restful_client.py +51 -11
  4. xinference/constants.py +5 -1
  5. xinference/core/media_interface.py +758 -0
  6. xinference/core/model.py +49 -9
  7. xinference/core/supervisor.py +1 -1
  8. xinference/core/utils.py +1 -1
  9. xinference/core/worker.py +33 -39
  10. xinference/deploy/cmdline.py +17 -0
  11. xinference/deploy/utils.py +0 -3
  12. xinference/model/audio/__init__.py +16 -27
  13. xinference/model/audio/core.py +2 -1
  14. xinference/model/audio/cosyvoice.py +4 -2
  15. xinference/model/audio/model_spec.json +63 -46
  16. xinference/model/audio/model_spec_modelscope.json +31 -14
  17. xinference/model/embedding/__init__.py +16 -24
  18. xinference/model/image/__init__.py +15 -25
  19. xinference/model/llm/__init__.py +40 -115
  20. xinference/model/llm/core.py +29 -6
  21. xinference/model/llm/llama_cpp/core.py +30 -347
  22. xinference/model/llm/llm_family.json +1674 -2203
  23. xinference/model/llm/llm_family.py +71 -7
  24. xinference/model/llm/llm_family_csghub.json +0 -32
  25. xinference/model/llm/llm_family_modelscope.json +1838 -2016
  26. xinference/model/llm/llm_family_openmind_hub.json +19 -325
  27. xinference/model/llm/lmdeploy/core.py +7 -2
  28. xinference/model/llm/mlx/core.py +23 -7
  29. xinference/model/llm/reasoning_parser.py +281 -5
  30. xinference/model/llm/sglang/core.py +39 -11
  31. xinference/model/llm/transformers/chatglm.py +9 -2
  32. xinference/model/llm/transformers/cogagent.py +10 -12
  33. xinference/model/llm/transformers/cogvlm2.py +6 -3
  34. xinference/model/llm/transformers/cogvlm2_video.py +3 -6
  35. xinference/model/llm/transformers/core.py +58 -60
  36. xinference/model/llm/transformers/deepseek_v2.py +4 -2
  37. xinference/model/llm/transformers/deepseek_vl.py +10 -4
  38. xinference/model/llm/transformers/deepseek_vl2.py +9 -4
  39. xinference/model/llm/transformers/gemma3.py +4 -5
  40. xinference/model/llm/transformers/glm4v.py +3 -21
  41. xinference/model/llm/transformers/glm_edge_v.py +3 -20
  42. xinference/model/llm/transformers/intern_vl.py +3 -6
  43. xinference/model/llm/transformers/internlm2.py +1 -1
  44. xinference/model/llm/transformers/minicpmv25.py +4 -2
  45. xinference/model/llm/transformers/minicpmv26.py +5 -3
  46. xinference/model/llm/transformers/omnilmm.py +1 -1
  47. xinference/model/llm/transformers/opt.py +1 -1
  48. xinference/model/llm/transformers/ovis2.py +302 -0
  49. xinference/model/llm/transformers/qwen-omni.py +8 -1
  50. xinference/model/llm/transformers/qwen2_audio.py +3 -1
  51. xinference/model/llm/transformers/qwen2_vl.py +5 -1
  52. xinference/model/llm/transformers/qwen_vl.py +5 -2
  53. xinference/model/llm/utils.py +96 -45
  54. xinference/model/llm/vllm/core.py +108 -24
  55. xinference/model/llm/vllm/distributed_executor.py +8 -7
  56. xinference/model/llm/vllm/xavier/allocator.py +1 -1
  57. xinference/model/llm/vllm/xavier/block_manager.py +1 -1
  58. xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
  59. xinference/model/llm/vllm/xavier/executor.py +1 -1
  60. xinference/model/llm/vllm/xavier/test/test_xavier.py +2 -11
  61. xinference/model/rerank/__init__.py +13 -24
  62. xinference/model/video/__init__.py +15 -25
  63. xinference/model/video/core.py +3 -3
  64. xinference/model/video/diffusers.py +157 -13
  65. xinference/model/video/model_spec.json +100 -0
  66. xinference/model/video/model_spec_modelscope.json +104 -0
  67. xinference/thirdparty/cosyvoice/bin/average_model.py +5 -4
  68. xinference/thirdparty/cosyvoice/bin/export_jit.py +50 -20
  69. xinference/thirdparty/cosyvoice/bin/export_onnx.py +136 -51
  70. xinference/thirdparty/cosyvoice/bin/inference.py +15 -5
  71. xinference/thirdparty/cosyvoice/bin/train.py +7 -2
  72. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +72 -52
  73. xinference/thirdparty/cosyvoice/cli/frontend.py +58 -58
  74. xinference/thirdparty/cosyvoice/cli/model.py +140 -155
  75. xinference/thirdparty/cosyvoice/dataset/processor.py +9 -5
  76. xinference/thirdparty/cosyvoice/flow/decoder.py +656 -54
  77. xinference/thirdparty/cosyvoice/flow/flow.py +69 -11
  78. xinference/thirdparty/cosyvoice/flow/flow_matching.py +167 -63
  79. xinference/thirdparty/cosyvoice/flow/length_regulator.py +1 -0
  80. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +91 -1
  81. xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +4 -1
  82. xinference/thirdparty/cosyvoice/hifigan/generator.py +4 -1
  83. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +2 -2
  84. xinference/thirdparty/cosyvoice/llm/llm.py +198 -18
  85. xinference/thirdparty/cosyvoice/transformer/embedding.py +12 -4
  86. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +124 -21
  87. xinference/thirdparty/cosyvoice/utils/class_utils.py +13 -0
  88. xinference/thirdparty/cosyvoice/utils/common.py +1 -1
  89. xinference/thirdparty/cosyvoice/utils/file_utils.py +40 -2
  90. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +7 -0
  91. xinference/thirdparty/cosyvoice/utils/mask.py +4 -0
  92. xinference/thirdparty/cosyvoice/utils/train_utils.py +5 -1
  93. xinference/thirdparty/matcha/hifigan/xutils.py +3 -3
  94. xinference/types.py +2 -71
  95. xinference/web/ui/build/asset-manifest.json +6 -6
  96. xinference/web/ui/build/index.html +1 -1
  97. xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
  98. xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
  99. xinference/web/ui/build/static/js/main.ae579a97.js +3 -0
  100. xinference/web/ui/build/static/js/main.ae579a97.js.map +1 -0
  101. xinference/web/ui/node_modules/.cache/babel-loader/0196a4b09e3264614e54360d5f832c46b31d964ec58296765ebff191ace6adbf.json +1 -0
  102. xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +1 -0
  103. xinference/web/ui/node_modules/.cache/babel-loader/18fa271456b31cded36c05c4c71c6b2b1cf4e4128c1e32f0e45d8b9f21764397.json +1 -0
  104. xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +1 -0
  105. xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +1 -0
  106. xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
  107. xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
  108. xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +1 -0
  109. xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
  110. xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
  111. xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
  112. xinference/web/ui/src/locales/en.json +7 -4
  113. xinference/web/ui/src/locales/zh.json +7 -4
  114. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/METADATA +56 -36
  115. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/RECORD +120 -121
  116. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/WHEEL +1 -1
  117. xinference/core/image_interface.py +0 -377
  118. xinference/model/llm/transformers/compression.py +0 -258
  119. xinference/model/llm/transformers/yi_vl.py +0 -239
  120. xinference/thirdparty/cosyvoice/bin/export_trt.sh +0 -9
  121. xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
  122. xinference/web/ui/build/static/js/main.4b67a723.js +0 -3
  123. xinference/web/ui/build/static/js/main.4b67a723.js.map +0 -1
  124. xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +0 -1
  125. xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
  126. xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +0 -1
  127. xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
  128. xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
  129. xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
  130. xinference/web/ui/node_modules/.cache/babel-loader/e4ba658c6b3b0490910acdae0c535a892257efb61539a24adf8038fc653bd22f.json +0 -1
  131. xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
  132. xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +0 -1
  133. xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
  134. /xinference/web/ui/build/static/js/{main.4b67a723.js.LICENSE.txt → main.ae579a97.js.LICENSE.txt} +0 -0
  135. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/entry_points.txt +0 -0
  136. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/licenses/LICENSE +0 -0
  137. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/top_level.txt +0 -0
@@ -4,7 +4,7 @@
4
4
  "model_family": "whisper",
5
5
  "model_id": "openai/whisper-tiny",
6
6
  "model_revision": "167c219b21f11ef214220b8fdb7536b8a88c2475",
7
- "model_ability": "audio-to-text",
7
+ "model_ability": ["audio2text"],
8
8
  "multilingual": true
9
9
  },
10
10
  {
@@ -12,7 +12,7 @@
12
12
  "model_family": "whisper",
13
13
  "model_id": "openai/whisper-tiny.en",
14
14
  "model_revision": "87c7102498dcde7456f24cfd30239ca606ed9063",
15
- "model_ability": "audio-to-text",
15
+ "model_ability": ["audio2text"],
16
16
  "multilingual": false
17
17
  },
18
18
  {
@@ -20,7 +20,7 @@
20
20
  "model_family": "whisper",
21
21
  "model_id": "openai/whisper-base",
22
22
  "model_revision": "8c1db9b51951100007a96a525d83a8ec81b3c237",
23
- "model_ability": "audio-to-text",
23
+ "model_ability": ["audio2text"],
24
24
  "multilingual": true
25
25
  },
26
26
  {
@@ -28,7 +28,7 @@
28
28
  "model_family": "whisper",
29
29
  "model_id": "openai/whisper-base.en",
30
30
  "model_revision": "911407f4214e0e1d82085af863093ec0b66f9cd6",
31
- "model_ability": "audio-to-text",
31
+ "model_ability": ["audio2text"],
32
32
  "multilingual": false
33
33
  },
34
34
  {
@@ -36,7 +36,7 @@
36
36
  "model_family": "whisper",
37
37
  "model_id": "openai/whisper-small",
38
38
  "model_revision": "998cb1a777c20db53d6033a61b977ed4c3792cac",
39
- "model_ability": "audio-to-text",
39
+ "model_ability": ["audio2text"],
40
40
  "multilingual": true
41
41
  },
42
42
  {
@@ -44,7 +44,7 @@
44
44
  "model_family": "whisper",
45
45
  "model_id": "openai/whisper-small.en",
46
46
  "model_revision": "e8727524f962ee844a7319d92be39ac1bd25655a",
47
- "model_ability": "audio-to-text",
47
+ "model_ability": ["audio2text"],
48
48
  "multilingual": false
49
49
  },
50
50
  {
@@ -52,7 +52,7 @@
52
52
  "model_family": "whisper",
53
53
  "model_id": "openai/whisper-medium",
54
54
  "model_revision": "16688beb1294bedd0a6f5cd86fe7eec57bce41ed",
55
- "model_ability": "audio-to-text",
55
+ "model_ability": ["audio2text"],
56
56
  "multilingual": true
57
57
  },
58
58
  {
@@ -60,7 +60,7 @@
60
60
  "model_family": "whisper",
61
61
  "model_id": "openai/whisper-medium.en",
62
62
  "model_revision": "2e98eb6279edf5095af0c8dedb36bdec0acd172b",
63
- "model_ability": "audio-to-text",
63
+ "model_ability": ["audio2text"],
64
64
  "multilingual": false
65
65
  },
66
66
  {
@@ -68,7 +68,7 @@
68
68
  "model_family": "whisper",
69
69
  "model_id": "openai/whisper-large-v3",
70
70
  "model_revision": "6cdf07a7e3ec3806e5d55f787915b85d4cd020b1",
71
- "model_ability": "audio-to-text",
71
+ "model_ability": ["audio2text"],
72
72
  "multilingual": true
73
73
  },
74
74
  {
@@ -76,7 +76,7 @@
76
76
  "model_family": "whisper",
77
77
  "model_id": "openai/whisper-large-v3-turbo",
78
78
  "model_revision": "41f01f3fe87f28c78e2fbf8b568835947dd65ed9",
79
- "model_ability": "audio-to-text",
79
+ "model_ability": ["audio2text"],
80
80
  "multilingual": true
81
81
  },
82
82
  {
@@ -84,7 +84,7 @@
84
84
  "model_family": "whisper",
85
85
  "model_id": "BELLE-2/Belle-distilwhisper-large-v2-zh",
86
86
  "model_revision": "ed25d13498fa5bac758b2fc479435b698532dfe8",
87
- "model_ability": "audio-to-text",
87
+ "model_ability": ["audio2text"],
88
88
  "multilingual": false
89
89
  },
90
90
  {
@@ -92,7 +92,7 @@
92
92
  "model_family": "whisper",
93
93
  "model_id": "BELLE-2/Belle-whisper-large-v2-zh",
94
94
  "model_revision": "ec5bd5d78598545b7585814edde86dac2002b5b9",
95
- "model_ability": "audio-to-text",
95
+ "model_ability": ["audio2text"],
96
96
  "multilingual": false
97
97
  },
98
98
  {
@@ -100,14 +100,14 @@
100
100
  "model_family": "whisper",
101
101
  "model_id": "BELLE-2/Belle-whisper-large-v3-zh",
102
102
  "model_revision": "3bebc7247696b39f5ab9ed22db426943ac33f600",
103
- "model_ability": "audio-to-text",
103
+ "model_ability": ["audio2text"],
104
104
  "multilingual": false
105
105
  },
106
106
  {
107
107
  "model_name": "whisper-tiny-mlx",
108
108
  "model_family": "whisper",
109
109
  "model_id": "mlx-community/whisper-tiny",
110
- "model_ability": "audio-to-text",
110
+ "model_ability": ["audio2text"],
111
111
  "multilingual": true,
112
112
  "engine": "mlx"
113
113
  },
@@ -115,7 +115,7 @@
115
115
  "model_name": "whisper-tiny.en-mlx",
116
116
  "model_family": "whisper",
117
117
  "model_id": "mlx-community/whisper-tiny.en-mlx",
118
- "model_ability": "audio-to-text",
118
+ "model_ability": ["audio2text"],
119
119
  "multilingual": false,
120
120
  "engine": "mlx"
121
121
  },
@@ -123,7 +123,7 @@
123
123
  "model_name": "whisper-base-mlx",
124
124
  "model_family": "whisper",
125
125
  "model_id": "mlx-community/whisper-base-mlx",
126
- "model_ability": "audio-to-text",
126
+ "model_ability": ["audio2text"],
127
127
  "multilingual": true,
128
128
  "engine": "mlx"
129
129
  },
@@ -131,7 +131,7 @@
131
131
  "model_name": "whisper-base.en-mlx",
132
132
  "model_family": "whisper",
133
133
  "model_id": "mlx-community/whisper-base.en-mlx",
134
- "model_ability": "audio-to-text",
134
+ "model_ability": ["audio2text"],
135
135
  "multilingual": false,
136
136
  "engine": "mlx"
137
137
  },
@@ -139,7 +139,7 @@
139
139
  "model_name": "whisper-small-mlx",
140
140
  "model_family": "whisper",
141
141
  "model_id": "mlx-community/whisper-small-mlx",
142
- "model_ability": "audio-to-text",
142
+ "model_ability": ["audio2text"],
143
143
  "multilingual": true,
144
144
  "engine": "mlx"
145
145
  },
@@ -147,7 +147,7 @@
147
147
  "model_name": "whisper-small.en-mlx",
148
148
  "model_family": "whisper",
149
149
  "model_id": "mlx-community/whisper-small.en-mlx",
150
- "model_ability": "audio-to-text",
150
+ "model_ability": ["audio2text"],
151
151
  "multilingual": false,
152
152
  "engine": "mlx"
153
153
  },
@@ -155,7 +155,7 @@
155
155
  "model_name": "whisper-medium-mlx",
156
156
  "model_family": "whisper",
157
157
  "model_id": "mlx-community/whisper-medium-mlx",
158
- "model_ability": "audio-to-text",
158
+ "model_ability": ["audio2text"],
159
159
  "multilingual": true,
160
160
  "engine": "mlx"
161
161
  },
@@ -163,7 +163,7 @@
163
163
  "model_name": "whisper-medium.en-mlx",
164
164
  "model_family": "whisper",
165
165
  "model_id": "mlx-community/whisper-medium.en-mlx",
166
- "model_ability": "audio-to-text",
166
+ "model_ability": ["audio2text"],
167
167
  "multilingual": false,
168
168
  "engine": "mlx"
169
169
  },
@@ -171,7 +171,7 @@
171
171
  "model_name": "whisper-large-v3-mlx",
172
172
  "model_family": "whisper",
173
173
  "model_id": "mlx-community/whisper-large-v3-mlx",
174
- "model_ability": "audio-to-text",
174
+ "model_ability": ["audio2text"],
175
175
  "multilingual": true,
176
176
  "engine": "mlx"
177
177
  },
@@ -179,7 +179,7 @@
179
179
  "model_name": "whisper-large-v3-turbo-mlx",
180
180
  "model_family": "whisper",
181
181
  "model_id": "mlx-community/whisper-large-v3-turbo",
182
- "model_ability": "audio-to-text",
182
+ "model_ability": ["audio2text"],
183
183
  "multilingual": true,
184
184
  "engine": "mlx"
185
185
  },
@@ -188,7 +188,7 @@
188
188
  "model_family": "funasr",
189
189
  "model_id": "FunAudioLLM/SenseVoiceSmall",
190
190
  "model_revision": "3eb3b4eeffc2f2dde6051b853983753db33e35c3",
191
- "model_ability": "audio-to-text",
191
+ "model_ability": ["audio2text"],
192
192
  "multilingual": true,
193
193
  "default_model_config": {
194
194
  "vad_model": "fsmn-vad",
@@ -208,7 +208,7 @@
208
208
  "model_family": "funasr",
209
209
  "model_id": "funasr/paraformer-zh",
210
210
  "model_revision": "5ed094cdfc8f6a9b6b022bd08bc904ef862bc79e",
211
- "model_ability": "audio-to-text",
211
+ "model_ability": ["audio2text"],
212
212
  "multilingual": false,
213
213
  "default_model_config": {
214
214
  "vad_model": "fsmn-vad",
@@ -223,7 +223,7 @@
223
223
  "model_family": "ChatTTS",
224
224
  "model_id": "2Noise/ChatTTS",
225
225
  "model_revision": "1a3c04a8b0651689bd9242fbb55b1f4b5a9aef84",
226
- "model_ability": "text-to-audio",
226
+ "model_ability": ["text2audio"],
227
227
  "multilingual": true
228
228
  },
229
229
  {
@@ -231,7 +231,7 @@
231
231
  "model_family": "CosyVoice",
232
232
  "model_id": "FunAudioLLM/CosyVoice-300M",
233
233
  "model_revision": "39c4e13d46bd4dfb840d214547623e5fcd2428e2",
234
- "model_ability": "text-to-audio",
234
+ "model_ability": ["text2audio"],
235
235
  "multilingual": true
236
236
  },
237
237
  {
@@ -239,7 +239,7 @@
239
239
  "model_family": "CosyVoice",
240
240
  "model_id": "FunAudioLLM/CosyVoice-300M-SFT",
241
241
  "model_revision": "096a5cff8d497fabb3dec2756a200f3688457a1b",
242
- "model_ability": "text-to-audio",
242
+ "model_ability": ["text2audio"],
243
243
  "multilingual": true
244
244
  },
245
245
  {
@@ -247,23 +247,40 @@
247
247
  "model_family": "CosyVoice",
248
248
  "model_id": "FunAudioLLM/CosyVoice-300M-Instruct",
249
249
  "model_revision": "ba5265d9a3169c1fedce145122c9dd4bc24e062c",
250
- "model_ability": "text-to-audio",
250
+ "model_ability": ["text2audio"],
251
251
  "multilingual": true
252
252
  },
253
253
  {
254
254
  "model_name": "CosyVoice2-0.5B",
255
255
  "model_family": "CosyVoice",
256
- "model_id": "mrfakename/CosyVoice2-0.5B",
257
- "model_revision": "5676baabc8a76dc93ef60a88bbd2420deaa2f644",
258
- "model_ability": "text-to-audio",
259
- "multilingual": true
256
+ "model_id": "JunHowie/CosyVoice2-0.5B",
257
+ "model_revision": "7ac9e9a026aec35efe48cde1196eaad6a00ad5f2",
258
+ "model_ability": ["text2audio"],
259
+ "multilingual": true,
260
+ "virtualenv": {
261
+ "packages": [
262
+ "tiktoken",
263
+ "lightning>=2.0.0",
264
+ "hydra-core>=1.3.2",
265
+ "inflect",
266
+ "conformer",
267
+ "diffusers==0.29.0",
268
+ "gdown",
269
+ "pyarrow",
270
+ "HyperPyYAML",
271
+ "onnxruntime>=1.16.0",
272
+ "pyworld>=0.3.4",
273
+ "numpy==1.26.4",
274
+ "#system_torch#"
275
+ ]
276
+ }
260
277
  },
261
278
  {
262
279
  "model_name": "FishSpeech-1.5",
263
280
  "model_family": "FishAudio",
264
281
  "model_id": "fishaudio/fish-speech-1.5",
265
282
  "model_revision": "268b6ec86243dd683bc78dab7e9a6cedf9191f2a",
266
- "model_ability": "text-to-audio",
283
+ "model_ability": ["text2audio"],
267
284
  "multilingual": true
268
285
  },
269
286
  {
@@ -271,7 +288,7 @@
271
288
  "model_family": "F5-TTS",
272
289
  "model_id": "SWivid/F5-TTS",
273
290
  "model_revision": "4dcc16f297f2ff98a17b3726b16f5de5a5e45672",
274
- "model_ability": "text-to-audio",
291
+ "model_ability": ["text2audio"],
275
292
  "multilingual": true
276
293
  },
277
294
  {
@@ -279,7 +296,7 @@
279
296
  "model_family": "F5-TTS-MLX",
280
297
  "model_id": "lucasnewman/f5-tts-mlx",
281
298
  "model_revision": "7642bb232e3fcacf92c51c786edebb8624da6b93",
282
- "model_ability": "text-to-audio",
299
+ "model_ability": ["text2audio"],
283
300
  "multilingual": true
284
301
  },
285
302
  {
@@ -287,7 +304,7 @@
287
304
  "model_family": "MeloTTS",
288
305
  "model_id": "myshell-ai/MeloTTS-English",
289
306
  "model_revision": "bb4fb7346d566d277ba8c8c7dbfdf6786139b8ef",
290
- "model_ability": "text-to-audio",
307
+ "model_ability": ["text2audio"],
291
308
  "multilingual": false,
292
309
  "language": "EN"
293
310
  },
@@ -296,7 +313,7 @@
296
313
  "model_family": "MeloTTS",
297
314
  "model_id": "myshell-ai/MeloTTS-English-v2",
298
315
  "model_revision": "a53e3509c4ee4ff16d79272feb2474ff864e18f3",
299
- "model_ability": "text-to-audio",
316
+ "model_ability": ["text2audio"],
300
317
  "multilingual": false,
301
318
  "language": "EN"
302
319
  },
@@ -305,7 +322,7 @@
305
322
  "model_family": "MeloTTS",
306
323
  "model_id": "myshell-ai/MeloTTS-English-v3",
307
324
  "model_revision": "f7c4a35392c0e9be24a755f1edb4c3f63040f759",
308
- "model_ability": "text-to-audio",
325
+ "model_ability": ["text2audio"],
309
326
  "multilingual": false,
310
327
  "language": "EN"
311
328
  },
@@ -314,7 +331,7 @@
314
331
  "model_family": "MeloTTS",
315
332
  "model_id": "myshell-ai/MeloTTS-French",
316
333
  "model_revision": "1e9bf590262392d8bffb679b0a3b0c16b0f9fdaf",
317
- "model_ability": "text-to-audio",
334
+ "model_ability": ["text2audio"],
318
335
  "multilingual": false,
319
336
  "language": "FR"
320
337
  },
@@ -323,7 +340,7 @@
323
340
  "model_family": "MeloTTS",
324
341
  "model_id": "myshell-ai/MeloTTS-Japanese",
325
342
  "model_revision": "367f8795464b531b4e97c1515bddfc1243e60891",
326
- "model_ability": "text-to-audio",
343
+ "model_ability": ["text2audio"],
327
344
  "multilingual": false,
328
345
  "language": "JP"
329
346
  },
@@ -332,7 +349,7 @@
332
349
  "model_family": "MeloTTS",
333
350
  "model_id": "myshell-ai/MeloTTS-Spanish",
334
351
  "model_revision": "dbb5496df39d11a66c1d5f5a9ca357c3c9fb95fb",
335
- "model_ability": "text-to-audio",
352
+ "model_ability": ["text2audio"],
336
353
  "multilingual": false,
337
354
  "language": "ES"
338
355
  },
@@ -341,7 +358,7 @@
341
358
  "model_family": "MeloTTS",
342
359
  "model_id": "myshell-ai/MeloTTS-Chinese",
343
360
  "model_revision": "af5d207a364ea4208c6f589c89f57f88414bdd16",
344
- "model_ability": "text-to-audio",
361
+ "model_ability": ["text2audio"],
345
362
  "multilingual": false,
346
363
  "language": "ZH"
347
364
  },
@@ -350,7 +367,7 @@
350
367
  "model_family": "MeloTTS",
351
368
  "model_id": "myshell-ai/MeloTTS-Korean",
352
369
  "model_revision": "0207e5adfc90129a51b6b03d89be6d84360ed323",
353
- "model_ability": "text-to-audio",
370
+ "model_ability": ["text2audio"],
354
371
  "multilingual": false,
355
372
  "language": "KR"
356
373
  },
@@ -359,7 +376,7 @@
359
376
  "model_family": "Kokoro",
360
377
  "model_id": "hexgrad/Kokoro-82M",
361
378
  "model_revision": "7884269d6fd3f9beabc271b6f1308e5699281fa9",
362
- "model_ability": "text-to-audio",
379
+ "model_ability": ["text2audio"],
363
380
  "multilingual": true
364
381
  },
365
382
  {
@@ -367,7 +384,7 @@
367
384
  "model_family": "MegaTTS",
368
385
  "model_id": "ByteDance/MegaTTS3",
369
386
  "model_revision": "409a7002b006d80f0730fca6f80441b08c10e738",
370
- "model_ability": "text-to-audio",
387
+ "model_ability": ["text2audio"],
371
388
  "multilingual": true
372
389
  }
373
390
  ]
@@ -5,7 +5,7 @@
5
5
  "model_hub": "modelscope",
6
6
  "model_id": "AI-ModelScope/whisper-large-v3",
7
7
  "model_revision": "master",
8
- "model_ability": "audio-to-text",
8
+ "model_ability": ["audio2text"],
9
9
  "multilingual": true
10
10
  },
11
11
  {
@@ -14,7 +14,7 @@
14
14
  "model_hub": "modelscope",
15
15
  "model_id": "AI-ModelScope/whisper-large-v3-turbo",
16
16
  "model_revision": "master",
17
- "model_ability": "audio-to-text",
17
+ "model_ability": ["audio2text"],
18
18
  "multilingual": true
19
19
  },
20
20
  {
@@ -23,7 +23,7 @@
23
23
  "model_hub": "modelscope",
24
24
  "model_id": "Xorbits/Belle-whisper-large-v3-zh",
25
25
  "model_revision": "master",
26
- "model_ability": "audio-to-text",
26
+ "model_ability": ["audio2text"],
27
27
  "multilingual": false
28
28
  },
29
29
  {
@@ -32,7 +32,7 @@
32
32
  "model_hub": "modelscope",
33
33
  "model_id": "iic/SenseVoiceSmall",
34
34
  "model_revision": "master",
35
- "model_ability": "audio-to-text",
35
+ "model_ability": ["audio2text"],
36
36
  "multilingual": true,
37
37
  "default_model_config": {
38
38
  "vad_model": "fsmn-vad",
@@ -53,7 +53,7 @@
53
53
  "model_hub": "modelscope",
54
54
  "model_id": "iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn",
55
55
  "model_revision": "master",
56
- "model_ability": "audio-to-text",
56
+ "model_ability": ["audio2text"],
57
57
  "multilingual": false,
58
58
  "default_model_config": {
59
59
  "vad_model": "fsmn-vad",
@@ -69,7 +69,7 @@
69
69
  "model_hub": "modelscope",
70
70
  "model_id": "AI-ModelScope/ChatTTS",
71
71
  "model_revision": "master",
72
- "model_ability": "text-to-audio",
72
+ "model_ability": ["text2audio"],
73
73
  "multilingual": true
74
74
  },
75
75
  {
@@ -78,7 +78,7 @@
78
78
  "model_hub": "modelscope",
79
79
  "model_id": "iic/CosyVoice-300M",
80
80
  "model_revision": "master",
81
- "model_ability": "text-to-audio",
81
+ "model_ability": ["text2audio"],
82
82
  "multilingual": true
83
83
  },
84
84
  {
@@ -87,7 +87,7 @@
87
87
  "model_hub": "modelscope",
88
88
  "model_id": "iic/CosyVoice-300M-SFT",
89
89
  "model_revision": "master",
90
- "model_ability": "text-to-audio",
90
+ "model_ability": ["text2audio"],
91
91
  "multilingual": true
92
92
  },
93
93
  {
@@ -96,7 +96,7 @@
96
96
  "model_hub": "modelscope",
97
97
  "model_id": "iic/CosyVoice-300M-Instruct",
98
98
  "model_revision": "master",
99
- "model_ability": "text-to-audio",
99
+ "model_ability": ["text2audio"],
100
100
  "multilingual": true
101
101
  },
102
102
  {
@@ -105,8 +105,25 @@
105
105
  "model_hub": "modelscope",
106
106
  "model_id": "iic/CosyVoice2-0.5B",
107
107
  "model_revision": "master",
108
- "model_ability": "text-to-audio",
109
- "multilingual": true
108
+ "model_ability": ["text2audio"],
109
+ "multilingual": true,
110
+ "virtualenv": {
111
+ "packages": [
112
+ "tiktoken",
113
+ "lightning>=2.0.0",
114
+ "hydra-core>=1.3.2",
115
+ "inflect",
116
+ "conformer",
117
+ "diffusers==0.29.0",
118
+ "gdown",
119
+ "pyarrow",
120
+ "HyperPyYAML",
121
+ "onnxruntime>=1.16.0",
122
+ "pyworld>=0.3.4",
123
+ "numpy==1.26.4",
124
+ "#system_torch#"
125
+ ]
126
+ }
110
127
  },
111
128
  {
112
129
  "model_name": "F5-TTS",
@@ -114,7 +131,7 @@
114
131
  "model_hub": "modelscope",
115
132
  "model_id": "SWivid/F5-TTS_Emilia-ZH-EN",
116
133
  "model_revision": "master",
117
- "model_ability": "text-to-audio",
134
+ "model_ability": ["text2audio"],
118
135
  "multilingual": true
119
136
  },
120
137
  {
@@ -123,7 +140,7 @@
123
140
  "model_hub": "modelscope",
124
141
  "model_id": "AI-ModelScope/Kokoro-82M",
125
142
  "model_revision": "master",
126
- "model_ability": "text-to-audio",
143
+ "model_ability": ["text2audio"],
127
144
  "multilingual": true
128
145
  },
129
146
  {
@@ -132,7 +149,7 @@
132
149
  "model_hub": "modelscope",
133
150
  "model_id": "ByteDance/MegaTTS3",
134
151
  "model_revision": "master",
135
- "model_ability": "text-to-audio",
152
+ "model_ability": ["text2audio"],
136
153
  "multilingual": true
137
154
  }
138
155
  ]
@@ -56,29 +56,10 @@ def register_custom_model():
56
56
 
57
57
 
58
58
  def _install():
59
- _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
60
- _model_spec_modelscope_json = os.path.join(
61
- os.path.dirname(__file__), "model_spec_modelscope.json"
59
+ load_model_family_from_json("model_spec.json", BUILTIN_EMBEDDING_MODELS)
60
+ load_model_family_from_json(
61
+ "model_spec_modelscope.json", MODELSCOPE_EMBEDDING_MODELS
62
62
  )
63
- BUILTIN_EMBEDDING_MODELS.update(
64
- dict(
65
- (spec["model_name"], EmbeddingModelSpec(**spec))
66
- for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
67
- )
68
- )
69
- for model_name, model_spec in BUILTIN_EMBEDDING_MODELS.items():
70
- MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
71
-
72
- MODELSCOPE_EMBEDDING_MODELS.update(
73
- dict(
74
- (spec["model_name"], EmbeddingModelSpec(**spec))
75
- for spec in json.load(
76
- codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
77
- )
78
- )
79
- )
80
- for model_name, model_spec in MODELSCOPE_EMBEDDING_MODELS.items():
81
- MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
82
63
 
83
64
  # register model description after recording model revision
84
65
  for model_spec_info in [BUILTIN_EMBEDDING_MODELS, MODELSCOPE_EMBEDDING_MODELS]:
@@ -96,5 +77,16 @@ def _install():
96
77
  generate_embedding_description(ud_embedding)
97
78
  )
98
79
 
99
- del _model_spec_json
100
- del _model_spec_modelscope_json
80
+
81
+ def load_model_family_from_json(json_filename, target_families):
82
+ json_path = os.path.join(os.path.dirname(__file__), json_filename)
83
+ target_families.update(
84
+ dict(
85
+ (spec["model_name"], EmbeddingModelSpec(**spec))
86
+ for spec in json.load(codecs.open(json_path, "r", encoding="utf-8"))
87
+ )
88
+ )
89
+ for model_name, model_spec in target_families.items():
90
+ MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
91
+
92
+ del json_path
@@ -55,29 +55,8 @@ def register_custom_model():
55
55
 
56
56
 
57
57
  def _install():
58
- _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
59
- _model_spec_modelscope_json = os.path.join(
60
- os.path.dirname(__file__), "model_spec_modelscope.json"
61
- )
62
- BUILTIN_IMAGE_MODELS.update(
63
- dict(
64
- (spec["model_name"], ImageModelFamilyV1(**spec))
65
- for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
66
- )
67
- )
68
- for model_name, model_spec in BUILTIN_IMAGE_MODELS.items():
69
- MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
70
-
71
- MODELSCOPE_IMAGE_MODELS.update(
72
- dict(
73
- (spec["model_name"], ImageModelFamilyV1(**spec))
74
- for spec in json.load(
75
- codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
76
- )
77
- )
78
- )
79
- for model_name, model_spec in MODELSCOPE_IMAGE_MODELS.items():
80
- MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
58
+ load_model_family_from_json("model_spec.json", BUILTIN_IMAGE_MODELS)
59
+ load_model_family_from_json("model_spec_modelscope.json", MODELSCOPE_IMAGE_MODELS)
81
60
 
82
61
  # register model description
83
62
  for model_name, model_spec in chain(
@@ -90,5 +69,16 @@ def _install():
90
69
  for ud_image in get_user_defined_images():
91
70
  IMAGE_MODEL_DESCRIPTIONS.update(generate_image_description(ud_image))
92
71
 
93
- del _model_spec_json
94
- del _model_spec_modelscope_json
72
+
73
+ def load_model_family_from_json(json_filename, target_families):
74
+ json_path = os.path.join(os.path.dirname(__file__), json_filename)
75
+ target_families.update(
76
+ dict(
77
+ (spec["model_name"], ImageModelFamilyV1(**spec))
78
+ for spec in json.load(codecs.open(json_path, "r", encoding="utf-8"))
79
+ )
80
+ )
81
+ for model_name, model_spec in target_families.items():
82
+ MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
83
+
84
+ del json_path