xinference 1.5.0.post2__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (89)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +10 -3
  3. xinference/constants.py +5 -1
  4. xinference/core/supervisor.py +1 -1
  5. xinference/core/utils.py +1 -1
  6. xinference/core/worker.py +2 -2
  7. xinference/deploy/cmdline.py +17 -0
  8. xinference/model/audio/core.py +1 -1
  9. xinference/model/audio/model_spec.json +43 -43
  10. xinference/model/audio/model_spec_modelscope.json +13 -13
  11. xinference/model/llm/__init__.py +3 -5
  12. xinference/model/llm/core.py +14 -0
  13. xinference/model/llm/llama_cpp/core.py +15 -4
  14. xinference/model/llm/llm_family.json +3251 -4304
  15. xinference/model/llm/llm_family.py +62 -6
  16. xinference/model/llm/llm_family_csghub.json +0 -32
  17. xinference/model/llm/llm_family_modelscope.json +1161 -1789
  18. xinference/model/llm/llm_family_openmind_hub.json +19 -325
  19. xinference/model/llm/lmdeploy/core.py +7 -2
  20. xinference/model/llm/mlx/core.py +19 -6
  21. xinference/model/llm/sglang/core.py +25 -10
  22. xinference/model/llm/transformers/chatglm.py +8 -1
  23. xinference/model/llm/transformers/cogagent.py +10 -12
  24. xinference/model/llm/transformers/cogvlm2.py +6 -3
  25. xinference/model/llm/transformers/cogvlm2_video.py +3 -6
  26. xinference/model/llm/transformers/core.py +50 -58
  27. xinference/model/llm/transformers/deepseek_v2.py +4 -2
  28. xinference/model/llm/transformers/deepseek_vl.py +10 -4
  29. xinference/model/llm/transformers/deepseek_vl2.py +9 -4
  30. xinference/model/llm/transformers/gemma3.py +4 -5
  31. xinference/model/llm/transformers/glm4v.py +2 -20
  32. xinference/model/llm/transformers/glm_edge_v.py +3 -20
  33. xinference/model/llm/transformers/intern_vl.py +3 -6
  34. xinference/model/llm/transformers/internlm2.py +1 -1
  35. xinference/model/llm/transformers/minicpmv25.py +4 -2
  36. xinference/model/llm/transformers/minicpmv26.py +5 -3
  37. xinference/model/llm/transformers/omnilmm.py +1 -1
  38. xinference/model/llm/transformers/opt.py +1 -1
  39. xinference/model/llm/transformers/ovis2.py +302 -0
  40. xinference/model/llm/transformers/qwen-omni.py +2 -1
  41. xinference/model/llm/transformers/qwen2_audio.py +3 -1
  42. xinference/model/llm/transformers/qwen2_vl.py +5 -1
  43. xinference/model/llm/transformers/qwen_vl.py +5 -2
  44. xinference/model/llm/utils.py +28 -0
  45. xinference/model/llm/vllm/core.py +73 -9
  46. xinference/model/llm/vllm/distributed_executor.py +8 -7
  47. xinference/model/llm/vllm/xavier/allocator.py +1 -1
  48. xinference/model/llm/vllm/xavier/block_manager.py +1 -1
  49. xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
  50. xinference/model/llm/vllm/xavier/executor.py +1 -1
  51. xinference/model/llm/vllm/xavier/test/test_xavier.py +1 -1
  52. xinference/model/video/diffusers.py +30 -3
  53. xinference/model/video/model_spec.json +46 -0
  54. xinference/model/video/model_spec_modelscope.json +48 -0
  55. xinference/types.py +2 -0
  56. xinference/web/ui/build/asset-manifest.json +6 -6
  57. xinference/web/ui/build/index.html +1 -1
  58. xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
  59. xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
  60. xinference/web/ui/build/static/js/main.91e77b5c.js +3 -0
  61. xinference/web/ui/build/static/js/main.91e77b5c.js.map +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/5e6edb0fb87e3798f142e9abf8dd2dc46bab33a60d31dff525797c0c99887097.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/6087820be1bd5c02c42dff797e7df365448ef35ab26dd5d6bd33e967e05cbfd4.json +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
  66. xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
  67. xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
  68. xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
  69. xinference/web/ui/src/locales/en.json +1 -0
  70. xinference/web/ui/src/locales/zh.json +1 -0
  71. {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/METADATA +1 -1
  72. {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/RECORD +77 -78
  73. {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/WHEEL +1 -1
  74. xinference/model/llm/transformers/compression.py +0 -258
  75. xinference/model/llm/transformers/yi_vl.py +0 -239
  76. xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
  77. xinference/web/ui/build/static/js/main.4b67a723.js +0 -3
  78. xinference/web/ui/build/static/js/main.4b67a723.js.map +0 -1
  79. xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
  80. xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
  81. xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
  82. xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
  83. xinference/web/ui/node_modules/.cache/babel-loader/e4ba658c6b3b0490910acdae0c535a892257efb61539a24adf8038fc653bd22f.json +0 -1
  84. xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
  85. xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
  86. /xinference/web/ui/build/static/js/{main.4b67a723.js.LICENSE.txt → main.91e77b5c.js.LICENSE.txt} +0 -0
  87. {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/entry_points.txt +0 -0
  88. {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/licenses/LICENSE +0 -0
  89. {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/top_level.txt +0 -0
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2025-04-21T17:53:44+0800",
+ "date": "2025-04-30T21:28:49+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "a5d4be9f970137bde1d402420f71961826392224",
- "version": "1.5.0.post2"
+ "full-revisionid": "1c11c609971e5a5095ce8be73f0e1bba04a3132f",
+ "version": "1.5.1"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py CHANGED
@@ -56,6 +56,7 @@ from ..constants import (
     XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
     XINFERENCE_DEFAULT_ENDPOINT_PORT,
     XINFERENCE_DISABLE_METRICS,
+    XINFERENCE_SSE_PING_ATTEMPTS_SECONDS,
 )
 from ..core.event import Event, EventCollectorActor, EventType
 from ..core.supervisor import SupervisorActor
@@ -1338,7 +1339,9 @@ class RESTfulAPI(CancelMixin):
                 finally:
                     await model.decrease_serve_count()
 
-            return EventSourceResponse(stream_results())
+            return EventSourceResponse(
+                stream_results(), ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS
+            )
         else:
             try:
                 data = await model.generate(body.prompt, kwargs, raw_params=raw_kwargs)
@@ -1606,7 +1609,9 @@ class RESTfulAPI(CancelMixin):
                     await model.decrease_serve_count()
 
             return EventSourceResponse(
-                media_type="application/octet-stream", content=stream_results()
+                media_type="application/octet-stream",
+                content=stream_results(),
+                ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS,
             )
         else:
             return Response(media_type="application/octet-stream", content=out)
@@ -2122,7 +2127,9 @@ class RESTfulAPI(CancelMixin):
                 finally:
                     await model.decrease_serve_count()
 
-            return EventSourceResponse(stream_results())
+            return EventSourceResponse(
+                stream_results(), ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS
+            )
         else:
             try:
                 data = await model.chat(
xinference/constants.py CHANGED
@@ -29,7 +29,8 @@ XINFERENCE_ENV_DISABLE_HEALTH_CHECK = "XINFERENCE_DISABLE_HEALTH_CHECK"
 XINFERENCE_ENV_DISABLE_METRICS = "XINFERENCE_DISABLE_METRICS"
 XINFERENCE_ENV_DOWNLOAD_MAX_ATTEMPTS = "XINFERENCE_DOWNLOAD_MAX_ATTEMPTS"
 XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE = "XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE"
-XINFERENCE_ENV_VIRTUAL_ENV = "XINFERENCE_EANBLE_VIRTUAL_ENV"
+XINFERENCE_ENV_VIRTUAL_ENV = "XINFERENCE_ENABLE_VIRTUAL_ENV"
+XINFERENCE_ENV_SSE_PING_ATTEMPTS_SECONDS = "XINFERENCE_SSE_PING_ATTEMPTS_SECONDS"
 
 
 def get_xinference_home() -> str:
@@ -89,6 +90,9 @@ XINFERENCE_DOWNLOAD_MAX_ATTEMPTS = int(
 XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE = os.environ.get(
     XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE, None
 )
+XINFERENCE_SSE_PING_ATTEMPTS_SECONDS = int(
+    os.environ.get(XINFERENCE_ENV_SSE_PING_ATTEMPTS_SECONDS, 600)
+)
 XINFERENCE_LAUNCH_MODEL_RETRY = 3
 XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION = 30
 XINFERENCE_ENABLE_VIRTUAL_ENV = bool(int(os.getenv(XINFERENCE_ENV_VIRTUAL_ENV, "0")))
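
The new constant makes the SSE keep-alive ping interval for streaming responses configurable through an environment variable (default 600 seconds), and it is read once at module import time. A minimal sketch of setting it before xinference starts; the value 120 is only illustrative:

import os

# Assumed usage: export the variable before xinference is imported/started,
# since XINFERENCE_SSE_PING_ATTEMPTS_SECONDS is evaluated at import time.
os.environ["XINFERENCE_SSE_PING_ATTEMPTS_SECONDS"] = "120"  # ping every 120s instead of the 600s default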
xinference/core/supervisor.py CHANGED
@@ -1102,8 +1102,8 @@ class SupervisorActor(xo.StatelessActor):
                 xavier_config=xavier_config,
                 **kwargs,
             )
-            await worker_ref.wait_for_load(_replica_model_uid)
             self._replica_model_uid_to_worker[_replica_model_uid] = worker_ref
+            await worker_ref.wait_for_load(_replica_model_uid)
             return subpool_address
 
         async def _launch_model():
xinference/core/utils.py CHANGED
@@ -263,7 +263,7 @@ class CancelMixin:
     _CANCEL_TASK_NAME = "abort_block"
 
     def __init__(self):
-        self._running_tasks: weakref.WeakValueDictionary[
+        self._running_tasks: weakref.WeakValueDictionary[  # type: ignore
             str, asyncio.Task
         ] = weakref.WeakValueDictionary()
 
xinference/core/worker.py CHANGED
@@ -787,9 +787,9 @@ class WorkerActor(xo.StatelessActor):
         elif model_type == "image":
             return model.model_ability
         elif model_type == "audio":
-            return [model.model_ability]
+            return model.model_ability
         elif model_type == "video":
-            return ["text_to_video"]
+            return model.model_ability
         elif model_type == "flexible":
             return ["flexible"]
         else:
xinference/deploy/cmdline.py CHANGED
@@ -805,6 +805,14 @@ def remove_cache(
     type=(str, str),
     multiple=True,
 )
+@click.option(
+    "--quantization-config",
+    "-qc",
+    "quantization_config",
+    type=(str, str),
+    multiple=True,
+    help="bnb quantization config for `transformers` engine.",
+)
 @click.option(
     "--worker-ip",
     default=None,
@@ -853,6 +861,7 @@ def model_launch(
     trust_remote_code: bool,
     api_key: Optional[str],
     model_path: Optional[str],
+    quantization_config: Optional[Tuple],
 ):
     kwargs = {}
     for i in range(0, len(ctx.args), 2):
@@ -884,6 +893,12 @@
     else:
         _n_gpu = int(n_gpu)
 
+    bnb_quantization_config = (
+        {k: handle_click_args_type(v) for k, v in dict(quantization_config).items()}
+        if quantization_config
+        else None
+    )
+
     image_lora_load_params = (
         {k: handle_click_args_type(v) for k, v in dict(image_lora_load_kwargs).items()}
         if image_lora_load_kwargs
@@ -929,6 +944,8 @@
 
     # do not wait for launching.
     kwargs["wait_ready"] = False
+    if bnb_quantization_config:
+        kwargs["quantization_config"] = {**bnb_quantization_config}
 
     model_uid = client.launch_model(
         model_name=model_name,
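
The new --quantization-config / -qc option collects key-value pairs and forwards them as a bitsandbytes quantization config to the transformers engine through launch_model's keyword arguments. A hypothetical sketch of the equivalent call through the Python client; the endpoint, model name, and option values are illustrative assumptions, not taken from this diff:

from xinference.client import Client

client = Client("http://127.0.0.1:9997")  # assumed local endpoint
model_uid = client.launch_model(
    model_name="qwen2.5-instruct",               # illustrative model name
    model_engine="transformers",
    quantization_config={"load_in_4bit": True},  # forwarded like the CLI's -qc pairs
)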
xinference/model/audio/core.py CHANGED
@@ -52,7 +52,7 @@ class AudioModelFamilyV1(CacheableModelSpec):
     model_revision: Optional[str]
     multilingual: bool
     language: Optional[str]
-    model_ability: Optional[str]
+    model_ability: Optional[List[str]]
     default_model_config: Optional[Dict[str, Any]]
     default_transcription_config: Optional[Dict[str, Any]]
     engine: Optional[str]
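
With this change, model_ability on audio model specs is a list of strings rather than a single string, matching the JSON spec updates below. A hypothetical custom spec illustrating the new shape (the names are made up for illustration):

custom_audio_spec = {
    "model_name": "my-whisper-finetune",       # illustrative name
    "model_family": "whisper",
    "model_id": "my-org/my-whisper-finetune",  # illustrative hub id
    "model_ability": ["audio2text"],           # list form used as of this release
    "multilingual": True,
}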
xinference/model/audio/model_spec.json CHANGED
@@ -4,7 +4,7 @@
     "model_family": "whisper",
     "model_id": "openai/whisper-tiny",
     "model_revision": "167c219b21f11ef214220b8fdb7536b8a88c2475",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": true
   },
   {
@@ -12,7 +12,7 @@
     "model_family": "whisper",
     "model_id": "openai/whisper-tiny.en",
     "model_revision": "87c7102498dcde7456f24cfd30239ca606ed9063",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": false
   },
   {
@@ -20,7 +20,7 @@
     "model_family": "whisper",
     "model_id": "openai/whisper-base",
     "model_revision": "8c1db9b51951100007a96a525d83a8ec81b3c237",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": true
   },
   {
@@ -28,7 +28,7 @@
     "model_family": "whisper",
     "model_id": "openai/whisper-base.en",
     "model_revision": "911407f4214e0e1d82085af863093ec0b66f9cd6",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": false
   },
   {
@@ -36,7 +36,7 @@
     "model_family": "whisper",
     "model_id": "openai/whisper-small",
     "model_revision": "998cb1a777c20db53d6033a61b977ed4c3792cac",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": true
   },
   {
@@ -44,7 +44,7 @@
     "model_family": "whisper",
     "model_id": "openai/whisper-small.en",
     "model_revision": "e8727524f962ee844a7319d92be39ac1bd25655a",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": false
   },
   {
@@ -52,7 +52,7 @@
     "model_family": "whisper",
     "model_id": "openai/whisper-medium",
     "model_revision": "16688beb1294bedd0a6f5cd86fe7eec57bce41ed",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": true
   },
   {
@@ -60,7 +60,7 @@
     "model_family": "whisper",
     "model_id": "openai/whisper-medium.en",
     "model_revision": "2e98eb6279edf5095af0c8dedb36bdec0acd172b",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": false
   },
   {
@@ -68,7 +68,7 @@
     "model_family": "whisper",
     "model_id": "openai/whisper-large-v3",
     "model_revision": "6cdf07a7e3ec3806e5d55f787915b85d4cd020b1",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": true
   },
   {
@@ -76,7 +76,7 @@
     "model_family": "whisper",
     "model_id": "openai/whisper-large-v3-turbo",
     "model_revision": "41f01f3fe87f28c78e2fbf8b568835947dd65ed9",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": true
   },
   {
@@ -84,7 +84,7 @@
     "model_family": "whisper",
     "model_id": "BELLE-2/Belle-distilwhisper-large-v2-zh",
     "model_revision": "ed25d13498fa5bac758b2fc479435b698532dfe8",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": false
   },
   {
@@ -92,7 +92,7 @@
     "model_family": "whisper",
     "model_id": "BELLE-2/Belle-whisper-large-v2-zh",
     "model_revision": "ec5bd5d78598545b7585814edde86dac2002b5b9",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": false
   },
   {
@@ -100,14 +100,14 @@
     "model_family": "whisper",
     "model_id": "BELLE-2/Belle-whisper-large-v3-zh",
     "model_revision": "3bebc7247696b39f5ab9ed22db426943ac33f600",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": false
   },
   {
     "model_name": "whisper-tiny-mlx",
     "model_family": "whisper",
     "model_id": "mlx-community/whisper-tiny",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": true,
     "engine": "mlx"
   },
@@ -115,7 +115,7 @@
     "model_name": "whisper-tiny.en-mlx",
     "model_family": "whisper",
     "model_id": "mlx-community/whisper-tiny.en-mlx",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": false,
     "engine": "mlx"
   },
@@ -123,7 +123,7 @@
     "model_name": "whisper-base-mlx",
     "model_family": "whisper",
     "model_id": "mlx-community/whisper-base-mlx",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": true,
     "engine": "mlx"
   },
@@ -131,7 +131,7 @@
     "model_name": "whisper-base.en-mlx",
     "model_family": "whisper",
     "model_id": "mlx-community/whisper-base.en-mlx",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": false,
     "engine": "mlx"
   },
@@ -139,7 +139,7 @@
     "model_name": "whisper-small-mlx",
     "model_family": "whisper",
     "model_id": "mlx-community/whisper-small-mlx",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": true,
     "engine": "mlx"
   },
@@ -147,7 +147,7 @@
     "model_name": "whisper-small.en-mlx",
     "model_family": "whisper",
     "model_id": "mlx-community/whisper-small.en-mlx",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": false,
     "engine": "mlx"
   },
@@ -155,7 +155,7 @@
     "model_name": "whisper-medium-mlx",
     "model_family": "whisper",
     "model_id": "mlx-community/whisper-medium-mlx",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": true,
     "engine": "mlx"
   },
@@ -163,7 +163,7 @@
     "model_name": "whisper-medium.en-mlx",
     "model_family": "whisper",
     "model_id": "mlx-community/whisper-medium.en-mlx",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": false,
     "engine": "mlx"
   },
@@ -171,7 +171,7 @@
     "model_name": "whisper-large-v3-mlx",
     "model_family": "whisper",
     "model_id": "mlx-community/whisper-large-v3-mlx",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": true,
     "engine": "mlx"
   },
@@ -179,7 +179,7 @@
     "model_name": "whisper-large-v3-turbo-mlx",
     "model_family": "whisper",
     "model_id": "mlx-community/whisper-large-v3-turbo",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": true,
     "engine": "mlx"
   },
@@ -188,7 +188,7 @@
     "model_family": "funasr",
     "model_id": "FunAudioLLM/SenseVoiceSmall",
     "model_revision": "3eb3b4eeffc2f2dde6051b853983753db33e35c3",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": true,
     "default_model_config": {
       "vad_model": "fsmn-vad",
@@ -208,7 +208,7 @@
     "model_family": "funasr",
     "model_id": "funasr/paraformer-zh",
     "model_revision": "5ed094cdfc8f6a9b6b022bd08bc904ef862bc79e",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": false,
     "default_model_config": {
       "vad_model": "fsmn-vad",
@@ -223,7 +223,7 @@
     "model_family": "ChatTTS",
     "model_id": "2Noise/ChatTTS",
     "model_revision": "1a3c04a8b0651689bd9242fbb55b1f4b5a9aef84",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": true
   },
   {
@@ -231,7 +231,7 @@
     "model_family": "CosyVoice",
     "model_id": "FunAudioLLM/CosyVoice-300M",
     "model_revision": "39c4e13d46bd4dfb840d214547623e5fcd2428e2",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": true
   },
   {
@@ -239,7 +239,7 @@
     "model_family": "CosyVoice",
     "model_id": "FunAudioLLM/CosyVoice-300M-SFT",
     "model_revision": "096a5cff8d497fabb3dec2756a200f3688457a1b",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": true
   },
   {
@@ -247,7 +247,7 @@
     "model_family": "CosyVoice",
     "model_id": "FunAudioLLM/CosyVoice-300M-Instruct",
     "model_revision": "ba5265d9a3169c1fedce145122c9dd4bc24e062c",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": true
   },
   {
@@ -255,7 +255,7 @@
     "model_family": "CosyVoice",
     "model_id": "mrfakename/CosyVoice2-0.5B",
     "model_revision": "5676baabc8a76dc93ef60a88bbd2420deaa2f644",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": true
   },
   {
@@ -263,7 +263,7 @@
     "model_family": "FishAudio",
     "model_id": "fishaudio/fish-speech-1.5",
     "model_revision": "268b6ec86243dd683bc78dab7e9a6cedf9191f2a",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": true
   },
   {
@@ -271,7 +271,7 @@
     "model_family": "F5-TTS",
     "model_id": "SWivid/F5-TTS",
     "model_revision": "4dcc16f297f2ff98a17b3726b16f5de5a5e45672",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": true
   },
   {
@@ -279,7 +279,7 @@
     "model_family": "F5-TTS-MLX",
     "model_id": "lucasnewman/f5-tts-mlx",
     "model_revision": "7642bb232e3fcacf92c51c786edebb8624da6b93",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": true
   },
   {
@@ -287,7 +287,7 @@
     "model_family": "MeloTTS",
     "model_id": "myshell-ai/MeloTTS-English",
     "model_revision": "bb4fb7346d566d277ba8c8c7dbfdf6786139b8ef",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": false,
     "language": "EN"
   },
@@ -296,7 +296,7 @@
     "model_family": "MeloTTS",
     "model_id": "myshell-ai/MeloTTS-English-v2",
     "model_revision": "a53e3509c4ee4ff16d79272feb2474ff864e18f3",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": false,
     "language": "EN"
   },
@@ -305,7 +305,7 @@
     "model_family": "MeloTTS",
     "model_id": "myshell-ai/MeloTTS-English-v3",
     "model_revision": "f7c4a35392c0e9be24a755f1edb4c3f63040f759",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": false,
     "language": "EN"
   },
@@ -314,7 +314,7 @@
     "model_family": "MeloTTS",
     "model_id": "myshell-ai/MeloTTS-French",
     "model_revision": "1e9bf590262392d8bffb679b0a3b0c16b0f9fdaf",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": false,
     "language": "FR"
   },
@@ -323,7 +323,7 @@
     "model_family": "MeloTTS",
     "model_id": "myshell-ai/MeloTTS-Japanese",
     "model_revision": "367f8795464b531b4e97c1515bddfc1243e60891",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": false,
     "language": "JP"
   },
@@ -332,7 +332,7 @@
     "model_family": "MeloTTS",
     "model_id": "myshell-ai/MeloTTS-Spanish",
     "model_revision": "dbb5496df39d11a66c1d5f5a9ca357c3c9fb95fb",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": false,
     "language": "ES"
   },
@@ -341,7 +341,7 @@
     "model_family": "MeloTTS",
     "model_id": "myshell-ai/MeloTTS-Chinese",
     "model_revision": "af5d207a364ea4208c6f589c89f57f88414bdd16",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": false,
     "language": "ZH"
   },
@@ -350,7 +350,7 @@
     "model_family": "MeloTTS",
     "model_id": "myshell-ai/MeloTTS-Korean",
     "model_revision": "0207e5adfc90129a51b6b03d89be6d84360ed323",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": false,
     "language": "KR"
   },
@@ -359,7 +359,7 @@
     "model_family": "Kokoro",
     "model_id": "hexgrad/Kokoro-82M",
     "model_revision": "7884269d6fd3f9beabc271b6f1308e5699281fa9",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": true
   },
   {
@@ -367,7 +367,7 @@
     "model_family": "MegaTTS",
     "model_id": "ByteDance/MegaTTS3",
     "model_revision": "409a7002b006d80f0730fca6f80441b08c10e738",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": true
   }
 ]
xinference/model/audio/model_spec_modelscope.json CHANGED
@@ -5,7 +5,7 @@
     "model_hub": "modelscope",
     "model_id": "AI-ModelScope/whisper-large-v3",
     "model_revision": "master",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": true
   },
   {
@@ -14,7 +14,7 @@
     "model_hub": "modelscope",
     "model_id": "AI-ModelScope/whisper-large-v3-turbo",
     "model_revision": "master",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": true
   },
   {
@@ -23,7 +23,7 @@
     "model_hub": "modelscope",
     "model_id": "Xorbits/Belle-whisper-large-v3-zh",
     "model_revision": "master",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": false
   },
   {
@@ -32,7 +32,7 @@
     "model_hub": "modelscope",
     "model_id": "iic/SenseVoiceSmall",
     "model_revision": "master",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": true,
     "default_model_config": {
       "vad_model": "fsmn-vad",
@@ -53,7 +53,7 @@
     "model_hub": "modelscope",
     "model_id": "iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn",
     "model_revision": "master",
-    "model_ability": "audio-to-text",
+    "model_ability": ["audio2text"],
     "multilingual": false,
     "default_model_config": {
       "vad_model": "fsmn-vad",
@@ -69,7 +69,7 @@
     "model_hub": "modelscope",
     "model_id": "AI-ModelScope/ChatTTS",
     "model_revision": "master",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": true
   },
   {
@@ -78,7 +78,7 @@
     "model_hub": "modelscope",
     "model_id": "iic/CosyVoice-300M",
     "model_revision": "master",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": true
   },
   {
@@ -87,7 +87,7 @@
     "model_hub": "modelscope",
     "model_id": "iic/CosyVoice-300M-SFT",
     "model_revision": "master",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": true
   },
   {
@@ -96,7 +96,7 @@
     "model_hub": "modelscope",
     "model_id": "iic/CosyVoice-300M-Instruct",
     "model_revision": "master",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": true
   },
   {
@@ -105,7 +105,7 @@
     "model_hub": "modelscope",
     "model_id": "iic/CosyVoice2-0.5B",
     "model_revision": "master",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": true
   },
   {
@@ -114,7 +114,7 @@
     "model_hub": "modelscope",
     "model_id": "SWivid/F5-TTS_Emilia-ZH-EN",
     "model_revision": "master",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": true
   },
   {
@@ -123,7 +123,7 @@
     "model_hub": "modelscope",
     "model_id": "AI-ModelScope/Kokoro-82M",
     "model_revision": "master",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": true
   },
   {
@@ -132,7 +132,7 @@
     "model_hub": "modelscope",
     "model_id": "ByteDance/MegaTTS3",
     "model_revision": "master",
-    "model_ability": "text-to-audio",
+    "model_ability": ["text2audio"],
     "multilingual": true
   }
 ]