xinference 0.14.0.post1__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this release of xinference has been flagged as potentially problematic.

Files changed (50)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +54 -0
  3. xinference/client/handlers.py +0 -3
  4. xinference/client/restful/restful_client.py +51 -134
  5. xinference/constants.py +1 -0
  6. xinference/core/chat_interface.py +1 -4
  7. xinference/core/image_interface.py +33 -5
  8. xinference/core/model.py +28 -2
  9. xinference/core/supervisor.py +37 -0
  10. xinference/core/worker.py +128 -84
  11. xinference/deploy/cmdline.py +1 -4
  12. xinference/model/audio/core.py +11 -3
  13. xinference/model/audio/funasr.py +114 -0
  14. xinference/model/audio/model_spec.json +20 -0
  15. xinference/model/audio/model_spec_modelscope.json +21 -0
  16. xinference/model/audio/whisper.py +1 -1
  17. xinference/model/core.py +12 -0
  18. xinference/model/image/core.py +3 -4
  19. xinference/model/image/model_spec.json +41 -13
  20. xinference/model/image/model_spec_modelscope.json +30 -10
  21. xinference/model/image/stable_diffusion/core.py +53 -2
  22. xinference/model/llm/__init__.py +2 -0
  23. xinference/model/llm/llm_family.json +83 -1
  24. xinference/model/llm/llm_family_modelscope.json +85 -1
  25. xinference/model/llm/pytorch/core.py +1 -0
  26. xinference/model/llm/pytorch/minicpmv26.py +247 -0
  27. xinference/model/llm/sglang/core.py +72 -34
  28. xinference/model/llm/vllm/core.py +38 -0
  29. xinference/model/video/__init__.py +62 -0
  30. xinference/model/video/core.py +178 -0
  31. xinference/model/video/diffusers.py +180 -0
  32. xinference/model/video/model_spec.json +11 -0
  33. xinference/model/video/model_spec_modelscope.json +12 -0
  34. xinference/types.py +10 -24
  35. xinference/web/ui/build/asset-manifest.json +3 -3
  36. xinference/web/ui/build/index.html +1 -1
  37. xinference/web/ui/build/static/js/{main.ef2a203a.js → main.17ca0398.js} +3 -3
  38. xinference/web/ui/build/static/js/main.17ca0398.js.map +1 -0
  39. xinference/web/ui/node_modules/.cache/babel-loader/71684495d995c7e266eecc6a0ad8ea0284cc785f80abddf863789c57a6134969.json +1 -0
  40. xinference/web/ui/node_modules/.cache/babel-loader/80acd1edf31542ab1dcccfad02cb4b38f3325cff847a781fcce97500cfd6f878.json +1 -0
  41. {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/METADATA +14 -8
  42. {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/RECORD +47 -40
  43. xinference/web/ui/build/static/js/main.ef2a203a.js.map +0 -1
  44. xinference/web/ui/node_modules/.cache/babel-loader/2c63090c842376cdd368c3ded88a333ef40d94785747651343040a6f7872a223.json +0 -1
  45. xinference/web/ui/node_modules/.cache/babel-loader/70fa8c07463a5fe57c68bf92502910105a8f647371836fe8c3a7408246ca7ba0.json +0 -1
  46. /xinference/web/ui/build/static/js/{main.ef2a203a.js.LICENSE.txt → main.17ca0398.js.LICENSE.txt} +0 -0
  47. {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/LICENSE +0 -0
  48. {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/WHEEL +0 -0
  49. {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/entry_points.txt +0 -0
  50. {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/top_level.txt +0 -0
xinference/model/image/model_spec.json

@@ -1,11 +1,29 @@
 [
+  {
+    "model_name": "FLUX.1-schnell",
+    "model_family": "stable_diffusion",
+    "model_id": "black-forest-labs/FLUX.1-schnell",
+    "model_revision": "768d12a373ed5cc9ef9a9dea7504dc09fcc14842",
+    "model_ability": [
+      "text2image"
+    ]
+  },
+  {
+    "model_name": "FLUX.1-dev",
+    "model_family": "stable_diffusion",
+    "model_id": "black-forest-labs/FLUX.1-dev",
+    "model_revision": "01aa605f2c300568dd6515476f04565a954fcb59",
+    "model_ability": [
+      "text2image"
+    ]
+  },
   {
     "model_name": "sd3-medium",
     "model_family": "stable_diffusion",
     "model_id": "stabilityai/stable-diffusion-3-medium-diffusers",
     "model_revision": "ea42f8cef0f178587cf766dc8129abd379c90671",
-    "abilities": [
-      "text2iamge",
+    "model_ability": [
+      "text2image",
       "image2image"
     ]
   },
@@ -14,8 +32,8 @@
     "model_family": "stable_diffusion",
     "model_id": "stabilityai/sd-turbo",
     "model_revision": "1681ed09e0cff58eeb41e878a49893228b78b94c",
-    "abilities": [
-      "text2iamge"
+    "model_ability": [
+      "text2image"
     ]
   },
   {
@@ -23,8 +41,8 @@
     "model_family": "stable_diffusion",
     "model_id": "stabilityai/sdxl-turbo",
     "model_revision": "f4b0486b498f84668e828044de1d0c8ba486e05b",
-    "abilities": [
-      "text2iamge"
+    "model_ability": [
+      "text2image"
     ]
   },
   {
@@ -32,8 +50,8 @@
     "model_family": "stable_diffusion",
     "model_id": "runwayml/stable-diffusion-v1-5",
     "model_revision": "1d0c4ebf6ff58a5caecab40fa1406526bca4b5b9",
-    "abilities": [
-      "text2iamge",
+    "model_ability": [
+      "text2image",
       "image2image"
     ],
     "controlnet": [
@@ -86,8 +104,8 @@
     "model_family": "stable_diffusion",
     "model_id": "stabilityai/stable-diffusion-xl-base-1.0",
     "model_revision": "f898a3e026e802f68796b95e9702464bac78d76f",
-    "abilities": [
-      "text2iamge",
+    "model_ability": [
+      "text2image",
       "image2image"
     ],
     "controlnet": [
@@ -111,12 +129,22 @@
       }
     ]
   },
+  {
+    "model_name": "kolors",
+    "model_family": "stable_diffusion",
+    "model_id": "Kwai-Kolors/Kolors-diffusers",
+    "model_revision": "7e091c75199e910a26cd1b51ed52c28de5db3711",
+    "model_ability": [
+      "text2image",
+      "image2image"
+    ]
+  },
   {
     "model_name": "stable-diffusion-inpainting",
     "model_family": "stable_diffusion",
     "model_id": "runwayml/stable-diffusion-inpainting",
     "model_revision": "51388a731f57604945fddd703ecb5c50e8e7b49d",
-    "abilities": [
+    "model_ability": [
       "inpainting"
     ]
   },
@@ -125,7 +153,7 @@
     "model_family": "stable_diffusion",
     "model_id": "stabilityai/stable-diffusion-2-inpainting",
     "model_revision": "81a84f49b15956b60b4272a405ad3daef3da4590",
-    "abilities": [
+    "model_ability": [
       "inpainting"
     ]
   },
@@ -134,7 +162,7 @@
     "model_family": "stable_diffusion",
     "model_id": "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
     "model_revision": "115134f363124c53c7d878647567d04daf26e41e",
-    "abilities": [
+    "model_ability": [
       "inpainting"
     ]
   }
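The two FLUX.1 entries above are registered as ordinary text2image models, and the renamed model_ability field is the same list that the updated DiffusionModel code later in this diff consults before allowing image2image. A minimal client-side sketch, assuming a local Xinference server at 127.0.0.1:9997 and the 0.14.x RESTful client API (endpoint and prompt are placeholders):

from xinference.client import Client

client = Client("http://127.0.0.1:9997")  # placeholder endpoint

# Launch one of the newly registered image models.
model_uid = client.launch_model(model_name="FLUX.1-schnell", model_type="image")
model = client.get_model(model_uid)

# text_to_image returns image URLs or base64 data depending on response_format.
result = model.text_to_image(prompt="an astronaut riding a horse", size="1024*1024")
print(result)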
xinference/model/image/model_spec_modelscope.json

@@ -1,12 +1,32 @@
 [
+  {
+    "model_name": "FLUX.1-schnell",
+    "model_family": "stable_diffusion",
+    "model_hub": "modelscope",
+    "model_id": "AI-ModelScope/FLUX.1-schnell",
+    "model_revision": "master",
+    "model_ability": [
+      "text2image"
+    ]
+  },
+  {
+    "model_name": "FLUX.1-dev",
+    "model_family": "stable_diffusion",
+    "model_hub": "modelscope",
+    "model_id": "AI-ModelScope/FLUX.1-dev",
+    "model_revision": "master",
+    "model_ability": [
+      "text2image"
+    ]
+  },
   {
     "model_name": "sd3-medium",
     "model_family": "stable_diffusion",
     "model_hub": "modelscope",
     "model_id": "AI-ModelScope/stable-diffusion-3-medium-diffusers",
     "model_revision": "master",
-    "abilities": [
-      "text2iamge",
+    "model_ability": [
+      "text2image",
       "image2image"
     ]
   },
@@ -16,8 +36,8 @@
     "model_hub": "modelscope",
     "model_id": "AI-ModelScope/sd-turbo",
     "model_revision": "master",
-    "abilities": [
-      "text2iamge"
+    "model_ability": [
+      "text2image"
     ]
   },
   {
@@ -26,8 +46,8 @@
     "model_hub": "modelscope",
     "model_id": "AI-ModelScope/sdxl-turbo",
     "model_revision": "master",
-    "abilities": [
-      "text2iamge"
+    "model_ability": [
+      "text2image"
     ]
   },
   {
@@ -36,8 +56,8 @@
     "model_hub": "modelscope",
     "model_id": "AI-ModelScope/stable-diffusion-v1-5",
     "model_revision": "master",
-    "abilities": [
-      "text2iamge",
+    "model_ability": [
+      "text2image",
       "image2image"
     ],
     "controlnet": [
@@ -91,8 +111,8 @@
     "model_hub": "modelscope",
     "model_id": "AI-ModelScope/stable-diffusion-xl-base-1.0",
     "model_revision": "master",
-    "abilities": [
-      "text2iamge",
+    "model_ability": [
+      "text2image",
       "image2image"
     ],
     "controlnet": [
xinference/model/image/stable_diffusion/core.py

@@ -51,7 +51,7 @@ class DiffusionModel:
         self._lora_model = lora_model
         self._lora_load_kwargs = lora_load_kwargs or {}
         self._lora_fuse_kwargs = lora_fuse_kwargs or {}
-        self._abilities = abilities
+        self._abilities = abilities or []
         self._kwargs = kwargs

     def _apply_lora(self):
@@ -88,7 +88,48 @@ class DiffusionModel:
         if sys.platform != "darwin" and torch_dtype is None:
             # The following params crashes on Mac M2
             self._kwargs["torch_dtype"] = torch.float16
+            self._kwargs["variant"] = "fp16"
             self._kwargs["use_safetensors"] = True
+        if isinstance(torch_dtype, str):
+            self._kwargs["torch_dtype"] = getattr(torch, torch_dtype)
+
+        quantize_text_encoder = self._kwargs.pop("quantize_text_encoder", None)
+        if quantize_text_encoder:
+            try:
+                from transformers import BitsAndBytesConfig, T5EncoderModel
+            except ImportError:
+                error_message = "Failed to import module 'transformers'"
+                installation_guide = [
+                    "Please make sure 'transformers' is installed. ",
+                    "You can install it by `pip install transformers`\n",
+                ]
+
+                raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+
+            try:
+                import bitsandbytes  # noqa: F401
+            except ImportError:
+                error_message = "Failed to import module 'bitsandbytes'"
+                installation_guide = [
+                    "Please make sure 'bitsandbytes' is installed. ",
+                    "You can install it by `pip install bitsandbytes`\n",
+                ]
+
+                raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+
+            for text_encoder_name in quantize_text_encoder.split(","):
+                quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+                quantization_kwargs = {}
+                if torch_dtype:
+                    quantization_kwargs["torch_dtype"] = torch_dtype
+                text_encoder = T5EncoderModel.from_pretrained(
+                    self._model_path,
+                    subfolder=text_encoder_name,
+                    quantization_config=quantization_config,
+                    **quantization_kwargs,
+                )
+                self._kwargs[text_encoder_name] = text_encoder
+                self._kwargs["device_map"] = "balanced"

         logger.debug("Loading model %s", AutoPipelineModel)
         self._model = AutoPipelineModel.from_pretrained(
@@ -98,7 +139,7 @@ class DiffusionModel:
         if self._kwargs.get("cpu_offload", False):
             logger.debug("CPU offloading model")
             self._model.enable_model_cpu_offload()
-        else:
+        elif not self._kwargs.get("device_map"):
             logger.debug("Loading model to available device")
             self._model = move_model_to_available_device(self._model)
         # Recommended if your computer has < 64 GB of RAM
@@ -141,6 +182,12 @@ class DiffusionModel:
         else:
             raise ValueError(f"Unsupported response format: {response_format}")

+    @classmethod
+    def _filter_kwargs(cls, kwargs: dict):
+        for arg in ["negative_prompt", "num_inference_steps"]:
+            if not kwargs.get(arg):
+                kwargs.pop(arg, None)
+
     def text_to_image(
         self,
         prompt: str,
@@ -152,6 +199,7 @@ class DiffusionModel:
         # References:
         # https://huggingface.co/docs/diffusers/main/en/api/pipelines/controlnet_sdxl
         width, height = map(int, re.split(r"[^\d]+", size))
+        self._filter_kwargs(kwargs)
         return self._call_model(
             prompt=prompt,
             height=height,
@@ -174,6 +222,8 @@ class DiffusionModel:
         if "controlnet" in self._kwargs:
             model = self._model
         else:
+            if "image2image" not in self._abilities:
+                raise RuntimeError(f"{self._model_uid} does not support image2image")
             if self._i2i_model is not None:
                 model = self._i2i_model
             else:
@@ -186,6 +236,7 @@ class DiffusionModel:
         width, height = map(int, re.split(r"[^\d]+", size))
         kwargs["width"] = width
         kwargs["height"] = height
+        self._filter_kwargs(kwargs)
         return self._call_model(
             image=image,
             prompt=prompt,
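The quantize_text_encoder branch added above loads the named text encoder submodules in 8-bit through bitsandbytes, injects them into the pipeline kwargs, and switches to device_map="balanced", which is the usual recipe for fitting FLUX's large T5 encoder into limited VRAM. A standalone sketch of the same pattern using diffusers directly (repo, subfolder name, and generation settings are illustrative, not the exact code path Xinference runs):

import torch
from diffusers import FluxPipeline
from transformers import BitsAndBytesConfig, T5EncoderModel

repo = "black-forest-labs/FLUX.1-dev"  # matches the new model_spec.json entry

# Quantize only the T5 text encoder to 8-bit, as the quantize_text_encoder option does.
text_encoder_2 = T5EncoderModel.from_pretrained(
    repo,
    subfolder="text_encoder_2",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)

# Hand the quantized encoder to the pipeline and let accelerate balance the rest,
# mirroring self._kwargs["device_map"] = "balanced" above.
pipe = FluxPipeline.from_pretrained(
    repo,
    text_encoder_2=text_encoder_2,
    torch_dtype=torch.bfloat16,
    device_map="balanced",
)
image = pipe("a cat holding a sign that says hello", num_inference_steps=28).images[0]
image.save("flux_demo.png")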
xinference/model/llm/__init__.py

@@ -125,6 +125,7 @@ def _install():
     from .pytorch.internlm2 import Internlm2PytorchChatModel
     from .pytorch.llama_2 import LlamaPytorchChatModel, LlamaPytorchModel
     from .pytorch.minicpmv25 import MiniCPMV25Model
+    from .pytorch.minicpmv26 import MiniCPMV26Model
     from .pytorch.qwen_vl import QwenVLChatModel
     from .pytorch.vicuna import VicunaPytorchChatModel
     from .pytorch.yi_vl import YiVLChatModel
@@ -167,6 +168,7 @@ def _install():
             PytorchModel,
             CogVLM2Model,
             MiniCPMV25Model,
+            MiniCPMV26Model,
             Glm4VModel,
         ]
     )
xinference/model/llm/llm_family.json

@@ -1797,6 +1797,16 @@
           "none"
         ],
         "model_id": "meta-llama/Meta-Llama-3.1-70B"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 405,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "meta-llama/Meta-Llama-3.1-405B"
       }
     ]
   },
@@ -1975,6 +1985,32 @@
           "none"
         ],
         "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-bf16"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 405,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "meta-llama/Meta-Llama-3.1-405B-Instruct"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 405,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "hugging-quants/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 405,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "hugging-quants/Meta-Llama-3.1-405B-Instruct-AWQ-INT4"
       }
     ],
     "prompt_style": {
@@ -6771,7 +6807,7 @@
   },
   {
     "version":1,
-    "context_length":2048,
+    "context_length":8192,
     "model_name":"MiniCPM-Llama3-V-2_5",
     "model_lang":[
       "en",
@@ -6811,6 +6847,52 @@
       ]
     }
   },
+  {
+    "version":1,
+    "context_length":32768,
+    "model_name":"MiniCPM-V-2.6",
+    "model_lang":[
+      "en",
+      "zh"
+    ],
+    "model_ability":[
+      "chat",
+      "vision"
+    ],
+    "model_description":"MiniCPM-V 2.6 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters.",
+    "model_specs":[
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":8,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"openbmb/MiniCPM-V-2_6",
+        "model_revision":"3f7a8da1b7a8b928b5ee229fae33cf43fd64cf31"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":8,
+        "quantizations":[
+          "4-bit"
+        ],
+        "model_id":"openbmb/MiniCPM-V-2_6-int4",
+        "model_revision":"051e2df6505f1fc4305f2c9bd42ed90db8bf4874"
+      }
+    ],
+    "prompt_style":{
+      "style_name":"QWEN",
+      "system_prompt":"You are a helpful assistant",
+      "roles":[
+        "user",
+        "assistant"
+      ],
+      "stop": [
+        "<|im_end|>",
+        "<|endoftext|>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 4096,
xinference/model/llm/llm_family_modelscope.json

@@ -234,6 +234,17 @@
         ],
         "model_id": "LLM-Research/Meta-Llama-3.1-70B",
         "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 405,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "LLM-Research/Meta-Llama-3.1-405B",
+        "model_hub": "modelscope"
       }
     ]
   },
@@ -325,6 +336,35 @@
         ],
         "model_id": "LLM-Research/Meta-Llama-3.1-70B-Instruct-AWQ-INT4",
         "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 405,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "LLM-Research/Meta-Llama-3.1-405B-Instruct",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 405,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "LLM-Research/Meta-Llama-3.1-405B-Instruct-AWQ-INT4",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 405,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "LLM-Research/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4",
+        "model_hub": "modelscope"
       }
     ],
     "prompt_style": {
@@ -4509,7 +4549,7 @@
   },
   {
     "version":1,
-    "context_length":2048,
+    "context_length":8192,
     "model_name":"MiniCPM-Llama3-V-2_5",
     "model_lang":[
       "en",
@@ -4551,6 +4591,50 @@
       ]
     }
   },
+  {
+    "version":1,
+    "context_length":32768,
+    "model_name":"MiniCPM-V-2.6",
+    "model_lang":[
+      "en",
+      "zh"
+    ],
+    "model_ability":[
+      "chat",
+      "vision"
+    ],
+    "model_description":"MiniCPM-V 2.6 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters.",
+    "model_specs":[
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":8,
+        "quantizations":[
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id":"OpenBMB/MiniCPM-V-2_6",
+        "model_revision":"master"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":8,
+        "quantizations":[
+          "4-bit"
+        ],
+        "model_hub": "modelscope",
+        "model_id":"OpenBMB/MiniCPM-V-2_6-int4",
+        "model_revision":"master"
+      }
+    ],
+    "prompt_style":{
+      "style_name":"QWEN",
+      "system_prompt":"You are a helpful assistant",
+      "roles":[
+        "user",
+        "assistant"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 2048,
xinference/model/llm/pytorch/core.py

@@ -72,6 +72,7 @@ NON_DEFAULT_MODEL_LIST: List[str] = [
     "mini-internvl-chat",
     "cogvlm2",
     "MiniCPM-Llama3-V-2_5",
+    "MiniCPM-V-2.6",
     "glm-4v",
 ]