xinference 0.14.0.post1__py3-none-any.whl → 0.14.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +54 -0
- xinference/client/handlers.py +0 -3
- xinference/client/restful/restful_client.py +51 -134
- xinference/constants.py +1 -0
- xinference/core/chat_interface.py +1 -4
- xinference/core/image_interface.py +33 -5
- xinference/core/model.py +28 -2
- xinference/core/supervisor.py +37 -0
- xinference/core/worker.py +128 -84
- xinference/deploy/cmdline.py +1 -4
- xinference/model/audio/core.py +11 -3
- xinference/model/audio/funasr.py +114 -0
- xinference/model/audio/model_spec.json +20 -0
- xinference/model/audio/model_spec_modelscope.json +21 -0
- xinference/model/audio/whisper.py +1 -1
- xinference/model/core.py +12 -0
- xinference/model/image/core.py +3 -4
- xinference/model/image/model_spec.json +41 -13
- xinference/model/image/model_spec_modelscope.json +30 -10
- xinference/model/image/stable_diffusion/core.py +53 -2
- xinference/model/llm/__init__.py +2 -0
- xinference/model/llm/llm_family.json +83 -1
- xinference/model/llm/llm_family_modelscope.json +85 -1
- xinference/model/llm/pytorch/core.py +1 -0
- xinference/model/llm/pytorch/minicpmv26.py +247 -0
- xinference/model/llm/sglang/core.py +72 -34
- xinference/model/llm/vllm/core.py +38 -0
- xinference/model/video/__init__.py +62 -0
- xinference/model/video/core.py +178 -0
- xinference/model/video/diffusers.py +180 -0
- xinference/model/video/model_spec.json +11 -0
- xinference/model/video/model_spec_modelscope.json +12 -0
- xinference/types.py +10 -24
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.ef2a203a.js → main.17ca0398.js} +3 -3
- xinference/web/ui/build/static/js/main.17ca0398.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/71684495d995c7e266eecc6a0ad8ea0284cc785f80abddf863789c57a6134969.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/80acd1edf31542ab1dcccfad02cb4b38f3325cff847a781fcce97500cfd6f878.json +1 -0
- {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/METADATA +14 -8
- {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/RECORD +47 -40
- xinference/web/ui/build/static/js/main.ef2a203a.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2c63090c842376cdd368c3ded88a333ef40d94785747651343040a6f7872a223.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/70fa8c07463a5fe57c68bf92502910105a8f647371836fe8c3a7408246ca7ba0.json +0 -1
- /xinference/web/ui/build/static/js/{main.ef2a203a.js.LICENSE.txt → main.17ca0398.js.LICENSE.txt} +0 -0
- {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/LICENSE +0 -0
- {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/WHEEL +0 -0
- {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/top_level.txt +0 -0
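Most of this release is incremental, but the file list shows one brand-new surface: an `xinference/model/video` package (core, a diffusers backend, and model spec files), plus matching plumbing in `restful_api.py` and the client. A minimal sketch of how the new model type would be driven through the existing Python client follows; the model name and the `text_to_video` handle method are assumptions inferred from the new modules, not confirmed by this diff, so treat them as placeholders.

# Sketch only: "CogVideoX-2b" and text_to_video() are assumptions inferred
# from the new xinference/model/video/* files; check the bundled
# model_spec.json and the client handlers for the real names.
from xinference.client import Client

client = Client("http://127.0.0.1:9997")

# model_type="video" should route through the new video model registry.
model_uid = client.launch_model(
    model_name="CogVideoX-2b",  # placeholder name
    model_type="video",
)

model = client.get_model(model_uid)
result = model.text_to_video(prompt="A sailboat gliding across a calm lake")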
xinference/model/image/model_spec.json CHANGED
@@ -1,11 +1,29 @@
 [
+    {
+        "model_name": "FLUX.1-schnell",
+        "model_family": "stable_diffusion",
+        "model_id": "black-forest-labs/FLUX.1-schnell",
+        "model_revision": "768d12a373ed5cc9ef9a9dea7504dc09fcc14842",
+        "model_ability": [
+            "text2image"
+        ]
+    },
+    {
+        "model_name": "FLUX.1-dev",
+        "model_family": "stable_diffusion",
+        "model_id": "black-forest-labs/FLUX.1-dev",
+        "model_revision": "01aa605f2c300568dd6515476f04565a954fcb59",
+        "model_ability": [
+            "text2image"
+        ]
+    },
     {
         "model_name": "sd3-medium",
         "model_family": "stable_diffusion",
         "model_id": "stabilityai/stable-diffusion-3-medium-diffusers",
         "model_revision": "ea42f8cef0f178587cf766dc8129abd379c90671",
-        "
-        "
+        "model_ability": [
+            "text2image",
             "image2image"
         ]
     },
@@ -14,8 +32,8 @@
         "model_family": "stable_diffusion",
         "model_id": "stabilityai/sd-turbo",
         "model_revision": "1681ed09e0cff58eeb41e878a49893228b78b94c",
-        "
-        "
+        "model_ability": [
+            "text2image"
         ]
     },
     {
@@ -23,8 +41,8 @@
         "model_family": "stable_diffusion",
         "model_id": "stabilityai/sdxl-turbo",
         "model_revision": "f4b0486b498f84668e828044de1d0c8ba486e05b",
-        "
-        "
+        "model_ability": [
+            "text2image"
         ]
     },
     {
@@ -32,8 +50,8 @@
         "model_family": "stable_diffusion",
         "model_id": "runwayml/stable-diffusion-v1-5",
         "model_revision": "1d0c4ebf6ff58a5caecab40fa1406526bca4b5b9",
-        "
-        "
+        "model_ability": [
+            "text2image",
             "image2image"
         ],
         "controlnet": [
@@ -86,8 +104,8 @@
         "model_family": "stable_diffusion",
         "model_id": "stabilityai/stable-diffusion-xl-base-1.0",
         "model_revision": "f898a3e026e802f68796b95e9702464bac78d76f",
-        "
-        "
+        "model_ability": [
+            "text2image",
             "image2image"
         ],
         "controlnet": [
@@ -111,12 +129,22 @@
             }
         ]
     },
+    {
+        "model_name": "kolors",
+        "model_family": "stable_diffusion",
+        "model_id": "Kwai-Kolors/Kolors-diffusers",
+        "model_revision": "7e091c75199e910a26cd1b51ed52c28de5db3711",
+        "model_ability": [
+            "text2image",
+            "image2image"
+        ]
+    },
     {
         "model_name": "stable-diffusion-inpainting",
         "model_family": "stable_diffusion",
         "model_id": "runwayml/stable-diffusion-inpainting",
         "model_revision": "51388a731f57604945fddd703ecb5c50e8e7b49d",
-        "
+        "model_ability": [
             "inpainting"
         ]
     },
@@ -125,7 +153,7 @@
         "model_family": "stable_diffusion",
         "model_id": "stabilityai/stable-diffusion-2-inpainting",
         "model_revision": "81a84f49b15956b60b4272a405ad3daef3da4590",
-        "
+        "model_ability": [
            "inpainting"
         ]
     },
@@ -134,7 +162,7 @@
         "model_family": "stable_diffusion",
         "model_id": "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
         "model_revision": "115134f363124c53c7d878647567d04daf26e41e",
-        "
+        "model_ability": [
             "inpainting"
         ]
     }
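Two takeaways from this spec change: the black-forest-labs FLUX.1 models and Kwai's kolors are newly registered, and every entry now declares a `model_ability` list in place of the old (truncated) field. A short sketch of launching one of the new entries through the Python client, assuming a running local server; `text_to_image` follows the existing RESTful image handle:

# Sketch, assuming a local xinference server on the default port.
from xinference.client import Client

client = Client("http://127.0.0.1:9997")
model_uid = client.launch_model(model_name="FLUX.1-schnell", model_type="image")
model = client.get_model(model_uid)

# FLUX.1-schnell declares only "text2image", so the ability check added in
# stable_diffusion/core.py (see that diff below) would reject image2image.
image = model.text_to_image(
    prompt="a watercolor fox in a snowy forest",
    size="1024*1024",
)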
xinference/model/image/model_spec_modelscope.json CHANGED
@@ -1,12 +1,32 @@
 [
+    {
+        "model_name": "FLUX.1-schnell",
+        "model_family": "stable_diffusion",
+        "model_hub": "modelscope",
+        "model_id": "AI-ModelScope/FLUX.1-schnell",
+        "model_revision": "master",
+        "model_ability": [
+            "text2image"
+        ]
+    },
+    {
+        "model_name": "FLUX.1-dev",
+        "model_family": "stable_diffusion",
+        "model_hub": "modelscope",
+        "model_id": "AI-ModelScope/FLUX.1-dev",
+        "model_revision": "master",
+        "model_ability": [
+            "text2image"
+        ]
+    },
     {
         "model_name": "sd3-medium",
         "model_family": "stable_diffusion",
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/stable-diffusion-3-medium-diffusers",
         "model_revision": "master",
-        "
-        "
+        "model_ability": [
+            "text2image",
             "image2image"
         ]
     },
@@ -16,8 +36,8 @@
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/sd-turbo",
         "model_revision": "master",
-        "
-        "
+        "model_ability": [
+            "text2image"
         ]
     },
     {
@@ -26,8 +46,8 @@
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/sdxl-turbo",
         "model_revision": "master",
-        "
-        "
+        "model_ability": [
+            "text2image"
         ]
     },
     {
@@ -36,8 +56,8 @@
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/stable-diffusion-v1-5",
         "model_revision": "master",
-        "
-        "
+        "model_ability": [
+            "text2image",
             "image2image"
         ],
         "controlnet": [
@@ -91,8 +111,8 @@
         "model_hub": "modelscope",
         "model_id": "AI-ModelScope/stable-diffusion-xl-base-1.0",
         "model_revision": "master",
-        "
-        "
+        "model_ability": [
+            "text2image",
             "image2image"
         ],
         "controlnet": [
xinference/model/image/stable_diffusion/core.py CHANGED
@@ -51,7 +51,7 @@ class DiffusionModel:
         self._lora_model = lora_model
         self._lora_load_kwargs = lora_load_kwargs or {}
         self._lora_fuse_kwargs = lora_fuse_kwargs or {}
-        self._abilities = abilities
+        self._abilities = abilities or []
         self._kwargs = kwargs
 
     def _apply_lora(self):
@@ -88,7 +88,48 @@ class DiffusionModel:
         if sys.platform != "darwin" and torch_dtype is None:
             # The following params crashes on Mac M2
             self._kwargs["torch_dtype"] = torch.float16
+            self._kwargs["variant"] = "fp16"
             self._kwargs["use_safetensors"] = True
+        if isinstance(torch_dtype, str):
+            self._kwargs["torch_dtype"] = getattr(torch, torch_dtype)
+
+        quantize_text_encoder = self._kwargs.pop("quantize_text_encoder", None)
+        if quantize_text_encoder:
+            try:
+                from transformers import BitsAndBytesConfig, T5EncoderModel
+            except ImportError:
+                error_message = "Failed to import module 'transformers'"
+                installation_guide = [
+                    "Please make sure 'transformers' is installed. ",
+                    "You can install it by `pip install transformers`\n",
+                ]
+
+                raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+
+            try:
+                import bitsandbytes  # noqa: F401
+            except ImportError:
+                error_message = "Failed to import module 'bitsandbytes'"
+                installation_guide = [
+                    "Please make sure 'bitsandbytes' is installed. ",
+                    "You can install it by `pip install bitsandbytes`\n",
+                ]
+
+                raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+
+            for text_encoder_name in quantize_text_encoder.split(","):
+                quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+                quantization_kwargs = {}
+                if torch_dtype:
+                    quantization_kwargs["torch_dtype"] = torch_dtype
+                text_encoder = T5EncoderModel.from_pretrained(
+                    self._model_path,
+                    subfolder=text_encoder_name,
+                    quantization_config=quantization_config,
+                    **quantization_kwargs,
+                )
+                self._kwargs[text_encoder_name] = text_encoder
+                self._kwargs["device_map"] = "balanced"
 
         logger.debug("Loading model %s", AutoPipelineModel)
         self._model = AutoPipelineModel.from_pretrained(
@@ -98,7 +139,7 @@ class DiffusionModel:
         if self._kwargs.get("cpu_offload", False):
             logger.debug("CPU offloading model")
             self._model.enable_model_cpu_offload()
-        else:
+        elif not self._kwargs.get("device_map"):
             logger.debug("Loading model to available device")
             self._model = move_model_to_available_device(self._model)
         # Recommended if your computer has < 64 GB of RAM
@@ -141,6 +182,12 @@ class DiffusionModel:
         else:
             raise ValueError(f"Unsupported response format: {response_format}")
 
+    @classmethod
+    def _filter_kwargs(cls, kwargs: dict):
+        for arg in ["negative_prompt", "num_inference_steps"]:
+            if not kwargs.get(arg):
+                kwargs.pop(arg, None)
+
     def text_to_image(
         self,
         prompt: str,
@@ -152,6 +199,7 @@ class DiffusionModel:
         # References:
        # https://huggingface.co/docs/diffusers/main/en/api/pipelines/controlnet_sdxl
         width, height = map(int, re.split(r"[^\d]+", size))
+        self._filter_kwargs(kwargs)
         return self._call_model(
             prompt=prompt,
             height=height,
@@ -174,6 +222,8 @@ class DiffusionModel:
         if "controlnet" in self._kwargs:
             model = self._model
         else:
+            if "image2image" not in self._abilities:
+                raise RuntimeError(f"{self._model_uid} does not support image2image")
             if self._i2i_model is not None:
                 model = self._i2i_model
             else:
@@ -186,6 +236,7 @@ class DiffusionModel:
         width, height = map(int, re.split(r"[^\d]+", size))
         kwargs["width"] = width
         kwargs["height"] = height
+        self._filter_kwargs(kwargs)
         return self._call_model(
             image=image,
             prompt=prompt,
xinference/model/llm/__init__.py CHANGED
@@ -125,6 +125,7 @@ def _install():
     from .pytorch.internlm2 import Internlm2PytorchChatModel
     from .pytorch.llama_2 import LlamaPytorchChatModel, LlamaPytorchModel
     from .pytorch.minicpmv25 import MiniCPMV25Model
+    from .pytorch.minicpmv26 import MiniCPMV26Model
     from .pytorch.qwen_vl import QwenVLChatModel
     from .pytorch.vicuna import VicunaPytorchChatModel
     from .pytorch.yi_vl import YiVLChatModel
@@ -167,6 +168,7 @@ def _install():
             PytorchModel,
             CogVLM2Model,
             MiniCPMV25Model,
+            MiniCPMV26Model,
             Glm4VModel,
         ]
     )
xinference/model/llm/llm_family.json CHANGED
@@ -1797,6 +1797,16 @@
                     "none"
                 ],
                 "model_id": "meta-llama/Meta-Llama-3.1-70B"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 405,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "meta-llama/Meta-Llama-3.1-405B"
             }
         ]
     },
@@ -1975,6 +1985,32 @@
                     "none"
                 ],
                 "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-bf16"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 405,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "meta-llama/Meta-Llama-3.1-405B-Instruct"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": 405,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "hugging-quants/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": 405,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "hugging-quants/Meta-Llama-3.1-405B-Instruct-AWQ-INT4"
             }
         ],
         "prompt_style": {
@@ -6771,7 +6807,7 @@
     },
     {
         "version":1,
-        "context_length":
+        "context_length":8192,
         "model_name":"MiniCPM-Llama3-V-2_5",
         "model_lang":[
             "en",
@@ -6811,6 +6847,52 @@
             ]
         }
     },
+    {
+        "version":1,
+        "context_length":32768,
+        "model_name":"MiniCPM-V-2.6",
+        "model_lang":[
+            "en",
+            "zh"
+        ],
+        "model_ability":[
+            "chat",
+            "vision"
+        ],
+        "model_description":"MiniCPM-V 2.6 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters.",
+        "model_specs":[
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":8,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"openbmb/MiniCPM-V-2_6",
+                "model_revision":"3f7a8da1b7a8b928b5ee229fae33cf43fd64cf31"
+            },
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":8,
+                "quantizations":[
+                    "4-bit"
+                ],
+                "model_id":"openbmb/MiniCPM-V-2_6-int4",
+                "model_revision":"051e2df6505f1fc4305f2c9bd42ed90db8bf4874"
+            }
+        ],
+        "prompt_style":{
+            "style_name":"QWEN",
+            "system_prompt":"You are a helpful assistant",
+            "roles":[
+                "user",
+                "assistant"
+            ],
+            "stop": [
+                "<|im_end|>",
+                "<|endoftext|>"
+            ]
+        }
+    },
     {
         "version": 1,
         "context_length": 4096,
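MiniCPM-V-2.6 lands as a chat+vision model with the QWEN prompt style and a 32k context. Once launched, it should be reachable through the OpenAI-compatible endpoint xinference already exposes; the sketch below assumes a local server and a placeholder image URL, with the `model` field taking the uid returned by `launch_model`.

# Hedged sketch against xinference's OpenAI-compatible /v1 endpoint.
import openai

client = openai.Client(api_key="not-used", base_url="http://127.0.0.1:9997/v1")
response = client.chat.completions.create(
    model="MiniCPM-V-2.6",  # the model_uid returned by launch_model
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this picture."},
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}},
            ],
        }
    ],
)
print(response.choices[0].message.content)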
xinference/model/llm/llm_family_modelscope.json CHANGED
@@ -234,6 +234,17 @@
                 ],
                 "model_id": "LLM-Research/Meta-Llama-3.1-70B",
                 "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 405,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "LLM-Research/Meta-Llama-3.1-405B",
+                "model_hub": "modelscope"
             }
         ]
     },
@@ -325,6 +336,35 @@
                 ],
                 "model_id": "LLM-Research/Meta-Llama-3.1-70B-Instruct-AWQ-INT4",
                 "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 405,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "LLM-Research/Meta-Llama-3.1-405B-Instruct",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": 405,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "LLM-Research/Meta-Llama-3.1-405B-Instruct-AWQ-INT4",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": 405,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "LLM-Research/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4",
+                "model_hub": "modelscope"
             }
         ],
         "prompt_style": {
@@ -4509,7 +4549,7 @@
     },
     {
         "version":1,
-        "context_length":
+        "context_length":8192,
         "model_name":"MiniCPM-Llama3-V-2_5",
         "model_lang":[
             "en",
@@ -4551,6 +4591,50 @@
             ]
         }
     },
+    {
+        "version":1,
+        "context_length":32768,
+        "model_name":"MiniCPM-V-2.6",
+        "model_lang":[
+            "en",
+            "zh"
+        ],
+        "model_ability":[
+            "chat",
+            "vision"
+        ],
+        "model_description":"MiniCPM-V 2.6 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters.",
+        "model_specs":[
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":8,
+                "quantizations":[
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id":"OpenBMB/MiniCPM-V-2_6",
+                "model_revision":"master"
+            },
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":8,
+                "quantizations":[
+                    "4-bit"
+                ],
+                "model_hub": "modelscope",
+                "model_id":"OpenBMB/MiniCPM-V-2_6-int4",
+                "model_revision":"master"
+            }
+        ],
+        "prompt_style":{
+            "style_name":"QWEN",
+            "system_prompt":"You are a helpful assistant",
+            "roles":[
+                "user",
+                "assistant"
+            ]
+        }
+    },
     {
         "version": 1,
         "context_length": 2048,