xinference-0.15.4-py3-none-any.whl → xinference-0.16.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic.
- xinference/__init__.py +0 -4
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +48 -0
- xinference/client/restful/restful_client.py +19 -0
- xinference/constants.py +4 -4
- xinference/core/chat_interface.py +5 -1
- xinference/core/image_interface.py +5 -1
- xinference/core/model.py +195 -34
- xinference/core/scheduler.py +10 -7
- xinference/core/utils.py +9 -0
- xinference/model/__init__.py +4 -0
- xinference/model/audio/chattts.py +25 -14
- xinference/model/audio/model_spec.json +1 -1
- xinference/model/audio/model_spec_modelscope.json +1 -1
- xinference/model/embedding/model_spec.json +1 -1
- xinference/model/image/core.py +59 -4
- xinference/model/image/model_spec.json +24 -3
- xinference/model/image/model_spec_modelscope.json +25 -3
- xinference/model/image/ocr/__init__.py +13 -0
- xinference/model/image/ocr/got_ocr2.py +76 -0
- xinference/model/image/scheduler/__init__.py +13 -0
- xinference/model/image/scheduler/flux.py +533 -0
- xinference/model/image/stable_diffusion/core.py +8 -34
- xinference/model/image/stable_diffusion/mlx.py +221 -0
- xinference/model/image/utils.py +39 -3
- xinference/model/llm/__init__.py +2 -0
- xinference/model/llm/llm_family.json +178 -1
- xinference/model/llm/llm_family_modelscope.json +119 -0
- xinference/model/llm/transformers/chatglm.py +104 -0
- xinference/model/llm/transformers/core.py +37 -111
- xinference/model/llm/transformers/deepseek_v2.py +0 -226
- xinference/model/llm/transformers/internlm2.py +3 -95
- xinference/model/llm/transformers/opt.py +68 -0
- xinference/model/llm/transformers/utils.py +4 -284
- xinference/model/llm/utils.py +2 -2
- xinference/model/llm/vllm/core.py +16 -1
- xinference/thirdparty/mlx/__init__.py +13 -0
- xinference/thirdparty/mlx/flux/__init__.py +15 -0
- xinference/thirdparty/mlx/flux/autoencoder.py +357 -0
- xinference/thirdparty/mlx/flux/clip.py +154 -0
- xinference/thirdparty/mlx/flux/datasets.py +75 -0
- xinference/thirdparty/mlx/flux/flux.py +247 -0
- xinference/thirdparty/mlx/flux/layers.py +302 -0
- xinference/thirdparty/mlx/flux/lora.py +76 -0
- xinference/thirdparty/mlx/flux/model.py +134 -0
- xinference/thirdparty/mlx/flux/sampler.py +56 -0
- xinference/thirdparty/mlx/flux/t5.py +244 -0
- xinference/thirdparty/mlx/flux/tokenizers.py +185 -0
- xinference/thirdparty/mlx/flux/trainer.py +98 -0
- xinference/thirdparty/mlx/flux/utils.py +179 -0
- xinference/utils.py +2 -3
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.e51a356d.js → main.b76aeeb7.js} +3 -3
- xinference/web/ui/build/static/js/main.b76aeeb7.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/32ea2c04cf0bba2761b4883d2c40cc259952c94d2d6bb774e510963ca37aac0a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +1 -0
- {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/METADATA +49 -10
- {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/RECORD +64 -44
- xinference/web/ui/build/static/js/main.e51a356d.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/070d8c6b3b0f3485c6d3885f0b6bbfdf9643e088a468acbd5d596f2396071c16.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4385c1095eefbff0a8ec3b2964ba6e5a66a05ab31be721483ca2f43e2a91f6ff.json +0 -1
- /xinference/web/ui/build/static/js/{main.e51a356d.js.LICENSE.txt → main.b76aeeb7.js.LICENSE.txt} +0 -0
- {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/LICENSE +0 -0
- {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/WHEEL +0 -0
- {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/top_level.txt +0 -0
xinference/model/image/stable_diffusion/mlx.py
ADDED

```diff
@@ -0,0 +1,221 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import contextlib
+import gc
+import logging
+import re
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
+
+import numpy as np
+from PIL import Image
+from xoscar.utils import classproperty
+
+from ....types import LoRA
+from ..sdapi import SDAPIDiffusionModelMixin
+from ..utils import handle_image_result
+
+if TYPE_CHECKING:
+    from ....core.progress_tracker import Progressor
+    from ..core import ImageModelFamilyV1
+
+
+logger = logging.getLogger(__name__)
+
+
+def quantization_predicate(name: str, m) -> bool:
+    return hasattr(m, "to_quantized") and m.weight.shape[1] % 512 == 0
+
+
+def to_latent_size(image_size: Tuple[int, int]):
+    h, w = image_size
+    h = ((h + 15) // 16) * 16
+    w = ((w + 15) // 16) * 16
+
+    if (h, w) != image_size:
+        print(
+            "Warning: The image dimensions need to be divisible by 16px. "
+            f"Changing size to {h}x{w}."
+        )
+
+    return (h // 8, w // 8)
+
+
+class MLXDiffusionModel(SDAPIDiffusionModelMixin):
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: Optional[str] = None,
+        device: Optional[str] = None,
+        lora_model: Optional[List[LoRA]] = None,
+        lora_load_kwargs: Optional[Dict] = None,
+        lora_fuse_kwargs: Optional[Dict] = None,
+        model_spec: Optional["ImageModelFamilyV1"] = None,
+        **kwargs,
+    ):
+        self._model_uid = model_uid
+        self._model_path = model_path
+        self._device = device
+        # model info when loading
+        self._model = None
+        self._lora_model = lora_model
+        self._lora_load_kwargs = lora_load_kwargs or {}
+        self._lora_fuse_kwargs = lora_fuse_kwargs or {}
+        # info
+        self._model_spec = model_spec
+        self._abilities = model_spec.model_ability or []  # type: ignore
+        self._kwargs = kwargs
+
+    @property
+    def model_ability(self):
+        return self._abilities
+
+    @classproperty
+    def supported_models(self):
+        return ["FLUX.1-schnell", "FLUX.1-dev"]
+
+    def load(self):
+        try:
+            import mlx.nn as nn
+        except ImportError:
+            error_message = "Failed to import module 'mlx'"
+            installation_guide = [
+                "Please make sure 'mlx' is installed. ",
+                "You can install it by `pip install mlx`\n",
+            ]
+
+            raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+
+        from ....thirdparty.mlx.flux import FluxPipeline
+
+        logger.debug(
+            "Loading model from %s, kwargs: %s", self._model_path, self._kwargs
+        )
+        flux = self._model = FluxPipeline(
+            "flux-" + self._model_spec.model_name.split("-")[1],
+            model_path=self._model_path,
+            t5_padding=self._kwargs.get("t5_padding", True),
+        )
+        self._apply_lora()
+
+        quantize = self._kwargs.get("quantize", True)
+        if quantize:
+            nn.quantize(flux.flow, class_predicate=quantization_predicate)
+            nn.quantize(flux.t5, class_predicate=quantization_predicate)
+            nn.quantize(flux.clip, class_predicate=quantization_predicate)
+
+    def _apply_lora(self):
+        if self._lora_model is not None:
+            import mlx.core as mx
+
+            for lora_model in self._lora_model:
+                weights, lora_config = mx.load(
+                    lora_model.local_path, return_metadata=True
+                )
+                rank = int(lora_config.get("lora_rank", 8))
+                num_blocks = int(lora_config.get("lora_blocks", -1))
+                flux = self._model
+                flux.linear_to_lora_layers(rank, num_blocks)
+                flux.flow.load_weights(list(weights.items()), strict=False)
+                flux.fuse_lora_layers()
+            logger.info(f"Successfully loaded the LoRA for model {self._model_uid}.")
+
+    @staticmethod
+    @contextlib.contextmanager
+    def _release_after():
+        import mlx.core as mx
+
+        try:
+            yield
+        finally:
+            gc.collect()
+            mx.metal.clear_cache()
+
+    def text_to_image(
+        self,
+        prompt: str,
+        n: int = 1,
+        size: str = "1024*1024",
+        response_format: str = "url",
+        **kwargs,
+    ):
+        import mlx.core as mx
+
+        flux = self._model
+        width, height = map(int, re.split(r"[^\d]+", size))
+
+        # Make the generator
+        latent_size = to_latent_size((height, width))
+        gen_latent_kwargs = {}
+        if (num_steps := kwargs.get("num_inference_steps")) is None:
+            num_steps = 50 if "dev" in self._model_spec.model_name else 2  # type: ignore
+        gen_latent_kwargs["num_steps"] = num_steps
+        if guidance := kwargs.get("guidance_scale"):
+            gen_latent_kwargs["guidance"] = guidance
+        if seed := kwargs.get("seed"):
+            gen_latent_kwargs["seed"] = seed
+
+        with self._release_after():
+            latents = flux.generate_latents(  # type: ignore
+                prompt, n_images=n, latent_size=latent_size, **gen_latent_kwargs
+            )
+
+            # First we get and eval the conditioning
+            conditioning = next(latents)
+            mx.eval(conditioning)
+            peak_mem_conditioning = mx.metal.get_peak_memory() / 1024**3
+            mx.metal.reset_peak_memory()
+
+            progressor: Progressor = kwargs.pop("progressor", None)
+            # Actual denoising loop
+            for i, x_t in enumerate(latents):
+                mx.eval(x_t)
+                progressor.set_progress((i + 1) / num_steps)
+
+            peak_mem_generation = mx.metal.get_peak_memory() / 1024**3
+            mx.metal.reset_peak_memory()
+
+            # Decode them into images
+            decoded = []
+            for i in range(n):
+                decoded.append(flux.decode(x_t[i : i + 1], latent_size))  # type: ignore
+                mx.eval(decoded[-1])
+            peak_mem_decoding = mx.metal.get_peak_memory() / 1024**3
+            peak_mem_overall = max(
+                peak_mem_conditioning, peak_mem_generation, peak_mem_decoding
+            )
+
+            images = []
+            x = mx.concatenate(decoded, axis=0)
+            x = (x * 255).astype(mx.uint8)
+            for i in range(len(x)):
+                im = Image.fromarray(np.array(x[i]))
+                images.append(im)
+
+        logger.debug(
+            f"Peak memory used for the text: {peak_mem_conditioning:.3f}GB"
+        )
+        logger.debug(
+            f"Peak memory used for the generation: {peak_mem_generation:.3f}GB"
+        )
+        logger.debug(f"Peak memory used for the decoding: {peak_mem_decoding:.3f}GB")
+        logger.debug(f"Peak memory used overall: {peak_mem_overall:.3f}GB")
+
+        return handle_image_result(response_format, images)
+
+    def image_to_image(self, **kwargs):
+        raise NotImplementedError
+
+    def inpainting(self, **kwargs):
+        raise NotImplementedError
```
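Two of the helpers above are easy to sanity-check in isolation. The sketch below (plain Python, no MLX required; the body is copied from `to_latent_size` in the new file) shows how a requested image size is rounded up to a multiple of 16 pixels and then mapped onto the 8x-downsampled latent grid:

```python
from typing import Tuple

# Standalone copy of to_latent_size from the new mlx.py, so the
# rounding arithmetic can be checked without MLX installed.
def to_latent_size(image_size: Tuple[int, int]) -> Tuple[int, int]:
    h, w = image_size
    h = ((h + 15) // 16) * 16  # round up to the next multiple of 16 px
    w = ((w + 15) // 16) * 16
    return (h // 8, w // 8)   # the latent grid is 8x smaller

assert to_latent_size((1024, 1024)) == (128, 128)
assert to_latent_size((1000, 600)) == (126, 76)  # padded to 1008x608 first
```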
xinference/model/image/utils.py
CHANGED
```diff
@@ -11,16 +11,52 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import base64
+import os
+import time
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+from functools import partial
+from io import BytesIO
+from typing import TYPE_CHECKING, Optional
 
-from
+from ...constants import XINFERENCE_IMAGE_DIR
+from ...types import Image, ImageList
+
+if TYPE_CHECKING:
+    from .core import ImageModelFamilyV1
 
 
 def get_model_version(
-    image_model: ImageModelFamilyV1, controlnet: Optional[ImageModelFamilyV1]
+    image_model: "ImageModelFamilyV1", controlnet: Optional["ImageModelFamilyV1"]
 ) -> str:
     return (
         image_model.model_name
         if controlnet is None
         else f"{image_model.model_name}--{controlnet.model_name}"
     )
+
+
+def handle_image_result(response_format: str, images) -> ImageList:
+    if response_format == "url":
+        os.makedirs(XINFERENCE_IMAGE_DIR, exist_ok=True)
+        image_list = []
+        with ThreadPoolExecutor() as executor:
+            for img in images:
+                path = os.path.join(XINFERENCE_IMAGE_DIR, uuid.uuid4().hex + ".jpg")
+                image_list.append(Image(url=path, b64_json=None))
+                executor.submit(img.save, path, "jpeg")
+        return ImageList(created=int(time.time()), data=image_list)
+    elif response_format == "b64_json":
+
+        def _gen_base64_image(_img):
+            buffered = BytesIO()
+            _img.save(buffered, format="jpeg")
+            return base64.b64encode(buffered.getvalue()).decode()
+
+        with ThreadPoolExecutor() as executor:
+            results = list(map(partial(executor.submit, _gen_base64_image), images))  # type: ignore
+            image_list = [Image(url=None, b64_json=s.result()) for s in results]  # type: ignore
+        return ImageList(created=int(time.time()), data=image_list)
+    else:
+        raise ValueError(f"Unsupported response format: {response_format}")
```
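The `b64_json` branch of the new `handle_image_result` returns base64-encoded JPEG bytes, so a caller can round-trip the payload back to a PIL image. A minimal sketch (the encoding side mirrors `_gen_base64_image` above; the decoding side is an assumption about how a client would consume it):

```python
import base64
from io import BytesIO

from PIL import Image

# Encode, as _gen_base64_image does in the diff above.
img = Image.new("RGB", (8, 8), "white")
buffered = BytesIO()
img.save(buffered, format="jpeg")
b64_json = base64.b64encode(buffered.getvalue()).decode()

# Hypothetical client-side decoding of the payload.
restored = Image.open(BytesIO(base64.b64decode(b64_json)))
assert restored.size == (8, 8)
```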
xinference/model/llm/__init__.py
CHANGED
```diff
@@ -146,6 +146,7 @@ def _install():
     from .transformers.internlm2 import Internlm2PytorchChatModel
     from .transformers.minicpmv25 import MiniCPMV25Model
     from .transformers.minicpmv26 import MiniCPMV26Model
+    from .transformers.opt import OptPytorchModel
     from .transformers.qwen2_audio import Qwen2AudioChatModel
     from .transformers.qwen2_vl import Qwen2VLChatModel
     from .transformers.qwen_vl import QwenVLChatModel
@@ -190,6 +191,7 @@
             Glm4VModel,
             DeepSeekV2PytorchModel,
             DeepSeekV2PytorchChatModel,
+            OptPytorchModel,
         ]
     )
     if OmniLMMModel:  # type: ignore
```
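These two hunks follow xinference's registration pattern: an implementation is imported inside `_install()` and appended to a shared class list, from which a loader later picks a class that matches the requested model. A hedged, self-contained sketch of the pattern (only `OptPytorchModel` is a real name from the diff; the registry and matching logic here are illustrative, and the real version is richer):

```python
# Illustrative registry sketch of the _install() pattern above.
MODEL_CLASSES: list = []

class OptPytorchModel:
    @classmethod
    def match(cls, model_family: str) -> bool:
        return model_family == "opt"

MODEL_CLASSES.extend([OptPytorchModel])

def pick(model_family: str):
    # Return the first registered class that accepts the family.
    return next(c for c in MODEL_CLASSES if c.match(model_family))

assert pick("opt") is OptPytorchModel
```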
xinference/model/llm/llm_family.json
CHANGED

```diff
@@ -206,7 +206,7 @@
           "none"
         ],
         "model_id": "THUDM/glm-4-9b-chat",
-        "model_revision": "
+        "model_revision": "eb55a443d66541f30869f6caac5ad0d2e95bcbaa"
       },
       {
         "model_format": "ggufv2",
@@ -7923,6 +7923,174 @@
             "00021-of-00021"
           ]
         }
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Qwen2.5-0.5B-Instruct-4bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Qwen2.5-0.5B-Instruct-8bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "mlx-community/Qwen2.5-0.5B-Instruct-bf16"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Qwen2.5-1.5B-Instruct-4bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Qwen2.5-1.5B-Instruct-8bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "mlx-community/Qwen2.5-1.5B-Instruct-bf16"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 3,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Qwen2.5-3B-Instruct-4bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 3,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Qwen2.5-3B-Instruct-8bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 3,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "mlx-community/Qwen2.5-3B-Instruct-bf16"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Qwen2.5-7B-Instruct-4bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Qwen2.5-7B-Instruct-8bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "mlx-community/Qwen2.5-7B-Instruct-bf16"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Qwen2.5-14B-Instruct-4bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Qwen2.5-14B-Instruct-8bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "mlx-community/Qwen2.5-14B-Instruct-bf16"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Qwen2.5-32B-Instruct-4bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Qwen2.5-32B-Instruct-8bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "mlx-community/Qwen2.5-32B-Instruct-bf16"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 72,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Qwen2.5-72B-Instruct-4bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 72,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Qwen2.5-72B-Instruct-8bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 72,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "mlx-community/Qwen2.5-72B-Instruct-bf16"
       }
     ],
     "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
@@ -8008,6 +8176,15 @@
         ],
         "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct"
       },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "7",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}"
+      },
       {
         "model_format": "ggufv2",
         "model_size_in_billions": "1_5",
```
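Two conventions in these spec entries are worth noting: `model_size_in_billions` uses an underscore in place of a decimal point when the size is fractional ("0_5", "1_5"), and `{quantization}` in a `model_id` is a template slot filled with the chosen quantization at download time. A hypothetical sketch of both (the helper names are illustrative, not xinference's actual API):

```python
from typing import Union

def parse_size(size: Union[int, str]) -> float:
    # "0_5" -> 0.5, "1_5" -> 1.5; integral sizes are plain JSON numbers.
    return float(str(size).replace("_", "."))

def resolve_repo(model_id: str, quantization: str) -> str:
    # "{quantization}" is substituted with the selected quantization.
    return model_id.format(quantization=quantization)

assert parse_size("0_5") == 0.5
assert parse_size(72) == 72.0
assert (
    resolve_repo("Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}", "Int4")
    == "Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int4"
)
```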
xinference/model/llm/llm_family_modelscope.json
CHANGED

```diff
@@ -5681,6 +5681,114 @@
             "00021-of-00021"
           ]
         }
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 3,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "okwinds/Qwen2.5-3B-Instruct-MLX-4bit",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 3,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "okwinds/Qwen2.5-3B-Instruct-MLX-8bit",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "okwinds/Qwen2.5-7B-Instruct-MLX-4bit",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "okwinds/Qwen2.5-7B-Instruct-MLX-8bit",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "okwinds/Qwen2.5-14B-Instruct-MLX-4bit",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "okwinds/Qwen2.5-14B-Instruct-MLX-8bit",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "2-bit"
+        ],
+        "model_id": "okwinds/Qwen2.5-32B-Instruct-MLX-2bit",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "okwinds/Qwen2.5-32B-Instruct-MLX-4bit",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "okwinds/Qwen2.5-32B-Instruct-MLX-8bit",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 72,
+        "quantizations": [
+          "2-bit"
+        ],
+        "model_id": "okwinds/Qwen2.5-32B-Instruct-MLX-2bit",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 72,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "okwinds/Qwen2.5-72B-Instruct-MLX-4bit",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 72,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "okwinds/Qwen2.5-72B-Instruct-MLX-8bit",
+        "model_hub": "modelscope"
       }
     ],
     "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
@@ -5772,6 +5880,17 @@
         "model_revision": "master",
         "model_hub": "modelscope"
       },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      },
       {
         "model_format": "ggufv2",
         "model_size_in_billions": "1_5",
```