xinference 0.16.2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +62 -11
- xinference/client/restful/restful_client.py +8 -2
- xinference/conftest.py +0 -8
- xinference/constants.py +2 -0
- xinference/core/model.py +44 -5
- xinference/core/supervisor.py +13 -7
- xinference/core/utils.py +76 -12
- xinference/core/worker.py +5 -4
- xinference/deploy/cmdline.py +5 -0
- xinference/deploy/utils.py +7 -4
- xinference/model/audio/model_spec.json +2 -2
- xinference/model/image/stable_diffusion/core.py +5 -2
- xinference/model/llm/core.py +1 -3
- xinference/model/llm/llm_family.json +263 -4
- xinference/model/llm/llm_family_modelscope.json +302 -0
- xinference/model/llm/mlx/core.py +45 -2
- xinference/model/llm/vllm/core.py +2 -1
- xinference/model/rerank/core.py +11 -4
- xinference/thirdparty/fish_speech/fish_speech/conversation.py +254 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +76 -11
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +32 -1
- xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
- xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1 -1
- xinference/thirdparty/fish_speech/tools/api.py +578 -75
- xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
- xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
- xinference/thirdparty/fish_speech/tools/llama/generate.py +393 -9
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +90 -29
- xinference/thirdparty/fish_speech/tools/post_api.py +37 -15
- xinference/thirdparty/fish_speech/tools/schema.py +187 -0
- xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
- xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
- xinference/thirdparty/fish_speech/tools/webui.py +138 -75
- {xinference-0.16.2.dist-info → xinference-1.0.0.dist-info}/METADATA +26 -3
- {xinference-0.16.2.dist-info → xinference-1.0.0.dist-info}/RECORD +49 -56
- {xinference-0.16.2.dist-info → xinference-1.0.0.dist-info}/WHEEL +1 -1
- xinference/thirdparty/fish_speech/fish_speech/configs/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/commons.py +0 -35
- xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
- {xinference-0.16.2.dist-info → xinference-1.0.0.dist-info}/LICENSE +0 -0
- {xinference-0.16.2.dist-info → xinference-1.0.0.dist-info}/entry_points.txt +0 -0
- {xinference-0.16.2.dist-info → xinference-1.0.0.dist-info}/top_level.txt +0 -0
xinference/deploy/cmdline.py
CHANGED
@@ -43,6 +43,7 @@ from .utils import (
     get_log_file,
     get_timestamp_ms,
     handle_click_args_type,
+    set_envs,
 )
 
 try:
@@ -106,6 +107,8 @@ def start_local_cluster(
         XINFERENCE_LOG_MAX_BYTES,
     )
     logging.config.dictConfig(dict_config)  # type: ignore
+    # refer to https://huggingface.co/docs/transformers/main_classes/logging
+    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())
 
     main(
         host=host,
@@ -280,6 +283,7 @@ def supervisor(
         XINFERENCE_LOG_MAX_BYTES,
    )
     logging.config.dictConfig(dict_config)  # type: ignore
+    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())
 
     main(
         host=host,
@@ -342,6 +346,7 @@ def worker(
         XINFERENCE_LOG_MAX_BYTES,
     )
     logging.config.dictConfig(dict_config)  # type: ignore
+    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())
 
     endpoint = get_endpoint(endpoint)
 
xinference/deploy/utils.py
CHANGED
@@ -134,10 +134,6 @@ def get_config_dict(
                 "propagate": False,
             },
         },
-        "root": {
-            "level": "WARN",
-            "handlers": ["stream_handler", "file_handler"],
-        },
     }
     return config_dict
 
@@ -220,3 +216,10 @@ def handle_click_args_type(arg: str) -> Any:
         pass
 
     return arg
+
+
+def set_envs(key: str, value: str):
+    """
+    Environment variables are set by the parent process and inherited by child processes
+    """
+    os.environ[key] = value
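Taken together, the new set_envs helper above and the CLI calls in cmdline.py export the user-selected log level to the transformers library through an environment variable, which any spawned worker process inherits. A minimal sketch of that behaviour (the configure_transformers_verbosity wrapper is hypothetical; set_envs and TRANSFORMERS_VERBOSITY come from the diffs above):

import os


def set_envs(key: str, value: str):
    # Environment variables set in the parent process are inherited by
    # any child process started afterwards.
    os.environ[key] = value


def configure_transformers_verbosity(log_level: str):
    # Hypothetical wrapper mirroring the new CLI calls; see
    # https://huggingface.co/docs/transformers/main_classes/logging
    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())


configure_transformers_verbosity("DEBUG")
assert os.environ["TRANSFORMERS_VERBOSITY"] == "debug"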
xinference/model/audio/model_spec.json
CHANGED
@@ -127,7 +127,7 @@
     "model_name": "ChatTTS",
     "model_family": "ChatTTS",
     "model_id": "2Noise/ChatTTS",
-    "model_revision": "
+    "model_revision": "1a3c04a8b0651689bd9242fbb55b1f4b5a9aef84",
     "model_ability": "text-to-audio",
     "multilingual": true
   },
@@ -159,7 +159,7 @@
     "model_name": "FishSpeech-1.4",
     "model_family": "FishAudio",
     "model_id": "fishaudio/fish-speech-1.4",
-    "model_revision": "
+    "model_revision": "069c573759936b35191d3380deb89183c0656f59",
     "model_ability": "text-to-audio",
     "multilingual": true
   }
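These revision pins mean that launching either audio model now resolves to a fixed repository commit rather than whatever the upstream main branch currently holds. A hedged usage sketch with the standard client (the endpoint URL is a placeholder for your own deployment):

from xinference.client import Client

# Assumes a locally running xinference endpoint.
client = Client("http://127.0.0.1:9997")

# Launching by name downloads the revision pinned in model_spec.json,
# e.g. fishaudio/fish-speech-1.4 at commit 069c5737...
model_uid = client.launch_model(model_name="FishSpeech-1.4", model_type="audio")
model = client.get_model(model_uid)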
xinference/model/image/stable_diffusion/core.py
CHANGED
@@ -17,9 +17,11 @@ import gc
 import inspect
 import itertools
 import logging
+import os
 import re
 import sys
 import warnings
+from glob import glob
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 
 import PIL.Image
@@ -194,8 +196,9 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         if sys.platform != "darwin" and torch_dtype is None:
             # The following params crashes on Mac M2
             self._torch_dtype = self._kwargs["torch_dtype"] = torch.float16
-            self._kwargs["
-
+            self._kwargs["use_safetensors"] = any(
+                glob(os.path.join(self._model_path, "*/*.safetensors"))
+            )
         if isinstance(torch_dtype, str):
             self._kwargs["torch_dtype"] = getattr(torch, torch_dtype)
 
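The replaced assignment (its old right-hand side is truncated in this diff) now enables use_safetensors only when the downloaded model directory actually contains .safetensors shards. A standalone sketch of the same check, using a placeholder path:

import os
from glob import glob


def should_use_safetensors(model_path: str) -> bool:
    # Mirrors the glob above: True if any first-level sub-directory of the
    # model path holds at least one *.safetensors file.
    return any(glob(os.path.join(model_path, "*/*.safetensors")))


kwargs = {"torch_dtype": "float16"}
kwargs["use_safetensors"] = should_use_safetensors("/path/to/stable-diffusion-model")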
xinference/model/llm/core.py
CHANGED
@@ -52,9 +52,7 @@ class LLM(abc.ABC):
         *args,
         **kwargs,
     ):
-        self.model_uid, self.
-            replica_model_uid
-        )
+        self.model_uid, self.rep_id = parse_replica_model_uid(replica_model_uid)
         self.model_family = model_family
         self.model_spec = model_spec
         self.quantization = quantization
xinference/model/llm/llm_family.json
CHANGED
@@ -1312,6 +1312,93 @@
       "<|eom_id|>"
     ]
   },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "llama-3.2-vision-instruct",
+    "model_lang": [
+      "en",
+      "de",
+      "fr",
+      "it",
+      "pt",
+      "hi",
+      "es",
+      "th"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 11,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "meta-llama/Llama-3.2-11B-Vision-Instruct"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 90,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "meta-llama/Llama-3.2-90B-Vision-Instruct"
+      }
+    ],
+    "chat_template": "{% for message in messages %}{% if loop.index0 == 0 %}{{ bos_token }}{% endif %}{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<|image|>' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{ '<|eot_id|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+    "stop_token_ids": [
+      128001,
+      128008,
+      128009
+    ],
+    "stop": [
+      "<|end_of_text|>",
+      "<|eot_id|>",
+      "<|eom_id|>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "llama-3.2-vision",
+    "model_lang": [
+      "en",
+      "de",
+      "fr",
+      "it",
+      "pt",
+      "hi",
+      "es",
+      "th"
+    ],
+    "model_ability": [
+      "generate",
+      "vision"
+    ],
+    "model_description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 11,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "meta-llama/Meta-Llama-3.2-11B-Vision"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 90,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "meta-llama/Meta-Llama-3.2-90B-Vision"
+      }
+    ]
+  },
   {
     "version": 1,
     "context_length": 2048,
@@ -8118,6 +8205,16 @@
     ],
     "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
     "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-0.5B"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": "1_5",
@@ -8126,8 +8223,17 @@
           "8-bit",
           "none"
         ],
-        "model_id": "Qwen/Qwen2.5-Coder-1.5B"
-
+        "model_id": "Qwen/Qwen2.5-Coder-1.5B"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "3",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-3B"
       },
       {
         "model_format": "pytorch",
@@ -8137,8 +8243,27 @@
           "8-bit",
           "none"
         ],
-        "model_id": "Qwen/Qwen2.5-Coder-7B"
-
+        "model_id": "Qwen/Qwen2.5-Coder-7B"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-14B"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-32B"
       }
     ]
   },
@@ -8156,6 +8281,16 @@
     ],
     "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
     "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": "1_5",
@@ -8166,6 +8301,16 @@
         ],
         "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct"
       },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "3",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 7,
@@ -8176,6 +8321,53 @@
         ],
         "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct"
       },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-{quantization}"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-{quantization}"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "3",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-{quantization}"
+      },
       {
         "model_format": "gptq",
         "model_size_in_billions": "7",
@@ -8185,6 +8377,73 @@
         ],
         "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}"
       },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "14",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-{quantization}"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "32",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-{quantization}"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "3",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "7",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "14",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "32",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ"
+      },
+
       {
         "model_format": "ggufv2",
         "model_size_in_billions": "1_5",