xinference 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +4 -7
- xinference/client/handlers.py +3 -0
- xinference/core/scheduler.py +4 -7
- xinference/deploy/local.py +2 -1
- xinference/model/audio/core.py +5 -0
- xinference/model/audio/kokoro.py +139 -0
- xinference/model/audio/model_spec.json +8 -0
- xinference/model/audio/model_spec_modelscope.json +9 -0
- xinference/model/llm/llama_cpp/core.py +21 -14
- xinference/model/llm/llm_family.json +306 -1
- xinference/model/llm/llm_family.py +4 -1
- xinference/model/llm/llm_family_modelscope.json +307 -3
- xinference/model/llm/mlx/core.py +11 -3
- xinference/model/llm/transformers/core.py +9 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +20 -3
- xinference/model/llm/transformers/utils.py +22 -11
- xinference/model/llm/utils.py +111 -1
- xinference/model/llm/vllm/core.py +13 -2
- {xinference-1.2.1.dist-info → xinference-1.2.2.dist-info}/METADATA +9 -8
- {xinference-1.2.1.dist-info → xinference-1.2.2.dist-info}/RECORD +26 -25
- {xinference-1.2.1.dist-info → xinference-1.2.2.dist-info}/LICENSE +0 -0
- {xinference-1.2.1.dist-info → xinference-1.2.2.dist-info}/WHEEL +0 -0
- {xinference-1.2.1.dist-info → xinference-1.2.2.dist-info}/entry_points.txt +0 -0
- {xinference-1.2.1.dist-info → xinference-1.2.2.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2025-
+ "date": "2025-02-08T17:06:47+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "
- "version": "1.2.1"
+ "full-revisionid": "ac97a13a831de6debda52e6fdb8c1bf9366be57c",
+ "version": "1.2.2"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py
CHANGED
@@ -2000,25 +2000,22 @@ class RESTfulAPI(CancelMixin):
 
         from ..model.llm.utils import (
             GLM4_TOOL_CALL_FAMILY,
-            LLAMA3_TOOL_CALL_FAMILY,
             QWEN_TOOL_CALL_FAMILY,
+            TOOL_CALL_FAMILY,
         )
 
         model_family = desc.get("model_family", "")
-        function_call_models = (
-            QWEN_TOOL_CALL_FAMILY + GLM4_TOOL_CALL_FAMILY + LLAMA3_TOOL_CALL_FAMILY
-        )
 
-        if model_family not in function_call_models:
+        if model_family not in TOOL_CALL_FAMILY:
             if body.tools:
                 raise HTTPException(
                     status_code=400,
-                    detail=f"Only {function_call_models} support tool calls",
+                    detail=f"Only {TOOL_CALL_FAMILY} support tool calls",
                 )
             if has_tool_message:
                 raise HTTPException(
                     status_code=400,
-                    detail=f"Only {function_call_models} support tool messages",
+                    detail=f"Only {TOOL_CALL_FAMILY} support tool messages",
                 )
         if body.tools and body.stream:
             is_vllm = await model.is_vllm_backend()
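The guard now consults one aggregated constant instead of summing family lists at the call site; TOOL_CALL_FAMILY is exported from xinference/model/llm/utils.py (extended in this release, per the +111 -1 entry in the file list). A minimal sketch of the pattern, with hypothetical member values since the real lists live in utils.py:

```python
# Hypothetical member values; only the constant names appear in this diff.
QWEN_TOOL_CALL_FAMILY = ["qwen2.5-instruct"]
GLM4_TOOL_CALL_FAMILY = ["glm4-chat"]
LLAMA3_TOOL_CALL_FAMILY = ["llama-3.1-instruct"]
DEEPSEEK_TOOL_CALL_FAMILY = ["deepseek-v3"]

# One aggregated list, so endpoints check membership in a single place.
TOOL_CALL_FAMILY = (
    QWEN_TOOL_CALL_FAMILY
    + GLM4_TOOL_CALL_FAMILY
    + LLAMA3_TOOL_CALL_FAMILY
    + DEEPSEEK_TOOL_CALL_FAMILY
)

def rejects_tools(model_family: str, has_tools: bool) -> bool:
    # Mirrors the endpoint's guard: tool calls are only allowed for
    # families known to support them.
    return has_tools and model_family not in TOOL_CALL_FAMILY
```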
xinference/client/handlers.py
CHANGED
xinference/core/scheduler.py
CHANGED
@@ -269,16 +269,13 @@ class InferenceRequest:
         )
 
 
-def _get_valid_batch_kv_cache(
-    from transformers.cache_utils import DynamicCache
-
-    cache = DynamicCache.from_legacy_cache(data)
+def _get_valid_batch_kv_cache(cache, skipped_indexes: Set[int]):
     batch_size = cache.key_cache[0].shape[0]
     batch_slices = [num for num in range(batch_size) if num not in skipped_indexes]
     for idx in range(len(cache)):
-        cache.key_cache[idx] = cache.key_cache[idx][batch_slices, ::]
-        cache.value_cache[idx] = cache.value_cache[idx][batch_slices, ::]
-    return cache
+        cache.key_cache[idx] = cache.key_cache[idx][batch_slices, ::].contiguous()
+        cache.value_cache[idx] = cache.value_cache[idx][batch_slices, ::].contiguous()
+    return cache
 
 
 class SchedulerActor(xo.StatelessActor):
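The rewritten helper receives the cache object directly (rather than rebuilding a DynamicCache from legacy data) and forces contiguous tensors after slicing out finished requests. A standalone sketch of the slicing pattern; shapes and the finished-request set are made up for illustration:

```python
import torch

# Two layers of [batch, heads, seq_len, head_dim] key tensors, standing in
# for DynamicCache.key_cache; value tensors would be trimmed identically.
key_cache = [torch.randn(4, 2, 8, 16) for _ in range(2)]
skipped_indexes = {1, 3}  # batch rows whose requests have finished

batch_size = key_cache[0].shape[0]
batch_slices = [n for n in range(batch_size) if n not in skipped_indexes]

for idx in range(len(key_cache)):
    # Advanced indexing copies the kept rows; .contiguous() additionally
    # guarantees a densely laid-out buffer for downstream kernels.
    key_cache[idx] = key_cache[idx][batch_slices, ::].contiguous()

print(key_cache[0].shape)  # torch.Size([2, 2, 8, 16])
```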
xinference/deploy/local.py
CHANGED
xinference/model/audio/core.py
CHANGED
@@ -25,6 +25,7 @@ from .f5tts import F5TTSModel
 from .f5tts_mlx import F5TTSMLXModel
 from .fish_speech import FishSpeechModel
 from .funasr import FunASRModel
+from .kokoro import KokoroModel
 from .melotts import MeloTTSModel
 from .whisper import WhisperModel
 from .whisper_mlx import WhisperMLXModel
@@ -176,6 +177,7 @@ def create_audio_model_instance(
         F5TTSModel,
         F5TTSMLXModel,
         MeloTTSModel,
+        KokoroModel,
     ],
     AudioModelDescription,
 ]:
@@ -192,6 +194,7 @@ def create_audio_model_instance(
         F5TTSModel,
         F5TTSMLXModel,
         MeloTTSModel,
+        KokoroModel,
     ]
     if model_spec.model_family == "whisper":
         if not model_spec.engine:
@@ -212,6 +215,8 @@
         model = F5TTSMLXModel(model_uid, model_path, model_spec, **kwargs)
     elif model_spec.model_family == "MeloTTS":
         model = MeloTTSModel(model_uid, model_path, model_spec, **kwargs)
+    elif model_spec.model_family == "Kokoro":
+        model = KokoroModel(model_uid, model_path, model_spec, **kwargs)
     else:
         raise Exception(f"Unsupported audio model family: {model_spec.model_family}")
     model_description = AudioModelDescription(
xinference/model/audio/kokoro.py
ADDED
@@ -0,0 +1,139 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from io import BytesIO
+from typing import TYPE_CHECKING, Optional
+
+import numpy as np
+
+from ...device_utils import get_available_device, is_device_available
+
+if TYPE_CHECKING:
+    from .core import AudioModelFamilyV1
+
+logger = logging.getLogger(__name__)
+
+
+class KokoroModel:
+    # The available voices, should keep sync with https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices
+    VOICES = [
+        "af_alloy",
+        "af_aoede",
+        "af_bella",
+        "af_jessica",
+        "af_kore",
+        "af_nicole",
+        "af_nova",
+        "af_river",
+        "af_sarah",
+        "af_sky",
+        "am_adam",
+        "am_echo",
+        "am_eric",
+        "am_fenrir",
+        "am_liam",
+        "am_michael",
+        "am_onyx",
+        "am_puck",
+        "bf_alice",
+        "bf_emma",
+        "bf_isabella",
+        "bf_lily",
+        "bm_daniel",
+        "bm_fable",
+        "bm_george",
+        "bm_lewis",
+    ]
+
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: str,
+        model_spec: "AudioModelFamilyV1",
+        device: Optional[str] = None,
+        **kwargs,
+    ):
+        self._model_uid = model_uid
+        self._model_path = model_path
+        self._model_spec = model_spec
+        self._device = device
+        self._model = None
+        self._kwargs = kwargs
+
+    @property
+    def model_ability(self):
+        return self._model_spec.model_ability
+
+    def load(self):
+        if self._device is None:
+            self._device = get_available_device()
+        else:
+            if not is_device_available(self._device):
+                raise ValueError(f"Device {self._device} is not available!")
+
+        import os
+
+        from kokoro import KModel, KPipeline
+
+        config_path = os.path.join(self._model_path, "config.json")
+        model_path = os.path.join(self._model_path, "kokoro-v1_0.pth")
+        # LANG_CODES = dict(
+        #     a='American English',
+        #     b='British English',
+        # )
+        lang_code = self._kwargs.get("lang_code", "a")
+        self._model = KPipeline(
+            lang_code=lang_code,
+            model=KModel(config=config_path, model=model_path),
+            device=self._device,
+        )
+
+    def speech(
+        self,
+        input: str,
+        voice: str,
+        response_format: str = "mp3",
+        speed: float = 1.0,
+        stream: bool = False,
+        **kwargs,
+    ):
+        import soundfile
+
+        if stream:
+            raise Exception("Kokoro does not support stream mode.")
+        assert self._model is not None
+        if not voice:
+            voice = next(iter(self.VOICES))
+            logger.info("Auto select speaker: %s", voice)
+        elif not voice.endswith(".pt") and voice not in self.VOICES:
+            raise ValueError(
+                f"Invalid voice: {voice}, available speakers: {self.VOICES}"
+            )
+        else:
+            logger.info("Using custom voice pt: %s", voice)
+        logger.info("Speech kwargs: %s", kwargs)
+        generator = self._model(text=input, voice=voice, speed=speed, **kwargs)
+        results = list(generator)
+        audio = np.concatenate([r[2] for r in results])
+        # Save the generated audio
+        with BytesIO() as out:
+            with soundfile.SoundFile(
+                out,
+                "w",
+                24000,
+                1,
+                format=response_format.upper(),
+            ) as f:
+                f.write(audio)
+            return out.getvalue()
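A hypothetical smoke test of the new class; the model directory is a placeholder, the spec handling is simplified, and the `kokoro` and `soundfile` packages must be installed:

```python
from xinference.model.audio.kokoro import KokoroModel

# model_path must contain config.json and kokoro-v1_0.pth, as load() expects.
model = KokoroModel(
    model_uid="kokoro-demo",
    model_path="/path/to/Kokoro-82M",
    model_spec=None,  # real callers pass an AudioModelFamilyV1 instance
)
model.load()  # auto-selects a device when none was given

wav_bytes = model.speech(
    "Hello from Kokoro!", voice="af_bella", response_format="wav"
)
with open("hello.wav", "wb") as f:
    f.write(wav_bytes)
```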
xinference/model/audio/model_spec.json
CHANGED
@@ -338,5 +338,13 @@
     "model_ability": "text-to-audio",
     "multilingual": false,
     "language": "KR"
+  },
+  {
+    "model_name": "Kokoro-82M",
+    "model_family": "Kokoro",
+    "model_id": "hexgrad/Kokoro-82M",
+    "model_revision": "7a29fcdf8e997bac6d6f5f6f0c2f0b92912f6102",
+    "model_ability": "text-to-audio",
+    "multilingual": true
   }
 ]
xinference/model/audio/model_spec_modelscope.json
CHANGED
@@ -100,5 +100,14 @@
     "model_revision": "master",
     "model_ability": "text-to-audio",
     "multilingual": true
+  },
+  {
+    "model_name": "Kokoro-82M",
+    "model_family": "Kokoro",
+    "model_hub": "modelscope",
+    "model_id": "AI-ModelScope/Kokoro-82M",
+    "model_revision": "master",
+    "model_ability": "text-to-audio",
+    "multilingual": true
   }
 ]
xinference/model/llm/llama_cpp/core.py
CHANGED
@@ -28,7 +28,7 @@ from ....types import (
 )
 from ..core import LLM
 from ..llm_family import LLMFamilyV1, LLMSpecV1
-from ..utils import QWEN_TOOL_CALL_FAMILY, ChatModelMixin
+from ..utils import DEEPSEEK_TOOL_CALL_FAMILY, QWEN_TOOL_CALL_FAMILY, ChatModelMixin
 
 logger = logging.getLogger(__name__)
 
@@ -123,18 +123,22 @@ class LlamaCppModel(LLM):
 
             raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
 
-
-
-        os.path.
-
-
-
-
+        if os.path.isfile(self.model_path):
+            # mostly passed from --model_path
+            model_path = os.path.realpath(self.model_path)
+        else:
+            # handle legacy cache.
+            model_path = os.path.realpath(
+                os.path.join(
+                    self.model_path,
+                    self.model_spec.model_file_name_template.format(
+                        quantization=self.quantization
+                    ),
+                )
             )
-
-
-
-        model_path = legacy_model_file_path
+        legacy_model_file_path = os.path.join(self.model_path, "model.bin")
+        if os.path.exists(legacy_model_file_path):
+            model_path = legacy_model_file_path
 
         try:
             self._llm = Llama(
@@ -272,8 +276,11 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
         model_family = self.model_family.model_family or self.model_family.model_name
         tools = generate_config.pop("tools", []) if generate_config else None
         full_context_kwargs = {}
-        if tools and model_family in QWEN_TOOL_CALL_FAMILY:
-            full_context_kwargs["tools"] = tools
+        if tools:
+            if model_family in QWEN_TOOL_CALL_FAMILY:
+                full_context_kwargs["tools"] = tools
+            elif model_family in DEEPSEEK_TOOL_CALL_FAMILY:
+                self._tools_to_messages_for_deepseek(messages, tools)
         assert self.model_family.chat_template is not None
         full_prompt = self.get_full_context(
             messages, self.model_family.chat_template, **full_context_kwargs
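Read as a whole, the new lookup tries a direct file path first, then the templated quantization file, then the legacy model.bin cache. A standalone sketch of that order (function name and parameters are illustrative, not part of the package):

```python
import os

def resolve_gguf_path(model_path: str, file_template: str, quantization: str) -> str:
    # 1. A file path (e.g. passed via --model_path) is used as-is.
    if os.path.isfile(model_path):
        return os.path.realpath(model_path)
    # 2. Otherwise look for the templated quantization file in the directory.
    candidate = os.path.realpath(
        os.path.join(model_path, file_template.format(quantization=quantization))
    )
    # 3. A legacy model.bin, if present, wins over the templated name.
    legacy = os.path.join(model_path, "model.bin")
    return legacy if os.path.exists(legacy) else candidate
```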
xinference/model/llm/llm_family.json
CHANGED
@@ -7125,6 +7125,91 @@
       "<|endoftext|>"
     ]
   },
+  {
+    "version":1,
+    "context_length":128000,
+    "model_name":"qwen2.5-vl-instruct",
+    "model_lang":[
+      "en",
+      "zh"
+    ],
+    "model_ability":[
+      "chat",
+      "vision"
+    ],
+    "model_description":"Qwen2.5-VL: Qwen2.5-VL is the latest version of the vision language models in the Qwen model familities.",
+    "model_specs":[
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":3,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"Qwen/Qwen2.5-VL-3B-Instruct"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":7,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"Qwen/Qwen2.5-VL-7B-Instruct"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":72,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"Qwen/Qwen2.5-VL-72B-Instruct"
+      },
+      {
+        "model_format":"mlx",
+        "model_size_in_billions":3,
+        "quantizations":[
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id":"mlx-community/Qwen2.5-VL-3B-Instruct-{quantization}"
+      },
+      {
+        "model_format":"mlx",
+        "model_size_in_billions":7,
+        "quantizations":[
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id":"mlx-community/Qwen2.5-VL-7B-Instruct-{quantization}"
+      },
+      {
+        "model_format":"mlx",
+        "model_size_in_billions":72,
+        "quantizations":[
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id":"mlx-community/Qwen2.5-VL-72B-Instruct-{quantization}"
+      }
+    ],
+    "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+    "stop_token_ids": [
+      151645,
+      151643
+    ],
+    "stop": [
+      "<|im_end|>",
+      "<|endoftext|>"
+    ]
+  },
   {
     "version": 1,
     "context_length": 32768,
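The chat_template fields in these entries are Jinja2 templates rendered over OpenAI-style message lists. A toy rendering with a deliberately simplified template (the real one above also tracks image and video placeholders):

```python
from jinja2 import Template

# Simplified stand-in for the qwen2.5-vl-instruct template above.
toy = Template(
    "{% for m in messages %}<|im_start|>{{ m['role'] }}\n"
    "{{ m['content'] }}<|im_end|>\n{% endfor %}"
    "{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
)
print(toy.render(messages=[{"role": "user", "content": "hi"}], add_generation_prompt=True))
```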
@@ -7212,7 +7297,7 @@
       "zh"
     ],
     "model_ability":[
-      "chat",
+      "generate",
       "audio"
     ],
     "model_description":"Qwen2-Audio: A large-scale audio-language model which is capable of accepting various audio signal inputs and performing audio analysis or direct textual responses with regard to speech instructions.",
@@ -8937,6 +9022,151 @@
       "<|end▁of▁sentence|>"
     ]
   },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "deepseek-r1-distill-llama",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "deepseek-r1-distill-llama is distilled from DeepSeek-R1 based on Llama",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "jakiAJK/DeepSeek-R1-Distill-Llama-8B_AWQ"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "jakiAJK/DeepSeek-R1-Distill-Llama-8B_GPTQ-int4"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "F16"
+        ],
+        "model_id": "unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF",
+        "model_file_name_template": "DeepSeek-R1-Distill-Llama-8B-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/DeepSeek-R1-Distill-Llama-8B-{quantization}"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "casperhansen/deepseek-r1-distill-llama-70b-awq"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "empirischtech/DeepSeek-R1-Distill-Llama-70B-gptq-4bit"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "F16"
+        ],
+        "quantization_parts": {
+          "Q6_K": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ],
+          "Q8_0": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ],
+          "F16": [
+            "00001-of-00003",
+            "00002-of-00003",
+            "00003-of-00003"
+          ]
+        },
+        "model_id": "unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF",
+        "model_file_name_template": "DeepSeek-R1-Distill-Qwen-7B-{quantization}.gguf",
+        "model_file_name_split_template": "DeepSeek-R1-Distill-Llama-70B-{quantization}/DeepSeek-R1-Distill-Llama-70B-{quantization}-{part}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit"
+        ],
+        "model_id": "mlx-community/DeepSeek-R1-Distill-Llama-70B-{quantization}"
+      }
+    ],
+    "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+    "stop_token_ids": [
+      151643
+    ],
+    "stop": [
+      "<|end▁of▁sentence|>"
+    ]
+  },
   {
     "version": 1,
     "context_length": 8192,
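The 70B GGUF spec above pairs quantization_parts with model_file_name_split_template for sharded files. A sketch of how a loader could expand them into concrete shard names (the values are copied from the hunk; the helper itself is illustrative):

```python
SPLIT_TEMPLATE = (
    "DeepSeek-R1-Distill-Llama-70B-{quantization}/"
    "DeepSeek-R1-Distill-Llama-70B-{quantization}-{part}.gguf"
)
QUANTIZATION_PARTS = {
    "Q6_K": ["00001-of-00002", "00002-of-00002"],
    "Q8_0": ["00001-of-00002", "00002-of-00002"],
    "F16": ["00001-of-00003", "00002-of-00003", "00003-of-00003"],
}

def shard_files(quantization: str) -> list:
    # Multi-part quantizations expand via the split template; single-file
    # ones would fall back to model_file_name_template instead.
    parts = QUANTIZATION_PARTS.get(quantization, [])
    return [SPLIT_TEMPLATE.format(quantization=quantization, part=p) for p in parts]

print(shard_files("Q6_K")[0])
# DeepSeek-R1-Distill-Llama-70B-Q6_K/DeepSeek-R1-Distill-Llama-70B-Q6_K-00001-of-00002.gguf
```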
@@ -9306,5 +9536,80 @@
       "<|user|>",
       "<|observation|>"
     ]
+  },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "internlm3-instruct",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "tools"
+    ],
+    "model_description": "InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "internlm/internlm3-8b-instruct"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "internlm/internlm3-8b-instruct-gptq-int4"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "internlm/internlm3-8b-instruct-awq"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "q2_k",
+          "q3_k_m",
+          "q4_0",
+          "q4_k_m",
+          "q5_0",
+          "q5_k_m",
+          "q6_k",
+          "q8_0"
+        ],
+        "model_id": "internlm/internlm3-8b-instruct-gguf",
+        "model_file_name_template": "internlm3-8b-instruct-{quantization}.gguf"
+      },
+      {
+        "model_format":"mlx",
+        "model_size_in_billions":8,
+        "quantizations":[
+          "4bit"
+        ],
+        "model_id":"mlx-community/internlm3-8b-instruct-{quantization}"
+      }
+    ],
+    "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+    "stop_token_ids": [
+      2,
+      128131
+    ],
+    "stop": [
+      "</s>",
+      "<|im_end|>"
+    ]
   }
 ]