xinference 1.3.1.post1__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference has been flagged as potentially problematic by the registry.
- xinference/_version.py +3 -3
- xinference/core/chat_interface.py +1 -1
- xinference/model/llm/__init__.py +3 -0
- xinference/model/llm/llama_cpp/core.py +44 -14
- xinference/model/llm/llm_family.json +271 -12
- xinference/model/llm/llm_family_modelscope.json +248 -13
- xinference/model/llm/mlx/core.py +15 -11
- xinference/model/llm/reasoning_parser.py +14 -6
- xinference/model/llm/sglang/core.py +2 -0
- xinference/model/llm/transformers/core.py +3 -2
- xinference/model/llm/transformers/gemma3.py +185 -0
- xinference/model/llm/transformers/intern_vl.py +0 -2
- xinference/model/llm/utils.py +37 -29
- xinference/model/llm/vllm/core.py +8 -3
- xinference/types.py +2 -2
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.b494ae7e.css +2 -0
- xinference/web/ui/build/static/css/main.b494ae7e.css.map +1 -0
- xinference/web/ui/build/static/js/main.3cea968e.js +3 -0
- xinference/web/ui/build/static/js/main.3cea968e.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7f59e45e3f268ab8a4788b6fb024cf8dab088736dff22f5a3a39c122a83ab930.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/cc97b49285d7717c63374766c789141a4329a04582ab32756d7e0e614d4c5c7f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/dcd60488509450bfff37bfff56de2c096d51de17dd00ec60d4db49c8b483ada1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +1 -0
- xinference/web/ui/src/locales/en.json +2 -2
- xinference/web/ui/src/locales/zh.json +1 -1
- {xinference-1.3.1.post1.dist-info → xinference-1.4.0.dist-info}/METADATA +1 -1
- {xinference-1.3.1.post1.dist-info → xinference-1.4.0.dist-info}/RECORD +34 -33
- xinference/web/ui/build/static/css/main.f8177338.css +0 -2
- xinference/web/ui/build/static/css/main.f8177338.css.map +0 -1
- xinference/web/ui/build/static/js/main.55b70cb7.js +0 -3
- xinference/web/ui/build/static/js/main.55b70cb7.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2deac8d5636974533e3714f34e94fc754f9153a07c6ee11e72846cb8eae47e4b.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e23d476fcbf6fd69c8986bf82133d257d28aa8fc9a5cab231d81c1c75c58cd99.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e7a8c37fda8725cab69c7ef8c627060bd7fc806adc67e00fe628ba148cb86d7f.json +0 -1
- /xinference/web/ui/build/static/js/{main.55b70cb7.js.LICENSE.txt → main.3cea968e.js.LICENSE.txt} +0 -0
- {xinference-1.3.1.post1.dist-info → xinference-1.4.0.dist-info}/LICENSE +0 -0
- {xinference-1.3.1.post1.dist-info → xinference-1.4.0.dist-info}/WHEEL +0 -0
- {xinference-1.3.1.post1.dist-info → xinference-1.4.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.3.1.post1.dist-info → xinference-1.4.0.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json

 version_json = '''
 {
- "date": "2025-03-…",
+ "date": "2025-03-21T14:33:52+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "…",
- "version": "1.3.1.post1"
+ "full-revisionid": "ac88d425e3d5fc12166e22c4032286327871f5f2",
+ "version": "1.4.0"
 }
 ''' # END VERSION_JSON
xinference/core/chat_interface.py
CHANGED
@@ -137,7 +137,7 @@ class GradioInterface:
         ):
             assert isinstance(chunk, dict)
             delta = chunk["choices"][0]["delta"]
-            if "content" not in delta:
+            if "content" not in delta or delta["content"] is None:
                 continue
             else:
                 # some model like deepseek-r1-distill-qwen
xinference/model/llm/__init__.py
CHANGED
@@ -143,6 +143,7 @@ def _install():
         DeepSeekV2PytorchModel,
     )
     from .transformers.deepseek_vl import DeepSeekVLChatModel
+    from .transformers.gemma3 import Gemma3ChatModel, Gemma3TextChatModel
     from .transformers.glm4v import Glm4VModel
     from .transformers.glm_edge_v import GlmEdgeVModel
     from .transformers.intern_vl import InternVLChatModel
@@ -198,6 +199,8 @@ def _install():
             OptPytorchModel,
             GlmEdgeVModel,
             CogAgentChatModel,
+            Gemma3TextChatModel,
+            Gemma3ChatModel,
         ]
     )
     if OmniLMMModel:  # type: ignore
xinference/model/llm/llama_cpp/core.py
CHANGED
@@ -39,10 +39,15 @@ logger = logging.getLogger(__name__)
 USE_XLLAMACPP = bool(int(os.environ.get("USE_XLLAMACPP", 0)))


-class …
+class _Done:
     pass


+class _Error:
+    def __init__(self, msg):
+        self.msg = msg
+
+
 class XllamaCppModel(LLM, ChatModelMixin):
     def __init__(
         self,
@@ -200,7 +205,14 @@ class XllamaCppModel(LLM, ChatModelMixin):
         )
         prompt_json = orjson.dumps(data)

-        def …
+        def _error_callback(err):
+            try:
+                msg = orjson.loads(err)
+                q.put(_Error(msg))
+            except Exception as e:
+                q.put(_Error(str(e)))
+
+        def _ok_callback(ok):
             try:
                 res = orjson.loads(ok)
                 res["model"] = self.model_uid
@@ -209,10 +221,10 @@ class XllamaCppModel(LLM, ChatModelMixin):
                 logger.exception("handle_completions callback failed: %s", e)

             try:
-                self._llm.handle_completions(prompt_json, …
+                self._llm.handle_completions(prompt_json, _error_callback, _ok_callback)
             except Exception as ex:
                 logger.exception("handle_completions failed: %s", ex)
-            q.put(…
+            q.put(_Done)

         assert self._executor
         self._executor.submit(_handle_completion)
@@ -220,12 +232,17 @@ class XllamaCppModel(LLM, ChatModelMixin):
         if stream:

             def _to_iterator():
-                while (r := q.get()) is not …:
+                while (r := q.get()) is not _Done:
+                    if type(r) is _Error:
+                        raise Exception("Got error in generate stream: %s", r.msg)
                     yield r

             return _to_iterator()
         else:
-            …
+            r = q.get()
+            if type(r) is _Error:
+                raise Exception("Got error in generate: %s", r.msg)
+            return r

     def chat(
         self,
@@ -253,7 +270,14 @@ class XllamaCppModel(LLM, ChatModelMixin):
         )
         prompt_json = orjson.dumps(data)

-        def …
+        def _error_callback(err):
+            try:
+                msg = orjson.loads(err)
+                q.put(_Error(msg))
+            except Exception as e:
+                q.put(_Error(str(e)))
+
+        def _ok_callback(ok):
             try:
                 res = orjson.loads(ok)
                 res["model"] = self.model_uid
@@ -263,11 +287,11 @@ class XllamaCppModel(LLM, ChatModelMixin):

             try:
                 self._llm.handle_chat_completions(
-                    prompt_json, …
+                    prompt_json, _error_callback, _ok_callback
                 )
             except Exception as ex:
                 logger.exception("handle_chat_completions failed: %s", ex)
-            q.put(…
+            q.put(_Done)

         assert self._executor
         self._executor.submit(_handle_chat_completion)
@@ -275,14 +299,19 @@ class XllamaCppModel(LLM, ChatModelMixin):
         if stream:

             def _to_iterator():
-                while (r := q.get()) is not …:
+                while (r := q.get()) is not _Done:
+                    if type(r) is _Error:
+                        raise Exception("Got error in chat stream: %s", r.msg)
                     yield r

             return self._to_chat_completion_chunks(
                 _to_iterator(), self.reasoning_parser
             )
         else:
-            …
+            r = q.get()
+            if type(r) is _Error:
+                raise Exception("Got error in chat: %s", r.msg)
+            return self._to_chat_completion(r, self.reasoning_parser)


 class LlamaCppModel(LLM):
@@ -533,10 +562,11 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
         tools = generate_config.pop("tools", []) if generate_config else None
         full_context_kwargs = {}
         if tools:
-            if …
+            if (
+                model_family in QWEN_TOOL_CALL_FAMILY
+                or model_family in DEEPSEEK_TOOL_CALL_FAMILY
+            ):
                 full_context_kwargs["tools"] = tools
-            elif model_family in DEEPSEEK_TOOL_CALL_FAMILY:
-                self._tools_to_messages_for_deepseek(messages, tools)
         assert self.model_family.chat_template is not None
         full_prompt = self.get_full_context(
             messages, self.model_family.chat_template, **full_context_kwargs
xinference/model/llm/llm_family.json
CHANGED
@@ -5786,6 +5786,265 @@
       "<start_of_turn>"
     ]
   },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "gemma-3-1b-it",
+    "model_lang": [
+      "en"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 1,
+        "quantizations": [
+          "none",
+          "4-bit",
+          "8-bit"
+        ],
+        "model_id": "google/gemma-3-1b-it"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 1,
+        "quantizations": [
+          "IQ2_M",
+          "IQ3_M",
+          "IQ3_XS",
+          "IQ3_XXS",
+          "IQ4_NL",
+          "IQ4_XS",
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_L",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_K_L",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q6_K_L",
+          "Q8_0",
+          "bf16"
+        ],
+        "model_id": "bartowski/google_gemma-3-1b-it-GGUF",
+        "model_file_name_template": "google_gemma-3-1b-it-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 1,
+        "quantizations": [
+          "4bit",
+          "6bit",
+          "8bit",
+          "fp16"
+        ],
+        "model_id": "mlx-community/gemma-3-1b-it-{quantization}"
+      }
+    ],
"chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
+    "stop_token_ids": [
+      1,
+      105,
+      106
+    ],
+    "stop": [
+      "<eos>",
+      "<end_of_turn>",
+      "<start_of_turn>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "gemma-3-it",
+    "model_lang": [
+      "en"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 4,
+        "quantizations": [
+          "none",
+          "4-bit",
+          "8-bit"
+        ],
+        "model_id": "google/gemma-3-4b-it"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "none",
+          "4-bit",
+          "8-bit"
+        ],
+        "model_id": "google/gemma-3-12b-it"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "none",
+          "4-bit",
+          "8-bit"
+        ],
+        "model_id": "google/gemma-3-27b-it"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 4,
+        "quantizations": [
+          "IQ2_M",
+          "IQ3_M",
+          "IQ3_XS",
+          "IQ3_XXS",
+          "IQ4_NL",
+          "IQ4_XS",
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_L",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_K_L",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q6_K_L",
+          "Q8_0",
+          "bf16"
+        ],
+        "model_id": "bartowski/google_gemma-3-4b-it-GGUF",
+        "model_file_name_template": "google_gemma-3-4b-it-{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "IQ2_M",
+          "IQ3_M",
+          "IQ3_XS",
+          "IQ3_XXS",
+          "IQ4_NL",
+          "IQ4_XS",
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_L",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_K_L",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q6_K_L",
+          "Q8_0",
+          "bf16"
+        ],
+        "model_id": "bartowski/google_gemma-3-12b-it-GGUF",
+        "model_file_name_template": "google_gemma-3-12b-it-{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "IQ2_M",
+          "IQ3_M",
+          "IQ3_XS",
+          "IQ3_XXS",
+          "IQ4_NL",
+          "IQ4_XS",
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_L",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_K_L",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q6_K_L",
+          "Q8_0",
+          "bf16"
+        ],
+        "model_id": "bartowski/google_gemma-3-27b-it-GGUF",
+        "model_file_name_template": "google_gemma-3-27b-it-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 4,
+        "quantizations": [
+          "4bit",
+          "6bit",
+          "8bit",
+          "fp16"
+        ],
+        "model_id": "mlx-community/gemma-3-4b-it-{quantization}"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "4bit",
+          "6bit",
+          "8bit",
+          "fp16"
+        ],
+        "model_id": "mlx-community/gemma-3-12b-it-{quantization}"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "4bit",
+          "6bit",
+          "8bit",
+          "fp16"
+        ],
+        "model_id": "mlx-community/gemma-3-27b-it-{quantization}"
+      }
+    ],
"chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
+    "stop_token_ids": [
+      1,
+      105,
+      106
+    ],
+    "stop": [
+      "<eos>",
+      "<end_of_turn>",
+      "<start_of_turn>"
+    ]
+  },
   {
     "version": 1,
     "context_length": 8192,
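With these registry entries in place, the new Gemma 3 models should be launchable like any other built-in family. A rough sketch using the Python client, assuming a locally running supervisor on the default port (the endpoint and the chosen spec below are illustrative, not prescribed by this release):

from xinference.client import Client

client = Client("http://127.0.0.1:9997")  # assumes `xinference-local` is already running

model_uid = client.launch_model(
    model_name="gemma-3-it",            # entry added in this release
    model_format="pytorch",
    model_size_in_billions=4,
    quantization="none",
)

model = client.get_model(model_uid)
print(model.chat(messages=[{"role": "user", "content": "Hello"}]))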
@@ -6923,7 +7182,7 @@
           "8-bit",
           "none"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO…"
+        "model_id": "OpenGVLab/InternVL2_5-1B-MPO"
       },
       {
         "model_format": "pytorch",
@@ -6933,7 +7192,7 @@
           "8-bit",
           "none"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO…"
+        "model_id": "OpenGVLab/InternVL2_5-2B-MPO"
       },
       {
         "model_format": "pytorch",
@@ -6943,7 +7202,7 @@
           "8-bit",
           "none"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO…"
+        "model_id": "OpenGVLab/InternVL2_5-4B-MPO"
       },
       {
         "model_format": "awq",
@@ -6961,7 +7220,7 @@
           "8-bit",
           "none"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO…"
+        "model_id": "OpenGVLab/InternVL2_5-8B-MPO"
       },
       {
         "model_format": "awq",
@@ -6969,7 +7228,7 @@
         "quantizations": [
           "Int4"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO-…"
+        "model_id": "OpenGVLab/InternVL2_5-8B-MPO-AWQ"
       },
       {
         "model_format": "pytorch",
@@ -6979,7 +7238,7 @@
           "8-bit",
           "none"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO…"
+        "model_id": "OpenGVLab/InternVL2_5-26B-MPO"
       },
       {
         "model_format": "awq",
@@ -6987,7 +7246,7 @@
         "quantizations": [
           "Int4"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO-…"
+        "model_id": "OpenGVLab/InternVL2_5-26B-MPO-AWQ"
       },
       {
         "model_format": "pytorch",
@@ -6997,7 +7256,7 @@
           "8-bit",
           "none"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO…"
+        "model_id": "OpenGVLab/InternVL2_5-38B-MPO"
       },
       {
         "model_format": "awq",
@@ -7005,7 +7264,7 @@
         "quantizations": [
           "Int4"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO-…"
+        "model_id": "OpenGVLab/InternVL2_5-38B-MPO-AWQ"
       },
       {
         "model_format": "pytorch",
@@ -7015,7 +7274,7 @@
           "8-bit",
           "none"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO…"
+        "model_id": "OpenGVLab/InternVL2_5-78B-MPO"
       },
       {
         "model_format": "awq",
@@ -7023,7 +7282,7 @@
         "quantizations": [
           "Int4"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO-…"
+        "model_id": "OpenGVLab/InternVL2_5-78B-MPO-AWQ"
       }
     ],
"chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
@@ -7892,7 +8151,7 @@
         "model_id": "mlx-community/DeepSeek-V3-{quantization}"
       }
     ],
-    "chat_template": "{% if …",
"chat_template": "{% if messages %} {% if system or tools %} {% if system %} {{ system }} {% endif %} {% if tools %} {# Handle tools here if needed #} {% endif %} {% endif %} {% for message in messages %} {% set last = loop.index == loop.length %} {% if message.role == \"user\" %} <|User|> {% if tools and last %} Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}. Do not use variables. {{ tools }} {% endif %} {{ message.content }} {% if last %} <|Assistant|> {% endif %} {% elif message.role == \"assistant\" %} <|Assistant|> {% if message.tool_calls %} <|tool▁calls▁begin|> {% for tool in message.tool_calls %} <|tool▁call▁begin|> {\"name\": \"{{ tool.function.name }}\", \"parameters\": {{ tool.function.arguments }}} <|tool▁call▁end|> {% endfor %} <|tool▁calls▁end|> {% else %} {{ message.content }} {% if not last %} <|end▁of▁sentence|> {% endif %} {% endif %} {% elif message.role == \"tool\" %} <|tool▁outputs▁begin|> <|tool▁output▁begin|> {{ message.content }} <|tool▁output▁end|> <|tool▁outputs▁end|> {% if last and message.role != \"assistant\" %} <|Assistant|> {% endif %} {% endif %} {% endfor %} {% else %} {% if system %} {{ system }} {% endif %} {% if prompt %} <|User|> {{ prompt }} {% endif %} <|Assistant|> {{ response }} {% if response %} {{ response }} {% endif %} {% endif %}",
     "stop_token_ids": [
       1
     ],
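The reworked DeepSeek-V3 template iterates over message.tool_calls and over role == "tool" entries, so the conversations it renders look roughly like the following (the function name and arguments are hypothetical):

# Hypothetical message list in the shape the new DeepSeek-V3 chat template consumes.
messages = [
    {"role": "user", "content": "What is the weather in Beijing?"},
    {
        "role": "assistant",
        "tool_calls": [
            {"function": {"name": "get_weather", "arguments": "{\"city\": \"Beijing\"}"}}
        ],
    },
    {"role": "tool", "content": "{\"temperature\": 20}"},
]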