xinference 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of xinference has been flagged as potentially problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +4 -7
- xinference/client/handlers.py +3 -0
- xinference/client/restful/restful_client.py +9 -1
- xinference/core/model.py +19 -0
- xinference/core/resource.py +7 -1
- xinference/core/scheduler.py +4 -7
- xinference/core/status_guard.py +1 -0
- xinference/core/supervisor.py +228 -19
- xinference/core/utils.py +1 -29
- xinference/core/worker.py +28 -2
- xinference/deploy/cmdline.py +33 -3
- xinference/deploy/local.py +2 -1
- xinference/deploy/test/test_cmdline.py +32 -0
- xinference/device_utils.py +43 -1
- xinference/model/audio/core.py +5 -0
- xinference/model/audio/kokoro.py +122 -0
- xinference/model/audio/model_spec.json +8 -0
- xinference/model/audio/model_spec_modelscope.json +9 -0
- xinference/model/image/stable_diffusion/core.py +15 -6
- xinference/model/llm/llama_cpp/core.py +21 -14
- xinference/model/llm/llm_family.json +866 -46
- xinference/model/llm/llm_family.py +7 -2
- xinference/model/llm/llm_family_modelscope.json +873 -16
- xinference/model/llm/mlx/core.py +11 -3
- xinference/model/llm/reasoning_parsers/__init__.py +13 -0
- xinference/model/llm/reasoning_parsers/abs_reasoning_parsers.py +98 -0
- xinference/model/llm/reasoning_parsers/deepseek_r1_reasoning_parser.py +140 -0
- xinference/model/llm/sglang/core.py +99 -11
- xinference/model/llm/transformers/core.py +9 -1
- xinference/model/llm/transformers/intern_vl.py +23 -14
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +20 -3
- xinference/model/llm/transformers/utils.py +22 -11
- xinference/model/llm/utils.py +164 -20
- xinference/model/llm/vllm/core.py +36 -4
- xinference/model/llm/vllm/xavier/executor.py +2 -2
- xinference/model/llm/vllm/xavier/scheduler.py +3 -3
- xinference/thirdparty/internvl/conversation.py +26 -17
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.f8177338.css +2 -0
- xinference/web/ui/build/static/css/main.f8177338.css.map +1 -0
- xinference/web/ui/build/static/js/main.ad42919c.js +3 -0
- xinference/web/ui/build/static/js/main.ad42919c.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/074a42304bbbaa79e1bfc3b28502457a390df55708de9006f4cc8e35c60aea87.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0acb065326560592b10888234242f94f67efe28458b90f273d4d4fba9daa0cd2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/279ace390216236a82b3d8995c78eca4d637ac9a523e9f521a2d9c76607a43d7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/630a7bd592596cc6e291fc32238ce7c08238038a64ed8ccee0eb0c13c9902910.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6cb9f6c62ab4042f0b11c5d75e51187188e9d6f5f08b1d63e796e051bafdb457.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/914c33e91c1012e3bcd3e96f3a25884cbef148290632d0266dab972b8cc1e95f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b7939cd3a48adf12fccfdd0803019b5cc235ff7de3a297dae70ce635e0eea13e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/fecf076bcd198a458c2a6ab0e85e40dc1c99994c353164e79c469be162cb74c9.json +1 -0
- xinference/web/ui/src/locales/en.json +14 -1
- xinference/web/ui/src/locales/zh.json +14 -1
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/METADATA +18 -17
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/RECORD +67 -60
- xinference/web/ui/build/static/css/main.51a587ff.css +0 -2
- xinference/web/ui/build/static/css/main.51a587ff.css.map +0 -1
- xinference/web/ui/build/static/js/main.b0936c54.js +0 -3
- xinference/web/ui/build/static/js/main.b0936c54.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0c2fb5375667931c4a331c99e0d87dc145e8f327cea3f44d6e56f54c7c1d4020.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a7f1a71f6580dfe810c685a9c1d68e318f71e1fa258fbe50b87a6ac37cc0a598.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +0 -1
- /xinference/web/ui/build/static/js/{main.b0936c54.js.LICENSE.txt → main.ad42919c.js.LICENSE.txt} +0 -0
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/LICENSE +0 -0
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/WHEEL +0 -0
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/top_level.txt +0 -0
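The headline changes are the new model registrations in `llm_family.json` (InternVL2.5, Qwen2.5-VL, DeepSeek-V2.5/V3/R1 and the R1 distills, InternLM3) plus a new `reasoning_parsers` package. After upgrading, any entry added in the hunks below should be launchable through the usual client API; the following is a minimal sketch assuming a locally running supervisor on the default endpoint (not taken from this diff):

```python
# Minimal sketch, assuming a local xinference supervisor at the default
# endpoint; the name/format/size values mirror one of the llm_family.json
# entries added in this release.
from xinference.client import Client

client = Client("http://localhost:9997")
model_uid = client.launch_model(
    model_name="deepseek-r1-distill-llama",  # registered in 1.3.0
    model_engine="transformers",
    model_format="pytorch",
    model_size_in_billions=8,
    quantization="none",
)
model = client.get_model(model_uid)
print(model.chat(messages=[{"role": "user", "content": "Hello!"}]))
```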
@@ -6772,6 +6772,151 @@
         "stop_token_ids": [],
         "stop": []
     },
+    {
+        "version": 1,
+        "context_length": 16384,
+        "model_name": "InternVL2.5",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "InternVL 2.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 1,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "OpenGVLab/InternVL2_5-1B"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": 1,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "OpenGVLab/InternVL2_5-1B-AWQ"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 2,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "OpenGVLab/InternVL2_5-2B"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": 2,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "OpenGVLab/InternVL2_5-2B-AWQ"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 4,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "OpenGVLab/InternVL2_5-4B"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": 4,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "OpenGVLab/InternVL2_5-4B-AWQ"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "OpenGVLab/InternVL2_5-8B"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "OpenGVLab/InternVL2_5-8B-AWQ"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 26,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "OpenGVLab/InternVL2_5-26B"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": 26,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "OpenGVLab/InternVL2_5-26B-AWQ"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 38,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "OpenGVLab/InternVL2_5-38B"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": 38,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "OpenGVLab/InternVL2_5-38B-AWQ"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 78,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "OpenGVLab/InternVL2_5-78B"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": 78,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "OpenGVLab/InternVL2_5-78B-AWQ"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+        "stop_token_ids": [],
+        "stop": []
+    },
     {
         "version": 1,
         "context_length": 8192,
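Each entry's `chat_template` is a Jinja2 string that the runtime renders into the model's prompt. As a rough sketch of what the InternVL2.5 template above evaluates to (using plain `jinja2` here; xinference does its own template handling internally):

```python
# Sketch: rendering the InternVL2.5 chat_template above with plain jinja2.
# This only shows what the template string evaluates to for a one-turn chat.
from jinja2 import Template

chat_template = (
    "{% for message in messages %}"
    "{% if loop.first and messages[0]['role'] != 'system' %}"
    "{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}"
    "{% endif %}"
    "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
    "{% endfor %}"
    "{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
)

prompt = Template(chat_template).render(
    messages=[{"role": "user", "content": "Describe this image."}],
    add_generation_prompt=True,
)
print(prompt)
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# Describe this image.<|im_end|>
# <|im_start|>assistant
```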
@@ -7125,6 +7270,91 @@
             "<|endoftext|>"
         ]
     },
+    {
+        "version":1,
+        "context_length":128000,
+        "model_name":"qwen2.5-vl-instruct",
+        "model_lang":[
+            "en",
+            "zh"
+        ],
+        "model_ability":[
+            "chat",
+            "vision"
+        ],
+        "model_description":"Qwen2.5-VL: Qwen2.5-VL is the latest version of the vision language models in the Qwen model familities.",
+        "model_specs":[
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":3,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"Qwen/Qwen2.5-VL-3B-Instruct"
+            },
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":7,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"Qwen/Qwen2.5-VL-7B-Instruct"
+            },
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":72,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"Qwen/Qwen2.5-VL-72B-Instruct"
+            },
+            {
+                "model_format":"mlx",
+                "model_size_in_billions":3,
+                "quantizations":[
+                    "3bit",
+                    "4bit",
+                    "6bit",
+                    "8bit",
+                    "bf16"
+                ],
+                "model_id":"mlx-community/Qwen2.5-VL-3B-Instruct-{quantization}"
+            },
+            {
+                "model_format":"mlx",
+                "model_size_in_billions":7,
+                "quantizations":[
+                    "3bit",
+                    "4bit",
+                    "6bit",
+                    "8bit",
+                    "bf16"
+                ],
+                "model_id":"mlx-community/Qwen2.5-VL-7B-Instruct-{quantization}"
+            },
+            {
+                "model_format":"mlx",
+                "model_size_in_billions":72,
+                "quantizations":[
+                    "3bit",
+                    "4bit",
+                    "6bit",
+                    "8bit",
+                    "bf16"
+                ],
+                "model_id":"mlx-community/Qwen2.5-VL-72B-Instruct-{quantization}"
+            }
+        ],
+        "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+        "stop_token_ids": [
+            151645,
+            151643
+        ],
+        "stop": [
+            "<|im_end|>",
+            "<|endoftext|>"
+        ]
+    },
     {
         "version": 1,
         "context_length": 32768,
@@ -7212,7 +7442,7 @@
             "zh"
         ],
         "model_ability":[
-            "
+            "generate",
             "audio"
         ],
         "model_description":"Qwen2-Audio: A large-scale audio-language model which is capable of accepting various audio signal inputs and performing audio analysis or direct textual responses with regard to speech instructions.",
@@ -7335,57 +7565,421 @@
         "model_description": "DeepSeek-V2-Chat-0628 is an improved version of DeepSeek-V2-Chat. ",
         "model_specs": [
             {
-                "model_format": "pytorch",
-                "model_size_in_billions": 236,
+                "model_format": "pytorch",
+                "model_size_in_billions": 236,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "deepseek-ai/DeepSeek-V2-Chat-0628",
+                "model_revision": "5d09e272c2b223830f4e84359cd9dd047a5d7c78"
+            }
+        ],
+        "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ '<|begin▁of▁sentence|>' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|User|>' + message['content'] }}{% elif message['role'] == 'assistant' %}{{ '<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>' }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|Assistant|>' }}{% endif %}",
+        "stop_token_ids": [
+            100001
+        ],
+        "stop": [
+            "<|end▁of▁sentence|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 128000,
+        "model_name": "deepseek-v2.5",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "DeepSeek-V2.5 is an upgraded version that combines DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct. The new model integrates the general and coding abilities of the two previous versions.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 236,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "deepseek-ai/DeepSeek-V2.5",
+                "model_revision": "24b08cb750e0c2757de112d2e16327cb21ed4833"
+            }
+        ],
+        "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %} {%- if message['role'] == 'system' %} {% set ns.system_prompt = message['content'] %} {%- endif %}{%- endfor %}{{'<|begin▁of▁sentence|>'}}{{ns.system_prompt}}{%- for message in messages %} {%- if message['role'] == 'user' %} {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is none %} {%- set ns.is_tool = false -%} {%- for tool in message['tool_calls']%} {%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}} {%- set ns.is_first = true -%} {%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} {%- endif %} {%- endfor %} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is not none %} {%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- set ns.is_tool = false -%} {%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- endif %} {%- endif %} {%- if message['role'] == 'tool' %} {%- set ns.is_tool = true -%} {%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- set ns.is_output_first = false %} {%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- endif %} {%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+        "stop_token_ids": [
+            100001
+        ],
+        "stop": [
+            "<|end▁of▁sentence|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 163840,
+        "model_name": "deepseek-v3",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. ",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 671,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "deepseek-ai/DeepSeek-V3",
+                "model_revision": "1d044fd82b15f1cedb197a288e50cc96a2c27205"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": 671,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "cognitivecomputations/DeepSeek-V3-AWQ"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 671,
+                "quantizations": [
+                    "Q2_K_L",
+                    "Q2_K_XS",
+                    "Q3_K_M",
+                    "Q4_K_M",
+                    "Q5_K_M",
+                    "Q6_K",
+                    "Q8_0"
+                ],
+                "model_id": "unsloth/DeepSeek-V3-GGUF",
+                "model_file_name_template": "DeepSeek-V3-{quantization}/DeepSeek-V3-{quantization}.gguf",
+                "model_file_name_split_template": "DeepSeek-V3-{quantization}/DeepSeek-V3-{quantization}-{part}.gguf",
+                "quantization_parts": {
+                    "Q2_K_L": [
+                        "00001-of-00005",
+                        "00002-of-00005",
+                        "00003-of-00005",
+                        "00004-of-00005",
+                        "00005-of-00005"
+                    ],
+                    "Q2_K_XS": [
+                        "00001-of-00005",
+                        "00002-of-00005",
+                        "00003-of-00005",
+                        "00004-of-00005",
+                        "00005-of-00005"
+                    ],
+                    "Q3_K_M": [
+                        "00001-of-00007",
+                        "00002-of-00007",
+                        "00003-of-00007",
+                        "00004-of-00007",
+                        "00005-of-00007",
+                        "00006-of-00007",
+                        "00007-of-00007"
+                    ],
+                    "Q4_K_M": [
+                        "00001-of-00009",
+                        "00002-of-00009",
+                        "00003-of-00009",
+                        "00004-of-00009",
+                        "00005-of-00009",
+                        "00006-of-00009",
+                        "00007-of-00009",
+                        "00008-of-00009",
+                        "00009-of-00009"
+                    ],
+                    "Q5_K_M": [
+                        "00001-of-00010",
+                        "00002-of-00010",
+                        "00003-of-00010",
+                        "00004-of-00010",
+                        "00005-of-00010",
+                        "00006-of-00010",
+                        "00007-of-00010",
+                        "00008-of-00010",
+                        "00009-of-00010",
+                        "00010-of-00010"
+                    ],
+                    "Q6_K": [
+                        "00001-of-00012",
+                        "00002-of-00012",
+                        "00003-of-00012",
+                        "00004-of-00012",
+                        "00005-of-00012",
+                        "00006-of-00012",
+                        "00007-of-00012",
+                        "00008-of-00012",
+                        "00009-of-00012",
+                        "00010-of-00012",
+                        "00011-of-00012",
+                        "00012-of-00012"
+                    ],
+                    "Q8_0": [
+                        "00001-of-00016",
+                        "00002-of-00016",
+                        "00003-of-00016",
+                        "00004-of-00016",
+                        "00005-of-00016",
+                        "00006-of-00016",
+                        "00007-of-00016",
+                        "00008-of-00016",
+                        "00009-of-00016",
+                        "00010-of-00016",
+                        "00011-of-00016",
+                        "00012-of-00016",
+                        "00013-of-00016",
+                        "00014-of-00016",
+                        "00015-of-00016",
+                        "00016-of-00016"
+                    ]
+                }
+            },
+            {
+                "model_format": "mlx",
+                "model_size_in_billions": 671,
+                "quantizations": [
+                    "3bit",
+                    "4bit"
+                ],
+                "model_id": "mlx-community/DeepSeek-V3-{quantization}"
+            }
+        ],
+        "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+        "stop_token_ids": [
+            1
+        ],
+        "stop": [
+            "<|end▁of▁sentence|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 163840,
+        "model_name": "deepseek-r1",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "reasoning"
+        ],
+        "model_description": "DeepSeek-R1, which incorporates cold-start data before RL. DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 671,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "deepseek-ai/DeepSeek-R1",
+                "model_revision": "8a58a132790c9935686eb97f042afa8013451c9f"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": 671,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "cognitivecomputations/DeepSeek-R1-AWQ"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 671,
+                "quantizations": [
+                    "UD-IQ1_S",
+                    "UD-IQ1_M",
+                    "UD-IQ2_XXS",
+                    "UD-Q2_K_XL",
+                    "Q2_K",
+                    "Q2_K_L",
+                    "Q2_K_XS",
+                    "Q3_K_M",
+                    "Q4_K_M",
+                    "Q5_K_M",
+                    "Q6_K",
+                    "Q8_0",
+                    "BF16"
+                ],
+                "model_id": "unsloth/DeepSeek-R1-GGUF",
+                "model_file_name_template": "DeepSeek-R1-{quantization}/DeepSeek-R1-{quantization}.gguf",
+                "model_file_name_split_template": "DeepSeek-R1-{quantization}/DeepSeek-R1-{quantization}-{part}.gguf",
+                "quantization_parts": {
+                    "UD-IQ1_S": [
+                        "00001-of-00003",
+                        "00002-of-00003",
+                        "00003-of-00003"
+                    ],
+                    "UD-IQ1_M": [
+                        "00001-of-00004",
+                        "00002-of-00004",
+                        "00003-of-00004",
+                        "00004-of-00004"
+                    ],
+                    "UD-IQ2_XXS": [
+                        "00001-of-00004",
+                        "00002-of-00004",
+                        "00003-of-00004",
+                        "00004-of-00004"
+                    ],
+                    "UD-Q2_K_XL": [
+                        "00001-of-00005",
+                        "00002-of-00005",
+                        "00003-of-00005",
+                        "00004-of-00005",
+                        "00005-of-00005"
+                    ],
+                    "Q2_K": [
+                        "00001-of-00005",
+                        "00002-of-00005",
+                        "00003-of-00005",
+                        "00004-of-00005",
+                        "00005-of-00005"
+                    ],
+                    "Q2_K_L": [
+                        "00001-of-00005",
+                        "00002-of-00005",
+                        "00003-of-00005",
+                        "00004-of-00005",
+                        "00005-of-00005"
+                    ],
+                    "Q2_K_XS": [
+                        "00001-of-00005",
+                        "00002-of-00005",
+                        "00003-of-00005",
+                        "00004-of-00005",
+                        "00005-of-00005"
+                    ],
+                    "Q3_K_M": [
+                        "00001-of-00007",
+                        "00002-of-00007",
+                        "00003-of-00007",
+                        "00004-of-00007",
+                        "00005-of-00007",
+                        "00006-of-00007",
+                        "00007-of-00007"
+                    ],
+                    "Q4_K_M": [
+                        "00001-of-00009",
+                        "00002-of-00009",
+                        "00003-of-00009",
+                        "00004-of-00009",
+                        "00005-of-00009",
+                        "00006-of-00009",
+                        "00007-of-00009",
+                        "00008-of-00009",
+                        "00009-of-00009"
+                    ],
+                    "Q5_K_M": [
+                        "00001-of-00010",
+                        "00002-of-00010",
+                        "00003-of-00010",
+                        "00004-of-00010",
+                        "00005-of-00010",
+                        "00006-of-00010",
+                        "00007-of-00010",
+                        "00008-of-00010",
+                        "00009-of-00010",
+                        "00010-of-00010"
+                    ],
+                    "Q6_K": [
+                        "00001-of-00012",
+                        "00002-of-00012",
+                        "00003-of-00012",
+                        "00004-of-00012",
+                        "00005-of-00012",
+                        "00006-of-00012",
+                        "00007-of-00012",
+                        "00008-of-00012",
+                        "00009-of-00012",
+                        "00010-of-00012",
+                        "00011-of-00012",
+                        "00012-of-00012"
+                    ],
+                    "Q8_0": [
+                        "00001-of-00015",
+                        "00002-of-00015",
+                        "00003-of-00015",
+                        "00004-of-00015",
+                        "00005-of-00015",
+                        "00006-of-00015",
+                        "00007-of-00015",
+                        "00008-of-00015",
+                        "00009-of-00015",
+                        "00010-of-00015",
+                        "00011-of-00015",
+                        "00012-of-00015",
+                        "00013-of-00015",
+                        "00014-of-00015",
+                        "00015-of-00015"
+                    ],
+                    "BF16": [
+                        "00001-of-00030",
+                        "00002-of-00030",
+                        "00003-of-00030",
+                        "00004-of-00030",
+                        "00005-of-00030",
+                        "00006-of-00030",
+                        "00007-of-00030",
+                        "00008-of-00030",
+                        "00009-of-00030",
+                        "00010-of-00030",
+                        "00011-of-00030",
+                        "00012-of-00030",
+                        "00013-of-00030",
+                        "00014-of-00030",
+                        "00015-of-00030",
+                        "00016-of-00030",
+                        "00017-of-00030",
+                        "00018-of-00030",
+                        "00019-of-00030",
+                        "00020-of-00030",
+                        "00021-of-00030",
+                        "00022-of-00030",
+                        "00023-of-00030",
+                        "00024-of-00030",
+                        "00025-of-00030",
+                        "00026-of-00030",
+                        "00027-of-00030",
+                        "00028-of-00030",
+                        "00029-of-00030",
+                        "00030-of-00030"
+                    ]
+                }
+            },
+            {
+                "model_format": "mlx",
+                "model_size_in_billions": 671,
                 "quantizations": [
-                    "
-                    "
-                    "
+                    "2bit",
+                    "3bit",
+                    "4bit"
                 ],
-                "model_id": "
-                "model_revision": "5d09e272c2b223830f4e84359cd9dd047a5d7c78"
+                "model_id": "mlx-community/DeepSeek-R1-{quantization}"
             }
         ],
-        "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ '
+        "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
         "stop_token_ids": [
-
+            1
         ],
         "stop": [
             "<|end▁of▁sentence|>"
-        ]
-    },
-    {
-        "version": 1,
-        "context_length": 128000,
-        "model_name": "deepseek-v2.5",
-        "model_lang": [
-            "en",
-            "zh"
-        ],
-        "model_ability": [
-            "chat"
-        ],
-        "model_description": "DeepSeek-V2.5 is an upgraded version that combines DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct. The new model integrates the general and coding abilities of the two previous versions.",
-        "model_specs": [
-            {
-                "model_format": "pytorch",
-                "model_size_in_billions": 236,
-                "quantizations": [
-                    "4-bit",
-                    "8-bit",
-                    "none"
-                ],
-                "model_id": "deepseek-ai/DeepSeek-V2.5",
-                "model_revision": "24b08cb750e0c2757de112d2e16327cb21ed4833"
-            }
-        ],
-        "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %} {%- if message['role'] == 'system' %} {% set ns.system_prompt = message['content'] %} {%- endif %}{%- endfor %}{{'<|begin▁of▁sentence|>'}}{{ns.system_prompt}}{%- for message in messages %} {%- if message['role'] == 'user' %} {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is none %} {%- set ns.is_tool = false -%} {%- for tool in message['tool_calls']%} {%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}} {%- set ns.is_first = true -%} {%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} {%- endif %} {%- endfor %} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is not none %} {%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- set ns.is_tool = false -%} {%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- endif %} {%- endif %} {%- if message['role'] == 'tool' %} {%- set ns.is_tool = true -%} {%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- set ns.is_output_first = false %} {%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- endif %} {%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
-        "stop_token_ids": [
-            100001
         ],
-        "
-
-        ]
+        "reasoning_start_tag": "<think>",
+        "reasoning_end_tag": "</think>"
     },
     {
         "version": 1,
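The `reasoning_start_tag`/`reasoning_end_tag` pair that now closes the deepseek-r1 entry works together with the new `xinference/model/llm/reasoning_parsers` package from the file list: it tells the runtime where a model's chain-of-thought ends and the final answer begins. A minimal illustration of the idea (a sketch only; the shipped parser in `deepseek_r1_reasoning_parser.py` additionally handles streamed chunks where the tags arrive piecemeal):

```python
# Illustrative sketch of what reasoning_start_tag / reasoning_end_tag enable:
# splitting the <think>...</think> span off from the final answer.
def split_reasoning(text: str, start_tag: str = "<think>", end_tag: str = "</think>"):
    start = text.find(start_tag)
    end = text.find(end_tag)
    if start == -1 or end == -1 or end < start:
        return None, text  # no complete reasoning block found
    reasoning = text[start + len(start_tag):end].strip()
    answer = text[end + len(end_tag):].strip()
    return reasoning, answer

reasoning, answer = split_reasoning("<think>2 + 2 = 4</think>The answer is 4.")
assert reasoning == "2 + 2 = 4"
assert answer == "The answer is 4."
```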
@@ -8725,7 +9319,8 @@
             "zh"
         ],
         "model_ability": [
-            "chat"
+            "chat",
+            "reasoning"
         ],
         "model_description": "deepseek-r1-distill-qwen is distilled from DeepSeek-R1 based on Qwen",
         "model_specs": [
@@ -8929,13 +9524,163 @@
                "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-32B-{quantization}"
            }
        ],
-        "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and
+        "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' in message %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{'<|Assistant|>' + message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' not in message %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
         "stop_token_ids": [
             151643
         ],
         "stop": [
             "<|end▁of▁sentence|>"
-        ]
+        ],
+        "reasoning_start_tag": "<think>",
+        "reasoning_end_tag": "</think>"
+    },
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "deepseek-r1-distill-llama",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "reasoning"
+        ],
+        "model_description": "deepseek-r1-distill-llama is distilled from DeepSeek-R1 based on Llama",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "jakiAJK/DeepSeek-R1-Distill-Llama-8B_AWQ"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "jakiAJK/DeepSeek-R1-Distill-Llama-8B_GPTQ-int4"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": "1_5",
+                "quantizations": [
+                    "Q2_K",
+                    "Q2_K_L",
+                    "Q3_K_M",
+                    "Q4_K_M",
+                    "Q5_K_M",
+                    "Q6_K",
+                    "Q8_0",
+                    "F16"
+                ],
+                "model_id": "unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF",
+                "model_file_name_template": "DeepSeek-R1-Distill-Llama-8B-{quantization}.gguf"
+            },
+            {
+                "model_format": "mlx",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "3bit",
+                    "4bit",
+                    "6bit",
+                    "8bit",
+                    "bf16"
+                ],
+                "model_id": "mlx-community/DeepSeek-R1-Distill-Llama-8B-{quantization}"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 70,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": 70,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "casperhansen/deepseek-r1-distill-llama-70b-awq"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": 70,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "empirischtech/DeepSeek-R1-Distill-Llama-70B-gptq-4bit"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 70,
+                "quantizations": [
+                    "Q2_K",
+                    "Q2_K_L",
+                    "Q3_K_M",
+                    "Q4_K_M",
+                    "Q5_K_M",
+                    "Q6_K",
+                    "Q8_0",
+                    "F16"
+                ],
+                "quantization_parts": {
+                    "Q6_K": [
+                        "00001-of-00002",
+                        "00002-of-00002"
+                    ],
+                    "Q8_0": [
+                        "00001-of-00002",
+                        "00002-of-00002"
+                    ],
+                    "F16": [
+                        "00001-of-00003",
+                        "00002-of-00003",
+                        "00003-of-00003"
+                    ]
+                },
+                "model_id": "unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF",
+                "model_file_name_template": "DeepSeek-R1-Distill-Qwen-7B-{quantization}.gguf",
+                "model_file_name_split_template": "DeepSeek-R1-Distill-Llama-70B-{quantization}/DeepSeek-R1-Distill-Llama-70B-{quantization}-{part}.gguf"
+            },
+            {
+                "model_format": "mlx",
+                "model_size_in_billions": 70,
+                "quantizations": [
+                    "3bit",
+                    "4bit",
+                    "6bit",
+                    "8bit"
+                ],
+                "model_id": "mlx-community/DeepSeek-R1-Distill-Llama-70B-{quantization}"
+            }
+        ],
+        "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
+        "stop_token_ids": [
+            151643
+        ],
+        "stop": [
+            "<|end▁of▁sentence|>"
+        ],
+        "reasoning_start_tag": "<think>",
+        "reasoning_end_tag": "</think>"
     },
     {
         "version": 1,
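For multi-file GGUF weights, the specs above pair `model_file_name_split_template` with a `quantization_parts` map so each quantization's shards can be enumerated. Assuming the `{quantization}` and `{part}` placeholders expand via plain string formatting (which is what the template strings suggest), the 70B `Q6_K` entry resolves like this:

```python
# Sketch: expanding the split-GGUF template from the 70B spec above, assuming
# the {quantization}/{part} placeholders are plain str.format fields.
split_template = (
    "DeepSeek-R1-Distill-Llama-70B-{quantization}/"
    "DeepSeek-R1-Distill-Llama-70B-{quantization}-{part}.gguf"
)
quantization_parts = {"Q6_K": ["00001-of-00002", "00002-of-00002"]}

files = [
    split_template.format(quantization=quant, part=part)
    for quant, parts in quantization_parts.items()
    for part in parts
]
print(files[0])
# DeepSeek-R1-Distill-Llama-70B-Q6_K/DeepSeek-R1-Distill-Llama-70B-Q6_K-00001-of-00002.gguf
```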
@@ -9306,5 +10051,80 @@
             "<|user|>",
             "<|observation|>"
         ]
+    },
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "internlm3-instruct",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "tools"
+        ],
+        "model_description": "InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "internlm/internlm3-8b-instruct"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "internlm/internlm3-8b-instruct-gptq-int4"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "internlm/internlm3-8b-instruct-awq"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 8,
+                "quantizations": [
+                    "q2_k",
+                    "q3_k_m",
+                    "q4_0",
+                    "q4_k_m",
+                    "q5_0",
+                    "q5_k_m",
+                    "q6_k",
+                    "q8_0"
+                ],
+                "model_id": "internlm/internlm3-8b-instruct-gguf",
+                "model_file_name_template": "internlm3-8b-instruct-{quantization}.gguf"
+            },
+            {
+                "model_format":"mlx",
+                "model_size_in_billions":8,
+                "quantizations":[
+                    "4bit"
+                ],
+                "model_id":"mlx-community/internlm3-8b-instruct-{quantization}"
+            }
+        ],
+        "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+        "stop_token_ids": [
+            2,
+            128131
+        ],
+        "stop": [
+            "</s>",
+            "<|im_end|>"
+        ]
     }
 ]