xinference 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (80) hide show
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +4 -7
  3. xinference/client/handlers.py +3 -0
  4. xinference/client/restful/restful_client.py +9 -1
  5. xinference/core/model.py +19 -0
  6. xinference/core/resource.py +7 -1
  7. xinference/core/scheduler.py +4 -7
  8. xinference/core/status_guard.py +1 -0
  9. xinference/core/supervisor.py +228 -19
  10. xinference/core/utils.py +1 -29
  11. xinference/core/worker.py +28 -2
  12. xinference/deploy/cmdline.py +33 -3
  13. xinference/deploy/local.py +2 -1
  14. xinference/deploy/test/test_cmdline.py +32 -0
  15. xinference/device_utils.py +43 -1
  16. xinference/model/audio/core.py +5 -0
  17. xinference/model/audio/kokoro.py +122 -0
  18. xinference/model/audio/model_spec.json +8 -0
  19. xinference/model/audio/model_spec_modelscope.json +9 -0
  20. xinference/model/image/stable_diffusion/core.py +15 -6
  21. xinference/model/llm/llama_cpp/core.py +21 -14
  22. xinference/model/llm/llm_family.json +866 -46
  23. xinference/model/llm/llm_family.py +7 -2
  24. xinference/model/llm/llm_family_modelscope.json +873 -16
  25. xinference/model/llm/mlx/core.py +11 -3
  26. xinference/model/llm/reasoning_parsers/__init__.py +13 -0
  27. xinference/model/llm/reasoning_parsers/abs_reasoning_parsers.py +98 -0
  28. xinference/model/llm/reasoning_parsers/deepseek_r1_reasoning_parser.py +140 -0
  29. xinference/model/llm/sglang/core.py +99 -11
  30. xinference/model/llm/transformers/core.py +9 -1
  31. xinference/model/llm/transformers/intern_vl.py +23 -14
  32. xinference/model/llm/transformers/qwen2_audio.py +3 -1
  33. xinference/model/llm/transformers/qwen2_vl.py +20 -3
  34. xinference/model/llm/transformers/utils.py +22 -11
  35. xinference/model/llm/utils.py +164 -20
  36. xinference/model/llm/vllm/core.py +36 -4
  37. xinference/model/llm/vllm/xavier/executor.py +2 -2
  38. xinference/model/llm/vllm/xavier/scheduler.py +3 -3
  39. xinference/thirdparty/internvl/conversation.py +26 -17
  40. xinference/types.py +2 -0
  41. xinference/web/ui/build/asset-manifest.json +6 -6
  42. xinference/web/ui/build/index.html +1 -1
  43. xinference/web/ui/build/static/css/main.f8177338.css +2 -0
  44. xinference/web/ui/build/static/css/main.f8177338.css.map +1 -0
  45. xinference/web/ui/build/static/js/main.ad42919c.js +3 -0
  46. xinference/web/ui/build/static/js/main.ad42919c.js.map +1 -0
  47. xinference/web/ui/node_modules/.cache/babel-loader/074a42304bbbaa79e1bfc3b28502457a390df55708de9006f4cc8e35c60aea87.json +1 -0
  48. xinference/web/ui/node_modules/.cache/babel-loader/0acb065326560592b10888234242f94f67efe28458b90f273d4d4fba9daa0cd2.json +1 -0
  49. xinference/web/ui/node_modules/.cache/babel-loader/279ace390216236a82b3d8995c78eca4d637ac9a523e9f521a2d9c76607a43d7.json +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/630a7bd592596cc6e291fc32238ce7c08238038a64ed8ccee0eb0c13c9902910.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/6cb9f6c62ab4042f0b11c5d75e51187188e9d6f5f08b1d63e796e051bafdb457.json +1 -0
  52. xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/914c33e91c1012e3bcd3e96f3a25884cbef148290632d0266dab972b8cc1e95f.json +1 -0
  54. xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/b7939cd3a48adf12fccfdd0803019b5cc235ff7de3a297dae70ce635e0eea13e.json +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/fecf076bcd198a458c2a6ab0e85e40dc1c99994c353164e79c469be162cb74c9.json +1 -0
  59. xinference/web/ui/src/locales/en.json +14 -1
  60. xinference/web/ui/src/locales/zh.json +14 -1
  61. {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/METADATA +18 -17
  62. {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/RECORD +67 -60
  63. xinference/web/ui/build/static/css/main.51a587ff.css +0 -2
  64. xinference/web/ui/build/static/css/main.51a587ff.css.map +0 -1
  65. xinference/web/ui/build/static/js/main.b0936c54.js +0 -3
  66. xinference/web/ui/build/static/js/main.b0936c54.js.map +0 -1
  67. xinference/web/ui/node_modules/.cache/babel-loader/0c2fb5375667931c4a331c99e0d87dc145e8f327cea3f44d6e56f54c7c1d4020.json +0 -1
  68. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +0 -1
  69. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +0 -1
  70. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +0 -1
  71. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +0 -1
  72. xinference/web/ui/node_modules/.cache/babel-loader/a7f1a71f6580dfe810c685a9c1d68e318f71e1fa258fbe50b87a6ac37cc0a598.json +0 -1
  73. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +0 -1
  74. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +0 -1
  75. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +0 -1
  76. /xinference/web/ui/build/static/js/{main.b0936c54.js.LICENSE.txt → main.ad42919c.js.LICENSE.txt} +0 -0
  77. {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/LICENSE +0 -0
  78. {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/WHEEL +0 -0
  79. {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/entry_points.txt +0 -0
  80. {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/top_level.txt +0 -0
@@ -6772,6 +6772,151 @@
6772
6772
  "stop_token_ids": [],
6773
6773
  "stop": []
6774
6774
  },
6775
+ {
6776
+ "version": 1,
6777
+ "context_length": 16384,
6778
+ "model_name": "InternVL2.5",
6779
+ "model_lang": [
6780
+ "en",
6781
+ "zh"
6782
+ ],
6783
+ "model_ability": [
6784
+ "chat",
6785
+ "vision"
6786
+ ],
6787
+ "model_description": "InternVL 2.5 is an open-source multimodal large language model (MLLM) designed to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding.",
6788
+ "model_specs": [
6789
+ {
6790
+ "model_format": "pytorch",
6791
+ "model_size_in_billions": 1,
6792
+ "quantizations": [
6793
+ "4-bit",
6794
+ "8-bit",
6795
+ "none"
6796
+ ],
6797
+ "model_id": "OpenGVLab/InternVL2_5-1B"
6798
+ },
6799
+ {
6800
+ "model_format": "awq",
6801
+ "model_size_in_billions": 1,
6802
+ "quantizations": [
6803
+ "Int4"
6804
+ ],
6805
+ "model_id": "OpenGVLab/InternVL2_5-1B-AWQ"
6806
+ },
6807
+ {
6808
+ "model_format": "pytorch",
6809
+ "model_size_in_billions": 2,
6810
+ "quantizations": [
6811
+ "4-bit",
6812
+ "8-bit",
6813
+ "none"
6814
+ ],
6815
+ "model_id": "OpenGVLab/InternVL2_5-2B"
6816
+ },
6817
+ {
6818
+ "model_format": "awq",
6819
+ "model_size_in_billions": 2,
6820
+ "quantizations": [
6821
+ "Int4"
6822
+ ],
6823
+ "model_id": "OpenGVLab/InternVL2_5-2B-AWQ"
6824
+ },
6825
+ {
6826
+ "model_format": "pytorch",
6827
+ "model_size_in_billions": 4,
6828
+ "quantizations": [
6829
+ "4-bit",
6830
+ "8-bit",
6831
+ "none"
6832
+ ],
6833
+ "model_id": "OpenGVLab/InternVL2_5-4B"
6834
+ },
6835
+ {
6836
+ "model_format": "awq",
6837
+ "model_size_in_billions": 4,
6838
+ "quantizations": [
6839
+ "Int4"
6840
+ ],
6841
+ "model_id": "OpenGVLab/InternVL2_5-4B-AWQ"
6842
+ },
6843
+ {
6844
+ "model_format": "pytorch",
6845
+ "model_size_in_billions": 8,
6846
+ "quantizations": [
6847
+ "4-bit",
6848
+ "8-bit",
6849
+ "none"
6850
+ ],
6851
+ "model_id": "OpenGVLab/InternVL2_5-8B"
6852
+ },
6853
+ {
6854
+ "model_format": "awq",
6855
+ "model_size_in_billions": 8,
6856
+ "quantizations": [
6857
+ "Int4"
6858
+ ],
6859
+ "model_id": "OpenGVLab/InternVL2_5-8B-AWQ"
6860
+ },
6861
+ {
6862
+ "model_format": "pytorch",
6863
+ "model_size_in_billions": 26,
6864
+ "quantizations": [
6865
+ "4-bit",
6866
+ "8-bit",
6867
+ "none"
6868
+ ],
6869
+ "model_id": "OpenGVLab/InternVL2_5-26B"
6870
+ },
6871
+ {
6872
+ "model_format": "awq",
6873
+ "model_size_in_billions": 26,
6874
+ "quantizations": [
6875
+ "Int4"
6876
+ ],
6877
+ "model_id": "OpenGVLab/InternVL2_5-26B-AWQ"
6878
+ },
6879
+ {
6880
+ "model_format": "pytorch",
6881
+ "model_size_in_billions": 38,
6882
+ "quantizations": [
6883
+ "4-bit",
6884
+ "8-bit",
6885
+ "none"
6886
+ ],
6887
+ "model_id": "OpenGVLab/InternVL2_5-38B"
6888
+ },
6889
+ {
6890
+ "model_format": "awq",
6891
+ "model_size_in_billions": 38,
6892
+ "quantizations": [
6893
+ "Int4"
6894
+ ],
6895
+ "model_id": "OpenGVLab/InternVL2_5-38B-AWQ"
6896
+ },
6897
+ {
6898
+ "model_format": "pytorch",
6899
+ "model_size_in_billions": 78,
6900
+ "quantizations": [
6901
+ "4-bit",
6902
+ "8-bit",
6903
+ "none"
6904
+ ],
6905
+ "model_id": "OpenGVLab/InternVL2_5-78B"
6906
+ },
6907
+ {
6908
+ "model_format": "awq",
6909
+ "model_size_in_billions": 78,
6910
+ "quantizations": [
6911
+ "Int4"
6912
+ ],
6913
+ "model_id": "OpenGVLab/InternVL2_5-78B-AWQ"
6914
+ }
6915
+ ],
6916
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
6917
+ "stop_token_ids": [],
6918
+ "stop": []
6919
+ },
6775
6920
  {
6776
6921
  "version": 1,
6777
6922
  "context_length": 8192,
@@ -7125,6 +7270,91 @@
7125
7270
  "<|endoftext|>"
7126
7271
  ]
7127
7272
  },
7273
+ {
7274
+ "version":1,
7275
+ "context_length":128000,
7276
+ "model_name":"qwen2.5-vl-instruct",
7277
+ "model_lang":[
7278
+ "en",
7279
+ "zh"
7280
+ ],
7281
+ "model_ability":[
7282
+ "chat",
7283
+ "vision"
7284
+ ],
7285
+ "model_description":"Qwen2.5-VL: Qwen2.5-VL is the latest version of the vision language models in the Qwen model family.",
7286
+ "model_specs":[
7287
+ {
7288
+ "model_format":"pytorch",
7289
+ "model_size_in_billions":3,
7290
+ "quantizations":[
7291
+ "none"
7292
+ ],
7293
+ "model_id":"Qwen/Qwen2.5-VL-3B-Instruct"
7294
+ },
7295
+ {
7296
+ "model_format":"pytorch",
7297
+ "model_size_in_billions":7,
7298
+ "quantizations":[
7299
+ "none"
7300
+ ],
7301
+ "model_id":"Qwen/Qwen2.5-VL-7B-Instruct"
7302
+ },
7303
+ {
7304
+ "model_format":"pytorch",
7305
+ "model_size_in_billions":72,
7306
+ "quantizations":[
7307
+ "none"
7308
+ ],
7309
+ "model_id":"Qwen/Qwen2.5-VL-72B-Instruct"
7310
+ },
7311
+ {
7312
+ "model_format":"mlx",
7313
+ "model_size_in_billions":3,
7314
+ "quantizations":[
7315
+ "3bit",
7316
+ "4bit",
7317
+ "6bit",
7318
+ "8bit",
7319
+ "bf16"
7320
+ ],
7321
+ "model_id":"mlx-community/Qwen2.5-VL-3B-Instruct-{quantization}"
7322
+ },
7323
+ {
7324
+ "model_format":"mlx",
7325
+ "model_size_in_billions":7,
7326
+ "quantizations":[
7327
+ "3bit",
7328
+ "4bit",
7329
+ "6bit",
7330
+ "8bit",
7331
+ "bf16"
7332
+ ],
7333
+ "model_id":"mlx-community/Qwen2.5-VL-7B-Instruct-{quantization}"
7334
+ },
7335
+ {
7336
+ "model_format":"mlx",
7337
+ "model_size_in_billions":72,
7338
+ "quantizations":[
7339
+ "3bit",
7340
+ "4bit",
7341
+ "6bit",
7342
+ "8bit",
7343
+ "bf16"
7344
+ ],
7345
+ "model_id":"mlx-community/Qwen2.5-VL-72B-Instruct-{quantization}"
7346
+ }
7347
+ ],
7348
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
7349
+ "stop_token_ids": [
7350
+ 151645,
7351
+ 151643
7352
+ ],
7353
+ "stop": [
7354
+ "<|im_end|>",
7355
+ "<|endoftext|>"
7356
+ ]
7357
+ },
7128
7358
  {
7129
7359
  "version": 1,
7130
7360
  "context_length": 32768,
@@ -7212,7 +7442,7 @@
7212
7442
  "zh"
7213
7443
  ],
7214
7444
  "model_ability":[
7215
- "chat",
7445
+ "generate",
7216
7446
  "audio"
7217
7447
  ],
7218
7448
  "model_description":"Qwen2-Audio: A large-scale audio-language model which is capable of accepting various audio signal inputs and performing audio analysis or direct textual responses with regard to speech instructions.",
@@ -7335,57 +7565,421 @@
7335
7565
  "model_description": "DeepSeek-V2-Chat-0628 is an improved version of DeepSeek-V2-Chat. ",
7336
7566
  "model_specs": [
7337
7567
  {
7338
- "model_format": "pytorch",
7339
- "model_size_in_billions": 236,
7568
+ "model_format": "pytorch",
7569
+ "model_size_in_billions": 236,
7570
+ "quantizations": [
7571
+ "4-bit",
7572
+ "8-bit",
7573
+ "none"
7574
+ ],
7575
+ "model_id": "deepseek-ai/DeepSeek-V2-Chat-0628",
7576
+ "model_revision": "5d09e272c2b223830f4e84359cd9dd047a5d7c78"
7577
+ }
7578
+ ],
7579
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ '<|begin▁of▁sentence|>' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|User|>' + message['content'] }}{% elif message['role'] == 'assistant' %}{{ '<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>' }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|Assistant|>' }}{% endif %}",
7580
+ "stop_token_ids": [
7581
+ 100001
7582
+ ],
7583
+ "stop": [
7584
+ "<|end▁of▁sentence|>"
7585
+ ]
7586
+ },
7587
+ {
7588
+ "version": 1,
7589
+ "context_length": 128000,
7590
+ "model_name": "deepseek-v2.5",
7591
+ "model_lang": [
7592
+ "en",
7593
+ "zh"
7594
+ ],
7595
+ "model_ability": [
7596
+ "chat"
7597
+ ],
7598
+ "model_description": "DeepSeek-V2.5 is an upgraded version that combines DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct. The new model integrates the general and coding abilities of the two previous versions.",
7599
+ "model_specs": [
7600
+ {
7601
+ "model_format": "pytorch",
7602
+ "model_size_in_billions": 236,
7603
+ "quantizations": [
7604
+ "4-bit",
7605
+ "8-bit",
7606
+ "none"
7607
+ ],
7608
+ "model_id": "deepseek-ai/DeepSeek-V2.5",
7609
+ "model_revision": "24b08cb750e0c2757de112d2e16327cb21ed4833"
7610
+ }
7611
+ ],
7612
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %} {%- if message['role'] == 'system' %} {% set ns.system_prompt = message['content'] %} {%- endif %}{%- endfor %}{{'<|begin▁of▁sentence|>'}}{{ns.system_prompt}}{%- for message in messages %} {%- if message['role'] == 'user' %} {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is none %} {%- set ns.is_tool = false -%} {%- for tool in message['tool_calls']%} {%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}} {%- set ns.is_first = true -%} {%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} {%- endif %} {%- endfor %} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is not none %} {%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- set ns.is_tool = false -%} {%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- endif %} {%- endif %} {%- if message['role'] == 'tool' %} {%- set ns.is_tool = true -%} {%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- set ns.is_output_first = false %} {%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- endif %} {%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not 
ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
7613
+ "stop_token_ids": [
7614
+ 100001
7615
+ ],
7616
+ "stop": [
7617
+ "<|end▁of▁sentence|>"
7618
+ ]
7619
+ },
7620
+ {
7621
+ "version": 1,
7622
+ "context_length": 163840,
7623
+ "model_name": "deepseek-v3",
7624
+ "model_lang": [
7625
+ "en",
7626
+ "zh"
7627
+ ],
7628
+ "model_ability": [
7629
+ "chat"
7630
+ ],
7631
+ "model_description": "DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters, of which 37B are activated for each token.",
7632
+ "model_specs": [
7633
+ {
7634
+ "model_format": "pytorch",
7635
+ "model_size_in_billions": 671,
7636
+ "quantizations": [
7637
+ "4-bit",
7638
+ "8-bit",
7639
+ "none"
7640
+ ],
7641
+ "model_id": "deepseek-ai/DeepSeek-V3",
7642
+ "model_revision": "1d044fd82b15f1cedb197a288e50cc96a2c27205"
7643
+ },
7644
+ {
7645
+ "model_format": "awq",
7646
+ "model_size_in_billions": 671,
7647
+ "quantizations": [
7648
+ "Int4"
7649
+ ],
7650
+ "model_id": "cognitivecomputations/DeepSeek-V3-AWQ"
7651
+ },
7652
+ {
7653
+ "model_format": "ggufv2",
7654
+ "model_size_in_billions": 671,
7655
+ "quantizations": [
7656
+ "Q2_K_L",
7657
+ "Q2_K_XS",
7658
+ "Q3_K_M",
7659
+ "Q4_K_M",
7660
+ "Q5_K_M",
7661
+ "Q6_K",
7662
+ "Q8_0"
7663
+ ],
7664
+ "model_id": "unsloth/DeepSeek-V3-GGUF",
7665
+ "model_file_name_template": "DeepSeek-V3-{quantization}/DeepSeek-V3-{quantization}.gguf",
7666
+ "model_file_name_split_template": "DeepSeek-V3-{quantization}/DeepSeek-V3-{quantization}-{part}.gguf",
7667
+ "quantization_parts": {
7668
+ "Q2_K_L": [
7669
+ "00001-of-00005",
7670
+ "00002-of-00005",
7671
+ "00003-of-00005",
7672
+ "00004-of-00005",
7673
+ "00005-of-00005"
7674
+ ],
7675
+ "Q2_K_XS": [
7676
+ "00001-of-00005",
7677
+ "00002-of-00005",
7678
+ "00003-of-00005",
7679
+ "00004-of-00005",
7680
+ "00005-of-00005"
7681
+ ],
7682
+ "Q3_K_M": [
7683
+ "00001-of-00007",
7684
+ "00002-of-00007",
7685
+ "00003-of-00007",
7686
+ "00004-of-00007",
7687
+ "00005-of-00007",
7688
+ "00006-of-00007",
7689
+ "00007-of-00007"
7690
+ ],
7691
+ "Q4_K_M": [
7692
+ "00001-of-00009",
7693
+ "00002-of-00009",
7694
+ "00003-of-00009",
7695
+ "00004-of-00009",
7696
+ "00005-of-00009",
7697
+ "00006-of-00009",
7698
+ "00007-of-00009",
7699
+ "00008-of-00009",
7700
+ "00009-of-00009"
7701
+ ],
7702
+ "Q5_K_M": [
7703
+ "00001-of-00010",
7704
+ "00002-of-00010",
7705
+ "00003-of-00010",
7706
+ "00004-of-00010",
7707
+ "00005-of-00010",
7708
+ "00006-of-00010",
7709
+ "00007-of-00010",
7710
+ "00008-of-00010",
7711
+ "00009-of-00010",
7712
+ "00010-of-00010"
7713
+ ],
7714
+ "Q6_K": [
7715
+ "00001-of-00012",
7716
+ "00002-of-00012",
7717
+ "00003-of-00012",
7718
+ "00004-of-00012",
7719
+ "00005-of-00012",
7720
+ "00006-of-00012",
7721
+ "00007-of-00012",
7722
+ "00008-of-00012",
7723
+ "00009-of-00012",
7724
+ "00010-of-00012",
7725
+ "00011-of-00012",
7726
+ "00012-of-00012"
7727
+ ],
7728
+ "Q8_0": [
7729
+ "00001-of-00016",
7730
+ "00002-of-00016",
7731
+ "00003-of-00016",
7732
+ "00004-of-00016",
7733
+ "00005-of-00016",
7734
+ "00006-of-00016",
7735
+ "00007-of-00016",
7736
+ "00008-of-00016",
7737
+ "00009-of-00016",
7738
+ "00010-of-00016",
7739
+ "00011-of-00016",
7740
+ "00012-of-00016",
7741
+ "00013-of-00016",
7742
+ "00014-of-00016",
7743
+ "00015-of-00016",
7744
+ "00016-of-00016"
7745
+ ]
7746
+ }
7747
+ },
7748
+ {
7749
+ "model_format": "mlx",
7750
+ "model_size_in_billions": 671,
7751
+ "quantizations": [
7752
+ "3bit",
7753
+ "4bit"
7754
+ ],
7755
+ "model_id": "mlx-community/DeepSeek-V3-{quantization}"
7756
+ }
7757
+ ],
7758
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + 
'<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
7759
+ "stop_token_ids": [
7760
+ 1
7761
+ ],
7762
+ "stop": [
7763
+ "<|end▁of▁sentence|>"
7764
+ ]
7765
+ },
7766
+ {
7767
+ "version": 1,
7768
+ "context_length": 163840,
7769
+ "model_name": "deepseek-r1",
7770
+ "model_lang": [
7771
+ "en",
7772
+ "zh"
7773
+ ],
7774
+ "model_ability": [
7775
+ "chat",
7776
+ "reasoning"
7777
+ ],
7778
+ "model_description": "DeepSeek-R1 incorporates cold-start data before RL and achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.",
7779
+ "model_specs": [
7780
+ {
7781
+ "model_format": "pytorch",
7782
+ "model_size_in_billions": 671,
7783
+ "quantizations": [
7784
+ "4-bit",
7785
+ "8-bit",
7786
+ "none"
7787
+ ],
7788
+ "model_id": "deepseek-ai/DeepSeek-R1",
7789
+ "model_revision": "8a58a132790c9935686eb97f042afa8013451c9f"
7790
+ },
7791
+ {
7792
+ "model_format": "awq",
7793
+ "model_size_in_billions": 671,
7794
+ "quantizations": [
7795
+ "Int4"
7796
+ ],
7797
+ "model_id": "cognitivecomputations/DeepSeek-R1-AWQ"
7798
+ },
7799
+ {
7800
+ "model_format": "ggufv2",
7801
+ "model_size_in_billions": 671,
7802
+ "quantizations": [
7803
+ "UD-IQ1_S",
7804
+ "UD-IQ1_M",
7805
+ "UD-IQ2_XXS",
7806
+ "UD-Q2_K_XL",
7807
+ "Q2_K",
7808
+ "Q2_K_L",
7809
+ "Q2_K_XS",
7810
+ "Q3_K_M",
7811
+ "Q4_K_M",
7812
+ "Q5_K_M",
7813
+ "Q6_K",
7814
+ "Q8_0",
7815
+ "BF16"
7816
+ ],
7817
+ "model_id": "unsloth/DeepSeek-R1-GGUF",
7818
+ "model_file_name_template": "DeepSeek-R1-{quantization}/DeepSeek-R1-{quantization}.gguf",
7819
+ "model_file_name_split_template": "DeepSeek-R1-{quantization}/DeepSeek-R1-{quantization}-{part}.gguf",
7820
+ "quantization_parts": {
7821
+ "UD-IQ1_S": [
7822
+ "00001-of-00003",
7823
+ "00002-of-00003",
7824
+ "00003-of-00003"
7825
+ ],
7826
+ "UD-IQ1_M": [
7827
+ "00001-of-00004",
7828
+ "00002-of-00004",
7829
+ "00003-of-00004",
7830
+ "00004-of-00004"
7831
+ ],
7832
+ "UD-IQ2_XXS": [
7833
+ "00001-of-00004",
7834
+ "00002-of-00004",
7835
+ "00003-of-00004",
7836
+ "00004-of-00004"
7837
+ ],
7838
+ "UD-Q2_K_XL": [
7839
+ "00001-of-00005",
7840
+ "00002-of-00005",
7841
+ "00003-of-00005",
7842
+ "00004-of-00005",
7843
+ "00005-of-00005"
7844
+ ],
7845
+ "Q2_K": [
7846
+ "00001-of-00005",
7847
+ "00002-of-00005",
7848
+ "00003-of-00005",
7849
+ "00004-of-00005",
7850
+ "00005-of-00005"
7851
+ ],
7852
+ "Q2_K_L": [
7853
+ "00001-of-00005",
7854
+ "00002-of-00005",
7855
+ "00003-of-00005",
7856
+ "00004-of-00005",
7857
+ "00005-of-00005"
7858
+ ],
7859
+ "Q2_K_XS": [
7860
+ "00001-of-00005",
7861
+ "00002-of-00005",
7862
+ "00003-of-00005",
7863
+ "00004-of-00005",
7864
+ "00005-of-00005"
7865
+ ],
7866
+ "Q3_K_M": [
7867
+ "00001-of-00007",
7868
+ "00002-of-00007",
7869
+ "00003-of-00007",
7870
+ "00004-of-00007",
7871
+ "00005-of-00007",
7872
+ "00006-of-00007",
7873
+ "00007-of-00007"
7874
+ ],
7875
+ "Q4_K_M": [
7876
+ "00001-of-00009",
7877
+ "00002-of-00009",
7878
+ "00003-of-00009",
7879
+ "00004-of-00009",
7880
+ "00005-of-00009",
7881
+ "00006-of-00009",
7882
+ "00007-of-00009",
7883
+ "00008-of-00009",
7884
+ "00009-of-00009"
7885
+ ],
7886
+ "Q5_K_M": [
7887
+ "00001-of-00010",
7888
+ "00002-of-00010",
7889
+ "00003-of-00010",
7890
+ "00004-of-00010",
7891
+ "00005-of-00010",
7892
+ "00006-of-00010",
7893
+ "00007-of-00010",
7894
+ "00008-of-00010",
7895
+ "00009-of-00010",
7896
+ "00010-of-00010"
7897
+ ],
7898
+ "Q6_K": [
7899
+ "00001-of-00012",
7900
+ "00002-of-00012",
7901
+ "00003-of-00012",
7902
+ "00004-of-00012",
7903
+ "00005-of-00012",
7904
+ "00006-of-00012",
7905
+ "00007-of-00012",
7906
+ "00008-of-00012",
7907
+ "00009-of-00012",
7908
+ "00010-of-00012",
7909
+ "00011-of-00012",
7910
+ "00012-of-00012"
7911
+ ],
7912
+ "Q8_0": [
7913
+ "00001-of-00015",
7914
+ "00002-of-00015",
7915
+ "00003-of-00015",
7916
+ "00004-of-00015",
7917
+ "00005-of-00015",
7918
+ "00006-of-00015",
7919
+ "00007-of-00015",
7920
+ "00008-of-00015",
7921
+ "00009-of-00015",
7922
+ "00010-of-00015",
7923
+ "00011-of-00015",
7924
+ "00012-of-00015",
7925
+ "00013-of-00015",
7926
+ "00014-of-00015",
7927
+ "00015-of-00015"
7928
+ ],
7929
+ "BF16": [
7930
+ "00001-of-00030",
7931
+ "00002-of-00030",
7932
+ "00003-of-00030",
7933
+ "00004-of-00030",
7934
+ "00005-of-00030",
7935
+ "00006-of-00030",
7936
+ "00007-of-00030",
7937
+ "00008-of-00030",
7938
+ "00009-of-00030",
7939
+ "00010-of-00030",
7940
+ "00011-of-00030",
7941
+ "00012-of-00030",
7942
+ "00013-of-00030",
7943
+ "00014-of-00030",
7944
+ "00015-of-00030",
7945
+ "00016-of-00030",
7946
+ "00017-of-00030",
7947
+ "00018-of-00030",
7948
+ "00019-of-00030",
7949
+ "00020-of-00030",
7950
+ "00021-of-00030",
7951
+ "00022-of-00030",
7952
+ "00023-of-00030",
7953
+ "00024-of-00030",
7954
+ "00025-of-00030",
7955
+ "00026-of-00030",
7956
+ "00027-of-00030",
7957
+ "00028-of-00030",
7958
+ "00029-of-00030",
7959
+ "00030-of-00030"
7960
+ ]
7961
+ }
7962
+ },
7963
+ {
7964
+ "model_format": "mlx",
7965
+ "model_size_in_billions": 671,
7340
7966
  "quantizations": [
7341
- "4-bit",
7342
- "8-bit",
7343
- "none"
7967
+ "2bit",
7968
+ "3bit",
7969
+ "4bit"
7344
7970
  ],
7345
- "model_id": "deepseek-ai/DeepSeek-V2-Chat-0628",
7346
- "model_revision": "5d09e272c2b223830f4e84359cd9dd047a5d7c78"
7971
+ "model_id": "mlx-community/DeepSeek-R1-{quantization}"
7347
7972
  }
7348
7973
  ],
7349
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ '<|begin▁of▁sentence|>' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|User|>' + message['content'] }}{% elif message['role'] == 'assistant' %}{{ '<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>' }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|Assistant|>' }}{% endif %}",
7974
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool 
%}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
7350
7975
  "stop_token_ids": [
7351
- 100001
7976
+ 1
7352
7977
  ],
7353
7978
  "stop": [
7354
7979
  "<|end▁of▁sentence|>"
7355
- ]
7356
- },
7357
- {
7358
- "version": 1,
7359
- "context_length": 128000,
7360
- "model_name": "deepseek-v2.5",
7361
- "model_lang": [
7362
- "en",
7363
- "zh"
7364
- ],
7365
- "model_ability": [
7366
- "chat"
7367
- ],
7368
- "model_description": "DeepSeek-V2.5 is an upgraded version that combines DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct. The new model integrates the general and coding abilities of the two previous versions.",
7369
- "model_specs": [
7370
- {
7371
- "model_format": "pytorch",
7372
- "model_size_in_billions": 236,
7373
- "quantizations": [
7374
- "4-bit",
7375
- "8-bit",
7376
- "none"
7377
- ],
7378
- "model_id": "deepseek-ai/DeepSeek-V2.5",
7379
- "model_revision": "24b08cb750e0c2757de112d2e16327cb21ed4833"
7380
- }
7381
- ],
7382
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %} {%- if message['role'] == 'system' %} {% set ns.system_prompt = message['content'] %} {%- endif %}{%- endfor %}{{'<|begin▁of▁sentence|>'}}{{ns.system_prompt}}{%- for message in messages %} {%- if message['role'] == 'user' %} {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is none %} {%- set ns.is_tool = false -%} {%- for tool in message['tool_calls']%} {%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}} {%- set ns.is_first = true -%} {%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} {%- endif %} {%- endfor %} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is not none %} {%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- set ns.is_tool = false -%} {%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- endif %} {%- endif %} {%- if message['role'] == 'tool' %} {%- set ns.is_tool = true -%} {%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- set ns.is_output_first = false %} {%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- endif %} {%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not 
ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
7383
- "stop_token_ids": [
7384
- 100001
7385
7980
  ],
7386
- "stop": [
7387
- "<|end▁of▁sentence|>"
7388
- ]
7981
+ "reasoning_start_tag": "<think>",
7982
+ "reasoning_end_tag": "</think>"
7389
7983
  },
7390
7984
  {
7391
7985
  "version": 1,
@@ -8725,7 +9319,8 @@
8725
9319
  "zh"
8726
9320
  ],
8727
9321
  "model_ability": [
8728
- "chat"
9322
+ "chat",
9323
+ "reasoning"
8729
9324
  ],
8730
9325
  "model_description": "deepseek-r1-distill-qwen is distilled from DeepSeek-R1 based on Qwen",
8731
9326
  "model_specs": [
@@ -8929,13 +9524,163 @@
8929
9524
  "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-32B-{quantization}"
8930
9525
  }
8931
9526
  ],
8932
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool 
%}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
9527
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' in message %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{'<|Assistant|>' + message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' not in message %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = 
content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
8933
9528
  "stop_token_ids": [
8934
9529
  151643
8935
9530
  ],
8936
9531
  "stop": [
8937
9532
  "<|end▁of▁sentence|>"
8938
- ]
9533
+ ],
9534
+ "reasoning_start_tag": "<think>",
9535
+ "reasoning_end_tag": "</think>"
9536
+ },
9537
+ {
9538
+ "version": 1,
9539
+ "context_length": 131072,
9540
+ "model_name": "deepseek-r1-distill-llama",
9541
+ "model_lang": [
9542
+ "en",
9543
+ "zh"
9544
+ ],
9545
+ "model_ability": [
9546
+ "chat",
9547
+ "reasoning"
9548
+ ],
9549
+ "model_description": "deepseek-r1-distill-llama is distilled from DeepSeek-R1 based on Llama",
9550
+ "model_specs": [
9551
+ {
9552
+ "model_format": "pytorch",
9553
+ "model_size_in_billions": 8,
9554
+ "quantizations": [
9555
+ "4-bit",
9556
+ "8-bit",
9557
+ "none"
9558
+ ],
9559
+ "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
9560
+ },
9561
+ {
9562
+ "model_format": "awq",
9563
+ "model_size_in_billions": 8,
9564
+ "quantizations": [
9565
+ "Int4"
9566
+ ],
9567
+ "model_id": "jakiAJK/DeepSeek-R1-Distill-Llama-8B_AWQ"
9568
+ },
9569
+ {
9570
+ "model_format": "gptq",
9571
+ "model_size_in_billions": 8,
9572
+ "quantizations": [
9573
+ "Int4"
9574
+ ],
9575
+ "model_id": "jakiAJK/DeepSeek-R1-Distill-Llama-8B_GPTQ-int4"
9576
+ },
9577
+ {
9578
+ "model_format": "ggufv2",
9579
+ "model_size_in_billions": 8,
9580
+ "quantizations": [
9581
+ "Q2_K",
9582
+ "Q2_K_L",
9583
+ "Q3_K_M",
9584
+ "Q4_K_M",
9585
+ "Q5_K_M",
9586
+ "Q6_K",
9587
+ "Q8_0",
9588
+ "F16"
9589
+ ],
9590
+ "model_id": "unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF",
9591
+ "model_file_name_template": "DeepSeek-R1-Distill-Llama-8B-{quantization}.gguf"
9592
+ },
9593
+ {
9594
+ "model_format": "mlx",
9595
+ "model_size_in_billions": 8,
9596
+ "quantizations": [
9597
+ "3bit",
9598
+ "4bit",
9599
+ "6bit",
9600
+ "8bit",
9601
+ "bf16"
9602
+ ],
9603
+ "model_id": "mlx-community/DeepSeek-R1-Distill-Llama-8B-{quantization}"
9604
+ },
9605
+ {
9606
+ "model_format": "pytorch",
9607
+ "model_size_in_billions": 70,
9608
+ "quantizations": [
9609
+ "4-bit",
9610
+ "8-bit",
9611
+ "none"
9612
+ ],
9613
+ "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
9614
+ },
9615
+ {
9616
+ "model_format": "awq",
9617
+ "model_size_in_billions": 70,
9618
+ "quantizations": [
9619
+ "Int4"
9620
+ ],
9621
+ "model_id": "casperhansen/deepseek-r1-distill-llama-70b-awq"
9622
+ },
9623
+ {
9624
+ "model_format": "gptq",
9625
+ "model_size_in_billions": 70,
9626
+ "quantizations": [
9627
+ "Int4"
9628
+ ],
9629
+ "model_id": "empirischtech/DeepSeek-R1-Distill-Llama-70B-gptq-4bit"
9630
+ },
9631
+ {
9632
+ "model_format": "ggufv2",
9633
+ "model_size_in_billions": 70,
9634
+ "quantizations": [
9635
+ "Q2_K",
9636
+ "Q2_K_L",
9637
+ "Q3_K_M",
9638
+ "Q4_K_M",
9639
+ "Q5_K_M",
9640
+ "Q6_K",
9641
+ "Q8_0",
9642
+ "F16"
9643
+ ],
9644
+ "quantization_parts": {
9645
+ "Q6_K": [
9646
+ "00001-of-00002",
9647
+ "00002-of-00002"
9648
+ ],
9649
+ "Q8_0": [
9650
+ "00001-of-00002",
9651
+ "00002-of-00002"
9652
+ ],
9653
+ "F16": [
9654
+ "00001-of-00003",
9655
+ "00002-of-00003",
9656
+ "00003-of-00003"
9657
+ ]
9658
+ },
9659
+ "model_id": "unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF",
9660
+ "model_file_name_template": "DeepSeek-R1-Distill-Llama-70B-{quantization}.gguf",
9661
+ "model_file_name_split_template": "DeepSeek-R1-Distill-Llama-70B-{quantization}/DeepSeek-R1-Distill-Llama-70B-{quantization}-{part}.gguf"
9662
+ },
9663
+ {
9664
+ "model_format": "mlx",
9665
+ "model_size_in_billions": 70,
9666
+ "quantizations": [
9667
+ "3bit",
9668
+ "4bit",
9669
+ "6bit",
9670
+ "8bit"
9671
+ ],
9672
+ "model_id": "mlx-community/DeepSeek-R1-Distill-Llama-70B-{quantization}"
9673
+ }
9674
+ ],
9675
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool 
%}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
9676
+ "stop_token_ids": [
9677
+ 128001
9678
+ ],
9679
+ "stop": [
9680
+ "<|end▁of▁sentence|>"
9681
+ ],
9682
+ "reasoning_start_tag": "<think>",
9683
+ "reasoning_end_tag": "</think>"
8939
9684
  },
8940
9685
  {
8941
9686
  "version": 1,
@@ -9306,5 +10051,80 @@
9306
10051
  "<|user|>",
9307
10052
  "<|observation|>"
9308
10053
  ]
10054
+ },
10055
+ {
10056
+ "version": 1,
10057
+ "context_length": 32768,
10058
+ "model_name": "internlm3-instruct",
10059
+ "model_lang": [
10060
+ "en",
10061
+ "zh"
10062
+ ],
10063
+ "model_ability": [
10064
+ "chat",
10065
+ "tools"
10066
+ ],
10067
+ "model_description": "InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning.",
10068
+ "model_specs": [
10069
+ {
10070
+ "model_format": "pytorch",
10071
+ "model_size_in_billions": 8,
10072
+ "quantizations": [
10073
+ "4-bit",
10074
+ "8-bit",
10075
+ "none"
10076
+ ],
10077
+ "model_id": "internlm/internlm3-8b-instruct"
10078
+ },
10079
+ {
10080
+ "model_format": "gptq",
10081
+ "model_size_in_billions": 8,
10082
+ "quantizations": [
10083
+ "Int4"
10084
+ ],
10085
+ "model_id": "internlm/internlm3-8b-instruct-gptq-int4"
10086
+ },
10087
+ {
10088
+ "model_format": "awq",
10089
+ "model_size_in_billions": 8,
10090
+ "quantizations": [
10091
+ "Int4"
10092
+ ],
10093
+ "model_id": "internlm/internlm3-8b-instruct-awq"
10094
+ },
10095
+ {
10096
+ "model_format": "ggufv2",
10097
+ "model_size_in_billions": 8,
10098
+ "quantizations": [
10099
+ "q2_k",
10100
+ "q3_k_m",
10101
+ "q4_0",
10102
+ "q4_k_m",
10103
+ "q5_0",
10104
+ "q5_k_m",
10105
+ "q6_k",
10106
+ "q8_0"
10107
+ ],
10108
+ "model_id": "internlm/internlm3-8b-instruct-gguf",
10109
+ "model_file_name_template": "internlm3-8b-instruct-{quantization}.gguf"
10110
+ },
10111
+ {
10112
+ "model_format":"mlx",
10113
+ "model_size_in_billions":8,
10114
+ "quantizations":[
10115
+ "4bit"
10116
+ ],
10117
+ "model_id":"mlx-community/internlm3-8b-instruct-{quantization}"
10118
+ }
10119
+ ],
10120
+ "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
10121
+ "stop_token_ids": [
10122
+ 2,
10123
+ 128131
10124
+ ],
10125
+ "stop": [
10126
+ "</s>",
10127
+ "<|im_end|>"
10128
+ ]
9309
10129
  }
9310
10130
  ]