xinference 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (80) hide show
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +4 -7
  3. xinference/client/handlers.py +3 -0
  4. xinference/client/restful/restful_client.py +9 -1
  5. xinference/core/model.py +19 -0
  6. xinference/core/resource.py +7 -1
  7. xinference/core/scheduler.py +4 -7
  8. xinference/core/status_guard.py +1 -0
  9. xinference/core/supervisor.py +228 -19
  10. xinference/core/utils.py +1 -29
  11. xinference/core/worker.py +28 -2
  12. xinference/deploy/cmdline.py +33 -3
  13. xinference/deploy/local.py +2 -1
  14. xinference/deploy/test/test_cmdline.py +32 -0
  15. xinference/device_utils.py +43 -1
  16. xinference/model/audio/core.py +5 -0
  17. xinference/model/audio/kokoro.py +122 -0
  18. xinference/model/audio/model_spec.json +8 -0
  19. xinference/model/audio/model_spec_modelscope.json +9 -0
  20. xinference/model/image/stable_diffusion/core.py +15 -6
  21. xinference/model/llm/llama_cpp/core.py +21 -14
  22. xinference/model/llm/llm_family.json +866 -46
  23. xinference/model/llm/llm_family.py +7 -2
  24. xinference/model/llm/llm_family_modelscope.json +873 -16
  25. xinference/model/llm/mlx/core.py +11 -3
  26. xinference/model/llm/reasoning_parsers/__init__.py +13 -0
  27. xinference/model/llm/reasoning_parsers/abs_reasoning_parsers.py +98 -0
  28. xinference/model/llm/reasoning_parsers/deepseek_r1_reasoning_parser.py +140 -0
  29. xinference/model/llm/sglang/core.py +99 -11
  30. xinference/model/llm/transformers/core.py +9 -1
  31. xinference/model/llm/transformers/intern_vl.py +23 -14
  32. xinference/model/llm/transformers/qwen2_audio.py +3 -1
  33. xinference/model/llm/transformers/qwen2_vl.py +20 -3
  34. xinference/model/llm/transformers/utils.py +22 -11
  35. xinference/model/llm/utils.py +164 -20
  36. xinference/model/llm/vllm/core.py +36 -4
  37. xinference/model/llm/vllm/xavier/executor.py +2 -2
  38. xinference/model/llm/vllm/xavier/scheduler.py +3 -3
  39. xinference/thirdparty/internvl/conversation.py +26 -17
  40. xinference/types.py +2 -0
  41. xinference/web/ui/build/asset-manifest.json +6 -6
  42. xinference/web/ui/build/index.html +1 -1
  43. xinference/web/ui/build/static/css/main.f8177338.css +2 -0
  44. xinference/web/ui/build/static/css/main.f8177338.css.map +1 -0
  45. xinference/web/ui/build/static/js/main.ad42919c.js +3 -0
  46. xinference/web/ui/build/static/js/main.ad42919c.js.map +1 -0
  47. xinference/web/ui/node_modules/.cache/babel-loader/074a42304bbbaa79e1bfc3b28502457a390df55708de9006f4cc8e35c60aea87.json +1 -0
  48. xinference/web/ui/node_modules/.cache/babel-loader/0acb065326560592b10888234242f94f67efe28458b90f273d4d4fba9daa0cd2.json +1 -0
  49. xinference/web/ui/node_modules/.cache/babel-loader/279ace390216236a82b3d8995c78eca4d637ac9a523e9f521a2d9c76607a43d7.json +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/630a7bd592596cc6e291fc32238ce7c08238038a64ed8ccee0eb0c13c9902910.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/6cb9f6c62ab4042f0b11c5d75e51187188e9d6f5f08b1d63e796e051bafdb457.json +1 -0
  52. xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/914c33e91c1012e3bcd3e96f3a25884cbef148290632d0266dab972b8cc1e95f.json +1 -0
  54. xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/b7939cd3a48adf12fccfdd0803019b5cc235ff7de3a297dae70ce635e0eea13e.json +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/fecf076bcd198a458c2a6ab0e85e40dc1c99994c353164e79c469be162cb74c9.json +1 -0
  59. xinference/web/ui/src/locales/en.json +14 -1
  60. xinference/web/ui/src/locales/zh.json +14 -1
  61. {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/METADATA +18 -17
  62. {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/RECORD +67 -60
  63. xinference/web/ui/build/static/css/main.51a587ff.css +0 -2
  64. xinference/web/ui/build/static/css/main.51a587ff.css.map +0 -1
  65. xinference/web/ui/build/static/js/main.b0936c54.js +0 -3
  66. xinference/web/ui/build/static/js/main.b0936c54.js.map +0 -1
  67. xinference/web/ui/node_modules/.cache/babel-loader/0c2fb5375667931c4a331c99e0d87dc145e8f327cea3f44d6e56f54c7c1d4020.json +0 -1
  68. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +0 -1
  69. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +0 -1
  70. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +0 -1
  71. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +0 -1
  72. xinference/web/ui/node_modules/.cache/babel-loader/a7f1a71f6580dfe810c685a9c1d68e318f71e1fa258fbe50b87a6ac37cc0a598.json +0 -1
  73. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +0 -1
  74. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +0 -1
  75. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +0 -1
  76. /xinference/web/ui/build/static/js/{main.b0936c54.js.LICENSE.txt → main.ad42919c.js.LICENSE.txt} +0 -0
  77. {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/LICENSE +0 -0
  78. {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/WHEEL +0 -0
  79. {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/entry_points.txt +0 -0
  80. {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/top_level.txt +0 -0
@@ -6772,6 +6772,151 @@
6772
6772
  "stop_token_ids": [],
6773
6773
  "stop": []
6774
6774
  },
6775
+ {
6776
+ "version": 1,
6777
+ "context_length": 16384,
6778
+ "model_name": "InternVL2.5",
6779
+ "model_lang": [
6780
+ "en",
6781
+ "zh"
6782
+ ],
6783
+ "model_ability": [
6784
+ "chat",
6785
+ "vision"
6786
+ ],
6787
+ "model_description": "InternVL 2.5 is an open-source multimodal large language model (MLLM) designed to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding.",
6788
+ "model_specs": [
6789
+ {
6790
+ "model_format": "pytorch",
6791
+ "model_size_in_billions": 1,
6792
+ "quantizations": [
6793
+ "4-bit",
6794
+ "8-bit",
6795
+ "none"
6796
+ ],
6797
+ "model_id": "OpenGVLab/InternVL2_5-1B"
6798
+ },
6799
+ {
6800
+ "model_format": "awq",
6801
+ "model_size_in_billions": 1,
6802
+ "quantizations": [
6803
+ "Int4"
6804
+ ],
6805
+ "model_id": "OpenGVLab/InternVL2_5-1B-AWQ"
6806
+ },
6807
+ {
6808
+ "model_format": "pytorch",
6809
+ "model_size_in_billions": 2,
6810
+ "quantizations": [
6811
+ "4-bit",
6812
+ "8-bit",
6813
+ "none"
6814
+ ],
6815
+ "model_id": "OpenGVLab/InternVL2_5-2B"
6816
+ },
6817
+ {
6818
+ "model_format": "awq",
6819
+ "model_size_in_billions": 2,
6820
+ "quantizations": [
6821
+ "Int4"
6822
+ ],
6823
+ "model_id": "OpenGVLab/InternVL2_5-2B-AWQ"
6824
+ },
6825
+ {
6826
+ "model_format": "pytorch",
6827
+ "model_size_in_billions": 4,
6828
+ "quantizations": [
6829
+ "4-bit",
6830
+ "8-bit",
6831
+ "none"
6832
+ ],
6833
+ "model_id": "OpenGVLab/InternVL2_5-4B"
6834
+ },
6835
+ {
6836
+ "model_format": "awq",
6837
+ "model_size_in_billions": 4,
6838
+ "quantizations": [
6839
+ "Int4"
6840
+ ],
6841
+ "model_id": "OpenGVLab/InternVL2_5-4B-AWQ"
6842
+ },
6843
+ {
6844
+ "model_format": "pytorch",
6845
+ "model_size_in_billions": 8,
6846
+ "quantizations": [
6847
+ "4-bit",
6848
+ "8-bit",
6849
+ "none"
6850
+ ],
6851
+ "model_id": "OpenGVLab/InternVL2_5-8B"
6852
+ },
6853
+ {
6854
+ "model_format": "awq",
6855
+ "model_size_in_billions": 8,
6856
+ "quantizations": [
6857
+ "Int4"
6858
+ ],
6859
+ "model_id": "OpenGVLab/InternVL2_5-8B-AWQ"
6860
+ },
6861
+ {
6862
+ "model_format": "pytorch",
6863
+ "model_size_in_billions": 26,
6864
+ "quantizations": [
6865
+ "4-bit",
6866
+ "8-bit",
6867
+ "none"
6868
+ ],
6869
+ "model_id": "OpenGVLab/InternVL2_5-26B"
6870
+ },
6871
+ {
6872
+ "model_format": "awq",
6873
+ "model_size_in_billions": 26,
6874
+ "quantizations": [
6875
+ "Int4"
6876
+ ],
6877
+ "model_id": "OpenGVLab/InternVL2_5-26B-AWQ"
6878
+ },
6879
+ {
6880
+ "model_format": "pytorch",
6881
+ "model_size_in_billions": 38,
6882
+ "quantizations": [
6883
+ "4-bit",
6884
+ "8-bit",
6885
+ "none"
6886
+ ],
6887
+ "model_id": "OpenGVLab/InternVL2_5-38B"
6888
+ },
6889
+ {
6890
+ "model_format": "awq",
6891
+ "model_size_in_billions": 38,
6892
+ "quantizations": [
6893
+ "Int4"
6894
+ ],
6895
+ "model_id": "OpenGVLab/InternVL2_5-38B-AWQ"
6896
+ },
6897
+ {
6898
+ "model_format": "pytorch",
6899
+ "model_size_in_billions": 78,
6900
+ "quantizations": [
6901
+ "4-bit",
6902
+ "8-bit",
6903
+ "none"
6904
+ ],
6905
+ "model_id": "OpenGVLab/InternVL2_5-78B"
6906
+ },
6907
+ {
6908
+ "model_format": "awq",
6909
+ "model_size_in_billions": 78,
6910
+ "quantizations": [
6911
+ "Int4"
6912
+ ],
6913
+ "model_id": "OpenGVLab/InternVL2_5-78B-AWQ"
6914
+ }
6915
+ ],
6916
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
6917
+ "stop_token_ids": [],
6918
+ "stop": []
6919
+ },
6775
6920
  {
6776
6921
  "version": 1,
6777
6922
  "context_length": 8192,
@@ -7125,6 +7270,91 @@
7125
7270
  "<|endoftext|>"
7126
7271
  ]
7127
7272
  },
7273
+ {
7274
+ "version":1,
7275
+ "context_length":128000,
7276
+ "model_name":"qwen2.5-vl-instruct",
7277
+ "model_lang":[
7278
+ "en",
7279
+ "zh"
7280
+ ],
7281
+ "model_ability":[
7282
+ "chat",
7283
+ "vision"
7284
+ ],
7285
+ "model_description":"Qwen2.5-VL: Qwen2.5-VL is the latest version of the vision language models in the Qwen model family.",
7286
+ "model_specs":[
7287
+ {
7288
+ "model_format":"pytorch",
7289
+ "model_size_in_billions":3,
7290
+ "quantizations":[
7291
+ "none"
7292
+ ],
7293
+ "model_id":"Qwen/Qwen2.5-VL-3B-Instruct"
7294
+ },
7295
+ {
7296
+ "model_format":"pytorch",
7297
+ "model_size_in_billions":7,
7298
+ "quantizations":[
7299
+ "none"
7300
+ ],
7301
+ "model_id":"Qwen/Qwen2.5-VL-7B-Instruct"
7302
+ },
7303
+ {
7304
+ "model_format":"pytorch",
7305
+ "model_size_in_billions":72,
7306
+ "quantizations":[
7307
+ "none"
7308
+ ],
7309
+ "model_id":"Qwen/Qwen2.5-VL-72B-Instruct"
7310
+ },
7311
+ {
7312
+ "model_format":"mlx",
7313
+ "model_size_in_billions":3,
7314
+ "quantizations":[
7315
+ "3bit",
7316
+ "4bit",
7317
+ "6bit",
7318
+ "8bit",
7319
+ "bf16"
7320
+ ],
7321
+ "model_id":"mlx-community/Qwen2.5-VL-3B-Instruct-{quantization}"
7322
+ },
7323
+ {
7324
+ "model_format":"mlx",
7325
+ "model_size_in_billions":7,
7326
+ "quantizations":[
7327
+ "3bit",
7328
+ "4bit",
7329
+ "6bit",
7330
+ "8bit",
7331
+ "bf16"
7332
+ ],
7333
+ "model_id":"mlx-community/Qwen2.5-VL-7B-Instruct-{quantization}"
7334
+ },
7335
+ {
7336
+ "model_format":"mlx",
7337
+ "model_size_in_billions":72,
7338
+ "quantizations":[
7339
+ "3bit",
7340
+ "4bit",
7341
+ "6bit",
7342
+ "8bit",
7343
+ "bf16"
7344
+ ],
7345
+ "model_id":"mlx-community/Qwen2.5-VL-72B-Instruct-{quantization}"
7346
+ }
7347
+ ],
7348
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
7349
+ "stop_token_ids": [
7350
+ 151645,
7351
+ 151643
7352
+ ],
7353
+ "stop": [
7354
+ "<|im_end|>",
7355
+ "<|endoftext|>"
7356
+ ]
7357
+ },
7128
7358
  {
7129
7359
  "version": 1,
7130
7360
  "context_length": 32768,
@@ -7212,7 +7442,7 @@
7212
7442
  "zh"
7213
7443
  ],
7214
7444
  "model_ability":[
7215
- "chat",
7445
+ "generate",
7216
7446
  "audio"
7217
7447
  ],
7218
7448
  "model_description":"Qwen2-Audio: A large-scale audio-language model which is capable of accepting various audio signal inputs and performing audio analysis or direct textual responses with regard to speech instructions.",
@@ -7335,57 +7565,421 @@
7335
7565
  "model_description": "DeepSeek-V2-Chat-0628 is an improved version of DeepSeek-V2-Chat. ",
7336
7566
  "model_specs": [
7337
7567
  {
7338
- "model_format": "pytorch",
7339
- "model_size_in_billions": 236,
7568
+ "model_format": "pytorch",
7569
+ "model_size_in_billions": 236,
7570
+ "quantizations": [
7571
+ "4-bit",
7572
+ "8-bit",
7573
+ "none"
7574
+ ],
7575
+ "model_id": "deepseek-ai/DeepSeek-V2-Chat-0628",
7576
+ "model_revision": "5d09e272c2b223830f4e84359cd9dd047a5d7c78"
7577
+ }
7578
+ ],
7579
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ '<|begin▁of▁sentence|>' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|User|>' + message['content'] }}{% elif message['role'] == 'assistant' %}{{ '<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>' }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|Assistant|>' }}{% endif %}",
7580
+ "stop_token_ids": [
7581
+ 100001
7582
+ ],
7583
+ "stop": [
7584
+ "<|end▁of▁sentence|>"
7585
+ ]
7586
+ },
7587
+ {
7588
+ "version": 1,
7589
+ "context_length": 128000,
7590
+ "model_name": "deepseek-v2.5",
7591
+ "model_lang": [
7592
+ "en",
7593
+ "zh"
7594
+ ],
7595
+ "model_ability": [
7596
+ "chat"
7597
+ ],
7598
+ "model_description": "DeepSeek-V2.5 is an upgraded version that combines DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct. The new model integrates the general and coding abilities of the two previous versions.",
7599
+ "model_specs": [
7600
+ {
7601
+ "model_format": "pytorch",
7602
+ "model_size_in_billions": 236,
7603
+ "quantizations": [
7604
+ "4-bit",
7605
+ "8-bit",
7606
+ "none"
7607
+ ],
7608
+ "model_id": "deepseek-ai/DeepSeek-V2.5",
7609
+ "model_revision": "24b08cb750e0c2757de112d2e16327cb21ed4833"
7610
+ }
7611
+ ],
7612
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %} {%- if message['role'] == 'system' %} {% set ns.system_prompt = message['content'] %} {%- endif %}{%- endfor %}{{'<|begin▁of▁sentence|>'}}{{ns.system_prompt}}{%- for message in messages %} {%- if message['role'] == 'user' %} {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is none %} {%- set ns.is_tool = false -%} {%- for tool in message['tool_calls']%} {%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}} {%- set ns.is_first = true -%} {%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} {%- endif %} {%- endfor %} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is not none %} {%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- set ns.is_tool = false -%} {%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- endif %} {%- endif %} {%- if message['role'] == 'tool' %} {%- set ns.is_tool = true -%} {%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- set ns.is_output_first = false %} {%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- endif %} {%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not 
ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
7613
+ "stop_token_ids": [
7614
+ 100001
7615
+ ],
7616
+ "stop": [
7617
+ "<|end▁of▁sentence|>"
7618
+ ]
7619
+ },
7620
+ {
7621
+ "version": 1,
7622
+ "context_length": 163840,
7623
+ "model_name": "deepseek-v3",
7624
+ "model_lang": [
7625
+ "en",
7626
+ "zh"
7627
+ ],
7628
+ "model_ability": [
7629
+ "chat"
7630
+ ],
7631
+ "model_description": "DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters, of which 37B are activated for each token.",
7632
+ "model_specs": [
7633
+ {
7634
+ "model_format": "pytorch",
7635
+ "model_size_in_billions": 671,
7636
+ "quantizations": [
7637
+ "4-bit",
7638
+ "8-bit",
7639
+ "none"
7640
+ ],
7641
+ "model_id": "deepseek-ai/DeepSeek-V3",
7642
+ "model_revision": "1d044fd82b15f1cedb197a288e50cc96a2c27205"
7643
+ },
7644
+ {
7645
+ "model_format": "awq",
7646
+ "model_size_in_billions": 671,
7647
+ "quantizations": [
7648
+ "Int4"
7649
+ ],
7650
+ "model_id": "cognitivecomputations/DeepSeek-V3-AWQ"
7651
+ },
7652
+ {
7653
+ "model_format": "ggufv2",
7654
+ "model_size_in_billions": 671,
7655
+ "quantizations": [
7656
+ "Q2_K_L",
7657
+ "Q2_K_XS",
7658
+ "Q3_K_M",
7659
+ "Q4_K_M",
7660
+ "Q5_K_M",
7661
+ "Q6_K",
7662
+ "Q8_0"
7663
+ ],
7664
+ "model_id": "unsloth/DeepSeek-V3-GGUF",
7665
+ "model_file_name_template": "DeepSeek-V3-{quantization}/DeepSeek-V3-{quantization}.gguf",
7666
+ "model_file_name_split_template": "DeepSeek-V3-{quantization}/DeepSeek-V3-{quantization}-{part}.gguf",
7667
+ "quantization_parts": {
7668
+ "Q2_K_L": [
7669
+ "00001-of-00005",
7670
+ "00002-of-00005",
7671
+ "00003-of-00005",
7672
+ "00004-of-00005",
7673
+ "00005-of-00005"
7674
+ ],
7675
+ "Q2_K_XS": [
7676
+ "00001-of-00005",
7677
+ "00002-of-00005",
7678
+ "00003-of-00005",
7679
+ "00004-of-00005",
7680
+ "00005-of-00005"
7681
+ ],
7682
+ "Q3_K_M": [
7683
+ "00001-of-00007",
7684
+ "00002-of-00007",
7685
+ "00003-of-00007",
7686
+ "00004-of-00007",
7687
+ "00005-of-00007",
7688
+ "00006-of-00007",
7689
+ "00007-of-00007"
7690
+ ],
7691
+ "Q4_K_M": [
7692
+ "00001-of-00009",
7693
+ "00002-of-00009",
7694
+ "00003-of-00009",
7695
+ "00004-of-00009",
7696
+ "00005-of-00009",
7697
+ "00006-of-00009",
7698
+ "00007-of-00009",
7699
+ "00008-of-00009",
7700
+ "00009-of-00009"
7701
+ ],
7702
+ "Q5_K_M": [
7703
+ "00001-of-00010",
7704
+ "00002-of-00010",
7705
+ "00003-of-00010",
7706
+ "00004-of-00010",
7707
+ "00005-of-00010",
7708
+ "00006-of-00010",
7709
+ "00007-of-00010",
7710
+ "00008-of-00010",
7711
+ "00009-of-00010",
7712
+ "00010-of-00010"
7713
+ ],
7714
+ "Q6_K": [
7715
+ "00001-of-00012",
7716
+ "00002-of-00012",
7717
+ "00003-of-00012",
7718
+ "00004-of-00012",
7719
+ "00005-of-00012",
7720
+ "00006-of-00012",
7721
+ "00007-of-00012",
7722
+ "00008-of-00012",
7723
+ "00009-of-00012",
7724
+ "00010-of-00012",
7725
+ "00011-of-00012",
7726
+ "00012-of-00012"
7727
+ ],
7728
+ "Q8_0": [
7729
+ "00001-of-00016",
7730
+ "00002-of-00016",
7731
+ "00003-of-00016",
7732
+ "00004-of-00016",
7733
+ "00005-of-00016",
7734
+ "00006-of-00016",
7735
+ "00007-of-00016",
7736
+ "00008-of-00016",
7737
+ "00009-of-00016",
7738
+ "00010-of-00016",
7739
+ "00011-of-00016",
7740
+ "00012-of-00016",
7741
+ "00013-of-00016",
7742
+ "00014-of-00016",
7743
+ "00015-of-00016",
7744
+ "00016-of-00016"
7745
+ ]
7746
+ }
7747
+ },
7748
+ {
7749
+ "model_format": "mlx",
7750
+ "model_size_in_billions": 671,
7751
+ "quantizations": [
7752
+ "3bit",
7753
+ "4bit"
7754
+ ],
7755
+ "model_id": "mlx-community/DeepSeek-V3-{quantization}"
7756
+ }
7757
+ ],
7758
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + 
'<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
7759
+ "stop_token_ids": [
7760
+ 1
7761
+ ],
7762
+ "stop": [
7763
+ "<|end▁of▁sentence|>"
7764
+ ]
7765
+ },
7766
+ {
7767
+ "version": 1,
7768
+ "context_length": 163840,
7769
+ "model_name": "deepseek-r1",
7770
+ "model_lang": [
7771
+ "en",
7772
+ "zh"
7773
+ ],
7774
+ "model_ability": [
7775
+ "chat",
7776
+ "reasoning"
7777
+ ],
7778
+ "model_description": "DeepSeek-R1 incorporates cold-start data before RL and achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.",
7779
+ "model_specs": [
7780
+ {
7781
+ "model_format": "pytorch",
7782
+ "model_size_in_billions": 671,
7783
+ "quantizations": [
7784
+ "4-bit",
7785
+ "8-bit",
7786
+ "none"
7787
+ ],
7788
+ "model_id": "deepseek-ai/DeepSeek-R1",
7789
+ "model_revision": "8a58a132790c9935686eb97f042afa8013451c9f"
7790
+ },
7791
+ {
7792
+ "model_format": "awq",
7793
+ "model_size_in_billions": 671,
7794
+ "quantizations": [
7795
+ "Int4"
7796
+ ],
7797
+ "model_id": "cognitivecomputations/DeepSeek-R1-AWQ"
7798
+ },
7799
+ {
7800
+ "model_format": "ggufv2",
7801
+ "model_size_in_billions": 671,
7802
+ "quantizations": [
7803
+ "UD-IQ1_S",
7804
+ "UD-IQ1_M",
7805
+ "UD-IQ2_XXS",
7806
+ "UD-Q2_K_XL",
7807
+ "Q2_K",
7808
+ "Q2_K_L",
7809
+ "Q2_K_XS",
7810
+ "Q3_K_M",
7811
+ "Q4_K_M",
7812
+ "Q5_K_M",
7813
+ "Q6_K",
7814
+ "Q8_0",
7815
+ "BF16"
7816
+ ],
7817
+ "model_id": "unsloth/DeepSeek-R1-GGUF",
7818
+ "model_file_name_template": "DeepSeek-R1-{quantization}/DeepSeek-R1-{quantization}.gguf",
7819
+ "model_file_name_split_template": "DeepSeek-R1-{quantization}/DeepSeek-R1-{quantization}-{part}.gguf",
7820
+ "quantization_parts": {
7821
+ "UD-IQ1_S": [
7822
+ "00001-of-00003",
7823
+ "00002-of-00003",
7824
+ "00003-of-00003"
7825
+ ],
7826
+ "UD-IQ1_M": [
7827
+ "00001-of-00004",
7828
+ "00002-of-00004",
7829
+ "00003-of-00004",
7830
+ "00004-of-00004"
7831
+ ],
7832
+ "UD-IQ2_XXS": [
7833
+ "00001-of-00004",
7834
+ "00002-of-00004",
7835
+ "00003-of-00004",
7836
+ "00004-of-00004"
7837
+ ],
7838
+ "UD-Q2_K_XL": [
7839
+ "00001-of-00005",
7840
+ "00002-of-00005",
7841
+ "00003-of-00005",
7842
+ "00004-of-00005",
7843
+ "00005-of-00005"
7844
+ ],
7845
+ "Q2_K": [
7846
+ "00001-of-00005",
7847
+ "00002-of-00005",
7848
+ "00003-of-00005",
7849
+ "00004-of-00005",
7850
+ "00005-of-00005"
7851
+ ],
7852
+ "Q2_K_L": [
7853
+ "00001-of-00005",
7854
+ "00002-of-00005",
7855
+ "00003-of-00005",
7856
+ "00004-of-00005",
7857
+ "00005-of-00005"
7858
+ ],
7859
+ "Q2_K_XS": [
7860
+ "00001-of-00005",
7861
+ "00002-of-00005",
7862
+ "00003-of-00005",
7863
+ "00004-of-00005",
7864
+ "00005-of-00005"
7865
+ ],
7866
+ "Q3_K_M": [
7867
+ "00001-of-00007",
7868
+ "00002-of-00007",
7869
+ "00003-of-00007",
7870
+ "00004-of-00007",
7871
+ "00005-of-00007",
7872
+ "00006-of-00007",
7873
+ "00007-of-00007"
7874
+ ],
7875
+ "Q4_K_M": [
7876
+ "00001-of-00009",
7877
+ "00002-of-00009",
7878
+ "00003-of-00009",
7879
+ "00004-of-00009",
7880
+ "00005-of-00009",
7881
+ "00006-of-00009",
7882
+ "00007-of-00009",
7883
+ "00008-of-00009",
7884
+ "00009-of-00009"
7885
+ ],
7886
+ "Q5_K_M": [
7887
+ "00001-of-00010",
7888
+ "00002-of-00010",
7889
+ "00003-of-00010",
7890
+ "00004-of-00010",
7891
+ "00005-of-00010",
7892
+ "00006-of-00010",
7893
+ "00007-of-00010",
7894
+ "00008-of-00010",
7895
+ "00009-of-00010",
7896
+ "00010-of-00010"
7897
+ ],
7898
+ "Q6_K": [
7899
+ "00001-of-00012",
7900
+ "00002-of-00012",
7901
+ "00003-of-00012",
7902
+ "00004-of-00012",
7903
+ "00005-of-00012",
7904
+ "00006-of-00012",
7905
+ "00007-of-00012",
7906
+ "00008-of-00012",
7907
+ "00009-of-00012",
7908
+ "00010-of-00012",
7909
+ "00011-of-00012",
7910
+ "00012-of-00012"
7911
+ ],
7912
+ "Q8_0": [
7913
+ "00001-of-00015",
7914
+ "00002-of-00015",
7915
+ "00003-of-00015",
7916
+ "00004-of-00015",
7917
+ "00005-of-00015",
7918
+ "00006-of-00015",
7919
+ "00007-of-00015",
7920
+ "00008-of-00015",
7921
+ "00009-of-00015",
7922
+ "00010-of-00015",
7923
+ "00011-of-00015",
7924
+ "00012-of-00015",
7925
+ "00013-of-00015",
7926
+ "00014-of-00015",
7927
+ "00015-of-00015"
7928
+ ],
7929
+ "BF16": [
7930
+ "00001-of-00030",
7931
+ "00002-of-00030",
7932
+ "00003-of-00030",
7933
+ "00004-of-00030",
7934
+ "00005-of-00030",
7935
+ "00006-of-00030",
7936
+ "00007-of-00030",
7937
+ "00008-of-00030",
7938
+ "00009-of-00030",
7939
+ "00010-of-00030",
7940
+ "00011-of-00030",
7941
+ "00012-of-00030",
7942
+ "00013-of-00030",
7943
+ "00014-of-00030",
7944
+ "00015-of-00030",
7945
+ "00016-of-00030",
7946
+ "00017-of-00030",
7947
+ "00018-of-00030",
7948
+ "00019-of-00030",
7949
+ "00020-of-00030",
7950
+ "00021-of-00030",
7951
+ "00022-of-00030",
7952
+ "00023-of-00030",
7953
+ "00024-of-00030",
7954
+ "00025-of-00030",
7955
+ "00026-of-00030",
7956
+ "00027-of-00030",
7957
+ "00028-of-00030",
7958
+ "00029-of-00030",
7959
+ "00030-of-00030"
7960
+ ]
7961
+ }
7962
+ },
7963
+ {
7964
+ "model_format": "mlx",
7965
+ "model_size_in_billions": 671,
7340
7966
  "quantizations": [
7341
- "4-bit",
7342
- "8-bit",
7343
- "none"
7967
+ "2bit",
7968
+ "3bit",
7969
+ "4bit"
7344
7970
  ],
7345
- "model_id": "deepseek-ai/DeepSeek-V2-Chat-0628",
7346
- "model_revision": "5d09e272c2b223830f4e84359cd9dd047a5d7c78"
7971
+ "model_id": "mlx-community/DeepSeek-R1-{quantization}"
7347
7972
  }
7348
7973
  ],
7349
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ '<|begin▁of▁sentence|>' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|User|>' + message['content'] }}{% elif message['role'] == 'assistant' %}{{ '<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>' }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|Assistant|>' }}{% endif %}",
7974
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool 
%}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
7350
7975
  "stop_token_ids": [
7351
- 100001
7976
+ 1
7352
7977
  ],
7353
7978
  "stop": [
7354
7979
  "<|end▁of▁sentence|>"
7355
- ]
7356
- },
7357
- {
7358
- "version": 1,
7359
- "context_length": 128000,
7360
- "model_name": "deepseek-v2.5",
7361
- "model_lang": [
7362
- "en",
7363
- "zh"
7364
- ],
7365
- "model_ability": [
7366
- "chat"
7367
- ],
7368
- "model_description": "DeepSeek-V2.5 is an upgraded version that combines DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct. The new model integrates the general and coding abilities of the two previous versions.",
7369
- "model_specs": [
7370
- {
7371
- "model_format": "pytorch",
7372
- "model_size_in_billions": 236,
7373
- "quantizations": [
7374
- "4-bit",
7375
- "8-bit",
7376
- "none"
7377
- ],
7378
- "model_id": "deepseek-ai/DeepSeek-V2.5",
7379
- "model_revision": "24b08cb750e0c2757de112d2e16327cb21ed4833"
7380
- }
7381
- ],
7382
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %} {%- if message['role'] == 'system' %} {% set ns.system_prompt = message['content'] %} {%- endif %}{%- endfor %}{{'<|begin▁of▁sentence|>'}}{{ns.system_prompt}}{%- for message in messages %} {%- if message['role'] == 'user' %} {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is none %} {%- set ns.is_tool = false -%} {%- for tool in message['tool_calls']%} {%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}} {%- set ns.is_first = true -%} {%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} {%- endif %} {%- endfor %} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is not none %} {%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- set ns.is_tool = false -%} {%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- endif %} {%- endif %} {%- if message['role'] == 'tool' %} {%- set ns.is_tool = true -%} {%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- set ns.is_output_first = false %} {%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- endif %} {%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not 
ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
7383
- "stop_token_ids": [
7384
- 100001
7385
7980
  ],
7386
- "stop": [
7387
- "<|end▁of▁sentence|>"
7388
- ]
7981
+ "reasoning_start_tag": "<think>",
7982
+ "reasoning_end_tag": "</think>"
7389
7983
  },
7390
7984
  {
7391
7985
  "version": 1,
@@ -8725,7 +9319,8 @@
8725
9319
  "zh"
8726
9320
  ],
8727
9321
  "model_ability": [
8728
- "chat"
9322
+ "chat",
9323
+ "reasoning"
8729
9324
  ],
8730
9325
  "model_description": "deepseek-r1-distill-qwen is distilled from DeepSeek-R1 based on Qwen",
8731
9326
  "model_specs": [
@@ -8929,13 +9524,163 @@
8929
9524
  "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-32B-{quantization}"
8930
9525
  }
8931
9526
  ],
8932
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool 
%}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
9527
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' in message %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{'<|Assistant|>' + message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' not in message %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = 
content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
8933
9528
  "stop_token_ids": [
8934
9529
  151643
8935
9530
  ],
8936
9531
  "stop": [
8937
9532
  "<|end▁of▁sentence|>"
8938
- ]
9533
+ ],
9534
+ "reasoning_start_tag": "<think>",
9535
+ "reasoning_end_tag": "</think>"
9536
+ },
9537
+ {
9538
+ "version": 1,
9539
+ "context_length": 131072,
9540
+ "model_name": "deepseek-r1-distill-llama",
9541
+ "model_lang": [
9542
+ "en",
9543
+ "zh"
9544
+ ],
9545
+ "model_ability": [
9546
+ "chat",
9547
+ "reasoning"
9548
+ ],
9549
+ "model_description": "deepseek-r1-distill-llama is distilled from DeepSeek-R1 based on Llama",
9550
+ "model_specs": [
9551
+ {
9552
+ "model_format": "pytorch",
9553
+ "model_size_in_billions": 8,
9554
+ "quantizations": [
9555
+ "4-bit",
9556
+ "8-bit",
9557
+ "none"
9558
+ ],
9559
+ "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
9560
+ },
9561
+ {
9562
+ "model_format": "awq",
9563
+ "model_size_in_billions": 8,
9564
+ "quantizations": [
9565
+ "Int4"
9566
+ ],
9567
+ "model_id": "jakiAJK/DeepSeek-R1-Distill-Llama-8B_AWQ"
9568
+ },
9569
+ {
9570
+ "model_format": "gptq",
9571
+ "model_size_in_billions": 8,
9572
+ "quantizations": [
9573
+ "Int4"
9574
+ ],
9575
+ "model_id": "jakiAJK/DeepSeek-R1-Distill-Llama-8B_GPTQ-int4"
9576
+ },
9577
+ {
9578
+ "model_format": "ggufv2",
9579
+ "model_size_in_billions": 8,
9580
+ "quantizations": [
9581
+ "Q2_K",
9582
+ "Q2_K_L",
9583
+ "Q3_K_M",
9584
+ "Q4_K_M",
9585
+ "Q5_K_M",
9586
+ "Q6_K",
9587
+ "Q8_0",
9588
+ "F16"
9589
+ ],
9590
+ "model_id": "unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF",
9591
+ "model_file_name_template": "DeepSeek-R1-Distill-Llama-8B-{quantization}.gguf"
9592
+ },
9593
+ {
9594
+ "model_format": "mlx",
9595
+ "model_size_in_billions": 8,
9596
+ "quantizations": [
9597
+ "3bit",
9598
+ "4bit",
9599
+ "6bit",
9600
+ "8bit",
9601
+ "bf16"
9602
+ ],
9603
+ "model_id": "mlx-community/DeepSeek-R1-Distill-Llama-8B-{quantization}"
9604
+ },
9605
+ {
9606
+ "model_format": "pytorch",
9607
+ "model_size_in_billions": 70,
9608
+ "quantizations": [
9609
+ "4-bit",
9610
+ "8-bit",
9611
+ "none"
9612
+ ],
9613
+ "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
9614
+ },
9615
+ {
9616
+ "model_format": "awq",
9617
+ "model_size_in_billions": 70,
9618
+ "quantizations": [
9619
+ "Int4"
9620
+ ],
9621
+ "model_id": "casperhansen/deepseek-r1-distill-llama-70b-awq"
9622
+ },
9623
+ {
9624
+ "model_format": "gptq",
9625
+ "model_size_in_billions": 70,
9626
+ "quantizations": [
9627
+ "Int4"
9628
+ ],
9629
+ "model_id": "empirischtech/DeepSeek-R1-Distill-Llama-70B-gptq-4bit"
9630
+ },
9631
+ {
9632
+ "model_format": "ggufv2",
9633
+ "model_size_in_billions": 70,
9634
+ "quantizations": [
9635
+ "Q2_K",
9636
+ "Q2_K_L",
9637
+ "Q3_K_M",
9638
+ "Q4_K_M",
9639
+ "Q5_K_M",
9640
+ "Q6_K",
9641
+ "Q8_0",
9642
+ "F16"
9643
+ ],
9644
+ "quantization_parts": {
9645
+ "Q6_K": [
9646
+ "00001-of-00002",
9647
+ "00002-of-00002"
9648
+ ],
9649
+ "Q8_0": [
9650
+ "00001-of-00002",
9651
+ "00002-of-00002"
9652
+ ],
9653
+ "F16": [
9654
+ "00001-of-00003",
9655
+ "00002-of-00003",
9656
+ "00003-of-00003"
9657
+ ]
9658
+ },
9659
+ "model_id": "unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF",
9660
+ "model_file_name_template": "DeepSeek-R1-Distill-Llama-70B-{quantization}.gguf",
9661
+ "model_file_name_split_template": "DeepSeek-R1-Distill-Llama-70B-{quantization}/DeepSeek-R1-Distill-Llama-70B-{quantization}-{part}.gguf"
9662
+ },
9663
+ {
9664
+ "model_format": "mlx",
9665
+ "model_size_in_billions": 70,
9666
+ "quantizations": [
9667
+ "3bit",
9668
+ "4bit",
9669
+ "6bit",
9670
+ "8bit"
9671
+ ],
9672
+ "model_id": "mlx-community/DeepSeek-R1-Distill-Llama-70B-{quantization}"
9673
+ }
9674
+ ],
9675
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool 
%}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
9676
+ "stop_token_ids": [
9677
+ 128001
9678
+ ],
9679
+ "stop": [
9680
+ "<|end▁of▁sentence|>"
9681
+ ],
9682
+ "reasoning_start_tag": "<think>",
9683
+ "reasoning_end_tag": "</think>"
8939
9684
  },
8940
9685
  {
8941
9686
  "version": 1,
@@ -9306,5 +10051,80 @@
9306
10051
  "<|user|>",
9307
10052
  "<|observation|>"
9308
10053
  ]
10054
+ },
10055
+ {
10056
+ "version": 1,
10057
+ "context_length": 32768,
10058
+ "model_name": "internlm3-instruct",
10059
+ "model_lang": [
10060
+ "en",
10061
+ "zh"
10062
+ ],
10063
+ "model_ability": [
10064
+ "chat",
10065
+ "tools"
10066
+ ],
10067
+ "model_description": "InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning.",
10068
+ "model_specs": [
10069
+ {
10070
+ "model_format": "pytorch",
10071
+ "model_size_in_billions": 8,
10072
+ "quantizations": [
10073
+ "4-bit",
10074
+ "8-bit",
10075
+ "none"
10076
+ ],
10077
+ "model_id": "internlm/internlm3-8b-instruct"
10078
+ },
10079
+ {
10080
+ "model_format": "gptq",
10081
+ "model_size_in_billions": 8,
10082
+ "quantizations": [
10083
+ "Int4"
10084
+ ],
10085
+ "model_id": "internlm/internlm3-8b-instruct-gptq-int4"
10086
+ },
10087
+ {
10088
+ "model_format": "awq",
10089
+ "model_size_in_billions": 8,
10090
+ "quantizations": [
10091
+ "Int4"
10092
+ ],
10093
+ "model_id": "internlm/internlm3-8b-instruct-awq"
10094
+ },
10095
+ {
10096
+ "model_format": "ggufv2",
10097
+ "model_size_in_billions": 8,
10098
+ "quantizations": [
10099
+ "q2_k",
10100
+ "q3_k_m",
10101
+ "q4_0",
10102
+ "q4_k_m",
10103
+ "q5_0",
10104
+ "q5_k_m",
10105
+ "q6_k",
10106
+ "q8_0"
10107
+ ],
10108
+ "model_id": "internlm/internlm3-8b-instruct-gguf",
10109
+ "model_file_name_template": "internlm3-8b-instruct-{quantization}.gguf"
10110
+ },
10111
+ {
10112
+ "model_format":"mlx",
10113
+ "model_size_in_billions":8,
10114
+ "quantizations":[
10115
+ "4bit"
10116
+ ],
10117
+ "model_id":"mlx-community/internlm3-8b-instruct-{quantization}"
10118
+ }
10119
+ ],
10120
+ "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
10121
+ "stop_token_ids": [
10122
+ 2,
10123
+ 128131
10124
+ ],
10125
+ "stop": [
10126
+ "</s>",
10127
+ "<|im_end|>"
10128
+ ]
9309
10129
  }
9310
10130
  ]