xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (373)
  1. xinference/_compat.py +24 -2
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +219 -77
  4. xinference/client/restful/restful_client.py +47 -2
  5. xinference/constants.py +1 -0
  6. xinference/core/chat_interface.py +6 -1
  7. xinference/core/model.py +124 -34
  8. xinference/core/supervisor.py +180 -12
  9. xinference/core/utils.py +73 -4
  10. xinference/core/worker.py +102 -4
  11. xinference/deploy/cmdline.py +3 -1
  12. xinference/deploy/test/test_cmdline.py +56 -0
  13. xinference/isolation.py +24 -0
  14. xinference/model/audio/__init__.py +12 -0
  15. xinference/model/audio/core.py +37 -4
  16. xinference/model/audio/cosyvoice.py +39 -6
  17. xinference/model/audio/f5tts.py +200 -0
  18. xinference/model/audio/f5tts_mlx.py +260 -0
  19. xinference/model/audio/fish_speech.py +70 -110
  20. xinference/model/audio/melotts.py +110 -0
  21. xinference/model/audio/model_spec.json +179 -3
  22. xinference/model/audio/model_spec_modelscope.json +27 -0
  23. xinference/model/audio/utils.py +32 -0
  24. xinference/model/audio/whisper.py +35 -10
  25. xinference/model/audio/whisper_mlx.py +208 -0
  26. xinference/model/embedding/core.py +322 -6
  27. xinference/model/embedding/model_spec.json +8 -1
  28. xinference/model/embedding/model_spec_modelscope.json +9 -1
  29. xinference/model/image/core.py +69 -1
  30. xinference/model/image/model_spec.json +145 -4
  31. xinference/model/image/model_spec_modelscope.json +150 -4
  32. xinference/model/image/stable_diffusion/core.py +50 -15
  33. xinference/model/llm/__init__.py +6 -2
  34. xinference/model/llm/llm_family.json +1055 -93
  35. xinference/model/llm/llm_family.py +15 -36
  36. xinference/model/llm/llm_family_modelscope.json +1031 -78
  37. xinference/model/llm/memory.py +1 -1
  38. xinference/model/llm/mlx/core.py +285 -47
  39. xinference/model/llm/sglang/core.py +2 -0
  40. xinference/model/llm/transformers/chatglm.py +9 -5
  41. xinference/model/llm/transformers/cogagent.py +272 -0
  42. xinference/model/llm/transformers/core.py +3 -0
  43. xinference/model/llm/transformers/glm_edge_v.py +230 -0
  44. xinference/model/llm/transformers/qwen2_vl.py +12 -1
  45. xinference/model/llm/transformers/utils.py +16 -8
  46. xinference/model/llm/utils.py +55 -4
  47. xinference/model/llm/vllm/core.py +137 -12
  48. xinference/model/llm/vllm/xavier/__init__.py +13 -0
  49. xinference/model/llm/vllm/xavier/allocator.py +74 -0
  50. xinference/model/llm/vllm/xavier/block.py +111 -0
  51. xinference/model/llm/vllm/xavier/block_manager.py +71 -0
  52. xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
  53. xinference/model/llm/vllm/xavier/collective.py +74 -0
  54. xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
  55. xinference/model/llm/vllm/xavier/engine.py +247 -0
  56. xinference/model/llm/vllm/xavier/executor.py +134 -0
  57. xinference/model/llm/vllm/xavier/scheduler.py +438 -0
  58. xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
  59. xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
  60. xinference/model/llm/vllm/xavier/transfer.py +319 -0
  61. xinference/model/rerank/core.py +11 -4
  62. xinference/model/video/diffusers.py +14 -0
  63. xinference/model/video/model_spec.json +15 -0
  64. xinference/model/video/model_spec_modelscope.json +16 -0
  65. xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
  66. xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
  67. xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
  68. xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
  69. xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
  70. xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
  71. xinference/thirdparty/cosyvoice/bin/train.py +42 -8
  72. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
  73. xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
  74. xinference/thirdparty/cosyvoice/cli/model.py +330 -80
  75. xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
  76. xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
  77. xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
  78. xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
  79. xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
  80. xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
  81. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
  82. xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
  83. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
  84. xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
  85. xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
  86. xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
  87. xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
  88. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
  89. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
  90. xinference/thirdparty/cosyvoice/utils/common.py +28 -1
  91. xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
  92. xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
  93. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
  94. xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
  95. xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
  96. xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
  97. xinference/thirdparty/f5_tts/api.py +166 -0
  98. xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
  99. xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
  100. xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
  101. xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
  102. xinference/thirdparty/f5_tts/eval/README.md +49 -0
  103. xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
  104. xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
  105. xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
  106. xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
  107. xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
  108. xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
  109. xinference/thirdparty/f5_tts/infer/README.md +191 -0
  110. xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
  111. xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
  112. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
  113. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
  114. xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
  115. xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
  116. xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
  117. xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
  118. xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
  119. xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
  120. xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
  121. xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
  122. xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
  123. xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
  124. xinference/thirdparty/f5_tts/model/__init__.py +10 -0
  125. xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
  126. xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
  127. xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
  128. xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
  129. xinference/thirdparty/f5_tts/model/cfm.py +285 -0
  130. xinference/thirdparty/f5_tts/model/dataset.py +319 -0
  131. xinference/thirdparty/f5_tts/model/modules.py +658 -0
  132. xinference/thirdparty/f5_tts/model/trainer.py +366 -0
  133. xinference/thirdparty/f5_tts/model/utils.py +185 -0
  134. xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
  135. xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
  136. xinference/thirdparty/f5_tts/socket_server.py +159 -0
  137. xinference/thirdparty/f5_tts/train/README.md +77 -0
  138. xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
  139. xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
  140. xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
  141. xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
  142. xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
  143. xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
  144. xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
  145. xinference/thirdparty/f5_tts/train/train.py +75 -0
  146. xinference/thirdparty/fish_speech/fish_speech/conversation.py +266 -1
  147. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
  148. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
  149. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
  150. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
  151. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
  152. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +137 -29
  153. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
  154. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
  155. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +17 -11
  156. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
  157. xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
  158. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
  159. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
  160. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
  161. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
  162. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +2 -2
  163. xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +34 -18
  164. xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
  165. xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
  166. xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
  167. xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
  168. xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
  169. xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
  170. xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
  171. xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
  172. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
  173. xinference/thirdparty/fish_speech/tools/llama/generate.py +484 -72
  174. xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
  175. xinference/thirdparty/fish_speech/tools/schema.py +170 -0
  176. xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
  177. xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
  178. xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
  179. xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
  180. xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
  181. xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
  182. xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
  183. xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
  184. xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
  185. xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
  186. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
  187. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
  188. xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
  189. xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
  190. xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
  191. xinference/thirdparty/matcha/utils/utils.py +2 -2
  192. xinference/thirdparty/melo/api.py +135 -0
  193. xinference/thirdparty/melo/app.py +61 -0
  194. xinference/thirdparty/melo/attentions.py +459 -0
  195. xinference/thirdparty/melo/commons.py +160 -0
  196. xinference/thirdparty/melo/configs/config.json +94 -0
  197. xinference/thirdparty/melo/data/example/metadata.list +20 -0
  198. xinference/thirdparty/melo/data_utils.py +413 -0
  199. xinference/thirdparty/melo/download_utils.py +67 -0
  200. xinference/thirdparty/melo/infer.py +25 -0
  201. xinference/thirdparty/melo/init_downloads.py +14 -0
  202. xinference/thirdparty/melo/losses.py +58 -0
  203. xinference/thirdparty/melo/main.py +36 -0
  204. xinference/thirdparty/melo/mel_processing.py +174 -0
  205. xinference/thirdparty/melo/models.py +1030 -0
  206. xinference/thirdparty/melo/modules.py +598 -0
  207. xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
  208. xinference/thirdparty/melo/monotonic_align/core.py +46 -0
  209. xinference/thirdparty/melo/preprocess_text.py +135 -0
  210. xinference/thirdparty/melo/split_utils.py +174 -0
  211. xinference/thirdparty/melo/text/__init__.py +35 -0
  212. xinference/thirdparty/melo/text/chinese.py +199 -0
  213. xinference/thirdparty/melo/text/chinese_bert.py +107 -0
  214. xinference/thirdparty/melo/text/chinese_mix.py +253 -0
  215. xinference/thirdparty/melo/text/cleaner.py +36 -0
  216. xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
  217. xinference/thirdparty/melo/text/cmudict.rep +129530 -0
  218. xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
  219. xinference/thirdparty/melo/text/english.py +284 -0
  220. xinference/thirdparty/melo/text/english_bert.py +39 -0
  221. xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
  222. xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
  223. xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
  224. xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
  225. xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
  226. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
  227. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
  228. xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
  229. xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
  230. xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
  231. xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
  232. xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
  233. xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
  234. xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
  235. xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
  236. xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
  237. xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
  238. xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
  239. xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
  240. xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
  241. xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
  242. xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
  243. xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
  244. xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
  245. xinference/thirdparty/melo/text/french.py +94 -0
  246. xinference/thirdparty/melo/text/french_bert.py +39 -0
  247. xinference/thirdparty/melo/text/japanese.py +647 -0
  248. xinference/thirdparty/melo/text/japanese_bert.py +49 -0
  249. xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
  250. xinference/thirdparty/melo/text/korean.py +192 -0
  251. xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
  252. xinference/thirdparty/melo/text/spanish.py +122 -0
  253. xinference/thirdparty/melo/text/spanish_bert.py +39 -0
  254. xinference/thirdparty/melo/text/symbols.py +290 -0
  255. xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
  256. xinference/thirdparty/melo/train.py +635 -0
  257. xinference/thirdparty/melo/train.sh +19 -0
  258. xinference/thirdparty/melo/transforms.py +209 -0
  259. xinference/thirdparty/melo/utils.py +424 -0
  260. xinference/types.py +17 -1
  261. xinference/web/ui/build/asset-manifest.json +6 -6
  262. xinference/web/ui/build/index.html +1 -1
  263. xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
  264. xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
  265. xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
  266. xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
  267. xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
  268. xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
  269. xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
  270. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
  271. xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
  272. xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
  273. xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
  274. xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
  275. xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
  276. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
  277. xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
  278. xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
  279. xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
  280. xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
  281. xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
  282. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
  283. xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
  284. xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
  285. xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
  286. xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
  287. xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
  288. xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
  289. xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
  290. xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
  291. xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
  292. xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
  293. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
  294. xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
  295. xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
  296. xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
  297. xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
  298. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
  299. xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
  300. xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
  301. xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
  302. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
  303. xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
  304. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
  305. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
  306. xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
  307. xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
  308. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
  309. xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
  310. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
  311. xinference/web/ui/node_modules/.package-lock.json +67 -3
  312. xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
  313. xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
  314. xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
  315. xinference/web/ui/node_modules/i18next/package.json +129 -0
  316. xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
  317. xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
  318. xinference/web/ui/node_modules/react-i18next/package.json +162 -0
  319. xinference/web/ui/node_modules/void-elements/package.json +34 -0
  320. xinference/web/ui/package-lock.json +69 -3
  321. xinference/web/ui/package.json +2 -0
  322. xinference/web/ui/src/locales/en.json +186 -0
  323. xinference/web/ui/src/locales/zh.json +186 -0
  324. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/METADATA +96 -36
  325. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/RECORD +335 -146
  326. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/WHEEL +1 -1
  327. xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
  328. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  329. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  330. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  331. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  332. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  333. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  334. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  335. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  336. xinference/thirdparty/fish_speech/tools/api.py +0 -440
  337. xinference/thirdparty/fish_speech/tools/commons.py +0 -35
  338. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  339. xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -34
  340. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  341. xinference/thirdparty/fish_speech/tools/webui.py +0 -485
  342. xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
  343. xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
  344. xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
  345. xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
  346. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
  347. xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
  348. xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
  349. xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
  350. xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
  351. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
  352. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
  353. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
  354. xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
  355. xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
  356. xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
  357. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
  358. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
  359. xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
  360. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
  361. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
  362. xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
  363. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
  364. xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
  365. /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
  366. /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
  367. /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
  368. /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
  369. /xinference/thirdparty/{fish_speech/fish_speech/configs → melo/text/fr_phonemizer}/__init__.py +0 -0
  370. /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
  371. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
  372. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
  373. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
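
The remainder of this diff is the hunk view for the largest changed file, which from its content is xinference/model/llm/llm_family.json (file 34 above). A recurring edit there renames MLX quantization labels from "4-bit"/"8-bit" to "4bit"/"8bit", so callers launching MLX models against 1.2.1 must pass the new strings. A minimal sketch for spotting such renames, assuming you have extracted llm_family.json from each wheel to the hypothetical paths below:

    import json

    def mlx_quantizations(path):
        # llm_family.json is a JSON array of model families; each family has
        # "model_specs" entries carrying "model_format" and "quantizations".
        with open(path, encoding="utf-8") as f:
            families = json.load(f)
        labels = {}
        for family in families:
            for spec in family.get("model_specs", []):
                if spec.get("model_format") == "mlx":
                    labels.setdefault(family["model_name"], set()).update(
                        spec.get("quantizations", [])
                    )
        return labels

    # Hypothetical extraction paths; adjust to wherever the wheels were unpacked.
    old = mlx_quantizations("xinference-0.16.3/xinference/model/llm/llm_family.json")
    new = mlx_quantizations("xinference-1.2.1/xinference/model/llm/llm_family.json")
    for name in sorted(old.keys() & new.keys()):
        if old[name] != new[name]:
            print(name, sorted(old[name]), "->", sorted(new[name]))

xinference/model/llm/llm_family.json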
@@ -205,8 +205,8 @@
205
205
  "8-bit",
206
206
  "none"
207
207
  ],
208
- "model_id": "THUDM/glm-4-9b-chat",
209
- "model_revision": "eb55a443d66541f30869f6caac5ad0d2e95bcbaa"
208
+ "model_id": "THUDM/glm-4-9b-chat-hf",
209
+ "model_revision": "c7f73fd9e0f378c87f3c8f2c25aec6ad705043cd"
210
210
  },
211
211
  {
212
212
  "model_format": "ggufv2",
@@ -269,8 +269,8 @@
269
269
  "8-bit",
270
270
  "none"
271
271
  ],
272
- "model_id": "THUDM/glm-4-9b-chat-1m",
273
- "model_revision": "0aa722c7e0745dd21453427dd44c257dd253304f"
272
+ "model_id": "THUDM/glm-4-9b-chat-1m-hf",
273
+ "model_revision": "0588cb62942f0f0a5545c695e5c1b019d64eabdc"
274
274
  },
275
275
  {
276
276
  "model_format": "ggufv2",
@@ -952,7 +952,7 @@
952
952
  "model_format": "mlx",
953
953
  "model_size_in_billions": 8,
954
954
  "quantizations": [
955
- "4-bit"
955
+ "4bit"
956
956
  ],
957
957
  "model_id": "mlx-community/Meta-Llama-3-8B-Instruct-4bit"
958
958
  },
@@ -960,7 +960,7 @@
960
960
  "model_format": "mlx",
961
961
  "model_size_in_billions": 8,
962
962
  "quantizations": [
963
- "8-bit"
963
+ "8bit"
964
964
  ],
965
965
  "model_id": "mlx-community/Meta-Llama-3-8B-Instruct-8bit"
966
966
  },
@@ -976,7 +976,7 @@
976
976
  "model_format": "mlx",
977
977
  "model_size_in_billions": 70,
978
978
  "quantizations": [
979
- "4-bit"
979
+ "4bit"
980
980
  ],
981
981
  "model_id": "mlx-community/Meta-Llama-3-70B-Instruct-4bit-mlx"
982
982
  },
@@ -984,7 +984,7 @@
984
984
  "model_format": "mlx",
985
985
  "model_size_in_billions": 70,
986
986
  "quantizations": [
987
- "8-bit"
987
+ "8bit"
988
988
  ],
989
989
  "model_id": "mlx-community/Meta-Llama-3-70B-Instruct-8bit"
990
990
  },
@@ -1229,7 +1229,7 @@
1229
1229
  "model_format": "mlx",
1230
1230
  "model_size_in_billions": 8,
1231
1231
  "quantizations": [
1232
- "4-bit"
1232
+ "4bit"
1233
1233
  ],
1234
1234
  "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
1235
1235
  },
@@ -1237,7 +1237,7 @@
1237
1237
  "model_format": "mlx",
1238
1238
  "model_size_in_billions": 8,
1239
1239
  "quantizations": [
1240
- "8-bit"
1240
+ "8bit"
1241
1241
  ],
1242
1242
  "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
1243
1243
  },
@@ -1253,7 +1253,7 @@
1253
1253
  "model_format": "mlx",
1254
1254
  "model_size_in_billions": 70,
1255
1255
  "quantizations": [
1256
- "4-bit"
1256
+ "4bit"
1257
1257
  ],
1258
1258
  "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
1259
1259
  },
@@ -1261,7 +1261,7 @@
1261
1261
  "model_format": "mlx",
1262
1262
  "model_size_in_billions": 70,
1263
1263
  "quantizations": [
1264
- "8-bit"
1264
+ "8bit"
1265
1265
  ],
1266
1266
  "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-8bit"
1267
1267
  },
@@ -1399,6 +1399,98 @@
1399
1399
  }
1400
1400
  ]
1401
1401
  },
1402
+ {
1403
+ "version": 1,
1404
+ "context_length": 131072,
1405
+ "model_name": "llama-3.3-instruct",
1406
+ "model_lang": [
1407
+ "en",
1408
+ "de",
1409
+ "fr",
1410
+ "it",
1411
+ "pt",
1412
+ "hi",
1413
+ "es",
1414
+ "th"
1415
+ ],
1416
+ "model_ability": [
1417
+ "chat",
1418
+ "tools"
1419
+ ],
1420
+ "model_description": "The Llama 3.3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks..",
1421
+ "model_specs": [
1422
+ {
1423
+ "model_format": "pytorch",
1424
+ "model_size_in_billions": 70,
1425
+ "quantizations": [
1426
+ "none"
1427
+ ],
1428
+ "model_id": "meta-llama/Llama-3.3-70B-Instruct"
1429
+ },
1430
+ {
1431
+ "model_format": "gptq",
1432
+ "model_size_in_billions": 70,
1433
+ "quantizations": [
1434
+ "Int4"
1435
+ ],
1436
+ "model_id": "shuyuej/Llama-3.3-70B-Instruct-GPTQ"
1437
+ },
1438
+ {
1439
+ "model_format": "awq",
1440
+ "model_size_in_billions": 70,
1441
+ "quantizations": [
1442
+ "Int4"
1443
+ ],
1444
+ "model_id": "casperhansen/llama-3.3-70b-instruct-awq"
1445
+ },
1446
+ {
1447
+ "model_format": "mlx",
1448
+ "model_size_in_billions": 70,
1449
+ "quantizations": [
1450
+ "3bit",
1451
+ "4bit",
1452
+ "6bit",
1453
+ "8bit",
1454
+ "fp16"
1455
+ ],
1456
+ "model_id": "mlx-community/Llama-3.3-70B-Instruct-{quantization}"
1457
+ },
1458
+ {
1459
+ "model_format": "ggufv2",
1460
+ "model_size_in_billions": 70,
1461
+ "quantizations": [
1462
+ "Q3_K_L",
1463
+ "Q4_K_M",
1464
+ "Q6_K",
1465
+ "Q8_0"
1466
+ ],
1467
+ "quantization_parts": {
1468
+ "Q6_K": [
1469
+ "00001-of-00002",
1470
+ "00002-of-00002"
1471
+ ],
1472
+ "Q8_0": [
1473
+ "00001-of-00002",
1474
+ "00002-of-00002"
1475
+ ]
1476
+ },
1477
+ "model_id": "lmstudio-community/Llama-3.3-70B-Instruct-GGUF",
1478
+ "model_file_name_template": "Llama-3.3-70B-Instruct-{quantization}.gguf",
1479
+ "model_file_name_split_template": "Llama-3.3-70B-Instruct-{quantization}-{part}.gguf"
1480
+ }
1481
+ ],
1482
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
1483
+ "stop_token_ids": [
1484
+ 128001,
1485
+ 128008,
1486
+ 128009
1487
+ ],
1488
+ "stop": [
1489
+ "<|end_of_text|>",
1490
+ "<|eot_id|>",
1491
+ "<|eom_id|>"
1492
+ ]
1493
+ },
1402
1494
  {
1403
1495
  "version": 1,
1404
1496
  "context_length": 2048,
@@ -2199,7 +2291,7 @@
2199
2291
  "model_format": "mlx",
2200
2292
  "model_size_in_billions": "0_5",
2201
2293
  "quantizations": [
2202
- "4-bit"
2294
+ "4bit"
2203
2295
  ],
2204
2296
  "model_id": "Qwen/Qwen2-0.5B-Instruct-MLX"
2205
2297
  },
@@ -2207,7 +2299,7 @@
2207
2299
  "model_format": "mlx",
2208
2300
  "model_size_in_billions": "1_5",
2209
2301
  "quantizations": [
2210
- "4-bit"
2302
+ "4bit"
2211
2303
  ],
2212
2304
  "model_id": "Qwen/Qwen2-1.5B-Instruct-MLX"
2213
2305
  },
@@ -2215,7 +2307,7 @@
2215
2307
  "model_format": "mlx",
2216
2308
  "model_size_in_billions": 7,
2217
2309
  "quantizations": [
2218
- "4-bit"
2310
+ "4bit"
2219
2311
  ],
2220
2312
  "model_id": "Qwen/Qwen2-7B-Instruct-MLX"
2221
2313
  },
@@ -2223,7 +2315,7 @@
2223
2315
  "model_format": "mlx",
2224
2316
  "model_size_in_billions": 72,
2225
2317
  "quantizations": [
2226
- "4-bit"
2318
+ "4bit"
2227
2319
  ],
2228
2320
  "model_id": "mlx-community/Qwen2-72B-Instruct-4bit"
2229
2321
  },
@@ -3222,7 +3314,7 @@
3222
3314
  "model_format": "mlx",
3223
3315
  "model_size_in_billions": 12,
3224
3316
  "quantizations": [
3225
- "4-bit"
3317
+ "4bit"
3226
3318
  ],
3227
3319
  "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-4bit"
3228
3320
  },
@@ -3230,7 +3322,7 @@
3230
3322
  "model_format": "mlx",
3231
3323
  "model_size_in_billions": 12,
3232
3324
  "quantizations": [
3233
- "8-bit"
3325
+ "8bit"
3234
3326
  ],
3235
3327
  "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-8bit"
3236
3328
  }
@@ -3370,7 +3462,7 @@
3370
3462
  "model_format": "mlx",
3371
3463
  "model_size_in_billions": 123,
3372
3464
  "quantizations": [
3373
- "4-bit"
3465
+ "4bit"
3374
3466
  ],
3375
3467
  "model_id": "mlx-community/Mistral-Large-Instruct-2407-4bit"
3376
3468
  },
@@ -3378,7 +3470,7 @@
3378
3470
  "model_format": "mlx",
3379
3471
  "model_size_in_billions": 123,
3380
3472
  "quantizations": [
3381
- "8-bit"
3473
+ "8bit"
3382
3474
  ],
3383
3475
  "model_id": "mlx-community/Mistral-Large-Instruct-2407-8bit"
3384
3476
  }
@@ -3411,8 +3503,8 @@
3411
3503
  "8-bit",
3412
3504
  "none"
3413
3505
  ],
3414
- "model_id": "mistralai/Mistral-7B-Instruct-v0.2",
3415
- "model_revision": "9552e7b1d9b2d5bbd87a5aa7221817285dbb6366"
3506
+ "model_id": "mistralai/Codestral-22B-v0.1",
3507
+ "model_revision": "8f5fe23af91885222a1563283c87416745a5e212"
3416
3508
  },
3417
3509
  {
3418
3510
  "model_format": "ggufv2",
@@ -3436,7 +3528,7 @@
3436
3528
  "model_format": "mlx",
3437
3529
  "model_size_in_billions": 22,
3438
3530
  "quantizations": [
3439
- "4-bit"
3531
+ "4bit"
3440
3532
  ],
3441
3533
  "model_id": "mlx-community/Codestral-22B-v0.1-4bit",
3442
3534
  "model_revision": "544626b38eb1c9524f0fa570ec7b29550c26b78d"
@@ -3445,7 +3537,7 @@
3445
3537
  "model_format": "mlx",
3446
3538
  "model_size_in_billions": 22,
3447
3539
  "quantizations": [
3448
- "8-bit"
3540
+ "8bit"
3449
3541
  ],
3450
3542
  "model_id": "mlx-community/Codestral-22B-v0.1-8bit",
3451
3543
  "model_revision": "0399a53970663950d57010e61a2796af524a1588"
@@ -4170,7 +4262,7 @@
4170
4262
  "model_format": "mlx",
4171
4263
  "model_size_in_billions": 6,
4172
4264
  "quantizations": [
4173
- "4-bit"
4265
+ "4bit"
4174
4266
  ],
4175
4267
  "model_id": "mlx-community/Yi-1.5-6B-Chat-4bit",
4176
4268
  "model_revision": "0177c9a12b869d6bc73f772b5a1981a7c966adb6"
@@ -4179,7 +4271,7 @@
4179
4271
  "model_format": "mlx",
4180
4272
  "model_size_in_billions": 6,
4181
4273
  "quantizations": [
4182
- "8-bit"
4274
+ "8bit"
4183
4275
  ],
4184
4276
  "model_id": "mlx-community/Yi-1.5-6B-Chat-8bit",
4185
4277
  "model_revision": "7756e65d1bf1e2e6e97aef6bc9484307225f536b"
@@ -4188,7 +4280,7 @@
4188
4280
  "model_format": "mlx",
4189
4281
  "model_size_in_billions": 9,
4190
4282
  "quantizations": [
4191
- "4-bit"
4283
+ "4bit"
4192
4284
  ],
4193
4285
  "model_id": "mlx-community/Yi-1.5-9B-Chat-4bit",
4194
4286
  "model_revision": "e15f886479c44e7d90f0ac13ace69b2319b71c2f"
@@ -4197,7 +4289,7 @@
4197
4289
  "model_format": "mlx",
4198
4290
  "model_size_in_billions": 9,
4199
4291
  "quantizations": [
4200
- "8-bit"
4292
+ "8bit"
4201
4293
  ],
4202
4294
  "model_id": "mlx-community/Yi-1.5-9B-Chat-8bit",
4203
4295
  "model_revision": "c1f742fcf3683edbe2d2c2fd1ad7ac2bb6c5ca36"
@@ -4206,7 +4298,7 @@
4206
4298
  "model_format": "mlx",
4207
4299
  "model_size_in_billions": 34,
4208
4300
  "quantizations": [
4209
- "4-bit"
4301
+ "4bit"
4210
4302
  ],
4211
4303
  "model_id": "mlx-community/Yi-1.5-34B-Chat-4bit",
4212
4304
  "model_revision": "945e3b306ef37c46ab444fdc857d1f3ea7247374"
@@ -4215,7 +4307,7 @@
4215
4307
  "model_format": "mlx",
4216
4308
  "model_size_in_billions": 34,
4217
4309
  "quantizations": [
4218
- "8-bit"
4310
+ "8bit"
4219
4311
  ],
4220
4312
  "model_id": "mlx-community/Yi-1.5-34B-Chat-8bit",
4221
4313
  "model_revision": "3c12761a2c6663f216caab6dff84b0dd29b472ac"
@@ -5266,7 +5358,7 @@
5266
5358
  "model_format": "mlx",
5267
5359
  "model_size_in_billions": 7,
5268
5360
  "quantizations": [
5269
- "4-bit"
5361
+ "4bit"
5270
5362
  ],
5271
5363
  "model_id": "mlx-community/internlm2_5-7b-chat-4bit",
5272
5364
  "model_revision": "d12097a867721978142a6048399f470a3d18beee"
@@ -5275,7 +5367,7 @@
5275
5367
  "model_format": "mlx",
5276
5368
  "model_size_in_billions": 7,
5277
5369
  "quantizations": [
5278
- "8-bit"
5370
+ "8bit"
5279
5371
  ],
5280
5372
  "model_id": "mlx-community/internlm2_5-7b-chat-8bit",
5281
5373
  "model_revision": "0ec94d61d30ab161b49c69f9bf92ec2b9986d234"
@@ -5803,7 +5895,7 @@
5803
5895
  "model_format": "mlx",
5804
5896
  "model_size_in_billions": 2,
5805
5897
  "quantizations": [
5806
- "4-bit"
5898
+ "4bit"
5807
5899
  ],
5808
5900
  "model_id": "mlx-community/gemma-2-2b-it-4bit"
5809
5901
  },
@@ -5811,7 +5903,7 @@
5811
5903
  "model_format": "mlx",
5812
5904
  "model_size_in_billions": 2,
5813
5905
  "quantizations": [
5814
- "8-bit"
5906
+ "8bit"
5815
5907
  ],
5816
5908
  "model_id": "mlx-community/gemma-2-2b-it-8bit"
5817
5909
  },
@@ -5827,7 +5919,7 @@
5827
5919
  "model_format": "mlx",
5828
5920
  "model_size_in_billions": 9,
5829
5921
  "quantizations": [
5830
- "4-bit"
5922
+ "4bit"
5831
5923
  ],
5832
5924
  "model_id": "mlx-community/gemma-2-9b-it-4bit"
5833
5925
  },
@@ -5835,7 +5927,7 @@
5835
5927
  "model_format": "mlx",
5836
5928
  "model_size_in_billions": 9,
5837
5929
  "quantizations": [
5838
- "8-bit"
5930
+ "8bit"
5839
5931
  ],
5840
5932
  "model_id": "mlx-community/gemma-2-9b-it-8bit"
5841
5933
  },
@@ -5851,7 +5943,7 @@
5851
5943
  "model_format": "mlx",
5852
5944
  "model_size_in_billions": 27,
5853
5945
  "quantizations": [
5854
- "4-bit"
5946
+ "4bit"
5855
5947
  ],
5856
5948
  "model_id": "mlx-community/gemma-2-27b-it-4bit"
5857
5949
  },
@@ -5859,7 +5951,7 @@
5859
5951
  "model_format": "mlx",
5860
5952
  "model_size_in_billions": 27,
5861
5953
  "quantizations": [
5862
- "8-bit"
5954
+ "8bit"
5863
5955
  ],
5864
5956
  "model_id": "mlx-community/gemma-2-27b-it-8bit"
5865
5957
  },
@@ -6925,7 +7017,7 @@
6925
7017
  "model_id":"Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4",
6926
7018
  "model_revision":"800d396518c82960ce6d231adecd07bbc474f0a9"
6927
7019
  },
6928
- {
7020
+ {
6929
7021
  "model_format":"awq",
6930
7022
  "model_size_in_billions":2,
6931
7023
  "quantizations":[
@@ -6934,6 +7026,15 @@
6934
7026
  "model_id":"Qwen/Qwen2-VL-2B-Instruct-AWQ",
6935
7027
  "model_revision":"ea8c5854c0044e28626719292de0d9b1a671f6fc"
6936
7028
  },
7029
+ {
7030
+ "model_format":"mlx",
7031
+ "model_size_in_billions":2,
7032
+ "quantizations":[
7033
+ "4bit",
7034
+ "8bit"
7035
+ ],
7036
+ "model_id":"mlx-community/Qwen2-VL-2B-Instruct-{quantization}"
7037
+ },
6937
7038
  {
6938
7039
  "model_format":"pytorch",
6939
7040
  "model_size_in_billions":7,
@@ -6970,6 +7071,15 @@
6970
7071
  "model_id":"Qwen/Qwen2-VL-7B-Instruct-AWQ",
6971
7072
  "model_revision":"f94216e8b513933bccd567bcd9b7350199f32538"
6972
7073
  },
7074
+ {
7075
+ "model_format":"mlx",
7076
+ "model_size_in_billions":7,
7077
+ "quantizations":[
7078
+ "4bit",
7079
+ "8bit"
7080
+ ],
7081
+ "model_id":"mlx-community/Qwen2-VL-7B-Instruct-{quantization}"
7082
+ },
6973
7083
  {
6974
7084
  "model_format":"pytorch",
6975
7085
  "model_size_in_billions":72,
@@ -6994,6 +7104,15 @@
6994
7104
  "Int8"
6995
7105
  ],
6996
7106
  "model_id":"Qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}"
7107
+ },
7108
+ {
7109
+ "model_format":"mlx",
7110
+ "model_size_in_billions":72,
7111
+ "quantizations":[
7112
+ "4bit",
7113
+ "8bit"
7114
+ ],
7115
+ "model_id":"mlx-community/Qwen2-VL-72B-Instruct-{quantization}"
6997
7116
  }
6998
7117
  ],
6999
7118
  "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
@@ -8015,7 +8134,7 @@
8015
8134
  "model_format": "mlx",
8016
8135
  "model_size_in_billions": "0_5",
8017
8136
  "quantizations": [
8018
- "4-bit"
8137
+ "4bit"
8019
8138
  ],
8020
8139
  "model_id": "mlx-community/Qwen2.5-0.5B-Instruct-4bit"
8021
8140
  },
@@ -8023,7 +8142,7 @@
8023
8142
  "model_format": "mlx",
8024
8143
  "model_size_in_billions": "0_5",
8025
8144
  "quantizations": [
8026
- "8-bit"
8145
+ "8bit"
8027
8146
  ],
8028
8147
  "model_id": "mlx-community/Qwen2.5-0.5B-Instruct-8bit"
8029
8148
  },
@@ -8039,7 +8158,7 @@
8039
8158
  "model_format": "mlx",
8040
8159
  "model_size_in_billions": "1_5",
8041
8160
  "quantizations": [
8042
- "4-bit"
8161
+ "4bit"
8043
8162
  ],
8044
8163
  "model_id": "mlx-community/Qwen2.5-1.5B-Instruct-4bit"
8045
8164
  },
@@ -8047,7 +8166,7 @@
8047
8166
  "model_format": "mlx",
8048
8167
  "model_size_in_billions": "1_5",
8049
8168
  "quantizations": [
8050
- "8-bit"
8169
+ "8bit"
8051
8170
  ],
8052
8171
  "model_id": "mlx-community/Qwen2.5-1.5B-Instruct-8bit"
8053
8172
  },
@@ -8063,7 +8182,7 @@
8063
8182
  "model_format": "mlx",
8064
8183
  "model_size_in_billions": 3,
8065
8184
  "quantizations": [
8066
- "4-bit"
8185
+ "4bit"
8067
8186
  ],
8068
8187
  "model_id": "mlx-community/Qwen2.5-3B-Instruct-4bit"
8069
8188
  },
@@ -8071,7 +8190,7 @@
8071
8190
  "model_format": "mlx",
8072
8191
  "model_size_in_billions": 3,
8073
8192
  "quantizations": [
8074
- "8-bit"
8193
+ "8bit"
8075
8194
  ],
8076
8195
  "model_id": "mlx-community/Qwen2.5-3B-Instruct-8bit"
8077
8196
  },
@@ -8087,7 +8206,7 @@
8087
8206
  "model_format": "mlx",
8088
8207
  "model_size_in_billions": 7,
8089
8208
  "quantizations": [
8090
- "4-bit"
8209
+ "4bit"
8091
8210
  ],
8092
8211
  "model_id": "mlx-community/Qwen2.5-7B-Instruct-4bit"
8093
8212
  },
@@ -8095,7 +8214,7 @@
8095
8214
  "model_format": "mlx",
8096
8215
  "model_size_in_billions": 7,
8097
8216
  "quantizations": [
8098
- "8-bit"
8217
+ "8bit"
8099
8218
  ],
8100
8219
  "model_id": "mlx-community/Qwen2.5-7B-Instruct-8bit"
8101
8220
  },
@@ -8111,7 +8230,7 @@
8111
8230
  "model_format": "mlx",
8112
8231
  "model_size_in_billions": 14,
8113
8232
  "quantizations": [
8114
- "4-bit"
8233
+ "4bit"
8115
8234
  ],
8116
8235
  "model_id": "mlx-community/Qwen2.5-14B-Instruct-4bit"
8117
8236
  },
@@ -8119,7 +8238,7 @@
8119
8238
  "model_format": "mlx",
8120
8239
  "model_size_in_billions": 14,
8121
8240
  "quantizations": [
8122
- "8-bit"
8241
+ "8bit"
8123
8242
  ],
8124
8243
  "model_id": "mlx-community/Qwen2.5-14B-Instruct-8bit"
8125
8244
  },
@@ -8135,7 +8254,7 @@
8135
8254
  "model_format": "mlx",
8136
8255
  "model_size_in_billions": 32,
8137
8256
  "quantizations": [
8138
- "4-bit"
8257
+ "4bit"
8139
8258
  ],
8140
8259
  "model_id": "mlx-community/Qwen2.5-32B-Instruct-4bit"
8141
8260
  },
@@ -8143,7 +8262,7 @@
8143
8262
  "model_format": "mlx",
8144
8263
  "model_size_in_billions": 32,
8145
8264
  "quantizations": [
8146
- "8-bit"
8265
+ "8bit"
8147
8266
  ],
8148
8267
  "model_id": "mlx-community/Qwen2.5-32B-Instruct-8bit"
8149
8268
  },
@@ -8159,7 +8278,7 @@
8159
8278
  "model_format": "mlx",
8160
8279
  "model_size_in_billions": 72,
8161
8280
  "quantizations": [
8162
- "4-bit"
8281
+ "4bit"
8163
8282
  ],
8164
8283
  "model_id": "mlx-community/Qwen2.5-72B-Instruct-4bit"
8165
8284
  },
@@ -8167,7 +8286,7 @@
8167
8286
  "model_format": "mlx",
8168
8287
  "model_size_in_billions": 72,
8169
8288
  "quantizations": [
8170
- "8-bit"
8289
+ "8bit"
8171
8290
  ],
8172
8291
  "model_id": "mlx-community/Qwen2.5-72B-Instruct-8bit"
8173
8292
  },
@@ -8205,6 +8324,16 @@
8205
8324
  ],
8206
8325
  "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
8207
8326
  "model_specs": [
8327
+ {
8328
+ "model_format": "pytorch",
8329
+ "model_size_in_billions": "0_5",
8330
+ "quantizations": [
8331
+ "4-bit",
8332
+ "8-bit",
8333
+ "none"
8334
+ ],
8335
+ "model_id": "Qwen/Qwen2.5-Coder-0.5B"
8336
+ },
8208
8337
  {
8209
8338
  "model_format": "pytorch",
8210
8339
  "model_size_in_billions": "1_5",
@@ -8213,8 +8342,17 @@
8213
8342
  "8-bit",
8214
8343
  "none"
8215
8344
  ],
8216
- "model_id": "Qwen/Qwen2.5-Coder-1.5B",
8217
- "model_revision": "d3586cfe793730945f8e4d7ef31032a3ee50247d"
8345
+ "model_id": "Qwen/Qwen2.5-Coder-1.5B"
8346
+ },
8347
+ {
8348
+ "model_format": "pytorch",
8349
+ "model_size_in_billions": "3",
8350
+ "quantizations": [
8351
+ "4-bit",
8352
+ "8-bit",
8353
+ "none"
8354
+ ],
8355
+ "model_id": "Qwen/Qwen2.5-Coder-3B"
8218
8356
  },
8219
8357
  {
8220
8358
  "model_format": "pytorch",
@@ -8224,8 +8362,27 @@
8224
8362
  "8-bit",
8225
8363
  "none"
8226
8364
  ],
8227
- "model_id": "Qwen/Qwen2.5-Coder-7B",
8228
- "model_revision": "30b6a7e874a78d46b80fa1db3194ea427dd41b08"
8365
+ "model_id": "Qwen/Qwen2.5-Coder-7B"
8366
+ },
8367
+ {
8368
+ "model_format": "pytorch",
8369
+ "model_size_in_billions": 14,
8370
+ "quantizations": [
8371
+ "4-bit",
8372
+ "8-bit",
8373
+ "none"
8374
+ ],
8375
+ "model_id": "Qwen/Qwen2.5-Coder-14B"
8376
+ },
8377
+ {
8378
+ "model_format": "pytorch",
8379
+ "model_size_in_billions": 32,
8380
+ "quantizations": [
8381
+ "4-bit",
8382
+ "8-bit",
8383
+ "none"
8384
+ ],
8385
+ "model_id": "Qwen/Qwen2.5-Coder-32B"
8229
8386
  }
8230
8387
  ]
8231
8388
  },
@@ -8243,6 +8400,16 @@
8243
8400
  ],
8244
8401
  "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
8245
8402
  "model_specs": [
8403
+ {
8404
+ "model_format": "pytorch",
8405
+ "model_size_in_billions": "0_5",
8406
+ "quantizations": [
8407
+ "4-bit",
8408
+ "8-bit",
8409
+ "none"
8410
+ ],
8411
+ "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct"
8412
+ },
8246
8413
  {
8247
8414
  "model_format": "pytorch",
8248
8415
  "model_size_in_billions": "1_5",
@@ -8253,6 +8420,16 @@
8253
8420
  ],
8254
8421
  "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct"
8255
8422
  },
8423
+ {
8424
+ "model_format": "pytorch",
8425
+ "model_size_in_billions": "3",
8426
+ "quantizations": [
8427
+ "4-bit",
8428
+ "8-bit",
8429
+ "none"
8430
+ ],
8431
+ "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct"
8432
+ },
8256
8433
  {
8257
8434
  "model_format": "pytorch",
8258
8435
  "model_size_in_billions": 7,
@@ -8263,57 +8440,171 @@
  ],
  "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct"
  },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 14,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct"
+ },
  {
  "model_format": "gptq",
- "model_size_in_billions": "7",
+ "model_size_in_billions": "0_5",
  "quantizations": [
  "Int4",
  "Int8"
  ],
- "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}"
+ "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-{quantization}"
  },
  {
- "model_format": "ggufv2",
+ "model_format": "gptq",
  "model_size_in_billions": "1_5",
  "quantizations": [
- "q2_k",
- "q3_k_m",
- "q4_0",
- "q4_k_m",
- "q5_0",
- "q5_k_m",
- "q6_k",
- "q8_0"
+ "Int4",
+ "Int8"
  ],
- "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
- "model_file_name_template": "qwen2.5-coder-1.5b-instruct-{quantization}.gguf"
+ "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-{quantization}"
  },
  {
- "model_format": "ggufv2",
- "model_size_in_billions": 7,
+ "model_format": "gptq",
+ "model_size_in_billions": "3",
  "quantizations": [
- "q2_k",
- "q3_k_m",
- "q4_0",
- "q4_k_m",
- "q5_0",
- "q5_k_m",
- "q6_k",
- "q8_0"
+ "Int4",
+ "Int8"
  ],
- "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
- "model_file_name_template": "qwen2.5-coder-7b-instruct-{quantization}.gguf",
- "model_file_name_split_template": "qwen2.5-coder-7b-instruct-{quantization}-{part}.gguf",
- "quantization_parts": {
- "q4_0": [
- "00001-of-00002",
- "00002-of-00002"
- ],
- "q4_k_m": [
- "00001-of-00002",
- "00002-of-00002"
- ],
- "q5_0": [
+ "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-{quantization}"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": "7",
+ "quantizations": [
+ "Int4",
+ "Int8"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": "14",
+ "quantizations": [
+ "Int4",
+ "Int8"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-{quantization}"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": "32",
+ "quantizations": [
+ "Int4",
+ "Int8"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-{quantization}"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": "0_5",
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-AWQ"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-AWQ"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": "3",
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-AWQ"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": "7",
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-AWQ"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": "14",
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-AWQ"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": "32",
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ"
+ },
+
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
+ "model_file_name_template": "qwen2.5-coder-1.5b-instruct-{quantization}.gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0"
+ ],
+ "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
+ "model_file_name_template": "qwen2.5-coder-7b-instruct-{quantization}.gguf",
+ "model_file_name_split_template": "qwen2.5-coder-7b-instruct-{quantization}-{part}.gguf",
+ "quantization_parts": {
+ "q4_0": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ],
+ "q4_k_m": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ],
+ "q5_0": [
  "00001-of-00002",
  "00002-of-00002"
  ],
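The GGUF specs above resolve concrete file names by plain string substitution: "model_file_name_template" is formatted with the selected quantization, and for quantizations listed under "quantization_parts" the "model_file_name_split_template" is formatted once per part. A minimal sketch of that expansion (hypothetical helper; the actual resolver lives in xinference's model caching code):

    def gguf_file_names(spec: dict, quantization: str) -> list[str]:
        # multi-part quantizations (e.g. q4_0 of the 7B spec) map to several files
        parts = spec.get("quantization_parts", {}).get(quantization)
        if parts is None:
            return [spec["model_file_name_template"].format(quantization=quantization)]
        return [
            spec["model_file_name_split_template"].format(quantization=quantization, part=p)
            for p in parts
        ]

    # gguf_file_names(spec, "q4_0") ->
    # ["qwen2.5-coder-7b-instruct-q4_0-00001-of-00002.gguf",
    #  "qwen2.5-coder-7b-instruct-q4_0-00002-of-00002.gguf"]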
@@ -8344,5 +8635,676 @@
  "<|im_start|>",
  "<|im_end|>"
  ]
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "QwQ-32B-Preview",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "Qwen/QwQ-32B-Preview"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "KirillR/QwQ-32B-Preview-AWQ"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "Q3_K_L",
+ "Q4_K_M",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "lmstudio-community/QwQ-32B-Preview-GGUF",
+ "model_file_name_template": "QwQ-32B-Preview-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "4bit"
+ ],
+ "model_id": "mlx-community/Qwen_QwQ-32B-Preview_MLX-4bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "8bit"
+ ],
+ "model_id": "mlx-community/Qwen_QwQ-32B-Preview_MLX-8bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/QwQ-32B-Preview-bf16"
+ }
+ ],
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+ "stop_token_ids": [
+ 151643,
+ 151644,
+ 151645
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|im_start|>",
+ "<|im_end|>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "deepseek-r1-distill-qwen",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "deepseek-r1-distill-qwen is distilled from DeepSeek-R1 based on Qwen",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "casperhansen/deepseek-r1-distill-qwen-1.5b-awq"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "jakiAJK/DeepSeek-R1-Distill-Qwen-1.5B_GPTQ-int4"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_M",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF",
+ "model_file_name_template": "DeepSeek-R1-Distill-Qwen-1.5B-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "3bit",
+ "4bit",
+ "6bit",
+ "8bit",
+ "bf16"
+ ],
+ "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-1.5B-{quantization}"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "jakiAJK/DeepSeek-R1-Distill-Qwen-7B_AWQ"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "jakiAJK/DeepSeek-R1-Distill-Qwen-7B_GPTQ-int4"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_M",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0",
+ "F16"
+ ],
+ "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF",
+ "model_file_name_template": "DeepSeek-R1-Distill-Qwen-7B-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "3bit",
+ "4bit",
+ "6bit",
+ "8bit",
+ "bf16"
+ ],
+ "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-7B-{quantization}"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 14,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 14,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "casperhansen/deepseek-r1-distill-qwen-14b-awq"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 14,
+ "quantizations": [
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_M",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0",
+ "F16"
+ ],
+ "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF",
+ "model_file_name_template": "DeepSeek-R1-Distill-Qwen-14B-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 14,
+ "quantizations": [
+ "3bit",
+ "4bit",
+ "6bit",
+ "8bit",
+ "bf16"
+ ],
+ "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-14B-{quantization}"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "casperhansen/deepseek-r1-distill-qwen-32b-awq"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_M",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0",
+ "F16"
+ ],
+ "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF",
+ "model_file_name_template": "DeepSeek-R1-Distill-Qwen-32B-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 32,
+ "quantizations": [
+ "3bit",
+ "4bit",
+ "6bit",
+ "8bit",
+ "bf16"
+ ],
+ "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-32B-{quantization}"
+ }
+ ],
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "stop_token_ids": [
+ 151643
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ]
+ },
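Once a family such as deepseek-r1-distill-qwen is registered, any spec above can be started through the RESTful client; a minimal sketch, assuming a local supervisor on the default port and the transformers engine (argument names follow the public launch_model API, but verify against your installed client version):

    from xinference.client import Client

    client = Client("http://localhost:9997")  # assumption: default supervisor endpoint
    uid = client.launch_model(
        model_name="deepseek-r1-distill-qwen",  # "model_name" of the entry above
        model_engine="transformers",            # assumption: engine for the pytorch spec
        model_format="pytorch",
        model_size_in_billions=7,
        quantization="none",
    )
    model = client.get_model(uid)
    print(model.chat(messages=[{"role": "user", "content": "Why is the sky blue?"}]))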
+ {
+ "version": 1,
+ "context_length": 8192,
+ "model_name": "glm-edge-chat",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "The GLM-Edge series is our attempt to face the end-side real-life scenarios, which consists of two sizes of large-language dialogue models and multimodal comprehension models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, GLM-Edge-V-5B). Among them, the 1.5B / 2B model is mainly for platforms such as mobile phones and cars, and the 4B / 5B model is mainly for platforms such as PCs.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "THUDM/glm-edge-1.5b-chat"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": "4",
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "THUDM/glm-edge-4b-chat"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "Q4_0",
+ "Q4_1",
+ "Q4_K",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_file_name_template": "ggml-model-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-1.5b-chat-gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "F16"
+ ],
+ "model_file_name_template": "glm-edge-1.5B-chat-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-1.5b-chat-gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "4",
+ "quantizations": [
+ "Q4_0",
+ "Q4_1",
+ "Q4_K",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_file_name_template": "ggml-model-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-4b-chat-gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "4",
+ "quantizations": [
+ "F16"
+ ],
+ "model_file_name_template": "glm-edge-4B-chat-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-4b-chat-gguf"
+ }
+ ],
+ "chat_template": "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}",
+ "stop_token_ids": [
+ 59246,
+ 59253,
+ 59255
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|user|>",
+ "<|observation|>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 8192,
+ "model_name": "glm-edge-v",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "The GLM-Edge series is our attempt to face the end-side real-life scenarios, which consists of two sizes of large-language dialogue models and multimodal comprehension models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, GLM-Edge-V-5B). Among them, the 1.5B / 2B model is mainly for platforms such as mobile phones and cars, and the 4B / 5B model is mainly for platforms such as PCs.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": "2",
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "THUDM/glm-edge-v-2b"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": "5",
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "THUDM/glm-edge-v-5b"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "2",
+ "quantizations": [
+ "Q4_0",
+ "Q4_1",
+ "Q4_K",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_file_name_template": "ggml-model-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-v-2b-gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "2",
+ "quantizations": [
+ "F16"
+ ],
+ "model_file_name_template": "glm-edge-v-2B-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-v-2b-gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "2",
+ "quantizations": [
+ "f16"
+ ],
+ "model_file_name_template": "mmproj-model-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-v-2b-gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "5",
+ "quantizations": [
+ "Q4_0",
+ "Q4_1",
+ "Q4_K",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_file_name_template": "ggml-model-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-v-5b-gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "5",
+ "quantizations": [
+ "F16"
+ ],
+ "model_file_name_template": "glm-edge-v-5B-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-v-5b-gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": "5",
+ "quantizations": [
+ "f16"
+ ],
+ "model_file_name_template": "mmproj-model-{quantization}.gguf",
+ "model_id": "THUDM/glm-edge-v-5b-gguf"
+ }
+ ],
+ "chat_template": "{% for item in messages %}{% if item['role'] != 'system' %}<|{{ item['role'] }}|>\n{% for content in item['content'] %}{% if content['type'] == 'image' %}{% for _ in range(578) %}<|begin_of_image|>{% endfor %}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}",
+ "stop_token_ids": [
+ 59246,
+ 59253,
+ 59255
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|user|>",
+ "<|observation|>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "QvQ-72B-Preview",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "QVQ-72B-Preview is an experimental research model developed by the Qwen team, focusing on enhancing visual reasoning capabilities.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 72,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "Qwen/QVQ-72B-Preview"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 72,
+ "quantizations": [
+ "3bit",
+ "4bit",
+ "6bit",
+ "8bit",
+ "bf16"
+ ],
+ "model_id": "mlx-community/QVQ-72B-Preview-{quantization}"
+ }
+ ],
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+ "stop_token_ids": [
+ 151645,
+ 151643
+ ],
+ "stop": [
+ "<|im_end|>",
+ "<|endoftext|>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "marco-o1",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "tools"
+ ],
+ "model_description": "Marco-o1: Towards Open Reasoning Models for Open-Ended Solutions",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "AIDC-AI/Marco-o1"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "QuantFactory/Marco-o1-GGUF",
+ "model_file_name_template": "Marco-o1.{quantization}.gguf"
+ }
+ ],
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n\n你是一个经过良好训练的AI助手,你的名字是Marco-o1.由阿里国际数字商业集团的AI Business创造.\n \n## 重要!!!!!\n当你回答问题时,你的思考应该在<Thought>内完成,<Output>内输出你的结果。\n<Thought>应该尽可能是英文,但是有2个特例,一个是对原文中的引用,另一个是是数学应该使用markdown格式,<Output>内的输出需要遵循用户输入的语言。\n <|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "stop_token_ids": [
+ 151643,
+ 151644,
+ 151645
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|im_start|>",
+ "<|im_end|>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 4096,
+ "model_name": "cogagent",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "The CogAgent-9B-20241220 model is based on GLM-4V-9B, a bilingual open-source VLM base model. Through data collection and optimization, multi-stage training, and strategy improvements, CogAgent-9B-20241220 achieves significant advancements in GUI perception, inference prediction accuracy, action space completeness, and task generalizability. ",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": "9",
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "THUDM/cogagent-9b-20241220"
+ }
+ ],
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "stop_token_ids": [
+ 151329,
+ 151336,
+ 151338
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|user|>",
+ "<|observation|>"
+ ]
  }
  ]
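A quick way to sanity-check any "chat_template" registered above is to render it directly with jinja2; a minimal sketch for the cogagent entry (assumes the jinja2 package is installed):

    from jinja2 import Template

    # same template string as in the cogagent entry; "\\n" mirrors the JSON escaping
    chat_template = (
        "{% for message in messages %}"
        "{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}"
        "{% endfor %}"
        "{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}"
    )
    prompt = Template(chat_template).render(
        messages=[{"role": "user", "content": "Describe this screenshot."}],
        add_generation_prompt=True,
    )
    print(prompt)
    # <|im_start|>user
    # Describe this screenshot.<|im_end|>
    # <|im_start|>assistant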