PyPI - xinference - Versions diffs - 1.5.0.post1__py3-none-any.whl → 1.5.1__py3-none-any.whl - Mend - Supply Chain Defender

xinference 1.5.0.post1 (py3-none-any.whl) → 1.5.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (89)

xinference/model/llm/llm_family_modelscope.json CHANGED Viewed

@@ -37,8 +37,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "modelscope/Llama-2-7b-chat-ms",
@@ -49,8 +47,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 13,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "modelscope/Llama-2-13b-chat-ms",
@@ -61,8 +57,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 70,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "modelscope/Llama-2-70b-chat-ms",
@@ -72,7 +66,7 @@
     ],
     "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = '<<SYS>>\n' + messages[0]['content'] | trim + '\n<</SYS>>\n\n' %}{% set messages = messages[1:] %}{% else %}{% set system_message = '' %}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 %}{% set content = system_message + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '<s>' + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + '</s>' }}{% endif %}{% endfor %}",
     "stop_token_ids": [
-        2
+      2
     ],
     "stop": []
   },
@@ -92,8 +86,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 8,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "LLM-Research/Meta-Llama-3-8B",
@@ -103,8 +95,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 70,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "LLM-Research/Meta-Llama-3-70B",
@@ -128,8 +118,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 8,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "LLM-Research/Meta-Llama-3-8B-Instruct",
@@ -139,8 +127,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 70,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "LLM-Research/Meta-Llama-3-70B-Instruct",
@@ -200,8 +186,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 8,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "LLM-Research/Meta-Llama-3.1-8B",
@@ -211,8 +195,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 70,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "LLM-Research/Meta-Llama-3.1-70B",
@@ -222,8 +204,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 405,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "LLM-Research/Meta-Llama-3.1-405B",
@@ -296,8 +276,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 70,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "LLM-Research/Meta-Llama-3.1-70B-Instruct",
@@ -325,8 +303,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 405,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "LLM-Research/Meta-Llama-3.1-405B-Instruct",
@@ -378,8 +354,8 @@
       "th"
     ],
     "model_ability": [
-	"chat",
-	"vision"
+      "chat",
+      "vision"
     ],
     "model_description": "Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
     "model_specs": [
@@ -404,14 +380,14 @@
     ],
     "chat_template": "{% for message in messages %}{% if loop.index0 == 0 %}{{ bos_token }}{% endif %}{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<|image|>' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{ '<|eot_id|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
     "stop_token_ids": [
-	128001,
-	128008,
-	128009
+      128001,
+      128008,
+      128009
     ],
     "stop": [
       "<|end_of_text|>",
-	"<|eot_id|>",
-	"<|eom_id|>"
+      "<|eot_id|>",
+      "<|eom_id|>"
     ]
   },
   {
@@ -429,8 +405,8 @@
       "th"
     ],
     "model_ability": [
-	"generate",
-	"vision"
+      "generate",
+      "vision"
     ],
     "model_description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
     "model_specs": [
@@ -440,8 +416,8 @@
         "quantizations": [
           "none"
         ],
-          "model_id": "LLM-Research/Llama-3.2-11B-Vision",
-	  "model_hub": "modelscope"
+        "model_id": "LLM-Research/Llama-3.2-11B-Vision",
+        "model_hub": "modelscope"
       },
       {
         "model_format": "pytorch",
@@ -449,8 +425,8 @@
         "quantizations": [
           "none"
         ],
-          "model_id": "LLM-Research/Llama-3.2-90B-Vision",
-	  "model_hub": "modelscope"
+        "model_id": "LLM-Research/Llama-3.2-90B-Vision",
+        "model_hub": "modelscope"
       }
     ]
   },
@@ -562,8 +538,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "baichuan-inc/Baichuan2-7B-Chat",
@@ -574,8 +548,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 13,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "baichuan-inc/Baichuan2-13B-Chat",
@@ -607,8 +579,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "baichuan-inc/Baichuan2-7B-Base",
@@ -619,8 +589,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 13,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "baichuan-inc/Baichuan2-13B-Base",
@@ -647,8 +615,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 9,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -713,8 +679,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 9,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -779,8 +743,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 9,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -817,8 +779,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 9,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "ZhipuAI/codegeex4-all-9b",
@@ -870,8 +830,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -882,8 +840,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 13,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -916,8 +872,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "xverse/XVERSE-7B",
@@ -928,8 +882,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 13,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "xverse/XVERSE-13B",
@@ -940,8 +892,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 65,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "xverse/XVERSE-65B",
@@ -950,108 +900,6 @@
       }
     ]
   },
-  {
-    "version": 1,
-    "context_length": 32768,
-    "model_name": "internlm2.5-chat",
-    "model_lang": [
-      "en",
-      "zh"
-    ],
-    "model_ability": [
-      "chat"
-    ],
-    "model_description": "InternLM2.5 series of the InternLM model.",
-    "model_specs": [
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": "1_8",
-        "quantizations": [
-          "none"
-        ],
-        "model_id": "Shanghai_AI_Laboratory/internlm2_5-1_8b-chat",
-        "model_hub": "modelscope"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 7,
-        "quantizations": [
-          "none"
-        ],
-        "model_id": "Shanghai_AI_Laboratory/internlm2_5-7b-chat",
-        "model_hub": "modelscope"
-      },
-      {
-        "model_format": "ggufv2",
-        "model_size_in_billions": 7,
-        "quantizations": [
-          "q2_k",
-          "q3_k_m",
-          "q4_0",
-          "q4_k_m",
-          "q5_0",
-          "q5_k_m",
-          "q6_k",
-          "q8_0",
-          "fp16"
-        ],
-        "model_id": "Shanghai_AI_Laboratory/internlm2_5-7b-chat-gguf",
-        "model_file_name_template": "internlm2_5-7b-chat-{quantization}.gguf",
-        "model_hub": "modelscope"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 20,
-        "quantizations": [
-          "none"
-        ],
-        "model_id": "Shanghai_AI_Laboratory/internlm2_5-20b-chat",
-        "model_hub": "modelscope"
-      }
-    ],
-    "chat_template": "{{ '<s>' }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
-    "stop_token_ids": [
-      2,
-      92542
-    ],
-    "stop": [
-      "</s>",
-      "<|im_end|>"
-    ]
-  },
-  {
-    "version": 1,
-    "context_length": 262144,
-    "model_name": "internlm2.5-chat-1m",
-    "model_lang": [
-      "en",
-      "zh"
-    ],
-    "model_ability": [
-      "chat"
-    ],
-    "model_description": "InternLM2.5 series of the InternLM model supports 1M long-context",
-    "model_specs": [
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 7,
-        "quantizations": [
-          "none"
-        ],
-        "model_id": "Shanghai_AI_Laboratory/internlm2_5-7b-chat-1m",
-        "model_hub": "modelscope"
-      }
-    ],
-    "chat_template": "{{ '<s>' }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
-    "stop_token_ids": [
-      2,
-      92542
-    ],
-    "stop": [
-      "</s>",
-      "<|im_end|>"
-    ]
-  },
   {
     "version": 1,
     "context_length": 100000,
@@ -1067,8 +915,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 13,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1079,8 +925,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 34,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1112,8 +956,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1124,8 +966,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 13,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1136,8 +976,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 34,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1221,8 +1059,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1233,8 +1069,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 13,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1245,8 +1079,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 34,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1289,7 +1121,7 @@
     ],
     "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = '<<SYS>>\n' + messages[0]['content'] | trim + '\n<</SYS>>\n\n' %}{% set messages = messages[1:] %}{% else %}{% set system_message = '' %}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 %}{% set content = system_message + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '<s>' + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + '</s>' }}{% endif %}{% endfor %}",
     "stop_token_ids": [
-        2
+      2
     ],
     "stop": [
       "</s>"
@@ -1311,8 +1143,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1402,8 +1232,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 13,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1414,8 +1242,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1426,8 +1252,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 13,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1456,8 +1280,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "46_7",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1486,8 +1308,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "46_7",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1520,8 +1340,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 6,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1532,8 +1350,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 9,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1544,8 +1360,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 34,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1571,8 +1385,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 6,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1583,8 +1395,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 34,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1620,8 +1430,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 6,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1632,8 +1440,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 34,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1672,8 +1478,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 6,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1684,8 +1488,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 9,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1696,8 +1498,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 34,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1723,8 +1523,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 6,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1735,8 +1533,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 9,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1747,8 +1543,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 34,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1847,8 +1641,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 9,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1859,8 +1651,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 34,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1898,8 +1688,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1931,8 +1719,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -1964,8 +1750,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -2071,7 +1855,7 @@
         "model_format": "pytorch",
         "model_size_in_billions": 123,
         "quantizations": [
-          "4-bit"
+          "none"
         ],
         "model_id": "LLM-Research/Mistral-Large-Instruct-2407-bnb-4bit",
         "model_hub": "modelscope"
@@ -2124,8 +1908,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "1_8",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -2136,8 +1918,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -2148,8 +1928,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 72,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -2160,8 +1938,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 14,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen-14B-Chat",
@@ -2243,8 +2019,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "0_5",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen1.5-0.5B-Chat",
@@ -2254,8 +2028,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "1_8",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen1.5-1.8B-Chat",
@@ -2265,8 +2037,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 4,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen1.5-4B-Chat",
@@ -2276,8 +2046,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen1.5-7B-Chat",
@@ -2287,8 +2055,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 14,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen1.5-14B-Chat",
@@ -2298,8 +2064,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 32,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen1.5-32B-Chat",
@@ -2309,8 +2073,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 72,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen1.5-72B-Chat",
@@ -2320,8 +2082,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 110,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen1.5-110B-Chat",
@@ -2629,8 +2389,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "2_7",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen1.5-MoE-A2.7B-Chat",
@@ -2675,8 +2433,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/CodeQwen1.5-7B",
@@ -2718,8 +2474,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/CodeQwen1.5-7B-Chat",
@@ -2765,8 +2519,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "0_5",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2-0.5B-Instruct",
@@ -2776,8 +2528,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "1_5",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2-1.5B-Instruct",
@@ -2787,8 +2537,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2-7B-Instruct",
@@ -2798,8 +2546,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 72,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2-72B-Instruct",
@@ -3054,8 +2800,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 14,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2-57B-A14B-Instruct",
@@ -3170,8 +2914,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/deepseek-llm-7b-base",
@@ -3181,8 +2923,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 67,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/deepseek-llm-67b-base",
@@ -3207,8 +2947,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/deepseek-llm-7b-chat",
@@ -3218,8 +2956,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 67,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/deepseek-llm-67b-chat",
@@ -3251,8 +2987,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "1_3",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/deepseek-coder-1.3b-base",
@@ -3262,8 +2996,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "6_7",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/deepseek-coder-6.7b-base",
@@ -3273,8 +3005,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 33,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/deepseek-coder-33b-base",
@@ -3299,8 +3029,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "1_3",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/deepseek-coder-1.3b-instruct",
@@ -3310,8 +3038,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "6_7",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/deepseek-coder-6.7b-instruct",
@@ -3321,8 +3047,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 33,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/deepseek-coder-33b-instruct",
@@ -3354,7 +3078,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 13,
         "quantizations": [
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -3380,7 +3103,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 13,
         "quantizations": [
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -3389,50 +3111,6 @@
       }
     ]
   },
-  {
-    "version": 1,
-    "context_length": 204800,
-    "model_name": "internlm2-chat",
-    "model_lang": [
-      "en",
-      "zh"
-    ],
-    "model_ability": [
-      "chat"
-    ],
-    "model_description": "The second generation of the InternLM model, InternLM2.",
-    "model_specs": [
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 7,
-        "quantizations": [
-          "none"
-        ],
-        "model_id": "Shanghai_AI_Laboratory/internlm2-chat-7b",
-        "model_hub": "modelscope",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 20,
-        "quantizations": [
-          "none"
-        ],
-        "model_id": "Shanghai_AI_Laboratory/internlm2-chat-20b",
-        "model_hub": "modelscope",
-        "model_revision": "master"
-      }
-    ],
-    "chat_template": "{{ '<s>' }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
-    "stop_token_ids": [
-      2,
-      92542
-    ],
-    "stop": [
-      "</s>",
-      "<|im_end|>"
-    ]
-  },
   {
     "version": 1,
     "context_length": 4096,
@@ -3497,9 +3175,7 @@
         "model_format": "pytorch",
         "model_size_in_billions": 14,
         "quantizations": [
-          "none",
-          "4-bit",
-          "8-bit"
+          "none"
         ],
         "model_id": "OrionStarAI/Orion-14B-Chat",
         "model_hub": "modelscope"
@@ -3528,336 +3204,116 @@
   },
   {
     "version": 1,
-    "context_length": 4096,
-    "model_name": "orion-chat-rag",
+    "context_length": 32768,
+    "model_name": "gemma-3-1b-it",
     "model_lang": [
-      "en",
-      "zh"
+      "en"
     ],
     "model_ability": [
       "chat"
     ],
-    "model_description": "Orion-14B series models are open-source multilingual large language models trained from scratch by OrionStarAI.",
+    "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
     "model_specs": [
       {
         "model_format": "pytorch",
-        "model_size_in_billions": 14,
+        "model_size_in_billions": 1,
         "quantizations": [
-          "none",
-          "4-bit",
-          "8-bit"
+          "none"
         ],
-        "model_hub": "modelscope",
-        "model_id": "OrionStarAI/Orion-14B-Chat-RAG"
+        "model_id": "LLM-Research/gemma-3-1b-it",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 1,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_K_L",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_K_L",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q6_K_L",
+          "Q8_0",
+          "bf16"
+        ],
+        "model_id": "bartowski/google_gemma-3-1b-it-GGUF",
+        "model_file_name_template": "google_gemma-3-1b-it-{quantization}.gguf",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 1,
+        "quantizations": [
+          "4bit",
+          "6bit",
+          "8bit",
+          "fp16"
+        ],
+        "model_id": "mlx-community/gemma-3-1b-it-{quantization}",
+        "model_hub": "modelscope"
       }
     ],
-    "chat_template": "{% for message in messages %}{% if loop.first %}{{ '<s>' }}{% endif %}{% if message['role'] == 'user' %}{{ 'Human: ' + message['content'] + '\n\nAssistant: ' + '</s>' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + '</s>' }}{% endif %}{% endfor %}",
+    "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n    {%- if messages[0]['content'] is string -%}\n        {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n    {%- else -%}\n        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n    {%- endif -%}\n    {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n    {%- set first_user_prefix = \"\" -%}\n    {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n        {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n    {%- endif -%}\n    {%- if (message['role'] == 'assistant') -%}\n        {%- set role = \"model\" -%}\n    {%- else -%}\n        {%- set role = message['role'] -%}\n    {%- endif -%}\n    {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n    {%- if message['content'] is string -%}\n        {{ message['content'] | trim }}\n    {%- elif message['content'] is iterable -%}\n        {%- for item in message['content'] -%}\n            {%- if item['type'] == 'image' -%}\n                {{ '<start_of_image>' }}\n            {%- elif item['type'] == 'text' -%}\n                {{ item['text'] | trim }}\n            {%- endif -%}\n        {%- endfor -%}\n    {%- else -%}\n        {{ raise_exception(\"Invalid content type\") }}\n    {%- endif -%}\n    {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n    {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
     "stop_token_ids": [
       1,
-      2,
-      0
+      106,
+      107
     ],
     "stop": [
-      "<s>",
-      "</s>",
-      "<unk>"
+      "<eos>",
+      "<end_of_turn>",
+      "<start_of_turn>"
     ]
   },
   {
     "version": 1,
-    "context_length": 4096,
-    "model_name": "yi-vl-chat",
+    "context_length": 131072,
+    "model_name": "gemma-3-it",
     "model_lang": [
-      "en",
-      "zh"
+      "en"
     ],
     "model_ability": [
       "chat",
       "vision"
     ],
-    "model_description": "Yi Vision Language (Yi-VL) model is the open-source, multimodal version of the Yi Large Language Model (LLM) series, enabling content comprehension, recognition, and multi-round conversations about images.",
+    "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
     "model_specs": [
       {
         "model_format": "pytorch",
-        "model_size_in_billions": 6,
+        "model_size_in_billions": 4,
         "quantizations": [
           "none"
         ],
-        "model_hub": "modelscope",
-        "model_id": "01ai/Yi-VL-6B"
+        "model_id": "LLM-Research/gemma-3-4b-it",
+        "model_hub": "modelscope"
       },
       {
         "model_format": "pytorch",
-        "model_size_in_billions": 34,
+        "model_size_in_billions": 12,
         "quantizations": [
           "none"
         ],
-        "model_hub": "modelscope",
-        "model_id": "01ai/Yi-VL-34B"
-      }
-    ],
-    "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
-    "stop_token_ids": [
-      2,
-      6,
-      7,
-      8
-    ],
-    "stop": [
-      "<|endoftext|>",
-      "<|im_start|>",
-      "<|im_end|>",
-      "<|im_sep|>"
-    ]
-  },
-  {
-    "version": 1,
-    "context_length": 8192,
-    "model_name": "gemma-it",
-    "model_lang": [
-      "en"
-    ],
-    "model_ability": [
-      "chat"
-    ],
-    "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
-    "model_specs": [
+        "model_id": "LLM-Research/gemma-3-12b-it",
+        "model_hub": "modelscope"
+      },
       {
         "model_format": "pytorch",
-        "model_size_in_billions": 2,
+        "model_size_in_billions": 27,
         "quantizations": [
-          "none",
-          "4-bit",
-          "8-bit"
+          "none"
         ],
-        "model_hub": "modelscope",
-        "model_id": "AI-ModelScope/gemma-2b-it"
+        "model_id": "LLM-Research/gemma-3-27b-it",
+        "model_hub": "modelscope"
       },
       {
-        "model_format": "pytorch",
-        "model_size_in_billions": 7,
-        "quantizations": [
-          "none",
-          "4-bit",
-          "8-bit"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "AI-ModelScope/gemma-7b-it"
-      }
-    ],
-    "chat_template": "{{ '<bos>' }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
-    "stop_token_ids": [
-      1,
-      106,
-      107
-    ],
-    "stop": [
-      "<eos>",
-      "<end_of_turn>",
-      "<start_of_turn>"
-    ]
-  },
-  {
-    "version": 1,
-    "context_length": 8192,
-    "model_name": "gemma-2-it",
-    "model_lang": [
-      "en"
-    ],
-    "model_ability": [
-      "chat"
-    ],
-    "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
-    "model_specs": [
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 2,
-        "quantizations": [
-          "none",
-          "4-bit",
-          "8-bit"
-        ],
-        "model_id": "LLM-Research/gemma-2-2b-it",
-        "model_hub": "modelscope"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 9,
-        "quantizations": [
-          "none",
-          "4-bit",
-          "8-bit"
-        ],
-        "model_id": "AI-ModelScope/gemma-2-9b-it",
-        "model_hub": "modelscope"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 27,
-        "quantizations": [
-          "none",
-          "4-bit",
-          "8-bit"
-        ],
-        "model_id": "AI-ModelScope/gemma-2-27b-it",
-        "model_hub": "modelscope"
-      },
-      {
-        "model_format": "ggufv2",
-        "model_size_in_billions": 9,
-        "quantizations": [
-          "Q2_K",
-          "Q3_K_L",
-          "Q3_K_M",
-          "Q3_K_S",
-          "Q4_K_L",
-          "Q4_K_M",
-          "Q4_K_S",
-          "Q5_K_L",
-          "Q5_K_M",
-          "Q5_K_S",
-          "Q6_K",
-          "Q6_K_L",
-          "Q8_0",
-          "f32"
-        ],
-        "model_id": "LLM-Research/gemma-2-9b-it-GGUF",
-        "model_file_name_template": "gemma-2-9b-it-{quantization}.gguf",
-        "model_hub": "modelscope"
-      }
-    ],
-    "chat_template": "{{ '<bos>' }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
-    "stop_token_ids": [
-      1,
-      106,
-      107
-    ],
-    "stop": [
-      "<eos>",
-      "<end_of_turn>",
-      "<start_of_turn>"
-    ]
-  },
-  {
-    "version": 1,
-    "context_length": 32768,
-    "model_name": "gemma-3-1b-it",
-    "model_lang": [
-      "en"
-    ],
-    "model_ability": [
-      "chat"
-    ],
-    "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
-    "model_specs": [
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 1,
-        "quantizations": [
-          "none",
-          "4-bit",
-          "8-bit"
-        ],
-        "model_id": "LLM-Research/gemma-3-1b-it",
-        "model_hub": "modelscope"
-      },
-      {
-        "model_format": "ggufv2",
-        "model_size_in_billions": 1,
-        "quantizations": [
-          "Q2_K",
-          "Q3_K_L",
-          "Q3_K_M",
-          "Q3_K_S",
-          "Q4_K_L",
-          "Q4_K_M",
-          "Q4_K_S",
-          "Q5_K_L",
-          "Q5_K_M",
-          "Q5_K_S",
-          "Q6_K",
-          "Q6_K_L",
-          "Q8_0",
-          "bf16"
-        ],
-        "model_id": "bartowski/google_gemma-3-1b-it-GGUF",
-        "model_file_name_template": "google_gemma-3-1b-it-{quantization}.gguf",
-        "model_hub": "modelscope"
-      },
-      {
-        "model_format": "mlx",
-        "model_size_in_billions": 1,
-        "quantizations": [
-          "4bit",
-          "6bit",
-          "8bit",
-          "fp16"
-        ],
-        "model_id": "mlx-community/gemma-3-1b-it-{quantization}",
-        "model_hub": "modelscope"
-      }
-    ],
-    "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n    {%- if messages[0]['content'] is string -%}\n        {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n    {%- else -%}\n        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n    {%- endif -%}\n    {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n    {%- set first_user_prefix = \"\" -%}\n    {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n        {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n    {%- endif -%}\n    {%- if (message['role'] == 'assistant') -%}\n        {%- set role = \"model\" -%}\n    {%- else -%}\n        {%- set role = message['role'] -%}\n    {%- endif -%}\n    {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n    {%- if message['content'] is string -%}\n        {{ message['content'] | trim }}\n    {%- elif message['content'] is iterable -%}\n        {%- for item in message['content'] -%}\n            {%- if item['type'] == 'image' -%}\n                {{ '<start_of_image>' }}\n            {%- elif item['type'] == 'text' -%}\n                {{ item['text'] | trim }}\n            {%- endif -%}\n        {%- endfor -%}\n    {%- else -%}\n        {{ raise_exception(\"Invalid content type\") }}\n    {%- endif -%}\n    {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n    {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
-    "stop_token_ids": [
-      1,
-      106,
-      107
-    ],
-    "stop": [
-      "<eos>",
-      "<end_of_turn>",
-      "<start_of_turn>"
-    ]
-  },
-  {
-    "version": 1,
-    "context_length": 131072,
-    "model_name": "gemma-3-it",
-    "model_lang": [
-      "en"
-    ],
-    "model_ability": [
-      "chat",
-      "vision"
-    ],
-    "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
-    "model_specs": [
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 4,
-        "quantizations": [
-          "none",
-          "4-bit",
-          "8-bit"
-        ],
-        "model_id": "LLM-Research/gemma-3-4b-it",
-        "model_hub": "modelscope"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 12,
-        "quantizations": [
-          "none",
-          "4-bit",
-          "8-bit"
-        ],
-        "model_id": "LLM-Research/gemma-3-12b-it",
-        "model_hub": "modelscope"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 27,
-        "quantizations": [
-          "none",
-          "4-bit",
-          "8-bit"
-        ],
-        "model_id": "LLM-Research/gemma-3-27b-it",
-        "model_hub": "modelscope"
-      },
-      {
-        "model_format": "ggufv2",
-        "model_size_in_billions": 4,
+        "model_format": "ggufv2",
+        "model_size_in_billions": 4,
         "quantizations": [
           "Q2_K",
           "Q3_K_L",
@@ -3974,38 +3430,38 @@
     ]
   },
   {
-    "version":1,
-    "context_length":2048,
-    "model_name":"OmniLMM",
-    "model_lang":[
+    "version": 1,
+    "context_length": 2048,
+    "model_name": "OmniLMM",
+    "model_lang": [
       "en",
       "zh"
     ],
-    "model_ability":[
+    "model_ability": [
       "chat",
       "vision"
     ],
-    "model_description":"OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
-    "model_specs":[
+    "model_description": "OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
+    "model_specs": [
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":3,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 3,
+        "quantizations": [
           "none"
         ],
-        "model_id":"OpenBMB/MiniCPM-V",
-        "model_hub":"modelscope",
-        "model_revision":"master"
+        "model_id": "OpenBMB/MiniCPM-V",
+        "model_hub": "modelscope",
+        "model_revision": "master"
       },
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":12,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 12,
+        "quantizations": [
           "none"
         ],
-        "model_id":"OpenBMB/OmniLMM-12B",
-        "model_hub":"modelscope",
-        "model_revision":"master"
+        "model_id": "OpenBMB/OmniLMM-12B",
+        "model_hub": "modelscope",
+        "model_revision": "master"
       }
     ],
     "chat_template": "",
@@ -4182,38 +3638,38 @@
     ]
   },
   {
-    "version":1,
-    "context_length":8192,
-    "model_name":"MiniCPM-Llama3-V-2_5",
-    "model_lang":[
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "MiniCPM-Llama3-V-2_5",
+    "model_lang": [
       "en",
       "zh"
     ],
-    "model_ability":[
+    "model_ability": [
       "chat",
       "vision"
     ],
-    "model_description":"MiniCPM-Llama3-V 2.5 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Llama3-8B-Instruct with a total of 8B parameters.",
-    "model_specs":[
+    "model_description": "MiniCPM-Llama3-V 2.5 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Llama3-8B-Instruct with a total of 8B parameters.",
+    "model_specs": [
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":8,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
           "none"
         ],
         "model_hub": "modelscope",
-        "model_id":"OpenBMB/MiniCPM-Llama3-V-2_5",
-        "model_revision":"master"
+        "model_id": "OpenBMB/MiniCPM-Llama3-V-2_5",
+        "model_revision": "master"
       },
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":8,
-        "quantizations":[
-          "int4"
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "none"
         ],
         "model_hub": "modelscope",
-        "model_id":"OpenBMB/MiniCPM-Llama3-V-2_5-{quantization}",
-        "model_revision":"master"
+        "model_id": "OpenBMB/MiniCPM-Llama3-V-2_5-{quantization}",
+        "model_revision": "master"
       }
     ],
     "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = '<|begin_of_text|>' + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
@@ -4225,244 +3681,48 @@
     ]
   },
   {
-    "version":1,
-    "context_length":32768,
-    "model_name":"MiniCPM-V-2.6",
-    "model_lang":[
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "MiniCPM-V-2.6",
+    "model_lang": [
       "en",
       "zh"
     ],
-    "model_ability":[
+    "model_ability": [
       "chat",
       "vision"
     ],
-    "model_description":"MiniCPM-V 2.6 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters.",
-    "model_specs":[
+    "model_description": "MiniCPM-V 2.6 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters.",
+    "model_specs": [
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":8,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
           "none"
         ],
         "model_hub": "modelscope",
-        "model_id":"OpenBMB/MiniCPM-V-2_6",
-        "model_revision":"master"
-      },
-      {
-        "model_format":"pytorch",
-        "model_size_in_billions":8,
-        "quantizations":[
-          "4-bit"
-        ],
-        "model_hub": "modelscope",
-        "model_id":"OpenBMB/MiniCPM-V-2_6-int4",
-        "model_revision":"master"
-      }
-    ],
-    "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
-    "stop_token_ids": [
-      151645,
-      151643
-    ],
-    "stop": [
-      "<|im_end|>",
-      "<|endoftext|>"
-    ]
-  },
-  {
-    "version": 1,
-    "context_length": 2048,
-    "model_name": "aquila2",
-    "model_lang": [
-      "zh"
-    ],
-    "model_ability": [
-      "generate"
-    ],
-    "model_description": "Aquila2 series models are the base language models",
-    "model_specs": [
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 34,
-        "quantizations": [
-          "none"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "BAAI/Aquila2-34B",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 70,
-        "quantizations": [
-          "none"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "BAAI/Aquila2-70B-Expr",
-        "model_revision": "master"
-      }
-    ]
-  },
-  {
-    "version": 1,
-    "context_length": 2048,
-    "model_name": "aquila2-chat",
-    "model_lang": [
-      "zh"
-    ],
-    "model_ability": [
-      "chat"
-    ],
-    "model_description": "Aquila2-chat series models are the chat models",
-    "model_specs": [
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 34,
-        "quantizations": [
-          "none"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "BAAI/AquilaChat2-34B",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "gptq",
-        "model_size_in_billions": 34,
-        "quantizations": [
-          "Int4"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "BAAI/AquilaChat2-34B-Int4-GPTQ",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 70,
-        "quantizations": [
-          "none"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "BAAI/AquilaChat2-70B-Expr",
-        "model_revision": "master"
-      }
-    ],
-    "chat_template": "{% for item in messages %}{% if loop.first and item['role'] == 'system' %}{{ item['content'] + '\n' }}{% endif %}{% if item['role'] == 'user' %}{{ 'USER: ' + item['content'] + '\n' }}{% elif item['role'] == 'assistant' %}{{ 'ASSISTANT: ' + item['content'] + '\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT: ' }}{% endif %}",
-    "stop_token_ids": [
-      100006,
-      100007
-    ],
-    "stop": [
-      "[CLS]",
-      "</s>"
-    ]
-  },
-  {
-    "version": 1,
-    "context_length": 16384,
-    "model_name": "aquila2-chat-16k",
-    "model_lang": [
-      "zh"
-    ],
-    "model_ability": [
-      "generate"
-    ],
-    "model_description": "AquilaChat2-16k series models are the long-text chat models",
-    "model_specs": [
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 34,
-        "quantizations": [
-          "none"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "BAAI/AquilaChat2-34B-16K",
-        "model_revision": "master"
-      }
-    ],
-    "chat_template": "{% for item in messages %}{% if loop.first and item['role'] == 'system' %}{{ item['content'] + '\n' }}{% endif %}{% if item['role'] == 'user' %}{{ 'USER: ' + item['content'] + '\n' }}{% elif item['role'] == 'assistant' %}{{ 'ASSISTANT: ' + item['content'] + '\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT: ' }}{% endif %}",
-    "stop_token_ids": [
-      100006,
-      100007
-    ],
-    "stop": [
-      "[CLS]",
-      "</s>"
-    ]
-  },
-  {
-    "version": 1,
-    "context_length": 131072,
-    "model_name": "c4ai-command-r-v01",
-    "model_lang": [
-      "en",
-      "fr",
-      "de",
-      "es",
-      "it",
-      "pt",
-      "ja",
-      "ko",
-      "zh",
-      "ar"
-    ],
-    "model_ability": [
-      "chat"
-    ],
-    "model_description": "C4AI Command-R is a research release of a 35 billion parameter highly performant generative model.",
-    "model_specs": [
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 35,
-        "quantizations": [
-          "none"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "AI-ModelScope/c4ai-command-r-v01",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 35,
-        "quantizations": [
-          "4-bit"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "mirror013/c4ai-command-r-v01-4bit",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "ggufv2",
-        "model_size_in_billions": 35,
-        "quantizations": [
-          "Q2_K",
-          "Q3_K_M",
-          "Q4_K_M",
-          "Q5_K_M"
-        ],
-        "model_id": "mirror013/C4AI-Command-R-v01-GGUF",
-        "model_file_name_template": "c4ai-command-r-v01-{quantization}.gguf",
-        "model_hub": "modelscope",
-        "model_revision": "master"
+        "model_id": "OpenBMB/MiniCPM-V-2_6",
+        "model_revision": "master"
       },
       {
         "model_format": "pytorch",
-        "model_size_in_billions": 104,
+        "model_size_in_billions": 8,
         "quantizations": [
           "none"
         ],
         "model_hub": "modelscope",
-        "model_id": "AI-ModelScope/c4ai-command-r-plus",
+        "model_id": "OpenBMB/MiniCPM-V-2_6-int4",
         "model_revision": "master"
       }
     ],
-    "chat_template": "{{ '<BOS_TOKEN>' }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>'  + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}",
+    "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
     "stop_token_ids": [
-      6,
-      255001
+      151645,
+      151643
     ],
     "stop": [
-      "<EOS_TOKEN>",
-      "<|END_OF_TURN_TOKEN|>"
+      "<|im_end|>",
+      "<|endoftext|>"
     ]
   },
   {
@@ -4481,8 +3741,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 4,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -4491,7 +3749,7 @@
       }
     ],
     "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ '<|endoftext|>' }}{% endif %}",
-    "stop_token_ids":[
+    "stop_token_ids": [
       32000,
       32001,
       32007
@@ -4518,8 +3776,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 4,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -4528,7 +3784,7 @@
       }
     ],
     "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ '<|endoftext|>' }}{% endif %}",
-    "stop_token_ids":[
+    "stop_token_ids": [
       32000,
       32001,
       32007
@@ -4541,267 +3797,64 @@
   },
   {
     "version": 1,
-    "context_length": 32768,
-    "model_name": "internvl-chat",
-    "model_lang": [
-        "en",
-        "zh"
-    ],
-    "model_ability": [
-        "chat",
-        "vision"
-    ],
-    "model_description": "InternVL 1.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
-    "model_specs": [
-        {
-            "model_format": "pytorch",
-            "model_size_in_billions": 26,
-            "quantizations": [
-              "4-bit",
-              "8-bit",
-              "none"
-            ],
-            "model_hub": "modelscope",
-            "model_id": "OpenGVLab/InternVL-Chat-V1-5",
-            "model_revision": "master"
-        }
-    ],
-    "chat_template": "{{ '<s>' }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
-    "stop_token_ids": [
-      2,
-      92542,
-      92543
-    ],
-    "stop": [
-      "</s>",
-      "<|im_end|>",
-      "<|im_start|>"
-    ]
-  },
-  {
-    "version": 1,
-    "context_length": 32768,
-    "model_name": "internvl2",
-    "model_lang": [
-        "en",
-        "zh"
-    ],
-    "model_ability": [
-        "chat",
-        "vision"
-    ],
-    "model_description": "InternVL 2 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
-    "model_specs": [
-        {
-            "model_format": "pytorch",
-            "model_size_in_billions": 1,
-            "quantizations": [
-              "4-bit",
-              "8-bit",
-              "none"
-            ],
-            "model_hub": "modelscope",
-            "model_id": "OpenGVLab/InternVL2-1B",
-            "model_revision": "master"
-        },
-      {
-            "model_format": "pytorch",
-            "model_size_in_billions": 2,
-            "quantizations": [
-              "4-bit",
-              "8-bit",
-              "none"
-            ],
-            "model_hub": "modelscope",
-            "model_id": "OpenGVLab/InternVL2-2B",
-            "model_revision": "master"
-        },
-        {
-            "model_format": "awq",
-            "model_size_in_billions": 2,
-            "quantizations": [
-              "Int4"
-            ],
-            "model_hub": "modelscope",
-            "model_id": "OpenGVLab/InternVL2-2B-AWQ",
-            "model_revision": "master"
-        },
-        {
-            "model_format": "pytorch",
-            "model_size_in_billions": 4,
-            "quantizations": [
-              "4-bit",
-              "8-bit",
-              "none"
-            ],
-            "model_hub": "modelscope",
-            "model_id": "OpenGVLab/InternVL2-4B",
-            "model_revision": "master"
-        },
-        {
-            "model_format": "pytorch",
-            "model_size_in_billions": 8,
-            "quantizations": [
-              "4-bit",
-              "8-bit",
-              "none"
-            ],
-            "model_hub": "modelscope",
-            "model_id": "OpenGVLab/InternVL2-8B",
-            "model_revision": "master"
-        },
-        {
-            "model_format": "awq",
-            "model_size_in_billions": 8,
-            "quantizations": [
-              "Int4"
-            ],
-            "model_hub": "modelscope",
-            "model_id": "OpenGVLab/InternVL2-8B-AWQ",
-            "model_revision": "master"
-        },
-        {
-            "model_format": "pytorch",
-            "model_size_in_billions": 26,
-            "quantizations": [
-              "4-bit",
-              "8-bit",
-              "none"
-            ],
-            "model_hub": "modelscope",
-            "model_id": "OpenGVLab/InternVL2-26B",
-            "model_revision": "master"
-        },
-        {
-            "model_format": "awq",
-            "model_size_in_billions": 26,
-            "quantizations": [
-              "Int4"
-            ],
-            "model_hub": "modelscope",
-            "model_id": "OpenGVLab/InternVL2-26B-AWQ",
-            "model_revision": "master"
-        },
-        {
-            "model_format": "pytorch",
-            "model_size_in_billions": 40,
-            "quantizations": [
-              "4-bit",
-              "8-bit",
-              "none"
-            ],
-            "model_hub": "modelscope",
-            "model_id": "OpenGVLab/InternVL2-40B",
-            "model_revision": "master"
-        },
-        {
-            "model_format": "awq",
-            "model_size_in_billions": 40,
-            "quantizations": [
-              "Int4"
-            ],
-            "model_hub": "modelscope",
-            "model_id": "OpenGVLab/InternVL2-40B-AWQ",
-            "model_revision": "master"
-        },
-        {
-            "model_format": "pytorch",
-            "model_size_in_billions": 76,
-            "quantizations": [
-              "4-bit",
-              "8-bit",
-              "none"
-            ],
-            "model_hub": "modelscope",
-            "model_id": "OpenGVLab/InternVL2-Llama3-76B",
-            "model_revision": "master"
-        },
-        {
-            "model_format": "awq",
-            "model_size_in_billions": 76,
-            "quantizations": [
-              "Int4"
-            ],
-            "model_hub": "modelscope",
-            "model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ",
-            "model_revision": "master"
-        }
-    ],
-    "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
-    "stop_token_ids": [],
-    "stop": []
-  },
-  {
-    "version": 1,
-    "context_length": 16384,
-    "model_name": "InternVL2.5",
+    "context_length": 8192,
+    "model_name": "InternVL3",
     "model_lang": [
-        "en",
-        "zh"
+      "en",
+      "zh"
     ],
     "model_ability": [
-        "chat",
-        "vision"
+      "chat",
+      "vision"
     ],
-    "model_description": "InternVL 2.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
+    "model_description": "InternVL3, an advanced multimodal large language model (MLLM) series that demonstrates superior overall performance.",
     "model_specs": [
       {
         "model_format": "pytorch",
         "model_size_in_billions": 1,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
+        "model_id": "OpenGVLab/InternVL3-1B",
         "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-1B",
         "model_revision": "master"
       },
       {
-        "model_format": "pytorch",
-        "model_size_in_billions": 2,
+        "model_format": "awq",
+        "model_size_in_billions": 1,
         "quantizations": [
-          "4-bit",
-          "8-bit",
-          "none"
+          "Int4"
         ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-2B",
-        "model_revision": "master"
+        "model_id": "OpenGVLab/InternVL3-1B-AWQ",
+        "model_hub": "modelscope"
       },
       {
         "model_format": "pytorch",
-        "model_size_in_billions": 4,
+        "model_size_in_billions": 2,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
+        "model_id": "OpenGVLab/InternVL3-2B",
         "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-4B",
         "model_revision": "master"
       },
       {
         "model_format": "awq",
-        "model_size_in_billions": 4,
+        "model_size_in_billions": 2,
         "quantizations": [
           "Int4"
         ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-4B-AWQ",
-        "model_revision": "master"
+        "model_id": "OpenGVLab/InternVL3-2B-AWQ",
+        "model_hub": "modelscope"
       },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 8,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
+        "model_id": "OpenGVLab/InternVL3-8B",
         "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-8B",
         "model_revision": "master"
       },
       {
@@ -4810,353 +3863,51 @@
         "quantizations": [
           "Int4"
         ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-8B-AWQ",
-        "model_revision": "master"
+        "model_id": "OpenGVLab/InternVL3-8B-AWQ",
+        "model_hub": "modelscope"
       },
       {
         "model_format": "pytorch",
-        "model_size_in_billions": 26,
+        "model_size_in_billions": 9,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
+        "model_id": "OpenGVLab/InternVL3-9B",
         "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-26B",
         "model_revision": "master"
       },
       {
         "model_format": "awq",
-        "model_size_in_billions": 26,
+        "model_size_in_billions": 9,
         "quantizations": [
           "Int4"
         ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-26B-AWQ",
-        "model_revision": "master"
+        "model_id": "OpenGVLab/InternVL3-9B-AWQ",
+        "model_hub": "modelscope"
       },
       {
         "model_format": "pytorch",
-        "model_size_in_billions": 38,
+        "model_size_in_billions": 14,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
+        "model_id": "OpenGVLab/InternVL3-14B",
         "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-38B",
         "model_revision": "master"
       },
       {
         "model_format": "awq",
-        "model_size_in_billions": 38,
+        "model_size_in_billions": 14,
         "quantizations": [
           "Int4"
         ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-38B-AWQ",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 78,
-        "quantizations": [
-          "4-bit",
-          "8-bit",
-          "none"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-78B",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "awq",
-        "model_size_in_billions": 78,
-        "quantizations": [
-          "Int4"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-78B-AWQ",
-        "model_revision": "master"
-      }
-    ],
-    "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
-    "stop_token_ids": [],
-    "stop": []
-  },
-  {
-    "version": 1,
-    "context_length": 16384,
-    "model_name": "InternVL2.5-MPO",
-    "model_lang": [
-        "en",
-        "zh"
-    ],
-    "model_ability": [
-        "chat",
-        "vision"
-    ],
-    "model_description": "InternVL 2.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
-    "model_specs": [
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 1,
-        "quantizations": [
-          "4-bit",
-          "8-bit",
-          "none"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-1B-MPO",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 2,
-        "quantizations": [
-          "4-bit",
-          "8-bit",
-          "none"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-2B-MPO",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 4,
-        "quantizations": [
-          "4-bit",
-          "8-bit",
-          "none"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-4B-MPO",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "awq",
-        "model_size_in_billions": 4,
-        "quantizations": [
-          "Int4"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-4B-MPO-AWQ",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 8,
-        "quantizations": [
-          "4-bit",
-          "8-bit",
-          "none"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-8B-MPO",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "awq",
-        "model_size_in_billions": 8,
-        "quantizations": [
-          "Int4"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-8B-MPO-AWQ",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 26,
-        "quantizations": [
-          "4-bit",
-          "8-bit",
-          "none"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-26B-MPO",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "awq",
-        "model_size_in_billions": 26,
-        "quantizations": [
-          "Int4"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-26B-MPO-AWQ",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 38,
-        "quantizations": [
-          "4-bit",
-          "8-bit",
-          "none"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-38B-MPO",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "awq",
-        "model_size_in_billions": 38,
-        "quantizations": [
-          "Int4"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-38B-MPO-AWQ",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 78,
-        "quantizations": [
-          "4-bit",
-          "8-bit",
-          "none"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-78B-MPO",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "awq",
-        "model_size_in_billions": 78,
-        "quantizations": [
-          "Int4"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "OpenGVLab/InternVL2_5-78B-MPO-AWQ",
-        "model_revision": "master"
-      }
-    ],
-    "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
-    "stop_token_ids": [],
-    "stop": []
-  },
-  {
-    "version": 1,
-    "context_length": 8192,
-    "model_name": "InternVL3",
-    "model_lang": [
-        "en",
-        "zh"
-    ],
-    "model_ability": [
-        "chat",
-        "vision"
-    ],
-    "model_description": "InternVL3, an advanced multimodal large language model (MLLM) series that demonstrates superior overall performance.",
-    "model_specs": [
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 1,
-        "quantizations": [
-          "8-bit",
-          "none"
-        ],
-        "model_id": "OpenGVLab/InternVL3-1B",
-        "model_hub": "modelscope",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "awq",
-        "model_size_in_billions": 1,
-        "quantizations": [
-          "Int4"
-        ],
-        "model_id": "OpenGVLab/InternVL3-1B-AWQ",
-        "model_hub": "modelscope"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 2,
-        "quantizations": [
-          "8-bit",
-          "none"
-        ],
-        "model_id": "OpenGVLab/InternVL3-2B",
-        "model_hub": "modelscope",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "awq",
-        "model_size_in_billions": 2,
-        "quantizations": [
-          "Int4"
-        ],
-        "model_id": "OpenGVLab/InternVL3-2B-AWQ",
-        "model_hub": "modelscope"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 8,
-        "quantizations": [
-          "8-bit",
-          "none"
-        ],
-        "model_id": "OpenGVLab/InternVL3-8B",
-        "model_hub": "modelscope",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "awq",
-        "model_size_in_billions": 8,
-        "quantizations": [
-          "Int4"
-        ],
-        "model_id": "OpenGVLab/InternVL3-8B-AWQ",
-        "model_hub": "modelscope"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 9,
-        "quantizations": [
-          "8-bit",
-          "none"
-        ],
-        "model_id": "OpenGVLab/InternVL3-9B",
-        "model_hub": "modelscope",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "awq",
-        "model_size_in_billions": 9,
-        "quantizations": [
-          "Int4"
-        ],
-        "model_id": "OpenGVLab/InternVL3-9B-AWQ",
-        "model_hub": "modelscope"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 14,
-        "quantizations": [
-          "8-bit",
-          "none"
-        ],
-        "model_id": "OpenGVLab/InternVL3-14B",
-        "model_hub": "modelscope",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "awq",
-        "model_size_in_billions": 14,
-        "quantizations": [
-          "Int4"
-        ],
-        "model_id": "OpenGVLab/InternVL3-14B-AWQ",
-        "model_hub": "modelscope"
+        "model_id": "OpenGVLab/InternVL3-14B-AWQ",
+        "model_hub": "modelscope"
       },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 38,
         "quantizations": [
-          "8-bit",
           "none"
         ],
         "model_id": "OpenGVLab/InternVL3-38B",
@@ -5176,7 +3927,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 78,
         "quantizations": [
-          "8-bit",
           "none"
         ],
         "model_id": "OpenGVLab/InternVL3-78B",
@@ -5206,12 +3956,12 @@
     "context_length": 8192,
     "model_name": "cogvlm2",
     "model_lang": [
-        "en",
-        "zh"
+      "en",
+      "zh"
     ],
     "model_ability": [
-        "chat",
-        "vision"
+      "chat",
+      "vision"
     ],
     "model_description": "CogVLM2 have achieved good results in many lists compared to the previous generation of CogVLM open source models. Its excellent performance can compete with some non-open source models.",
     "model_specs": [
@@ -5229,7 +3979,7 @@
         "model_format": "pytorch",
         "model_size_in_billions": 20,
         "quantizations": [
-          "int4"
+          "none"
         ],
         "model_hub": "modelscope",
         "model_id": "ZhipuAI/cogvlm2-llama3-chinese-chat-19B-{quantization}",
@@ -5251,12 +4001,12 @@
     "context_length": 8192,
     "model_name": "cogvlm2-video-llama3-chat",
     "model_lang": [
-        "en",
-        "zh"
+      "en",
+      "zh"
     ],
     "model_ability": [
-        "chat",
-        "vision"
+      "chat",
+      "vision"
     ],
     "model_description": "CogVLM2-Video achieves state-of-the-art performance on multiple video question answering tasks.",
     "model_specs": [
@@ -5264,8 +4014,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 12,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_hub": "modelscope",
@@ -5300,8 +4048,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "TeleAI/telechat-7B",
@@ -5323,8 +4069,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 12,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "TeleAI/TeleChat-12B",
@@ -5346,8 +4090,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 52,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "TeleAI/TeleChat-52B",
@@ -5378,146 +4120,146 @@
       "vision"
     ],
     "model_description": "Qwen2-VL: To See the World More Clearly.Qwen2-VL is the latest version of the vision language models in the Qwen model familities.",
-    "model_specs":[
+    "model_specs": [
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":7,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
           "none"
         ],
         "model_hub": "modelscope",
-        "model_id":"qwen/Qwen2-VL-7B-Instruct",
-        "model_revision":"master"
+        "model_id": "qwen/Qwen2-VL-7B-Instruct",
+        "model_revision": "master"
       },
       {
-        "model_format":"gptq",
-        "model_size_in_billions":7,
-        "quantizations":[
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
           "Int8"
         ],
         "model_hub": "modelscope",
-        "model_id":"qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8",
-        "model_revision":"master"
+        "model_id": "qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8",
+        "model_revision": "master"
       },
       {
-        "model_format":"gptq",
-        "model_size_in_billions":7,
-        "quantizations":[
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
           "Int4"
         ],
         "model_hub": "modelscope",
-        "model_id":"qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4",
-        "model_revision":"master"
+        "model_id": "qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4",
+        "model_revision": "master"
       },
       {
-        "model_format":"awq",
-        "model_size_in_billions":7,
-        "quantizations":[
+        "model_format": "awq",
+        "model_size_in_billions": 7,
+        "quantizations": [
           "Int4"
         ],
         "model_hub": "modelscope",
-        "model_id":"qwen/Qwen2-VL-7B-Instruct-AWQ",
-        "model_revision":"master"
+        "model_id": "qwen/Qwen2-VL-7B-Instruct-AWQ",
+        "model_revision": "master"
       },
       {
-        "model_format":"mlx",
-        "model_size_in_billions":7,
-        "quantizations":[
+        "model_format": "mlx",
+        "model_size_in_billions": 7,
+        "quantizations": [
           "8bit"
         ],
         "model_hub": "modelscope",
-        "model_id":"okwinds/Qwen2-VL-7B-Instruct-MLX-8bit",
-        "model_revision":"master"
+        "model_id": "okwinds/Qwen2-VL-7B-Instruct-MLX-8bit",
+        "model_revision": "master"
       },
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":2,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 2,
+        "quantizations": [
           "none"
         ],
         "model_hub": "modelscope",
-        "model_id":"qwen/Qwen2-VL-2B-Instruct",
-        "model_revision":"master"
+        "model_id": "qwen/Qwen2-VL-2B-Instruct",
+        "model_revision": "master"
       },
       {
-        "model_format":"gptq",
-        "model_size_in_billions":2,
-        "quantizations":[
+        "model_format": "gptq",
+        "model_size_in_billions": 2,
+        "quantizations": [
           "Int8"
         ],
         "model_hub": "modelscope",
-        "model_id":"qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8",
-        "model_revision":"master"
+        "model_id": "qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8",
+        "model_revision": "master"
       },
       {
-        "model_format":"gptq",
-        "model_size_in_billions":2,
-        "quantizations":[
+        "model_format": "gptq",
+        "model_size_in_billions": 2,
+        "quantizations": [
           "Int4"
         ],
         "model_hub": "modelscope",
-        "model_id":"qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4",
-        "model_revision":"master"
+        "model_id": "qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4",
+        "model_revision": "master"
       },
       {
-        "model_format":"awq",
-        "model_size_in_billions":2,
-        "quantizations":[
+        "model_format": "awq",
+        "model_size_in_billions": 2,
+        "quantizations": [
           "Int4"
         ],
         "model_hub": "modelscope",
-        "model_id":"qwen/Qwen2-VL-2B-Instruct-AWQ",
-        "model_revision":"master"
+        "model_id": "qwen/Qwen2-VL-2B-Instruct-AWQ",
+        "model_revision": "master"
       },
       {
-        "model_format":"mlx",
-        "model_size_in_billions":2,
-        "quantizations":[
+        "model_format": "mlx",
+        "model_size_in_billions": 2,
+        "quantizations": [
           "4bit",
           "8bit"
         ],
         "model_hub": "modelscope",
-        "model_id":"mlx-community/Qwen2-VL-2B-Instruct-{quantization}",
-        "model_revision":"master"
+        "model_id": "mlx-community/Qwen2-VL-2B-Instruct-{quantization}",
+        "model_revision": "master"
       },
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":72,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 72,
+        "quantizations": [
           "none"
         ],
-        "model_id":"qwen/Qwen2-VL-72B-Instruct",
+        "model_id": "qwen/Qwen2-VL-72B-Instruct",
         "model_hub": "modelscope"
       },
       {
-        "model_format":"awq",
-        "model_size_in_billions":72,
-        "quantizations":[
+        "model_format": "awq",
+        "model_size_in_billions": 72,
+        "quantizations": [
           "Int4"
         ],
-        "model_id":"qwen/Qwen2-VL-72B-Instruct-AWQ",
+        "model_id": "qwen/Qwen2-VL-72B-Instruct-AWQ",
         "model_hub": "modelscope"
       },
       {
-        "model_format":"gptq",
-        "model_size_in_billions":72,
-        "quantizations":[
+        "model_format": "gptq",
+        "model_size_in_billions": 72,
+        "quantizations": [
           "Int4",
           "Int8"
         ],
-        "model_id":"qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}",
+        "model_id": "qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}",
         "model_hub": "modelscope"
       },
       {
-        "model_format":"mlx",
-        "model_size_in_billions":72,
-        "quantizations":[
+        "model_format": "mlx",
+        "model_size_in_billions": 72,
+        "quantizations": [
           "4bit",
           "8bit"
         ],
         "model_hub": "modelscope",
-        "model_id":"okwinds/Qwen2-VL-72B-Instruct-MLX-{quantization}",
-        "model_revision":"master"
+        "model_id": "okwinds/Qwen2-VL-72B-Instruct-MLX-{quantization}",
+        "model_revision": "master"
       }
     ],
     "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
@@ -5531,95 +4273,95 @@
     ]
   },
   {
-    "version":1,
-    "context_length":128000,
-    "model_name":"qwen2.5-vl-instruct",
-    "model_lang":[
+    "version": 1,
+    "context_length": 128000,
+    "model_name": "qwen2.5-vl-instruct",
+    "model_lang": [
       "en",
       "zh"
     ],
-    "model_ability":[
+    "model_ability": [
       "chat",
       "vision"
     ],
-    "model_description":"Qwen2.5-VL: Qwen2.5-VL is the latest version of the vision language models in the Qwen model familities.",
-    "model_specs":[
+    "model_description": "Qwen2.5-VL: Qwen2.5-VL is the latest version of the vision language models in the Qwen model familities.",
+    "model_specs": [
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":3,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 3,
+        "quantizations": [
           "none"
         ],
         "model_hub": "modelscope",
-        "model_id":"Qwen/Qwen2.5-VL-3B-Instruct"
+        "model_id": "Qwen/Qwen2.5-VL-3B-Instruct"
       },
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":7,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
           "none"
         ],
         "model_hub": "modelscope",
-        "model_id":"Qwen/Qwen2.5-VL-7B-Instruct"
+        "model_id": "Qwen/Qwen2.5-VL-7B-Instruct"
       },
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":32,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
           "none"
         ],
         "model_hub": "modelscope",
-        "model_id":"Qwen/Qwen2.5-VL-32B-Instruct"
+        "model_id": "Qwen/Qwen2.5-VL-32B-Instruct"
       },
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":72,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 72,
+        "quantizations": [
           "none"
         ],
         "model_hub": "modelscope",
-        "model_id":"Qwen/Qwen2.5-VL-72B-Instruct"
+        "model_id": "Qwen/Qwen2.5-VL-72B-Instruct"
       },
       {
-        "model_format":"awq",
-        "model_size_in_billions":3,
-        "quantizations":[
+        "model_format": "awq",
+        "model_size_in_billions": 3,
+        "quantizations": [
           "Int4"
         ],
         "model_hub": "modelscope",
-        "model_id":"Qwen/Qwen2.5-VL-3B-Instruct-AWQ"
+        "model_id": "Qwen/Qwen2.5-VL-3B-Instruct-AWQ"
       },
       {
-        "model_format":"awq",
-        "model_size_in_billions":7,
-        "quantizations":[
+        "model_format": "awq",
+        "model_size_in_billions": 7,
+        "quantizations": [
           "Int4"
         ],
         "model_hub": "modelscope",
-        "model_id":"Qwen/Qwen2.5-VL-7B-Instruct-AWQ"
+        "model_id": "Qwen/Qwen2.5-VL-7B-Instruct-AWQ"
       },
       {
-        "model_format":"awq",
-        "model_size_in_billions":32,
-        "quantizations":[
+        "model_format": "awq",
+        "model_size_in_billions": 32,
+        "quantizations": [
           "Int4"
         ],
         "model_hub": "modelscope",
-        "model_id":"Qwen/Qwen2.5-VL-32B-Instruct-AWQ"
+        "model_id": "Qwen/Qwen2.5-VL-32B-Instruct-AWQ"
       },
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":72,
-        "quantizations":[
-          "Int4"
+        "model_format": "pytorch",
+        "model_size_in_billions": 72,
+        "quantizations": [
+          "none"
         ],
         "model_hub": "modelscope",
-        "model_id":"Qwen/Qwen2.5-VL-72B-Instruct-AWQ"
+        "model_id": "Qwen/Qwen2.5-VL-72B-Instruct-AWQ"
       },
       {
-        "model_format":"mlx",
-        "model_size_in_billions":3,
-        "quantizations":[
+        "model_format": "mlx",
+        "model_size_in_billions": 3,
+        "quantizations": [
           "3bit",
           "4bit",
           "6bit",
@@ -5627,12 +4369,12 @@
           "bf16"
         ],
         "model_hub": "modelscope",
-        "model_id":"mlx-community/Qwen2.5-VL-3B-Instruct-{quantization}"
+        "model_id": "mlx-community/Qwen2.5-VL-3B-Instruct-{quantization}"
       },
       {
-        "model_format":"mlx",
-        "model_size_in_billions":7,
-        "quantizations":[
+        "model_format": "mlx",
+        "model_size_in_billions": 7,
+        "quantizations": [
           "3bit",
           "4bit",
           "6bit",
@@ -5640,12 +4382,12 @@
           "bf16"
         ],
         "model_hub": "modelscope",
-        "model_id":"mlx-community/Qwen2.5-VL-7B-Instruct-{quantization}"
+        "model_id": "mlx-community/Qwen2.5-VL-7B-Instruct-{quantization}"
       },
       {
-        "model_format":"mlx",
-        "model_size_in_billions":72,
-        "quantizations":[
+        "model_format": "mlx",
+        "model_size_in_billions": 72,
+        "quantizations": [
           "3bit",
           "4bit",
           "6bit",
@@ -5653,7 +4395,7 @@
           "bf16"
         ],
         "model_hub": "modelscope",
-        "model_id":"mlx-community/Qwen2.5-VL-72B-Instruct-{quantization}"
+        "model_id": "mlx-community/Qwen2.5-VL-72B-Instruct-{quantization}"
       }
     ],
     "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
@@ -5667,29 +4409,29 @@
     ]
   },
   {
-    "version":1,
-    "context_length":32768,
-    "model_name":"qwen2.5-omni",
-    "model_lang":[
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "qwen2.5-omni",
+    "model_lang": [
       "en",
       "zh"
     ],
-    "model_ability":[
+    "model_ability": [
       "chat",
       "vision",
       "audio",
       "omni"
     ],
-    "model_description":"Qwen2.5-Omni: the new flagship end-to-end multimodal model in the Qwen series.",
-    "model_specs":[
+    "model_description": "Qwen2.5-Omni: the new flagship end-to-end multimodal model in the Qwen series.",
+    "model_specs": [
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":7,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
           "none"
         ],
         "model_hub": "modelscope",
-        "model_id":"Qwen/Qwen2.5-Omni-7B"
+        "model_id": "Qwen/Qwen2.5-Omni-7B"
       }
     ],
     "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
@@ -5838,8 +4580,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 16,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/DeepSeek-V2-Lite",
@@ -5850,8 +4590,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 236,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/DeepSeek-V2",
@@ -5877,8 +4615,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 16,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/DeepSeek-V2-Lite-Chat",
@@ -5889,8 +4625,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 236,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/DeepSeek-V2-Chat",
@@ -5923,8 +4657,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 236,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/DeepSeek-V2-Chat-0628",
@@ -5957,8 +4689,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 236,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/DeepSeek-V2.5",
@@ -5991,8 +4721,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 671,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/DeepSeek-V3",
@@ -6143,8 +4871,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 671,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/DeepSeek-R1",
@@ -6348,87 +5074,6 @@
     "reasoning_start_tag": "<think>",
     "reasoning_end_tag": "</think>"
   },
-  {
-    "version": 1,
-    "context_length": 131072,
-    "model_name": "yi-coder-chat",
-    "model_lang": [
-      "en"
-    ],
-    "model_ability": [
-      "chat"
-    ],
-    "model_description": "Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.Excelling in long-context understanding with a maximum context length of 128K tokens.Supporting 52 major programming languages, including popular ones such as Java, Python, JavaScript, and C++.",
-    "model_specs": [
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 9,
-        "quantizations": [
-          "none"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "01ai/Yi-Coder-9B-Chat",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": "1_5",
-        "quantizations": [
-          "none"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "01ai/Yi-Coder-1.5B-Chat",
-        "model_revision": "master"
-      }
-    ],
-    "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}",
-    "stop_token_ids": [
-      1,
-      2,
-      6,
-      7
-    ],
-    "stop": [
-      "<|startoftext|>",
-      "<|endoftext|>",
-      "<|im_start|>",
-      "<|im_end|>"
-    ]
-  },
-  {
-    "version": 1,
-    "context_length": 131072,
-    "model_name": "yi-coder",
-    "model_lang": [
-      "en"
-    ],
-    "model_ability": [
-      "generate"
-    ],
-    "model_description": "Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.Excelling in long-context understanding with a maximum context length of 128K tokens.Supporting 52 major programming languages, including popular ones such as Java, Python, JavaScript, and C++.",
-    "model_specs": [
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": 9,
-        "quantizations": [
-          "none"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "01ai/Yi-Coder-9B",
-        "model_revision": "master"
-      },
-      {
-        "model_format": "pytorch",
-        "model_size_in_billions": "1_5",
-        "quantizations": [
-          "none"
-        ],
-        "model_hub": "modelscope",
-        "model_id": "01ai/Yi-Coder-1.5B",
-        "model_revision": "master"
-      }
-    ]
-  },
   {
     "version": 1,
     "context_length": 32768,
@@ -6446,8 +5091,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "0_5",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-0.5B",
@@ -6458,8 +5101,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "1_5",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-1.5B",
@@ -6470,8 +5111,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 3,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-3B",
@@ -6482,8 +5121,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-7B",
@@ -6494,8 +5131,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 14,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-14B",
@@ -6506,8 +5141,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 32,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-32B",
@@ -6518,8 +5151,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 72,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-72B",
@@ -6546,8 +5177,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "0_5",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-0.5B-Instruct",
@@ -6557,8 +5186,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "1_5",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-1.5B-Instruct",
@@ -6568,8 +5195,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 3,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-3B-Instruct",
@@ -6579,8 +5204,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-7B-Instruct",
@@ -6590,8 +5213,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 14,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-14B-Instruct",
@@ -6601,8 +5222,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 32,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-32B-Instruct",
@@ -6612,8 +5231,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 72,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-72B-Instruct",
@@ -6727,7 +5344,7 @@
       },
       {
         "model_format": "awq",
-        "model_size_in_billions":14,
+        "model_size_in_billions": 14,
         "quantizations": [
           "Int4"
         ],
@@ -7008,7 +5625,7 @@
         "model_file_name_template": "qwen2_5-72b-instruct-{quantization}.gguf",
         "model_file_name_split_template": "qwen2.5-72b-instruct-{quantization}-{part}.gguf",
         "quantization_parts": {
-           "q2_k": [
+          "q2_k": [
             "00001-of-00007",
             "00002-of-00007",
             "00003-of-00007",
@@ -7267,8 +5884,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "0_5",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-Coder-0.5B",
@@ -7279,8 +5894,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "1_5",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-Coder-1.5B",
@@ -7291,8 +5904,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "3",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-Coder-3B",
@@ -7303,8 +5914,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-Coder-7B",
@@ -7315,8 +5924,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 14,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-Coder-14B",
@@ -7327,8 +5934,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 32,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-Coder-32B",
@@ -7355,8 +5960,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "0_5",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-Coder-0.5B-Instruct",
@@ -7367,19 +5970,16 @@
         "model_format": "pytorch",
         "model_size_in_billions": "1_5",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct",
         "model_revision": "master",
         "model_hub": "modelscope"
-      },      {
+      },
+      {
         "model_format": "pytorch",
         "model_size_in_billions": "3",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-Coder-3B-Instruct",
@@ -7390,8 +5990,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-Coder-7B-Instruct",
@@ -7402,8 +6000,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 14,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-Coder-14B-Instruct",
@@ -7414,8 +6010,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 32,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "qwen/Qwen2.5-Coder-32B-Instruct",
@@ -7548,7 +6142,6 @@
         "model_revision": "master",
         "model_hub": "modelscope"
       },
       {
         "model_format": "ggufv2",
         "model_size_in_billions": "1_5",
@@ -7641,8 +6234,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 32,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "Qwen/QwQ-32B-Preview",
@@ -7692,7 +6283,7 @@
       "<|im_end|>"
     ]
   },
-{
+  {
     "version": 1,
     "context_length": 131072,
     "model_name": "QwQ-32B",
@@ -7702,7 +6293,8 @@
     ],
     "model_ability": [
       "chat",
-      "reasoning"
+      "reasoning",
+      "tools"
     ],
     "model_description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.",
     "model_specs": [
@@ -7710,8 +6302,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 32,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "Qwen/QwQ-32B",
@@ -7812,8 +6402,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "1_5",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
@@ -7852,8 +6440,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
@@ -7901,8 +6487,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 14,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
@@ -7941,8 +6525,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 32,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
@@ -8016,8 +6598,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 8,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
@@ -8057,8 +6637,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 70,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
@@ -8137,8 +6715,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "1_5",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "ZhipuAI/glm-edge-1.5b-chat",
@@ -8148,8 +6724,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "4",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "ZhipuAI/glm-edge-4b-chat",
@@ -8248,8 +6822,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "2",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "ZhipuAI/glm-edge-v-2b",
@@ -8259,8 +6831,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "5",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "ZhipuAI/glm-edge-v-5b",
@@ -8379,8 +6949,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 72,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "Qwen/QVQ-72B-Preview",
@@ -8428,8 +6996,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "AIDC-AI/Marco-o1",
@@ -8489,8 +7055,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": "9",
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "ZhipuAI/cogagent-9b-20241220",
@@ -8527,8 +7091,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 8,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "Shanghai_AI_Laboratory/internlm3-8b-instruct",
@@ -8570,13 +7132,13 @@
         "model_hub": "modelscope"
       },
       {
-        "model_format":"mlx",
-        "model_size_in_billions":8,
-        "quantizations":[
+        "model_format": "mlx",
+        "model_size_in_billions": 8,
+        "quantizations": [
           "4bit"
         ],
         "model_hub": "modelscope",
-        "model_id":"mlx-community/internlm3-8b-instruct-{quantization}"
+        "model_id": "mlx-community/internlm3-8b-instruct-{quantization}"
       }
     ],
     "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
@@ -8606,8 +7168,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "Qwen/Qwen2.5-7B-Instruct-1M",
@@ -8617,8 +7177,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 14,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "Qwen/Qwen2.5-14B-Instruct-1M",
@@ -8652,15 +7210,13 @@
         "model_format": "pytorch",
         "model_size_in_billions": 3,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "moonshotai/Moonlight-16B-A3B-Instruct",
         "model_hub": "modelscope"
       }
     ],
-    "chat_template":"{%- for message in messages -%}{%- if loop.first and messages[0]['role'] != 'system' -%}<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>{%- endif -%}{%- if message['role'] == 'system' -%}<|im_system|>{%- endif -%}{%- if message['role'] == 'user' -%}<|im_user|>{%- endif -%}{%- if message['role'] == 'assistant' -%}<|im_assistant|>{%- endif -%}{{ message['role'] }}<|im_middle|>{{message['content']}}<|im_end|>{%- endfor -%}{%- if add_generation_prompt -%}<|im_assistant|>assistant<|im_middle|>{%- endif -%}",
+    "chat_template": "{%- for message in messages -%}{%- if loop.first and messages[0]['role'] != 'system' -%}<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>{%- endif -%}{%- if message['role'] == 'system' -%}<|im_system|>{%- endif -%}{%- if message['role'] == 'user' -%}<|im_user|>{%- endif -%}{%- if message['role'] == 'assistant' -%}<|im_assistant|>{%- endif -%}{{ message['role'] }}<|im_middle|>{{message['content']}}<|im_end|>{%- endfor -%}{%- if add_generation_prompt -%}<|im_assistant|>assistant<|im_middle|>{%- endif -%}",
     "stop_token_ids": [
       163586
     ],
@@ -8685,8 +7241,6 @@
         "model_format": "pytorch",
         "model_size_in_billions": 7,
         "quantizations": [
-          "4-bit",
-          "8-bit",
           "none"
         ],
         "model_id": "AI-ModelScope/Fin-R1",
@@ -8858,25 +7412,246 @@
         ],
         "model_id": "ZhipuAI/GLM-4-32B-0414",
         "model_hub": "modelscope"
-      }
-    ],
-    "chat_template": "[gMASK]<sop>{%- if tools -%}<|system|>\n# 可用工具\n{% for tool in tools %}{%- set function = tool.function if tool.get(\"function\") else tool %}\n\n## {{ function.name }}\n\n{{ function | tojson(indent=4, ensure_ascii=False) }}\n在调用上述函数时，请使用 Json 格式表示调用的参数。{%- endfor %}{%- endif -%}{%- for msg in messages %}{%- if msg.role == 'system' %}<|system|>\n{{ msg.content }}{%- endif %}{%- endfor %}{%- for message in messages if message.role != 'system' %}{%- set role = message['role'] %}{%- set content = message['content'] %}{%- set meta = message.get(\"metadata\", \"\") %}{%- if role == 'user' %}<|user|>\n{{ content }}{%- elif role == 'assistant' and not meta %}<|assistant|>\n{{ content }}{%- elif role == 'assistant' and meta %}<|assistant|>{{ meta }} \n{{ content }}{%- elif role == 'observation' %}<|observation|>\n{{ content }}{%- endif %}{%- endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
-    "stop_token_ids": [
-      151329,
-      151336,
-      151338
-    ],
-    "stop": [
-      "<|endoftext|>",
-      "<|user|>",
-      "<|observation|>"
-    ]
-  },
-  {
-    "version": 1,
-    "context_length": 32768,
-    "model_name": "skywork-or1-preview",
-    "model_lang": [
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/GLM-4-9B-0414-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4bit",
+          "8bit"
+        ],
+        "model_id": "mlx-community/GLM-4-32B-0414-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "IQ2_M",
+          "IQ3_M",
+          "IQ3_XS",
+          "IQ3_XXS",
+          "IQ4_NL",
+          "IQ4_XS",
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q3_K_XL",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_L",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_K_L",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q6_K_L",
+          "Q8_0",
+          "bf16"
+        ],
+        "model_id": "bartowski/THUDM_GLM-4-9B-0414-GGUF",
+        "model_file_name_template": "THUDM_GLM-4-9B-0414-{quantization}.gguf",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "IQ2_M",
+          "IQ2_S",
+          "IQ2_XS",
+          "IQ3_M",
+          "IQ3_XS",
+          "IQ3_XXS",
+          "IQ4_NL",
+          "IQ4_XS",
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q3_K_XL",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_L",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_K_L",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q6_K_L",
+          "Q8_0"
+        ],
+        "model_id": "bartowski/THUDM_GLM-4-32B-0414-GGUF",
+        "model_file_name_template": "THUDM_GLM-4-32B-0414-{quantization}.gguf",
+        "model_hub": "modelscope"
+      }
+    ],
+    "chat_template": "[gMASK]<sop>{%- if tools -%}<|system|>\n# 可用工具\n{% for tool in tools %}{%- set function = tool.function if tool.get(\"function\") else tool %}\n\n## {{ function.name }}\n\n{{ function | tojson(indent=4, ensure_ascii=False) }}\n在调用上述函数时，请使用 Json 格式表示调用的参数。{%- endfor %}{%- endif -%}{%- for msg in messages %}{%- if msg.role == 'system' %}<|system|>\n{{ msg.content }}{%- endif %}{%- endfor %}{%- for message in messages if message.role != 'system' %}{%- set role = message['role'] %}{%- set content = message['content'] %}{%- set meta = message.get(\"metadata\", \"\") %}{%- if role == 'user' %}<|user|>\n{{ content }}{%- elif role == 'assistant' and not meta %}<|assistant|>\n{{ content }}{%- elif role == 'assistant' and meta %}<|assistant|>{{ meta }} \n{{ content }}{%- elif role == 'observation' %}<|observation|>\n{{ content }}{%- endif %}{%- endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
+    "stop_token_ids": [
+      151329,
+      151336,
+      151338
+    ],
+    "stop": [
+      "<|endoftext|>",
+      "<|user|>",
+      "<|observation|>"
+    ],
+    "virtualenv": {
+      "packages": [
+        "transformers>=4.51.3",
+        "mlx-lm>=0.23.1 ; sys_platform=='darwin'",
+        "numpy==1.26.4"
+      ]
+    }
+  },
+  {
+    "version":1,
+    "context_length":32768,
+    "model_name":"Ovis2",
+    "model_lang":[
+      "en",
+      "zh"
+    ],
+    "model_ability":[
+      "chat",
+      "vision"
+    ],
+    "model_description":"Ovis (Open VISion) is a novel Multimodal Large Language Model (MLLM) architecture, designed to structurally align visual and textual embeddings.",
+    "model_specs":[
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":1,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"AIDC-AI/Ovis2-1B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":2,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"AIDC-AI/Ovis2-2B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":4,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"AIDC-AI/Ovis2-4B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":8,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"AIDC-AI/Ovis2-8B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":16,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"AIDC-AI/Ovis2-16B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":34,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"AIDC-AI/Ovis2-34B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format":"gptq",
+        "model_size_in_billions":2,
+        "quantizations":[
+          "Int4"
+        ],
+        "model_id":"AIDC-AI/Ovis2-2B-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format":"gptq",
+        "model_size_in_billions":4,
+        "quantizations":[
+          "Int4"
+        ],
+        "model_id":"AIDC-AI/Ovis2-4B-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format":"gptq",
+        "model_size_in_billions":8,
+        "quantizations":[
+          "Int4"
+        ],
+        "model_id":"AIDC-AI/Ovis2-8B-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format":"gptq",
+        "model_size_in_billions":16,
+        "quantizations":[
+          "Int4"
+        ],
+        "model_id":"AIDC-AI/Ovis2-16B-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format":"gptq",
+        "model_size_in_billions":34,
+        "quantizations":[
+          "Int4",
+          "Int8"
+        ],
+        "model_id":"AIDC-AI/Ovis2-34B-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      }
+    ],
+    "chat_template":  "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+    "stop_token_ids": [
+      151645,
+      151643
+    ],
+    "stop": [
+      "<|im_end|>",
+      "<|endoftext|>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "skywork-or1-preview",
+    "model_lang": [
       "en",
       "zh"
     ],
@@ -8993,5 +7768,602 @@
       "<|im_start|>",
       "<|im_end|>"
     ]
+  },
+  {
+    "version": 1,
+    "context_length": 40960,
+    "model_name": "qwen3",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "reasoning",
+      "tools"
+    ],
+    "model_description": "Qwen3 is the latest generation of large language models in the Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "0_6",
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "Qwen/Qwen3-0.6B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "fp8",
+        "model_size_in_billions": "0_6",
+        "quantizations": [
+          "fp8"
+        ],
+        "model_id": "Qwen/Qwen3-0.6B-FP8",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "0_6",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "JunHowie/Qwen3-0.6B-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "0_6",
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "BF16",
+          "UD-IQ1_M",
+          "UD-IQ1_S",
+          "UD-IQ2_M",
+          "UD-IQ2_XXS",
+          "UD-IQ3_XXS",
+          "UD-Q2_K_XL",
+          "UD-Q3_K_XL",
+          "UD-Q4_K_XL",
+          "UD-Q5_K_XL",
+          "UD-Q6_K_XL",
+          "UD-Q8_K_XL",
+          "IQ4_NL",
+          "IQ4_XS"
+        ],
+        "model_id": "unsloth/Qwen3-0.6B-GGUF",
+        "model_hub": "modelscope",
+        "model_file_name_template": "Qwen3-0.6B-{quantization}.gguf"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "1_7",
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "Qwen/Qwen3-1.7B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "fp8",
+        "model_size_in_billions": "1_7",
+        "quantizations": [
+          "fp8"
+        ],
+        "model_id": "Qwen/Qwen3-1.7B-FP8",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "1_7",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "JunHowie/Qwen3-1.7B-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "1_7",
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "BF16",
+          "UD-IQ1_M",
+          "UD-IQ1_S",
+          "UD-IQ2_M",
+          "UD-IQ2_XXS",
+          "UD-IQ3_XXS",
+          "UD-Q2_K_XL",
+          "UD-Q3_K_XL",
+          "UD-Q4_K_XL",
+          "UD-Q5_K_XL",
+          "UD-Q6_K_XL",
+          "UD-Q8_K_XL",
+          "IQ4_NL",
+          "IQ4_XS"
+        ],
+        "model_id": "unsloth/Qwen3-1.7B-GGUF",
+        "model_hub": "modelscope",
+        "model_file_name_template": "Qwen3-1.7B-{quantization}.gguf"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 4,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "Qwen/Qwen3-4B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "fp8",
+        "model_size_in_billions": 4,
+        "quantizations": [
+          "fp8"
+        ],
+        "model_id": "Qwen/Qwen3-4B-FP8",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 4,
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "JunHowie/Qwen3-4B-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 4,
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "BF16",
+          "UD-IQ1_M",
+          "UD-IQ1_S",
+          "UD-IQ2_M",
+          "UD-IQ2_XXS",
+          "UD-IQ3_XXS",
+          "UD-Q2_K_XL",
+          "UD-Q3_K_XL",
+          "UD-Q4_K_XL",
+          "UD-Q5_K_XL",
+          "UD-Q6_K_XL",
+          "UD-Q8_K_XL",
+          "IQ4_NL",
+          "IQ4_XS"
+        ],
+        "model_id": "unsloth/Qwen3-4B-GGUF",
+        "model_hub": "modelscope",
+        "model_file_name_template": "Qwen3-4B-{quantization}.gguf"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "Qwen/Qwen3-8B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "fp8",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "fp8"
+        ],
+        "model_id": "Qwen/Qwen3-8B-FP8",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "JunHowie/Qwen3-8B-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "BF16",
+          "UD-IQ1_M",
+          "UD-IQ1_S",
+          "UD-IQ2_M",
+          "UD-IQ2_XXS",
+          "UD-IQ3_XXS",
+          "UD-Q2_K_XL",
+          "UD-Q3_K_XL",
+          "UD-Q4_K_XL",
+          "UD-Q5_K_XL",
+          "UD-Q6_K_XL",
+          "UD-Q8_K_XL",
+          "IQ4_NL",
+          "IQ4_XS"
+        ],
+        "model_id": "unsloth/Qwen3-8B-GGUF",
+        "model_hub": "modelscope",
+        "model_file_name_template": "Qwen3-8B-{quantization}.gguf"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "Qwen/Qwen3-14B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "fp8",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "fp8"
+        ],
+        "model_id": "Qwen/Qwen3-14B-FP8",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "JunHowie/Qwen3-14B-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "BF16",
+          "UD-IQ1_M",
+          "UD-IQ1_S",
+          "UD-IQ2_M",
+          "UD-IQ2_XXS",
+          "UD-IQ3_XXS",
+          "UD-Q2_K_XL",
+          "UD-Q3_K_XL",
+          "UD-Q4_K_XL",
+          "UD-Q5_K_XL",
+          "UD-Q6_K_XL",
+          "UD-Q8_K_XL",
+          "IQ4_NL",
+          "IQ4_XS"
+        ],
+        "model_id": "unsloth/Qwen3-14B-GGUF",
+        "model_hub": "modelscope",
+        "model_file_name_template": "Qwen3-14B-{quantization}.gguf"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 30,
+        "activated_size_in_billions": 3,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "Qwen/Qwen3-30B-A3B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "fp8",
+        "model_size_in_billions": 30,
+        "activated_size_in_billions": 3,
+        "quantizations": [
+          "fp8"
+        ],
+        "model_id": "Qwen/Qwen3-30B-A3B-FP8",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 30,
+        "activated_size_in_billions": 3,
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "JunHowie/Qwen3-30B-A3B-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 30,
+        "activated_size_in_billions": 3,
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "BF16",
+          "UD-IQ1_M",
+          "UD-IQ1_S",
+          "UD-IQ2_M",
+          "UD-IQ2_XXS",
+          "UD-IQ3_XXS",
+          "UD-Q2_K_XL",
+          "UD-Q3_K_XL",
+          "UD-Q4_K_XL",
+          "UD-Q5_K_XL",
+          "UD-Q6_K_XL",
+          "UD-Q8_K_XL",
+          "IQ4_NL",
+          "IQ4_XS"
+        ],
+        "quantization_parts": {
+          "BF16": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ]
+        },
+        "model_id": "unsloth/Qwen3-30B-A3B-GGUF",
+        "model_hub": "modelscope",
+        "model_file_name_template": "Qwen3-30B-A3B-{quantization}.gguf",
+        "model_file_name_split_template": "BF16/Qwen3-30B-A3B-{quantization}-{part}.gguf"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "Qwen/Qwen3-32B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "fp8",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "fp8"
+        ],
+        "model_id": "Qwen/Qwen3-32B-FP8",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "JunHowie/Qwen3-32B-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "BF16",
+          "UD-IQ1_M",
+          "UD-IQ1_S",
+          "UD-IQ2_M",
+          "UD-IQ2_XXS",
+          "UD-IQ3_XXS",
+          "UD-Q2_K_XL",
+          "UD-Q3_K_XL",
+          "UD-Q4_K_XL",
+          "UD-Q5_K_XL",
+          "UD-Q6_K_XL",
+          "UD-Q8_K_XL",
+          "IQ4_NL",
+          "IQ4_XS"
+        ],
+        "quantization_parts": {
+          "BF16": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ]
+        },
+        "model_id": "unsloth/Qwen3-32B-GGUF",
+        "model_hub": "modelscope",
+        "model_file_name_template": "Qwen3-32B-{quantization}.gguf",
+        "model_file_name_split_template": "BF16/Qwen3-32B-{quantization}-{part}.gguf"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 235,
+        "activated_size_in_billions": 22,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "Qwen/Qwen3-235B-A22B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "fp8",
+        "model_size_in_billions": 235,
+        "activated_size_in_billions": 22,
+        "quantizations": [
+          "fp8"
+        ],
+        "model_id": "Qwen/Qwen3-235B-A22B-FP8",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 235,
+        "activated_size_in_billions": 22,
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "BF16",
+          "UD-Q2_K_XL",
+          "UD-Q3_K_XL",
+          "IQ4_NL",
+          "IQ4_XS"
+        ],
+        "quantization_parts": {
+          "BF16": [
+            "00001-of-00010",
+            "00002-of-00010",
+            "00003-of-00010",
+            "00004-of-00010",
+            "00005-of-00010",
+            "00006-of-00010",
+            "00007-of-00010",
+            "00008-of-00010",
+            "00009-of-00010",
+            "00010-of-00010"
+          ],
+          "IQ4_XS": [
+            "00001-of-00003",
+            "00002-of-00003",
+            "00003-of-00003"
+          ],
+          "Q2_K": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ],
+          "Q2_K_L": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ],
+          "Q3_K_S": [
+            "00001-of-00003",
+            "00002-of-00003",
+            "00003-of-00003"
+          ],
+          "Q4_0": [
+            "00001-of-00003",
+            "00002-of-00003",
+            "00003-of-00003"
+          ],
+          "Q4_1": [
+            "00001-of-00003",
+            "00002-of-00003",
+            "00003-of-00003"
+          ],
+          "Q5_K_M": [
+            "00001-of-00004",
+            "00002-of-00004",
+            "00003-of-00004",
+            "00004-of-00004"
+          ],
+          "Q6_K": [
+            "00001-of-00004",
+            "00002-of-00004",
+            "00003-of-00004",
+            "00004-of-00004"
+          ],
+          "Q8_0": [
+            "00001-of-00006",
+            "00002-of-00006",
+            "00003-of-00006",
+            "00004-of-00006",
+            "00005-of-00006",
+            "00006-of-00006"
+          ],
+          "UD-Q2_K_XL": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ],
+          "UD-Q3_K_XL": [
+            "00001-of-00003",
+            "00002-of-00003",
+            "00003-of-00003"
+          ]
+        },
+        "model_id": "unsloth/Qwen3-235B-A22B-GGUF",
+        "model_hub": "modelscope",
+        "model_file_name_template": "Qwen3-235B-A22B-{quantization}.gguf",
+        "model_file_name_split_template": "{quantization}/Qwen3-235B-A22B-{quantization}-{part}.gguf"
+      }
+    ],
+    "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0].role == 'system' %}\n        {{- messages[0].content + '\\n\\n' }}\n    {%- endif %}\n    {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0].role == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n    {%- set index = (messages|length - 1) - loop.index0 %}\n    {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n        {%- set ns.multi_step_tool = false %}\n        {%- set ns.last_query_index = index %}\n    {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {%- set content = message.content %}\n        {%- set reasoning_content = '' %}\n        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n            {%- set reasoning_content = message.reasoning_content %}\n        {%- else %}\n            {%- if '</think>' in message.content %}\n             
   {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n                {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n            {%- endif %}\n        {%- endif %}\n        {%- if loop.index0 > ns.last_query_index %}\n            {%- if loop.last or (not loop.last and reasoning_content) %}\n                {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n            {%- else %}\n                {{- '<|im_start|>' + message.role + '\\n' + content }}\n            {%- endif %}\n        {%- else %}\n            {{- '<|im_start|>' + message.role + '\\n' + content }}\n        {%- endif %}\n        {%- if message.tool_calls %}\n            {%- for tool_call in message.tool_calls %}\n                {%- if (loop.first and content) or (not loop.first) %}\n                    {{- '\\n' }}\n                {%- endif %}\n                {%- if tool_call.function %}\n                    {%- set tool_call = tool_call.function %}\n                {%- endif %}\n                {{- '<tool_call>\\n{\"name\": \"' }}\n                {{- tool_call.name }}\n                {{- '\", \"arguments\": ' }}\n                {%- if tool_call.arguments is string %}\n                    {{- tool_call.arguments }}\n                {%- else %}\n                    {{- tool_call.arguments | tojson }}\n                {%- endif %}\n                {{- '}\\n</tool_call>' }}\n            {%- endfor %}\n        {%- endif %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n        
    {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n    {%- if enable_thinking is defined and enable_thinking is false %}\n        {{- '<think>\\n\\n</think>\\n\\n' }}\n    {%- endif %}\n{%- endif %}",
+    "stop_token_ids": [
+      151643,
+      151644,
+      151645
+    ],
+    "stop": [
+      "<|endoftext|>",
+      "<|im_start|>",
+      "<|im_end|>"
+    ],
+    "reasoning_start_tag": "<think>",
+    "reasoning_end_tag": "</think>",
+    "virtualenv": {
+      "packages": [
+        "transformers>=4.51.0",
+        "numpy==1.26.4"
+      ]
+    }
   }
 ]