xinference 1.6.0.post1__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (124)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +79 -2
  3. xinference/client/restful/restful_client.py +65 -3
  4. xinference/conftest.py +0 -7
  5. xinference/core/media_interface.py +132 -8
  6. xinference/core/model.py +44 -6
  7. xinference/core/scheduler.py +1 -10
  8. xinference/core/supervisor.py +8 -17
  9. xinference/core/worker.py +5 -27
  10. xinference/deploy/cmdline.py +6 -2
  11. xinference/model/audio/chattts.py +24 -39
  12. xinference/model/audio/cosyvoice.py +18 -30
  13. xinference/model/audio/funasr.py +42 -0
  14. xinference/model/audio/model_spec.json +71 -1
  15. xinference/model/audio/model_spec_modelscope.json +76 -2
  16. xinference/model/audio/utils.py +75 -0
  17. xinference/model/core.py +1 -0
  18. xinference/model/embedding/__init__.py +74 -18
  19. xinference/model/embedding/core.py +98 -589
  20. xinference/model/embedding/embed_family.py +133 -0
  21. xinference/{thirdparty/omnilmm/train → model/embedding/flag}/__init__.py +1 -1
  22. xinference/model/embedding/flag/core.py +282 -0
  23. xinference/model/embedding/model_spec.json +24 -0
  24. xinference/model/embedding/model_spec_modelscope.json +24 -0
  25. xinference/model/embedding/sentence_transformers/__init__.py +13 -0
  26. xinference/model/embedding/sentence_transformers/core.py +399 -0
  27. xinference/model/embedding/vllm/core.py +95 -0
  28. xinference/model/image/model_spec.json +30 -3
  29. xinference/model/image/model_spec_modelscope.json +41 -2
  30. xinference/model/image/stable_diffusion/core.py +144 -53
  31. xinference/model/llm/__init__.py +6 -54
  32. xinference/model/llm/core.py +19 -5
  33. xinference/model/llm/llama_cpp/core.py +59 -3
  34. xinference/model/llm/llama_cpp/memory.py +457 -0
  35. xinference/model/llm/llm_family.json +247 -402
  36. xinference/model/llm/llm_family.py +88 -16
  37. xinference/model/llm/llm_family_modelscope.json +260 -421
  38. xinference/model/llm/llm_family_openmind_hub.json +0 -34
  39. xinference/model/llm/sglang/core.py +8 -0
  40. xinference/model/llm/transformers/__init__.py +27 -6
  41. xinference/model/llm/transformers/chatglm.py +4 -2
  42. xinference/model/llm/transformers/core.py +49 -28
  43. xinference/model/llm/transformers/deepseek_v2.py +6 -49
  44. xinference/model/llm/transformers/gemma3.py +119 -164
  45. xinference/model/llm/transformers/multimodal/__init__.py +13 -0
  46. xinference/model/llm/transformers/{cogagent.py → multimodal/cogagent.py} +58 -95
  47. xinference/model/llm/transformers/multimodal/core.py +205 -0
  48. xinference/model/llm/transformers/{deepseek_vl2.py → multimodal/deepseek_vl2.py} +59 -120
  49. xinference/model/llm/transformers/multimodal/gemma3.py +117 -0
  50. xinference/model/llm/transformers/{glm4v.py → multimodal/glm4v.py} +57 -93
  51. xinference/model/llm/transformers/multimodal/intern_vl.py +412 -0
  52. xinference/model/llm/transformers/{minicpmv26.py → multimodal/minicpmv26.py} +55 -102
  53. xinference/model/llm/transformers/{ovis2.py → multimodal/ovis2.py} +114 -175
  54. xinference/model/llm/transformers/{qwen-omni.py → multimodal/qwen-omni.py} +82 -167
  55. xinference/model/llm/transformers/multimodal/qwen2_audio.py +131 -0
  56. xinference/model/llm/transformers/{qwen2_vl.py → multimodal/qwen2_vl.py} +224 -256
  57. xinference/model/llm/transformers/opt.py +4 -2
  58. xinference/model/llm/transformers/utils.py +6 -37
  59. xinference/model/llm/utils.py +11 -0
  60. xinference/model/llm/vllm/core.py +7 -0
  61. xinference/model/rerank/core.py +91 -3
  62. xinference/model/rerank/model_spec.json +24 -0
  63. xinference/model/rerank/model_spec_modelscope.json +24 -0
  64. xinference/model/rerank/utils.py +20 -2
  65. xinference/model/utils.py +38 -1
  66. xinference/model/video/diffusers.py +65 -3
  67. xinference/model/video/model_spec.json +31 -4
  68. xinference/model/video/model_spec_modelscope.json +32 -4
  69. xinference/web/ui/build/asset-manifest.json +6 -6
  70. xinference/web/ui/build/index.html +1 -1
  71. xinference/web/ui/build/static/css/main.013f296b.css +2 -0
  72. xinference/web/ui/build/static/css/main.013f296b.css.map +1 -0
  73. xinference/web/ui/build/static/js/main.8a9e3ba0.js +3 -0
  74. xinference/web/ui/build/static/js/main.8a9e3ba0.js.map +1 -0
  75. xinference/web/ui/node_modules/.cache/babel-loader/34cfbfb7836e136ba3261cfd411cc554bf99ba24b35dcceebeaa4f008cb3c9dc.json +1 -0
  76. xinference/web/ui/node_modules/.cache/babel-loader/55b9fb40b57fa926e8f05f31c2f96467e76e5ad62f033dca97c03f9e8c4eb4fe.json +1 -0
  77. xinference/web/ui/node_modules/.cache/babel-loader/567e49df411efb24425d289bb484758cb57067ca54f8b5c67fe4505f698deb96.json +1 -0
  78. xinference/web/ui/node_modules/.cache/babel-loader/6595880facebca7ceace6f17cf21c3a5a9219a2f52fb0ba9f3cf1131eddbcf6b.json +1 -0
  79. xinference/web/ui/node_modules/.cache/babel-loader/aa998bc2d9c11853add6b8a2e08f50327f56d8824ccaaec92d6dde1b305f0d85.json +1 -0
  80. xinference/web/ui/node_modules/.cache/babel-loader/c748246b1d7bcebc16153be69f37e955bb2145526c47dd425aeeff70d3004dbc.json +1 -0
  81. xinference/web/ui/node_modules/.cache/babel-loader/e31234e95d60a5a7883fbcd70de2475dc1c88c90705df1a530abb68f86f80a51.json +1 -0
  82. xinference/web/ui/src/locales/en.json +21 -8
  83. xinference/web/ui/src/locales/ja.json +224 -0
  84. xinference/web/ui/src/locales/ko.json +224 -0
  85. xinference/web/ui/src/locales/zh.json +21 -8
  86. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/METADATA +14 -11
  87. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/RECORD +93 -100
  88. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/WHEEL +1 -1
  89. xinference/model/llm/transformers/cogvlm2.py +0 -442
  90. xinference/model/llm/transformers/cogvlm2_video.py +0 -333
  91. xinference/model/llm/transformers/deepseek_vl.py +0 -280
  92. xinference/model/llm/transformers/glm_edge_v.py +0 -213
  93. xinference/model/llm/transformers/intern_vl.py +0 -526
  94. xinference/model/llm/transformers/internlm2.py +0 -94
  95. xinference/model/llm/transformers/minicpmv25.py +0 -193
  96. xinference/model/llm/transformers/omnilmm.py +0 -132
  97. xinference/model/llm/transformers/qwen2_audio.py +0 -179
  98. xinference/model/llm/transformers/qwen_vl.py +0 -360
  99. xinference/thirdparty/omnilmm/LICENSE +0 -201
  100. xinference/thirdparty/omnilmm/chat.py +0 -218
  101. xinference/thirdparty/omnilmm/constants.py +0 -4
  102. xinference/thirdparty/omnilmm/conversation.py +0 -332
  103. xinference/thirdparty/omnilmm/model/__init__.py +0 -1
  104. xinference/thirdparty/omnilmm/model/omnilmm.py +0 -595
  105. xinference/thirdparty/omnilmm/model/resampler.py +0 -166
  106. xinference/thirdparty/omnilmm/model/utils.py +0 -578
  107. xinference/thirdparty/omnilmm/train/train_utils.py +0 -150
  108. xinference/thirdparty/omnilmm/utils.py +0 -134
  109. xinference/web/ui/build/static/css/main.337afe76.css +0 -2
  110. xinference/web/ui/build/static/css/main.337afe76.css.map +0 -1
  111. xinference/web/ui/build/static/js/main.ae579a97.js +0 -3
  112. xinference/web/ui/build/static/js/main.ae579a97.js.map +0 -1
  113. xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +0 -1
  114. xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +0 -1
  115. xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +0 -1
  116. xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +0 -1
  117. xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +0 -1
  118. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +0 -1
  119. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +0 -1
  120. /xinference/{thirdparty/omnilmm → model/embedding/vllm}/__init__.py +0 -0
  121. /xinference/web/ui/build/static/js/{main.ae579a97.js.LICENSE.txt → main.8a9e3ba0.js.LICENSE.txt} +0 -0
  122. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/entry_points.txt +0 -0
  123. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/licenses/LICENSE +0 -0
  124. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/top_level.txt +0 -0
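
The bulk of this release reshapes the built-in model registries: legacy vision entries (deepseek-vl-chat, OmniLMM, qwen-vl-chat, cogvlm2, glm-edge-v) are removed along with their thirdparty/omnilmm support code, new DeepSeek and MiniCPM4 entries are added, and the embedding stack is split into flag, sentence_transformers, and vllm backends. The hunks that follow are from the built-in LLM registry (llm_family.json). As a point of reference, below is a minimal sketch of launching one of the newly registered models through the RESTful client; the endpoint and the exact keyword arguments are assumptions based on the documented client API, and the spec values mirror the minicpm4 entry added further down.

# A minimal sketch, not verified against this exact release: launching one
# of the newly registered models through xinference's RESTful client.
# Assumes a server is already running on the default local endpoint.
from xinference.client import RESTfulClient

client = RESTfulClient("http://127.0.0.1:9997")
model_uid = client.launch_model(
    model_name="minicpm4",        # new registry entry in this release
    model_engine="transformers",  # assumption: engine matching the pytorch spec
    model_format="pytorch",
    model_size_in_billions=8,
)
model = client.get_model(model_uid)
reply = model.chat(messages=[{"role": "user", "content": "你好"}])
print(reply["choices"][0]["message"]["content"])
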
@@ -4392,47 +4392,6 @@
  "<|end▁of▁sentence|>"
  ]
  },
- {
- "version": 1,
- "context_length": 4096,
- "model_name": "deepseek-vl-chat",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": "1_3",
- "quantizations": [
- "none"
- ],
- "model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
- "model_revision": "8f13a8e00dbdc381d614a9d29d61b07e8fe91b3f"
- },
- {
- "model_format": "pytorch",
- "model_size_in_billions": 7,
- "quantizations": [
- "none"
- ],
- "model_id": "deepseek-ai/deepseek-vl-7b-chat",
- "model_revision": "6f16f00805f45b5249f709ce21820122eeb43556"
- }
- ],
- "chat_template": "",
- "stop_token_ids": [
- 100001
- ],
- "stop": [
- "<|end▁of▁sentence|>"
- ]
- },
  {
  "version": 1,
  "context_length": 4096,
@@ -5009,88 +4968,6 @@
  }
  ]
  },
- {
- "version": 1,
- "context_length": 2048,
- "model_name": "OmniLMM",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 3,
- "quantizations": [
- "none"
- ],
- "model_id": "openbmb/MiniCPM-V",
- "model_revision": "bec7d1cd1c9e804c064ec291163e40624825eaaa"
- },
- {
- "model_format": "pytorch",
- "model_size_in_billions": 12,
- "quantizations": [
- "none"
- ],
- "model_id": "openbmb/OmniLMM-12B",
- "model_revision": "ef62bae5af34be653b9801037cd613e05ab24fdc"
- }
- ],
- "chat_template": "",
- "stop_token_ids": [
- 2
- ],
- "stop": [
- "</s>"
- ]
- },
- {
- "version": 1,
- "context_length": 8192,
- "model_name": "MiniCPM-Llama3-V-2_5",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "MiniCPM-Llama3-V 2.5 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Llama3-8B-Instruct with a total of 8B parameters.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 8,
- "quantizations": [
- "none"
- ],
- "model_id": "openbmb/MiniCPM-Llama3-V-2_5",
- "model_revision": "285a637ba8a30a0660dfcccad16f9a864f75abfd"
- },
- {
- "model_format": "pytorch",
- "model_size_in_billions": 8,
- "quantizations": [
- "none"
- ],
- "model_id": "openbmb/MiniCPM-Llama3-V-2_5-{quantization}",
- "model_revision": "f92aff28552de35de3be204e8fe292dd4824e544"
- }
- ],
- "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = '<|begin_of_text|>' + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
- "stop_token_ids": [
- 128001
- ],
- "stop": [
- "<|end_of_text|>"
- ]
- },
  {
  "version": 1,
  "context_length": 32768,
@@ -5134,51 +5011,6 @@
  "<|endoftext|>"
  ]
  },
- {
- "version": 1,
- "context_length": 4096,
- "model_name": "qwen-vl-chat",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "Qwen-VL-Chat supports more flexible interaction, such as multiple image inputs, multi-round question answering, and creative capabilities.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 7,
- "quantizations": [
- "none"
- ],
- "model_id": "Qwen/Qwen-VL-Chat",
- "model_revision": "6665c780ade5ff3f08853b4262dcb9c8f9598d42"
- },
- {
- "model_format": "gptq",
- "model_size_in_billions": 7,
- "quantizations": [
- "Int4"
- ],
- "model_id": "Qwen/Qwen-VL-Chat-{quantization}",
- "model_revision": "5d3a5aa033ed2c502300d426c81cc5b13bcd1409"
- }
- ],
- "chat_template": "",
- "stop_token_ids": [
- 151643,
- 151644,
- 151645
- ],
- "stop": [
- "<|endoftext|>",
- "<|im_start|>",
- "<|im_end|>"
- ]
- },
  {
  "version": 1,
  "context_length": 4096,
@@ -5362,6 +5194,11 @@
  "Q8_0",
  "bf16"
  ],
+ "multimodal_projectors": [
+ "mmproj-google_gemma-3-4b-it-f16.gguf",
+ "mmproj-google_gemma-3-4b-it-f32.gguf",
+ "mmproj-google_gemma-3-4b-it-bf16.gguf"
+ ],
  "model_id": "bartowski/google_gemma-3-4b-it-GGUF",
  "model_file_name_template": "google_gemma-3-4b-it-{quantization}.gguf"
  },
@@ -5393,6 +5230,11 @@
  "Q8_0",
  "bf16"
  ],
+ "multimodal_projectors": [
+ "mmproj-google_gemma-3-12b-it-f16.gguf",
+ "mmproj-google_gemma-3-12b-it-f32.gguf",
+ "mmproj-google_gemma-3-12b-it-bf16.gguf"
+ ],
  "model_id": "bartowski/google_gemma-3-12b-it-GGUF",
  "model_file_name_template": "google_gemma-3-12b-it-{quantization}.gguf"
  },
@@ -5424,6 +5266,11 @@
  "Q8_0",
  "bf16"
  ],
+ "multimodal_projectors": [
+ "mmproj-google_gemma-3-27b-it-f16.gguf",
+ "mmproj-google_gemma-3-27b-it-f32.gguf",
+ "mmproj-google_gemma-3-27b-it-bf16.gguf"
+ ],
  "model_id": "bartowski/google_gemma-3-27b-it-GGUF",
  "model_file_name_template": "google_gemma-3-27b-it-{quantization}.gguf"
  },
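
The three gemma-3 GGUF hunks above add a multimodal_projectors list beside the existing model_file_name_template. An illustration of how the two fields combine, assuming the resolver simply substitutes a chosen quantization into the template and fetches one projector file alongside the weights:

# Illustration only: deriving a concrete GGUF file name from the
# "model_file_name_template" field, with a projector file from the new
# "multimodal_projectors" list sitting next to it.
spec = {
    "model_file_name_template": "google_gemma-3-4b-it-{quantization}.gguf",
    "multimodal_projectors": [
        "mmproj-google_gemma-3-4b-it-f16.gguf",
        "mmproj-google_gemma-3-4b-it-f32.gguf",
        "mmproj-google_gemma-3-4b-it-bf16.gguf",
    ],
}

quantization = "Q4_K_M"  # any entry from the spec's "quantizations" list
model_file = spec["model_file_name_template"].format(quantization=quantization)
print(model_file)                        # google_gemma-3-4b-it-Q4_K_M.gguf
print(spec["multimodal_projectors"][0])  # projector downloaded alongside it
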
@@ -5852,83 +5699,6 @@
  "<|im_end|>"
  ]
  },
- {
- "version": 1,
- "context_length": 8192,
- "model_name": "cogvlm2",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "CogVLM2 have achieved good results in many lists compared to the previous generation of CogVLM open source models. Its excellent performance can compete with some non-open source models.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 20,
- "quantizations": [
- "none"
- ],
- "model_id": "THUDM/cogvlm2-llama3-chinese-chat-19B",
- "model_revision": "d88b352bce5ee58a289b1ac8328553eb31efa2ef"
- },
- {
- "model_format": "pytorch",
- "model_size_in_billions": 20,
- "quantizations": [
- "none"
- ],
- "model_id": "THUDM/cogvlm2-llama3-chinese-chat-19B-{quantization}",
- "model_revision": "7863e362174f4718c2fe9cba4befd0b580a3194f"
- }
- ],
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = '<|begin_of_text|>' + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% else %}{{ '<|end_of_text|>' }}{% endif %}",
- "stop_token_ids": [
- 128001,
- 128009
- ],
- "stop": [
- "<|end_of_text|>",
- "<|eot_id|>"
- ]
- },
- {
- "version": 1,
- "context_length": 8192,
- "model_name": "cogvlm2-video-llama3-chat",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "CogVLM2-Video achieves state-of-the-art performance on multiple video question answering tasks.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 12,
- "quantizations": [
- "none"
- ],
- "model_id": "THUDM/cogvlm2-video-llama3-chat",
- "model_revision": "f375ead7d8202ebe2c3d09f1068abdddeb2929fa"
- }
- ],
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = '<|begin_of_text|>' + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% else %}{{ '<|end_of_text|>' }}{% endif %}",
- "stop_token_ids": [
- 128001,
- 128009
- ],
- "stop": [
- "<|end_of_text|>",
- "<|eot_id|>"
- ]
- },
  {
  "version": 1,
  "context_length": 8192,
@@ -6372,6 +6142,53 @@
  "</s>"
  ]
  },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "minicpm4",
+ "model_lang": [
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "MiniCPM4 series are highly efficient large language models (LLMs) designed explicitly for end-side devices, which achieves this efficiency through systematic innovation in four key dimensions: model architecture, training data, training algorithms, and inference systems.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": "0_5",
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "openbmb/MiniCPM4-0.5B"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "openbmb/MiniCPM4-8B"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4bit"
+ ],
+ "model_id": "mlx-community/MiniCPM4-8B-4bit"
+ }
+ ],
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "stop_token_ids": [
+ 2,
+ 73440
+ ],
+ "stop": [
+ "</s>",
+ "<|im_end|>"
+ ]
+ },
  {
  "version": 1,
  "context_length": 32768,
@@ -6440,44 +6257,11 @@
  "user",
  "assistant"
  ],
- "stop": [
- "<|im_end|>",
- "<|endoftext|>"
- ]
- }
- },
- {
- "version": 1,
- "context_length": 128000,
- "model_name": "deepseek-v2",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "generate"
- ],
- "model_description": "DeepSeek-V2, a strong Mixture-of-Experts (MoE) language model characterized by economical training and efficient inference. ",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 16,
- "quantizations": [
- "none"
- ],
- "model_id": "deepseek-ai/DeepSeek-V2-Lite",
- "model_revision": "604d5664dddd88a0433dbae533b7fe9472482de0"
- },
- {
- "model_format": "pytorch",
- "model_size_in_billions": 236,
- "quantizations": [
- "none"
- ],
- "model_id": "deepseek-ai/DeepSeek-V2",
- "model_revision": "4461458f186c35188585855f28f77af5661ad489"
- }
- ]
+ "stop": [
+ "<|im_end|>",
+ "<|endoftext|>"
+ ]
+ }
  },
  {
  "version": 1,
@@ -6725,6 +6509,44 @@
  "<|end▁of▁sentence|>"
  ]
  },
+ {
+ "version": 1,
+ "context_length": 163840,
+ "model_name": "deepseek-v3-0324",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. ",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 671,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-V3-0324"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 671,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "cognitivecomputations/DeepSeek-V3-0324-AWQ"
+ }
+ ],
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content'] + '<|Assistant|>'}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "stop_token_ids": [
+ 1
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ]
+ },
  {
  "version": 1,
  "context_length": 163840,
@@ -6941,6 +6763,148 @@
  "reasoning_start_tag": "<think>",
  "reasoning_end_tag": "</think>"
  },
+ {
+ "version": 1,
+ "context_length": 163840,
+ "model_name": "deepseek-r1-0528",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "reasoning"
+ ],
+ "model_description": "DeepSeek-R1, which incorporates cold-start data before RL. DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 671,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-R1-0528"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 671,
+ "quantizations": [
+ "Int4-Int8Mix-Lite",
+ "Int4-Int8Mix-Compact",
+ "Int4-Int8Mix-Medium"
+ ],
+ "model_id": "QuantTrio/DeepSeek-R1-0528-GPTQ-{quantization}"
+ }
+ ],
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' in message %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{'<|Assistant|>' + message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' not in message %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "stop_token_ids": [
+ 1
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ],
+ "reasoning_start_tag": "<think>",
+ "reasoning_end_tag": "</think>"
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "deepseek-r1-0528-qwen3",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "reasoning"
+ ],
+ "model_description": "The DeepSeek R1 model has undergone a minor version upgrade, with the current version being DeepSeek-R1-0528. In the latest update, DeepSeek R1 has significantly improved its depth of reasoning and inference capabilities by leveraging increased computational resources and introducing algorithmic optimization mechanisms during post-training. The model has demonstrated outstanding performance across various benchmark evaluations, including mathematics, programming, and general logic. Its overall performance is now approaching that of leading models, such as O3 and Gemini 2.5 Pro",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4-W4A16",
+ "Int8-W8A16"
+ ],
+ "model_id": "QuantTrio/DeepSeek-R1-0528-Qwen3-8B-{quantization}"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4-Int8Mix"
+ ],
+ "model_id": "QuantTrio/DeepSeek-R1-0528-Qwen3-8B-GPTQ-Int4-Int8Mix"
+ }
+ ],
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{% set content = message['content'] %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + content + '<|Assistant|>'}}{%- endif %}{%- if message['role'] == 'assistant' %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{% endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if content is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{content + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + content + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "stop_token_ids": [
+ 151645
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ],
+ "reasoning_start_tag": "<think>",
+ "reasoning_end_tag": "</think>"
+ },
+ {
+ "version": 1,
+ "context_length": 163840,
+ "model_name": "deepseek-prover-v2",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "reasoning"
+ ],
+ "model_description": "We introduce DeepSeek-Prover-V2, an open-source large language model designed for formal theorem proving in Lean 4, with initialization data collected through a recursive theorem proving pipeline powered by DeepSeek-V3. The cold-start training procedure begins by prompting DeepSeek-V3 to decompose complex problems into a series of subgoals. The proofs of resolved subgoals are synthesized into a chain-of-thought process, combined with DeepSeek-V3's step-by-step reasoning, to create an initial cold start for reinforcement learning. This process enables us to integrate both informal and formal mathematical reasoning into a unified model",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 671,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-Prover-V2-671B"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-Prover-V2-7B"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4bit"
+ ],
+ "model_id": "mlx-community/DeepSeek-Prover-V2-7B-4bit"
+ }
+ ],
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content'] + '<|Assistant|>'}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "stop_token_ids": [
+ 1
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ],
+ "reasoning_start_tag": "<think>",
+ "reasoning_end_tag": "</think>"
+ },
  {
  "version": 1,
  "context_length": 32768,
@@ -8968,125 +8932,6 @@
  "<|observation|>"
  ]
  },
- {
- "version": 1,
- "context_length": 8192,
- "model_name": "glm-edge-v",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "The GLM-Edge series is our attempt to face the end-side real-life scenarios, which consists of two sizes of large-language dialogue models and multimodal comprehension models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, GLM-Edge-V-5B). Among them, the 1.5B / 2B model is mainly for platforms such as mobile phones and cars, and the 4B / 5B model is mainly for platforms such as PCs.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": "2",
- "quantizations": [
- "none"
- ],
- "model_id": "THUDM/glm-edge-v-2b"
- },
- {
- "model_format": "pytorch",
- "model_size_in_billions": "5",
- "quantizations": [
- "none"
- ],
- "model_id": "THUDM/glm-edge-v-5b"
- },
- {
- "model_format": "ggufv2",
- "model_size_in_billions": "2",
- "quantizations": [
- "Q4_0",
- "Q4_1",
- "Q4_K",
- "Q4_K_M",
- "Q4_K_S",
- "Q5_0",
- "Q5_1",
- "Q5_K",
- "Q5_K_M",
- "Q5_K_S",
- "Q6_K",
- "Q8_0"
- ],
- "model_file_name_template": "ggml-model-{quantization}.gguf",
- "model_id": "THUDM/glm-edge-v-2b-gguf"
- },
- {
- "model_format": "ggufv2",
- "model_size_in_billions": "2",
- "quantizations": [
- "F16"
- ],
- "model_file_name_template": "glm-edge-v-2B-{quantization}.gguf",
- "model_id": "THUDM/glm-edge-v-2b-gguf"
- },
- {
- "model_format": "ggufv2",
- "model_size_in_billions": "2",
- "quantizations": [
- "f16"
- ],
- "model_file_name_template": "mmproj-model-{quantization}.gguf",
- "model_id": "THUDM/glm-edge-v-2b-gguf"
- },
- {
- "model_format": "ggufv2",
- "model_size_in_billions": "5",
- "quantizations": [
- "Q4_0",
- "Q4_1",
- "Q4_K",
- "Q4_K_M",
- "Q4_K_S",
- "Q5_0",
- "Q5_1",
- "Q5_K",
- "Q5_K_M",
- "Q5_K_S",
- "Q6_K",
- "Q8_0"
- ],
- "model_file_name_template": "ggml-model-{quantization}.gguf",
- "model_id": "THUDM/glm-edge-v-5b-gguf"
- },
- {
- "model_format": "ggufv2",
- "model_size_in_billions": "5",
- "quantizations": [
- "F16"
- ],
- "model_file_name_template": "glm-edge-v-5B-{quantization}.gguf",
- "model_id": "THUDM/glm-edge-v-5b-gguf"
- },
- {
- "model_format": "ggufv2",
- "model_size_in_billions": "5",
- "quantizations": [
- "f16"
- ],
- "model_file_name_template": "mmproj-model-{quantization}.gguf",
- "model_id": "THUDM/glm-edge-v-5b-gguf"
- }
- ],
- "chat_template": "{% for item in messages %}{% if item['role'] != 'system' %}<|{{ item['role'] }}|>\n{% for content in item['content'] %}{% if content['type'] == 'image' %}{% for _ in range(578) %}<|begin_of_image|>{% endfor %}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}",
- "stop_token_ids": [
- 59246,
- 59253,
- 59255
- ],
- "stop": [
- "<|endoftext|>",
- "<|user|>",
- "<|observation|>"
- ]
- },
  {
  "version": 1,
  "context_length": 32768,