xinference 1.3.1.post1__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (75)
  1. xinference/_compat.py +1 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +4 -0
  4. xinference/core/chat_interface.py +1 -1
  5. xinference/core/model.py +23 -3
  6. xinference/core/supervisor.py +6 -0
  7. xinference/core/worker.py +54 -11
  8. xinference/model/llm/__init__.py +7 -2
  9. xinference/model/llm/core.py +1 -0
  10. xinference/model/llm/llama_cpp/core.py +50 -15
  11. xinference/model/llm/llm_family.json +388 -13
  12. xinference/model/llm/llm_family_modelscope.json +373 -14
  13. xinference/model/llm/mlx/core.py +15 -11
  14. xinference/model/llm/reasoning_parser.py +17 -9
  15. xinference/model/llm/sglang/core.py +112 -12
  16. xinference/model/llm/transformers/core.py +4 -2
  17. xinference/model/llm/transformers/deepseek_vl.py +1 -1
  18. xinference/model/llm/transformers/deepseek_vl2.py +287 -0
  19. xinference/model/llm/transformers/gemma3.py +185 -0
  20. xinference/model/llm/transformers/intern_vl.py +0 -2
  21. xinference/model/llm/utils.py +62 -42
  22. xinference/model/llm/vllm/core.py +157 -11
  23. xinference/model/llm/vllm/distributed_executor.py +314 -0
  24. xinference/model/rerank/core.py +16 -11
  25. xinference/thirdparty/deepseek_vl2/__init__.py +31 -0
  26. xinference/thirdparty/deepseek_vl2/models/__init__.py +26 -0
  27. xinference/thirdparty/deepseek_vl2/models/configuration_deepseek.py +210 -0
  28. xinference/thirdparty/deepseek_vl2/models/conversation.py +310 -0
  29. xinference/thirdparty/deepseek_vl2/models/modeling_deepseek.py +1975 -0
  30. xinference/thirdparty/deepseek_vl2/models/modeling_deepseek_vl_v2.py +697 -0
  31. xinference/thirdparty/deepseek_vl2/models/processing_deepseek_vl_v2.py +675 -0
  32. xinference/thirdparty/deepseek_vl2/models/siglip_vit.py +661 -0
  33. xinference/thirdparty/deepseek_vl2/serve/__init__.py +0 -0
  34. xinference/thirdparty/deepseek_vl2/serve/app_modules/__init__.py +0 -0
  35. xinference/thirdparty/deepseek_vl2/serve/app_modules/gradio_utils.py +83 -0
  36. xinference/thirdparty/deepseek_vl2/serve/app_modules/overwrites.py +81 -0
  37. xinference/thirdparty/deepseek_vl2/serve/app_modules/presets.py +115 -0
  38. xinference/thirdparty/deepseek_vl2/serve/app_modules/utils.py +333 -0
  39. xinference/thirdparty/deepseek_vl2/serve/assets/Kelpy-Codos.js +100 -0
  40. xinference/thirdparty/deepseek_vl2/serve/assets/avatar.png +0 -0
  41. xinference/thirdparty/deepseek_vl2/serve/assets/custom.css +355 -0
  42. xinference/thirdparty/deepseek_vl2/serve/assets/custom.js +22 -0
  43. xinference/thirdparty/deepseek_vl2/serve/assets/favicon.ico +0 -0
  44. xinference/thirdparty/deepseek_vl2/serve/assets/simsun.ttc +0 -0
  45. xinference/thirdparty/deepseek_vl2/serve/inference.py +197 -0
  46. xinference/thirdparty/deepseek_vl2/utils/__init__.py +18 -0
  47. xinference/thirdparty/deepseek_vl2/utils/io.py +80 -0
  48. xinference/types.py +2 -2
  49. xinference/web/ui/build/asset-manifest.json +6 -6
  50. xinference/web/ui/build/index.html +1 -1
  51. xinference/web/ui/build/static/css/main.b494ae7e.css +2 -0
  52. xinference/web/ui/build/static/css/main.b494ae7e.css.map +1 -0
  53. xinference/web/ui/build/static/js/main.5ca4eea1.js +3 -0
  54. xinference/web/ui/build/static/js/main.5ca4eea1.js.map +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/0f0967acaec5df1d45b80010949c258d64297ebbb0f44b8bb3afcbd45c6f0ec4.json +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/68249645124f37d01eef83b1d897e751f895bea919b6fb466f907c1f87cebc84.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/cc97b49285d7717c63374766c789141a4329a04582ab32756d7e0e614d4c5c7f.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +1 -0
  59. xinference/web/ui/src/locales/en.json +2 -2
  60. xinference/web/ui/src/locales/zh.json +1 -1
  61. {xinference-1.3.1.post1.dist-info → xinference-1.4.1.dist-info}/METADATA +4 -4
  62. {xinference-1.3.1.post1.dist-info → xinference-1.4.1.dist-info}/RECORD +67 -41
  63. xinference/web/ui/build/static/css/main.f8177338.css +0 -2
  64. xinference/web/ui/build/static/css/main.f8177338.css.map +0 -1
  65. xinference/web/ui/build/static/js/main.55b70cb7.js +0 -3
  66. xinference/web/ui/build/static/js/main.55b70cb7.js.map +0 -1
  67. xinference/web/ui/node_modules/.cache/babel-loader/2deac8d5636974533e3714f34e94fc754f9153a07c6ee11e72846cb8eae47e4b.json +0 -1
  68. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +0 -1
  69. xinference/web/ui/node_modules/.cache/babel-loader/e23d476fcbf6fd69c8986bf82133d257d28aa8fc9a5cab231d81c1c75c58cd99.json +0 -1
  70. xinference/web/ui/node_modules/.cache/babel-loader/e7a8c37fda8725cab69c7ef8c627060bd7fc806adc67e00fe628ba148cb86d7f.json +0 -1
  71. /xinference/web/ui/build/static/js/{main.55b70cb7.js.LICENSE.txt → main.5ca4eea1.js.LICENSE.txt} +0 -0
  72. {xinference-1.3.1.post1.dist-info → xinference-1.4.1.dist-info}/LICENSE +0 -0
  73. {xinference-1.3.1.post1.dist-info → xinference-1.4.1.dist-info}/WHEEL +0 -0
  74. {xinference-1.3.1.post1.dist-info → xinference-1.4.1.dist-info}/entry_points.txt +0 -0
  75. {xinference-1.3.1.post1.dist-info → xinference-1.4.1.dist-info}/top_level.txt +0 -0
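
The largest single change is the model registry, xinference/model/llm/llm_family.json (+388 -13), excerpted in the hunks below: it registers the new gemma-3-1b-it, gemma-3-it, fin-r1, and deepseek-vl2 families, adds Qwen2.5-VL-32B-Instruct specs, and corrects the InternVL2_5 MPO repo ids. As a minimal sketch of how a newly registered family would be used (the endpoint and engine choice here are illustrative assumptions, not part of the diff):

# A minimal sketch, assuming a local xinference supervisor on port 9997
# and an installed transformers engine; adjust to your deployment.
from xinference.client import Client

client = Client("http://localhost:9997")  # placeholder endpoint

# "gemma-3-1b-it" is one of the model names this release adds.
model_uid = client.launch_model(
    model_name="gemma-3-1b-it",
    model_engine="transformers",  # engine choice is an assumption
    model_format="pytorch",
    model_size_in_billions=1,
    quantization="none",
)
model = client.get_model(model_uid)
print(model.chat(messages=[{"role": "user", "content": "Hello"}]))
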
@@ -5786,6 +5786,265 @@
  "<start_of_turn>"
  ]
  },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "gemma-3-1b-it",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-1b-it"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-1b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-1b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-1b-it-{quantization}"
+ }
+ ],
+ "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
+ "stop_token_ids": [
+ 1,
+ 105,
+ 106
+ ],
+ "stop": [
+ "<eos>",
+ "<end_of_turn>",
+ "<start_of_turn>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "gemma-3-it",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-4b-it"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-12b-it"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-27b-it"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-4b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-4b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-12b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-12b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-27b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-27b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-4b-it-{quantization}"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-12b-it-{quantization}"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-27b-it-{quantization}"
+ }
+ ],
+ "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
+ "stop_token_ids": [
+ 1,
+ 105,
+ 106
+ ],
+ "stop": [
+ "<eos>",
+ "<end_of_turn>",
+ "<start_of_turn>"
+ ]
+ },
  {
  "version": 1,
  "context_length": 8192,
@@ -6923,7 +7182,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-1B"
+ "model_id": "OpenGVLab/InternVL2_5-1B-MPO"
  },
  {
  "model_format": "pytorch",
@@ -6933,7 +7192,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-2B"
+ "model_id": "OpenGVLab/InternVL2_5-2B-MPO"
  },
  {
  "model_format": "pytorch",
@@ -6943,7 +7202,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-4B"
+ "model_id": "OpenGVLab/InternVL2_5-4B-MPO"
  },
  {
  "model_format": "awq",
@@ -6961,7 +7220,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-8B"
+ "model_id": "OpenGVLab/InternVL2_5-8B-MPO"
  },
  {
  "model_format": "awq",
@@ -6969,7 +7228,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-8B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-8B-MPO-AWQ"
  },
  {
  "model_format": "pytorch",
@@ -6979,7 +7238,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-26B"
+ "model_id": "OpenGVLab/InternVL2_5-26B-MPO"
  },
  {
  "model_format": "awq",
@@ -6987,7 +7246,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-26B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-26B-MPO-AWQ"
  },
  {
  "model_format": "pytorch",
@@ -6997,7 +7256,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-38B"
+ "model_id": "OpenGVLab/InternVL2_5-38B-MPO"
  },
  {
  "model_format": "awq",
@@ -7005,7 +7264,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-38B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-38B-MPO-AWQ"
  },
  {
  "model_format": "pytorch",
@@ -7015,7 +7274,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-78B"
+ "model_id": "OpenGVLab/InternVL2_5-78B-MPO"
  },
  {
  "model_format": "awq",
@@ -7023,7 +7282,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-78B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-78B-MPO-AWQ"
  }
  ],
  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
@@ -7302,7 +7561,7 @@
  "model_id":"Qwen/Qwen2-VL-7B-Instruct",
  "model_revision":"6010982c1010c3b222fa98afc81575f124aa9bd6"
  },
- {
+ {
  "model_format":"gptq",
  "model_size_in_billions":7,
  "quantizations":[
@@ -7413,6 +7672,14 @@
  ],
  "model_id":"Qwen/Qwen2.5-VL-7B-Instruct"
  },
+ {
+ "model_format":"pytorch",
+ "model_size_in_billions":32,
+ "quantizations":[
+ "none"
+ ],
+ "model_id":"Qwen/Qwen2.5-VL-32B-Instruct"
+ },
  {
  "model_format":"pytorch",
  "model_size_in_billions":72,
@@ -7437,6 +7704,14 @@
  ],
  "model_id":"Qwen/Qwen2.5-VL-7B-Instruct-AWQ"
  },
+ {
+ "model_format":"awq",
+ "model_size_in_billions":32,
+ "quantizations":[
+ "Int4"
+ ],
+ "model_id":"Qwen/Qwen2.5-VL-32B-Instruct-AWQ"
+ },
  {
  "model_format":"awq",
  "model_size_in_billions":72,
@@ -7892,7 +8167,7 @@
  "model_id": "mlx-community/DeepSeek-V3-{quantization}"
  }
  ],
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "chat_template": "{% if messages %} {% if system or tools %} {% if system %} {{ system }} {% endif %} {% if tools %} {# Handle tools here if needed #} {% endif %} {% endif %} {% for message in messages %} {% set last = loop.index == loop.length %} {% if message.role == \"user\" %} <|User|> {% if tools and last %} Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}. Do not use variables. {{ tools }} {% endif %} {{ message.content }} {% if last %} <|Assistant|> {% endif %} {% elif message.role == \"assistant\" %} <|Assistant|> {% if message.tool_calls %} <|tool▁calls▁begin|> {% for tool in message.tool_calls %} <|tool▁call▁begin|> {\"name\": \"{{ tool.function.name }}\", \"parameters\": {{ tool.function.arguments }}} <|tool▁call▁end|> {% endfor %} <|tool▁calls▁end|> {% else %} {{ message.content }} {% if not last %} <|end▁of▁sentence|> {% endif %} {% endif %} {% elif message.role == \"tool\" %} <|tool▁outputs▁begin|> <|tool▁output▁begin|> {{ message.content }} <|tool▁output▁end|> <|tool▁outputs▁end|> {% if last and message.role != \"assistant\" %} <|Assistant|> {% endif %} {% endif %} {% endfor %} {% else %} {% if system %} {{ system }} {% endif %} {% if prompt %} <|User|> {{ prompt }} {% endif %} <|Assistant|> {{ response }} {% if response %} {{ response }} {% endif %} {% endif %}",
  "stop_token_ids": [
  1
  ],
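
The chat_template strings in this registry are Jinja2 templates that xinference renders into the final prompt at request time. A simplified illustration of that rendering step (the toy template below is illustrative, not the DeepSeek-V3 one from the hunk above):

# Render a registry-style chat template with Jinja2.
from jinja2 import Template

toy_template = Template(
    "{% for m in messages %}<|{{ m.role }}|>{{ m.content }}{% endfor %}<|Assistant|>"
)
print(toy_template.render(messages=[{"role": "User", "content": "Hi"}]))
# <|User|>Hi<|Assistant|>
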
@@ -10499,5 +10774,105 @@
  "stop": [
  "<|im_end|>"
  ]
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "fin-r1",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "Fin-R1 is a large language model specifically designed for the field of financial reasoning",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "SUFE-AIFLM-Lab/Fin-R1"
+ },
+ {
+ "model_format":"gptq",
+ "model_size_in_billions":7,
+ "quantizations":[
+ "Int4",
+ "Int8"
+ ],
+ "model_id":"JunHowie/Fin-R1-GPTQ-{quantization}"
+ },
+ {
+ "model_format":"fp8",
+ "model_size_in_billions":7,
+ "quantizations":[
+ "FP8"
+ ],
+ "model_id":"JunHowie/Fin-R1-FP8-Dynamic"
+ }
+ ],
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+ "stop_token_ids": [
+ 151643,
+ 151644,
+ 151645
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|im_start|>",
+ "<|im_end|>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 4096,
+ "model_name": "deepseek-vl2",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "DeepSeek-VL2, an advanced series of large Mixture-of-Experts (MoE) Vision-Language Models that significantly improves upon its predecessor, DeepSeek-VL. DeepSeek-VL2 demonstrates superior capabilities across various tasks, including but not limited to visual question answering, optical character recognition, document/table/chart understanding, and visual grounding.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/deepseek-vl2"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 16,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/deepseek-vl2-small"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 3,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/deepseek-vl2-tiny"
+ }
+ ],
+ "chat_template": "",
+ "stop_token_ids": [
+ 1
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ]
  }
  ]
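
Since deepseek-vl2 is registered with both chat and vision abilities, it accepts OpenAI-style multimodal messages through xinference's OpenAI-compatible endpoint. A minimal request sketch; the endpoint, model uid, and image URL are placeholders, not values from this diff:

# POST an OpenAI-style multimodal chat request to a running xinference server.
import requests

payload = {
    "model": "deepseek-vl2",  # the uid returned by launch_model
    "messages": [{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image."},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
        ],
    }],
}
resp = requests.post("http://localhost:9997/v1/chat/completions", json=payload)
print(resp.json()["choices"][0]["message"]["content"])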