xinference 0.9.4__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (103) hide show
  1. xinference/_version.py +3 -3
  2. xinference/api/oauth2/auth_service.py +47 -18
  3. xinference/api/oauth2/types.py +1 -0
  4. xinference/api/restful_api.py +34 -7
  5. xinference/client/oscar/actor_client.py +4 -3
  6. xinference/client/restful/restful_client.py +20 -4
  7. xinference/conftest.py +13 -2
  8. xinference/core/supervisor.py +48 -1
  9. xinference/core/worker.py +139 -20
  10. xinference/deploy/cmdline.py +119 -20
  11. xinference/model/embedding/core.py +1 -2
  12. xinference/model/llm/__init__.py +4 -6
  13. xinference/model/llm/ggml/llamacpp.py +2 -10
  14. xinference/model/llm/llm_family.json +877 -13
  15. xinference/model/llm/llm_family.py +15 -0
  16. xinference/model/llm/llm_family_modelscope.json +571 -0
  17. xinference/model/llm/pytorch/chatglm.py +2 -0
  18. xinference/model/llm/pytorch/core.py +22 -26
  19. xinference/model/llm/pytorch/deepseek_vl.py +232 -0
  20. xinference/model/llm/pytorch/internlm2.py +2 -0
  21. xinference/model/llm/pytorch/omnilmm.py +153 -0
  22. xinference/model/llm/pytorch/qwen_vl.py +2 -0
  23. xinference/model/llm/pytorch/yi_vl.py +4 -2
  24. xinference/model/llm/utils.py +53 -5
  25. xinference/model/llm/vllm/core.py +54 -6
  26. xinference/model/rerank/core.py +3 -0
  27. xinference/thirdparty/deepseek_vl/__init__.py +31 -0
  28. xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
  29. xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
  30. xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
  31. xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
  32. xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
  33. xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
  34. xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
  35. xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
  36. xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
  37. xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
  38. xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
  39. xinference/thirdparty/omnilmm/__init__.py +0 -0
  40. xinference/thirdparty/omnilmm/chat.py +216 -0
  41. xinference/thirdparty/omnilmm/constants.py +4 -0
  42. xinference/thirdparty/omnilmm/conversation.py +332 -0
  43. xinference/thirdparty/omnilmm/model/__init__.py +1 -0
  44. xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
  45. xinference/thirdparty/omnilmm/model/resampler.py +166 -0
  46. xinference/thirdparty/omnilmm/model/utils.py +563 -0
  47. xinference/thirdparty/omnilmm/train/__init__.py +13 -0
  48. xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
  49. xinference/thirdparty/omnilmm/utils.py +134 -0
  50. xinference/types.py +15 -19
  51. xinference/web/ui/build/asset-manifest.json +3 -3
  52. xinference/web/ui/build/index.html +1 -1
  53. xinference/web/ui/build/static/js/main.76ef2b17.js +3 -0
  54. xinference/web/ui/build/static/js/main.76ef2b17.js.map +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/15e2cf8cd8d0989719b6349428ff576f9009ff4c2dcc52378be0bd938e82495e.json +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/35d0e4a317e5582cbb79d901302e9d706520ac53f8a734c2fd8bfde6eb5a4f02.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/3c2f277c93c5f1638e08db38df0d0fb4e58d1c5571aea03241a5c04ff4094704.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +1 -0
  59. xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +1 -0
  60. xinference/web/ui/node_modules/.cache/babel-loader/5393569d846332075b93b55656716a34f50e0a8c970be789502d7e6c49755fd7.json +1 -0
  61. xinference/web/ui/node_modules/.cache/babel-loader/59ce49eae0f486af4c5034d4d2f9ca77c3ec3a32ecc560085caf5ef482b5f4c9.json +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/63a4c48f0326d071c7772c46598215c006ae41fd3d4ff3577fe717de66ad6e89.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/b9cbcb6d77ba21b22c6950b6fb5b305d23c19cf747f99f7d48b6b046f8f7b1b0.json +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +1 -0
  66. xinference/web/ui/node_modules/.cache/babel-loader/d076fd56cf3b15ed2433e3744b98c6b4e4410a19903d1db4de5bba0e1a1b3347.json +1 -0
  67. xinference/web/ui/node_modules/.cache/babel-loader/daad8131d91134f6d7aef895a0c9c32e1cb928277cb5aa66c01028126d215be0.json +1 -0
  68. xinference/web/ui/node_modules/.cache/babel-loader/de0299226173b0662b573f49e3992220f6611947073bd66ac079728a8bc8837d.json +1 -0
  69. xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +1 -0
  70. xinference/web/ui/node_modules/.cache/babel-loader/e6eccc9aa641e7da833492e27846dc965f9750281420977dc84654ca6ed221e4.json +1 -0
  71. xinference/web/ui/node_modules/.cache/babel-loader/e9b52d171223bb59fb918316297a051cdfd42dd453e8260fd918e90bc0a4ebdf.json +1 -0
  72. xinference/web/ui/node_modules/.cache/babel-loader/f16aec63602a77bd561d0e67fa00b76469ac54b8033754bba114ec5eb3257964.json +1 -0
  73. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/METADATA +25 -12
  74. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/RECORD +79 -58
  75. xinference/model/llm/ggml/ctransformers.py +0 -281
  76. xinference/model/llm/ggml/ctransformers_util.py +0 -161
  77. xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
  78. xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
  79. xinference/web/ui/node_modules/.cache/babel-loader/0bd70b1ecf307e2681318e864f4692305b6350c8683863007f4caf2f9ac33b6e.json +0 -1
  80. xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +0 -1
  81. xinference/web/ui/node_modules/.cache/babel-loader/18e5d5422e2464abf4a3e6d38164570e2e426e0a921e9a2628bbae81b18da353.json +0 -1
  82. xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +0 -1
  83. xinference/web/ui/node_modules/.cache/babel-loader/3e055de705e397e1d413d7f429589b1a98dd78ef378b97f0cdb462c5f2487d5e.json +0 -1
  84. xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +0 -1
  85. xinference/web/ui/node_modules/.cache/babel-loader/52aa27272b4b9968f62666262b47661cb1992336a2aff3b13994cc36877b3ec3.json +0 -1
  86. xinference/web/ui/node_modules/.cache/babel-loader/60c4b98d8ea7479fb0c94cfd19c8128f17bd7e27a1e73e6dd9adf6e9d88d18eb.json +0 -1
  87. xinference/web/ui/node_modules/.cache/babel-loader/7e094845f611802b024b57439cbf911038169d06cdf6c34a72a7277f35aa71a4.json +0 -1
  88. xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +0 -1
  89. xinference/web/ui/node_modules/.cache/babel-loader/98b7ef307f436affe13d75a4f265b27e828ccc2b10ffae6513abe2681bc11971.json +0 -1
  90. xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +0 -1
  91. xinference/web/ui/node_modules/.cache/babel-loader/b400cfc9db57fa6c70cd2bad055b73c5079fde0ed37974009d898083f6af8cd8.json +0 -1
  92. xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +0 -1
  93. xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
  94. xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +0 -1
  95. xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +0 -1
  96. xinference/web/ui/node_modules/.cache/babel-loader/e1d9b2ae4e1248658704bc6bfc5d6160dcd1a9e771ea4ae8c1fed0aaddeedd29.json +0 -1
  97. xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
  98. xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +0 -1
  99. /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.76ef2b17.js.LICENSE.txt} +0 -0
  100. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/LICENSE +0 -0
  101. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/WHEEL +0 -0
  102. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/entry_points.txt +0 -0
  103. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/top_level.txt +0 -0
@@ -688,6 +688,49 @@
688
688
  ]
689
689
  }
690
690
  },
691
+ {
692
+ "version": 1,
693
+ "context_length": 131072,
694
+ "model_name": "chatglm3-128k",
695
+ "model_lang": [
696
+ "en",
697
+ "zh"
698
+ ],
699
+ "model_ability": [
700
+ "chat"
701
+ ],
702
+ "model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
703
+ "model_specs": [
704
+ {
705
+ "model_format": "pytorch",
706
+ "model_size_in_billions": 6,
707
+ "quantizations": [
708
+ "4-bit",
709
+ "8-bit",
710
+ "none"
711
+ ],
712
+ "model_id": "THUDM/chatglm3-6b-128k",
713
+ "model_revision": "f0afbe671009abc9e31182170cf60636d5546cda"
714
+ }
715
+ ],
716
+ "prompt_style": {
717
+ "style_name": "CHATGLM3",
718
+ "system_prompt": "",
719
+ "roles": [
720
+ "user",
721
+ "assistant"
722
+ ],
723
+ "stop_token_ids": [
724
+ 64795,
725
+ 64797,
726
+ 2
727
+ ],
728
+ "stop": [
729
+ "<|user|>",
730
+ "<|observation|>"
731
+ ]
732
+ }
733
+ },
691
734
  {
692
735
  "version": 1,
693
736
  "context_length": 2048,
@@ -870,6 +913,38 @@
870
913
  "model_id": "meta-llama/Llama-2-7b-chat-hf",
871
914
  "model_revision": "08751db2aca9bf2f7f80d2e516117a53d7450235"
872
915
  },
916
+ {
917
+ "model_format": "gptq",
918
+ "model_size_in_billions": 7,
919
+ "quantizations": [
920
+ "Int4"
921
+ ],
922
+ "model_id": "TheBloke/Llama-2-7B-Chat-GPTQ"
923
+ },
924
+ {
925
+ "model_format": "gptq",
926
+ "model_size_in_billions": 70,
927
+ "quantizations": [
928
+ "Int4"
929
+ ],
930
+ "model_id": "TheBloke/Llama-2-70B-Chat-GPTQ"
931
+ },
932
+ {
933
+ "model_format": "awq",
934
+ "model_size_in_billions": 70,
935
+ "quantizations": [
936
+ "Int4"
937
+ ],
938
+ "model_id": "TheBloke/Llama-2-70B-Chat-AWQ"
939
+ },
940
+ {
941
+ "model_format": "awq",
942
+ "model_size_in_billions": 7,
943
+ "quantizations": [
944
+ "Int4"
945
+ ],
946
+ "model_id": "TheBloke/Llama-2-7B-Chat-AWQ"
947
+ },
873
948
  {
874
949
  "model_format": "pytorch",
875
950
  "model_size_in_billions": 13,
@@ -881,6 +956,22 @@
881
956
  "model_id": "meta-llama/Llama-2-13b-chat-hf",
882
957
  "model_revision": "0ba94ac9b9e1d5a0037780667e8b219adde1908c"
883
958
  },
959
+ {
960
+ "model_format": "gptq",
961
+ "model_size_in_billions": 13,
962
+ "quantizations": [
963
+ "Int4"
964
+ ],
965
+ "model_id": "TheBloke/Llama-2-13B-chat-GPTQ"
966
+ },
967
+ {
968
+ "model_format": "awq",
969
+ "model_size_in_billions": 13,
970
+ "quantizations": [
971
+ "Int4"
972
+ ],
973
+ "model_id": "TheBloke/Llama-2-13B-chat-AWQ"
974
+ },
884
975
  {
885
976
  "model_format": "pytorch",
886
977
  "model_size_in_billions": 70,
@@ -1002,6 +1093,22 @@
1002
1093
  "model_id": "TheBloke/Llama-2-7B-GGML",
1003
1094
  "model_file_name_template": "llama-2-7b.ggmlv3.{quantization}.bin"
1004
1095
  },
1096
+ {
1097
+ "model_format": "gptq",
1098
+ "model_size_in_billions": 7,
1099
+ "quantizations": [
1100
+ "Int4"
1101
+ ],
1102
+ "model_id": "TheBloke/Llama-2-7B-GPTQ"
1103
+ },
1104
+ {
1105
+ "model_format": "awq",
1106
+ "model_size_in_billions": 7,
1107
+ "quantizations": [
1108
+ "Int4"
1109
+ ],
1110
+ "model_id": "TheBloke/Llama-2-7B-AWQ"
1111
+ },
1005
1112
  {
1006
1113
  "model_format": "ggmlv3",
1007
1114
  "model_size_in_billions": 13,
@@ -1068,6 +1175,22 @@
1068
1175
  "model_id": "meta-llama/Llama-2-13b-hf",
1069
1176
  "model_revision": "db6b8eb1feabb38985fdf785a89895959e944936"
1070
1177
  },
1178
+ {
1179
+ "model_format": "gptq",
1180
+ "model_size_in_billions": 13,
1181
+ "quantizations": [
1182
+ "Int4"
1183
+ ],
1184
+ "model_id": "TheBloke/Llama-2-13B-GPTQ"
1185
+ },
1186
+ {
1187
+ "model_format": "awq",
1188
+ "model_size_in_billions": 13,
1189
+ "quantizations": [
1190
+ "Int4"
1191
+ ],
1192
+ "model_id": "TheBloke/Llama-2-13B-AWQ"
1193
+ },
1071
1194
  {
1072
1195
  "model_format": "pytorch",
1073
1196
  "model_size_in_billions": 70,
@@ -1078,6 +1201,22 @@
1078
1201
  ],
1079
1202
  "model_id": "meta-llama/Llama-2-70b-hf",
1080
1203
  "model_revision": "cc8aa03a000ff08b4d5c5b39673321a2a396c396"
1204
+ },
1205
+ {
1206
+ "model_format": "gptq",
1207
+ "model_size_in_billions": 70,
1208
+ "quantizations": [
1209
+ "Int4"
1210
+ ],
1211
+ "model_id": "TheBloke/Llama-2-70B-GPTQ"
1212
+ },
1213
+ {
1214
+ "model_format": "awq",
1215
+ "model_size_in_billions": 70,
1216
+ "quantizations": [
1217
+ "Int4"
1218
+ ],
1219
+ "model_id": "TheBloke/Llama-2-70B-AWQ"
1081
1220
  }
1082
1221
  ]
1083
1222
  },
@@ -1466,6 +1605,16 @@
1466
1605
  ],
1467
1606
  "model_id": "Qwen/Qwen1.5-14B-Chat"
1468
1607
  },
1608
+ {
1609
+ "model_format": "pytorch",
1610
+ "model_size_in_billions": 32,
1611
+ "quantizations": [
1612
+ "4-bit",
1613
+ "8-bit",
1614
+ "none"
1615
+ ],
1616
+ "model_id": "Qwen/Qwen1.5-32B-Chat"
1617
+ },
1469
1618
  {
1470
1619
  "model_format": "pytorch",
1471
1620
  "model_size_in_billions": 72,
@@ -1521,6 +1670,14 @@
1521
1670
  ],
1522
1671
  "model_id": "Qwen/Qwen1.5-14B-Chat-GPTQ-{quantization}"
1523
1672
  },
1673
+ {
1674
+ "model_format": "gptq",
1675
+ "model_size_in_billions": 32,
1676
+ "quantizations": [
1677
+ "Int4"
1678
+ ],
1679
+ "model_id": "Qwen/Qwen1.5-32B-Chat-GPTQ-{quantization}"
1680
+ },
1524
1681
  {
1525
1682
  "model_format": "gptq",
1526
1683
  "model_size_in_billions": 72,
@@ -1570,6 +1727,14 @@
1570
1727
  ],
1571
1728
  "model_id": "Qwen/Qwen1.5-14B-Chat-AWQ"
1572
1729
  },
1730
+ {
1731
+ "model_format": "awq",
1732
+ "model_size_in_billions": 32,
1733
+ "quantizations": [
1734
+ "Int4"
1735
+ ],
1736
+ "model_id": "Qwen/Qwen1.5-32B-Chat-AWQ"
1737
+ },
1573
1738
  {
1574
1739
  "model_format": "awq",
1575
1740
  "model_size_in_billions": 72,
@@ -1658,6 +1823,22 @@
1658
1823
  "model_id": "Qwen/Qwen1.5-14B-Chat-GGUF",
1659
1824
  "model_file_name_template": "qwen1_5-14b-chat-{quantization}.gguf"
1660
1825
  },
1826
+ {
1827
+ "model_format": "ggufv2",
1828
+ "model_size_in_billions": 32,
1829
+ "quantizations": [
1830
+ "q2_k",
1831
+ "q3_k_m",
1832
+ "q4_0",
1833
+ "q4_k_m",
1834
+ "q5_0",
1835
+ "q5_k_m",
1836
+ "q6_k",
1837
+ "q8_0"
1838
+ ],
1839
+ "model_id": "Qwen/Qwen1.5-32B-Chat-GGUF",
1840
+ "model_file_name_template": "qwen1_5-32b-chat-{quantization}.gguf"
1841
+ },
1661
1842
  {
1662
1843
  "model_format": "ggufv2",
1663
1844
  "model_size_in_billions": 72,
@@ -1697,6 +1878,58 @@
1697
1878
  ]
1698
1879
  }
1699
1880
  },
1881
+ {
1882
+ "version": 1,
1883
+ "context_length": 32768,
1884
+ "model_name": "qwen1.5-moe-chat",
1885
+ "model_lang": [
1886
+ "en",
1887
+ "zh"
1888
+ ],
1889
+ "model_ability": [
1890
+ "chat"
1891
+ ],
1892
+ "model_description": "Qwen1.5-MoE is a transformer-based MoE decoder-only language model pretrained on a large amount of data.",
1893
+ "model_specs": [
1894
+ {
1895
+ "model_format": "pytorch",
1896
+ "model_size_in_billions": "2_7",
1897
+ "quantizations": [
1898
+ "4-bit",
1899
+ "8-bit",
1900
+ "none"
1901
+ ],
1902
+ "model_id": "Qwen/Qwen1.5-MoE-A2.7B-Chat"
1903
+ },
1904
+ {
1905
+ "model_format": "gptq",
1906
+ "model_size_in_billions": "2_7",
1907
+ "quantizations": [
1908
+ "Int4"
1909
+ ],
1910
+ "model_id": "Qwen/Qwen1.5-MoE-A2.7B-Chat-GPTQ-Int4"
1911
+ }
1912
+ ],
1913
+ "prompt_style": {
1914
+ "style_name": "QWEN",
1915
+ "system_prompt": "You are a helpful assistant.",
1916
+ "roles": [
1917
+ "user",
1918
+ "assistant"
1919
+ ],
1920
+ "intra_message_sep": "\n",
1921
+ "stop_token_ids": [
1922
+ 151643,
1923
+ 151644,
1924
+ 151645
1925
+ ],
1926
+ "stop": [
1927
+ "<|endoftext|>",
1928
+ "<|im_start|>",
1929
+ "<|im_end|>"
1930
+ ]
1931
+ }
1932
+ },
1700
1933
  {
1701
1934
  "version": 1,
1702
1935
  "context_length": 8192,
@@ -1737,13 +1970,13 @@
1737
1970
  "model_description": "GPT-2 is a Transformer-based LLM that is trained on WebTest, a 40 GB dataset of Reddit posts with 3+ upvotes.",
1738
1971
  "model_specs": [
1739
1972
  {
1740
- "model_format": "ggmlv3",
1741
- "model_size_in_billions": 1,
1973
+ "model_format": "pytorch",
1974
+ "model_size_in_billions": "1_5",
1742
1975
  "quantizations": [
1743
1976
  "none"
1744
1977
  ],
1745
- "model_id": "marella/gpt-2-ggml",
1746
- "model_file_name_template": "ggml-model.bin"
1978
+ "model_id": "openai-community/gpt2",
1979
+ "model_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e"
1747
1980
  }
1748
1981
  ]
1749
1982
  },
@@ -2526,6 +2759,22 @@
2526
2759
  "model_id": "mistralai/Mistral-7B-Instruct-v0.1",
2527
2760
  "model_revision": "54766df6d50e4d3d7ccd66758e5341ba105a6d36"
2528
2761
  },
2762
+ {
2763
+ "model_format": "awq",
2764
+ "model_size_in_billions": 7,
2765
+ "quantizations": [
2766
+ "Int4"
2767
+ ],
2768
+ "model_id": "TheBloke/Mistral-7B-Instruct-v0.1-AWQ"
2769
+ },
2770
+ {
2771
+ "model_format": "gptq",
2772
+ "model_size_in_billions": 7,
2773
+ "quantizations": [
2774
+ "Int4"
2775
+ ],
2776
+ "model_id": "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
2777
+ },
2529
2778
  {
2530
2779
  "model_format": "ggufv2",
2531
2780
  "model_size_in_billions": 7,
@@ -2587,6 +2836,22 @@
2587
2836
  "model_id": "mistralai/Mistral-7B-Instruct-v0.2",
2588
2837
  "model_revision": "b70aa86578567ba3301b21c8a27bea4e8f6d6d61"
2589
2838
  },
2839
+ {
2840
+ "model_format": "gptq",
2841
+ "model_size_in_billions": 7,
2842
+ "quantizations": [
2843
+ "Int4"
2844
+ ],
2845
+ "model_id": "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"
2846
+ },
2847
+ {
2848
+ "model_format": "awq",
2849
+ "model_size_in_billions": 7,
2850
+ "quantizations": [
2851
+ "Int4"
2852
+ ],
2853
+ "model_id": "TheBloke/Mistral-7B-Instruct-v0.2-AWQ"
2854
+ },
2590
2855
  {
2591
2856
  "model_format": "ggufv2",
2592
2857
  "model_size_in_billions": 7,
@@ -2747,6 +3012,14 @@
2747
3012
  "model_id": "mistralai/Mixtral-8x7B-v0.1",
2748
3013
  "model_revision": "58301445dc1378584211722b7ebf8743ec4e192b"
2749
3014
  },
3015
+ {
3016
+ "model_format": "gptq",
3017
+ "model_size_in_billions": "46_7",
3018
+ "quantizations": [
3019
+ "Int4"
3020
+ ],
3021
+ "model_id": "TheBloke/Mixtral-8x7B-v0.1-GPTQ"
3022
+ },
2750
3023
  {
2751
3024
  "model_format": "ggufv2",
2752
3025
  "model_size_in_billions": "46_7",
@@ -2796,10 +3069,17 @@
2796
3069
  "model_format": "awq",
2797
3070
  "model_size_in_billions": "46_7",
2798
3071
  "quantizations": [
2799
- "4-bit"
3072
+ "Int4"
3073
+ ],
3074
+ "model_id": "TheBloke/Mixtral-8x7B-Instruct-v0.1-AWQ"
3075
+ },
3076
+ {
3077
+ "model_format": "gptq",
3078
+ "model_size_in_billions": "46_7",
3079
+ "quantizations": [
3080
+ "Int4"
2800
3081
  ],
2801
- "model_id": "TheBloke/Mixtral-8x7B-Instruct-v0.1-AWQ",
2802
- "model_revision": "9afb6f0a7d7fe9ecebdda1baa4ff4e13e73e97d7"
3082
+ "model_id": "TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ"
2803
3083
  },
2804
3084
  {
2805
3085
  "model_format": "ggufv2",
@@ -3357,22 +3637,122 @@
3357
3637
  {
3358
3638
  "version": 1,
3359
3639
  "context_length": 4096,
3360
- "model_name": "deepseek-chat",
3640
+ "model_name": "gorilla-openfunctions-v2",
3361
3641
  "model_lang": [
3362
- "en",
3363
- "zh"
3642
+ "en"
3364
3643
  ],
3365
3644
  "model_ability": [
3366
3645
  "chat"
3367
3646
  ],
3368
- "model_description": "DeepSeek LLM is an advanced language model comprising 67 billion parameters. It has been trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese.",
3647
+ "model_description": "OpenFunctions is designed to extend Large Language Model (LLM) Chat Completion feature to formulate executable APIs call given natural language instructions and API context.",
3369
3648
  "model_specs": [
3370
3649
  {
3371
3650
  "model_format": "pytorch",
3372
3651
  "model_size_in_billions": 7,
3373
3652
  "quantizations": [
3374
- "4-bit",
3375
- "8-bit",
3653
+ "none"
3654
+ ],
3655
+ "model_id": "gorilla-llm/gorilla-openfunctions-v2",
3656
+ "model_revision": "0f91d705e64b77fb55e35a7eab5d03bf965c9b5c"
3657
+ },
3658
+ {
3659
+ "model_format": "ggufv2",
3660
+ "model_size_in_billions": 7,
3661
+ "quantizations": [
3662
+ "Q2_K",
3663
+ "Q3_K_L",
3664
+ "Q3_K_M",
3665
+ "Q3_K_S",
3666
+ "Q4_0",
3667
+ "Q4_K_M",
3668
+ "Q4_K_S",
3669
+ "Q5_K_M",
3670
+ "Q5_K_S",
3671
+ "Q6_K"
3672
+ ],
3673
+ "model_id": "gorilla-llm//gorilla-openfunctions-v2-GGUF",
3674
+ "model_file_name_template": "gorilla-openfunctions-v2.{quantization}.gguf"
3675
+ }
3676
+ ],
3677
+ "prompt_style": {
3678
+ "style_name": "GORILLA_OPENFUNCTIONS",
3679
+ "system_prompt": "",
3680
+ "roles": [
3681
+ "",
3682
+ ""
3683
+ ],
3684
+ "intra_message_sep": "\n",
3685
+ "inter_message_sep": "\n",
3686
+ "stop_token_ids": [],
3687
+ "stop": []
3688
+ }
3689
+ },
3690
+ {
3691
+ "version": 1,
3692
+ "context_length": 4096,
3693
+ "model_name": "deepseek-vl-chat",
3694
+ "model_lang": [
3695
+ "en",
3696
+ "zh"
3697
+ ],
3698
+ "model_ability": [
3699
+ "chat",
3700
+ "vision"
3701
+ ],
3702
+ "model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
3703
+ "model_specs": [
3704
+ {
3705
+ "model_format": "pytorch",
3706
+ "model_size_in_billions": "1_3",
3707
+ "quantizations": [
3708
+ "none"
3709
+ ],
3710
+ "model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
3711
+ "model_revision": "8f13a8e00dbdc381d614a9d29d61b07e8fe91b3f"
3712
+ },
3713
+ {
3714
+ "model_format": "pytorch",
3715
+ "model_size_in_billions": 7,
3716
+ "quantizations": [
3717
+ "none"
3718
+ ],
3719
+ "model_id": "deepseek-ai/deepseek-vl-7b-chat",
3720
+ "model_revision": "6f16f00805f45b5249f709ce21820122eeb43556"
3721
+ }
3722
+ ],
3723
+ "prompt_style": {
3724
+ "style_name": "DEEPSEEK_CHAT",
3725
+ "system_prompt": "<|begin▁of▁sentence|>",
3726
+ "roles": [
3727
+ "User",
3728
+ "Assistant"
3729
+ ],
3730
+ "intra_message_sep": "\n\n",
3731
+ "inter_message_sep": "<|end▁of▁sentence|>",
3732
+ "stop": [
3733
+ "<|end▁of▁sentence|>"
3734
+ ]
3735
+ }
3736
+ },
3737
+ {
3738
+ "version": 1,
3739
+ "context_length": 4096,
3740
+ "model_name": "deepseek-chat",
3741
+ "model_lang": [
3742
+ "en",
3743
+ "zh"
3744
+ ],
3745
+ "model_ability": [
3746
+ "chat"
3747
+ ],
3748
+ "model_description": "DeepSeek LLM is an advanced language model comprising 67 billion parameters. It has been trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese.",
3749
+ "model_specs": [
3750
+ {
3751
+ "model_format": "pytorch",
3752
+ "model_size_in_billions": 7,
3753
+ "quantizations": [
3754
+ "4-bit",
3755
+ "8-bit",
3376
3756
  "none"
3377
3757
  ],
3378
3758
  "model_id": "deepseek-ai/deepseek-llm-7b-chat",
@@ -3662,6 +4042,48 @@
3662
4042
  ]
3663
4043
  }
3664
4044
  },
4045
+ {
4046
+ "version":1,
4047
+ "context_length":2048,
4048
+ "model_name":"OmniLMM",
4049
+ "model_lang":[
4050
+ "en",
4051
+ "zh"
4052
+ ],
4053
+ "model_ability":[
4054
+ "chat",
4055
+ "vision"
4056
+ ],
4057
+ "model_description":"OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
4058
+ "model_specs":[
4059
+ {
4060
+ "model_format":"pytorch",
4061
+ "model_size_in_billions":3,
4062
+ "quantizations":[
4063
+ "none"
4064
+ ],
4065
+ "model_id":"openbmb/MiniCPM-V",
4066
+ "model_revision":"bec7d1cd1c9e804c064ec291163e40624825eaaa"
4067
+ },
4068
+ {
4069
+ "model_format":"pytorch",
4070
+ "model_size_in_billions":12,
4071
+ "quantizations":[
4072
+ "none"
4073
+ ],
4074
+ "model_id":"openbmb/OmniLMM-12B",
4075
+ "model_revision":"ef62bae5af34be653b9801037cd613e05ab24fdc"
4076
+ }
4077
+ ],
4078
+ "prompt_style":{
4079
+ "style_name":"OmniLMM",
4080
+ "system_prompt":"The role of first msg should be user",
4081
+ "roles":[
4082
+ "user",
4083
+ "assistant"
4084
+ ]
4085
+ }
4086
+ },
3665
4087
  {
3666
4088
  "version": 1,
3667
4089
  "context_length": 4096,
@@ -3888,5 +4310,447 @@
3888
4310
  "<start_of_turn>"
3889
4311
  ]
3890
4312
  }
4313
+ },
4314
+ {
4315
+ "version": 1,
4316
+ "context_length": 4096,
4317
+ "model_name": "platypus2-70b-instruct",
4318
+ "model_lang": [
4319
+ "en"
4320
+ ],
4321
+ "model_ability": [
4322
+ "generate"
4323
+ ],
4324
+ "model_description": "Platypus-70B-instruct is a merge of garage-bAInd/Platypus2-70B and upstage/Llama-2-70b-instruct-v2.",
4325
+ "model_specs": [
4326
+ {
4327
+ "model_format": "pytorch",
4328
+ "model_size_in_billions": 70,
4329
+ "quantizations": [
4330
+ "none"
4331
+ ],
4332
+ "model_id": "garage-bAInd/Platypus2-70B-instruct",
4333
+ "model_revision": "31389b50953688e4e542be53e6d2ab04d5c34e87"
4334
+ }
4335
+ ]
4336
+ },
4337
+ {
4338
+ "version": 1,
4339
+ "context_length": 2048,
4340
+ "model_name": "aquila2",
4341
+ "model_lang": [
4342
+ "zh"
4343
+ ],
4344
+ "model_ability": [
4345
+ "generate"
4346
+ ],
4347
+ "model_description": "Aquila2 series models are the base language models",
4348
+ "model_specs": [
4349
+ {
4350
+ "model_format": "pytorch",
4351
+ "model_size_in_billions": 7,
4352
+ "quantizations": [
4353
+ "none"
4354
+ ],
4355
+ "model_id": "BAAI/Aquila2-7B",
4356
+ "model_revision": "9c76e143c6e9621689ca76e078c465b0dee75eb8"
4357
+ },
4358
+ {
4359
+ "model_format": "pytorch",
4360
+ "model_size_in_billions": 34,
4361
+ "quantizations": [
4362
+ "none"
4363
+ ],
4364
+ "model_id": "BAAI/Aquila2-34B",
4365
+ "model_revision": "356733caf6221e9dd898cde8ff189a98175526ec"
4366
+ },
4367
+ {
4368
+ "model_format": "pytorch",
4369
+ "model_size_in_billions": 70,
4370
+ "quantizations": [
4371
+ "none"
4372
+ ],
4373
+ "model_id": "BAAI/Aquila2-70B-Expr",
4374
+ "model_revision": "32a2897235541b9f5238bbe88f8d76a19993c0ba"
4375
+ }
4376
+ ]
4377
+ },
4378
+ {
4379
+ "version": 1,
4380
+ "context_length": 2048,
4381
+ "model_name": "aquila2-chat",
4382
+ "model_lang": [
4383
+ "zh"
4384
+ ],
4385
+ "model_ability": [
4386
+ "chat"
4387
+ ],
4388
+ "model_description": "Aquila2-chat series models are the chat models",
4389
+ "model_specs": [
4390
+ {
4391
+ "model_format": "pytorch",
4392
+ "model_size_in_billions": 7,
4393
+ "quantizations": [
4394
+ "none"
4395
+ ],
4396
+ "model_id": "BAAI/AquilaChat2-7B",
4397
+ "model_revision": "0d060c4edeb4e0febd81130c17f6868653184fb3"
4398
+ },
4399
+ {
4400
+ "model_format": "ggufv2",
4401
+ "model_size_in_billions": 34,
4402
+ "quantizations": [
4403
+ "Q2_K",
4404
+ "Q3_K_L",
4405
+ "Q3_K_M",
4406
+ "Q3_K_S",
4407
+ "Q4_0",
4408
+ "Q4_K_M",
4409
+ "Q4_K_S",
4410
+ "Q5_0",
4411
+ "Q5_K_M",
4412
+ "Q5_K_S",
4413
+ "Q6_K",
4414
+ "Q8_0"
4415
+ ],
4416
+ "model_id": "TheBloke/AquilaChat2-34B-GGUF",
4417
+ "model_file_name_template": "aquilachat2-34b.{quantization}.gguf"
4418
+ },
4419
+ {
4420
+ "model_format": "gptq",
4421
+ "model_size_in_billions": 34,
4422
+ "quantizations": [
4423
+ "Int4"
4424
+ ],
4425
+ "model_id": "TheBloke/AquilaChat2-34B-GPTQ",
4426
+ "model_revision": "9a9d21424f7db608be51df769885514ab6e052db"
4427
+ },
4428
+ {
4429
+ "model_format": "awq",
4430
+ "model_size_in_billions": "34",
4431
+ "quantizations": [
4432
+ "Int4"
4433
+ ],
4434
+ "model_id": "TheBloke/AquilaChat2-34B-AWQ",
4435
+ "model_revision": "ad1dec1c8adb7fa6cb07b7e261aaa04fccf1c4c0"
4436
+ },
4437
+ {
4438
+ "model_format": "pytorch",
4439
+ "model_size_in_billions": 34,
4440
+ "quantizations": [
4441
+ "none"
4442
+ ],
4443
+ "model_id": "BAAI/AquilaChat2-34B",
4444
+ "model_revision": "b9cd9c7436435ab9cfa5e4f009be2b0354979ca8"
4445
+ },
4446
+ {
4447
+ "model_format": "pytorch",
4448
+ "model_size_in_billions": 70,
4449
+ "quantizations": [
4450
+ "none"
4451
+ ],
4452
+ "model_id": "BAAI/AquilaChat2-70B-Expr",
4453
+ "model_revision": "0df19b6e10f1a19ca663f7cc1141aae10f1825f4"
4454
+ }
4455
+ ],
4456
+ "prompt_style": {
4457
+ "style_name": "ADD_COLON_SINGLE",
4458
+ "intra_message_sep": "\n",
4459
+ "system_prompt": "",
4460
+ "roles": [
4461
+ "USER",
4462
+ "ASSISTANT"
4463
+ ],
4464
+ "stop_token_ids": [
4465
+ 100006,
4466
+ 100007
4467
+ ],
4468
+ "stop": [
4469
+ "[CLS]",
4470
+ "</s>"
4471
+ ]
4472
+ }
4473
+ },
4474
+ {
4475
+ "version": 1,
4476
+ "context_length": 16384,
4477
+ "model_name": "aquila2-chat-16k",
4478
+ "model_lang": [
4479
+ "zh"
4480
+ ],
4481
+ "model_ability": [
4482
+ "chat"
4483
+ ],
4484
+ "model_description": "AquilaChat2-16k series models are the long-text chat models",
4485
+ "model_specs": [
4486
+ {
4487
+ "model_format": "pytorch",
4488
+ "model_size_in_billions": 7,
4489
+ "quantizations": [
4490
+ "none"
4491
+ ],
4492
+ "model_id": "BAAI/AquilaChat2-7B-16K",
4493
+ "model_revision": "fb46d48479d05086ccf6952f19018322fcbb54cd"
4494
+ },
4495
+ {
4496
+ "model_format": "ggufv2",
4497
+ "model_size_in_billions": 34,
4498
+ "quantizations": [
4499
+ "Q2_K",
4500
+ "Q3_K_L",
4501
+ "Q3_K_M",
4502
+ "Q3_K_S",
4503
+ "Q4_0",
4504
+ "Q4_K_M",
4505
+ "Q4_K_S",
4506
+ "Q5_0",
4507
+ "Q5_K_M",
4508
+ "Q5_K_S",
4509
+ "Q6_K",
4510
+ "Q8_0"
4511
+ ],
4512
+ "model_id": "TheBloke/AquilaChat2-34B-16K-GGUF",
4513
+ "model_file_name_template": "aquilachat2-34b-16k.{quantization}.gguf"
4514
+ },
4515
+ {
4516
+ "model_format": "gptq",
4517
+ "model_size_in_billions": 34,
4518
+ "quantizations": [
4519
+ "Int4"
4520
+ ],
4521
+ "model_id": "TheBloke/AquilaChat2-34B-16K-GPTQ",
4522
+ "model_revision": "0afa1c2a55a4ee1a6f0dba81d9ec296dc7936b91"
4523
+ },
4524
+ {
4525
+ "model_format": "awq",
4526
+ "model_size_in_billions": 34,
4527
+ "quantizations": [
4528
+ "Int4"
4529
+ ],
4530
+ "model_id": "TheBloke/AquilaChat2-34B-16K-AWQ",
4531
+ "model_revision": "db7403ca492416903c84a7a38b11cb5506de48b1"
4532
+ },
4533
+ {
4534
+ "model_format": "pytorch",
4535
+ "model_size_in_billions": 34,
4536
+ "quantizations": [
4537
+ "none"
4538
+ ],
4539
+ "model_id": "BAAI/AquilaChat2-34B-16K",
4540
+ "model_revision": "a06fd164c7170714924d2881c61c8348425ebc94"
4541
+ }
4542
+ ],
4543
+ "prompt_style": {
4544
+ "style_name": "ADD_COLON_SINGLE",
4545
+ "intra_message_sep": "\n",
4546
+ "system_prompt": "",
4547
+ "roles": [
4548
+ "USER",
4549
+ "ASSISTANT"
4550
+ ],
4551
+ "stop_token_ids": [
4552
+ 100006,
4553
+ 100007
4554
+ ],
4555
+ "stop": [
4556
+ "[CLS]",
4557
+ "</s>"
4558
+ ]
4559
+ }
4560
+ },
4561
+ {
4562
+ "version": 1,
4563
+ "context_length": 4096,
4564
+ "model_name": "minicpm-2b-sft-bf16",
4565
+ "model_lang": [
4566
+ "zh"
4567
+ ],
4568
+ "model_ability": [
4569
+ "chat"
4570
+ ],
4571
+ "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4572
+ "model_specs": [
4573
+ {
4574
+ "model_format": "pytorch",
4575
+ "model_size_in_billions": 2,
4576
+ "quantizations": [
4577
+ "none"
4578
+ ],
4579
+ "model_id": "openbmb/MiniCPM-2B-sft-bf16",
4580
+ "model_revision": "fe1d74027ebdd81cef5f815fa3a2d432a6b5de2a"
4581
+ }
4582
+ ],
4583
+ "prompt_style": {
4584
+ "style_name": "MINICPM-2B",
4585
+ "system_prompt": "",
4586
+ "roles": [
4587
+ "user",
4588
+ "assistant"
4589
+ ],
4590
+ "stop_token_ids": [
4591
+ 1,
4592
+ 2
4593
+ ],
4594
+ "stop": [
4595
+ "<s>",
4596
+ "</s>"
4597
+ ]
4598
+ }
4599
+ },
4600
+ {
4601
+ "version": 1,
4602
+ "context_length": 4096,
4603
+ "model_name": "minicpm-2b-sft-fp32",
4604
+ "model_lang": [
4605
+ "zh"
4606
+ ],
4607
+ "model_ability": [
4608
+ "chat"
4609
+ ],
4610
+ "model_description": "MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4611
+ "model_specs": [
4612
+ {
4613
+ "model_format": "pytorch",
4614
+ "model_size_in_billions": 2,
4615
+ "quantizations": [
4616
+ "none"
4617
+ ],
4618
+ "model_id": "openbmb/MiniCPM-2B-sft-fp32",
4619
+ "model_revision": "35b90dd57d977b6e5bc4907986fa5b77aa15a82e"
4620
+ }
4621
+ ],
4622
+ "prompt_style": {
4623
+ "style_name": "MINICPM-2B",
4624
+ "system_prompt": "",
4625
+ "roles": [
4626
+ "user",
4627
+ "assistant"
4628
+ ],
4629
+ "stop_token_ids": [
4630
+ 1,
4631
+ 2
4632
+ ],
4633
+ "stop": [
4634
+ "<s>",
4635
+ "</s>"
4636
+ ]
4637
+ }
4638
+ },
4639
+ {
4640
+ "version": 1,
4641
+ "context_length": 4096,
4642
+ "model_name": "minicpm-2b-dpo-bf16",
4643
+ "model_lang": [
4644
+ "zh"
4645
+ ],
4646
+ "model_ability": [
4647
+ "chat"
4648
+ ],
4649
+ "model_description": "MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4650
+ "model_specs": [
4651
+ {
4652
+ "model_format": "pytorch",
4653
+ "model_size_in_billions": 2,
4654
+ "quantizations": [
4655
+ "none"
4656
+ ],
4657
+ "model_id": "openbmb/MiniCPM-2B-dpo-bf16",
4658
+ "model_revision": "f4a3ba49f3f18695945c2a7c12400d4da99da498"
4659
+ }
4660
+ ],
4661
+ "prompt_style": {
4662
+ "style_name": "MINICPM-2B",
4663
+ "system_prompt": "",
4664
+ "roles": [
4665
+ "user",
4666
+ "assistant"
4667
+ ],
4668
+ "stop_token_ids": [
4669
+ 1,
4670
+ 2
4671
+ ],
4672
+ "stop": [
4673
+ "<s>",
4674
+ "</s>"
4675
+ ]
4676
+ }
4677
+ },
4678
+ {
4679
+ "version": 1,
4680
+ "context_length": 4096,
4681
+ "model_name": "minicpm-2b-dpo-fp16",
4682
+ "model_lang": [
4683
+ "zh"
4684
+ ],
4685
+ "model_ability": [
4686
+ "chat"
4687
+ ],
4688
+ "model_description": "MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4689
+ "model_specs": [
4690
+ {
4691
+ "model_format": "pytorch",
4692
+ "model_size_in_billions": 2,
4693
+ "quantizations": [
4694
+ "none"
4695
+ ],
4696
+ "model_id": "openbmb/MiniCPM-2B-dpo-fp16",
4697
+ "model_revision": "e7a50289e4f839674cf8d4a5a2ce032ccacf64ac"
4698
+ }
4699
+ ],
4700
+ "prompt_style": {
4701
+ "style_name": "MINICPM-2B",
4702
+ "system_prompt": "",
4703
+ "roles": [
4704
+ "user",
4705
+ "assistant"
4706
+ ],
4707
+ "stop_token_ids": [
4708
+ 1,
4709
+ 2
4710
+ ],
4711
+ "stop": [
4712
+ "<s>",
4713
+ "</s>"
4714
+ ]
4715
+ }
4716
+ },
4717
+ {
4718
+ "version": 1,
4719
+ "context_length": 4096,
4720
+ "model_name": "minicpm-2b-dpo-fp32",
4721
+ "model_lang": [
4722
+ "zh"
4723
+ ],
4724
+ "model_ability": [
4725
+ "chat"
4726
+ ],
4727
+ "model_description": "MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
4728
+ "model_specs": [
4729
+ {
4730
+ "model_format": "pytorch",
4731
+ "model_size_in_billions": 2,
4732
+ "quantizations": [
4733
+ "none"
4734
+ ],
4735
+ "model_id": "openbmb/MiniCPM-2B-dpo-fp32",
4736
+ "model_revision": "b560a1593779b735a84a6daf72fba96ae38da288"
4737
+ }
4738
+ ],
4739
+ "prompt_style": {
4740
+ "style_name": "MINICPM-2B",
4741
+ "system_prompt": "",
4742
+ "roles": [
4743
+ "user",
4744
+ "assistant"
4745
+ ],
4746
+ "stop_token_ids": [
4747
+ 1,
4748
+ 2
4749
+ ],
4750
+ "stop": [
4751
+ "<s>",
4752
+ "</s>"
4753
+ ]
4754
+ }
3891
4755
  }
3892
4756
  ]