PyPI - xinference - Versions diffs - 0.11.0__py3-none-any.whl → 0.11.2__py3-none-any.whl - Mend - Supply Chain Defender

xinference 0.11.0__py3-none-any.whl → 0.11.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (56) hide show

xinference/model/llm/llm_family.json CHANGED Viewed

@@ -2198,6 +2198,31 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 65536,
+    "model_name": "codeqwen1.5",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "CodeQwen1.5 is the Code-Specific version of Qwen1.5. It is a transformer-based decoder-only language model pretrained on a large amount of code data.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/CodeQwen1.5-7B"
+      }
+    ]
+  },
   {
     "version": 1,
     "context_length": 65536,
@@ -3651,7 +3676,7 @@
   },
   {
     "version": 1,
-    "context_length": 204800,
+    "context_length": 262144,
     "model_name": "Yi-200k",
     "model_lang": [
       "en",
@@ -3688,7 +3713,7 @@
   },
   {
     "version": 1,
-    "context_length": 204800,
+    "context_length": 4096,
     "model_name": "Yi-chat",
     "model_lang": [
       "en",
@@ -3707,6 +3732,17 @@
         ],
         "model_id": "01-ai/Yi-34B-Chat-{quantization}"
       },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 6,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "01-ai/Yi-6B-Chat",
+        "model_revision": "1c20c960895e4c3877cf478bc2df074221b81d7b"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 34,
@@ -3762,6 +3798,124 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "Yi-1.5",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 6,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "01-ai/Yi-1.5-6B",
+        "model_revision": "741a657c42d2081f777ce4c6c5572090f8b8c886"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "01-ai/Yi-1.5-9B",
+        "model_revision": "9a6839c5b9db3dbb245fb98a072bfabc242621f2"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "01-ai/Yi-1.5-34B",
+        "model_revision": "4f83007957ec3eec76d87df19ad061eb0f57b5c5"
+      }
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "Yi-1.5-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Yi-1.5 is an upgraded version of Yi. It is continuously pre-trained on Yi with a high-quality corpus of 500B tokens and fine-tuned on 3M diverse fine-tuning samples.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 6,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "01-ai/Yi-1.5-6B-Chat",
+        "model_revision": "d68dab90947a3c869e28c9cb2806996af99a6080"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "01-ai/Yi-1.5-9B-Chat",
+        "model_revision": "1dc6e2b8dcfc12b95bede8dec67e6b6332ac64c6"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "01-ai/Yi-1.5-34B-Chat",
+        "model_revision": "fa695ee438bfcd0ec2b378fa1c7e0dea1b40393e"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "CHATML",
+      "system_prompt": "",
+      "roles": [
+        "<|im_start|>user",
+        "<|im_start|>assistant"
+      ],
+      "intra_message_sep": "<|im_end|>",
+      "inter_message_sep": "",
+      "stop_token_ids": [
+        2,
+        6,
+        7,
+        8
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "<|im_start|>",
+        "<|im_end|>",
+        "<|im_sep|>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 2048,
@@ -4206,6 +4360,83 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "deepseek",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "DeepSeek LLM, trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese. ",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-llm-7b-base",
+        "model_revision": "7683fea62db869066ddaff6a41d032262c490d4f"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 67,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-llm-67b-base",
+        "model_revision": "c3f813a1121c95488a20132d3a4da89f4a46452f"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "TheBloke/deepseek-llm-7B-chat-GGUF",
+        "model_file_name_template": "deepseek-llm-7b-chat.{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 67,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "TheBloke/deepseek-llm-67b-chat-GGUF",
+        "model_file_name_template": "deepseek-llm-67b-chat.{quantization}.gguf"
+      }
+    ]
+  },
   {
     "version": 1,
     "context_length": 4096,
@@ -4298,7 +4529,199 @@
   },
   {
     "version": 1,
-    "context_length": 4096,
+    "context_length": 16384,
+    "model_name": "deepseek-coder",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "Deepseek Coder is composed of a series of code language models, each trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese. ",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "1_3",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-coder-1.3b-base",
+        "model_revision": "c919139c3a9b4070729c8b2cca4847ab29ca8d94"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "6_7",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-coder-6.7b-base",
+        "model_revision": "ce2207a8bfef3ee92bd7dd4cc31c52cfa0046912"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-coder-7b-base-v1.5",
+        "model_revision": "98f0904cee2237e235f10408ae12292037b21dac"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 33,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-coder-33b-base",
+        "model_revision": "45c85cadf3720ef3e85a492e24fd4b8c5d21d8ac"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "1_3",
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "TheBloke/deepseek-coder-1.3b-base-GGUF",
+        "model_file_name_template": "deepseek-coder-1.3b-base.{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "6_7",
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "TheBloke/deepseek-coder-6.7B-base-GGUF",
+        "model_file_name_template": "deepseek-coder-6.7b-base.{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "dagbs/deepseek-coder-7b-base-v1.5-GGUF",
+        "model_file_name_template": "deepseek-coder-7b-base-v1.5.{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 33,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "TheBloke/deepseek-coder-33B-base-GGUF",
+        "model_file_name_template": "deepseek-coder-33b-base.{quantization}.gguf"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "1_3",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "TheBloke/deepseek-coder-1.3b-base-GPTQ",
+        "model_revision": "a5bf3b76d70cda53327311a631b1003024d5de29"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "6_7",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "TheBloke/deepseek-coder-6.7B-base-GPTQ",
+        "model_revision": "6476ea3d6e623a1313d363dbc6e172773e031bb1"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 33,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "TheBloke/deepseek-coder-33B-base-GPTQ",
+        "model_revision": "f527d7325e463a5cb091d044e4f2b15902674a70"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "1_3",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "TheBloke/deepseek-coder-1.3b-base-AWQ",
+        "model_revision": "ffb66f1a2a194401b4f29025edcd261d7f0a08a7"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "6_7",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "TheBloke/deepseek-coder-6.7B-base-AWQ",
+        "model_revision": "e3d4bdf39712665f5e9d5c05c9df6f20fe1e2d5a"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 33,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "TheBloke/deepseek-coder-33B-base-AWQ",
+        "model_revision": "c7edb2d5868d61a5dcf2591933a8992c8cbe3ef4"
+      }
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 16384,
     "model_name": "deepseek-coder-instruct",
     "model_lang": [
       "en",
@@ -4331,6 +4754,17 @@
         "model_id": "deepseek-ai/deepseek-coder-6.7b-instruct",
         "model_revision": "cbb77d7448ea3168d884758817e7f895e3828d1c"
       },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-coder-7b-instruct-v1.5",
+        "model_revision": "2a050a4c59d687a85324d32e147517992117ed30"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 33,
@@ -4382,6 +4816,25 @@
         "model_id": "TheBloke/deepseek-coder-6.7B-instruct-GGUF",
         "model_file_name_template": "deepseek-coder-6.7b-instruct.{quantization}.gguf"
       },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "LoneStriker/deepseek-coder-7b-instruct-v1.5-GGUF",
+        "model_file_name_template": "deepseek-coder-7b-instruct-v1.5-{quantization}.gguf"
+      },
       {
         "model_format": "ggufv2",
         "model_size_in_billions": 33,
@@ -4401,6 +4854,60 @@
         ],
         "model_id": "TheBloke/deepseek-coder-33B-instruct-GGUF",
         "model_file_name_template": "deepseek-coder-33b-instruct.{quantization}.gguf"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "1_3",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "TheBloke/deepseek-coder-1.3b-instruct-GPTQ",
+        "model_revision": "9c002e9af6cbdf3bd9244e2d7264b6a35d1dcacf"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "6_7",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "TheBloke/deepseek-coder-6.7B-instruct-GPTQ",
+        "model_revision": "13ccea6e3a43dcfdcb655d92097610018b431a17"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 33,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "TheBloke/deepseek-coder-33B-instruct-GPTQ",
+        "model_revision": "08372729d98dfc248f9531a412fe69e14e607027"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "1_3",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "TheBloke/deepseek-coder-1.3b-instruct-AWQ",
+        "model_revision": "a2a484da6e4146d055316a9a63cf5b13955715a4"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "6_7",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "TheBloke/deepseek-coder-6.7B-instruct-AWQ",
+        "model_revision": "502ae3e19e57ae78dc30a791ba33c565da72dc62"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 33,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "TheBloke/deepseek-coder-33B-instruct-AWQ",
+        "model_revision": "c40b499bac2712cd3c445cf1b05d2c6558ab0d29"
       }
     ],
     "prompt_style": {
@@ -4684,7 +5191,7 @@
   },
   {
     "version": 1,
-    "context_length": 204800,
+    "context_length": 4096,
     "model_name": "yi-vl-chat",
     "model_lang": [
       "en",
@@ -5326,9 +5833,9 @@
       "ar"
     ],
     "model_ability": [
-      "generate"
+      "chat"
     ],
-    "model_description": "C4AI Command-R is a research release of a 35 billion parameter highly performant generative model.",
+    "model_description": "C4AI Command-R(+) is a research release of 35 and 104 billion parameter highly performant generative models.",
     "model_specs": [
       {
         "model_format": "pytorch",
@@ -5377,7 +5884,21 @@
         "model_id": "alpindale/c4ai-command-r-plus-GPTQ",
         "model_revision": "35febfc08f723ac0df32480eb4af349a7d08656e"
       }
-    ]
+    ],
+    "prompt_style": {
+      "style_name": "c4ai-command-r",
+      "system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
+      "roles": [
+        "<|USER_TOKEN|>",
+        "<|CHATBOT_TOKEN|>"
+      ],
+      "intra_message_sep": "",
+      "inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
+      "stop_token_ids": [
+        6,
+        255001
+      ]
+    }
   },
   {
     "version": 1,
@@ -5418,7 +5939,21 @@
         "model_id": "CohereForAI/c4ai-command-r-plus-4bit",
         "model_revision": "bb63b5b7005ecedb30b0cfd0d5953b02a5817f7b"
       }
-    ]
+    ],
+    "prompt_style": {
+      "style_name": "c4ai-command-r",
+      "system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
+      "roles": [
+        "<|USER_TOKEN|>",
+        "<|CHATBOT_TOKEN|>"
+      ],
+      "intra_message_sep": "",
+      "inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
+      "stop_token_ids": [
+        6,
+        255001
+      ]
+    }
   },
   {
     "version": 1,
@@ -5459,5 +5994,94 @@
         32000
       ]
     }
-  }
+  },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "internvl-chat",
+    "model_lang": [
+        "en",
+        "zh"
+    ],
+    "model_ability": [
+        "chat",
+        "vision"
+    ],
+    "model_description": "InternVL 1.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
+    "model_specs": [
+        {
+            "model_format": "pytorch",
+            "model_size_in_billions": 26,
+            "quantizations": [
+                "none"
+            ],
+            "model_id": "OpenGVLab/InternVL-Chat-V1-5",
+            "model_revision": "e822119e5806946ce128043023a73d715ecabf8d"
+        },
+        {
+            "model_format": "pytorch",
+            "model_size_in_billions": 26,
+            "quantizations": [
+                "Int8"
+            ],
+            "model_id": "OpenGVLab/InternVL-Chat-V1-5-{quantization}",
+            "model_revision": "acaaed06937c603ab04f084216ecb0268160f538"
+        }
+    ],
+    "prompt_style": {
+        "style_name": "INTERNLM2",
+        "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
+        "roles": [
+            "<|im_start|>user",
+            "<|im_start|>assistant"
+        ],
+        "intra_message_sep": "<|im_end|>",
+        "stop_token_ids": [
+            92542
+        ],
+        "stop": [
+            "<|im_end|>"
+        ]
+    }
+},
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "mini-internvl-chat",
+    "model_lang": [
+        "en",
+        "zh"
+    ],
+    "model_ability": [
+        "chat",
+        "vision"
+    ],
+    "model_description": "InternVL 1.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
+    "model_specs": [
+        {
+            "model_format": "pytorch",
+            "model_size_in_billions": 2,
+            "quantizations": [
+                "none"
+            ],
+            "model_id": "OpenGVLab/Mini-InternVL-Chat-2B-V1-5",
+            "model_revision": "ce3f67acff17281bacbf4b156f402a0580fb9605"
+        }
+    ],
+    "prompt_style": {
+        "style_name": "INTERNLM2",
+        "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
+        "roles": [
+            "<|im_start|>user",
+            "<|im_start|>assistant"
+        ],
+        "intra_message_sep": "<|im_end|>",
+        "stop_token_ids": [
+            92542
+        ],
+        "stop": [
+            "<|im_end|>"
+        ]
+    }
+}
 ]