xinference 0.14.1.post1__py3-none-any.whl → 0.14.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic; see the advisory details on the registry page for more information.

Files changed (194)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +15 -34
  3. xinference/client/restful/restful_client.py +2 -2
  4. xinference/core/chat_interface.py +45 -10
  5. xinference/core/image_interface.py +9 -0
  6. xinference/core/model.py +8 -5
  7. xinference/core/scheduler.py +1 -2
  8. xinference/core/worker.py +49 -42
  9. xinference/deploy/cmdline.py +2 -2
  10. xinference/deploy/test/test_cmdline.py +7 -7
  11. xinference/model/audio/chattts.py +24 -9
  12. xinference/model/audio/core.py +8 -2
  13. xinference/model/audio/fish_speech.py +228 -0
  14. xinference/model/audio/model_spec.json +8 -0
  15. xinference/model/embedding/core.py +23 -1
  16. xinference/model/image/model_spec.json +2 -1
  17. xinference/model/image/model_spec_modelscope.json +2 -1
  18. xinference/model/image/stable_diffusion/core.py +49 -1
  19. xinference/model/llm/__init__.py +26 -27
  20. xinference/model/llm/{ggml/llamacpp.py → llama_cpp/core.py} +2 -35
  21. xinference/model/llm/llm_family.json +606 -1266
  22. xinference/model/llm/llm_family.py +16 -139
  23. xinference/model/llm/llm_family_modelscope.json +276 -313
  24. xinference/model/llm/lmdeploy/__init__.py +0 -0
  25. xinference/model/llm/lmdeploy/core.py +557 -0
  26. xinference/model/llm/memory.py +9 -9
  27. xinference/model/llm/sglang/core.py +2 -2
  28. xinference/model/llm/{pytorch → transformers}/chatglm.py +6 -13
  29. xinference/model/llm/{pytorch → transformers}/cogvlm2.py +4 -45
  30. xinference/model/llm/transformers/cogvlm2_video.py +524 -0
  31. xinference/model/llm/{pytorch → transformers}/core.py +3 -10
  32. xinference/model/llm/{pytorch → transformers}/glm4v.py +2 -23
  33. xinference/model/llm/transformers/intern_vl.py +540 -0
  34. xinference/model/llm/{pytorch → transformers}/internlm2.py +4 -8
  35. xinference/model/llm/{pytorch → transformers}/minicpmv25.py +2 -23
  36. xinference/model/llm/{pytorch → transformers}/minicpmv26.py +66 -41
  37. xinference/model/llm/{pytorch → transformers}/utils.py +1 -2
  38. xinference/model/llm/{pytorch → transformers}/yi_vl.py +2 -24
  39. xinference/model/llm/utils.py +85 -70
  40. xinference/model/llm/vllm/core.py +110 -11
  41. xinference/model/utils.py +1 -95
  42. xinference/thirdparty/fish_speech/__init__.py +0 -0
  43. xinference/thirdparty/fish_speech/fish_speech/__init__.py +0 -0
  44. xinference/thirdparty/fish_speech/fish_speech/callbacks/__init__.py +3 -0
  45. xinference/thirdparty/fish_speech/fish_speech/callbacks/grad_norm.py +113 -0
  46. xinference/thirdparty/fish_speech/fish_speech/configs/__init__.py +0 -0
  47. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  48. xinference/thirdparty/fish_speech/fish_speech/conversation.py +2 -0
  49. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  50. xinference/thirdparty/fish_speech/fish_speech/datasets/concat_repeat.py +53 -0
  51. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  52. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_pb2.py +33 -0
  53. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_stream.py +36 -0
  54. xinference/thirdparty/fish_speech/fish_speech/datasets/semantic.py +496 -0
  55. xinference/thirdparty/fish_speech/fish_speech/datasets/vqgan.py +147 -0
  56. xinference/thirdparty/fish_speech/fish_speech/i18n/__init__.py +3 -0
  57. xinference/thirdparty/fish_speech/fish_speech/i18n/core.py +40 -0
  58. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  59. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +122 -0
  60. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +122 -0
  61. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +123 -0
  62. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +133 -0
  63. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +122 -0
  64. xinference/thirdparty/fish_speech/fish_speech/i18n/scan.py +122 -0
  65. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  66. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/__init__.py +0 -0
  67. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lit_module.py +202 -0
  68. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +779 -0
  69. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lora.py +92 -0
  70. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +3 -0
  71. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +442 -0
  72. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  73. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +44 -0
  74. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +625 -0
  75. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +139 -0
  76. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +115 -0
  77. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +225 -0
  78. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/utils.py +94 -0
  79. xinference/thirdparty/fish_speech/fish_speech/scheduler.py +40 -0
  80. xinference/thirdparty/fish_speech/fish_speech/text/__init__.py +4 -0
  81. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/__init__.py +0 -0
  82. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_class.py +172 -0
  83. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_constant.py +30 -0
  84. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_util.py +342 -0
  85. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/cardinal.py +32 -0
  86. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/date.py +75 -0
  87. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/digit.py +32 -0
  88. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/fraction.py +35 -0
  89. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/money.py +43 -0
  90. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/percentage.py +33 -0
  91. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/telephone.py +51 -0
  92. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +177 -0
  93. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +69 -0
  94. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +130 -0
  95. xinference/thirdparty/fish_speech/fish_speech/train.py +139 -0
  96. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +23 -0
  97. xinference/thirdparty/fish_speech/fish_speech/utils/braceexpand.py +217 -0
  98. xinference/thirdparty/fish_speech/fish_speech/utils/context.py +13 -0
  99. xinference/thirdparty/fish_speech/fish_speech/utils/file.py +16 -0
  100. xinference/thirdparty/fish_speech/fish_speech/utils/instantiators.py +50 -0
  101. xinference/thirdparty/fish_speech/fish_speech/utils/logger.py +55 -0
  102. xinference/thirdparty/fish_speech/fish_speech/utils/logging_utils.py +48 -0
  103. xinference/thirdparty/fish_speech/fish_speech/utils/rich_utils.py +100 -0
  104. xinference/thirdparty/fish_speech/fish_speech/utils/spectrogram.py +122 -0
  105. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +114 -0
  106. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  107. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +120 -0
  108. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1237 -0
  109. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  110. xinference/thirdparty/fish_speech/tools/api.py +495 -0
  111. xinference/thirdparty/fish_speech/tools/auto_rerank.py +159 -0
  112. xinference/thirdparty/fish_speech/tools/download_models.py +55 -0
  113. xinference/thirdparty/fish_speech/tools/extract_model.py +21 -0
  114. xinference/thirdparty/fish_speech/tools/file.py +108 -0
  115. xinference/thirdparty/fish_speech/tools/gen_ref.py +36 -0
  116. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  117. xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +169 -0
  118. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +171 -0
  119. xinference/thirdparty/fish_speech/tools/llama/generate.py +698 -0
  120. xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +95 -0
  121. xinference/thirdparty/fish_speech/tools/llama/quantize.py +497 -0
  122. xinference/thirdparty/fish_speech/tools/llama/rebuild_tokenizer.py +57 -0
  123. xinference/thirdparty/fish_speech/tools/merge_asr_files.py +55 -0
  124. xinference/thirdparty/fish_speech/tools/post_api.py +164 -0
  125. xinference/thirdparty/fish_speech/tools/sensevoice/__init__.py +0 -0
  126. xinference/thirdparty/fish_speech/tools/sensevoice/auto_model.py +573 -0
  127. xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +332 -0
  128. xinference/thirdparty/fish_speech/tools/sensevoice/vad_utils.py +61 -0
  129. xinference/thirdparty/fish_speech/tools/smart_pad.py +47 -0
  130. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  131. xinference/thirdparty/fish_speech/tools/vqgan/create_train_split.py +83 -0
  132. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +227 -0
  133. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +120 -0
  134. xinference/thirdparty/fish_speech/tools/webui.py +619 -0
  135. xinference/thirdparty/fish_speech/tools/whisper_asr.py +176 -0
  136. xinference/thirdparty/internvl/__init__.py +0 -0
  137. xinference/thirdparty/internvl/conversation.py +393 -0
  138. xinference/thirdparty/omnilmm/model/utils.py +16 -1
  139. xinference/web/ui/build/asset-manifest.json +3 -3
  140. xinference/web/ui/build/index.html +1 -1
  141. xinference/web/ui/build/static/js/main.661c7b0a.js +3 -0
  142. xinference/web/ui/build/static/js/{main.17ca0398.js.map → main.661c7b0a.js.map} +1 -1
  143. xinference/web/ui/node_modules/.cache/babel-loader/070d8c6b3b0f3485c6d3885f0b6bbfdf9643e088a468acbd5d596f2396071c16.json +1 -0
  144. xinference/web/ui/node_modules/.cache/babel-loader/213b5913e164773c2b0567455377765715f5f07225fbac77ad8e1e9dc9648a47.json +1 -0
  145. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +1 -0
  146. xinference/web/ui/node_modules/.cache/babel-loader/5391543180fead1eeef5364300301498d58a7d91d62de3841a32768b67f4552f.json +1 -0
  147. xinference/web/ui/node_modules/.cache/babel-loader/5c26a23b5eacf5b752a08531577ae3840bb247745ef9a39583dc2d05ba93a82a.json +1 -0
  148. xinference/web/ui/node_modules/.cache/babel-loader/714c37ce0ec5b5c591033f02be2f3f491fdd70da3ef568ee4a4f94689a3d5ca2.json +1 -0
  149. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +1 -0
  150. xinference/web/ui/node_modules/.cache/babel-loader/978b57d1a04a701bc3fcfebc511f5f274eed6ed7eade67f6fb76c27d5fd9ecc8.json +1 -0
  151. xinference/web/ui/node_modules/.cache/babel-loader/a797831de0dc74897f4b50b3426555d748f328b4c2cc391de709eadaf6a5f3e3.json +1 -0
  152. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +1 -0
  153. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +1 -0
  154. xinference/web/ui/node_modules/.cache/babel-loader/e91938976f229ce986b2907e51e1f00540b584ced0a315d498c172d13220739d.json +1 -0
  155. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +1 -0
  156. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/METADATA +22 -13
  157. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/RECORD +170 -79
  158. xinference/locale/utils.py +0 -39
  159. xinference/locale/zh_CN.json +0 -26
  160. xinference/model/llm/ggml/tools/__init__.py +0 -15
  161. xinference/model/llm/ggml/tools/convert_ggml_to_gguf.py +0 -498
  162. xinference/model/llm/ggml/tools/gguf.py +0 -884
  163. xinference/model/llm/pytorch/__init__.py +0 -13
  164. xinference/model/llm/pytorch/baichuan.py +0 -81
  165. xinference/model/llm/pytorch/falcon.py +0 -138
  166. xinference/model/llm/pytorch/intern_vl.py +0 -352
  167. xinference/model/llm/pytorch/vicuna.py +0 -69
  168. xinference/web/ui/build/static/js/main.17ca0398.js +0 -3
  169. xinference/web/ui/node_modules/.cache/babel-loader/1444c41a4d04494f1cbc2d8c1537df107b451cb569cb2c1fbf5159f3a4841a5f.json +0 -1
  170. xinference/web/ui/node_modules/.cache/babel-loader/2f40209b32e7e46a2eab6b8c8a355eb42c3caa8bc3228dd929f32fd2b3940294.json +0 -1
  171. xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +0 -1
  172. xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +0 -1
  173. xinference/web/ui/node_modules/.cache/babel-loader/6450605fac003812485f6251b9f0caafbf2e5bfc3bbe2f000050d9e2fdb8dcd3.json +0 -1
  174. xinference/web/ui/node_modules/.cache/babel-loader/71684495d995c7e266eecc6a0ad8ea0284cc785f80abddf863789c57a6134969.json +0 -1
  175. xinference/web/ui/node_modules/.cache/babel-loader/80acd1edf31542ab1dcccfad02cb4b38f3325cff847a781fcce97500cfd6f878.json +0 -1
  176. xinference/web/ui/node_modules/.cache/babel-loader/8a9742ddd8ba8546ef42dc14caca443f2b4524fabed7bf269e0eff3b7b64ee7d.json +0 -1
  177. xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +0 -1
  178. xinference/web/ui/node_modules/.cache/babel-loader/d93730e2b5d7e8c957b4d0965d2ed1dac9045a649adbd47c220d11f255d4b1e0.json +0 -1
  179. xinference/web/ui/node_modules/.cache/babel-loader/e656dc00b4d8b387f0a81ba8fc558767df1601c66369e2eb86a5ef27cf080572.json +0 -1
  180. xinference/web/ui/node_modules/.cache/babel-loader/f28b83886159d83b84f099b05d607a822dca4dd7f2d8aa6d56fe08bab0b5b086.json +0 -1
  181. xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +0 -1
  182. /xinference/{locale → model/llm/llama_cpp}/__init__.py +0 -0
  183. /xinference/model/llm/{ggml → transformers}/__init__.py +0 -0
  184. /xinference/model/llm/{pytorch → transformers}/compression.py +0 -0
  185. /xinference/model/llm/{pytorch → transformers}/deepseek_vl.py +0 -0
  186. /xinference/model/llm/{pytorch → transformers}/llama_2.py +0 -0
  187. /xinference/model/llm/{pytorch → transformers}/omnilmm.py +0 -0
  188. /xinference/model/llm/{pytorch → transformers}/qwen_vl.py +0 -0
  189. /xinference/model/llm/{pytorch → transformers}/tensorizer_utils.py +0 -0
  190. /xinference/web/ui/build/static/js/{main.17ca0398.js.LICENSE.txt → main.661c7b0a.js.LICENSE.txt} +0 -0
  191. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/LICENSE +0 -0
  192. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/WHEEL +0 -0
  193. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/entry_points.txt +0 -0
  194. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/top_level.txt +0 -0
@@ -503,78 +503,6 @@
503
503
  }
504
504
  ]
505
505
  },
506
- {
507
- "version": 1,
508
- "context_length": 8192,
509
- "model_name": "chatglm2",
510
- "model_lang": [
511
- "en",
512
- "zh"
513
- ],
514
- "model_ability": [
515
- "chat"
516
- ],
517
- "model_description": "ChatGLM2 is the second generation of ChatGLM, still open-source and trained on Chinese and English data.",
518
- "model_specs": [
519
- {
520
- "model_format": "pytorch",
521
- "model_size_in_billions": 6,
522
- "quantizations": [
523
- "4-bit",
524
- "8-bit",
525
- "none"
526
- ],
527
- "model_hub": "modelscope",
528
- "model_id": "ZhipuAI/chatglm2-6b",
529
- "model_revision": "v1.0.12"
530
- }
531
- ],
532
- "prompt_style": {
533
- "style_name": "CHATGLM",
534
- "system_prompt": "",
535
- "roles": [
536
- "问",
537
- "答"
538
- ],
539
- "intra_message_sep": "\n\n"
540
- }
541
- },
542
- {
543
- "version": 1,
544
- "context_length": 32768,
545
- "model_name": "chatglm2-32k",
546
- "model_lang": [
547
- "en",
548
- "zh"
549
- ],
550
- "model_ability": [
551
- "chat"
552
- ],
553
- "model_description": "ChatGLM2-32k is a special version of ChatGLM2, with a context window of 32k tokens instead of 8k.",
554
- "model_specs": [
555
- {
556
- "model_format": "pytorch",
557
- "model_size_in_billions": 6,
558
- "quantizations": [
559
- "4-bit",
560
- "8-bit",
561
- "none"
562
- ],
563
- "model_hub": "modelscope",
564
- "model_id": "ZhipuAI/chatglm2-6b-32k",
565
- "model_revision": "v1.0.2"
566
- }
567
- ],
568
- "prompt_style": {
569
- "style_name": "CHATGLM",
570
- "system_prompt": "",
571
- "roles": [
572
- "问",
573
- "答"
574
- ],
575
- "intra_message_sep": "\n\n"
576
- }
577
- },
578
506
  {
579
507
  "version": 1,
580
508
  "context_length": 8192,
@@ -1060,166 +988,60 @@
1060
988
  },
1061
989
  {
1062
990
  "version": 1,
1063
- "context_length": 8192,
1064
- "model_name": "internlm-7b",
991
+ "context_length": 32768,
992
+ "model_name": "internlm2.5-chat",
1065
993
  "model_lang": [
1066
994
  "en",
1067
995
  "zh"
1068
996
  ],
1069
997
  "model_ability": [
1070
- "generate"
998
+ "chat"
1071
999
  ],
1072
- "model_description": "InternLM is a Transformer-based LLM that is trained on both Chinese and English data, focusing on practical scenarios.",
1000
+ "model_description": "InternLM2.5 series of the InternLM model.",
1073
1001
  "model_specs": [
1074
1002
  {
1075
1003
  "model_format": "pytorch",
1076
- "model_size_in_billions": 7,
1004
+ "model_size_in_billions": "1_8",
1077
1005
  "quantizations": [
1078
- "4-bit",
1079
- "8-bit",
1080
1006
  "none"
1081
1007
  ],
1082
- "model_id": "Shanghai_AI_Laboratory/internlm-7b",
1083
- "model_hub": "modelscope",
1084
- "model_revision": "v1.0.1"
1085
- }
1086
- ]
1087
- },
1088
- {
1089
- "version": 1,
1090
- "context_length": 4096,
1091
- "model_name": "internlm-chat-7b",
1092
- "model_lang": [
1093
- "en",
1094
- "zh"
1095
- ],
1096
- "model_ability": [
1097
- "chat"
1098
- ],
1099
- "model_description": "Internlm-chat is a fine-tuned version of the Internlm LLM, specializing in chatting.",
1100
- "model_specs": [
1008
+ "model_id": "Shanghai_AI_Laboratory/internlm2_5-1_8b-chat",
1009
+ "model_hub": "modelscope"
1010
+ },
1101
1011
  {
1102
1012
  "model_format": "pytorch",
1103
1013
  "model_size_in_billions": 7,
1104
1014
  "quantizations": [
1105
- "4-bit",
1106
- "8-bit",
1107
1015
  "none"
1108
1016
  ],
1109
- "model_id": "Shanghai_AI_Laboratory/internlm-chat-7b",
1110
- "model_hub": "modelscope",
1111
- "model_revision": "v1.0.1"
1112
- }
1113
- ],
1114
- "prompt_style": {
1115
- "style_name": "INTERNLM",
1116
- "system_prompt": "",
1117
- "roles": [
1118
- "<|User|>",
1119
- "<|Bot|>"
1120
- ],
1121
- "intra_message_sep": "<eoh>\n",
1122
- "inter_message_sep": "<eoa>\n",
1123
- "stop_token_ids": [
1124
- 1,
1125
- 103028
1126
- ],
1127
- "stop": [
1128
- "<eoa>"
1129
- ]
1130
- }
1131
- },
1132
- {
1133
- "version": 1,
1134
- "context_length": 16384,
1135
- "model_name": "internlm-20b",
1136
- "model_lang": [
1137
- "en",
1138
- "zh"
1139
- ],
1140
- "model_ability": [
1141
- "generate"
1142
- ],
1143
- "model_description": "Pre-trained on over 2.3T Tokens containing high-quality English, Chinese, and code data.",
1144
- "model_specs": [
1017
+ "model_id": "Shanghai_AI_Laboratory/internlm2_5-7b-chat",
1018
+ "model_hub": "modelscope"
1019
+ },
1145
1020
  {
1146
- "model_format": "pytorch",
1147
- "model_size_in_billions": 20,
1021
+ "model_format": "ggufv2",
1022
+ "model_size_in_billions": 7,
1148
1023
  "quantizations": [
1149
- "4-bit",
1150
- "8-bit",
1151
- "none"
1024
+ "q2_k",
1025
+ "q3_k_m",
1026
+ "q4_0",
1027
+ "q4_k_m",
1028
+ "q5_0",
1029
+ "q5_k_m",
1030
+ "q6_k",
1031
+ "q8_0",
1032
+ "fp16"
1152
1033
  ],
1153
- "model_id": "Shanghai_AI_Laboratory/internlm-20b",
1154
- "model_hub": "modelscope",
1155
- "model_revision": "v1.0.1"
1156
- }
1157
- ]
1158
- },
1159
- {
1160
- "version": 1,
1161
- "context_length": 16384,
1162
- "model_name": "internlm-chat-20b",
1163
- "model_lang": [
1164
- "en",
1165
- "zh"
1166
- ],
1167
- "model_ability": [
1168
- "chat"
1169
- ],
1170
- "model_description": "Pre-trained on over 2.3T Tokens containing high-quality English, Chinese, and code data. The Chat version has undergone SFT and RLHF training.",
1171
- "model_specs": [
1034
+ "model_id": "Shanghai_AI_Laboratory/internlm2_5-7b-chat-gguf",
1035
+ "model_file_name_template": "internlm2_5-7b-chat-{quantization}.gguf",
1036
+ "model_hub": "modelscope"
1037
+ },
1172
1038
  {
1173
1039
  "model_format": "pytorch",
1174
1040
  "model_size_in_billions": 20,
1175
- "quantizations": [
1176
- "4-bit",
1177
- "8-bit",
1178
- "none"
1179
- ],
1180
- "model_id": "Shanghai_AI_Laboratory/internlm-chat-20b",
1181
- "model_hub": "modelscope",
1182
- "model_revision": "v1.0.1"
1183
- }
1184
- ],
1185
- "prompt_style": {
1186
- "style_name": "INTERNLM",
1187
- "system_prompt": "",
1188
- "roles": [
1189
- "<|User|>",
1190
- "<|Bot|>"
1191
- ],
1192
- "intra_message_sep": "<eoh>\n",
1193
- "inter_message_sep": "<eoa>\n",
1194
- "stop_token_ids": [
1195
- 1,
1196
- 103028
1197
- ],
1198
- "stop": [
1199
- "<eoa>"
1200
- ]
1201
- }
1202
- },
1203
- {
1204
- "version": 1,
1205
- "context_length": 32768,
1206
- "model_name": "internlm2.5-chat",
1207
- "model_lang": [
1208
- "en",
1209
- "zh"
1210
- ],
1211
- "model_ability": [
1212
- "chat"
1213
- ],
1214
- "model_description": "InternLM2.5 series of the InternLM model.",
1215
- "model_specs": [
1216
- {
1217
- "model_format": "pytorch",
1218
- "model_size_in_billions": 7,
1219
1041
  "quantizations": [
1220
1042
  "none"
1221
1043
  ],
1222
- "model_id": "Shanghai_AI_Laboratory/internlm2_5-7b-chat",
1044
+ "model_id": "Shanghai_AI_Laboratory/internlm2_5-20b-chat",
1223
1045
  "model_hub": "modelscope"
1224
1046
  }
1225
1047
  ],
@@ -2403,59 +2225,6 @@
2403
2225
  ]
2404
2226
  }
2405
2227
  },
2406
- {
2407
- "version": 1,
2408
- "context_length": 2048,
2409
- "model_name": "falcon-instruct",
2410
- "model_lang": [
2411
- "en"
2412
- ],
2413
- "model_ability": [
2414
- "chat"
2415
- ],
2416
- "model_description": "Falcon-instruct is a fine-tuned version of the Falcon LLM, specializing in chatting.",
2417
- "model_specs": [
2418
- {
2419
- "model_format": "pytorch",
2420
- "model_size_in_billions": 7,
2421
- "quantizations": [
2422
- "4-bit",
2423
- "8-bit",
2424
- "none"
2425
- ],
2426
- "model_hub": "modelscope",
2427
- "model_id": "Xorbits/falcon-7b-instruct",
2428
- "model_revision": "v1.0.0"
2429
- }
2430
- ],
2431
- "prompt_style": {
2432
- "style_name": "FALCON",
2433
- "system_prompt": "",
2434
- "roles": [
2435
- "User",
2436
- "Assistant"
2437
- ],
2438
- "intra_message_sep": "\n",
2439
- "inter_message_sep": "<|endoftext|>",
2440
- "stop": [
2441
- "\nUser"
2442
- ],
2443
- "stop_token_ids": [
2444
- 0,
2445
- 1,
2446
- 2,
2447
- 3,
2448
- 4,
2449
- 5,
2450
- 6,
2451
- 7,
2452
- 8,
2453
- 9,
2454
- 10,
2455
- 11
2456
- ]
2457
- }
2458
- },
2459
2228
  {
2460
2229
  "version": 1,
2461
2230
  "context_length": 8192,
@@ -2540,53 +2309,6 @@
2540
2309
  ]
2541
2310
  }
2542
2311
  },
2543
- {
2544
- "version": 1,
2545
- "context_length": 2048,
2546
- "model_name": "OpenBuddy",
2547
- "model_lang": [
2548
- "en"
2549
- ],
2550
- "model_ability": [
2551
- "chat"
2552
- ],
2553
- "model_description": "OpenBuddy is a powerful open multilingual chatbot model aimed at global users.",
2554
- "model_specs": [
2555
- {
2556
- "model_format": "ggmlv3",
2557
- "model_size_in_billions": 13,
2558
- "quantizations": [
2559
- "Q2_K",
2560
- "Q3_K_S",
2561
- "Q3_K_M",
2562
- "Q3_K_L",
2563
- "Q4_0",
2564
- "Q4_1",
2565
- "Q4_K_S",
2566
- "Q4_K_M",
2567
- "Q5_0",
2568
- "Q5_1",
2569
- "Q5_K_S",
2570
- "Q5_K_M",
2571
- "Q6_K",
2572
- "Q8_0"
2573
- ],
2574
- "model_hub": "modelscope",
2575
- "model_id": "Xorbits/OpenBuddy-Llama2-13B-v11.1-GGML",
2576
- "model_file_name_template": "openbuddy-llama2-13b-v11.1.ggmlv3.{quantization}.bin"
2577
- }
2578
- ],
2579
- "prompt_style": {
2580
- "style_name": "INSTRUCTION",
2581
- "system_prompt": "You are a professional translator. Be faithful or accurate in translation. Make the translation readable or intelligible. Be elegant or natural in translation. Do not translate person's name. Do not add any additional text to the translation. Do not give me any comments or suggestions.\nUser:\n\n{0}\nAssistant:",
2582
- "roles": [
2583
- "User",
2584
- "Assistant"
2585
- ],
2586
- "intra_message_sep": "",
2587
- "inter_message_sep": ""
2588
- }
2589
- },
2590
2312
  {
2591
2313
  "version": 1,
2592
2314
  "context_length": 32768,
@@ -3416,6 +3138,24 @@
3416
3138
  "model_id": "qwen/Qwen2-72B-Instruct-AWQ",
3417
3139
  "model_hub": "modelscope"
3418
3140
  },
3141
+ {
3142
+ "model_format": "fp8",
3143
+ "model_size_in_billions": 7,
3144
+ "quantizations": [
3145
+ "fp8"
3146
+ ],
3147
+ "model_id": "liuzhenghua/Qwen2-7B-FP8-Instruct",
3148
+ "model_hub": "modelscope"
3149
+ },
3150
+ {
3151
+ "model_format": "fp8",
3152
+ "model_size_in_billions": 72,
3153
+ "quantizations": [
3154
+ "fp8"
3155
+ ],
3156
+ "model_id": "liuzhenghua/Qwen2-72B-FP8-Instruct",
3157
+ "model_hub": "modelscope"
3158
+ },
3419
3159
  {
3420
3160
  "model_format": "mlx",
3421
3161
  "model_size_in_billions": "0_5",
@@ -4245,6 +3985,17 @@
4245
3985
  ],
4246
3986
  "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
4247
3987
  "model_specs": [
3988
+ {
3989
+ "model_format": "pytorch",
3990
+ "model_size_in_billions": 2,
3991
+ "quantizations": [
3992
+ "none",
3993
+ "4-bit",
3994
+ "8-bit"
3995
+ ],
3996
+ "model_id": "LLM-Research/gemma-2-2b-it",
3997
+ "model_hub": "modelscope"
3998
+ },
4248
3999
  {
4249
4000
  "model_format": "pytorch",
4250
4001
  "model_size_in_billions": 9,
@@ -4958,25 +4709,187 @@
4958
4709
  "model_format": "pytorch",
4959
4710
  "model_size_in_billions": 26,
4960
4711
  "quantizations": [
4961
- "none"
4712
+ "4-bit",
4713
+ "8-bit",
4714
+ "none"
4962
4715
  ],
4963
- "model_hub": "modelscope",
4964
- "model_id": "AI-ModelScope/InternVL-Chat-V1-5",
4716
+ "model_hub": "modelscope",
4717
+ "model_id": "OpenGVLab/InternVL-Chat-V1-5",
4718
+ "model_revision": "master"
4719
+ }
4720
+ ],
4721
+ "prompt_style": {
4722
+ "style_name": "INTERNVL",
4723
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
4724
+ "roles": [
4725
+ "<|im_start|>user",
4726
+ "<|im_start|>assistant"
4727
+ ],
4728
+ "intra_message_sep": "<|im_end|>",
4729
+ "stop_token_ids": [
4730
+ 2,
4731
+ 92543,
4732
+ 92542
4733
+ ],
4734
+ "stop": [
4735
+ "</s>",
4736
+ "<|im_end|>",
4737
+ "<|im_start|>"
4738
+ ]
4739
+ }
4740
+ },
4741
+ {
4742
+ "version": 1,
4743
+ "context_length": 32768,
4744
+ "model_name": "internvl2",
4745
+ "model_lang": [
4746
+ "en",
4747
+ "zh"
4748
+ ],
4749
+ "model_ability": [
4750
+ "chat",
4751
+ "vision"
4752
+ ],
4753
+ "model_description": "InternVL 2 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
4754
+ "model_specs": [
4755
+
4756
+ {
4757
+ "model_format": "pytorch",
4758
+ "model_size_in_billions": 1,
4759
+ "quantizations": [
4760
+ "4-bit",
4761
+ "8-bit",
4762
+ "none"
4763
+ ],
4764
+ "model_hub": "modelscope",
4765
+ "model_id": "OpenGVLab/InternVL2-1B",
4766
+ "model_revision": "master"
4767
+ },
4768
+ {
4769
+ "model_format": "pytorch",
4770
+ "model_size_in_billions": 2,
4771
+ "quantizations": [
4772
+ "4-bit",
4773
+ "8-bit",
4774
+ "none"
4775
+ ],
4776
+ "model_hub": "modelscope",
4777
+ "model_id": "OpenGVLab/InternVL2-2B",
4778
+ "model_revision": "master"
4779
+ },
4780
+ {
4781
+ "model_format": "awq",
4782
+ "model_size_in_billions": 2,
4783
+ "quantizations": [
4784
+ "Int4"
4785
+ ],
4786
+ "model_hub": "modelscope",
4787
+ "model_id": "OpenGVLab/InternVL2-2B-AWQ",
4788
+ "model_revision": "master"
4789
+ },
4790
+ {
4791
+ "model_format": "pytorch",
4792
+ "model_size_in_billions": 4,
4793
+ "quantizations": [
4794
+ "4-bit",
4795
+ "8-bit",
4796
+ "none"
4797
+ ],
4798
+ "model_hub": "modelscope",
4799
+ "model_id": "OpenGVLab/InternVL2-4B",
4800
+ "model_revision": "master"
4801
+ },
4802
+ {
4803
+ "model_format": "pytorch",
4804
+ "model_size_in_billions": 8,
4805
+ "quantizations": [
4806
+ "4-bit",
4807
+ "8-bit",
4808
+ "none"
4809
+ ],
4810
+ "model_hub": "modelscope",
4811
+ "model_id": "OpenGVLab/InternVL2-8B",
4812
+ "model_revision": "master"
4813
+ },
4814
+ {
4815
+ "model_format": "awq",
4816
+ "model_size_in_billions": 8,
4817
+ "quantizations": [
4818
+ "Int4"
4819
+ ],
4820
+ "model_hub": "modelscope",
4821
+ "model_id": "OpenGVLab/InternVL2-8B-AWQ",
4965
4822
  "model_revision": "master"
4966
4823
  },
4967
4824
  {
4968
4825
  "model_format": "pytorch",
4969
4826
  "model_size_in_billions": 26,
4970
4827
  "quantizations": [
4971
- "Int8"
4828
+ "4-bit",
4829
+ "8-bit",
4830
+ "none"
4831
+ ],
4832
+ "model_hub": "modelscope",
4833
+ "model_id": "OpenGVLab/InternVL2-26B",
4834
+ "model_revision": "master"
4835
+ },
4836
+ {
4837
+ "model_format": "awq",
4838
+ "model_size_in_billions": 26,
4839
+ "quantizations": [
4840
+ "Int4"
4841
+ ],
4842
+ "model_hub": "modelscope",
4843
+ "model_id": "OpenGVLab/InternVL2-26B-AWQ",
4844
+ "model_revision": "master"
4845
+ },
4846
+ {
4847
+ "model_format": "pytorch",
4848
+ "model_size_in_billions": 40,
4849
+ "quantizations": [
4850
+ "4-bit",
4851
+ "8-bit",
4852
+ "none"
4853
+ ],
4854
+ "model_hub": "modelscope",
4855
+ "model_id": "OpenGVLab/InternVL2-40B",
4856
+ "model_revision": "master"
4857
+ },
4858
+ {
4859
+ "model_format": "awq",
4860
+ "model_size_in_billions": 40,
4861
+ "quantizations": [
4862
+ "Int4"
4863
+ ],
4864
+ "model_hub": "modelscope",
4865
+ "model_id": "OpenGVLab/InternVL2-40B-AWQ",
4866
+ "model_revision": "master"
4867
+ },
4868
+ {
4869
+ "model_format": "pytorch",
4870
+ "model_size_in_billions": 76,
4871
+ "quantizations": [
4872
+ "4-bit",
4873
+ "8-bit",
4874
+ "none"
4972
4875
  ],
4973
- "model_hub": "modelscope",
4974
- "model_id": "AI-ModelScope/InternVL-Chat-V1-5-{quantization}",
4876
+ "model_hub": "modelscope",
4877
+ "model_id": "OpenGVLab/InternVL2-Llama3-76B",
4878
+ "model_revision": "master"
4879
+ },
4880
+ {
4881
+ "model_format": "awq",
4882
+ "model_size_in_billions": 76,
4883
+ "quantizations": [
4884
+ "Int4"
4885
+ ],
4886
+ "model_hub": "modelscope",
4887
+ "model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ",
4975
4888
  "model_revision": "master"
4976
4889
  }
4977
4890
  ],
4978
4891
  "prompt_style": {
4979
- "style_name": "INTERNLM2",
4892
+ "style_name": "INTERNVL",
4980
4893
  "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
4981
4894
  "roles": [
4982
4895
  "<|im_start|>user",
@@ -4984,10 +4897,14 @@
4984
4897
  ],
4985
4898
  "intra_message_sep": "<|im_end|>",
4986
4899
  "stop_token_ids": [
4900
+ 2,
4901
+ 92543,
4987
4902
  92542
4988
4903
  ],
4989
4904
  "stop": [
4990
- "<|im_end|>"
4905
+ "</s>",
4906
+ "<|im_end|>",
4907
+ "<|im_start|>"
4991
4908
  ]
4992
4909
  }
4993
4910
  },
@@ -5045,6 +4962,52 @@
5045
4962
  ]
5046
4963
  }
5047
4964
  },
4965
+ {
4966
+ "version": 1,
4967
+ "context_length": 8192,
4968
+ "model_name": "cogvlm2-video-llama3-chat",
4969
+ "model_lang": [
4970
+ "en",
4971
+ "zh"
4972
+ ],
4973
+ "model_ability": [
4974
+ "chat",
4975
+ "vision"
4976
+ ],
4977
+ "model_description": "CogVLM2-Video achieves state-of-the-art performance on multiple video question answering tasks.",
4978
+ "model_specs": [
4979
+ {
4980
+ "model_format": "pytorch",
4981
+ "model_size_in_billions": 12,
4982
+ "quantizations": [
4983
+ "4-bit",
4984
+ "8-bit",
4985
+ "none"
4986
+ ],
4987
+ "model_hub": "modelscope",
4988
+ "model_id": "ZhipuAI/cogvlm2-video-llama3-chat",
4989
+ "model_revision": "master"
4990
+ }
4991
+ ],
4992
+ "prompt_style": {
4993
+ "style_name": "LLAMA3",
4994
+ "system_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
4995
+ "roles": [
4996
+ "user",
4997
+ "assistant"
4998
+ ],
4999
+ "intra_message_sep": "\n\n",
5000
+ "inter_message_sep": "<|eot_id|>",
5001
+ "stop_token_ids": [
5002
+ 128001,
5003
+ 128009
5004
+ ],
5005
+ "stop": [
5006
+ "<|end_of_text|>",
5007
+ "<|eot_id|>"
5008
+ ]
5009
+ }
5010
+ },
5048
5011
  {
5049
5012
  "version": 1,
5050
5013
  "context_length": 8192,
File without changes