xinference 0.9.4__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (103)
  1. xinference/_version.py +3 -3
  2. xinference/api/oauth2/auth_service.py +47 -18
  3. xinference/api/oauth2/types.py +1 -0
  4. xinference/api/restful_api.py +34 -7
  5. xinference/client/oscar/actor_client.py +4 -3
  6. xinference/client/restful/restful_client.py +20 -4
  7. xinference/conftest.py +13 -2
  8. xinference/core/supervisor.py +48 -1
  9. xinference/core/worker.py +139 -20
  10. xinference/deploy/cmdline.py +119 -20
  11. xinference/model/embedding/core.py +1 -2
  12. xinference/model/llm/__init__.py +4 -6
  13. xinference/model/llm/ggml/llamacpp.py +2 -10
  14. xinference/model/llm/llm_family.json +877 -13
  15. xinference/model/llm/llm_family.py +15 -0
  16. xinference/model/llm/llm_family_modelscope.json +571 -0
  17. xinference/model/llm/pytorch/chatglm.py +2 -0
  18. xinference/model/llm/pytorch/core.py +22 -26
  19. xinference/model/llm/pytorch/deepseek_vl.py +232 -0
  20. xinference/model/llm/pytorch/internlm2.py +2 -0
  21. xinference/model/llm/pytorch/omnilmm.py +153 -0
  22. xinference/model/llm/pytorch/qwen_vl.py +2 -0
  23. xinference/model/llm/pytorch/yi_vl.py +4 -2
  24. xinference/model/llm/utils.py +53 -5
  25. xinference/model/llm/vllm/core.py +54 -6
  26. xinference/model/rerank/core.py +3 -0
  27. xinference/thirdparty/deepseek_vl/__init__.py +31 -0
  28. xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
  29. xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
  30. xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
  31. xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
  32. xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
  33. xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
  34. xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
  35. xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
  36. xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
  37. xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
  38. xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
  39. xinference/thirdparty/omnilmm/__init__.py +0 -0
  40. xinference/thirdparty/omnilmm/chat.py +216 -0
  41. xinference/thirdparty/omnilmm/constants.py +4 -0
  42. xinference/thirdparty/omnilmm/conversation.py +332 -0
  43. xinference/thirdparty/omnilmm/model/__init__.py +1 -0
  44. xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
  45. xinference/thirdparty/omnilmm/model/resampler.py +166 -0
  46. xinference/thirdparty/omnilmm/model/utils.py +563 -0
  47. xinference/thirdparty/omnilmm/train/__init__.py +13 -0
  48. xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
  49. xinference/thirdparty/omnilmm/utils.py +134 -0
  50. xinference/types.py +15 -19
  51. xinference/web/ui/build/asset-manifest.json +3 -3
  52. xinference/web/ui/build/index.html +1 -1
  53. xinference/web/ui/build/static/js/main.76ef2b17.js +3 -0
  54. xinference/web/ui/build/static/js/main.76ef2b17.js.map +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/15e2cf8cd8d0989719b6349428ff576f9009ff4c2dcc52378be0bd938e82495e.json +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/35d0e4a317e5582cbb79d901302e9d706520ac53f8a734c2fd8bfde6eb5a4f02.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/3c2f277c93c5f1638e08db38df0d0fb4e58d1c5571aea03241a5c04ff4094704.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +1 -0
  59. xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +1 -0
  60. xinference/web/ui/node_modules/.cache/babel-loader/5393569d846332075b93b55656716a34f50e0a8c970be789502d7e6c49755fd7.json +1 -0
  61. xinference/web/ui/node_modules/.cache/babel-loader/59ce49eae0f486af4c5034d4d2f9ca77c3ec3a32ecc560085caf5ef482b5f4c9.json +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/63a4c48f0326d071c7772c46598215c006ae41fd3d4ff3577fe717de66ad6e89.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/b9cbcb6d77ba21b22c6950b6fb5b305d23c19cf747f99f7d48b6b046f8f7b1b0.json +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +1 -0
  66. xinference/web/ui/node_modules/.cache/babel-loader/d076fd56cf3b15ed2433e3744b98c6b4e4410a19903d1db4de5bba0e1a1b3347.json +1 -0
  67. xinference/web/ui/node_modules/.cache/babel-loader/daad8131d91134f6d7aef895a0c9c32e1cb928277cb5aa66c01028126d215be0.json +1 -0
  68. xinference/web/ui/node_modules/.cache/babel-loader/de0299226173b0662b573f49e3992220f6611947073bd66ac079728a8bc8837d.json +1 -0
  69. xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +1 -0
  70. xinference/web/ui/node_modules/.cache/babel-loader/e6eccc9aa641e7da833492e27846dc965f9750281420977dc84654ca6ed221e4.json +1 -0
  71. xinference/web/ui/node_modules/.cache/babel-loader/e9b52d171223bb59fb918316297a051cdfd42dd453e8260fd918e90bc0a4ebdf.json +1 -0
  72. xinference/web/ui/node_modules/.cache/babel-loader/f16aec63602a77bd561d0e67fa00b76469ac54b8033754bba114ec5eb3257964.json +1 -0
  73. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/METADATA +25 -12
  74. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/RECORD +79 -58
  75. xinference/model/llm/ggml/ctransformers.py +0 -281
  76. xinference/model/llm/ggml/ctransformers_util.py +0 -161
  77. xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
  78. xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
  79. xinference/web/ui/node_modules/.cache/babel-loader/0bd70b1ecf307e2681318e864f4692305b6350c8683863007f4caf2f9ac33b6e.json +0 -1
  80. xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +0 -1
  81. xinference/web/ui/node_modules/.cache/babel-loader/18e5d5422e2464abf4a3e6d38164570e2e426e0a921e9a2628bbae81b18da353.json +0 -1
  82. xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +0 -1
  83. xinference/web/ui/node_modules/.cache/babel-loader/3e055de705e397e1d413d7f429589b1a98dd78ef378b97f0cdb462c5f2487d5e.json +0 -1
  84. xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +0 -1
  85. xinference/web/ui/node_modules/.cache/babel-loader/52aa27272b4b9968f62666262b47661cb1992336a2aff3b13994cc36877b3ec3.json +0 -1
  86. xinference/web/ui/node_modules/.cache/babel-loader/60c4b98d8ea7479fb0c94cfd19c8128f17bd7e27a1e73e6dd9adf6e9d88d18eb.json +0 -1
  87. xinference/web/ui/node_modules/.cache/babel-loader/7e094845f611802b024b57439cbf911038169d06cdf6c34a72a7277f35aa71a4.json +0 -1
  88. xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +0 -1
  89. xinference/web/ui/node_modules/.cache/babel-loader/98b7ef307f436affe13d75a4f265b27e828ccc2b10ffae6513abe2681bc11971.json +0 -1
  90. xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +0 -1
  91. xinference/web/ui/node_modules/.cache/babel-loader/b400cfc9db57fa6c70cd2bad055b73c5079fde0ed37974009d898083f6af8cd8.json +0 -1
  92. xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +0 -1
  93. xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
  94. xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +0 -1
  95. xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +0 -1
  96. xinference/web/ui/node_modules/.cache/babel-loader/e1d9b2ae4e1248658704bc6bfc5d6160dcd1a9e771ea4ae8c1fed0aaddeedd29.json +0 -1
  97. xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
  98. xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +0 -1
  99. /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.76ef2b17.js.LICENSE.txt} +0 -0
  100. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/LICENSE +0 -0
  101. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/WHEEL +0 -0
  102. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/entry_points.txt +0 -0
  103. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family.py
@@ -199,6 +199,21 @@ class CustomLLMFamilyV1(LLMFamilyV1):
                 )
             llm_spec.prompt_style = BUILTIN_LLM_PROMPT_STYLE[prompt_style_name]
 
+        # check model ability, registering LLM only provides generate and chat
+        # but for vision models, we add back the abilities so that
+        # gradio chat interface can be generated properly
+        if (
+            llm_spec.model_family != "other"
+            and llm_spec.model_family
+            in {
+                family.model_name
+                for family in BUILTIN_LLM_FAMILIES
+                if "vision" in family.model_ability
+            }
+            and "vision" not in llm_spec.model_ability
+        ):
+            llm_spec.model_ability.append("vision")
+
         return llm_spec
 
 
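For context, the added check backfills the "vision" ability for custom registrations whose model_family names a built-in vision family. A minimal standalone sketch of the same logic (the family set below is illustrative and stands in for BUILTIN_LLM_FAMILIES; it is not taken from the diff):

# Illustrative sketch of the ability backfill above; names are hypothetical.
BUILTIN_VISION_FAMILIES = {"qwen-vl-chat", "yi-vl-chat", "deepseek-vl-chat", "OmniLMM"}

def backfill_vision_ability(model_family: str, model_ability: list) -> list:
    # Only backfill when the declared family is a built-in vision family
    # and "vision" was not part of the registration.
    if (
        model_family != "other"
        and model_family in BUILTIN_VISION_FAMILIES
        and "vision" not in model_ability
    ):
        model_ability.append("vision")
    return model_ability

print(backfill_vision_ability("deepseek-vl-chat", ["chat"]))  # ['chat', 'vision']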
xinference/model/llm/llm_family_modelscope.json
@@ -388,6 +388,50 @@
             ]
         }
     },
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "chatglm3-128k",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 6,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "ZhipuAI/chatglm3-6b-128k",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "CHATGLM3",
+            "system_prompt": "",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "stop_token_ids": [
+                64795,
+                64797,
+                2
+            ],
+            "stop": [
+                "<|user|>",
+                "<|observation|>"
+            ]
+        }
+    },
     {
         "version": 1,
         "context_length": 2048,
xinference/model/llm/llm_family_modelscope.json
@@ -1781,6 +1825,17 @@
             "model_id": "qwen/Qwen1.5-14B-Chat",
             "model_hub": "modelscope"
         },
+        {
+            "model_format": "pytorch",
+            "model_size_in_billions": 32,
+            "quantizations": [
+                "4-bit",
+                "8-bit",
+                "none"
+            ],
+            "model_id": "qwen/Qwen1.5-32B-Chat",
+            "model_hub": "modelscope"
+        },
         {
             "model_format": "pytorch",
             "model_size_in_billions": 72,
@@ -1842,6 +1897,15 @@
             "model_id": "qwen/Qwen1.5-14B-Chat-GPTQ-{quantization}",
             "model_hub": "modelscope"
         },
+        {
+            "model_format": "gptq",
+            "model_size_in_billions": 32,
+            "quantizations": [
+                "Int4"
+            ],
+            "model_id": "qwen/Qwen1.5-32B-Chat-GPTQ-{quantization}",
+            "model_hub": "modelscope"
+        },
         {
             "model_format": "gptq",
             "model_size_in_billions": 72,
@@ -1897,6 +1961,15 @@
             "model_id": "qwen/Qwen1.5-14B-Chat-AWQ",
             "model_hub": "modelscope"
         },
+        {
+            "model_format": "awq",
+            "model_size_in_billions": 32,
+            "quantizations": [
+                "Int4"
+            ],
+            "model_id": "qwen/Qwen1.5-32B-Chat-AWQ",
+            "model_hub": "modelscope"
+        },
         {
             "model_format": "awq",
             "model_size_in_billions": 72,
@@ -1991,6 +2064,23 @@
             "model_hub": "modelscope",
             "model_file_name_template": "qwen1_5-14b-chat-{quantization}.gguf"
         },
+        {
+            "model_format": "ggufv2",
+            "model_size_in_billions": 32,
+            "quantizations": [
+                "q2_k",
+                "q3_k_m",
+                "q4_0",
+                "q4_k_m",
+                "q5_0",
+                "q5_k_m",
+                "q6_k",
+                "q8_0"
+            ],
+            "model_id": "qwen/Qwen1.5-32B-Chat-GGUF",
+            "model_hub": "modelscope",
+            "model_file_name_template": "qwen1_5-32b-chat-{quantization}.gguf"
+        },
         {
             "model_format": "ggufv2",
             "model_size_in_billions": 72,
@@ -2031,6 +2121,107 @@
             ]
         }
     },
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "qwen1.5-moe-chat",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "Qwen1.5-MoE is a transformer-based MoE decoder-only language model pretrained on a large amount of data.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "2_7",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "qwen/Qwen1.5-MoE-A2.7B-Chat",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": "2_7",
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "qwen/Qwen1.5-MoE-A2.7B-Chat-GPTQ-Int4",
+                "model_hub": "modelscope"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "QWEN",
+            "system_prompt": "You are a helpful assistant.",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "intra_message_sep": "\n",
+            "stop_token_ids": [
+                151643,
+                151644,
+                151645
+            ],
+            "stop": [
+                "<|endoftext|>",
+                "<|im_start|>",
+                "<|im_end|>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "deepseek-vl-chat",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "1_3",
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "deepseek-ai/deepseek-vl-7b-chat",
+                "model_hub": "modelscope"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "DEEPSEEK_CHAT",
+            "system_prompt": "<|begin▁of▁sentence|>",
+            "roles": [
+                "User",
+                "Assistant"
+            ],
+            "intra_message_sep": "\n\n",
+            "inter_message_sep": "<|end▁of▁sentence|>",
+            "stop": [
+                "<|end▁of▁sentence|>"
+            ]
+        }
+    },
     {
         "version": 1,
         "context_length": 4096,
@@ -2474,5 +2665,385 @@
                 "<start_of_turn>"
             ]
         }
+    },
+    {
+        "version": 1,
+        "context_length": 2048,
+        "model_name": "OmniLMM",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 3,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "OpenBMB/MiniCPM-V",
+                "model_hub": "modelscope",
+                "model_revision": "master"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 12,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "OpenBMB/OmniLMM-12B",
+                "model_hub": "modelscope",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "OmniLMM",
+            "system_prompt": "The role of first msg should be user",
+            "roles": [
+                "user",
+                "assistant"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "minicpm-2b-sft-bf16",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 2,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "OpenBMB/miniCPM-bf16",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "MINICPM-2B",
+            "system_prompt": "",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "stop_token_ids": [
+                1,
+                2
+            ],
+            "stop": [
+                "<s>",
+                "</s>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "minicpm-2b-sft-fp32",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 2,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "OpenBMB/MiniCPM-2B-sft-fp32",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "MINICPM-2B",
+            "system_prompt": "",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "stop_token_ids": [
+                1,
+                2
+            ],
+            "stop": [
+                "<s>",
+                "</s>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "minicpm-2b-dpo-bf16",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 2,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "OpenBMB/MiniCPM-2B-dpo-bf16",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "MINICPM-2B",
+            "system_prompt": "",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "stop_token_ids": [
+                1,
+                2
+            ],
+            "stop": [
+                "<s>",
+                "</s>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "minicpm-2b-dpo-fp16",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 2,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "OpenBMB/MiniCPM-2B-dpo-fp16",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "MINICPM-2B",
+            "system_prompt": "",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "stop_token_ids": [
+                1,
+                2
+            ],
+            "stop": [
+                "<s>",
+                "</s>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "minicpm-2b-dpo-fp32",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 2,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "OpenBMB/MiniCPM-2B-dpo-fp32",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "MINICPM-2B",
+            "system_prompt": "",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "stop_token_ids": [
+                1,
+                2
+            ],
+            "stop": [
+                "<s>",
+                "</s>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 2048,
+        "model_name": "aquila2",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "generate"
+        ],
+        "model_description": "Aquila2 series models are the base language models",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 34,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "BAAI/Aquila2-34B",
+                "model_revision": "master"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 70,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "BAAI/Aquila2-70B-Expr",
+                "model_revision": "master"
+            }
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 2048,
+        "model_name": "aquila2-chat",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "generate"
+        ],
+        "model_description": "Aquila2-chat series models are the chat models",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 34,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "BAAI/AquilaChat2-34B",
+                "model_revision": "master"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": 34,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "BAAI/AquilaChat2-34B-Int4-GPTQ",
+                "model_revision": "master"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 70,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "BAAI/AquilaChat2-70B-Expr",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "ADD_COLON_SINGLE",
+            "intra_message_sep": "\n",
+            "system_prompt": "",
+            "roles": [
+                "USER",
+                "ASSISTANT"
+            ],
+            "stop_token_ids": [
+                100006,
+                100007
+            ],
+            "stop": [
+                "[CLS]",
+                "</s>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 16384,
+        "model_name": "aquila2-chat-16k",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "generate"
+        ],
+        "model_description": "AquilaChat2-16k series models are the long-text chat models",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 34,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "BAAI/AquilaChat2-34B-16K",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "ADD_COLON_SINGLE",
+            "intra_message_sep": "\n",
+            "system_prompt": "",
+            "roles": [
+                "USER",
+                "ASSISTANT"
+            ],
+            "stop_token_ids": [
+                100006,
+                100007
+            ],
+            "stop": [
+                "[CLS]",
+                "</s>"
+            ]
+        }
     }
 ]
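All of the entries above declare "model_hub": "modelscope", i.e. they belong to llm_family_modelscope.json. Downloads are routed to ModelScope when the model source is set accordingly; a sketch using the documented XINFERENCE_MODEL_SRC environment variable, which must be set in the environment of the worker that performs the download:

import os

# Set before starting the Xinference worker so that the modelscope
# model_id fields above are used for downloads.
os.environ["XINFERENCE_MODEL_SRC"] = "modelscope"

from xinference.client import Client

client = Client("http://localhost:9997")  # assumed local endpoint
client.launch_model(
    model_name="minicpm-2b-dpo-bf16",
    model_format="pytorch",
    model_size_in_billions=2,
    quantization="none",
)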
xinference/model/llm/pytorch/chatglm.py
@@ -135,6 +135,8 @@ class ChatglmPytorchChatModel(PytorchChatModel):
         chat_history = [h for h in chat_history if not h.get("tool_calls")]
         if not chat_history:
             chat_history = []
+        if system_prompt:
+            chat_history.append({"role": "system", "content": system_prompt})
         if tools:
             msg = self._model.chat(
                 self._tokenizer, prompt, [tools] + chat_history, **kwargs
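The two added lines mean a caller-supplied system prompt now reaches ChatGLM3's history. A standalone sketch of the preprocessing, mirroring the lines shown (the inputs are made up):

def prepare_history(chat_history, system_prompt):
    # Drop tool-call messages, then append the system prompt, as in the diff.
    chat_history = [h for h in (chat_history or []) if not h.get("tool_calls")]
    if system_prompt:
        chat_history.append({"role": "system", "content": system_prompt})
    return chat_history

print(prepare_history(
    [{"role": "assistant", "tool_calls": [{"function": {"name": "f"}}]}],
    "You are a helpful assistant.",
))
# -> [{'role': 'system', 'content': 'You are a helpful assistant.'}]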
xinference/model/llm/pytorch/core.py
@@ -42,6 +42,25 @@ from ..utils import ChatModelMixin
 
 logger = logging.getLogger(__name__)
 
+NON_DEFAULT_MODEL_LIST: List[str] = [
+    "baichuan-chat",
+    "baichuan-2-chat",
+    "vicuna-v1.3",
+    "falcon",
+    "falcon-instruct",
+    "chatglm",
+    "chatglm2",
+    "chatglm2-32k",
+    "chatglm2-128k",
+    "llama-2",
+    "llama-2-chat",
+    "internlm2-chat",
+    "qwen-vl-chat",
+    "OmniLMM",
+    "yi-vl-chat",
+    "deepseek-vl-chat",
+]
+
 
 class PytorchModel(LLM):
     def __init__(
@@ -233,17 +252,7 @@ class PytorchModel(LLM):
         if llm_spec.model_format not in ["pytorch", "gptq", "awq"]:
             return False
         model_family = llm_family.model_family or llm_family.model_name
-        if model_family in [
-            "baichuan-chat",
-            "vicuna-v1.3",
-            "falcon",
-            "falcon-instruct",
-            "chatglm",
-            "chatglm2",
-            "chatglm2-32k",
-            "llama-2",
-            "llama-2-chat",
-        ]:
+        if model_family in NON_DEFAULT_MODEL_LIST:
             return False
         if "generate" not in llm_family.model_ability:
             return False
@@ -452,21 +461,8 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):
     ) -> bool:
         if llm_spec.model_format not in ["pytorch", "gptq", "awq"]:
             return False
-        if llm_family.model_name in [
-            "baichuan-chat",
-            "baichuan-2-chat",
-            "vicuna-v1.3",
-            "falcon",
-            "falcon-instruct",
-            "chatglm",
-            "chatglm2",
-            "chatglm2-32k",
-            "llama-2",
-            "llama-2-chat",
-            "internlm2-chat",
-            "qwen-vl-chat",
-            "yi-vl-chat",
-        ]:
+        model_family = llm_family.model_family or llm_family.model_name
+        if model_family in NON_DEFAULT_MODEL_LIST:
             return False
         if "chat" not in llm_family.model_ability:
             return False
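Net effect of the last three hunks: both match() implementations now resolve the effective family (model_family, falling back to model_name) and consult the shared NON_DEFAULT_MODEL_LIST, so custom models derived from families like deepseek-vl-chat are kept off the generic PyTorch path and routed to their dedicated implementations. A condensed sketch (simplified signature, list truncated):

NON_DEFAULT_MODEL_LIST = ["chatglm2-128k", "OmniLMM", "deepseek-vl-chat"]  # truncated

def match_chat(model_name, model_family, model_format, model_ability):
    if model_format not in ["pytorch", "gptq", "awq"]:
        return False
    family = model_family or model_name  # new: fall back to the family
    if family in NON_DEFAULT_MODEL_LIST:
        return False  # handled by a dedicated model class instead
    return "chat" in model_ability

# A custom model whose family is deepseek-vl-chat no longer matches:
print(match_chat("my-vl", "deepseek-vl-chat", "pytorch", ["chat", "vision"]))  # False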