xinference 0.9.3__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (64)
  1. xinference/_version.py +3 -3
  2. xinference/api/oauth2/auth_service.py +47 -18
  3. xinference/api/oauth2/types.py +1 -0
  4. xinference/api/restful_api.py +16 -11
  5. xinference/client/restful/restful_client.py +12 -2
  6. xinference/conftest.py +13 -2
  7. xinference/constants.py +2 -0
  8. xinference/core/supervisor.py +32 -1
  9. xinference/core/worker.py +139 -20
  10. xinference/deploy/cmdline.py +119 -20
  11. xinference/model/llm/__init__.py +6 -0
  12. xinference/model/llm/llm_family.json +711 -10
  13. xinference/model/llm/llm_family_modelscope.json +557 -7
  14. xinference/model/llm/pytorch/chatglm.py +2 -1
  15. xinference/model/llm/pytorch/core.py +2 -0
  16. xinference/model/llm/pytorch/deepseek_vl.py +232 -0
  17. xinference/model/llm/pytorch/internlm2.py +2 -1
  18. xinference/model/llm/pytorch/omnilmm.py +153 -0
  19. xinference/model/llm/sglang/__init__.py +13 -0
  20. xinference/model/llm/sglang/core.py +365 -0
  21. xinference/model/llm/utils.py +46 -13
  22. xinference/model/llm/vllm/core.py +10 -0
  23. xinference/thirdparty/deepseek_vl/__init__.py +31 -0
  24. xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
  25. xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
  26. xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
  27. xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
  28. xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
  29. xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
  30. xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
  31. xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
  32. xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
  33. xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
  34. xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
  35. xinference/thirdparty/omnilmm/__init__.py +0 -0
  36. xinference/thirdparty/omnilmm/chat.py +216 -0
  37. xinference/thirdparty/omnilmm/constants.py +4 -0
  38. xinference/thirdparty/omnilmm/conversation.py +332 -0
  39. xinference/thirdparty/omnilmm/model/__init__.py +1 -0
  40. xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
  41. xinference/thirdparty/omnilmm/model/resampler.py +166 -0
  42. xinference/thirdparty/omnilmm/model/utils.py +563 -0
  43. xinference/thirdparty/omnilmm/train/__init__.py +13 -0
  44. xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
  45. xinference/thirdparty/omnilmm/utils.py +134 -0
  46. xinference/web/ui/build/asset-manifest.json +3 -3
  47. xinference/web/ui/build/index.html +1 -1
  48. xinference/web/ui/build/static/js/main.98516614.js +3 -0
  49. xinference/web/ui/build/static/js/main.98516614.js.map +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/139969fd25258eb7decc9505f30b779089bba50c402bb5c663008477c7bff73b.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/3f357ab57b8e7fade54c667f0e0ebf2787566f72bfdca0fea14e395b5c203753.json +1 -0
  52. xinference/web/ui/node_modules/.cache/babel-loader/9d7c49815d97539207e5aab2fb967591b5fed7791218a0762539efc9491f36af.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/d0d0b591d9adaf42b83ad6633f8b7c118541a4b80ea957c303d3bf9b86fbad0a.json +1 -0
  54. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/METADATA +21 -5
  55. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/RECORD +60 -31
  56. xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
  57. xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
  58. xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
  59. xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
  60. /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.98516614.js.LICENSE.txt} +0 -0
  61. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/LICENSE +0 -0
  62. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/WHEEL +0 -0
  63. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/entry_points.txt +0 -0
  64. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/top_level.txt +0 -0
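
The headline changes in this release are a new SGLang backend (xinference/model/llm/sglang/), vendored DeepSeek-VL and OmniLMM code under xinference/thirdparty/, and a batch of new model registrations in the llm_family JSON files, shown in the hunks below. As a minimal sketch (assuming xinference 0.10.0 with a supervisor already running at the default endpoint; the argument values mirror the new JSON specs), one of the newly registered models can be launched through the Python client:

    # Minimal sketch, assuming a supervisor at the default endpoint; the
    # argument values mirror the new JSON specs shown in the hunks below.
    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")
    uid = client.launch_model(
        model_name="deepseek-vl-chat",  # registered in this release
        model_format="pytorch",
        model_size_in_billions=7,
        quantization="none",
    )
    model = client.get_model(uid)
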
@@ -338,7 +338,7 @@
         64797,
         2
       ],
-      "stop":[
+      "stop": [
         "<|user|>",
         "<|observation|>"
       ]
@@ -382,13 +382,56 @@
         64797,
         2
       ],
-      "stop":[
+      "stop": [
+        "<|user|>",
+        "<|observation|>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "chatglm3-128k",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 6,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "ZhipuAI/chatglm3-6b-128k",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "CHATGLM3",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        64795,
+        64797,
+        2
+      ],
+      "stop": [
         "<|user|>",
         "<|observation|>"
       ]
     }
   },
-
   {
     "version": 1,
     "context_length": 2048,
@@ -728,6 +771,74 @@
       }
     ]
   },
+  {
+    "version": 1,
+    "context_length": 8194,
+    "model_name": "codeshell",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University. ",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "WisdomShell/CodeShell-7B",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      }
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 8194,
+    "model_name": "codeshell-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "WisdomShell/CodeShell-7B-Chat",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "CodeShell",
+      "system_prompt": "",
+      "roles": [
+        "## human:",
+        "## assistant: "
+      ],
+      "intra_message_sep": "",
+      "inter_message_sep": "",
+      "stop_token_ids": [
+        70000
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "|||",
+        "|<end>|"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 100000,
@@ -970,7 +1081,11 @@
     "context_length": 32768,
     "model_name": "mixtral-v0.1",
     "model_lang": [
-      "en", "fr", "it", "de", "es"
+      "en",
+      "fr",
+      "it",
+      "de",
+      "es"
     ],
     "model_ability": [
       "generate"
@@ -996,7 +1111,11 @@
     "context_length": 32768,
     "model_name": "mixtral-instruct-v0.1",
     "model_lang": [
-      "en", "fr", "it", "de", "es"
+      "en",
+      "fr",
+      "it",
+      "de",
+      "es"
     ],
     "model_ability": [
       "chat"
@@ -1929,7 +2048,10 @@
         "model_file_name_template": "qwen1_5-72b-chat-{quantization}.gguf",
         "model_file_name_split_template": "qwen1_5-72b-chat-{quantization}.gguf.{part}",
         "quantization_parts": {
-          "q4_k_m": ["a", "b"]
+          "q4_k_m": [
+            "a",
+            "b"
+          ]
         }
       }
     ],
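
Beyond the reformatting, this hunk shows how split GGUF downloads are assembled: each part name from quantization_parts is substituted into model_file_name_split_template. Expanding the template deterministically:

    # Expanding the split-file template shown above.
    template = "qwen1_5-72b-chat-{quantization}.gguf.{part}"
    parts = {"q4_k_m": ["a", "b"]}
    files = [template.format(quantization="q4_k_m", part=p) for p in parts["q4_k_m"]]
    # -> ['qwen1_5-72b-chat-q4_k_m.gguf.a', 'qwen1_5-72b-chat-q4_k_m.gguf.b']
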
@@ -1953,6 +2075,53 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "deepseek-vl-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "1_3",
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-vl-7b-chat",
+        "model_hub": "modelscope"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "DEEPSEEK_CHAT",
+      "system_prompt": "<|begin▁of▁sentence|>",
+      "roles": [
+        "User",
+        "Assistant"
+      ],
+      "intra_message_sep": "\n\n",
+      "inter_message_sep": "<|end▁of▁sentence|>",
+      "stop": [
+        "<|end▁of▁sentence|>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 4096,
@@ -2008,7 +2177,8 @@
     "context_length": 4096,
     "model_name": "deepseek-coder-instruct",
     "model_lang": [
-      "en", "zh"
+      "en",
+      "zh"
     ],
     "model_ability": [
       "chat"
@@ -2395,5 +2565,385 @@
         "<start_of_turn>"
       ]
     }
+  },
+  {
+    "version":1,
+    "context_length":2048,
+    "model_name":"OmniLMM",
+    "model_lang":[
+      "en",
+      "zh"
+    ],
+    "model_ability":[
+      "chat",
+      "vision"
+    ],
+    "model_description":"OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
+    "model_specs":[
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":3,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"OpenBMB/MiniCPM-V",
+        "model_hub":"modelscope",
+        "model_revision":"master"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":12,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"OpenBMB/OmniLMM-12B",
+        "model_hub":"modelscope",
+        "model_revision":"master"
+      }
+    ],
+    "prompt_style":{
+      "style_name":"OmniLMM",
+      "system_prompt":"The role of first msg should be user",
+      "roles":[
+        "user",
+        "assistant"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "minicpm-2b-sft-bf16",
+    "model_lang": [
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 2,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "OpenBMB/miniCPM-bf16",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "MINICPM-2B",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        1,
+        2
+      ],
+      "stop": [
+        "<s>",
+        "</s>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "minicpm-2b-sft-fp32",
+    "model_lang": [
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 2,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "OpenBMB/MiniCPM-2B-sft-fp32",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "MINICPM-2B",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        1,
+        2
+      ],
+      "stop": [
+        "<s>",
+        "</s>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "minicpm-2b-dpo-bf16",
+    "model_lang": [
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 2,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "OpenBMB/MiniCPM-2B-dpo-bf16",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "MINICPM-2B",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        1,
+        2
+      ],
+      "stop": [
+        "<s>",
+        "</s>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "minicpm-2b-dpo-fp16",
+    "model_lang": [
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 2,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "OpenBMB/MiniCPM-2B-dpo-fp16",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "MINICPM-2B",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        1,
+        2
+      ],
+      "stop": [
+        "<s>",
+        "</s>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "minicpm-2b-dpo-fp32",
+    "model_lang": [
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 2,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "OpenBMB/MiniCPM-2B-dpo-fp32",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "MINICPM-2B",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        1,
+        2
+      ],
+      "stop": [
+        "<s>",
+        "</s>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 2048,
+    "model_name": "aquila2",
+    "model_lang": [
+      "zh"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "Aquila2 series models are the base language models",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "BAAI/Aquila2-34B",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "BAAI/Aquila2-70B-Expr",
+        "model_revision": "master"
+      }
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 2048,
+    "model_name": "aquila2-chat",
+    "model_lang": [
+      "zh"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "Aquila2-chat series models are the chat models",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "BAAI/AquilaChat2-34B",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "BAAI/AquilaChat2-34B-Int4-GPTQ",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "BAAI/AquilaChat2-70B-Expr",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "ADD_COLON_SINGLE",
+      "intra_message_sep": "\n",
+      "system_prompt": "",
+      "roles": [
+        "USER",
+        "ASSISTANT"
+      ],
+      "stop_token_ids": [
+        100006,
+        100007
+      ],
+      "stop": [
+        "[CLS]",
+        "</s>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 16384,
+    "model_name": "aquila2-chat-16k",
+    "model_lang": [
+      "zh"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "AquilaChat2-16k series models are the long-text chat models",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "BAAI/AquilaChat2-34B-16K",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "ADD_COLON_SINGLE",
+      "intra_message_sep": "\n",
+      "system_prompt": "",
+      "roles": [
+        "USER",
+        "ASSISTANT"
+      ],
+      "stop_token_ids": [
+        100006,
+        100007
+      ],
+      "stop": [
+        "[CLS]",
+        "</s>"
+      ]
+    }
   }
 ]
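
Every entry in this hunk points at ModelScope ("model_hub": "modelscope"), so these specs are resolved when the xinference service is configured to download from ModelScope rather than Hugging Face. A sketch, assuming the XINFERENCE_MODEL_SRC setting is applied where the service runs:

    # Sketch: the download hub is selected in the service's environment, e.g.
    #   XINFERENCE_MODEL_SRC=modelscope xinference-local --host 0.0.0.0 --port 9997
    # after which a client launch resolves the ModelScope specs above.
    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")
    uid = client.launch_model(
        model_name="minicpm-2b-dpo-bf16",
        model_format="pytorch",
        model_size_in_billions=2,
        quantization="none",
    )
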
@@ -148,6 +148,7 @@ class ChatglmPytorchChatModel(PytorchChatModel):

         def _stream_generator():
             last_chunk_text_length = 0
+            chunk_id = "chat-" + str(uuid.uuid1())
             for chunk_text, _ in self._model.stream_chat(
                 self._tokenizer, prompt, chat_history, **kwargs
             ):
@@ -157,7 +158,7 @@ class ChatglmPytorchChatModel(PytorchChatModel):
                     text=chunk_text, index=0, logprobs=None, finish_reason=None
                 )
                 yield CompletionChunk(
-                    id=str(uuid.uuid1()),
+                    id=chunk_id,
                     object="text_completion",
                     created=int(time.time()),
                     model=self.model_uid,
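
Taken together, the two chatglm.py hunks fix streamed completion ids: previously each CompletionChunk minted a fresh uuid, so the chunks of a single response could not be correlated; now one id is minted per stream, matching OpenAI-style streaming. In outline (illustrative, not the full xinference code):

    # One id per stream, shared by every chunk of that response.
    import time
    import uuid

    def _stream_generator(chunk_texts, model_uid):
        chunk_id = "chat-" + str(uuid.uuid1())  # minted once, before the loop
        for text in chunk_texts:
            yield {
                "id": chunk_id,  # identical across all chunks of this stream
                "object": "text_completion",
                "created": int(time.time()),
                "model": model_uid,
                "choices": [
                    {"text": text, "index": 0, "logprobs": None, "finish_reason": None}
                ],
            }
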
@@ -465,7 +465,9 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):
             "llama-2-chat",
             "internlm2-chat",
             "qwen-vl-chat",
+            "OmniLMM",
             "yi-vl-chat",
+            "deepseek-vl-chat",
         ]:
             return False
         if "chat" not in llm_family.model_ability: