xinference 0.8.3__py3-none-any.whl → 0.8.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +3 -3
- xinference/core/chat_interface.py +1 -8
- xinference/model/llm/llm_family.json +72 -6
- xinference/model/llm/llm_family_modelscope.json +12 -6
- xinference/model/llm/utils.py +24 -16
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.15822aeb.js → main.476e35cc.js} +3 -3
- xinference/web/ui/build/static/js/main.476e35cc.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1d5b806c08ffb55539ee26b5fa9746a373f602354d6e55a0d4379d7e2f903868.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/396f7ce6ae6900bfdb00e369ade8a05045dc1df025610057ff7436d9e58af81c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7b3a58afd15f913e9ab363c6051e449d885049a283ff905aa4d499124d4c20d1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ddf597663270471b31251b2abb36e3fa093efe20489387d996f993d2c61be112.json +1 -0
- {xinference-0.8.3.dist-info → xinference-0.8.4.dist-info}/METADATA +3 -4
- {xinference-0.8.3.dist-info → xinference-0.8.4.dist-info}/RECORD +21 -20
- xinference/web/ui/build/static/js/main.15822aeb.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/139e5e4adf436923107d2b02994c7ff6dba2aac1989e9b6638984f0dfe782c4a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/193e7ba39e70d4bb2895a5cb317f6f293a5fd02e7e324c02a1eba2f83216419c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5c408307c982f07f9c09c85c98212d1b1c22548a9194c69548750a3016b91b88.json +0 -1
- /xinference/web/ui/build/static/js/{main.15822aeb.js.LICENSE.txt → main.476e35cc.js.LICENSE.txt} +0 -0
- {xinference-0.8.3.dist-info → xinference-0.8.4.dist-info}/LICENSE +0 -0
- {xinference-0.8.3.dist-info → xinference-0.8.4.dist-info}/WHEEL +0 -0
- {xinference-0.8.3.dist-info → xinference-0.8.4.dist-info}/entry_points.txt +0 -0
- {xinference-0.8.3.dist-info → xinference-0.8.4.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2024-02-
+ "date": "2024-02-04T17:16:50+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "
- "version": "0.8.3"
+ "full-revisionid": "1b9b8c805e4e23a4c34fb05a96b819fee3ca8d50",
+ "version": "0.8.4"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py
CHANGED
@@ -1156,17 +1156,17 @@ class RESTfulAPI:
             await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
 
-
+        model_family = desc.get("model_family", "")
         function_call_models = ["chatglm3", "gorilla-openfunctions-v1", "qwen-chat"]
 
-        is_qwen = desc.get("model_format") == "ggmlv3" and "qwen"
+        is_qwen = desc.get("model_format") == "ggmlv3" and "qwen-chat" == model_family
 
         if is_qwen and system_prompt is not None:
             raise HTTPException(
                 status_code=400, detail="Qwen ggml does not have system prompt"
             )
 
-        if not
+        if model_family not in function_call_models:
             if body.tools:
                 raise HTTPException(
                     status_code=400,
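Note: the gist of this hunk is that tool-call gating now keys on the exact model_family string rather than the looser checks of 0.8.3 (the old right-hand sides are truncated in this extract). A minimal sketch of the new guard's behavior; the desc dict below is a hypothetical stand-in for the model description the endpoint builds:

function_call_models = ["chatglm3", "gorilla-openfunctions-v1", "qwen-chat"]

desc = {"model_format": "ggmlv3", "model_family": "qwen-chat"}  # hypothetical example

model_family = desc.get("model_family", "")
is_qwen = desc.get("model_format") == "ggmlv3" and "qwen-chat" == model_family

print(is_qwen)                               # True: system prompts are rejected for Qwen ggml
print(model_family in function_call_models)  # True: body.tools is allowed through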
xinference/core/chat_interface.py
CHANGED
@@ -98,16 +98,9 @@ class GradioInterface:
             return flat_list
 
         def to_chat(lst: List[str]) -> List[ChatCompletionMessage]:
-            from ..model.llm import BUILTIN_LLM_PROMPT_STYLE
-
             res = []
-            prompt_style = BUILTIN_LLM_PROMPT_STYLE.get(self.model_name)
-            if prompt_style is None:
-                roles = ["assistant", "user"]
-            else:
-                roles = prompt_style.roles
             for i in range(len(lst)):
-                role =
+                role = "assistant" if i % 2 == 1 else "user"
                 res.append(ChatCompletionMessage(role=role, content=lst[i]))
             return res
 
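Note: the Gradio history conversion no longer consults per-model prompt styles; roles now simply alternate, with role translation handled later in utils.py. A self-contained sketch of the new behavior, with plain dicts standing in for xinference's ChatCompletionMessage:

from typing import Dict, List

def to_chat(lst: List[str]) -> List[Dict[str, str]]:
    # Even indices are user turns, odd indices are assistant turns.
    res = []
    for i in range(len(lst)):
        role = "assistant" if i % 2 == 1 else "user"
        res.append({"role": role, "content": lst[i]})
    return res

print(to_chat(["hi", "hello!", "who are you?"]))
# [{'role': 'user', 'content': 'hi'},
#  {'role': 'assistant', 'content': 'hello!'},
#  {'role': 'user', 'content': 'who are you?'}]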
xinference/model/llm/llm_family.json
CHANGED
@@ -827,6 +827,66 @@
         ],
         "model_id": "meta-llama/Llama-2-70b-chat-hf",
         "model_revision": "36d9a7388cc80e5f4b3e9701ca2f250d21a96c30"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_S",
+          "Q3_K_M",
+          "Q3_K_L",
+          "Q4_0",
+          "Q4_K_S",
+          "Q4_K_M",
+          "Q5_0",
+          "Q5_K_S",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "TheBloke/Llama-2-7B-Chat-GGUF",
+        "model_file_name_template": "llama-2-7b-chat.{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 13,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_S",
+          "Q3_K_M",
+          "Q3_K_L",
+          "Q4_0",
+          "Q4_K_S",
+          "Q4_K_M",
+          "Q5_0",
+          "Q5_K_S",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "TheBloke/Llama-2-13B-chat-GGUF",
+        "model_file_name_template": "llama-2-13b-chat.{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_S",
+          "Q3_K_M",
+          "Q3_K_L",
+          "Q4_0",
+          "Q4_K_S",
+          "Q4_K_M",
+          "Q5_0",
+          "Q5_K_S",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "TheBloke/Llama-2-70B-Chat-GGUF",
+        "model_file_name_template": "llama-2-70b-chat.{quantization}.gguf"
       }
     ],
     "prompt_style": {
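Note: these entries register TheBloke's prebuilt GGUF conversions for the llama-2-chat family at 7B, 13B, and 70B. Under a default local deployment, one of them should be launchable roughly like this (endpoint and client API assumed from the standard xinference docs of this era, not shown in this diff):

from xinference.client import Client

client = Client("http://localhost:9997")

# Pick one of the newly listed ggufv2 specs and quantizations.
model_uid = client.launch_model(
    model_name="llama-2-chat",
    model_format="ggufv2",
    model_size_in_billions=7,
    quantization="Q4_K_M",
)
model = client.get_model(model_uid)
print(model.chat("Hello!"))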
@@ -2131,16 +2191,19 @@
       }
     ],
     "prompt_style": {
-      "style_name": "
-      "system_prompt": "
+      "style_name": "LLAMA2",
+      "system_prompt": "[INST] ",
       "roles": [
         "[INST]",
         "[/INST]"
       ],
       "intra_message_sep": " ",
-      "inter_message_sep": "
+      "inter_message_sep": "<s>",
       "stop_token_ids": [
         2
+      ],
+      "stop": [
+        "</s>"
       ]
     }
   },
@@ -2189,16 +2252,19 @@
       }
     ],
     "prompt_style": {
-      "style_name": "
-      "system_prompt": "
+      "style_name": "LLAMA2",
+      "system_prompt": "[INST] ",
       "roles": [
         "[INST]",
         "[/INST]"
       ],
       "intra_message_sep": " ",
-      "inter_message_sep": "
+      "inter_message_sep": "<s>",
       "stop_token_ids": [
         2
+      ],
+      "stop": [
+        "</s>"
       ]
     }
   },
xinference/model/llm/llm_family_modelscope.json
CHANGED
@@ -1224,16 +1224,19 @@
       }
     ],
     "prompt_style": {
-      "style_name": "
-      "system_prompt": "
+      "style_name": "LLAMA2",
+      "system_prompt": "[INST] ",
       "roles": [
         "[INST]",
         "[/INST]"
       ],
       "intra_message_sep": " ",
-      "inter_message_sep": "
+      "inter_message_sep": "<s>",
       "stop_token_ids": [
         2
+      ],
+      "stop": [
+        "</s>"
       ]
     }
   },
@@ -1272,16 +1275,19 @@
       }
     ],
     "prompt_style": {
-      "style_name": "
-      "system_prompt": "
+      "style_name": "LLAMA2",
+      "system_prompt": "[INST] ",
       "roles": [
         "[INST]",
         "[/INST]"
       ],
       "intra_message_sep": " ",
-      "inter_message_sep": "
+      "inter_message_sep": "<s>",
       "stop_token_ids": [
         2
+      ],
+      "stop": [
+        "</s>"
       ]
     }
   },
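Note: in both registries the llama-2-chat prompt style is rewritten: system_prompt becomes a bare "[INST] " prefix, inter_message_sep becomes "<s>", and a textual "stop" list ("</s>") is added alongside stop token id 2, presumably for backends that match stop strings rather than token ids. A hedged sketch (not verbatim library code) of how these fields could combine into a prompt, modeled on the "if i == 0" branch visible in the utils.py hunks below:

system_prompt = "[INST] "      # new value from these hunks
roles = ["[INST]", "[/INST]"]  # [user, assistant]
seps = [" ", "<s>"]            # [intra_message_sep, inter_message_sep (new)]

history = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!"},
    {"role": "user", "content": "What is 2+2?"},
]

ret = ""
for i, message in enumerate(history):
    role = roles[0] if message["role"] == "user" else roles[1]
    if i == 0:
        # First turn: the system prompt doubles as the opening [INST] tag.
        ret += system_prompt + message["content"]
    else:
        ret += role + " " + message["content"] + seps[i % 2]

print(ret)  # '[INST] Hi[/INST] Hello!<s>[INST] What is 2+2? '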
xinference/model/llm/utils.py
CHANGED
@@ -60,10 +60,18 @@ class ChatModelMixin:
                 ChatCompletionMessage(role=prompt_style.roles[1], content="")
             )
 
+        def get_role(role_name: str):
+            if role_name == "user":
+                return prompt_style.roles[0]
+            elif role_name == "assistant":
+                return prompt_style.roles[1]
+            else:
+                return role_name
+
         if prompt_style.style_name == "ADD_COLON_SINGLE":
             ret = prompt_style.system_prompt + prompt_style.intra_message_sep
             for message in chat_history:
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += role + ": " + content + prompt_style.intra_message_sep
@@ -74,7 +82,7 @@ class ChatModelMixin:
             seps = [prompt_style.intra_message_sep, prompt_style.inter_message_sep]
             ret = prompt_style.system_prompt + seps[0]
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += role + ": " + content + seps[i % 2]
@@ -85,7 +93,7 @@ class ChatModelMixin:
             seps = [prompt_style.intra_message_sep, prompt_style.inter_message_sep]
             ret = prompt_style.system_prompt
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += role + content + seps[i % 2]
@@ -96,7 +104,7 @@ class ChatModelMixin:
             seps = [prompt_style.intra_message_sep, prompt_style.inter_message_sep]
             ret = ""
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     if i == 0:
@@ -109,7 +117,7 @@ class ChatModelMixin:
         elif prompt_style.style_name == "FALCON":
             ret = prompt_style.system_prompt
             for message in chat_history:
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += (
@@ -137,7 +145,7 @@ class ChatModelMixin:
             else:
                 ret = ""
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if i % 2 == 0:
                     ret += f"[Round {i // 2 + round_add_n}]{prompt_style.intra_message_sep}"
@@ -154,7 +162,7 @@ class ChatModelMixin:
             )
 
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 tool_calls = message.get("tool_calls")
                 if tool_calls:
@@ -173,7 +181,7 @@ class ChatModelMixin:
                 else ""
             )
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += f"<|{role}|> \n {content}"
@@ -239,7 +247,7 @@ Begin!"""
 
             ret = f"<|im_start|>system\n{prompt_style.system_prompt}<|im_end|>"
             for message in chat_history:
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
 
                 ret += prompt_style.intra_message_sep
@@ -279,7 +287,7 @@ Begin!"""
                 else prompt_style.system_prompt + prompt_style.intra_message_sep + "\n"
             )
             for message in chat_history:
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
 
                 if content:
@@ -293,7 +301,7 @@ Begin!"""
             for i, message in enumerate(chat_history[:-2]):
                 if i % 2 == 0:
                     ret += "<s>"
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 ret += role + ":" + str(content) + seps[i % 2]
             if len(ret) == 0:
@@ -316,7 +324,7 @@ Begin!"""
                 + "\n"
             )
             for message in chat_history:
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
 
                 if content:
@@ -327,7 +335,7 @@ Begin!"""
         elif prompt_style.style_name == "ADD_COLON_SINGLE_COT":
             ret = prompt_style.system_prompt + prompt_style.intra_message_sep
             for message in chat_history:
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += role + ": " + content + prompt_style.intra_message_sep
@@ -341,7 +349,7 @@ Begin!"""
             seps = [prompt_style.intra_message_sep, prompt_style.inter_message_sep]
             ret = prompt_style.system_prompt
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += role + ": " + content + seps[i % 2]
@@ -352,7 +360,7 @@ Begin!"""
             sep = prompt_style.inter_message_sep
             ret = prompt_style.system_prompt + sep
             for i, message in enumerate(chat_history):
-                role = message["role"]
+                role = get_role(message["role"])
                 content = message["content"]
                 if content:
                     ret += role + "\n" + content + sep
@@ -384,7 +392,7 @@ Begin!"""
             ret = "<s>"
             for i, message in enumerate(chat_history):
                 content = message["content"]
-                role = message["role"]
+                role = get_role(message["role"])
                 if i % 2 == 0:  # Human
                     assert content is not None
                     ret += role + ": " + content + "\n\n"
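Note: every prompt-style branch now routes roles through the new get_role helper, so OpenAI-style "user"/"assistant" names in an incoming chat history are translated to the family's role tokens instead of being spliced in verbatim. A minimal sketch of the mapping; the PromptStyle stub is hypothetical:

class PromptStyle:  # hypothetical stub; the real class lives in xinference
    roles = ["[INST]", "[/INST]"]  # [user token, assistant token]

prompt_style = PromptStyle()

def get_role(role_name: str):
    if role_name == "user":
        return prompt_style.roles[0]
    elif role_name == "assistant":
        return prompt_style.roles[1]
    else:
        return role_name

assert get_role("user") == "[INST]"
assert get_role("assistant") == "[/INST]"
assert get_role("system") == "system"  # unmapped roles pass through unchanged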
xinference/web/ui/build/asset-manifest.json
CHANGED
@@ -1,11 +1,11 @@
 {
   "files": {
-    "main.js": "./static/js/main.15822aeb.js",
+    "main.js": "./static/js/main.476e35cc.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
-    "main.15822aeb.js.map": "./static/js/main.15822aeb.js.map"
+    "main.476e35cc.js.map": "./static/js/main.476e35cc.js.map"
   },
   "entrypoints": [
-    "static/js/main.15822aeb.js"
+    "static/js/main.476e35cc.js"
   ]
 }
xinference/web/ui/build/index.html
CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.15822aeb.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.476e35cc.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>