xinference 0.10.2.post1__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference was flagged as potentially problematic in the registry.
- xinference/_version.py +3 -3
- xinference/api/oauth2/auth_service.py +1 -1
- xinference/api/restful_api.py +53 -61
- xinference/client/restful/restful_client.py +52 -57
- xinference/conftest.py +1 -1
- xinference/core/cache_tracker.py +1 -1
- xinference/core/event.py +1 -1
- xinference/core/model.py +15 -4
- xinference/core/status_guard.py +1 -1
- xinference/core/supervisor.py +58 -72
- xinference/core/worker.py +73 -102
- xinference/deploy/cmdline.py +175 -6
- xinference/deploy/test/test_cmdline.py +2 -0
- xinference/deploy/utils.py +1 -1
- xinference/device_utils.py +29 -3
- xinference/fields.py +5 -1
- xinference/model/audio/model_spec.json +8 -1
- xinference/model/audio/whisper.py +88 -12
- xinference/model/core.py +2 -2
- xinference/model/embedding/core.py +13 -0
- xinference/model/image/__init__.py +29 -0
- xinference/model/image/core.py +6 -0
- xinference/model/image/custom.py +109 -0
- xinference/model/llm/__init__.py +92 -32
- xinference/model/llm/core.py +57 -102
- xinference/model/llm/ggml/tools/convert_ggml_to_gguf.py +2 -2
- xinference/model/llm/llm_family.json +446 -2
- xinference/model/llm/llm_family.py +45 -41
- xinference/model/llm/llm_family_modelscope.json +208 -1
- xinference/model/llm/pytorch/deepseek_vl.py +89 -33
- xinference/model/llm/pytorch/qwen_vl.py +67 -12
- xinference/model/llm/pytorch/yi_vl.py +62 -45
- xinference/model/llm/utils.py +45 -15
- xinference/model/llm/vllm/core.py +21 -4
- xinference/model/rerank/core.py +48 -20
- xinference/thirdparty/omnilmm/chat.py +2 -1
- xinference/thirdparty/omnilmm/model/omnilmm.py +2 -1
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +6 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.54bca460.css +2 -0
- xinference/web/ui/build/static/css/main.54bca460.css.map +1 -0
- xinference/web/ui/build/static/js/main.8e44da4b.js +3 -0
- xinference/web/ui/build/static/js/{main.26fdbfbe.js.LICENSE.txt → main.8e44da4b.js.LICENSE.txt} +7 -0
- xinference/web/ui/build/static/js/main.8e44da4b.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0b11a5339468c13b2d31ac085e7effe4303259b2071abd46a0a8eb8529233a5e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/29dda700ab913cf7f2cfabe450ddabfb283e96adfa3ec9d315b2fa6c63cd375c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2c63e940b945fd5817157e08a42b889b30d668ea4c91332f48ef2b1b9d26f520.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4135fe8745434cbce6438d1ebfa47422e0c77d884db4edc75c8bf32ea1d50621.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/46b6dd1f6d1109cd0e2455a0ea0be3e9bda1097cd4ebec9c4040070372671cfc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4de0a71074f9cbe1e7862750dcdd08cbc1bae7d9d9849a78b1783ca670017b3c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/53f6c0c0afb51265cd8fb940daeb65523501879ac2a8c03a1ead22b9793c5041.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8ccbb839002bc5bc03e0a0e7612362bf92f6ae64f87e094f8682d6a6fe4619bb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/97ed30d6e22cf76f0733651e2c18364689a01665d0b5fe811c1b7ca3eb713c82.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9c0c70f1838913aaa792a0d2260f17f90fd177b95698ed46b7bc3050eb712c1c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9cfd33238ca43e5bf9fc7e442690e8cc6027c73553db36de87e3597ed524ee4b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ada71518a429f821a9b1dea38bc951447f03c8db509887e0980b893acac938f3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b6c9558d28b5972bb8b2691c5a76a2c8814a815eb3443126da9f49f7d6a0c118.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bb0f721c084a4d85c09201c984f02ee8437d3b6c5c38a57cb4a101f653daef1b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ddaec68b88e5eff792df1e39a4b4b8b737bfc832293c015660c3c69334e3cf5c.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +33 -0
- xinference/web/ui/node_modules/clipboard/.babelrc.json +11 -0
- xinference/web/ui/node_modules/clipboard/.eslintrc.json +24 -0
- xinference/web/ui/node_modules/clipboard/.prettierrc.json +9 -0
- xinference/web/ui/node_modules/clipboard/bower.json +18 -0
- xinference/web/ui/node_modules/clipboard/composer.json +25 -0
- xinference/web/ui/node_modules/clipboard/package.json +63 -0
- xinference/web/ui/node_modules/delegate/package.json +31 -0
- xinference/web/ui/node_modules/good-listener/bower.json +11 -0
- xinference/web/ui/node_modules/good-listener/package.json +35 -0
- xinference/web/ui/node_modules/select/bower.json +13 -0
- xinference/web/ui/node_modules/select/package.json +29 -0
- xinference/web/ui/node_modules/tiny-emitter/package.json +53 -0
- xinference/web/ui/package-lock.json +34 -0
- xinference/web/ui/package.json +1 -0
- {xinference-0.10.2.post1.dist-info → xinference-0.11.0.dist-info}/METADATA +14 -13
- {xinference-0.10.2.post1.dist-info → xinference-0.11.0.dist-info}/RECORD +81 -60
- xinference/client/oscar/__init__.py +0 -13
- xinference/client/oscar/actor_client.py +0 -611
- xinference/model/llm/pytorch/spec_decoding_utils.py +0 -531
- xinference/model/llm/pytorch/spec_model.py +0 -186
- xinference/web/ui/build/static/js/main.26fdbfbe.js +0 -3
- xinference/web/ui/build/static/js/main.26fdbfbe.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/63a4c48f0326d071c7772c46598215c006ae41fd3d4ff3577fe717de66ad6e89.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/de0299226173b0662b573f49e3992220f6611947073bd66ac079728a8bc8837d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e9b52d171223bb59fb918316297a051cdfd42dd453e8260fd918e90bc0a4ebdf.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f4d5d1a41892a754c1ee0237450d804b20612d1b657945b59e564161ea47aa7a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fad4cd70de36ef6e6d5f8fd74a10ded58d964a8a91ef7681693fbb8376552da7.json +0 -1
- {xinference-0.10.2.post1.dist-info → xinference-0.11.0.dist-info}/LICENSE +0 -0
- {xinference-0.10.2.post1.dist-info → xinference-0.11.0.dist-info}/WHEEL +0 -0
- {xinference-0.10.2.post1.dist-info → xinference-0.11.0.dist-info}/entry_points.txt +0 -0
- {xinference-0.10.2.post1.dist-info → xinference-0.11.0.dist-info}/top_level.txt +0 -0
xinference/model/llm/utils.py
CHANGED
@@ -114,6 +114,22 @@ class ChatModelMixin:
                 else:
                     ret += role
             return ret
+        elif prompt_style.style_name == "LLAMA3":
+            ret = (
+                f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>"
+                f"{prompt_style.intra_message_sep}{prompt_style.system_prompt}{prompt_style.inter_message_sep}"
+            )
+            for i, message in enumerate(chat_history):
+                role = get_role(message["role"])
+                content = message["content"]
+                if content:
+                    ret += (
+                        f"<|start_header_id|>{role}<|end_header_id|>"
+                        f"{prompt_style.intra_message_sep}{content}{prompt_style.inter_message_sep}"
+                    )
+                else:
+                    ret += f"<|start_header_id|>{role}<|end_header_id|>{prompt_style.intra_message_sep}"
+            return ret
         elif prompt_style.style_name == "FALCON":
             ret = prompt_style.system_prompt
             for message in chat_history:
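The new LLAMA3 branch assembles the Llama-3 chat wire format: each turn is wrapped in <|start_header_id|>…<|end_header_id|> markers and closed with an end-of-turn separator. A minimal standalone sketch of what the branch produces, with hypothetical separator values standing in for the ones the model family's prompt style actually defines:

# Standalone sketch of the LLAMA3 branch above. The separator values
# ("\n\n" and "<|eot_id|>") are illustrative stand-ins for
# prompt_style.intra_message_sep / inter_message_sep.
system_prompt = "You are a helpful assistant."
intra_sep, inter_sep = "\n\n", "<|eot_id|>"
chat_history = [
    {"role": "user", "content": "Hello!"},
    {"role": "assistant", "content": ""},  # empty content opens the assistant turn
]

ret = (
    "<|begin_of_text|><|start_header_id|>system<|end_header_id|>"
    f"{intra_sep}{system_prompt}{inter_sep}"
)
for message in chat_history:
    role, content = message["role"], message["content"]
    if content:
        ret += f"<|start_header_id|>{role}<|end_header_id|>{intra_sep}{content}{inter_sep}"
    else:
        ret += f"<|start_header_id|>{role}<|end_header_id|>{intra_sep}"
print(ret)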
@@ -212,16 +228,14 @@ Begin!"""
         tools_name_text = []
         for func_info in tools:
             parameters = []
-
-
-
-
-
-
-
-
-                    param["required"] = True
-                parameters.append(param)
+            fp = func_info["function"].get("parameters", {})
+            if fp:
+                required_parameters = fp.get("required", [])
+                for name, p in fp["properties"].items():
+                    param = dict({"name": name}, **p)
+                    if name in required_parameters:
+                        param["required"] = True
+                    parameters.append(param)

             name = func_info["function"]["name"]
             desc = func_info["function"]["description"]
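The rewritten tool-formatting loop now reads the OpenAI-style `parameters` schema defensively: a tool that omits `parameters` entirely is tolerated, and each entry's `required` flag is folded into the flattened parameter dict. A runnable sketch with a hypothetical tool spec, showing the resulting `parameters` list:

# Hypothetical OpenAI-style tool spec, to illustrate the flattening above.
func_info = {
    "function": {
        "name": "get_weather",
        "description": "Look up current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {"type": "string", "description": "City name"},
                "unit": {"type": "string", "enum": ["C", "F"]},
            },
            "required": ["city"],
        },
    }
}

parameters = []
fp = func_info["function"].get("parameters", {})
if fp:
    required_parameters = fp.get("required", [])
    for name, p in fp["properties"].items():
        param = dict({"name": name}, **p)
        if name in required_parameters:
            param["required"] = True
        parameters.append(param)

print(parameters)
# [{'name': 'city', 'type': 'string', 'description': 'City name', 'required': True},
#  {'name': 'unit', 'type': 'string', 'enum': ['C', 'F']}]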
@@ -431,6 +445,17 @@ Begin!"""
                 else:
                     ret += "<AI>" + content.strip()
             return ret
+        elif prompt_style.style_name == "PHI3":
+            ret = f"<|system|>{prompt_style.intra_message_sep}{prompt_style.system_prompt}{prompt_style.inter_message_sep}"
+            for message in chat_history:
+                content = message["content"] or ""
+                role = get_role(message["role"])
+                if content:
+                    ret += f"<|{role}|>{prompt_style.intra_message_sep}{content}{prompt_style.inter_message_sep}"
+                else:
+                    ret += f"<|{role}|>{prompt_style.intra_message_sep}"
+            ret += "<|assistant|>\n"
+            return ret
         else:
             raise ValueError(f"Invalid prompt style: {prompt_style.style_name}")

@@ -664,6 +689,15 @@ Begin!"""
         else:
             m = {"role": "assistant", "content": content, "tool_calls": []}
             finish_reason = "stop"
+        try:
+            usage = c.get("usage")
+            assert "prompt_tokens" in usage
+        except Exception:
+            usage = {
+                "prompt_tokens": -1,
+                "completion_tokens": -1,
+                "total_tokens": -1,
+            }
         return {
             "id": "chat" + f"cmpl-{_id}",
             "model": model_uid,
@@ -676,11 +710,7 @@ Begin!"""
                     "finish_reason": finish_reason,
                 }
             ],
-            "usage": {
-                "prompt_tokens": -1,
-                "completion_tokens": -1,
-                "total_tokens": -1,
-            },
+            "usage": usage,
         }


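With this change, real token counts from the underlying completion chunk are passed through when the backend provides them, and the old -1 sentinels remain only as a fallback. A standalone sketch of the probe (the helper name is hypothetical):

def usage_or_fallback(c: dict) -> dict:
    # Mirrors the try/assert above: a missing or malformed "usage" entry
    # (None, or lacking "prompt_tokens") falls back to -1 sentinels.
    try:
        usage = c.get("usage")
        assert "prompt_tokens" in usage
    except Exception:
        usage = {"prompt_tokens": -1, "completion_tokens": -1, "total_tokens": -1}
    return usage

print(usage_or_fallback({"usage": {"prompt_tokens": 5, "completion_tokens": 7, "total_tokens": 12}}))
print(usage_or_fallback({}))  # -> the sentinel dict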
xinference/model/llm/vllm/core.py
CHANGED

@@ -85,6 +85,7 @@ except ImportError:

 VLLM_SUPPORTED_MODELS = [
     "llama-2",
+    "llama-3",
     "baichuan",
     "internlm-16k",
     "mistral-v0.1",
@@ -94,6 +95,7 @@ VLLM_SUPPORTED_MODELS = [
 ]
 VLLM_SUPPORTED_CHAT_MODELS = [
     "llama-2-chat",
+    "llama-3-instruct",
     "vicuna-v1.3",
     "vicuna-v1.5",
     "baichuan-chat",
@@ -108,6 +110,7 @@ VLLM_SUPPORTED_CHAT_MODELS = [
     "mistral-instruct-v0.1",
     "mistral-instruct-v0.2",
     "mixtral-instruct-v0.1",
+    "mixtral-8x22B-instruct-v0.1",
     "chatglm3",
     "chatglm3-32k",
     "chatglm3-128k",
@@ -237,10 +240,17 @@ class VLLMModel(LLM):
         if llm_spec.model_format == "pytorch":
             if quantization != "none" and not (quantization is None):
                 return False
-        if llm_spec.model_format
-            # Currently, only 4-bit weight quantization is supported for
+        if llm_spec.model_format == "awq":
+            # Currently, only 4-bit weight quantization is supported for AWQ, but got 8 bits.
             if "4" not in quantization:
                 return False
+        if llm_spec.model_format == "gptq":
+            if VLLM_INSTALLED and vllm.__version__ >= "0.3.3":
+                if not any(q in quantization for q in ("3", "4", "8")):
+                    return False
+            else:
+                if "4" not in quantization:
+                    return False
         if isinstance(llm_family, CustomLLMFamilyV1):
             if llm_family.model_family not in VLLM_SUPPORTED_MODELS:
                 return False
@@ -414,10 +424,17 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         if llm_spec.model_format == "pytorch":
             if quantization != "none" and not (quantization is None):
                 return False
-        if llm_spec.model_format
-            # Currently, only 4-bit weight quantization is supported for
+        if llm_spec.model_format == "awq":
+            # Currently, only 4-bit weight quantization is supported for AWQ, but got 8 bits.
             if "4" not in quantization:
                 return False
+        if llm_spec.model_format == "gptq":
+            if VLLM_INSTALLED and vllm.__version__ >= "0.3.3":
+                if not any(q in quantization for q in ("3", "4", "8")):
+                    return False
+            else:
+                if "4" not in quantization:
+                    return False
         if isinstance(llm_family, CustomLLMFamilyV1):
             if llm_family.model_family not in VLLM_SUPPORTED_CHAT_MODELS:
                 return False
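The quantization gate now distinguishes AWQ (still 4-bit only) from GPTQ, where vLLM >= 0.3.3 accepts 3-, 4-, and 8-bit weights. A standalone sketch of the GPTQ branch, keeping the source's plain string comparison on the version:

# Sketch of the new GPTQ gate. VLLM_INSTALLED and the version string are
# stubbed here; the real code inspects the installed vllm module.
VLLM_INSTALLED = True
VLLM_VERSION = "0.4.0"

def gptq_quantization_supported(quantization: str) -> bool:
    if VLLM_INSTALLED and VLLM_VERSION >= "0.3.3":  # lexicographic, as in the source
        return any(q in quantization for q in ("3", "4", "8"))
    return "4" in quantization  # older vLLM: 4-bit GPTQ only

print(gptq_quantization_supported("Int4"))  # True
print(gptq_quantization_supported("Int8"))  # True on vLLM >= 0.3.3, False otherwise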
xinference/model/rerank/core.py
CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import gc
 import logging
 import os
 import uuid
@@ -21,6 +22,7 @@ from typing import Dict, List, Optional, Tuple
 import numpy as np

 from ...constants import XINFERENCE_CACHE_DIR
+from ...device_utils import empty_cache
 from ...types import Document, DocumentObj, Rerank
 from ..core import CacheableModelSpec, ModelDescription
 from ..utils import is_model_cached
@@ -31,6 +33,8 @@ logger = logging.getLogger(__name__)
 # Init when registering all the builtin models.
 MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
 RERANK_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)
+RERANK_EMPTY_CACHE_COUNT = int(os.getenv("XINFERENCE_RERANK_EMPTY_CACHE_COUNT", "10"))
+assert RERANK_EMPTY_CACHE_COUNT > 0


 def get_rerank_model_descriptions():
@@ -113,28 +117,44 @@ class RerankModel:
         self._model_config = model_config or dict()
         self._use_fp16 = use_fp16
         self._model = None
+        self._counter = 0

     def load(self):
-
-
-        from
-
-
-
-
-
-
-        )
-
-
-
-
-
-
+        if self._model_spec.type == "normal":
+            try:
+                from sentence_transformers.cross_encoder import CrossEncoder
+            except ImportError:
+                error_message = "Failed to import module 'sentence-transformers'"
+                installation_guide = [
+                    "Please make sure 'sentence-transformers' is installed. ",
+                    "You can install it by `pip install sentence-transformers`\n",
+                ]
+
+                raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+            self._model = CrossEncoder(
+                self._model_path, device=self._device, **self._model_config
+            )
+            if self._use_fp16:
+                self._model.model.half()
+        else:
+            try:
+                if self._model_spec.type == "LLM-based":
+                    from FlagEmbedding import FlagLLMReranker as FlagReranker
+                elif self._model_spec.type == "LLM-based layerwise":
+                    from FlagEmbedding import LayerWiseFlagLLMReranker as FlagReranker
+                else:
+                    raise RuntimeError(
+                        f"Unsupported Rank model type: {self._model_spec.type}"
+                    )
+            except ImportError:
+                error_message = "Failed to import module 'FlagEmbedding'"
+                installation_guide = [
+                    "Please make sure 'FlagEmbedding' is installed. ",
+                    "You can install it by `pip install FlagEmbedding`\n",
+                ]

-
-
+                raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+            self._model = FlagReranker(self._model_path, use_fp16=self._use_fp16)

     def rerank(
         self,
@@ -145,13 +165,21 @@
         return_documents: Optional[bool],
         **kwargs,
     ) -> Rerank:
+        self._counter += 1
+        if self._counter % RERANK_EMPTY_CACHE_COUNT == 0:
+            logger.debug("Empty rerank cache.")
+            gc.collect()
+            empty_cache()
         assert self._model is not None
         if kwargs:
             raise ValueError("rerank hasn't support extra parameter.")
         if max_chunks_per_doc is not None:
             raise ValueError("rerank hasn't support `max_chunks_per_doc` parameter.")
         sentence_combinations = [[query, doc] for doc in documents]
-
+        if self._model_spec.type == "normal":
+            similarity_scores = self._model.predict(sentence_combinations)
+        else:
+            similarity_scores = self._model.compute_score(sentence_combinations)
         sim_scores_argsort = list(reversed(np.argsort(similarity_scores)))
         if top_n is not None:
             sim_scores_argsort = sim_scores_argsort[:top_n]
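Two behavioral notes fall out of these hunks: `rerank()` now dispatches to `predict` for normal cross-encoders and to `compute_score` for the FlagEmbedding LLM-based rerankers, and every N calls it forces a garbage-collection pass plus a device cache purge, where N is read once at import time from `XINFERENCE_RERANK_EMPTY_CACHE_COUNT` (default 10). A minimal sketch of the counter pattern, with a hypothetical free function standing in for the per-model method:

import gc
import os

# Read once at import time, as in the module above; must be positive.
RERANK_EMPTY_CACHE_COUNT = int(os.getenv("XINFERENCE_RERANK_EMPTY_CACHE_COUNT", "10"))
assert RERANK_EMPTY_CACHE_COUNT > 0

_counter = 0

def maybe_empty_cache():
    # Every Nth call: collect garbage, then release cached device memory.
    # In xinference, device_utils.empty_cache() dispatches to the torch
    # backend's cache purge (e.g. torch.cuda.empty_cache()); gc.collect()
    # stands in for the full sequence in this sketch.
    global _counter
    _counter += 1
    if _counter % RERANK_EMPTY_CACHE_COUNT == 0:
        gc.collect()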
xinference/thirdparty/omnilmm/chat.py
CHANGED

@@ -4,7 +4,6 @@ import json
 import os

 import torch
-from accelerate import init_empty_weights, load_checkpoint_and_dispatch
 from PIL import Image
 from transformers import AutoModel, AutoTokenizer

@@ -20,6 +19,8 @@ DEFAULT_IM_END_TOKEN = "<im_end>"


 def init_omni_lmm(model_path, device_map):
+    from accelerate import init_empty_weights, load_checkpoint_and_dispatch
+
     torch.backends.cuda.matmul.allow_tf32 = True
     disable_torch_init()
     model_name = os.path.expanduser(model_path)
xinference/thirdparty/omnilmm/model/omnilmm.py
CHANGED

@@ -2,7 +2,6 @@ import gc
 import math
 from typing import List, Optional, Tuple, Union

-import timm
 import torch
 import torch.nn as nn
 from torch import Tensor
@@ -37,6 +36,8 @@ class Identity(torch.nn.Identity):


 def create_vision_module(config):
+    import timm
+
     vision_tower = timm.create_model(
         "eva02_enormous_patch14_clip_224.laion2b_plus",
         pretrained=False,
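Both omnilmm edits apply the same fix: the heavy optional dependencies (accelerate, timm) move from module level into the functions that use them, so importing the module no longer fails when they are absent. The pattern in miniature (a sketch, not the vendored code):

def create_vision_module_sketch():
    # timm is resolved on first call rather than at import time, so the
    # enclosing module can be imported without timm installed.
    import timm

    return timm.create_model(
        "eva02_enormous_patch14_clip_224.laion2b_plus", pretrained=False
    )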
xinference/types.py
CHANGED
@@ -33,6 +33,7 @@ from .fields import (
     stop_field,
     stream_field,
     stream_interval_field,
+    stream_option_field,
     temperature_field,
     top_k_field,
     top_p_field,
@@ -392,6 +393,7 @@ class _CreateCompletionOpenAIFallback(BaseModel):
     seed: Optional[int] = none_field
     stop: Optional[Union[str, List[str]]] = stop_field
     stream: bool = stream_field
+    stream_options: Optional[dict] = stream_option_field
     suffix: Optional[str] = none_field
     temperature: float = temperature_field
     top_p: float = top_p_field
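`_CreateCompletionOpenAIFallback` models the request body Xinference forwards to OpenAI-compatible backends, so accepting `stream_options` lets clients request usage statistics on streamed responses. A hypothetical client-side call against a local Xinference endpoint (the model uid is illustrative; `include_usage` is the standard OpenAI stream option):

import openai

client = openai.OpenAI(base_url="http://127.0.0.1:9997/v1", api_key="not-used")
stream = client.completions.create(
    model="my-llama-3-instruct",  # hypothetical model uid
    prompt="Say hello.",
    stream=True,
    stream_options={"include_usage": True},  # usage arrives on the final chunk
)
for chunk in stream:
    print(chunk)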
xinference/web/ui/build/asset-manifest.json
CHANGED

@@ -1,11 +1,14 @@
 {
   "files": {
-    "main.js": "./static/js/main.26fdbfbe.js",
+    "main.css": "./static/css/main.54bca460.css",
+    "main.js": "./static/js/main.8e44da4b.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
-    "main.26fdbfbe.js.map": "./static/js/main.26fdbfbe.js.map"
+    "main.54bca460.css.map": "./static/css/main.54bca460.css.map",
+    "main.8e44da4b.js.map": "./static/js/main.8e44da4b.js.map"
   },
   "entrypoints": [
-    "static/js/main.26fdbfbe.js"
+    "static/css/main.54bca460.css",
+    "static/js/main.8e44da4b.js"
   ]
 }
xinference/web/ui/build/index.html
CHANGED

@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.26fdbfbe.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.8e44da4b.js"></script><link href="./static/css/main.54bca460.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
xinference/web/ui/build/static/css/main.54bca460.css
ADDED

@@ -0,0 +1,2 @@
+.formBox{max-height:80vh;max-width:50vw;min-width:50vw;overflow:auto;padding:40px 20px 0 0;position:relative;transition:all .4s ease-in-out}.broaden{max-width:100%;min-width:100%;padding-right:0}.show-json{align-items:center;color:#444;display:flex;position:fixed;right:60px;top:90px}.icon{cursor:pointer;margin-left:20px;position:absolute;right:-40px}.icon:hover{color:#1976d2}.arrow{font-size:24px!important}.jsonBox{min-height:80vh;position:relative;transition:all .4s ease-in-out;width:100%}.hide{overflow:hidden;-webkit-transform:translate(30vw);transform:translate(30vw);width:0}.jsonBox-header{font-weight:700;line-height:40px}.textarea{border:1px solid #ddd;border-radius:5px;color:#444;height:calc(100% - 40px);padding:5px 10px;resize:none;width:100%}.copyIcon{color:#555;cursor:pointer;font-size:16px!important;position:absolute;right:5px;top:13px}.copyIcon:hover{color:#1976d2}.addBtn{margin-left:20px!important}.item{background-color:#eee;border-radius:10px;margin:10px 50px 0;overflow:hidden;padding:20px;position:relative}.item:hover .deleteBtn{-webkit-transform:translateX(-50px);transform:translateX(-50px)}.deleteBtn{background-color:#1976d2;border-radius:25px;height:50px;line-height:70px;position:absolute;right:20px;text-align:center;top:calc(50% - 25px);-webkit-transform:translateX(80px);transform:translateX(80px);transition:all .3s ease-in-out;width:50px}.deleteBtn:hover{box-shadow:0 0 10px #aaa;cursor:pointer}.deleteIcon{color:#fff;font-size:28px!important}
+/*# sourceMappingURL=main.54bca460.css.map*/
xinference/web/ui/build/static/css/main.54bca460.css.map
ADDED

@@ -0,0 +1 @@
+{"version":3,"file":"static/css/main.54bca460.css","mappings":"AAAA,SAIE,eAAgB,CAFhB,cAAe,CACf,cAAe,CAEf,aAAc,CACd,qBAAsB,CALtB,iBAAkB,CAMlB,8BACF,CAEA,SACE,cAAe,CACf,cAAe,CACf,eACF,CAEA,WAEE,kBAAmB,CAInB,UAAW,CALX,YAAa,CAEb,cAAe,CAEf,UAAW,CADX,QAGF,CAEA,MAGE,cAAe,CACf,gBAAiB,CAHjB,iBAAkB,CAClB,WAGF,CAEA,YACE,aACF,CAEA,OACE,wBACF,CAEA,SAEE,eAAgB,CADhB,iBAAkB,CAGlB,8BAAgC,CADhC,UAEF,CAEA,MAGE,eAAgB,CADhB,iCAA6B,CAA7B,yBAA6B,CAD7B,OAGF,CAEA,gBAEE,eAAgB,CADhB,gBAEF,CAEA,UAIE,qBAAsB,CACtB,iBAAkB,CAElB,UAAW,CALX,wBAAyB,CACzB,gBAAiB,CAGjB,WAAY,CALZ,UAOF,CAEA,UAME,UAAW,CALX,cAAe,CAIf,wBAA0B,CAH1B,iBAAkB,CAElB,SAAU,CADV,QAIF,CAEA,gBACE,aACF,CAEA,QACE,0BACF,CAEA,MAEE,qBAAsB,CAGtB,kBAAmB,CAFnB,kBAAmB,CAGnB,eAAgB,CAFhB,YAAa,CAHb,iBAMF,CAEA,uBACE,mCAA4B,CAA5B,2BACF,CAEA,WAUE,wBAAyB,CADzB,kBAAmB,CAJnB,WAAY,CAGZ,gBAAiB,CAPjB,iBAAkB,CAClB,UAAW,CAKX,iBAAkB,CAJlB,oBAAqB,CAGrB,kCAA2B,CAA3B,0BAA2B,CAK3B,8BAAgC,CAPhC,UAQF,CAEA,iBAEE,wBAAyB,CADzB,cAEF,CAEA,YAEE,UAAW,CADX,wBAEF","sources":["scenes/register_model/styles/registerModelStyle.css"],"sourcesContent":[".formBox {\n  position: relative;\n  max-width: 50vw;\n  min-width: 50vw;\n  max-height: 80vh;\n  overflow: auto;\n  padding: 40px 20px 0 0;\n  transition: all 0.4s ease-in-out;\n}\n\n.broaden {\n  max-width: 100%;\n  min-width: 100%;\n  padding-right: 0;\n}\n\n.show-json {\n  display: flex;\n  align-items: center;\n  position: fixed;\n  top: 90px;\n  right: 60px;\n  color: #444;\n}\n\n.icon {\n  position: absolute;\n  right: -40px;\n  cursor: pointer;\n  margin-left: 20px;\n}\n\n.icon:hover {\n  color: #1976d2;\n}\n\n.arrow {\n  font-size: 24px !important;\n}\n\n.jsonBox {\n  position: relative;\n  min-height: 80vh;\n  width: 100%;\n  transition: all 0.4s ease-in-out;\n}\n\n.hide {\n  width: 0;\n  transform: translate(30vw, 0);\n  overflow: hidden;\n}\n\n.jsonBox-header {\n  line-height: 40px;\n  font-weight: 700;\n}\n\n.textarea {\n  width: 100%;\n  height: calc(100% - 40px);\n  padding: 5px 10px;\n  border: 1px solid #ddd;\n  border-radius: 5px;\n  resize: none;\n  color: #444;\n}\n\n.copyIcon {\n  cursor: pointer;\n  position: absolute;\n  top: 13px;\n  right: 5px;\n  font-size: 16px !important;\n  color: #555;\n}\n\n.copyIcon:hover {\n  color: #1976d2;\n}\n\n.addBtn {\n  margin-left: 20px !important;\n}\n\n.item {\n  position: relative;\n  background-color: #eee;\n  margin: 10px 50px 0;\n  padding: 20px;\n  border-radius: 10px;\n  overflow: hidden;\n}\n\n.item:hover .deleteBtn {\n  transform: translateX(-50px);\n}\n\n.deleteBtn {\n  position: absolute;\n  right: 20px;\n  top: calc(50% - 25px);\n  width: 50px;\n  height: 50px;\n  transform: translateX(80px);\n  text-align: center;\n  line-height: 70px;\n  border-radius: 25px;\n  background-color: #1976d2;\n  transition: all 0.3s ease-in-out;\n}\n\n.deleteBtn:hover {\n  cursor: pointer;\n  box-shadow: 0 0 10px #aaa;\n}\n\n.deleteIcon {\n  font-size: 28px !important;\n  color: #fff;\n}\n"],"names":[],"sourceRoot":""}