xinference 0.7.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

This version of xinference might be problematic.
Files changed (46)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +4 -4
  3. xinference/deploy/utils.py +3 -1
  4. xinference/model/embedding/__init__.py +5 -1
  5. xinference/model/embedding/core.py +7 -7
  6. xinference/model/llm/ggml/chatglm.py +7 -5
  7. xinference/model/llm/llm_family.py +41 -15
  8. xinference/model/llm/pytorch/core.py +1 -0
  9. xinference/model/llm/pytorch/utils.py +3 -0
  10. xinference/model/llm/vllm/core.py +5 -1
  11. xinference/model/rerank/__init__.py +5 -1
  12. xinference/model/rerank/core.py +7 -6
  13. xinference/model/utils.py +13 -2
  14. xinference/web/ui/build/asset-manifest.json +3 -3
  15. xinference/web/ui/build/index.html +1 -1
  16. xinference/web/ui/build/static/js/main.778615cc.js +3 -0
  17. xinference/web/ui/build/static/js/main.778615cc.js.map +1 -0
  18. xinference/web/ui/node_modules/.cache/babel-loader/35204b265d70210394b0a8571e5b01c8c0f9a748437b8a923961e3560ae3d550.json +1 -0
  19. xinference/web/ui/node_modules/.cache/babel-loader/483eb7e5f01e34c6a42ad7c64dad550ff945ee21053a52c2e5e7ebe108b85411.json +1 -0
  20. xinference/web/ui/node_modules/.cache/babel-loader/667753ce39ce1d4bcbf9a5f1a103d653be1d19d42f4e1fbaceb9b507679a52c7.json +1 -0
  21. xinference/web/ui/node_modules/.cache/babel-loader/6e63957e4e0801705c6cb258379bfda0007ce6c3ddd2e3b62898b68455c3edf4.json +1 -0
  22. xinference/web/ui/node_modules/.cache/babel-loader/8d2b0b3c6988d1894694dcbbe708ef91cfe62d62dac317031f09915ced637953.json +1 -0
  23. xinference/web/ui/node_modules/.cache/babel-loader/8d77975a2735d67a618407026e5325608ccd66f1b379a74faf35b4087db536f3.json +1 -0
  24. xinference/web/ui/node_modules/.cache/babel-loader/b374bf2be1eac3cff5e0a8528a8e816e266ece911f714c123110961798a93a3b.json +1 -0
  25. xinference/web/ui/node_modules/.cache/babel-loader/d8a42e9df7157de9f28eecefdf178fd113bf2280d28471b6e32a8a45276042df.json +1 -0
  26. xinference/web/ui/node_modules/.cache/babel-loader/ed010102f476cd1a22b49be031a7f94f2ab3dd7ba8bf58839a771d46e28ff559.json +1 -0
  27. xinference/web/ui/node_modules/.cache/babel-loader/fe653ca0ca4297b415c0be4013574870d0465a657ae0f3d3f5b66ef6a831390c.json +1 -0
  28. {xinference-0.7.0.dist-info → xinference-0.7.1.dist-info}/METADATA +1 -1
  29. {xinference-0.7.0.dist-info → xinference-0.7.1.dist-info}/RECORD +34 -34
  30. xinference/web/ui/build/static/js/main.8126d441.js +0 -3
  31. xinference/web/ui/build/static/js/main.8126d441.js.map +0 -1
  32. xinference/web/ui/node_modules/.cache/babel-loader/06eb9846159adb398d44df0b0debc256a9fd9e8171a7d68f5c4ee4d655acfa45.json +0 -1
  33. xinference/web/ui/node_modules/.cache/babel-loader/3bda436576ecb05f81f7b6ec475d1cfaf03e2b3066e3a75902fe6e8c4773b43b.json +0 -1
  34. xinference/web/ui/node_modules/.cache/babel-loader/48878f5178bad1a47757e011af41c974a7946efa29485506c4d19f25bf5d522d.json +0 -1
  35. xinference/web/ui/node_modules/.cache/babel-loader/59574eb63cfe9ed2e58d2f5a420e1ae54354e243a602e9bc73deae3147ed4f98.json +0 -1
  36. xinference/web/ui/node_modules/.cache/babel-loader/75a5abcbc92da335fdde530f5689194ec79a4b2345b8cba594f8904d3b88e3c6.json +0 -1
  37. xinference/web/ui/node_modules/.cache/babel-loader/84bfe7afede38da1f8ad569d891276fe4d66cfb87bf5c9ff7a113788ba62bb88.json +0 -1
  38. xinference/web/ui/node_modules/.cache/babel-loader/940ed05006583b955894e2b8f65a4a5ebf34f8149d747f59fae5131f17d65482.json +0 -1
  39. xinference/web/ui/node_modules/.cache/babel-loader/9c5f03db9aa88582a9b69b25c7f1acc78ba7fc61f743c9ed7399abb292d5dbde.json +0 -1
  40. xinference/web/ui/node_modules/.cache/babel-loader/c02e70e9b9efcf3bd056606308104308d6a6ac559f2bc0b4454c11fb5874457c.json +0 -1
  41. xinference/web/ui/node_modules/.cache/babel-loader/e610aefd7000a3f8542a25cb66c64671cc8da18350de4e5b577102ba4bb78d65.json +0 -1
  42. /xinference/web/ui/build/static/js/{main.8126d441.js.LICENSE.txt → main.778615cc.js.LICENSE.txt} +0 -0
  43. {xinference-0.7.0.dist-info → xinference-0.7.1.dist-info}/LICENSE +0 -0
  44. {xinference-0.7.0.dist-info → xinference-0.7.1.dist-info}/WHEEL +0 -0
  45. {xinference-0.7.0.dist-info → xinference-0.7.1.dist-info}/entry_points.txt +0 -0
  46. {xinference-0.7.0.dist-info → xinference-0.7.1.dist-info}/top_level.txt +0 -0
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2023-12-08T13:45:18+0800",
+ "date": "2023-12-12T19:35:36+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "b5a5f0a270f85e451591eba34fe615a0fc8ce4bf",
- "version": "0.7.0"
+ "full-revisionid": "91f5f13c3914e1943977c80281ce485e8e3502cf",
+ "version": "0.7.1"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py CHANGED
@@ -258,9 +258,6 @@ class RESTfulAPI:
                 f"{pprint.pformat(invalid_routes)}"
             )
 
-        for tp in [CreateChatCompletion, CreateCompletion]:
-            logger.debug("Dump request model fields:\n%s", tp.__fields__)
-
         class SPAStaticFiles(StaticFiles):
             async def get_response(self, path: str, scope):
                 response = await super().get_response(path, scope)
@@ -721,7 +718,10 @@ class RESTfulAPI:
 
         if (
             not body.messages
-            or body.messages[-1].get("role") != "user"
+            or (
+                body.messages[-1].get("role") != "user"
+                and body.messages[-1].get("role") != "system"
+            )
             or not body.messages[-1].get("content")
         ):
            raise HTTPException(
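
A note on the second hunk: the relaxed check lets a request whose last message has role "system" pass validation, where 0.7.0 only accepted "user". A minimal standalone sketch of the equivalent logic (the function name and payloads here are hypothetical, not part of the API):

    # Sketch only: mirrors the relaxed validation above.
    def last_message_ok(messages) -> bool:
        if not messages:
            return False
        last = messages[-1]
        # 0.7.1 accepts "system" as the final role in addition to "user".
        return last.get("role") in ("user", "system") and bool(last.get("content"))

    assert last_message_ok([{"role": "system", "content": "You are helpful."}])
    assert not last_message_ok([{"role": "assistant", "content": "hi"}])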
xinference/deploy/utils.py CHANGED
@@ -60,7 +60,9 @@ def get_config_dict(
         "disable_existing_loggers": False,
         "formatters": {
             "formatter": {
-                "format": "%(asctime)s %(name)-12s %(process)d %(levelname)-8s %(message)s"
+                "format": (
+                    "%(asctime)s %(name)-12s %(process)d %(levelname)-8s %(message)s"
+                )
             },
         },
         "filters": {
xinference/model/embedding/__init__.py CHANGED
@@ -16,7 +16,7 @@ import codecs
 import json
 import os
 
-from .core import EmbeddingModelSpec, get_cache_status
+from .core import MODEL_NAME_TO_REVISION, EmbeddingModelSpec, get_cache_status
 from .custom import CustomEmbeddingModelSpec, register_embedding, unregister_embedding
 
 _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
@@ -27,12 +27,16 @@ BUILTIN_EMBEDDING_MODELS = dict(
     (spec["model_name"], EmbeddingModelSpec(**spec))
     for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
 )
+for model_name, model_spec in BUILTIN_EMBEDDING_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
 MODELSCOPE_EMBEDDING_MODELS = dict(
     (spec["model_name"], EmbeddingModelSpec(**spec))
     for spec in json.load(
         codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
     )
 )
+for model_name, model_spec in MODELSCOPE_EMBEDDING_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
 
 from ...constants import XINFERENCE_MODEL_DIR
 
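Both loops feed one shared registry: each builtin model name maps to the list of revisions known from either hub, so a cache downloaded from ModelScope still validates when the spec in hand came from Hugging Face (the same loops appear in the rerank registry below). A minimal sketch of the pattern with made-up revision strings:

    from collections import defaultdict
    from typing import Dict, List

    MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)

    # Hypothetical revisions standing in for the real spec files.
    builtin = {"bge-base-en": "hf-rev-123"}
    modelscope = {"bge-base-en": "ms-rev-456"}

    for name, rev in builtin.items():
        MODEL_NAME_TO_REVISION[name].append(rev)
    for name, rev in modelscope.items():
        MODEL_NAME_TO_REVISION[name].append(rev)

    # A model counts as cached if any known revision matches the meta file.
    assert MODEL_NAME_TO_REVISION["bge-base-en"] == ["hf-rev-123", "ms-rev-456"]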
xinference/model/embedding/core.py CHANGED
@@ -15,7 +15,8 @@
 import logging
 import os
 import shutil
-from typing import List, Optional, Tuple, Union, no_type_check
+from collections import defaultdict
+from typing import Dict, List, Optional, Tuple, Union, no_type_check
 
 import numpy as np
 from pydantic import BaseModel
@@ -23,11 +24,14 @@ from pydantic import BaseModel
 from ...constants import XINFERENCE_CACHE_DIR
 from ...types import Embedding, EmbeddingData, EmbeddingUsage
 from ..core import ModelDescription
-from ..utils import valid_model_revision
+from ..utils import is_model_cached, valid_model_revision
 
 logger = logging.getLogger(__name__)
 
 SUPPORTED_SCHEMES = ["s3"]
+# Used for check whether the model is cached.
+# Init when registering all the builtin models.
+MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
 
 
 class EmbeddingModelSpec(BaseModel):
@@ -195,11 +199,7 @@ def cache(model_spec: EmbeddingModelSpec):
 def get_cache_status(
     model_spec: EmbeddingModelSpec,
 ) -> bool:
-    cache_dir = os.path.realpath(
-        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-    )
-    meta_path = os.path.join(cache_dir, "__valid_download")
-    return valid_model_revision(meta_path, model_spec.model_revision)
+    return is_model_cached(model_spec, MODEL_NAME_TO_REVISION)
 
 
 class EmbeddingModel:
xinference/model/llm/ggml/chatglm.py CHANGED
@@ -134,9 +134,9 @@ class ChatglmCppChatModel(LLM):
                 {
                     "index": 0,
                     "delta": {
-                        "content": token
-                        if isinstance(token, str)
-                        else token.content,
+                        "content": (
+                            token if isinstance(token, str) else token.content
+                        ),
                     },
                     "finish_reason": None,
                 }
@@ -223,8 +223,10 @@ class ChatglmCppChatModel(LLM):
                 chatglm_tools.append(elem["function"])
         return {
             "role": "system",
-            "content": f"Answer the following questions as best as you can. You have access to the following tools:\n"
-            f"{json.dumps(chatglm_tools, indent=4, ensure_ascii=False)}",
+            "content": (
+                f"Answer the following questions as best as you can. You have access to the following tools:\n"
+                f"{json.dumps(chatglm_tools, indent=4, ensure_ascii=False)}"
+            ),
         }
 
     def chat(
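
Both hunks are black-style reflows, but the second one shows how ChatGLM-style function calling is wired up: the tool list is serialized into a system prompt. A runnable illustration with a made-up tool definition (real entries come from the request's tools field):

    import json

    chatglm_tools = [
        {"name": "get_weather", "description": "Look up weather", "parameters": {}}
    ]  # hypothetical tool
    system_message = {
        "role": "system",
        "content": (
            "Answer the following questions as best as you can. "
            "You have access to the following tools:\n"
            + json.dumps(chatglm_tools, indent=4, ensure_ascii=False)
        ),
    }
    print(system_message["content"])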
xinference/model/llm/llm_family.py CHANGED
@@ -588,31 +588,57 @@ def cache_from_huggingface(
     return cache_dir
 
 
+def _check_revision(
+    llm_family: LLMFamilyV1,
+    llm_spec: "LLMSpecV1",
+    builtin: list,
+    meta_path: str,
+) -> bool:
+    for family in builtin:
+        if llm_family.model_name == family.model_name:
+            specs = family.model_specs
+            for spec in specs:
+                if (
+                    spec.model_format == "pytorch"
+                    and spec.model_size_in_billions == llm_spec.model_size_in_billions
+                ):
+                    return valid_model_revision(meta_path, spec.model_revision)
+    return False
+
+
 def get_cache_status(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
 ) -> Union[bool, List[bool]]:
+    """
+    When calling this function from above, `llm_family` is constructed only from BUILTIN_LLM_FAMILIES,
+    so we should check both huggingface and modelscope cache files.
+    """
     cache_dir = _get_cache_dir(llm_family, llm_spec, create_if_not_exist=False)
+    # check revision for pytorch model
     if llm_spec.model_format == "pytorch":
-        return _skip_download(
-            cache_dir,
-            llm_spec.model_format,
-            llm_spec.model_hub,
-            llm_spec.model_revision,
-            "none",
-        )
+        hf_meta_path = _get_meta_path(cache_dir, "pytorch", "huggingface", "none")
+        ms_meta_path = _get_meta_path(cache_dir, "pytorch", "modelscope", "none")
+        revisions = [
+            _check_revision(llm_family, llm_spec, BUILTIN_LLM_FAMILIES, hf_meta_path),
+            _check_revision(
+                llm_family, llm_spec, BUILTIN_MODELSCOPE_LLM_FAMILIES, ms_meta_path
+            ),
+        ]
+        return any(revisions)
+    # just check meta file for ggml and gptq model
     elif llm_spec.model_format in ["ggmlv3", "ggufv2", "gptq"]:
         ret = []
         for q in llm_spec.quantizations:
-            ret.append(
-                _skip_download(
-                    cache_dir,
-                    llm_spec.model_format,
-                    llm_spec.model_hub,
-                    llm_spec.model_revision,
-                    q,
-                )
+            assert q is not None
+            hf_meta_path = _get_meta_path(
+                cache_dir, llm_spec.model_format, "huggingface", q
+            )
+            ms_meta_path = _get_meta_path(
+                cache_dir, llm_spec.model_format, "modelscope", q
             )
+            results = [os.path.exists(hf_meta_path), os.path.exists(ms_meta_path)]
+            ret.append(any(results))
         return ret
     else:
         raise ValueError(f"Unsupported model format: {llm_spec.model_format}")
xinference/model/llm/pytorch/core.py CHANGED
@@ -345,6 +345,7 @@ class PytorchModel(LLM):
             inputs = input
 
         tokenizer = self._tokenizer
+        tokenizer.pad_token = tokenizer.eos_token
         is_llama = "llama" in str(type(self._model))  # llama supports batch inference
         is_chatglm = "chatglm" in str(type(self._model))
         if is_llama:
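
Why assign pad_token: LLaMA-family tokenizers ship without one, and batched encoding needs a pad token, so reusing EOS is the common workaround. A sketch assuming Hugging Face transformers is installed; gpt2 is just a small example model that also lacks a pad token:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")  # example model, no pad token
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token  # reuse EOS for padding
    batch = tokenizer(["short", "a longer prompt"], padding=True)
    print(batch["input_ids"])  # rows padded to equal length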
xinference/model/llm/pytorch/utils.py CHANGED
@@ -259,6 +259,7 @@ def generate_stream(
             raise ValueError("Invalid stop field type.")
 
         if stream:
+            output = output.strip("�")
             tmp_output_length = len(output)
             output = output[last_output_length:]
             last_output_length = tmp_output_length
@@ -424,6 +425,7 @@ def generate_stream_falcon(
             raise ValueError("Invalid stop field type.")
 
         if stream:
+            output = output.strip("�")
             tmp_output_length = len(output)
             output = output[last_output_length:]
             last_output_length = tmp_output_length
@@ -552,6 +554,7 @@ def generate_stream_chatglm(
         response = process_response(response)
 
         if stream:
+            response = response.strip("�")
             tmp_response_length = len(response)
             response = response[last_response_length:]
             last_response_length = tmp_response_length
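
The stripped character is U+FFFD, the Unicode replacement character: when a detokenizer flushes mid-way through a multi-byte UTF-8 sequence, the dangling bytes decode to "�", and without the strip it would leak into the streamed delta. A pure-Python illustration:

    data = "你好".encode("utf-8")  # 6 bytes, 3 per character
    partial = data[:4].decode("utf-8", errors="replace")
    print(partial)              # '你�' — trailing replacement character
    print(partial.strip("�"))   # '你' — dropped until more bytes arrive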
xinference/model/llm/vllm/core.py CHANGED
@@ -79,6 +79,10 @@ VLLM_SUPPORTED_CHAT_MODELS = [
     "internlm-chat-20b",
     "qwen-chat",
     "Yi",
+    "Yi-chat",
+    "code-llama",
+    "code-llama-python",
+    "code-llama-instruct",
     "mistral-instruct-v0.1",
     "chatglm3",
 ]
@@ -345,7 +349,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         full_prompt = self.get_prompt(prompt, chat_history, prompt_style)
 
         sanitized = self._sanitize_chat_config(generate_config)
-        stream = sanitized["stream"]
+        stream = sanitized.get("stream", None)
 
         if stream:
             agen = await self.async_generate(full_prompt, sanitized)
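
The one-line fix in the second hunk: subscripting raises KeyError when a caller's generate_config omits "stream", while .get falls back to None and the request takes the non-streaming path. In miniature, with a hypothetical config dict:

    config = {"temperature": 0.7}  # caller never set "stream"
    try:
        config["stream"]           # 0.7.0 behavior: raises KeyError
    except KeyError:
        pass
    assert config.get("stream", None) is None  # 0.7.1: falsy, non-streaming path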
xinference/model/rerank/__init__.py CHANGED
@@ -16,7 +16,7 @@ import codecs
 import json
 import os
 
-from .core import RerankModelSpec, get_cache_status
+from .core import MODEL_NAME_TO_REVISION, RerankModelSpec, get_cache_status
 
 _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
 _model_spec_modelscope_json = os.path.join(
@@ -26,11 +26,15 @@ BUILTIN_RERANK_MODELS = dict(
     (spec["model_name"], RerankModelSpec(**spec))
     for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
 )
+for model_name, model_spec in BUILTIN_RERANK_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
 MODELSCOPE_RERANK_MODELS = dict(
     (spec["model_name"], RerankModelSpec(**spec))
     for spec in json.load(
         codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
     )
 )
+for model_name, model_spec in MODELSCOPE_RERANK_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
 del _model_spec_json
 del _model_spec_modelscope_json
xinference/model/rerank/core.py CHANGED
@@ -15,6 +15,7 @@
 import logging
 import os
 import uuid
+from collections import defaultdict
 from typing import Dict, List, Optional, Tuple
 
 import numpy as np
@@ -23,10 +24,14 @@ from pydantic import BaseModel
 from ...constants import XINFERENCE_CACHE_DIR
 from ...types import Document, DocumentObj, Rerank
 from ..core import ModelDescription
-from ..utils import valid_model_revision
+from ..utils import is_model_cached, valid_model_revision
 
 logger = logging.getLogger(__name__)
 
+# Used for check whether the model is cached.
+# Init when registering all the builtin models.
+MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
+
 
 class RerankModelSpec(BaseModel):
     model_name: str
@@ -126,11 +131,7 @@ class RerankModel:
 def get_cache_status(
     model_spec: RerankModelSpec,
 ) -> bool:
-    cache_dir = os.path.realpath(
-        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-    )
-    meta_path = os.path.join(cache_dir, "__valid_download")
-    return valid_model_revision(meta_path, model_spec.model_revision)
+    return is_model_cached(model_spec, MODEL_NAME_TO_REVISION)
 
 
 def cache(model_spec: RerankModelSpec):
xinference/model/utils.py CHANGED
@@ -16,11 +16,11 @@ import logging
 import os
 from json import JSONDecodeError
 from pathlib import Path
-from typing import Callable, Dict, Optional, Tuple
+from typing import Any, Callable, Dict, Optional, Tuple
 
 from fsspec import AbstractFileSystem
 
-from ..constants import XINFERENCE_ENV_MODEL_SRC
+from ..constants import XINFERENCE_CACHE_DIR, XINFERENCE_ENV_MODEL_SRC
 
 logger = logging.getLogger(__name__)
 MAX_ATTEMPTS = 3
@@ -132,6 +132,17 @@ def valid_model_revision(
     return real_revision == expected_model_revision
 
 
+def is_model_cached(model_spec: Any, name_to_revisions_mapping: Dict):
+    cache_dir = os.path.realpath(
+        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
+    )
+    meta_path = os.path.join(cache_dir, "__valid_download")
+    revisions = name_to_revisions_mapping[model_spec.model_name]
+    if model_spec.model_revision not in revisions:  # Usually for UT
+        revisions.append(model_spec.model_revision)
+    return any([valid_model_revision(meta_path, revision) for revision in revisions])
+
+
 def is_valid_model_name(model_name: str) -> bool:
     import re
 
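is_model_cached accepts a match against any revision registered for the name, falling back to the spec's own revision so unit tests with ad-hoc specs still pass. A sketch of that behavior with a stand-in for valid_model_revision (the real helper parses the hub's meta JSON):

    import json
    import os
    import tempfile

    def fake_valid_model_revision(meta_path, expected_revision):
        # Stand-in only: the real valid_model_revision lives in this module.
        if not os.path.exists(meta_path):
            return False
        with open(meta_path) as f:
            return json.load(f).get("revision") == expected_revision

    with tempfile.TemporaryDirectory() as cache_dir:
        meta_path = os.path.join(cache_dir, "__valid_download")
        with open(meta_path, "w") as f:
            json.dump({"revision": "ms-rev-456"}, f)
        revisions = ["hf-rev-123", "ms-rev-456"]  # hypothetical hub revisions
        assert any(fake_valid_model_revision(meta_path, r) for r in revisions)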
xinference/web/ui/build/asset-manifest.json CHANGED
@@ -1,11 +1,11 @@
 {
   "files": {
-    "main.js": "./static/js/main.8126d441.js",
+    "main.js": "./static/js/main.778615cc.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
-    "main.8126d441.js.map": "./static/js/main.8126d441.js.map"
+    "main.778615cc.js.map": "./static/js/main.778615cc.js.map"
   },
   "entrypoints": [
-    "static/js/main.8126d441.js"
+    "static/js/main.778615cc.js"
   ]
 }
xinference/web/ui/build/index.html CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.8126d441.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.778615cc.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>