xinference 0.6.5__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +18 -4
- xinference/core/model.py +33 -10
- xinference/core/supervisor.py +10 -0
- xinference/core/worker.py +40 -13
- xinference/deploy/utils.py +4 -1
- xinference/model/embedding/__init__.py +5 -1
- xinference/model/embedding/core.py +10 -7
- xinference/model/llm/ggml/chatglm.py +7 -5
- xinference/model/llm/llm_family.py +41 -15
- xinference/model/llm/pytorch/core.py +1 -0
- xinference/model/llm/pytorch/utils.py +3 -0
- xinference/model/llm/vllm/core.py +8 -2
- xinference/model/rerank/__init__.py +5 -1
- xinference/model/rerank/core.py +7 -6
- xinference/model/utils.py +38 -2
- xinference/types.py +4 -4
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.778615cc.js +3 -0
- xinference/web/ui/build/static/js/main.778615cc.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0cccfbe5d963b8e31eb679f9d9677392839cedd04aa2956ac6b33cf19599d597.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2f651cf60b1bde50c0601c7110f77dd44819fb6e2501ff748a631724d91445d4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/35204b265d70210394b0a8571e5b01c8c0f9a748437b8a923961e3560ae3d550.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/47887a9524ffeecdc2a7839dace146b24f97a5564fc3d431d6179ad2b153cf1f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/483eb7e5f01e34c6a42ad7c64dad550ff945ee21053a52c2e5e7ebe108b85411.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/667753ce39ce1d4bcbf9a5f1a103d653be1d19d42f4e1fbaceb9b507679a52c7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6a60ae66b29c2f3634fd081d369b9e63b4522fe18eb9e43e9979d1ff264b68ad.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6e63957e4e0801705c6cb258379bfda0007ce6c3ddd2e3b62898b68455c3edf4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8d2b0b3c6988d1894694dcbbe708ef91cfe62d62dac317031f09915ced637953.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8d77975a2735d67a618407026e5325608ccd66f1b379a74faf35b4087db536f3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a5e2e9f707eb7039bea096ca117d996b8f9cbc2a5613fd8e0c5b0094444ce23c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b374bf2be1eac3cff5e0a8528a8e816e266ece911f714c123110961798a93a3b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d8a42e9df7157de9f28eecefdf178fd113bf2280d28471b6e32a8a45276042df.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e26750d9556e9741912333349e4da454c53dbfddbfc6002ab49518dcf02af745.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ed010102f476cd1a22b49be031a7f94f2ab3dd7ba8bf58839a771d46e28ff559.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ef42ec014d7bc373b874b2a1ff0dcd785490f125e913698bc049b0bd778e4d66.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/fe3eb4d76c79ca98833f686d642224eeeb94cc83ad14300d281623796d087f0a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/fe653ca0ca4297b415c0be4013574870d0465a657ae0f3d3f5b66ef6a831390c.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +1077 -405
- xinference/web/ui/node_modules/@eslint/eslintrc/node_modules/globals/globals.json +163 -3
- xinference/web/ui/node_modules/@eslint/eslintrc/node_modules/globals/package.json +1 -1
- xinference/web/ui/node_modules/@eslint/eslintrc/node_modules/ignore/package.json +64 -0
- xinference/web/ui/node_modules/@eslint/eslintrc/package.json +18 -37
- xinference/web/ui/node_modules/@eslint/js/package.json +1 -1
- xinference/web/ui/node_modules/@eslint-community/regexpp/package.json +9 -4
- xinference/web/ui/node_modules/@humanwhocodes/config-array/package.json +14 -14
- xinference/web/ui/node_modules/@rushstack/eslint-patch/package.json +6 -4
- xinference/web/ui/node_modules/@types/semver/package.json +15 -15
- xinference/web/ui/node_modules/@ungap/structured-clone/cjs/package.json +1 -0
- xinference/web/ui/node_modules/@ungap/structured-clone/package.json +53 -0
- xinference/web/ui/node_modules/ansi-colors/package.json +129 -0
- xinference/web/ui/node_modules/array-includes/package.json +8 -8
- xinference/web/ui/node_modules/array.prototype.findlastindex/package.json +120 -0
- xinference/web/ui/node_modules/array.prototype.flat/package.json +8 -8
- xinference/web/ui/node_modules/array.prototype.flatmap/package.json +8 -8
- xinference/web/ui/node_modules/arraybuffer.prototype.slice/package.json +103 -0
- xinference/web/ui/node_modules/ast-types-flow/package.json +2 -2
- xinference/web/ui/node_modules/astral-regex/package.json +33 -0
- xinference/web/ui/node_modules/asynciterator.prototype/package.json +72 -0
- xinference/web/ui/node_modules/axe-core/locales/_template.json +0 -12
- xinference/web/ui/node_modules/axe-core/package.json +1 -2
- xinference/web/ui/node_modules/axe-core/sri-history.json +0 -8
- xinference/web/ui/node_modules/call-bind/package.json +33 -23
- xinference/web/ui/node_modules/define-data-property/package.json +113 -0
- xinference/web/ui/node_modules/define-data-property/tsconfig.json +59 -0
- xinference/web/ui/node_modules/define-properties/package.json +5 -4
- xinference/web/ui/node_modules/enquirer/package.json +112 -0
- xinference/web/ui/node_modules/es-abstract/helpers/caseFolding.json +1430 -0
- xinference/web/ui/node_modules/es-abstract/package.json +29 -23
- xinference/web/ui/node_modules/es-iterator-helpers/index.json +17 -0
- xinference/web/ui/node_modules/es-iterator-helpers/package.json +185 -0
- xinference/web/ui/node_modules/eslint/conf/{rule-type-list.json → category-list.json} +9 -6
- xinference/web/ui/node_modules/eslint/node_modules/@babel/code-frame/package.json +25 -0
- xinference/web/ui/node_modules/eslint/node_modules/eslint-visitor-keys/lib/visitor-keys.json +289 -0
- xinference/web/ui/node_modules/eslint/node_modules/eslint-visitor-keys/package.json +39 -0
- xinference/web/ui/node_modules/eslint/node_modules/glob-parent/package.json +48 -0
- xinference/web/ui/node_modules/eslint/node_modules/ignore/package.json +64 -0
- xinference/web/ui/node_modules/eslint/package.json +53 -82
- xinference/web/ui/node_modules/eslint-config-prettier/package.json +13 -0
- xinference/web/ui/node_modules/eslint-import-resolver-node/package.json +3 -3
- xinference/web/ui/node_modules/eslint-plugin-import/package.json +22 -17
- xinference/web/ui/node_modules/eslint-plugin-jsx-a11y/package.json +25 -24
- xinference/web/ui/node_modules/eslint-plugin-simple-import-sort/package.json +23 -0
- xinference/web/ui/node_modules/eslint-plugin-testing-library/package.json +1 -1
- xinference/web/ui/node_modules/eslint-scope/package.json +19 -34
- xinference/web/ui/node_modules/eslint-utils/node_modules/eslint-visitor-keys/lib/visitor-keys.json +284 -0
- xinference/web/ui/node_modules/eslint-utils/node_modules/eslint-visitor-keys/package.json +40 -0
- xinference/web/ui/node_modules/eslint-utils/package.json +65 -0
- xinference/web/ui/node_modules/eslint-visitor-keys/package.json +15 -15
- xinference/web/ui/node_modules/espree/node_modules/acorn/package.json +35 -0
- xinference/web/ui/node_modules/espree/node_modules/eslint-visitor-keys/lib/visitor-keys.json +284 -0
- xinference/web/ui/node_modules/espree/node_modules/eslint-visitor-keys/package.json +40 -0
- xinference/web/ui/node_modules/espree/package.json +27 -51
- xinference/web/ui/node_modules/function-bind/package.json +38 -14
- xinference/web/ui/node_modules/function.prototype.name/package.json +32 -13
- xinference/web/ui/node_modules/functional-red-black-tree/package.json +40 -0
- xinference/web/ui/node_modules/get-intrinsic/package.json +11 -11
- xinference/web/ui/node_modules/hasown/package.json +91 -0
- xinference/web/ui/node_modules/hasown/tsconfig.json +49 -0
- xinference/web/ui/node_modules/is-async-function/package.json +86 -0
- xinference/web/ui/node_modules/is-core-module/core.json +3 -3
- xinference/web/ui/node_modules/is-core-module/package.json +7 -7
- xinference/web/ui/node_modules/is-finalizationregistry/package.json +67 -0
- xinference/web/ui/node_modules/is-generator-function/package.json +87 -0
- xinference/web/ui/node_modules/is-typed-array/package.json +8 -10
- xinference/web/ui/node_modules/iterator.prototype/package.json +73 -0
- xinference/web/ui/node_modules/jsx-ast-utils/package.json +5 -5
- xinference/web/ui/node_modules/language-tags/package.json +48 -8
- xinference/web/ui/node_modules/lodash.truncate/package.json +17 -0
- xinference/web/ui/node_modules/object-inspect/package.json +8 -6
- xinference/web/ui/node_modules/object.entries/package.json +7 -7
- xinference/web/ui/node_modules/object.fromentries/package.json +7 -7
- xinference/web/ui/node_modules/object.groupby/package.json +83 -0
- xinference/web/ui/node_modules/object.values/package.json +7 -7
- xinference/web/ui/node_modules/prettier/package.json +21 -0
- xinference/web/ui/node_modules/progress/package.json +26 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/@eslint/eslintrc/package.json +82 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/@humanwhocodes/config-array/package.json +61 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/@humanwhocodes/object-schema/package.json +33 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/ansi-styles/package.json +56 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/chalk/package.json +68 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/color-convert/package.json +48 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/color-name/package.json +28 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/escape-string-regexp/package.json +38 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/eslint/conf/replacements.json +22 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/eslint/conf/rule-type-list.json +28 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/eslint/package.json +179 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/eslint-scope/package.json +63 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/espree/package.json +88 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/globals/globals.json +1974 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/globals/package.json +55 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/has-flag/package.json +46 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/supports-color/package.json +53 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/type-fest/package.json +58 -0
- xinference/web/ui/node_modules/reflect.getprototypeof/package.json +99 -0
- xinference/web/ui/node_modules/regexp.prototype.flags/package.json +8 -7
- xinference/web/ui/node_modules/regexpp/package.json +91 -0
- xinference/web/ui/node_modules/resolve/lib/core.json +4 -1
- xinference/web/ui/node_modules/resolve/package.json +9 -8
- xinference/web/ui/node_modules/resolve/test/resolver/multirepo/package.json +1 -1
- xinference/web/ui/node_modules/safe-array-concat/package.json +5 -5
- xinference/web/ui/node_modules/set-function-length/package.json +84 -0
- xinference/web/ui/node_modules/set-function-name/package.json +80 -0
- xinference/web/ui/node_modules/slice-ansi/node_modules/ansi-styles/package.json +56 -0
- xinference/web/ui/node_modules/slice-ansi/node_modules/color-convert/package.json +48 -0
- xinference/web/ui/node_modules/slice-ansi/node_modules/color-name/package.json +28 -0
- xinference/web/ui/node_modules/slice-ansi/package.json +52 -0
- xinference/web/ui/node_modules/string.prototype.trim/package.json +7 -7
- xinference/web/ui/node_modules/string.prototype.trimend/package.json +7 -7
- xinference/web/ui/node_modules/string.prototype.trimstart/package.json +7 -7
- xinference/web/ui/node_modules/table/dist/src/schemas/config.json +95 -0
- xinference/web/ui/node_modules/table/dist/src/schemas/shared.json +139 -0
- xinference/web/ui/node_modules/table/dist/src/schemas/streamConfig.json +25 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/data.json +13 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/applicator.json +53 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/content.json +17 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/core.json +57 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/format.json +14 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/meta-data.json +37 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/validation.json +90 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/schema.json +39 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/applicator.json +48 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/content.json +17 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/core.json +51 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/format-annotation.json +14 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/meta-data.json +37 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/unevaluated.json +15 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/validation.json +90 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/schema.json +55 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-draft-06.json +137 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-draft-07.json +151 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-secure.json +88 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/data.json +13 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/applicator.json +53 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/content.json +17 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/core.json +57 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/format.json +14 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/meta-data.json +37 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/validation.json +90 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/schema.json +39 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/applicator.json +48 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/content.json +17 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/core.json +51 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/format-annotation.json +14 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/meta-data.json +37 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/unevaluated.json +15 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/validation.json +90 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/schema.json +55 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-draft-06.json +137 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-draft-07.json +151 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-secure.json +88 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/package.json +126 -0
- xinference/web/ui/node_modules/table/node_modules/json-schema-traverse/package.json +43 -0
- xinference/web/ui/node_modules/table/package.json +77 -0
- xinference/web/ui/node_modules/typed-array-buffer/package.json +73 -0
- xinference/web/ui/node_modules/typed-array-byte-length/package.json +98 -0
- xinference/web/ui/node_modules/v8-compile-cache/package.json +34 -0
- xinference/web/ui/node_modules/which-builtin-type/package.json +93 -0
- xinference/web/ui/node_modules/which-typed-array/package.json +4 -5
- xinference/web/ui/package-lock.json +1085 -406
- xinference/web/ui/package.json +10 -2
- {xinference-0.6.5.dist-info → xinference-0.7.1.dist-info}/METADATA +46 -35
- {xinference-0.6.5.dist-info → xinference-0.7.1.dist-info}/RECORD +217 -110
- xinference/web/ui/build/static/js/main.9b46963e.js +0 -3
- xinference/web/ui/build/static/js/main.9b46963e.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/06363becf51869c421a8b3e34b4e3f50aa0aac3d590446044f9412e379f4ebbe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2849edddeb99a8ecdda577e810eead74b8f8a291cdfbd987839d604666ed79d0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2c774712d327cdf0b192aaa22785ec380e9427c587350c33289828d99e9c4abc.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/34c578e50d3040519ca8dc28bf0f7fec8674c2d6c0fcc3e98401c0a3f9f013cf.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5933910e7c33febbabc0297ef7ba80f5e53ed96aa125b6a44ff2910aec29ced1.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5e18a8354ea03d22a967fd8cb2171aa798edcb3da5d66ab1fd3b9663affd0abe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/717cd7c186ace4812d1e602bdd299d8dc507f072670cc43974d53aac2574df5d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a178cfde289ffd15fd54b1c80fd9d231ae0f9644db33acb02084e69b32bfee37.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/aad919619ddb400e30bf9886f3ad7f59d5ec7cdbb98ed8b8442565bb19164237.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/adaec65f73accce3171b51b0fbcbfd8d0cd83f81a2e1b28eb34148644875499a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ae8f44c77c2e6f79680fe32fb00174183cd867093ebbda967b8985c33cc10fa2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b10bd04b4d6e28bfcaaaab37b0a4c1986e87a5b7e62e5ce4d56019880ef26990.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/cfc5da1cedee985a556e04865affccb72d0f624cbfb73da348bbe8693e8a4983.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/eebd0123c4b4396737e56b9181406a9fd76b107dd32971d23b0de99f51dd38d6.json +0 -1
- xinference/web/ui/node_modules/@nicolo-ribaudo/eslint-scope-5-internals/node_modules/eslint-scope/package.json +0 -48
- xinference/web/ui/node_modules/@typescript-eslint/utils/node_modules/eslint-scope/package.json +0 -48
- xinference/web/ui/node_modules/@typescript-eslint/utils/node_modules/estraverse/package.json +0 -40
- xinference/web/ui/node_modules/eslint/node_modules/argparse/package.json +0 -31
- xinference/web/ui/node_modules/eslint/node_modules/js-yaml/package.json +0 -66
- xinference/web/ui/node_modules/eslint-plugin-jsx-a11y/node_modules/semver/package.json +0 -38
- xinference/web/ui/node_modules/function-bind/.jscs.json +0 -176
- xinference/web/ui/node_modules/resolve/test/resolver/malformed_package_json/package.json +0 -1
- xinference/web/ui/node_modules/webpack/node_modules/eslint-scope/package.json +0 -48
- xinference/web/ui/node_modules/webpack/node_modules/estraverse/package.json +0 -40
- /xinference/web/ui/build/static/js/{main.9b46963e.js.LICENSE.txt → main.778615cc.js.LICENSE.txt} +0 -0
- /xinference/web/ui/node_modules/{@nicolo-ribaudo/eslint-scope-5-internals → eslint-scope}/node_modules/estraverse/package.json +0 -0
- /xinference/web/ui/node_modules/{@eslint/eslintrc → react-scripts}/node_modules/argparse/package.json +0 -0
- /xinference/web/ui/node_modules/{eslint → react-scripts/node_modules/eslint}/lib/cli-engine/formatters/formatters-meta.json +0 -0
- /xinference/web/ui/node_modules/{eslint-config-react-app → react-scripts/node_modules/eslint-config-react-app}/package.json +0 -0
- /xinference/web/ui/node_modules/{eslint-plugin-flowtype → react-scripts/node_modules/eslint-plugin-flowtype}/dist/configs/recommended.json +0 -0
- /xinference/web/ui/node_modules/{eslint-plugin-flowtype → react-scripts/node_modules/eslint-plugin-flowtype}/package.json +0 -0
- /xinference/web/ui/node_modules/{@eslint/eslintrc → react-scripts}/node_modules/js-yaml/package.json +0 -0
- {xinference-0.6.5.dist-info → xinference-0.7.1.dist-info}/LICENSE +0 -0
- {xinference-0.6.5.dist-info → xinference-0.7.1.dist-info}/WHEEL +0 -0
- {xinference-0.6.5.dist-info → xinference-0.7.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.6.5.dist-info → xinference-0.7.1.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2023-12-
+ "date": "2023-12-12T19:35:36+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "
- "version": "0.
+ "full-revisionid": "91f5f13c3914e1943977c80281ce485e8e3502cf",
+ "version": "0.7.1"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py
CHANGED
@@ -158,6 +158,9 @@ class RESTfulAPI:
         self._router.add_api_route(
             "/v1/models/prompts", self._get_builtin_prompts, methods=["GET"]
         )
+        self._router.add_api_route(
+            "/v1/cluster/devices", self._get_devices_count, methods=["GET"]
+        )
         self._router.add_api_route(
             "/v1/models/{model_uid}", self.describe_model, methods=["GET"]
         )
@@ -255,9 +258,6 @@ class RESTfulAPI:
                 f"{pprint.pformat(invalid_routes)}"
             )
 
-        for tp in [CreateChatCompletion, CreateCompletion]:
-            logger.debug("Dump request model fields:\n%s", tp.__fields__)
-
         class SPAStaticFiles(StaticFiles):
             async def get_response(self, path: str, scope):
                 response = await super().get_response(path, scope)
@@ -310,6 +310,17 @@ class RESTfulAPI:
             logger.error(e, exc_info=True)
             raise HTTPException(status_code=500, detail=str(e))
 
+    async def _get_devices_count(self) -> JSONResponse:
+        """
+        For internal usage
+        """
+        try:
+            data = await (await self._get_supervisor_ref()).get_devices_count()
+            return JSONResponse(content=data)
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+
     async def get_status(self) -> JSONResponse:
         try:
             data = await (await self._get_supervisor_ref()).get_status()
@@ -707,7 +718,10 @@ class RESTfulAPI:
 
         if (
             not body.messages
-            or
+            or (
+                body.messages[-1].get("role") != "user"
+                and body.messages[-1].get("role") != "system"
+            )
             or not body.messages[-1].get("content")
         ):
             raise HTTPException(
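The restful_api.py hunks above expose the supervisor's device count through a new internal GET endpoint, /v1/cluster/devices, and tighten chat validation so the last message must carry a user or system role. A minimal client sketch for the new endpoint (the host and port below are placeholders for a running Xinference server, not values from the diff):

    import requests  # third-party HTTP client, assumed to be installed

    def get_cluster_device_count(base_url: str = "http://127.0.0.1:9997") -> int:
        # The handler wraps the integer returned by the supervisor in a JSONResponse,
        # so the body decodes to a plain JSON number.
        resp = requests.get(f"{base_url}/v1/cluster/devices", timeout=10)
        resp.raise_for_status()
        return int(resp.json())

    if __name__ == "__main__":
        print(get_cluster_device_count())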
xinference/core/model.py
CHANGED
@@ -14,6 +14,7 @@
 
 import asyncio
 import inspect
+import os
 import uuid
 from typing import (
     TYPE_CHECKING,
@@ -44,6 +45,15 @@ from .utils import json_dumps, log_async
 
 T = TypeVar("T")
 
+try:
+    from torch.cuda import OutOfMemoryError
+except ImportError:
+
+    class _OutOfMemoryError(Exception):
+        pass
+
+    OutOfMemoryError = _OutOfMemoryError
+
 
 def request_limit(fn):
     """
@@ -192,18 +202,30 @@ class ModelActor(xo.StatelessActor):
         return ret
 
     async def _call_wrapper(self, _wrapper: Callable):
-
-
-
-
-
-
-        else:
-            async with self._lock:
+        try:
+            assert not (
+                inspect.iscoroutinefunction(_wrapper)
+                or inspect.isasyncgenfunction(_wrapper)
+            )
+            if self._lock is None:
                 return await asyncio.to_thread(_wrapper)
+            else:
+                async with self._lock:
+                    return await asyncio.to_thread(_wrapper)
+        except OutOfMemoryError:
+            logger.exception(
+                "Model actor is out of memory, model id: %s", self.model_uid()
+            )
+            os._exit(1)
 
     async def _call_async_wrapper(self, _wrapper: Callable):
-
+        try:
+            return await asyncio.create_task(_wrapper())
+        except OutOfMemoryError:
+            logger.exception(
+                "Model actor is out of memory, model id: %s", self.model_uid()
+            )
+            os._exit(1)
 
     @log_async(logger=logger)
     @request_limit
@@ -365,7 +387,8 @@ class ModelActor(xo.StatelessActor):
 
         async def _async_wrapper():
             try:
-
+                # anext is only available for Python >= 3.10
+                return await gen.__anext__()  # noqa: F821
             except StopAsyncIteration:
                 return stop
 
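The model.py hunks wrap every model call in a handler for torch.cuda.OutOfMemoryError and terminate the subprocess with os._exit(1) on OOM so the worker can detect the crash and recover. The import fallback at the top keeps the except clause valid even when torch is absent; a self-contained sketch of that pattern:

    # Import-fallback sketch: if torch is missing, substitute a private Exception
    # subclass so `except OutOfMemoryError` below never raises NameError.
    try:
        from torch.cuda import OutOfMemoryError
    except ImportError:

        class _OutOfMemoryError(Exception):
            pass

        OutOfMemoryError = _OutOfMemoryError


    def risky_call():
        # Stand-in for a model invocation that runs out of GPU memory.
        raise OutOfMemoryError("simulated OOM")


    try:
        risky_call()
    except OutOfMemoryError as err:
        # The actor logs here and calls os._exit(1); printing keeps the sketch harmless.
        print(f"caught: {err}")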
xinference/core/supervisor.py
CHANGED
@@ -106,6 +106,16 @@ class SupervisorActor(xo.StatelessActor):
             data[k] = v.dict()
         return data
 
+    async def get_devices_count(self) -> int:
+        from ..utils import cuda_count
+
+        if self.is_local_deployment():
+            return cuda_count()
+        # distributed deployment, choose a worker and return its cuda_count.
+        # Assume that each worker has the same count of cards.
+        worker_ref = await self._choose_worker()
+        return await worker_ref.get_devices_count()
+
     async def _choose_worker(self) -> xo.ActorRefType["WorkerActor"]:
         # TODO: better allocation strategy.
         min_running_model_count = None
xinference/core/worker.py
CHANGED
@@ -48,6 +48,7 @@ class WorkerActor(xo.StatelessActor):
         self._supervisor_address = supervisor_address
         self._supervisor_ref = None
         self._main_pool = main_pool
+        self._main_pool.recover_sub_pool = self.recover_sub_pool
 
         # internal states.
         self._model_uid_to_model: Dict[str, xo.ActorRefType["ModelActor"]] = {}
@@ -55,9 +56,22 @@
         self._gpu_to_model_uid: Dict[int, str] = {}
         self._gpu_to_embedding_model_uids: Dict[int, Set[str]] = defaultdict(set)
         self._model_uid_to_addr: Dict[str, str] = {}
+        self._model_uid_to_launch_args: Dict[str, Dict] = {}
 
         self._lock = asyncio.Lock()
 
+    async def recover_sub_pool(self, address):
+        logger.warning("Process %s is down, create model.", address)
+        for model_uid, addr in self._model_uid_to_addr.items():
+            if addr == address:
+                launch_args = self._model_uid_to_launch_args.get(model_uid)
+                try:
+                    await self.terminate_model(model_uid)
+                except Exception:
+                    pass
+                await self.launch_builtin_model(**launch_args)
+                break
+
     @classmethod
     def uid(cls) -> str:
         return "worker"
@@ -94,6 +108,12 @@
     async def __pre_destroy__(self):
         self._upload_task.cancel()
 
+    @staticmethod
+    def get_devices_count():
+        from ..utils import cuda_count
+
+        return cuda_count()
+
     @log_sync(logger=logger)
     def get_model_count(self) -> int:
         return len(self._model_uid_to_model)
@@ -175,7 +195,7 @@
         gpu_cnt = n_gpu if isinstance(n_gpu, int) else 1
         devices = (
             [await self.allocate_devices_for_embedding(model_uid)]
-            if model_type
+            if model_type in ["embedding", "rerank"]
             else self.allocate_devices(model_uid=model_uid, n_gpu=gpu_cnt)
         )
         env["CUDA_VISIBLE_DEVICES"] = ",".join([str(dev) for dev in devices])
@@ -278,7 +298,6 @@
         for dev in devices:
             self._gpu_to_model_uid[int(dev)] = model_uid
         self._model_uid_to_addr[model_uid] = subpool_address
-        return model_ref
 
     @log_async(logger=logger)
     async def launch_builtin_model(
@@ -292,7 +311,9 @@
         n_gpu: Optional[Union[int, str]] = "auto",
         request_limits: Optional[int] = None,
         **kwargs,
-    )
+    ):
+        launch_args = locals()
+        launch_args.pop("self")
         if n_gpu is not None:
             if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu > cuda_count()):
                 raise ValueError(
@@ -343,7 +364,7 @@
         self._model_uid_to_model[model_uid] = model_ref
         self._model_uid_to_model_spec[model_uid] = model_description
         self._model_uid_to_addr[model_uid] = subpool_address
-
+        self._model_uid_to_launch_args[model_uid] = launch_args
 
     @log_async(logger=logger)
     async def terminate_model(self, model_uid: str):
@@ -351,15 +372,21 @@
         if model_ref is None:
             raise ValueError(f"Model not found in the model list, uid: {model_uid}")
 
-
-
-
-
-
-
-
-
-
+        try:
+            await xo.destroy_actor(model_ref)
+        except Exception as e:
+            logger.debug(
+                "Destroy model actor failed, model uid: %s, error: %s", model_uid, e
+            )
+        try:
+            subpool_address = self._model_uid_to_addr[model_uid]
+            await self._main_pool.remove_sub_pool(subpool_address)
+        finally:
+            del self._model_uid_to_model[model_uid]
+            del self._model_uid_to_model_spec[model_uid]
+            self.release_devices(model_uid)
+            del self._model_uid_to_addr[model_uid]
+            del self._model_uid_to_launch_args[model_uid]
 
     @log_async(logger=logger)
     async def list_models(self) -> Dict[str, Dict[str, Any]]:
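The worker.py hunks add crash recovery: launch_builtin_model records its own arguments via locals() (minus self), and recover_sub_pool replays them when a model subprocess address goes down; terminate_model now also removes the sub-pool and clears all bookkeeping maps. A toy illustration of the capture-and-replay idea (the names below are illustrative, not from the diff):

    _launch_args_by_uid = {}  # model_uid -> arguments used for the original launch

    def launch(model_uid, model_name, n_gpu="auto", **kwargs):
        # Snapshot the call's arguments before doing any real work.
        args = dict(locals())
        args.pop("kwargs")
        args.update(kwargs)
        _launch_args_by_uid[model_uid] = args
        print(f"launched {model_uid} with {args}")

    def recover(model_uid):
        # Replay the recorded arguments to relaunch an identical model.
        launch(**_launch_args_by_uid[model_uid])

    launch("m1", "llama-2-chat", n_gpu=1, quantization="q4_0")
    recover("m1")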
xinference/deploy/utils.py
CHANGED
@@ -60,7 +60,9 @@ def get_config_dict(
         "disable_existing_loggers": False,
         "formatters": {
             "formatter": {
-                "format":
+                "format": (
+                    "%(asctime)s %(name)-12s %(process)d %(levelname)-8s %(message)s"
+                )
             },
         },
         "filters": {
@@ -110,6 +112,7 @@ async def create_worker_actor_pool(
     return await xo.create_actor_pool(
         address=address,
         n_process=0,
+        auto_recover="process",
         subprocess_start_method=subprocess_start_method,
         logging_conf={"dict": logging_conf},
     )
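In deploy/utils.py the log format string is only re-wrapped for readability, while auto_recover="process" tells the worker actor pool to restart crashed subprocesses, which is what recover_sub_pool above hooks into. The formatter itself is plain logging.config material; a runnable sketch (the handler and logger names here are placeholders):

    import logging
    import logging.config

    LOGGING_CONF = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            "formatter": {
                # Same format string as in the diff above.
                "format": "%(asctime)s %(name)-12s %(process)d %(levelname)-8s %(message)s"
            },
        },
        "handlers": {
            "console": {"class": "logging.StreamHandler", "formatter": "formatter"},
        },
        "root": {"handlers": ["console"], "level": "INFO"},
    }

    logging.config.dictConfig(LOGGING_CONF)
    logging.getLogger("demo").info("formatter demo")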
xinference/model/embedding/__init__.py
CHANGED
@@ -16,7 +16,7 @@ import codecs
 import json
 import os
 
-from .core import EmbeddingModelSpec, get_cache_status
+from .core import MODEL_NAME_TO_REVISION, EmbeddingModelSpec, get_cache_status
 from .custom import CustomEmbeddingModelSpec, register_embedding, unregister_embedding
 
 _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
@@ -27,12 +27,16 @@ BUILTIN_EMBEDDING_MODELS = dict(
     (spec["model_name"], EmbeddingModelSpec(**spec))
     for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
 )
+for model_name, model_spec in BUILTIN_EMBEDDING_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
 MODELSCOPE_EMBEDDING_MODELS = dict(
     (spec["model_name"], EmbeddingModelSpec(**spec))
     for spec in json.load(
         codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
     )
 )
+for model_name, model_spec in MODELSCOPE_EMBEDDING_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
 
 from ...constants import XINFERENCE_MODEL_DIR
 
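Both embedding spec tables now register every known revision of a model name in MODEL_NAME_TO_REVISION, so a copy cached from either Hugging Face or ModelScope counts as cached. A reduced sketch of the registry pattern with made-up spec values:

    from collections import defaultdict
    from dataclasses import dataclass
    from typing import Dict, List

    MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)

    @dataclass
    class Spec:
        model_name: str
        model_revision: str

    BUILTIN = {"bge-base-en": Spec("bge-base-en", "rev-hf-1")}      # hypothetical values
    MODELSCOPE = {"bge-base-en": Spec("bge-base-en", "rev-ms-1")}   # hypothetical values

    for name, spec in BUILTIN.items():
        MODEL_NAME_TO_REVISION[name].append(spec.model_revision)
    for name, spec in MODELSCOPE.items():
        MODEL_NAME_TO_REVISION[name].append(spec.model_revision)

    # A cached copy matching either revision is now a cache hit.
    print(MODEL_NAME_TO_REVISION["bge-base-en"])  # ['rev-hf-1', 'rev-ms-1']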
xinference/model/embedding/core.py
CHANGED
@@ -15,7 +15,8 @@
 import logging
 import os
 import shutil
-from
+from collections import defaultdict
+from typing import Dict, List, Optional, Tuple, Union, no_type_check
 
 import numpy as np
 from pydantic import BaseModel
@@ -23,11 +24,14 @@ from pydantic import BaseModel
 from ...constants import XINFERENCE_CACHE_DIR
 from ...types import Embedding, EmbeddingData, EmbeddingUsage
 from ..core import ModelDescription
-from ..utils import valid_model_revision
+from ..utils import is_model_cached, valid_model_revision
 
 logger = logging.getLogger(__name__)
 
 SUPPORTED_SCHEMES = ["s3"]
+# Used for check whether the model is cached.
+# Init when registering all the builtin models.
+MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
 
 
 class EmbeddingModelSpec(BaseModel):
@@ -195,11 +199,7 @@ def cache(model_spec: EmbeddingModelSpec):
 def get_cache_status(
     model_spec: EmbeddingModelSpec,
 ) -> bool:
-
-        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-    )
-    meta_path = os.path.join(cache_dir, "__valid_download")
-    return valid_model_revision(meta_path, model_spec.model_revision)
+    return is_model_cached(model_spec, MODEL_NAME_TO_REVISION)
 
 
 class EmbeddingModel:
@@ -220,6 +220,9 @@ class EmbeddingModel:
             ]
 
             raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+        from ..utils import patch_trust_remote_code
+
+        patch_trust_remote_code()
         self._model = SentenceTransformer(self._model_path, device=self._device)
 
     def create_embedding(self, sentences: Union[str, List[str]], **kwargs):
xinference/model/llm/ggml/chatglm.py
CHANGED
@@ -134,9 +134,9 @@ class ChatglmCppChatModel(LLM):
                 {
                     "index": 0,
                     "delta": {
-                        "content":
-
-
+                        "content": (
+                            token if isinstance(token, str) else token.content
+                        ),
                     },
                     "finish_reason": None,
                 }
@@ -223,8 +223,10 @@ class ChatglmCppChatModel(LLM):
             chatglm_tools.append(elem["function"])
         return {
             "role": "system",
-            "content":
-
+            "content": (
+                f"Answer the following questions as best as you can. You have access to the following tools:\n"
+                f"{json.dumps(chatglm_tools, indent=4, ensure_ascii=False)}"
+            ),
         }
 
     def chat(
xinference/model/llm/llm_family.py
CHANGED
@@ -588,31 +588,57 @@ def cache_from_huggingface(
     return cache_dir
 
 
+def _check_revision(
+    llm_family: LLMFamilyV1,
+    llm_spec: "LLMSpecV1",
+    builtin: list,
+    meta_path: str,
+) -> bool:
+    for family in builtin:
+        if llm_family.model_name == family.model_name:
+            specs = family.model_specs
+            for spec in specs:
+                if (
+                    spec.model_format == "pytorch"
+                    and spec.model_size_in_billions == llm_spec.model_size_in_billions
+                ):
+                    return valid_model_revision(meta_path, spec.model_revision)
+    return False
+
+
 def get_cache_status(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
 ) -> Union[bool, List[bool]]:
+    """
+    When calling this function from above, `llm_family` is constructed only from BUILTIN_LLM_FAMILIES,
+    so we should check both huggingface and modelscope cache files.
+    """
     cache_dir = _get_cache_dir(llm_family, llm_spec, create_if_not_exist=False)
+    # check revision for pytorch model
     if llm_spec.model_format == "pytorch":
-
-
-
-                llm_spec
-
-
-
+        hf_meta_path = _get_meta_path(cache_dir, "pytorch", "huggingface", "none")
+        ms_meta_path = _get_meta_path(cache_dir, "pytorch", "modelscope", "none")
+        revisions = [
+            _check_revision(llm_family, llm_spec, BUILTIN_LLM_FAMILIES, hf_meta_path),
+            _check_revision(
+                llm_family, llm_spec, BUILTIN_MODELSCOPE_LLM_FAMILIES, ms_meta_path
+            ),
+        ]
+        return any(revisions)
+    # just check meta file for ggml and gptq model
     elif llm_spec.model_format in ["ggmlv3", "ggufv2", "gptq"]:
         ret = []
         for q in llm_spec.quantizations:
-
-
-
-
-
-
-                q,
-            )
+            assert q is not None
+            hf_meta_path = _get_meta_path(
+                cache_dir, llm_spec.model_format, "huggingface", q
+            )
+            ms_meta_path = _get_meta_path(
+                cache_dir, llm_spec.model_format, "modelscope", q
             )
+            results = [os.path.exists(hf_meta_path), os.path.exists(ms_meta_path)]
+            ret.append(any(results))
         return ret
     else:
         raise ValueError(f"Unsupported model format: {llm_spec.model_format}")
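The llm_family.py change makes get_cache_status treat a pytorch model as cached when either the Hugging Face or the ModelScope meta file validates, and likewise checks both meta files per quantization for ggml/gguf/gptq. A stripped-down sketch of the "any source counts" check; the meta file names and JSON layout below are placeholders, not the real _get_meta_path output:

    import json
    import os

    def _revision_matches(meta_path: str, expected_revision: str) -> bool:
        # Loosely mirrors valid_model_revision: the meta file must exist and
        # record the expected revision.
        if not os.path.exists(meta_path):
            return False
        with open(meta_path) as f:
            return json.load(f).get("revision") == expected_revision

    def is_cached(cache_dir: str, expected_revision: str) -> bool:
        candidates = [
            os.path.join(cache_dir, "__valid_download_huggingface"),  # hypothetical name
            os.path.join(cache_dir, "__valid_download_modelscope"),   # hypothetical name
        ]
        # Cached as soon as any hub's meta file checks out.
        return any(_revision_matches(p, expected_revision) for p in candidates)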
xinference/model/llm/pytorch/core.py
CHANGED
@@ -345,6 +345,7 @@ class PytorchModel(LLM):
         inputs = input
 
         tokenizer = self._tokenizer
+        tokenizer.pad_token = tokenizer.eos_token
         is_llama = "llama" in str(type(self._model))  # llama supports batch inference
         is_chatglm = "chatglm" in str(type(self._model))
         if is_llama:
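The one-line pytorch/core.py change sets the tokenizer's pad token to its EOS token, since many causal LLM tokenizers ship without a pad token and batched inference needs one. A hedged transformers sketch (gpt2 is just an example model, downloaded on first use):

    from transformers import AutoTokenizer  # assumes `transformers` is installed

    tokenizer = AutoTokenizer.from_pretrained("gpt2")  # gpt2 defines no pad token
    if tokenizer.pad_token is None:
        # Reuse EOS as padding so padded batch encoding works.
        tokenizer.pad_token = tokenizer.eos_token

    batch = tokenizer(["hello", "a much longer prompt"], padding=True)
    print(batch["input_ids"])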
xinference/model/llm/pytorch/utils.py
CHANGED
@@ -259,6 +259,7 @@ def generate_stream(
                 raise ValueError("Invalid stop field type.")
 
         if stream:
+            output = output.strip("�")
             tmp_output_length = len(output)
             output = output[last_output_length:]
             last_output_length = tmp_output_length
@@ -424,6 +425,7 @@ def generate_stream_falcon(
                 raise ValueError("Invalid stop field type.")
 
         if stream:
+            output = output.strip("�")
             tmp_output_length = len(output)
             output = output[last_output_length:]
             last_output_length = tmp_output_length
@@ -552,6 +554,7 @@ def generate_stream_chatglm(
         response = process_response(response)
 
         if stream:
+            response = response.strip("�")
             tmp_response_length = len(response)
             response = response[last_response_length:]
             last_response_length = tmp_response_length
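All three streaming paths in pytorch/utils.py now strip trailing U+FFFD from the partially decoded text, which shows up when a multi-byte UTF-8 character is split across decode steps; stripping it keeps the replacement character out of streamed chunks until the full character arrives. The effect in the tokenizer-based loop is analogous to this byte-level illustration:

    data = "你好".encode("utf-8")  # 6 bytes, 3 per character

    # Decode a prefix that cuts the second character in half.
    partial = data[:4].decode("utf-8", errors="replace")
    print(repr(partial))             # '你\ufffd', trailing replacement character
    print(repr(partial.strip("�")))  # '你', what the stream would emit for now

    # Once the remaining bytes arrive, the text decodes cleanly.
    print(repr(data.decode("utf-8")))  # '你好'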
xinference/model/llm/vllm/core.py
CHANGED
@@ -79,6 +79,10 @@ VLLM_SUPPORTED_CHAT_MODELS = [
     "internlm-chat-20b",
     "qwen-chat",
     "Yi",
+    "Yi-chat",
+    "code-llama",
+    "code-llama-python",
+    "code-llama-instruct",
     "mistral-instruct-v0.1",
     "chatglm3",
 ]
@@ -319,7 +323,9 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         if not generate_config:
             generate_config = {}
         if self.model_family.prompt_style:
-            if (
+            if (
+                not generate_config.get("stop")
+            ) and self.model_family.prompt_style.stop:
                 generate_config["stop"] = self.model_family.prompt_style.stop.copy()
             if self.model_family.prompt_style.stop_token_ids:
                 generate_config.setdefault(
@@ -343,7 +349,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         full_prompt = self.get_prompt(prompt, chat_history, prompt_style)
 
         sanitized = self._sanitize_chat_config(generate_config)
-        stream = sanitized
+        stream = sanitized.get("stream", None)
 
         if stream:
             agen = await self.async_generate(full_prompt, sanitized)
xinference/model/rerank/__init__.py
CHANGED
@@ -16,7 +16,7 @@ import codecs
 import json
 import os
 
-from .core import RerankModelSpec, get_cache_status
+from .core import MODEL_NAME_TO_REVISION, RerankModelSpec, get_cache_status
 
 _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
 _model_spec_modelscope_json = os.path.join(
@@ -26,11 +26,15 @@ BUILTIN_RERANK_MODELS = dict(
     (spec["model_name"], RerankModelSpec(**spec))
     for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
 )
+for model_name, model_spec in BUILTIN_RERANK_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
 MODELSCOPE_RERANK_MODELS = dict(
     (spec["model_name"], RerankModelSpec(**spec))
     for spec in json.load(
         codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
     )
 )
+for model_name, model_spec in MODELSCOPE_RERANK_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
 del _model_spec_json
 del _model_spec_modelscope_json
xinference/model/rerank/core.py
CHANGED
@@ -15,6 +15,7 @@
 import logging
 import os
 import uuid
+from collections import defaultdict
 from typing import Dict, List, Optional, Tuple
 
 import numpy as np
@@ -23,10 +24,14 @@ from pydantic import BaseModel
 from ...constants import XINFERENCE_CACHE_DIR
 from ...types import Document, DocumentObj, Rerank
 from ..core import ModelDescription
-from ..utils import valid_model_revision
+from ..utils import is_model_cached, valid_model_revision
 
 logger = logging.getLogger(__name__)
 
+# Used for check whether the model is cached.
+# Init when registering all the builtin models.
+MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
+
 
 class RerankModelSpec(BaseModel):
     model_name: str
@@ -126,11 +131,7 @@ class RerankModel:
 def get_cache_status(
     model_spec: RerankModelSpec,
 ) -> bool:
-
-        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-    )
-    meta_path = os.path.join(cache_dir, "__valid_download")
-    return valid_model_revision(meta_path, model_spec.model_revision)
+    return is_model_cached(model_spec, MODEL_NAME_TO_REVISION)
 
 
 def cache(model_spec: RerankModelSpec):
xinference/model/utils.py
CHANGED
@@ -16,11 +16,11 @@ import logging
 import os
 from json import JSONDecodeError
 from pathlib import Path
-from typing import Callable, Dict, Optional, Tuple
+from typing import Any, Callable, Dict, Optional, Tuple
 
 from fsspec import AbstractFileSystem
 
-from ..constants import XINFERENCE_ENV_MODEL_SRC
+from ..constants import XINFERENCE_CACHE_DIR, XINFERENCE_ENV_MODEL_SRC
 
 logger = logging.getLogger(__name__)
 MAX_ATTEMPTS = 3
@@ -132,6 +132,17 @@ def valid_model_revision(
     return real_revision == expected_model_revision
 
 
+def is_model_cached(model_spec: Any, name_to_revisions_mapping: Dict):
+    cache_dir = os.path.realpath(
+        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
+    )
+    meta_path = os.path.join(cache_dir, "__valid_download")
+    revisions = name_to_revisions_mapping[model_spec.model_name]
+    if model_spec.model_revision not in revisions:  # Usually for UT
+        revisions.append(model_spec.model_revision)
+    return any([valid_model_revision(meta_path, revision) for revision in revisions])
+
+
 def is_valid_model_name(model_name: str) -> bool:
     import re
 
@@ -211,3 +222,28 @@ def copy_from_src_to_dst(
         )
         if attempt + 1 == max_attempt:
             raise
+
+
+def patch_trust_remote_code():
+    """sentence-transformers calls transformers without the trust_remote_code=True, some embedding
+    models will fail to load, e.g. jina-embeddings-v2-base-en
+
+    :return:
+    """
+    try:
+        from transformers.dynamic_module_utils import resolve_trust_remote_code
+    except ImportError:
+        logger.error("Patch transformers trust_remote_code failed.")
+    else:
+
+        def _patched_resolve_trust_remote_code(*args, **kwargs):
+            logger.info("Patched resolve_trust_remote_code: %s %s", args, kwargs)
+            return True
+
+        if (
+            resolve_trust_remote_code.__code__
+            != _patched_resolve_trust_remote_code.__code__
+        ):
+            resolve_trust_remote_code.__code__ = (
+                _patched_resolve_trust_remote_code.__code__
+            )
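patch_trust_remote_code works by swapping the __code__ object of transformers' resolve_trust_remote_code for one that always returns True, so every dynamic-module load behaves as if trust_remote_code=True had been passed. A toy demonstration of the same __code__ swap on a local function (no transformers involved):

    def resolve(flag, default):
        # Stand-in for the library function being patched.
        return flag if flag is not None else default

    def _patched(*args, **kwargs):
        # Ignore the arguments and always allow.
        return True

    print(resolve(None, False))  # False: original behaviour

    if resolve.__code__ != _patched.__code__:
        # Rebinding __code__ changes the function body in place, so every existing
        # reference to `resolve` picks up the patched behaviour.
        resolve.__code__ = _patched.__code__

    print(resolve(None, False))  # True: patched behaviour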
xinference/types.py
CHANGED
@@ -289,7 +289,7 @@ def get_pydantic_model_from_method(
             model.__fields__.pop(key)
     if exclude_fields is not None:
         for key in exclude_fields:
-            model.__fields__.pop(key)
+            model.__fields__.pop(key, None)
     if include_fields is not None:
         dummy_model = create_model("DummyModel", **include_fields)
         model.__fields__.update(dummy_model.__fields__)
@@ -307,10 +307,10 @@ def fix_forward_ref(model):
         if isinstance(field.annotation, ForwardRef):
             exclude_fields.append(key)
             include_fields[key] = (Optional[Any], None)
-    if exclude_fields
+    if exclude_fields:
         for key in exclude_fields:
-            model.__fields__.pop(key)
-    if include_fields
+            model.__fields__.pop(key, None)
+    if include_fields:
         dummy_model = create_model("DummyModel", **include_fields)
         model.__fields__.update(dummy_model.__fields__)
     return model
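Both pop calls in types.py now pass a default, so removing a field that is absent (for example one already popped by an earlier pass) no longer raises KeyError. The underlying dict behaviour in one small example:

    fields = {"temperature": 0.7, "stop": None}

    fields.pop("stop")               # key exists: removed normally
    print(fields.pop("stop", None))  # already gone: returns the default instead of raising
    try:
        fields.pop("stop")
    except KeyError:
        print("pop() without a default raises KeyError")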