xinference 0.14.1__py3-none-any.whl → 0.14.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic.

Files changed (87)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +15 -34
  3. xinference/client/restful/restful_client.py +2 -2
  4. xinference/core/chat_interface.py +44 -9
  5. xinference/core/model.py +4 -4
  6. xinference/core/scheduler.py +1 -2
  7. xinference/core/worker.py +1 -1
  8. xinference/deploy/cmdline.py +2 -2
  9. xinference/deploy/test/test_cmdline.py +7 -7
  10. xinference/model/llm/__init__.py +20 -27
  11. xinference/model/llm/{ggml/llamacpp.py → llama_cpp/core.py} +2 -35
  12. xinference/model/llm/llm_family.json +448 -1153
  13. xinference/model/llm/llm_family.py +14 -139
  14. xinference/model/llm/llm_family_modelscope.json +230 -313
  15. xinference/model/llm/memory.py +9 -9
  16. xinference/model/llm/sglang/core.py +2 -2
  17. xinference/model/llm/{pytorch → transformers}/chatglm.py +6 -13
  18. xinference/model/llm/{pytorch → transformers}/core.py +2 -10
  19. xinference/model/llm/transformers/intern_vl.py +457 -0
  20. xinference/model/llm/{pytorch → transformers}/internlm2.py +4 -8
  21. xinference/model/llm/{pytorch → transformers}/minicpmv26.py +67 -22
  22. xinference/model/llm/{pytorch → transformers}/utils.py +1 -2
  23. xinference/model/llm/utils.py +76 -70
  24. xinference/model/llm/vllm/core.py +110 -11
  25. xinference/model/utils.py +1 -95
  26. xinference/thirdparty/internvl/__init__.py +0 -0
  27. xinference/thirdparty/internvl/conversation.py +393 -0
  28. xinference/thirdparty/omnilmm/model/utils.py +16 -1
  29. xinference/web/ui/build/asset-manifest.json +3 -3
  30. xinference/web/ui/build/index.html +1 -1
  31. xinference/web/ui/build/static/js/main.ffc26121.js +3 -0
  32. xinference/web/ui/build/static/js/main.ffc26121.js.map +1 -0
  33. xinference/web/ui/node_modules/.cache/babel-loader/213b5913e164773c2b0567455377765715f5f07225fbac77ad8e1e9dc9648a47.json +1 -0
  34. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +1 -0
  35. xinference/web/ui/node_modules/.cache/babel-loader/5391543180fead1eeef5364300301498d58a7d91d62de3841a32768b67f4552f.json +1 -0
  36. xinference/web/ui/node_modules/.cache/babel-loader/5c26a23b5eacf5b752a08531577ae3840bb247745ef9a39583dc2d05ba93a82a.json +1 -0
  37. xinference/web/ui/node_modules/.cache/babel-loader/714c37ce0ec5b5c591033f02be2f3f491fdd70da3ef568ee4a4f94689a3d5ca2.json +1 -0
  38. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +1 -0
  39. xinference/web/ui/node_modules/.cache/babel-loader/978b57d1a04a701bc3fcfebc511f5f274eed6ed7eade67f6fb76c27d5fd9ecc8.json +1 -0
  40. xinference/web/ui/node_modules/.cache/babel-loader/a797831de0dc74897f4b50b3426555d748f328b4c2cc391de709eadaf6a5f3e3.json +1 -0
  41. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +1 -0
  42. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +1 -0
  43. xinference/web/ui/node_modules/.cache/babel-loader/e91938976f229ce986b2907e51e1f00540b584ced0a315d498c172d13220739d.json +1 -0
  44. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +1 -0
  45. {xinference-0.14.1.dist-info → xinference-0.14.2.dist-info}/METADATA +12 -15
  46. {xinference-0.14.1.dist-info → xinference-0.14.2.dist-info}/RECORD +63 -70
  47. xinference/locale/utils.py +0 -39
  48. xinference/locale/zh_CN.json +0 -26
  49. xinference/model/llm/ggml/tools/__init__.py +0 -15
  50. xinference/model/llm/ggml/tools/convert_ggml_to_gguf.py +0 -498
  51. xinference/model/llm/ggml/tools/gguf.py +0 -884
  52. xinference/model/llm/pytorch/__init__.py +0 -13
  53. xinference/model/llm/pytorch/baichuan.py +0 -81
  54. xinference/model/llm/pytorch/falcon.py +0 -138
  55. xinference/model/llm/pytorch/intern_vl.py +0 -352
  56. xinference/model/llm/pytorch/vicuna.py +0 -69
  57. xinference/web/ui/build/static/js/main.17ca0398.js +0 -3
  58. xinference/web/ui/build/static/js/main.17ca0398.js.map +0 -1
  59. xinference/web/ui/node_modules/.cache/babel-loader/1444c41a4d04494f1cbc2d8c1537df107b451cb569cb2c1fbf5159f3a4841a5f.json +0 -1
  60. xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +0 -1
  61. xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +0 -1
  62. xinference/web/ui/node_modules/.cache/babel-loader/6450605fac003812485f6251b9f0caafbf2e5bfc3bbe2f000050d9e2fdb8dcd3.json +0 -1
  63. xinference/web/ui/node_modules/.cache/babel-loader/71684495d995c7e266eecc6a0ad8ea0284cc785f80abddf863789c57a6134969.json +0 -1
  64. xinference/web/ui/node_modules/.cache/babel-loader/80acd1edf31542ab1dcccfad02cb4b38f3325cff847a781fcce97500cfd6f878.json +0 -1
  65. xinference/web/ui/node_modules/.cache/babel-loader/8a9742ddd8ba8546ef42dc14caca443f2b4524fabed7bf269e0eff3b7b64ee7d.json +0 -1
  66. xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +0 -1
  67. xinference/web/ui/node_modules/.cache/babel-loader/d93730e2b5d7e8c957b4d0965d2ed1dac9045a649adbd47c220d11f255d4b1e0.json +0 -1
  68. xinference/web/ui/node_modules/.cache/babel-loader/e656dc00b4d8b387f0a81ba8fc558767df1601c66369e2eb86a5ef27cf080572.json +0 -1
  69. xinference/web/ui/node_modules/.cache/babel-loader/f28b83886159d83b84f099b05d607a822dca4dd7f2d8aa6d56fe08bab0b5b086.json +0 -1
  70. xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +0 -1
  71. /xinference/{locale → model/llm/llama_cpp}/__init__.py +0 -0
  72. /xinference/model/llm/{ggml → transformers}/__init__.py +0 -0
  73. /xinference/model/llm/{pytorch → transformers}/cogvlm2.py +0 -0
  74. /xinference/model/llm/{pytorch → transformers}/compression.py +0 -0
  75. /xinference/model/llm/{pytorch → transformers}/deepseek_vl.py +0 -0
  76. /xinference/model/llm/{pytorch → transformers}/glm4v.py +0 -0
  77. /xinference/model/llm/{pytorch → transformers}/llama_2.py +0 -0
  78. /xinference/model/llm/{pytorch → transformers}/minicpmv25.py +0 -0
  79. /xinference/model/llm/{pytorch → transformers}/omnilmm.py +0 -0
  80. /xinference/model/llm/{pytorch → transformers}/qwen_vl.py +0 -0
  81. /xinference/model/llm/{pytorch → transformers}/tensorizer_utils.py +0 -0
  82. /xinference/model/llm/{pytorch → transformers}/yi_vl.py +0 -0
  83. /xinference/web/ui/build/static/js/{main.17ca0398.js.LICENSE.txt → main.ffc26121.js.LICENSE.txt} +0 -0
  84. {xinference-0.14.1.dist-info → xinference-0.14.2.dist-info}/LICENSE +0 -0
  85. {xinference-0.14.1.dist-info → xinference-0.14.2.dist-info}/WHEEL +0 -0
  86. {xinference-0.14.1.dist-info → xinference-0.14.2.dist-info}/entry_points.txt +0 -0
  87. {xinference-0.14.1.dist-info → xinference-0.14.2.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,6 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  import base64
- import json
  import logging
  import time
  import uuid
@@ -124,29 +123,60 @@ class MiniCPMV26Model(PytorchChatModel):
  else:
  return Image.open(BytesIO(response.content)).convert("RGB")

+ MAX_NUM_FRAMES = 64
+
+ def encode_video(video_path):
+ from decord import VideoReader, cpu
+
+ def uniform_sample(l, n):
+ gap = len(l) / n
+ idxs = [int(i * gap + gap / 2) for i in range(n)]
+ return [l[i] for i in idxs]
+
+ vr = VideoReader(video_path, ctx=cpu(0))
+ sample_fps = round(vr.get_avg_fps() / 1) # FPS
+ frame_idx = [i for i in range(0, len(vr), sample_fps)]
+ if len(frame_idx) > MAX_NUM_FRAMES:
+ frame_idx = uniform_sample(frame_idx, MAX_NUM_FRAMES)
+ frames = vr.get_batch(frame_idx).asnumpy()
+ frames = [Image.fromarray(v.astype("uint8")) for v in frames]
+ print("num frames:", len(frames))
+ return frames
+
+ def _load_video(_url):
+ frames = None
+ if _url.startswith("data:"):
+ raise RuntimeError("Only video url format is supported")
+ else:
+ frames = encode_video(_url)
+ return frames
+
  if not isinstance(content, str):
  texts = []
  image_urls = []
+ video_urls = []
  for c in content:
  c_type = c.get("type")
  if c_type == "text":
  texts.append(c["text"])
  elif c_type == "image_url":
  image_urls.append(c["image_url"]["url"])
+ elif c_type == "video_url":
+ video_urls.append(c["video_url"]["url"])
  image_futures = []
  with ThreadPoolExecutor() as executor:
  for image_url in image_urls:
  fut = executor.submit(_load_image, image_url)
  image_futures.append(fut)
  images = [fut.result() for fut in image_futures]
+ frames = []
+ if len(video_urls) > 1:
+ raise RuntimeError("Only one video per message is supported")
+ for v in video_urls:
+ frames = _load_video(v)
  text = " ".join(texts)
- if len(images) == 0:
- return text, []
- elif len(images) == 1:
- return text, images
- else:
- raise RuntimeError("Only one image per message is supported")
- return content, []
+ return text, images, frames
+ return content, [], []

  def chat(
  self,
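For orientation on the video path added above: encode_video keeps roughly one frame per second, then uniform_sample caps the result at MAX_NUM_FRAMES (64). A standalone sketch of just the sampling step, using a made-up frame count and no decord dependency:

```python
# Illustrative only: mirrors the sampling logic in the hunk above.
MAX_NUM_FRAMES = 64

def uniform_sample(l, n):
    # pick n indices spread evenly across l, taking the middle of each bucket
    gap = len(l) / n
    idxs = [int(i * gap + gap / 2) for i in range(n)]
    return [l[i] for i in idxs]

# hypothetical 5-minute clip at 30 fps: one candidate frame per second -> 300 indices
frame_idx = list(range(0, 300 * 30, 30))
if len(frame_idx) > MAX_NUM_FRAMES:
    frame_idx = uniform_sample(frame_idx, MAX_NUM_FRAMES)

print(len(frame_idx))  # 64 frames would be decoded and handed to the model
```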
@@ -156,36 +186,51 @@ class MiniCPMV26Model(PytorchChatModel):
  generate_config: Optional[PytorchGenerateConfig] = None,
  ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
  stream = generate_config.get("stream", False) if generate_config else False
- content, images_chat = self._message_content_to_chat(prompt)
+ videoExisted = False
+
+ content, images_chat, video_frames = self._message_content_to_chat(prompt)
+ if len(video_frames) > 0:
+ videoExisted = True
+ images_chat = video_frames

  msgs = []
  query_to_response: List[Dict] = []
- images_history = []
  for h in chat_history or []:
+ images_history = []
  role = h["role"]
- content_h, images_tmp = self._message_content_to_chat(h["content"])
+ content_h, images_tmp, video_frames_h = self._message_content_to_chat(
+ h["content"]
+ )
  if images_tmp != []:
  images_history = images_tmp
+ if len(video_frames_h) > 0:
+ videoExisted = True
+ images_history = video_frames_h
  if len(query_to_response) == 0 and role == "user":
- query_to_response.append({"role": "user", "content": content_h})
+ query_to_response.append(
+ {"role": "user", "content": images_history + [content_h]}
+ )
  if len(query_to_response) == 1 and role == "assistant":
- query_to_response.append({"role": "assistant", "content": content_h})
+ query_to_response.append(
+ {"role": "assistant", "content": images_history + [content_h]}
+ )
  if len(query_to_response) == 2:
  msgs.extend(query_to_response)
  query_to_response = []
- image = None
- if len(images_chat) > 0:
- image = images_chat[0]
- elif len(images_history) > 0:
- image = images_history[0]
- msgs.append({"role": "user", "content": content})
+ msgs.append({"role": "user", "content": images_chat + [content]})
+
+ # Set decode params for video
+ params = {}
+ if videoExisted:
+ params = {"use_image_id": False, "max_slice_nums": 1}

  chat = self._model.chat(
- image=image,
- msgs=json.dumps(msgs, ensure_ascii=True),
+ image=None,
+ msgs=msgs,
  tokenizer=self._tokenizer,
  sampling=True,
- **generate_config
+ **generate_config,
+ **params,
  )
  if stream:
  it = self.chat_stream(chat)
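With the two hunks above, MiniCPM-V 2.6 accepts a video_url content part and passes the sampled frames to model.chat. A minimal client-side sketch of such a request through the OpenAI-compatible endpoint; the host/port, model uid and video URL are placeholders, not values taken from this diff:

```python
# Hedged sketch: server address, model uid and URL are assumptions.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:9997/v1", api_key="not-needed")

response = client.chat.completions.create(
    model="MiniCPM-V-2.6",  # hypothetical model uid
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe what happens in this clip."},
                # plain URLs only: data: URIs raise RuntimeError in _load_video above
                {"type": "video_url", "video_url": {"url": "https://example.com/clip.mp4"}},
            ],
        }
    ],
)
print(response.choices[0].message.content)
```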
@@ -40,8 +40,7 @@ from ....types import (
  )

  if TYPE_CHECKING:
- from ...llm.pytorch.core import PytorchModel
-
+ from ...llm.transformers.core import PytorchModel

  logger = logging.getLogger(__name__)

@@ -11,14 +11,19 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+ import base64
  import functools
  import json
  import logging
  import os
  import time
  import uuid
+ from io import BytesIO
  from typing import AsyncGenerator, Dict, Iterator, List, Optional, Tuple, cast

+ import requests
+ from PIL import Image
+
  from ...types import (
  SPECIAL_TOOL_PROMPT,
  ChatCompletion,
@@ -28,7 +33,7 @@ from ...types import (
  CompletionChunk,
  )
  from .llm_family import (
- GgmlLLMSpecV1,
+ LlamaCppLLMSpecV1,
  LLMFamilyV1,
  LLMSpecV1,
  PromptStyleV1,
@@ -60,7 +65,7 @@ class ChatModelMixin:
  chat_history: List[ChatCompletionMessage],
  prompt_style: PromptStyleV1,
  tools: Optional[List[Dict]] = None,
- ) -> str:
+ ):
  """
  Inspired by FastChat. Format chat history into a prompt according to the prompty style of
  different models.
@@ -92,17 +97,6 @@ class ChatModelMixin:
  else:
  ret += role + ":"
  return ret
- elif prompt_style.style_name == "ADD_COLON_TWO":
- seps = [prompt_style.intra_message_sep, prompt_style.inter_message_sep]
- ret = prompt_style.system_prompt + seps[0]
- for i, message in enumerate(chat_history):
- role = get_role(message["role"])
- content = message["content"]
- if content:
- ret += role + ": " + content + seps[i % 2]
- else:
- ret += role + ":"
- return ret
  elif prompt_style.style_name == "NO_COLON_TWO":
  seps = [prompt_style.intra_message_sep, prompt_style.inter_message_sep]
  ret = prompt_style.system_prompt
@@ -144,21 +138,6 @@ class ChatModelMixin:
  else:
  ret += f"<|start_header_id|>{role}<|end_header_id|>{prompt_style.intra_message_sep}"
  return ret
- elif prompt_style.style_name == "FALCON":
- ret = prompt_style.system_prompt
- for message in chat_history:
- role = get_role(message["role"])
- content = message["content"]
- if content:
- ret += (
- role
- + ": "
- + content.replace("\r\n", "\n").replace("\n\n", "\n")
- )
- ret += "\n\n"
- else:
- ret += role + ":"
- return ret
  elif prompt_style.style_name == "MIXTRAL_V01":
  ret = ""
  for i, message in enumerate(chat_history):
@@ -168,22 +147,6 @@ class ChatModelMixin:
  else: # assistant
  ret += f"{content} </s>"
  return ret
- elif prompt_style.style_name == "CHATGLM":
- round_add_n = 1 if prompt_style.intra_message_sep == "\n\n" else 0
- if prompt_style.system_prompt:
- ret = prompt_style.system_prompt + prompt_style.intra_message_sep
- else:
- ret = ""
- for i, message in enumerate(chat_history):
- role = get_role(message["role"])
- content = message["content"]
- if i % 2 == 0:
- ret += f"[Round {i // 2 + round_add_n}]{prompt_style.intra_message_sep}"
- if content:
- ret += role + ":" + content + prompt_style.intra_message_sep
- else:
- ret += role + ":"
- return ret
  elif prompt_style.style_name == "CHATGLM3":
  prompts = (
  [f"<|system|>\n {prompt_style.system_prompt}"]
@@ -323,25 +286,6 @@ Begin!"""
  else:
  ret += role + "\n"
  return ret
- elif prompt_style.style_name == "INTERNLM":
- seps = [prompt_style.intra_message_sep, prompt_style.inter_message_sep]
- ret = ""
- for i, message in enumerate(chat_history[:-2]):
- if i % 2 == 0:
- ret += "<s>"
- role = get_role(message["role"])
- content = message["content"]
- ret += role + ":" + str(content) + seps[i % 2]
- if len(ret) == 0:
- ret += "<s>"
- ret += (
- chat_history[-2]["role"]
- + ":"
- + str(chat_history[-2]["content"])
- + seps[0]
- )
- ret += chat_history[-1]["role"] + ":"
- return ret
  elif prompt_style.style_name == "INTERNLM2":
  ret = (
  "<s>"
@@ -370,9 +314,6 @@ Begin!"""
  else:
  ret += role + ": Let's think step by step."
  return ret
- elif prompt_style.style_name == "INSTRUCTION":
- message = chat_history[-2]
- return prompt_style.system_prompt.format(message["content"])
  elif prompt_style.style_name == "DEEPSEEK_CHAT":
  seps = [prompt_style.intra_message_sep, prompt_style.inter_message_sep]
  ret = prompt_style.system_prompt
@@ -504,6 +445,52 @@ Begin!"""
  else:
  ret += role
  return ret
+ elif prompt_style.style_name == "INTERNVL":
+ ret = (
+ "<s>"
+ if prompt_style.system_prompt == ""
+ else "<s><|im_start|>system\n"
+ + prompt_style.system_prompt
+ + prompt_style.intra_message_sep
+ + "\n"
+ )
+ images = [] # type: ignore
+ for message in chat_history:
+ role = get_role(message["role"])
+ content = message["content"]
+ if isinstance(content, str):
+ ret += role + "\n" + content + prompt_style.intra_message_sep + "\n"
+ elif isinstance(content, list):
+ text = ""
+ image_urls = []
+ for c in content:
+ c_type = c.get("type")
+ if c_type == "text":
+ text = c["text"]
+ elif c_type == "image_url":
+ image_urls.append(c["image_url"]["url"])
+ image_futures = []
+ from concurrent.futures import ThreadPoolExecutor
+
+ with ThreadPoolExecutor() as executor:
+ for image_url in image_urls:
+ fut = executor.submit(_decode_image, image_url)
+ image_futures.append(fut)
+ images = [fut.result() for fut in image_futures]
+ if len(image_futures) == 0:
+ ret += (
+ role + "\n" + text + prompt_style.intra_message_sep + "\n"
+ )
+ else:
+ ret += (
+ role
+ + "\n"
+ + f"<image>\n{text}"
+ + prompt_style.intra_message_sep
+ + "\n"
+ )
+
+ return (ret, images)
  else:
  raise ValueError(f"Invalid prompt style: {prompt_style.style_name}")
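The new INTERNVL branch returns a (prompt, images) tuple rather than a plain string, and prefixes each image-bearing turn with an <image> placeholder. A rough sketch of the string it assembles for a single user turn with one image; the system prompt, role tags and intra_message_sep value are assumptions about the registered prompt style, not values from this diff:

```python
# Illustrative only: role/separator strings are assumed, not from llm_family.json.
system_prompt = "You are a helpful assistant."
intra_message_sep = "<|im_end|>"

ret = "<s><|im_start|>system\n" + system_prompt + intra_message_sep + "\n"
# a user turn that carried one image_url part: its text gets an <image> placeholder
ret += "<|im_start|>user" + "\n" + "<image>\nWhat is in this picture?" + intra_message_sep + "\n"
print(ret)  # the decoded PIL images are returned alongside this string
```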

@@ -706,7 +693,7 @@ Begin!"""
  family = model_family.model_family or model_family.model_name
  if family in ["gorilla-openfunctions-v1", "gorilla-openfunctions-v2"]:
  content, func, args = cls._eval_gorilla_openfunctions_arguments(c, tools)
- elif family in ["chatglm3"] + GLM4_TOOL_CALL_FAMILY:
+ elif family in GLM4_TOOL_CALL_FAMILY:
  content, func, args = cls._eval_glm_chat_arguments(c, tools)
  elif family in QWEN_TOOL_CALL_FAMILY:
  content, func, args = cls._eval_qwen_chat_arguments(c, tools)
@@ -870,10 +857,10 @@ def get_file_location(
  is_cached = cache_status
  assert isinstance(is_cached, bool)

- if spec.model_format in ["pytorch", "gptq", "awq", "mlx"]:
+ if spec.model_format in ["pytorch", "gptq", "awq", "fp8", "mlx"]:
  return cache_dir, is_cached
- elif spec.model_format in ["ggmlv3", "ggufv2"]:
- assert isinstance(spec, GgmlLLMSpecV1)
+ elif spec.model_format in ["ggufv2"]:
+ assert isinstance(spec, LlamaCppLLMSpecV1)
  filename = spec.model_file_name_template.format(quantization=quantization)
  model_path = os.path.join(cache_dir, filename)
  return model_path, is_cached
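For ggufv2 specs, get_file_location now resolves the concrete file by formatting the spec's model_file_name_template with the chosen quantization. A tiny illustration with made-up values; the template and paths here are hypothetical, not from llm_family.json:

```python
# Hypothetical values, for illustration only.
import os

cache_dir = "/home/user/.xinference/cache/qwen2-instruct-ggufv2-7b"
model_file_name_template = "qwen2-7b-instruct-{quantization}.gguf"
quantization = "q4_k_m"

filename = model_file_name_template.format(quantization=quantization)
print(os.path.join(cache_dir, filename))
# /home/user/.xinference/cache/qwen2-instruct-ggufv2-7b/qwen2-7b-instruct-q4_k_m.gguf
```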
@@ -885,3 +872,22 @@ def get_model_version(
  llm_family: LLMFamilyV1, llm_spec: LLMSpecV1, quantization: str
  ) -> str:
  return f"{llm_family.model_name}--{llm_spec.model_size_in_billions}B--{llm_spec.model_format}--{quantization}"
+
+
+ def _decode_image(_url):
+ if _url.startswith("data:"):
+ logging.info("Parse url by base64 decoder.")
+ # https://platform.openai.com/docs/guides/vision/uploading-base-64-encoded-images
+ # e.g. f"data:image/jpeg;base64,{base64_image}"
+ _type, data = _url.split(";")
+ _, ext = _type.split("/")
+ data = data[len("base64,") :]
+ data = base64.b64decode(data.encode("utf-8"))
+ return Image.open(BytesIO(data)).convert("RGB")
+ else:
+ try:
+ response = requests.get(_url)
+ except requests.exceptions.MissingSchema:
+ return Image.open(_url).convert("RGB")
+ else:
+ return Image.open(BytesIO(response.content)).convert("RGB")
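The new module-level _decode_image helper accepts an OpenAI-style base64 data: URI, an http(s) URL, or a bare local path (the MissingSchema fallback). A small sketch of producing a data: URI in the format its comment references; the file name is a placeholder:

```python
# Sketch only: "photo.jpg" is a placeholder path.
import base64

with open("photo.jpg", "rb") as f:
    b64 = base64.b64encode(f.read()).decode("utf-8")

url = f"data:image/jpeg;base64,{b64}"
# _decode_image(url) strips the "data:image/jpeg;base64," prefix, base64-decodes
# the remainder and returns a PIL image converted to RGB.
```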
@@ -21,6 +21,7 @@ import time
  import uuid
  from typing import (
  TYPE_CHECKING,
+ Any,
  AsyncGenerator,
  Dict,
  Iterable,
@@ -88,11 +89,12 @@ try:
  except ImportError:
  VLLM_INSTALLED = False

+ VLLM_SUPPORTED_VISION_MODEL_LIST: List[str] = [
+ "internvl2",
+ ]
  VLLM_SUPPORTED_MODELS = [
  "llama-2",
  "llama-3",
- "baichuan",
- "internlm-16k",
  "mistral-v0.1",
  "codestral-v0.1",
  "Yi",
@@ -105,13 +107,7 @@ VLLM_SUPPORTED_MODELS = [
  VLLM_SUPPORTED_CHAT_MODELS = [
  "llama-2-chat",
  "llama-3-instruct",
- "vicuna-v1.3",
- "vicuna-v1.5",
- "baichuan-chat",
  "baichuan-2-chat",
- "internlm-chat-7b",
- "internlm-chat-8k",
- "internlm-chat-20b",
  "internlm2-chat",
  "internlm2.5-chat",
  "internlm2.5-chat-1m",
@@ -338,7 +334,7 @@ class VLLMModel(LLM):
  return False
  if not cls._is_linux():
  return False
- if llm_spec.model_format not in ["pytorch", "gptq", "awq"]:
+ if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8"]:
  return False
  if llm_spec.model_format == "pytorch":
  if quantization != "none" and not (quantization is None):
@@ -421,7 +417,7 @@ class VLLMModel(LLM):

  async def async_generate(
  self,
- prompt: str,
+ prompt: Union[str, Dict[str, Any]],
  generate_config: Optional[Dict] = None,
  tools: object = False,
  ) -> Union[Completion, AsyncGenerator[CompletionChunk, None]]:
@@ -558,7 +554,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
  def match(
  cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
  ) -> bool:
- if llm_spec.model_format not in ["pytorch", "gptq", "awq"]:
+ if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8"]:
  return False
  if llm_spec.model_format == "pytorch":
  if quantization != "none" and not (quantization is None):
@@ -644,3 +640,106 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
  self.model_family, self.model_uid, c, tools
  )
  return self._to_chat_completion(c)
+
+
+ class VLLMVisionModel(VLLMModel, ChatModelMixin):
+ def load(self):
+ try:
+ import vllm
+ from vllm.engine.arg_utils import AsyncEngineArgs
+ from vllm.engine.async_llm_engine import AsyncLLMEngine
+ except ImportError:
+ error_message = "Failed to import module 'vllm'"
+ installation_guide = [
+ "Please make sure 'vllm' is installed. ",
+ "You can install it by `pip install vllm`\n",
+ ]
+ raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+
+ if vllm.__version__ >= "0.3.1":
+ # from vllm v0.3.1, it uses cupy as NCCL backend
+ # in which cupy will fork a process
+ # only for xoscar >= 0.3.0, new process is allowed in subpool
+ # besides, xinference set start method as forkserver for unix
+ # we need to set it to fork to make cupy NCCL work
+ multiprocessing.set_start_method("fork", force=True)
+
+ self._model_config = self._sanitize_model_config(self._model_config)
+
+ logger.info(
+ f"Loading {self.model_uid} with following model config: {self._model_config}"
+ )
+
+ engine_args = AsyncEngineArgs(
+ model=self.model_path,
+ **self._model_config,
+ )
+ self._engine = AsyncLLMEngine.from_engine_args(engine_args)
+
+ @classmethod
+ def match(
+ cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
+ ) -> bool:
+ if llm_spec.model_format != "pytorch":
+ return False
+ if llm_spec.model_format == "pytorch":
+ if quantization != "none" and not (quantization is None):
+ return False
+ if isinstance(llm_family, CustomLLMFamilyV1):
+ if llm_family.model_family not in VLLM_SUPPORTED_VISION_MODEL_LIST:
+ return False
+ else:
+ if llm_family.model_name not in VLLM_SUPPORTED_VISION_MODEL_LIST:
+ return False
+ if "vision" not in llm_family.model_ability:
+ return False
+ return VLLM_INSTALLED
+
+ def _sanitize_chat_config(
+ self,
+ generate_config: Optional[Dict] = None,
+ ) -> Dict:
+ if not generate_config:
+ generate_config = {}
+ if self.model_family.prompt_style:
+ if self.model_family.prompt_style.stop_token_ids:
+ generate_config.setdefault(
+ "stop_token_ids",
+ self.model_family.prompt_style.stop_token_ids.copy(),
+ )
+ return generate_config
+
+ async def async_chat(
+ self,
+ prompt: str,
+ system_prompt: Optional[str] = None,
+ chat_history: Optional[List[ChatCompletionMessage]] = None,
+ generate_config: Optional[Dict] = None,
+ ) -> Union[ChatCompletion, AsyncGenerator[ChatCompletionChunk, None]]:
+ # only support single image, waiting vllm support multi images
+ assert self.model_family.prompt_style is not None
+ prompt_style = self.model_family.prompt_style.copy()
+ chat_history = chat_history or []
+ prompt, images = self.get_prompt(prompt, chat_history, prompt_style)
+ logger.info(f"messages:{prompt}")
+ if len(images) == 0:
+ inputs = {
+ "prompt": prompt,
+ }
+ else:
+ inputs = {
+ "prompt": prompt,
+ "multi_modal_data": {"image": images[-1]}, # type: ignore
+ }
+ generate_config = self._sanitize_chat_config(generate_config)
+
+ stream = generate_config.get("stream", None)
+
+ if stream:
+ agen = await self.async_generate(inputs, generate_config)
+ assert isinstance(agen, AsyncGenerator)
+ return self._async_to_chat_completion_chunks(agen)
+ else:
+ c = await self.async_generate(inputs, generate_config)
+ assert not isinstance(c, AsyncGenerator)
+ return self._to_chat_completion(c)
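VLLMVisionModel routes internvl2 (pytorch format, unquantized) through vLLM's AsyncLLMEngine and attaches at most one image per request via multi_modal_data. A hedged sketch of exercising it from the Python client; the host/port, model size and the exact launch_model keyword set are assumptions based on the usual xinference client API rather than on this diff:

```python
# Assumption-laden sketch; adjust size/quantization to your deployment.
from xinference.client import RESTfulClient

client = RESTfulClient("http://localhost:9997")
model_uid = client.launch_model(
    model_name="internvl2",
    model_engine="vllm",          # picks the new VLLMVisionModel code path
    model_format="pytorch",
    model_size_in_billions=8,
    quantization="none",
)

model = client.get_model(model_uid)
result = model.chat(
    prompt=[
        {"type": "text", "text": "What is shown in this image?"},
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
    ],
    generate_config={"max_tokens": 256},
)
print(result["choices"][0]["message"]["content"])
```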
xinference/model/utils.py CHANGED
@@ -14,13 +14,11 @@
  import json
  import logging
  import os
- import shutil
  from json import JSONDecodeError
  from pathlib import Path
  from typing import Any, Callable, Dict, Optional, Tuple, Union

  import huggingface_hub
- from fsspec import AbstractFileSystem

  from ..constants import XINFERENCE_CACHE_DIR, XINFERENCE_ENV_MODEL_SRC
  from ..device_utils import get_available_device, is_device_available
@@ -220,12 +218,7 @@ def is_valid_model_uri(model_uri: Optional[str]) -> bool:
  return True


- def cache_from_uri(
- model_spec: CacheableModelSpec,
- self_hosted_storage: bool = False,
- ) -> str:
- from fsspec import AbstractFileSystem, filesystem
-
+ def cache_from_uri(model_spec: CacheableModelSpec) -> str:
  cache_dir = os.path.realpath(
  os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
  )
@@ -247,48 +240,6 @@ def cache_from_uri(
  os.makedirs(XINFERENCE_CACHE_DIR, exist_ok=True)
  os.symlink(src_root, cache_dir, target_is_directory=True)
  return cache_dir
- elif src_scheme in ["s3"]:
- # use anonymous connection for self-hosted storage.
- src_fs: AbstractFileSystem = filesystem(src_scheme, anon=self_hosted_storage)
- local_fs: AbstractFileSystem = filesystem("file")
-
- files_to_download = []
- os.makedirs(cache_dir, exist_ok=True)
-
- for path, _, files in src_fs.walk(model_spec.model_uri):
- for file in files:
- src_path = f"{path}/{file}"
- local_path = src_path.replace(src_root, cache_dir)
- files_to_download.append((src_path, local_path))
-
- from concurrent.futures import ThreadPoolExecutor
-
- failed = False
- with ThreadPoolExecutor(max_workers=min(len(files_to_download), 4)) as executor:
- futures = [
- (
- src_path,
- executor.submit(
- copy_from_src_to_dst, src_fs, src_path, local_fs, local_path
- ),
- )
- for src_path, local_path in files_to_download
- ]
- for src_path, future in futures:
- if failed:
- future.cancel()
- else:
- try:
- future.result()
- except:
- logger.error(f"Download {src_path} failed", exc_info=True)
- failed = True
-
- if failed:
- logger.warning(f"Removing cache directory: {cache_dir}")
- shutil.rmtree(cache_dir, ignore_errors=True)
- raise RuntimeError(f"Failed to download model '{model_spec.model_name}' ")
- return cache_dir
  else:
  raise ValueError(f"Unsupported URL scheme: {src_scheme}")

@@ -346,51 +297,6 @@ def cache(model_spec: CacheableModelSpec, model_description_type: type):
  return cache_dir


- def copy_from_src_to_dst(
- _src_fs: "AbstractFileSystem",
- _src_path: str,
- dst_fs: "AbstractFileSystem",
- dst_path: str,
- max_attempt: int = 3,
- ):
- from tqdm import tqdm
-
- for attempt in range(max_attempt):
- logger.info(f"Copy from {_src_path} to {dst_path}, attempt: {attempt}")
- try:
- with _src_fs.open(_src_path, "rb") as src_file:
- file_size = _src_fs.info(_src_path)["size"]
-
- dst_fs.makedirs(os.path.dirname(dst_path), exist_ok=True)
- with dst_fs.open(dst_path, "wb") as dst_file:
- chunk_size = 1024 * 1024 # 1 MB
-
- with tqdm(
- total=file_size,
- unit="B",
- unit_scale=True,
- unit_divisor=1024,
- desc=_src_path,
- ) as pbar:
- while True:
- chunk = src_file.read(chunk_size)
- if not chunk:
- break
- dst_file.write(chunk)
- pbar.update(len(chunk))
- logger.info(
- f"Copy from {_src_path} to {dst_path} finished, attempt: {attempt}"
- )
- break
- except:
- logger.error(
- f"Failed to copy from {_src_path} to {dst_path} on attempt {attempt + 1}",
- exc_info=True,
- )
- if attempt + 1 == max_attempt:
- raise
-
-
  def patch_trust_remote_code():
  """sentence-transformers calls transformers without the trust_remote_code=True, some embedding
  models will fail to load, e.g. jina-embeddings-v2-base-en