xinference 0.11.1__py3-none-any.whl → 0.11.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +30 -0
- xinference/client/restful/restful_client.py +29 -0
- xinference/core/cache_tracker.py +12 -1
- xinference/core/supervisor.py +30 -2
- xinference/core/utils.py +12 -0
- xinference/core/worker.py +4 -1
- xinference/deploy/cmdline.py +126 -0
- xinference/deploy/test/test_cmdline.py +24 -0
- xinference/model/llm/__init__.py +2 -0
- xinference/model/llm/llm_family.json +501 -6
- xinference/model/llm/llm_family.py +84 -10
- xinference/model/llm/llm_family_modelscope.json +198 -7
- xinference/model/llm/memory.py +332 -0
- xinference/model/llm/pytorch/core.py +2 -0
- xinference/model/llm/pytorch/intern_vl.py +387 -0
- xinference/model/llm/utils.py +13 -0
- xinference/model/llm/vllm/core.py +5 -2
- xinference/model/rerank/core.py +23 -1
- xinference/model/utils.py +17 -7
- xinference/thirdparty/deepseek_vl/models/processing_vlm.py +1 -1
- xinference/thirdparty/deepseek_vl/models/siglip_vit.py +2 -2
- xinference/thirdparty/llava/mm_utils.py +3 -2
- xinference/thirdparty/llava/model/llava_arch.py +1 -1
- xinference/thirdparty/omnilmm/chat.py +6 -5
- {xinference-0.11.1.dist-info → xinference-0.11.2.dist-info}/METADATA +8 -7
- {xinference-0.11.1.dist-info → xinference-0.11.2.dist-info}/RECORD +31 -29
- {xinference-0.11.1.dist-info → xinference-0.11.2.dist-info}/LICENSE +0 -0
- {xinference-0.11.1.dist-info → xinference-0.11.2.dist-info}/WHEEL +0 -0
- {xinference-0.11.1.dist-info → xinference-0.11.2.dist-info}/entry_points.txt +0 -0
- {xinference-0.11.1.dist-info → xinference-0.11.2.dist-info}/top_level.txt +0 -0
xinference/model/llm/pytorch/intern_vl.py ADDED
@@ -0,0 +1,387 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import base64
+import logging
+import time
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+from io import BytesIO
+from typing import Dict, Iterator, List, Optional, Tuple, Union
+
+import requests
+import torch
+import torchvision.transforms as T
+from PIL import Image
+from torchvision.transforms.functional import InterpolationMode
+
+from ....model.utils import select_device
+from ....types import (
+    ChatCompletion,
+    ChatCompletionChunk,
+    ChatCompletionMessage,
+    Completion,
+    CompletionChoice,
+    CompletionUsage,
+)
+from ..llm_family import LLMFamilyV1, LLMSpecV1
+from .core import PytorchChatModel, PytorchGenerateConfig
+
+logger = logging.getLogger(__name__)
+
+IMAGENET_MEAN = (0.485, 0.456, 0.406)
+IMAGENET_STD = (0.229, 0.224, 0.225)
+
+
+class InternVLChatModel(PytorchChatModel):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._tokenizer = None
+        self._model = None
+
+    @classmethod
+    def match(
+        cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
+    ) -> bool:
+        family = model_family.model_family or model_family.model_name
+        if "internvl" in family.lower():
+            return True
+        return False
+
+    def load(self, **kwargs):
+        from transformers import AutoModel, AutoTokenizer
+        from transformers.generation import GenerationConfig
+
+        device = self._pytorch_model_config.get("device", "auto")
+        device = select_device(device)
+        # for multiple GPU, set back to auto to make multiple devices work
+        device = "auto" if device == "cuda" else device
+
+        self._tokenizer = AutoTokenizer.from_pretrained(
+            self.model_path,
+            trust_remote_code=True,
+        )
+
+        kwargs = {
+            "torch_dtype": torch.bfloat16,
+            "low_cpu_mem_usage": True,
+            "trust_remote_code": True,
+            "device_map": device,
+        }
+
+        if "Int8" in self.model_spec.quantizations:
+            kwargs.update(
+                {
+                    "load_in_8bit": True,
+                    "device_map": device,
+                }
+            )
+        elif "mini" in self.model_family.model_name:
+            kwargs.pop("device_map")
+
+        self._model = AutoModel.from_pretrained(self.model_path, **kwargs).eval()
+
+        if "Int8" not in self.model_spec.quantizations:
+            self._model.cuda()
+
+        # Specify hyperparameters for generation
+        self._model.generation_config = GenerationConfig.from_pretrained(
+            self.model_path,
+            trust_remote_code=True,
+        )
+
+    def _message_content_to_intern(self, content):
+        def _load_image(_url):
+            if _url.startswith("data:"):
+                logging.info("Parse url by base64 decoder.")
+                # https://platform.openai.com/docs/guides/vision/uploading-base-64-encoded-images
+                # e.g. f"data:image/jpeg;base64,{base64_image}"
+                _type, data = _url.split(";")
+                _, ext = _type.split("/")
+                data = data[len("base64,") :]
+                data = base64.b64decode(data.encode("utf-8"))
+                return Image.open(BytesIO(data)).convert("RGB")
+            else:
+                try:
+                    response = requests.get(_url)
+                except requests.exceptions.MissingSchema:
+                    return Image.open(_url).convert("RGB")
+                else:
+                    return Image.open(BytesIO(response.content)).convert("RGB")
+
+        if not isinstance(content, str):
+            texts = []
+            image_urls = []
+            for c in content:
+                c_type = c.get("type")
+                if c_type == "text":
+                    texts.append(c["text"])
+                elif c_type == "image_url":
+                    image_urls.append(c["image_url"]["url"])
+            image_futures = []
+            with ThreadPoolExecutor() as executor:
+                for image_url in image_urls:
+                    fut = executor.submit(_load_image, image_url)
+                    image_futures.append(fut)
+            images = [fut.result() for fut in image_futures]
+            text = " ".join(texts)
+            if len(images) == 0:
+                return text, None
+            else:
+                return text, images
+        return content, None
+
+    def _history_content_to_intern(
+        self,
+        chat_history: List[ChatCompletionMessage],
+        IMG_START_TOKEN="<img>",
+        IMG_END_TOKEN="</img>",
+        IMG_CONTEXT_TOKEN="<IMG_CONTEXT>",
+    ):
+        def _image_to_piexl_values(images):
+            load_images = []
+            for image in images:
+                if image.startswith("data:"):
+                    logging.info("Parse url by base64 decoder.")
+                    # https://platform.openai.com/docs/guides/vision/uploading-base-64-encoded-images
+                    # e.g. f"data:image/jpeg;base64,{base64_image}"
+                    _type, data = image.split(";")
+                    _, ext = _type.split("/")
+                    data = data[len("base64,") :]
+                    data = base64.b64decode(data.encode("utf-8"))
+                    img = Image.open(BytesIO(data)).convert("RGB")
+                    pixel_value = (
+                        self._load_image(img, max_num=6).to(torch.bfloat16).cuda()
+                    )
+                    load_images.append(pixel_value)
+                else:
+                    try:
+                        response = requests.get(image)
+                    except requests.exceptions.MissingSchema:
+                        img = Image.open(image).convert("RGB")
+                    else:
+                        img = Image.open(BytesIO(response.content)).convert("RGB")
+                    pixel_value = (
+                        self._load_image(img, max_num=6).to(torch.bfloat16).cuda()
+                    )
+                    load_images.append(pixel_value)
+            return torch.cat(tuple(load_images), dim=0)
+
+        history: List[Tuple] = []
+        pixel_values = None
+        for i in range(0, len(chat_history), 2):
+            tmp = []
+            images: List[str] = []
+            user = chat_history[i]["content"]
+            if isinstance(user, List):
+                for content in user:
+                    c_type = content.get("type")
+                    if c_type == "text":
+                        tmp.append(content["text"])
+                    elif c_type == "image_url" and not history:
+                        images.append(content["image_url"]["url"])
+                if not history:
+                    pixel_values = _image_to_piexl_values(images)
+                    image_bs = pixel_values.shape[0]
+                    image_tokens = (
+                        IMG_START_TOKEN
+                        + IMG_CONTEXT_TOKEN * self._model.num_image_token * image_bs
+                        + IMG_END_TOKEN
+                    )
+                    tmp[0] = image_tokens + "\n" + tmp[0]
+            else:
+                tmp.append(user)
+            tmp.append(chat_history[i + 1]["content"])
+            history.append(tuple(tmp))
+        return history, pixel_values
+
+    def _load_image(_url):
+        if _url.startswith("data:"):
+            logging.info("Parse url by base64 decoder.")
+            # https://platform.openai.com/docs/guides/vision/uploading-base-64-encoded-images
+            # e.g. f"data:image/jpeg;base64,{base64_image}"
+            _type, data = _url.split(";")
+            _, ext = _type.split("/")
+            data = data[len("base64,") :]
+            data = base64.b64decode(data.encode("utf-8"))
+
+            return Image.open(BytesIO(data)).convert("RGB")
+        else:
+            try:
+                response = requests.get(_url)
+            except requests.exceptions.MissingSchema:
+                return Image.open(_url).convert("RGB")
+            else:
+                return Image.open(BytesIO(response.content)).convert("RGB")
+
+        if not isinstance(content, str):
+            texts = []
+            image_urls = []
+            for c in content:
+                c_type = c.get("type")
+                if c_type == "text":
+                    texts.append(c["text"])
+                elif c_type == "image_url":
+                    image_urls.append(c["image_url"]["url"])
+            image_futures = []
+            with ThreadPoolExecutor() as executor:
+                for image_url in image_urls:
+                    fut = executor.submit(_load_image, image_url)
+                    image_futures.append(fut)
+            images = [fut.result() for fut in image_futures]
+            text = " ".join(texts)
+            if len(images) == 0:
+                return text
+            else:
+                return text, images
+        return content
+
+    def _find_closest_aspect_ratio(
+        self, aspect_ratio, target_ratios, width, height, image_size
+    ):
+        best_ratio_diff = float("inf")
+        best_ratio = (1, 1)
+        area = width * height
+        for ratio in target_ratios:
+            target_aspect_ratio = ratio[0] / ratio[1]
+            ratio_diff = abs(aspect_ratio - target_aspect_ratio)
+            if ratio_diff < best_ratio_diff:
+                best_ratio_diff = ratio_diff
+                best_ratio = ratio
+            elif ratio_diff == best_ratio_diff:
+                if area > 0.5 * image_size * image_size * ratio[0] * ratio[1]:
+                    best_ratio = ratio
+        return best_ratio
+
+    def _dynamic_preprocess(
+        self, image, min_num=1, max_num=6, image_size=448, use_thumbnail=False
+    ):
+        orig_width, orig_height = image.size
+        aspect_ratio = orig_width / orig_height
+
+        # calculate the existing image aspect ratio
+        target_ratios = set(
+            (i, j)
+            for n in range(min_num, max_num + 1)
+            for i in range(1, n + 1)
+            for j in range(1, n + 1)
+            if i * j <= max_num and i * j >= min_num
+        )
+        target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])
+
+        # find the closest aspect ratio to the target
+        target_aspect_ratio = self._find_closest_aspect_ratio(
+            aspect_ratio, target_ratios, orig_width, orig_height, image_size
+        )
+
+        # calculate the target width and height
+        target_width = image_size * target_aspect_ratio[0]
+        target_height = image_size * target_aspect_ratio[1]
+        blocks = target_aspect_ratio[0] * target_aspect_ratio[1]
+
+        # resize the image
+        resized_img = image.resize((target_width, target_height))
+        processed_images = []
+        for i in range(blocks):
+            box = (
+                (i % (target_width // image_size)) * image_size,
+                (i // (target_width // image_size)) * image_size,
+                ((i % (target_width // image_size)) + 1) * image_size,
+                ((i // (target_width // image_size)) + 1) * image_size,
+            )
+            # split the image
+            split_img = resized_img.crop(box)
+            processed_images.append(split_img)
+        assert len(processed_images) == blocks
+        if use_thumbnail and len(processed_images) != 1:
+            thumbnail_img = image.resize((image_size, image_size))
+            processed_images.append(thumbnail_img)
+        return processed_images
+
+    def _build_transform(self, input_size):
+        MEAN, STD = IMAGENET_MEAN, IMAGENET_STD
+        transform = T.Compose(
+            [
+                T.Lambda(lambda img: img.convert("RGB") if img.mode != "RGB" else img),
+                T.Resize(
+                    (input_size, input_size), interpolation=InterpolationMode.BICUBIC
+                ),
+                T.ToTensor(),
+                T.Normalize(mean=MEAN, std=STD),
+            ]
+        )
+        return transform
+
+    def _load_image(self, image_file, input_size=448, max_num=6):
+        transform = self._build_transform(input_size=input_size)
+        images = self._dynamic_preprocess(
+            image_file, image_size=input_size, use_thumbnail=True, max_num=max_num
+        )
+        pixel_values = [transform(image) for image in images]
+        pixel_values = torch.stack(pixel_values)
+        return pixel_values
+
+    def chat(
+        self,
+        prompt: Union[str, List[Dict]],
+        system_prompt: Optional[str] = None,
+        chat_history: Optional[List[ChatCompletionMessage]] = None,
+        generate_config: Optional[PytorchGenerateConfig] = None,
+    ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
+        if generate_config and generate_config.pop("stream"):
+            raise Exception(
+                f"Chat with model {self.model_family.model_name} does not support stream."
+            )
+        sanitized_config = {
+            "num_beams": 1,
+            "max_new_tokens": generate_config.get("max_tokens", 512)
+            if generate_config
+            else 512,
+            "do_sample": False,
+        }
+
+        content, image = self._message_content_to_intern(prompt)
+
+        history = None
+        if chat_history:
+            history, pixel_values = self._history_content_to_intern(chat_history)
+        else:
+            load_images = []
+            for img in image:
+                pixel_value = self._load_image(img, max_num=6).to(torch.bfloat16).cuda()
+                load_images.append(pixel_value)
+            pixel_values = torch.cat(tuple(load_images), dim=0)
+
+        response, history = self._model.chat(
+            self._tokenizer,
+            pixel_values,
+            content,
+            sanitized_config,
+            history=history,
+            return_history=True,
+        )
+        chunk = Completion(
+            id=str(uuid.uuid1()),
+            object="text_completion",
+            created=int(time.time()),
+            model=self.model_uid,
+            choices=[
+                CompletionChoice(
+                    index=0, text=response, finish_reason="stop", logprobs=None
+                )
+            ],
+            usage=CompletionUsage(
+                prompt_tokens=-1, completion_tokens=-1, total_tokens=-1
+            ),
+        )
+        return self._to_chat_completion(chunk)
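The core of the new InternVL support is the dynamic tiling in _dynamic_preprocess: the image is resized onto the grid of 448x448 tiles whose aspect ratio is closest to the original (using at most max_num tiles), and a square thumbnail is appended whenever the image was split. Below is a minimal, dependency-free sketch of just the tile-count logic; the function name and return value are illustrative only, since the real code operates on PIL images and returns stacked tensors.

def count_tiles(width, height, min_num=1, max_num=6, image_size=448, use_thumbnail=True):
    # Enumerate every (cols, rows) grid whose tile count stays within [min_num, max_num],
    # mirroring the target_ratios set built in _dynamic_preprocess above.
    target_ratios = sorted(
        {
            (i, j)
            for n in range(min_num, max_num + 1)
            for i in range(1, n + 1)
            for j in range(1, n + 1)
            if min_num <= i * j <= max_num
        },
        key=lambda x: x[0] * x[1],
    )
    aspect_ratio, area = width / height, width * height
    best_diff, best = float("inf"), (1, 1)
    for cols, rows in target_ratios:
        diff = abs(aspect_ratio - cols / rows)
        if diff < best_diff:
            best_diff, best = diff, (cols, rows)
        elif diff == best_diff and area > 0.5 * image_size * image_size * cols * rows:
            best = (cols, rows)
    blocks = best[0] * best[1]
    if use_thumbnail and blocks != 1:
        blocks += 1  # the extra square thumbnail tile
    return blocks, best

print(count_tiles(1280, 960))  # (7, (3, 2)): six 448x448 crops plus a thumbnail

With the defaults above, _load_image would therefore return a pixel tensor of shape (7, 3, 448, 448) for a 1280x960 input.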
xinference/model/llm/utils.py CHANGED
@@ -456,6 +456,19 @@ Begin!"""
                     ret += f"<|{role}|>{prompt_style.intra_message_sep}"
             ret += "<|assistant|>\n"
             return ret
+        elif prompt_style.style_name == "c4ai-command-r":
+            ret = (
+                f"<BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>"
+                f"{prompt_style.system_prompt}{prompt_style.inter_message_sep}"
+            )
+            for i, message in enumerate(chat_history):
+                role = get_role(message["role"])
+                content = message["content"]
+                if content:
+                    ret += f"{role}{content}{prompt_style.inter_message_sep}"
+                else:
+                    ret += role
+            return ret
         else:
             raise ValueError(f"Invalid prompt style: {prompt_style.style_name}")
 
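To illustrate what this new branch produces, here is a hedged sketch of the string it assembles. The role strings and separator below follow Cohere's published Command R chat format; they stand in for whatever get_role() and prompt_style.inter_message_sep actually supply from llm_family.json, and are assumptions rather than values taken from this diff.

SEP = "<|END_OF_TURN_TOKEN|>"  # assumed inter_message_sep
ROLES = {  # assumed get_role() mapping
    "user": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>",
    "assistant": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
}

def render_command_r(system_prompt, chat_history):
    # Mirrors the c4ai-command-r branch added above.
    ret = f"<BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{system_prompt}{SEP}"
    for message in chat_history:
        role, content = ROLES[message["role"]], message["content"]
        ret += f"{role}{content}{SEP}" if content else role
    return ret

print(render_command_r(
    "You are a helpful assistant.",
    [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": ""}],
))
# Single line, wrapped here:
# <BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>You are a helpful assistant.<|END_OF_TURN_TOKEN|>
# <|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>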
xinference/model/llm/vllm/core.py CHANGED
@@ -97,6 +97,8 @@ VLLM_SUPPORTED_MODELS = [
     "Yi-1.5",
     "code-llama",
     "code-llama-python",
+    "deepseek",
+    "deepseek-coder",
 ]
 VLLM_SUPPORTED_CHAT_MODELS = [
     "llama-2-chat",
@@ -125,6 +127,7 @@ VLLM_SUPPORTED_CHAT_MODELS = [
 ]
 if VLLM_INSTALLED and vllm.__version__ >= "0.3.0":
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen1.5-chat")
+    VLLM_SUPPORTED_MODELS.append("codeqwen1.5")
     VLLM_SUPPORTED_CHAT_MODELS.append("codeqwen1.5-chat")
 
 if VLLM_INSTALLED and vllm.__version__ >= "0.3.2":
@@ -136,8 +139,8 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.3.3":
 
 if VLLM_INSTALLED and vllm.__version__ >= "0.4.0":
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen1.5-moe-chat")
-
-
+    VLLM_SUPPORTED_CHAT_MODELS.append("c4ai-command-r-v01")
+    VLLM_SUPPORTED_CHAT_MODELS.append("c4ai-command-r-v01-4bit")
 
 
 class VLLMModel(LLM):
xinference/model/rerank/core.py CHANGED
@@ -46,7 +46,7 @@ def get_rerank_model_descriptions():
 class RerankModelSpec(CacheableModelSpec):
     model_name: str
     language: List[str]
-    type: Optional[str] = "
+    type: Optional[str] = "unknown"
     model_id: str
     model_revision: Optional[str]
     model_hub: str = "huggingface"
@@ -118,6 +118,28 @@ class RerankModel:
         self._use_fp16 = use_fp16
         self._model = None
         self._counter = 0
+        if model_spec.type == "unknown":
+            model_spec.type = self._auto_detect_type(model_path)
+
+    @staticmethod
+    def _auto_detect_type(model_path):
+        """This method may not be stable due to the fact that the tokenizer name may be changed.
+        Therefore, we only use this method for unknown model types."""
+        from transformers import AutoTokenizer
+
+        type_mapper = {
+            "LlamaTokenizerFast": "LLM-based layerwise",
+            "GemmaTokenizerFast": "LLM-based",
+            "XLMRobertaTokenizerFast": "normal",
+        }
+
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        rerank_type = type_mapper.get(type(tokenizer).__name__)
+        if rerank_type is None:
+            raise Exception(
+                f"Can't determine the rerank type based on the tokenizer {tokenizer}"
+            )
+        return rerank_type
 
     def load(self):
         if self._model_spec.type == "normal":
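The practical effect is that rerank models registered without an explicit type now get classified from their tokenizer class at load time. A minimal standalone sketch of the same heuristic, with a placeholder model path and a hypothetical helper name:

from transformers import AutoTokenizer

TYPE_BY_TOKENIZER = {
    "LlamaTokenizerFast": "LLM-based layerwise",
    "GemmaTokenizerFast": "LLM-based",
    "XLMRobertaTokenizerFast": "normal",
}

def detect_rerank_type(model_path: str) -> str:
    # Same idea as RerankModel._auto_detect_type above: key off the tokenizer class name.
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    return TYPE_BY_TOKENIZER.get(type(tokenizer).__name__, "unknown")

# "/models/bge-reranker-v2-m3" is a placeholder for a locally cached reranker;
# an XLM-R based model like this one would come back as "normal".
print(detect_rerank_type("/models/bge-reranker-v2-m3"))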
xinference/model/utils.py CHANGED
@@ -19,6 +19,7 @@ from json import JSONDecodeError
 from pathlib import Path
 from typing import Any, Callable, Dict, Optional, Tuple, Union
 
+import huggingface_hub
 from fsspec import AbstractFileSystem
 
 from ..constants import XINFERENCE_CACHE_DIR, XINFERENCE_ENV_MODEL_SRC
@@ -27,6 +28,7 @@ from .core import CacheableModelSpec
 
 logger = logging.getLogger(__name__)
 MAX_ATTEMPTS = 3
+IS_NEW_HUGGINGFACE_HUB: bool = huggingface_hub.__version__ >= "0.23.0"
 
 
 def is_locale_chinese_simplified() -> bool:
@@ -76,6 +78,13 @@ def symlink_local_file(path: str, local_dir: str, relpath: str) -> str:
     return local_dir_filepath
 
 
+def create_symlink(download_dir: str, cache_dir: str):
+    for subdir, dirs, files in os.walk(download_dir):
+        for file in files:
+            relpath = os.path.relpath(os.path.join(subdir, file), download_dir)
+            symlink_local_file(os.path.join(subdir, file), cache_dir, relpath)
+
+
 def retry_download(
     download_func: Callable,
     model_name: str,
@@ -306,22 +315,23 @@ def cache(model_spec: CacheableModelSpec, model_description_type: type):
             model_spec.model_id,
             revision=model_spec.model_revision,
         )
-        for subdir, dirs, files in os.walk(download_dir):
-            for file in files:
-                relpath = os.path.relpath(os.path.join(subdir, file), download_dir)
-                symlink_local_file(os.path.join(subdir, file), cache_dir, relpath)
+        create_symlink(download_dir, cache_dir)
     else:
        from huggingface_hub import snapshot_download as hf_download
 
-        download_dir = retry_download(
+        use_symlinks = {}
+        if not IS_NEW_HUGGINGFACE_HUB:
+            use_symlinks = {"local_dir_use_symlinks": True, "local_dir": cache_dir}
+        download_dir = retry_download(
            hf_download,
            model_spec.model_name,
            None,
            model_spec.model_id,
            revision=model_spec.model_revision,
-            local_dir=cache_dir,
-            local_dir_use_symlinks=True,
+            **use_symlinks,
        )
+        if IS_NEW_HUGGINGFACE_HUB:
+            create_symlink(download_dir, cache_dir)
     with open(meta_path, "w") as f:
         import json
 
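The apparent motivation is that huggingface_hub 0.23 changed how local_dir downloads behave (local_dir_use_symlinks is deprecated there), so on newer versions xinference now downloads into the shared HF cache and then links the files into its own cache directory via the new create_symlink helper. A rough sketch of that flow under the new behaviour, with a placeholder repo id and cache path and a standalone re-implementation of the link step:

import os

from huggingface_hub import snapshot_download

def link_into_cache(download_dir: str, cache_dir: str) -> None:
    # Same walk-and-link pattern as create_symlink()/symlink_local_file() above,
    # re-implemented here only for illustration.
    for subdir, _dirs, files in os.walk(download_dir):
        for name in files:
            src = os.path.join(subdir, name)
            dst = os.path.join(cache_dir, os.path.relpath(src, download_dir))
            os.makedirs(os.path.dirname(dst), exist_ok=True)
            if not os.path.lexists(dst):
                os.symlink(src, dst)

# Placeholder repo id and cache path.
download_dir = snapshot_download("BAAI/bge-small-en-v1.5")  # lands in the shared HF cache
link_into_cache(download_dir, "/tmp/xinference/cache/bge-small-en-v1.5")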
xinference/thirdparty/deepseek_vl/models/processing_vlm.py CHANGED
@@ -25,8 +25,8 @@ from PIL.Image import Image
 from transformers import LlamaTokenizerFast
 from transformers.processing_utils import ProcessorMixin
 
-from .image_processing_vlm import VLMImageProcessor
 from ..utils.conversation import get_conv_template
+from .image_processing_vlm import VLMImageProcessor
 
 
 class DictOutput(object):
xinference/thirdparty/deepseek_vl/models/siglip_vit.py CHANGED
@@ -92,7 +92,7 @@ def _no_grad_trunc_normal_(tensor, mean, std, a, b):
 def trunc_normal_(tensor, mean=0.0, std=1.0, a=-2.0, b=2.0):
     # type: (torch.Tensor, float, float, float, float) -> torch.Tensor
     r"""The original timm.models.layers.weight_init.trunc_normal_ can not handle bfloat16 yet, here we first
-    convert the tensor to float32, apply the trunc_normal_() in float32, and then convert it back to its
+    convert the tensor to float32, apply the trunc_normal_() in float32, and then convert it back to its original dtype.
     Fills the input Tensor with values drawn from a truncated normal distribution. The values are effectively drawn
     from the normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
     with values outside :math:`[a, b]` redrawn until they are within
@@ -305,7 +305,7 @@ class VisionTransformer(nn.Module):
            img_size: Input image size.
            patch_size: Patch size.
            in_chans: Number of image input channels.
-            num_classes:
+            num_classes: Number of classes for classification head.
            global_pool: Type of global pooling for final sequence (default: 'token').
            embed_dim: Transformer embedding dimension.
            depth: Depth of transformer.
xinference/thirdparty/llava/mm_utils.py CHANGED
@@ -2,11 +2,12 @@ import base64
 from io import BytesIO
 
 import torch
-from .model import LlavaLlamaForCausalLM
-from .model.constants import IMAGE_TOKEN_INDEX
 from PIL import Image
 from transformers import AutoTokenizer, StoppingCriteria
 
+from .model import LlavaLlamaForCausalLM
+from .model.constants import IMAGE_TOKEN_INDEX
+
 
 def load_image_from_base64(image):
     return Image.open(BytesIO(base64.b64decode(image)))
xinference/thirdparty/llava/model/llava_arch.py CHANGED
@@ -17,9 +17,9 @@ import os
 from abc import ABC, abstractmethod
 
 import torch
-from .constants import IGNORE_INDEX, IMAGE_TOKEN_INDEX, key_info
 
 from .clip_encoder.builder import build_vision_tower
+from .constants import IGNORE_INDEX, IMAGE_TOKEN_INDEX, key_info
 from .multimodal_projector.builder import build_vision_projector
 
 
xinference/thirdparty/omnilmm/chat.py CHANGED
@@ -7,11 +7,6 @@ import torch
 from PIL import Image
 from transformers import AutoModel, AutoTokenizer
 
-from .model.omnilmm import OmniLMMForCausalLM
-from .model.utils import build_transform
-from .train.train_utils import omni_preprocess
-from .utils import disable_torch_init
-
 DEFAULT_IMAGE_TOKEN = "<image>"
 DEFAULT_IMAGE_PATCH_TOKEN = "<im_patch>"
 DEFAULT_IM_START_TOKEN = "<im_start>"
@@ -21,6 +16,10 @@ DEFAULT_IM_END_TOKEN = "<im_end>"
 def init_omni_lmm(model_path, device_map):
     from accelerate import init_empty_weights, load_checkpoint_and_dispatch
 
+    from .model.omnilmm import OmniLMMForCausalLM
+    from .model.utils import build_transform
+    from .utils import disable_torch_init
+
     torch.backends.cuda.matmul.allow_tf32 = True
     disable_torch_init()
     model_name = os.path.expanduser(model_path)
@@ -98,6 +97,8 @@ def expand_question_into_multimodal(
 
 
 def wrap_question_for_omni_lmm(question, image_token_len, tokenizer):
+    from .train.train_utils import omni_preprocess
+
     question = expand_question_into_multimodal(
         question,
         image_token_len,