xinference 0.8.2__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of xinference might be problematic.

Files changed (53)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +22 -7
  3. xinference/client/restful/restful_client.py +10 -0
  4. xinference/constants.py +14 -4
  5. xinference/core/chat_interface.py +8 -1
  6. xinference/core/resource.py +19 -12
  7. xinference/core/supervisor.py +94 -30
  8. xinference/core/utils.py +29 -1
  9. xinference/core/worker.py +18 -3
  10. xinference/deploy/local.py +2 -2
  11. xinference/deploy/supervisor.py +2 -2
  12. xinference/model/audio/model_spec.json +29 -1
  13. xinference/model/embedding/model_spec.json +24 -0
  14. xinference/model/embedding/model_spec_modelscope.json +24 -0
  15. xinference/model/llm/__init__.py +2 -0
  16. xinference/model/llm/core.py +2 -0
  17. xinference/model/llm/ggml/chatglm.py +15 -6
  18. xinference/model/llm/llm_family.json +56 -0
  19. xinference/model/llm/llm_family_modelscope.json +56 -0
  20. xinference/model/llm/pytorch/chatglm.py +3 -3
  21. xinference/model/llm/pytorch/core.py +1 -0
  22. xinference/model/llm/pytorch/utils.py +21 -9
  23. xinference/model/llm/pytorch/yi_vl.py +246 -0
  24. xinference/model/rerank/core.py +1 -1
  25. xinference/model/rerank/model_spec.json +6 -0
  26. xinference/model/rerank/model_spec_modelscope.json +7 -0
  27. xinference/thirdparty/__init__.py +0 -0
  28. xinference/thirdparty/llava/__init__.py +1 -0
  29. xinference/thirdparty/llava/conversation.py +205 -0
  30. xinference/thirdparty/llava/mm_utils.py +122 -0
  31. xinference/thirdparty/llava/model/__init__.py +1 -0
  32. xinference/thirdparty/llava/model/clip_encoder/__init__.py +0 -0
  33. xinference/thirdparty/llava/model/clip_encoder/builder.py +11 -0
  34. xinference/thirdparty/llava/model/clip_encoder/clip_encoder.py +86 -0
  35. xinference/thirdparty/llava/model/constants.py +6 -0
  36. xinference/thirdparty/llava/model/llava_arch.py +385 -0
  37. xinference/thirdparty/llava/model/llava_llama.py +163 -0
  38. xinference/thirdparty/llava/model/multimodal_projector/__init__.py +0 -0
  39. xinference/thirdparty/llava/model/multimodal_projector/builder.py +64 -0
  40. xinference/types.py +1 -1
  41. xinference/web/ui/build/asset-manifest.json +3 -3
  42. xinference/web/ui/build/index.html +1 -1
  43. xinference/web/ui/build/static/js/{main.abedc3c9.js → main.15822aeb.js} +3 -3
  44. xinference/web/ui/build/static/js/{main.abedc3c9.js.map → main.15822aeb.js.map} +1 -1
  45. xinference/web/ui/node_modules/.cache/babel-loader/65ca3ba225b8c8dac907210545b51f2fcdb2591f0feeb7195f1c037f2bc956a0.json +1 -0
  46. {xinference-0.8.2.dist-info → xinference-0.8.3.dist-info}/METADATA +21 -18
  47. {xinference-0.8.2.dist-info → xinference-0.8.3.dist-info}/RECORD +52 -38
  48. xinference/web/ui/node_modules/.cache/babel-loader/c157e34990b23834b7ad4c13c42962209942c60f8130978c1514f3d085cfaea0.json +0 -1
  49. /xinference/web/ui/build/static/js/{main.abedc3c9.js.LICENSE.txt → main.15822aeb.js.LICENSE.txt} +0 -0
  50. {xinference-0.8.2.dist-info → xinference-0.8.3.dist-info}/LICENSE +0 -0
  51. {xinference-0.8.2.dist-info → xinference-0.8.3.dist-info}/WHEEL +0 -0
  52. {xinference-0.8.2.dist-info → xinference-0.8.3.dist-info}/entry_points.txt +0 -0
  53. {xinference-0.8.2.dist-info → xinference-0.8.3.dist-info}/top_level.txt +0 -0
xinference/thirdparty/llava/conversation.py (new file)
@@ -0,0 +1,205 @@
+import dataclasses
+from enum import Enum, auto
+from typing import List
+
+
+class SeparatorStyle(Enum):
+    """Different separator style."""
+
+    SINGLE = auto()
+
+
+@dataclasses.dataclass
+class Conversation:
+    """A class that keeps all conversation history."""
+
+    system: str
+    roles: List[str]
+    messages: List[List[str]]
+    offset: int
+    sep_style: SeparatorStyle = SeparatorStyle.SINGLE
+    sep: str = "###"
+    sep2: str = None
+    version: str = "Unknown"
+
+    skip_next: bool = False
+
+    def get_prompt(self):
+        messages = self.messages
+        if len(messages) > 0 and type(messages[0][1]) is tuple:
+            messages = self.messages.copy()
+            init_role, init_msg = messages[0].copy()
+            init_msg = init_msg[0].replace("<image_placeholder>", "").strip()
+            if "mmtag" in self.version:
+                messages[0] = (init_role, init_msg)
+                messages.insert(
+                    0, (self.roles[0], "<Image><image_placeholder></Image>")
+                )
+                messages.insert(1, (self.roles[1], "Received."))
+            else:
+                messages[0] = (init_role, "<image_placeholder>\n" + init_msg)
+
+        if self.sep_style == SeparatorStyle.SINGLE:
+            ret = self.system + "\n\n" + self.sep + " "
+            for role, message in messages:
+                if message:
+                    if type(message) is tuple:
+                        message, _, _ = message
+                    ret += role + ": " + message + "\n" + self.sep + " "
+                else:
+                    ret += role + ":"
+        else:
+            raise ValueError(f"Invalid style: {self.sep_style}")
+
+        return ret
+
+    def append_message(self, role, message):
+        self.messages.append([role, message])
+
+    def get_images(self, return_pil=False):
+        images = []
+        for i, (role, msg) in enumerate(self.messages[self.offset :]):
+            if i % 2 == 0:
+                if type(msg) is tuple:
+                    import base64
+                    from io import BytesIO
+
+                    from PIL import Image
+
+                    msg, image, image_process_mode = msg
+                    if image_process_mode == "Pad":
+
+                        def expand2square(pil_img, background_color=(122, 116, 104)):
+                            width, height = pil_img.size
+                            if width == height:
+                                return pil_img
+                            elif width > height:
+                                result = Image.new(
+                                    pil_img.mode, (width, width), background_color
+                                )
+                                result.paste(pil_img, (0, (width - height) // 2))
+                                return result
+                            else:
+                                result = Image.new(
+                                    pil_img.mode, (height, height), background_color
+                                )
+                                result.paste(pil_img, ((height - width) // 2, 0))
+                                return result
+
+                        image = expand2square(image)
+                    elif image_process_mode == "Crop":
+                        pass
+                    elif image_process_mode == "Resize":
+                        image = image.resize((336, 336))
+                    else:
+                        raise ValueError(
+                            f"Invalid image_process_mode: {image_process_mode}"
+                        )
+                    max_hw, min_hw = max(image.size), min(image.size)
+                    aspect_ratio = max_hw / min_hw
+                    max_len, min_len = 800, 400
+                    shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw))
+                    longest_edge = int(shortest_edge * aspect_ratio)
+                    W, H = image.size
+                    if H > W:
+                        H, W = longest_edge, shortest_edge
+                    else:
+                        H, W = shortest_edge, longest_edge
+                    image = image.resize((W, H))
+                    if return_pil:
+                        images.append(image)
+                    else:
+                        buffered = BytesIO()
+                        image.save(buffered, format="PNG")
+                        img_b64_str = base64.b64encode(buffered.getvalue()).decode()
+                        images.append(img_b64_str)
+        return images
+
+    def to_gradio_chatbot(self):
+        ret = []
+        for i, (role, msg) in enumerate(self.messages[self.offset :]):
+            if i % 2 == 0:
+                if type(msg) is tuple:
+                    import base64
+                    from io import BytesIO
+
+                    msg, image, image_process_mode = msg
+                    max_hw, min_hw = max(image.size), min(image.size)
+                    aspect_ratio = max_hw / min_hw
+                    max_len, min_len = 800, 400
+                    shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw))
+                    longest_edge = int(shortest_edge * aspect_ratio)
+                    W, H = image.size
+                    if H > W:
+                        H, W = longest_edge, shortest_edge
+                    else:
+                        H, W = shortest_edge, longest_edge
+                    image = image.resize((W, H))
+                    buffered = BytesIO()
+                    image.save(buffered, format="JPEG")
+                    img_b64_str = base64.b64encode(buffered.getvalue()).decode()
+                    img_str = f'<img src="data:image/png;base64,{img_b64_str}" alt="user upload image" />'
+                    msg = img_str + msg.replace("<image_placeholder>", "").strip()
+                    ret.append([msg, None])
+                else:
+                    ret.append([msg, None])
+            else:
+                ret[-1][-1] = msg
+        return ret
+
+    def copy(self):
+        return Conversation(
+            system=self.system,
+            roles=self.roles,
+            messages=[[x, y] for x, y in self.messages],
+            offset=self.offset,
+            sep_style=self.sep_style,
+            sep=self.sep,
+            sep2=self.sep2,
+            version=self.version,
+        )
+
+    def dict(self):
+        if len(self.get_images()) > 0:
+            return {
+                "system": self.system,
+                "roles": self.roles,
+                "messages": [
+                    [x, y[0] if type(y) is tuple else y] for x, y in self.messages
+                ],
+                "offset": self.offset,
+                "sep": self.sep,
+                "sep2": self.sep2,
+            }
+        return {
+            "system": self.system,
+            "roles": self.roles,
+            "messages": self.messages,
+            "offset": self.offset,
+            "sep": self.sep,
+            "sep2": self.sep2,
+        }
+
+
+mm_default_conv = Conversation(
+    system="This is a chat between an inquisitive human and an AI assistant. "
+    "Assume the role of the AI assistant. "
+    "Read all the images carefully, and respond to the human's questions with informative, helpful, detailed and polite answers. "
+    "这是一个好奇的人类和一个人工智能助手之间的对话。"
+    "假设你扮演这个AI助手的角色。仔细阅读所有的图像,并对人类的问题做出信息丰富、有帮助、详细的和礼貌的回答。",
+    roles=("Human", "Assistant"),
+    messages=(),
+    offset=0,
+    sep_style=SeparatorStyle.SINGLE,
+    sep="###",
+)
+
+
+default_conversation = mm_default_conv
+conv_templates = {
+    "mm_default": mm_default_conv,
+}
+
+
+if __name__ == "__main__":
+    print(default_conversation.get_prompt())
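For orientation, a minimal sketch of how the Conversation template above is meant to be driven (this usage is inferred from the code in this hunk, not part of the diff): copy the shared `mm_default_conv` template, append one entry per turn, and render the prompt with `get_prompt()`. Leaving the `Assistant` turn as `None` makes the prompt end with "Assistant:" so the model continues from there.

    from xinference.thirdparty.llava.conversation import conv_templates

    conv = conv_templates["mm_default"].copy()  # copy() so the shared template is not mutated
    conv.append_message(conv.roles[0], "<image_placeholder>\nWhat is shown in this image?")
    conv.append_message(conv.roles[1], None)    # empty slot: prompt ends with "Assistant:"
    prompt = conv.get_prompt()
    # -> "<system>\n\n### Human: <image_placeholder>\nWhat is shown in this image?\n### Assistant:"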
xinference/thirdparty/llava/mm_utils.py (new file)
@@ -0,0 +1,122 @@
+import base64
+from io import BytesIO
+
+import torch
+from .model import LlavaLlamaForCausalLM
+from .model.constants import IMAGE_TOKEN_INDEX
+from PIL import Image
+from transformers import AutoTokenizer, StoppingCriteria
+
+
+def load_image_from_base64(image):
+    return Image.open(BytesIO(base64.b64decode(image)))
+
+
+def process_images(images, image_processor, model_cfg):
+    return image_processor(images, return_tensors="pt")["pixel_values"]
+
+
+def expand2square(pil_img, background_color):
+    width, height = pil_img.size
+    if width == height:
+        return pil_img
+    elif width > height:
+        result = Image.new(pil_img.mode, (width, width), background_color)
+        result.paste(pil_img, (0, (width - height) // 2))
+        return result
+    else:
+        result = Image.new(pil_img.mode, (height, height), background_color)
+        result.paste(pil_img, ((height - width) // 2, 0))
+        return result
+
+
+def tokenizer_image_token(
+    prompt, tokenizer, image_token_index=IMAGE_TOKEN_INDEX, return_tensors=None
+):
+    prompt_chunks = [
+        tokenizer(chunk).input_ids for chunk in prompt.split("<image_placeholder>")
+    ]
+
+    def insert_separator(X, sep):
+        return [ele for sublist in zip(X, [sep] * len(X)) for ele in sublist][:-1]
+
+    input_ids = []
+    offset = 0
+    if (
+        len(prompt_chunks) > 0
+        and len(prompt_chunks[0]) > 0
+        and prompt_chunks[0][0] == tokenizer.bos_token_id
+    ):
+        offset = 1
+        input_ids.append(prompt_chunks[0][0])
+
+    for x in insert_separator(prompt_chunks, [image_token_index] * (offset + 1)):
+        input_ids.extend(x[offset:])
+
+    if return_tensors is not None:
+        if return_tensors == "pt":
+            return torch.tensor(input_ids, dtype=torch.long)
+        raise ValueError(f"Unsupported tensor type: {return_tensors}")
+    return input_ids
+
+
+def get_model_name_from_path(model_path):
+    model_path = model_path.strip("/")
+    model_paths = model_path.split("/")
+    if model_paths[-1].startswith("checkpoint-"):
+        return model_paths[-2] + "_" + model_paths[-1]
+    else:
+        return model_paths[-1]
+
+
+def load_pretrained_model(
+    model_path, load_8bit=False, load_4bit=False, device_map="auto", multimodal="IMAGE"
+):
+    kwargs = {"device_map": device_map}
+    kwargs["torch_dtype"] = torch.bfloat16
+
+    tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
+    model = LlavaLlamaForCausalLM.from_pretrained(
+        model_path, low_cpu_mem_usage=True, **kwargs
+    )
+    image_processor = None
+    model.resize_token_embeddings(len(tokenizer))
+    vision_tower = model.get_vision_tower()
+
+    if not vision_tower.is_loaded:
+        vision_tower.load_model()
+    vision_tower.to(device="cuda", dtype=torch.bfloat16)
+    image_processor = vision_tower.image_processor
+
+    if hasattr(model.config, "max_sequence_length"):
+        context_len = model.config.max_sequence_length
+    else:
+        context_len = 2048
+
+    return tokenizer, model, image_processor, context_len
+
+
+class KeywordsStoppingCriteria(StoppingCriteria):
+    def __init__(self, keywords, tokenizer, input_ids):
+        self.keywords = keywords
+        self.tokenizer = tokenizer
+        self.start_len = None
+        self.input_ids = input_ids
+
+    def __call__(
+        self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
+    ) -> bool:
+        if self.start_len is None:
+            self.start_len = self.input_ids.shape[1]
+            return False
+        else:
+            outputs = self.tokenizer.batch_decode(
+                output_ids[:, self.start_len :], skip_special_tokens=True
+            )
+            flag = True
+            for output in outputs:
+                for keyword in self.keywords:
+                    if keyword not in output:
+                        flag = False
+                        return False
+            return flag
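To make the `tokenizer_image_token` contract above concrete, a rough sketch of what it produces (illustrative only; the model path is a placeholder and the exact token ids depend on the tokenizer): the prompt is split on `<image_placeholder>`, each text chunk is tokenized on its own, and the chunks are rejoined with the `IMAGE_TOKEN_INDEX` sentinel (-200), which is not a vocabulary id but a marker the LLaVA-style model replaces with image features before the embedding lookup.

    from transformers import AutoTokenizer
    from xinference.thirdparty.llava.mm_utils import tokenizer_image_token

    tokenizer = AutoTokenizer.from_pretrained("<path-to-Yi-VL-model>", use_fast=False)
    prompt = "### Human: <image_placeholder>\nDescribe the picture.\n### Assistant:"

    input_ids = tokenizer_image_token(prompt, tokenizer, return_tensors="pt")
    # 1-D LongTensor: [BOS, ...tokens before the image..., -200, ...tokens after the image...]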
xinference/thirdparty/llava/model/__init__.py (new file)
@@ -0,0 +1 @@
+from .llava_llama import LlavaConfig, LlavaLlamaForCausalLM
xinference/thirdparty/llava/model/clip_encoder/builder.py (new file)
@@ -0,0 +1,11 @@
+from .clip_encoder import CLIPVisionTower
+
+
+def build_vision_tower(vision_tower_cfg, **kwargs):
+    vision_tower = getattr(
+        vision_tower_cfg,
+        "mm_vision_tower",
+        getattr(vision_tower_cfg, "vision_tower", None),
+    )
+
+    return CLIPVisionTower(vision_tower, args=vision_tower_cfg, **kwargs)
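A small sketch of the config object `build_vision_tower` expects (assumed usage; the attribute values are typical LLaVA-style settings, not taken from this diff): it only needs `mm_vision_tower` (or `vision_tower`) naming the CLIP checkpoint, plus the two `mm_vision_select_*` attributes read by `CLIPVisionTower` in the next hunk.

    from types import SimpleNamespace
    from xinference.thirdparty.llava.model.clip_encoder.builder import build_vision_tower

    cfg = SimpleNamespace(
        mm_vision_tower="openai/clip-vit-large-patch14-336",  # CLIP backbone checkpoint
        mm_vision_select_layer=-2,          # which hidden layer to take features from
        mm_vision_select_feature="patch",   # drop the CLS token, keep patch tokens
    )
    tower = build_vision_tower(cfg, delay_load=True)  # delay_load skips loading the weights here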
xinference/thirdparty/llava/model/clip_encoder/clip_encoder.py (new file)
@@ -0,0 +1,86 @@
+import torch
+import torch.nn as nn
+from transformers import CLIPImageProcessor, CLIPVisionConfig, CLIPVisionModel
+
+
+class CLIPVisionTower(nn.Module):
+    def __init__(self, vision_tower, args, delay_load=False):
+        super().__init__()
+
+        self.is_loaded = False
+
+        self.vision_tower_name = vision_tower
+        self.select_layer = args.mm_vision_select_layer
+        self.select_feature = getattr(args, "mm_vision_select_feature", "patch")
+
+        if not delay_load:
+            self.load_model()
+        else:
+            self.cfg_only = CLIPVisionConfig.from_pretrained(self.vision_tower_name)
+
+    def load_model(self):
+        self.image_processor = CLIPImageProcessor.from_pretrained(
+            self.vision_tower_name
+        )
+        self.vision_tower = CLIPVisionModel.from_pretrained(
+            self.vision_tower_name, ignore_mismatched_sizes=True
+        )
+
+        self.is_loaded = True
+
+    def feature_select(self, image_forward_outs):
+        image_features = image_forward_outs.hidden_states[self.select_layer]
+        if self.select_feature == "patch":
+            image_features = image_features[:, 1:]
+        elif self.select_feature == "cls_patch":
+            image_features = image_features
+        else:
+            raise ValueError(f"Unexpected select feature: {self.select_feature}")
+        return image_features
+
+    # @torch.no_grad()
+    def forward(self, images):
+        if type(images) is list:
+            image_features = []
+            for image in images:
+                image_forward_out = self.vision_tower(
+                    image.to(device=self.device, dtype=self.dtype).unsqueeze(0),
+                    output_hidden_states=True,
+                )
+                image_feature = self.feature_select(image_forward_out).to(image.dtype)
+                image_features.append(image_feature)
+        else:
+            image_forward_outs = self.vision_tower(
+                images.to(device=self.device, dtype=self.dtype),
+                output_hidden_states=True,
+            )
+            image_features = self.feature_select(image_forward_outs).to(images.dtype)
+
+        return image_features
+
+    @property
+    def dummy_feature(self):
+        return torch.zeros(1, self.hidden_size, device=self.device, dtype=self.dtype)
+
+    @property
+    def dtype(self):
+        return self.vision_tower.dtype
+
+    @property
+    def device(self):
+        return self.vision_tower.device
+
+    @property
+    def config(self):
+        if self.is_loaded:
+            return self.vision_tower.config
+        else:
+            return self.cfg_only
+
+    @property
+    def hidden_size(self):
+        return self.config.hidden_size
+
+    @property
+    def num_patches(self):
+        return (self.config.image_size // self.config.patch_size) ** 2
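A quick worked example of the `num_patches` property above, assuming a 336-pixel CLIP ViT-L/14 backbone (these config values are illustrative, not read from the diff):

    image_size, patch_size = 336, 14
    num_patches = (image_size // patch_size) ** 2  # 24 * 24 = 576
    # with select_feature="patch" the CLS token is dropped, so forward() yields
    # a (batch, 576, hidden_size) tensor per image for this backbone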
xinference/thirdparty/llava/model/constants.py (new file)
@@ -0,0 +1,6 @@
+# Model Constants
+IGNORE_INDEX = -100
+IMAGE_TOKEN_INDEX = -200
+DEFAULT_IMAGE_TOKEN = "<image_placeholder>"
+
+key_info = {"model_path": None}