xinference 0.9.4__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/api/oauth2/auth_service.py +47 -18
- xinference/api/oauth2/types.py +1 -0
- xinference/api/restful_api.py +9 -1
- xinference/client/restful/restful_client.py +12 -2
- xinference/conftest.py +13 -2
- xinference/core/supervisor.py +32 -1
- xinference/core/worker.py +139 -20
- xinference/deploy/cmdline.py +119 -20
- xinference/model/llm/__init__.py +4 -0
- xinference/model/llm/llm_family.json +627 -0
- xinference/model/llm/llm_family_modelscope.json +471 -0
- xinference/model/llm/pytorch/core.py +2 -0
- xinference/model/llm/pytorch/deepseek_vl.py +232 -0
- xinference/model/llm/pytorch/omnilmm.py +153 -0
- xinference/model/llm/utils.py +11 -1
- xinference/model/llm/vllm/core.py +3 -0
- xinference/thirdparty/deepseek_vl/__init__.py +31 -0
- xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
- xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
- xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
- xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
- xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
- xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
- xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
- xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
- xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
- xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
- xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
- xinference/thirdparty/omnilmm/__init__.py +0 -0
- xinference/thirdparty/omnilmm/chat.py +216 -0
- xinference/thirdparty/omnilmm/constants.py +4 -0
- xinference/thirdparty/omnilmm/conversation.py +332 -0
- xinference/thirdparty/omnilmm/model/__init__.py +1 -0
- xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
- xinference/thirdparty/omnilmm/model/resampler.py +166 -0
- xinference/thirdparty/omnilmm/model/utils.py +563 -0
- xinference/thirdparty/omnilmm/train/__init__.py +13 -0
- xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
- xinference/thirdparty/omnilmm/utils.py +134 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.98516614.js +3 -0
- xinference/web/ui/build/static/js/main.98516614.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/139969fd25258eb7decc9505f30b779089bba50c402bb5c663008477c7bff73b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3f357ab57b8e7fade54c667f0e0ebf2787566f72bfdca0fea14e395b5c203753.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9d7c49815d97539207e5aab2fb967591b5fed7791218a0762539efc9491f36af.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d0d0b591d9adaf42b83ad6633f8b7c118541a4b80ea957c303d3bf9b86fbad0a.json +1 -0
- {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/METADATA +18 -5
- {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/RECORD +55 -28
- xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
- xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
- /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.98516614.js.LICENSE.txt} +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/LICENSE +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/WHEEL +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/entry_points.txt +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.0.dist-info}/top_level.txt +0 -0
xinference/thirdparty/deepseek_vl/utils/conversation.py
@@ -0,0 +1,348 @@
+# Copyright (c) 2023-2024 DeepSeek.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+"""
+From https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
+"""
+
+import dataclasses
+from enum import IntEnum, auto
+from typing import Dict, List
+
+
+class SeparatorStyle(IntEnum):
+    """Separator styles."""
+
+    ADD_COLON_SINGLE = auto()
+    ADD_COLON_TWO = auto()
+    ADD_COLON_SPACE_SINGLE = auto()
+    NO_COLON_SINGLE = auto()
+    NO_COLON_TWO = auto()
+    ADD_NEW_LINE_SINGLE = auto()
+    LLAMA2 = auto()
+    CHATGLM = auto()
+    CHATML = auto()
+    CHATINTERN = auto()
+    DOLLY = auto()
+    RWKV = auto()
+    PHOENIX = auto()
+    ROBIN = auto()
+    DeepSeek = auto()
+    PLAIN = auto()
+    ALIGNMENT = auto()
+
+
+@dataclasses.dataclass
+class Conversation:
+    """A class that manages prompt templates and keeps all conversation history."""
+
+    # The name of this template
+    name: str
+    # The template of the system prompt
+    system_template: str = "{system_message}"
+    # The system message
+    system_message: str = ""
+    # The names of two roles
+    roles: List[str] = (("USER", "ASSISTANT"),)
+    # All messages. Each item is (role, message).
+    messages: List[List[str]] = ()
+    # The number of few shot examples
+    offset: int = 0
+    # The separator style and configurations
+    sep_style: SeparatorStyle = SeparatorStyle.ADD_COLON_SINGLE
+    sep: str = "\n"
+    sep2: str = None
+    # Stop criteria (the default one is EOS token)
+    stop_str: str = None
+    # Stops generation if meeting any token in this list
+    stop_token_ids: List[int] = None
+
+    def get_prompt(self) -> str:
+        """Get the prompt for generation."""
+        system_prompt = self.system_template.format(system_message=self.system_message)
+
+        if self.sep_style == SeparatorStyle.DeepSeek:
+            seps = [self.sep, self.sep2]
+            if system_prompt == "" or system_prompt is None:
+                ret = ""
+            else:
+                ret = system_prompt + seps[0]
+            for i, (role, message) in enumerate(self.messages):
+                if message:
+                    ret += role + ": " + message + seps[i % 2]
+                else:
+                    ret += role + ":"
+            return ret
+        elif self.sep_style == SeparatorStyle.LLAMA2:
+            seps = [self.sep, self.sep2]
+            if self.system_message:
+                ret = system_prompt
+            else:
+                ret = "[INST] "
+            for i, (role, message) in enumerate(self.messages):
+                tag = self.roles[i % 2]
+                if message:
+                    if type(message) is tuple:  # multimodal message
+                        message, _ = message
+                    if i == 0:
+                        ret += message + " "
+                    else:
+                        ret += tag + " " + message + seps[i % 2]
+                else:
+                    ret += tag
+            return ret
+        elif self.sep_style == SeparatorStyle.PLAIN:
+            seps = [self.sep, self.sep2]
+            ret = ""
+            for i, (role, message) in enumerate(self.messages):
+                if message:
+                    if type(message) is tuple:
+                        message, _, _ = message
+                    if i % 2 == 0:
+                        ret += message + seps[i % 2]
+                    else:
+                        ret += message + seps[i % 2]
+                else:
+                    ret += ""
+            return ret
+        elif self.sep_style == SeparatorStyle.ALIGNMENT:
+            seps = [self.sep, self.sep2]
+            ret = ""
+            for i, (role, message) in enumerate(self.messages):
+                if message:
+                    if type(message) is tuple:
+                        message, _, _ = message
+                    if i % 2 == 0:
+                        ret += "<image>\n" + seps[i % 2]
+                    else:
+                        ret += message + seps[i % 2]
+                else:
+                    ret += ""
+            return ret
+        else:
+            raise ValueError(f"Invalid style: {self.sep_style}")
+
+    def get_prompt_for_current_round(self, content=None):
+        """Get current round formatted question prompt during sft training"""
+        if self.sep_style == SeparatorStyle.PLAIN:
+            formatted_question = "<image>\n"
+        elif self.sep_style == SeparatorStyle.DeepSeek:
+            formatted_question = (
+                f"{self.roles[0]}: " + content.strip() + self.sep + f"{self.roles[1]}:"
+            )
+        else:
+            raise ValueError(f"Unsupported sep_style: {self.sep_style}")
+        return formatted_question
+
+    def set_system_message(self, system_message: str):
+        """Set the system message."""
+        self.system_message = system_message
+
+    def append_message(self, role: str, message: str):
+        """Append a new message."""
+        self.messages.append([role, message])
+
+    def reset_message(self):
+        """Reset a new message."""
+        self.messages = []
+
+    def update_last_message(self, message: str):
+        """Update the last output.
+
+        The last message is typically set to be None when constructing the prompt,
+        so we need to update it in-place after getting the response from a model.
+        """
+        self.messages[-1][1] = message
+
+    def to_gradio_chatbot(self):
+        """Convert the conversation to gradio chatbot format."""
+        ret = []
+        for i, (role, msg) in enumerate(self.messages[self.offset :]):
+            if i % 2 == 0:
+                ret.append([msg, None])
+            else:
+                ret[-1][-1] = msg
+        return ret
+
+    def to_openai_api_messages(self):
+        """Convert the conversation to OpenAI chat completion format."""
+        system_prompt = self.system_template.format(system_message=self.system_message)
+        ret = [{"role": "system", "content": system_prompt}]
+
+        for i, (_, msg) in enumerate(self.messages[self.offset :]):
+            if i % 2 == 0:
+                ret.append({"role": "user", "content": msg})
+            else:
+                if msg is not None:
+                    ret.append({"role": "assistant", "content": msg})
+        return ret
+
+    def copy(self):
+        return Conversation(
+            name=self.name,
+            system_template=self.system_template,
+            system_message=self.system_message,
+            roles=self.roles,
+            messages=[[x, y] for x, y in self.messages],
+            offset=self.offset,
+            sep_style=self.sep_style,
+            sep=self.sep,
+            sep2=self.sep2,
+            stop_str=self.stop_str,
+            stop_token_ids=self.stop_token_ids,
+        )
+
+    def dict(self):
+        return {
+            "template_name": self.name,
+            "system_message": self.system_message,
+            "roles": self.roles,
+            "messages": self.messages,
+            "offset": self.offset,
+        }
+
+
+# A global registry for all conversation templates
+conv_templates: Dict[str, Conversation] = {}
+
+
+def register_conv_template(template: Conversation, override: bool = False):
+    """Register a new conversation template."""
+    if not override:
+        assert (
+            template.name not in conv_templates
+        ), f"{template.name} has been registered."
+
+    conv_templates[template.name] = template
+
+
+def get_conv_template(name: str) -> Conversation:
+    """Get a conversation template."""
+    return conv_templates[name].copy()
+
+
+# llava_llama2 template
+register_conv_template(
+    Conversation(
+        name="llava_llama2",
+        system_message="You are a helpful language and vision assistant. "
+        "You are able to understand the visual content that the user provides, "
+        "and assist the user with a variety of tasks using natural language.",
+        system_template="[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n",
+        roles=("[INST]", "[/INST]"),
+        messages=(),
+        offset=0,
+        sep_style=SeparatorStyle.LLAMA2,
+        sep=" ",
+        sep2=" </s><s>",
+        stop_token_ids=[2],
+    )
+)
+
+# llama2 template
+# reference: https://github.com/facebookresearch/llama/blob/cfc3fc8c1968d390eb830e65c63865e980873a06/llama/generation.py#L212
+register_conv_template(
+    Conversation(
+        name="llama-2",
+        system_template="[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n",
+        roles=("[INST]", "[/INST]"),
+        messages=(),
+        offset=0,
+        sep_style=SeparatorStyle.LLAMA2,
+        sep=" ",
+        sep2=" </s><s>",
+        stop_token_ids=[2],
+    )
+)
+
+
+# deepseek template
+register_conv_template(
+    Conversation(
+        name="deepseek",
+        system_template="{system_message}",
+        # system_message="You are a helpful assistant. Please answer truthfully and write out your "
+        # "thinking step by step to be sure you get the right answer.",
+        system_message="",
+        roles=("User", "Assistant"),
+        messages=(),
+        offset=0,
+        sep_style=SeparatorStyle.DeepSeek,
+        sep="\n\n",
+        sep2="<|end▁of▁sentence|>",
+        stop_token_ids=[100001],
+        stop_str=["User:", "<|end▁of▁sentence|>"],
+    )
+)
+
+register_conv_template(
+    Conversation(
+        name="plain",
+        system_template="",
+        system_message="",
+        roles=("", ""),
+        messages=(),
+        offset=0,
+        sep_style=SeparatorStyle.PLAIN,
+        sep="",
+        sep2="",
+        stop_token_ids=[2],
+        stop_str=["</s>"],
+    )
+)
+
+
+register_conv_template(
+    Conversation(
+        name="alignment",
+        system_template="",
+        system_message="",
+        roles=("", ""),
+        messages=(),
+        offset=0,
+        sep_style=SeparatorStyle.ALIGNMENT,
+        sep="",
+        sep2="",
+        stop_token_ids=[2],
+        stop_str=["</s>"],
+    )
+)
+
+
+if __name__ == "__main__":
+    # print("Llama-2 template:")
+    # conv = get_conv_template("llama-2")
+    # conv.set_system_message("You are a helpful, respectful and honest assistant.")
+    # conv.append_message(conv.roles[0], "Hello!")
+    # conv.append_message(conv.roles[1], "Hi!")
+    # conv.append_message(conv.roles[0], "How are you?")
+    # conv.append_message(conv.roles[1], None)
+    # print(conv.get_prompt())
+
+    # print("\n")
+
+    print("deepseek template:")
+    conv = get_conv_template("deepseek")
+    conv.append_message(conv.roles[0], "Hello!")
+    conv.append_message(conv.roles[1], "Hi! This is Tony.")
+    conv.append_message(conv.roles[0], "Who are you?")
+    conv.append_message(conv.roles[1], "I am a helpful assistant.")
+    conv.append_message(conv.roles[0], "How are you?")
+    conv.append_message(conv.roles[1], None)
+    print(conv.get_prompt())
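Note: with the messages appended in the __main__ block above, the deepseek template's get_prompt() should render roughly the following prompt (the blank lines come from sep="\n\n"; each assistant turn ends with the <|end▁of▁sentence|> separator, so the next user turn continues on the same line):

User: Hello!

Assistant: Hi! This is Tony.<|end▁of▁sentence|>User: Who are you?

Assistant: I am a helpful assistant.<|end▁of▁sentence|>User: How are you?

Assistant: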
xinference/thirdparty/deepseek_vl/utils/io.py
@@ -0,0 +1,78 @@
+# Copyright (c) 2023-2024 DeepSeek.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+import json
+from typing import Dict, List
+
+import PIL.Image
+import torch
+from transformers import AutoModelForCausalLM
+
+from ..models import MultiModalityCausalLM, VLChatProcessor
+
+
+def load_pretrained_model(model_path: str):
+    vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
+    tokenizer = vl_chat_processor.tokenizer
+
+    vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
+        model_path, trust_remote_code=True
+    )
+    vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()
+
+    return tokenizer, vl_chat_processor, vl_gpt
+
+
+def load_pil_images(conversations: List[Dict[str, str]]) -> List[PIL.Image.Image]:
+    """
+
+    Args:
+        conversations (List[Dict[str, str]]): the conversations with a list of messages. An example is :
+            [
+                {
+                    "role": "User",
+                    "content": "<image_placeholder>\nExtract all information from this image and convert them into markdown format.",
+                    "images": ["./examples/table_datasets.png"]
+                },
+                {"role": "Assistant", "content": ""},
+            ]
+
+    Returns:
+        pil_images (List[PIL.Image.Image]): the list of PIL images.
+
+    """
+
+    pil_images = []
+
+    for message in conversations:
+        if "images" not in message:
+            continue
+
+        for image_path in message["images"]:
+            pil_img = PIL.Image.open(image_path)
+            pil_img = pil_img.convert("RGB")
+            pil_images.append(pil_img)
+
+    return pil_images
+
+
+def load_json(filepath):
+    with open(filepath, "r") as f:
+        data = json.load(f)
+    return data
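A minimal usage sketch for the io.py helpers above; the model path, image path, and the final VLChatProcessor call are illustrative assumptions (the processor itself lives in the bundled processing_vlm.py, which is not shown in this hunk):

# Hedged usage sketch, not part of the diff.
from xinference.thirdparty.deepseek_vl.utils.io import (
    load_pil_images,
    load_pretrained_model,
)

tokenizer, vl_chat_processor, vl_gpt = load_pretrained_model(
    "deepseek-ai/deepseek-vl-7b-chat"  # illustrative model path
)

conversation = [
    {
        "role": "User",
        "content": "<image_placeholder>\nDescribe this image.",
        "images": ["./example.png"],  # illustrative image path
    },
    {"role": "Assistant", "content": ""},
]

# load_pil_images only reads the "images" entries and returns RGB PIL images.
pil_images = load_pil_images(conversation)

# Assumed interface: batch the text and images for MultiModalityCausalLM;
# see processing_vlm.py in this release for the actual API.
inputs = vl_chat_processor(
    conversations=conversation, images=pil_images, force_batchify=True
).to(vl_gpt.device)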
xinference/thirdparty/omnilmm/__init__.py
File without changes
xinference/thirdparty/omnilmm/chat.py
@@ -0,0 +1,216 @@
+import base64
+import io
+import json
+import os
+
+import torch
+from accelerate import init_empty_weights, load_checkpoint_and_dispatch
+from PIL import Image
+from transformers import AutoModel, AutoTokenizer
+
+from .model.omnilmm import OmniLMMForCausalLM
+from .model.utils import build_transform
+from .train.train_utils import omni_preprocess
+from .utils import disable_torch_init
+
+DEFAULT_IMAGE_TOKEN = "<image>"
+DEFAULT_IMAGE_PATCH_TOKEN = "<im_patch>"
+DEFAULT_IM_START_TOKEN = "<im_start>"
+DEFAULT_IM_END_TOKEN = "<im_end>"
+
+
+def init_omni_lmm(model_path, device_map):
+    torch.backends.cuda.matmul.allow_tf32 = True
+    disable_torch_init()
+    model_name = os.path.expanduser(model_path)
+    print(f"Load omni_lmm model and tokenizer from {model_name}")
+    tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=2048)
+
+    if False:
+        # model on multiple devices for small size gpu memory (Nvidia 3090 24G x2)
+        with init_empty_weights():
+            model = OmniLMMForCausalLM.from_pretrained(
+                model_name, tune_clip=True, torch_dtype=torch.bfloat16
+            )
+        model = load_checkpoint_and_dispatch(
+            model,
+            model_name,
+            dtype=torch.bfloat16,
+            device_map="auto",
+            no_split_module_classes=[
+                "Eva",
+                "MistralDecoderLayer",
+                "ModuleList",
+                "Resampler",
+            ],
+        )
+    else:
+        model = OmniLMMForCausalLM.from_pretrained(
+            model_name,
+            tune_clip=True,
+            torch_dtype=torch.bfloat16,
+            device_map=device_map,
+        ).to(dtype=torch.bfloat16)
+
+    image_processor = build_transform(
+        is_train=False, input_size=model.model.config.image_size, std_mode="OPENAI_CLIP"
+    )
+
+    mm_use_im_start_end = getattr(model.config, "mm_use_im_start_end", False)
+    assert mm_use_im_start_end
+
+    tokenizer.add_tokens(
+        [DEFAULT_IMAGE_PATCH_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN],
+        special_tokens=True,
+    )
+
+    vision_config = model.model.vision_config
+    vision_config.im_patch_token = tokenizer.convert_tokens_to_ids(
+        [DEFAULT_IMAGE_PATCH_TOKEN]
+    )[0]
+    vision_config.use_im_start_end = mm_use_im_start_end
+    (
+        vision_config.im_start_token,
+        vision_config.im_end_token,
+    ) = tokenizer.convert_tokens_to_ids([DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN])
+    image_token_len = model.model.config.num_query
+
+    return model, image_processor, image_token_len, tokenizer
+
+
+def expand_question_into_multimodal(
+    question_text, image_token_len, im_st_token, im_ed_token, im_patch_token
+):
+    if "<image>" in question_text[0]["content"]:
+        question_text[0]["content"] = question_text[0]["content"].replace(
+            "<image>", im_st_token + im_patch_token * image_token_len + im_ed_token
+        )
+    else:
+        question_text[0]["content"] = (
+            im_st_token
+            + im_patch_token * image_token_len
+            + im_ed_token
+            + "\n"
+            + question_text[0]["content"]
+        )
+    return question_text
+
+
+def wrap_question_for_omni_lmm(question, image_token_len, tokenizer):
+    question = expand_question_into_multimodal(
+        question,
+        image_token_len,
+        DEFAULT_IM_START_TOKEN,
+        DEFAULT_IM_END_TOKEN,
+        DEFAULT_IMAGE_PATCH_TOKEN,
+    )
+
+    conversation = question
+    data_dict = omni_preprocess(
+        sources=[conversation], tokenizer=tokenizer, generation=True
+    )
+
+    data_dict = dict(input_ids=data_dict["input_ids"][0], labels=data_dict["labels"][0])
+    return data_dict
+
+
+class OmniLMM12B:
+    def __init__(self, model_path, device_map) -> None:
+        model, img_processor, image_token_len, tokenizer = init_omni_lmm(
+            model_path, device_map
+        )
+        self.model = model
+        self.image_token_len = image_token_len
+        self.image_transform = img_processor
+        self.tokenizer = tokenizer
+        self.model.eval()
+
+    def decode(self, image, input_ids):
+        with torch.inference_mode():
+            output = self.model.generate_vllm(
+                input_ids=input_ids.unsqueeze(0).cuda(),
+                images=image.unsqueeze(0).half().cuda(),
+                temperature=0.6,
+                max_new_tokens=1024,
+                # num_beams=num_beams,
+                do_sample=True,
+                output_scores=True,
+                return_dict_in_generate=True,
+                repetition_penalty=1.1,
+                top_k=30,
+                top_p=0.9,
+            )
+
+            response = self.tokenizer.decode(
+                output.sequences[0], skip_special_tokens=True
+            )
+            response = response.strip()
+            return response
+
+    def chat(self, input):
+        try:
+            image = Image.open(io.BytesIO(base64.b64decode(input["image"]))).convert(
+                "RGB"
+            )
+        except Exception as e:
+            return f"Image decode error: {e}"
+
+        msgs = json.loads(input["question"])
+        input_ids = wrap_question_for_omni_lmm(
+            msgs, self.image_token_len, self.tokenizer
+        )["input_ids"]
+        input_ids = torch.as_tensor(input_ids)
+        # print('input_ids', input_ids)
+        image = self.image_transform(image)
+
+        out = self.decode(image, input_ids)
+
+        return out
+
+
+def img2base64(file_name):
+    with open(file_name, "rb") as f:
+        encoded_string = base64.b64encode(f.read())
+    return encoded_string
+
+
+class OmniLMM3B:
+    def __init__(self, model_path, device_map) -> None:
+        self.model = AutoModel.from_pretrained(
+            model_path, trust_remote_code=True, device_map=device_map
+        ).to(dtype=torch.bfloat16)
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            model_path, trust_remote_code=True
+        )
+        self.model.eval().cuda()
+
+    def chat(self, input):
+        try:
+            image = Image.open(io.BytesIO(base64.b64decode(input["image"]))).convert(
+                "RGB"
+            )
+        except Exception as e:
+            return f"Image decode error: {e}"
+
+        msgs = json.loads(input["question"])
+
+        answer, context, _ = self.model.chat(
+            image=image,
+            msgs=msgs,
+            context=None,
+            tokenizer=self.tokenizer,
+            sampling=True,
+            temperature=0.7,
+        )
+        return answer
+
+
+class OmniLMMChat:
+    def __init__(self, model_path, device_map) -> None:
+        if "12B" in model_path:
+            self.model = OmniLMM12B(model_path, device_map)
+        else:
+            self.model = OmniLMM3B(model_path, device_map)
+
+    def chat(self, input):
+        return self.model.chat(input)
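A minimal usage sketch for the OmniLMMChat wrapper above; the model path, image file, and question are illustrative, not taken from this release:

# Hedged usage sketch, not part of the diff.
import json

from xinference.thirdparty.omnilmm.chat import OmniLMMChat, img2base64

# "12B" in the path selects the OmniLMM12B branch; any other path falls back to OmniLMM3B.
chat_model = OmniLMMChat("openbmb/OmniLMM-12B", device_map="cuda")  # illustrative path

# chat() expects a base64-encoded image plus a JSON-encoded message list.
im_64 = img2base64("./example.jpg")  # illustrative image file
msgs = [{"role": "user", "content": "What is in this image?"}]

answer = chat_model.chat({"image": im_64, "question": json.dumps(msgs)})
print(answer)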