xinference 0.12.3__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this release has been flagged as potentially problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +6 -6
- xinference/client/restful/restful_client.py +0 -2
- xinference/core/model.py +21 -4
- xinference/core/scheduler.py +2 -0
- xinference/core/worker.py +74 -45
- xinference/deploy/utils.py +33 -2
- xinference/model/llm/__init__.py +5 -0
- xinference/model/llm/llm_family.json +240 -1
- xinference/model/llm/llm_family.py +32 -8
- xinference/model/llm/llm_family_modelscope.json +192 -0
- xinference/model/llm/mlx/__init__.py +13 -0
- xinference/model/llm/mlx/core.py +408 -0
- xinference/model/llm/pytorch/chatglm.py +2 -9
- xinference/model/llm/pytorch/cogvlm2.py +206 -21
- xinference/model/llm/pytorch/core.py +213 -40
- xinference/model/llm/pytorch/glm4v.py +171 -15
- xinference/model/llm/pytorch/qwen_vl.py +168 -7
- xinference/model/llm/pytorch/utils.py +53 -62
- xinference/model/llm/utils.py +24 -5
- xinference/model/rerank/core.py +5 -0
- xinference/thirdparty/deepseek_vl/serve/__init__.py +13 -0
- xinference/thirdparty/deepseek_vl/serve/app_deepseek.py +510 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/__init__.py +13 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/gradio_utils.py +94 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/overwrites.py +81 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/presets.py +96 -0
- xinference/thirdparty/deepseek_vl/serve/app_modules/utils.py +229 -0
- xinference/thirdparty/deepseek_vl/serve/inference.py +170 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.0fb6f3ab.js +3 -0
- xinference/web/ui/build/static/js/main.0fb6f3ab.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0f6b391abec76271137faad13a3793fe7acc1024e8cd2269c147b653ecd3a73b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1444c41a4d04494f1cbc2d8c1537df107b451cb569cb2c1fbf5159f3a4841a5f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2c63090c842376cdd368c3ded88a333ef40d94785747651343040a6f7872a223.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/30a0c79d8025d6441eb75b2df5bc2750a14f30119c869ef02570d294dff65c2f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/40486e655c3c5801f087e2cf206c0b5511aaa0dfdba78046b7181bf9c17e54c5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6450605fac003812485f6251b9f0caafbf2e5bfc3bbe2f000050d9e2fdb8dcd3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8a9742ddd8ba8546ef42dc14caca443f2b4524fabed7bf269e0eff3b7b64ee7d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b5507cd57f16a3a230aa0128e39fe103e928de139ea29e2679e4c64dcbba3b3a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d779b915f83f9c7b5a72515b6932fdd114f1822cef90ae01cc0d12bca59abc2d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d87824cb266194447a9c0c69ebab2d507bfc3e3148976173760d18c035e9dd26.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d93730e2b5d7e8c957b4d0965d2ed1dac9045a649adbd47c220d11f255d4b1e0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e656dc00b4d8b387f0a81ba8fc558767df1601c66369e2eb86a5ef27cf080572.json +1 -0
- {xinference-0.12.3.dist-info → xinference-0.13.0.dist-info}/METADATA +4 -1
- {xinference-0.12.3.dist-info → xinference-0.13.0.dist-info}/RECORD +55 -44
- xinference/web/ui/build/static/js/main.77dd47c3.js +0 -3
- xinference/web/ui/build/static/js/main.77dd47c3.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0cd591866aa345566e0b63fb51ff2043e163a770af6fdc2f3bad395d046353e2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/37c1476717199863bbba1530e3513a9368f8f73001b75b4a85c2075956308027.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3da7d55e87882a4af923e187b1351160e34ca102f589086439c15131a227fb6e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/46edc1fe657dfedb2e673148332bb442c6eb98f09f2592c389209e376510afa5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/72bcecc71c5267250edeb89608859d449b586f13ff9923a5e70e7172976ec403.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/82db357f3fd5b32215d747ee593f69ff06c95ad6cde37f71a96c8290aaab64c0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/935efd2867664c58230378fdf2ff1ea85e58d853b7214014e20dfbca8dab7b05.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bc6da27195ec4607bb472bf61f97c928ad4966fa64e4c2247661bedb7400abba.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c2abe75f04ad82fba68f35ed9cbe2e287762c876684fddccccfa73f739489b65.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f118f99c22b713c678c1209c4e1dd43fe86e3f6e801a4c0c35d3bbf41fd05fe6.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f51bf63ddaa7afd125ef2254a105789333eecc1c94fdf5157a9b88ef7ad0a5bd.json +0 -1
- /xinference/web/ui/build/static/js/{main.77dd47c3.js.LICENSE.txt → main.0fb6f3ab.js.LICENSE.txt} +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.0.dist-info}/LICENSE +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.0.dist-info}/WHEEL +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.0.dist-info}/entry_points.txt +0 -0
- {xinference-0.12.3.dist-info → xinference-0.13.0.dist-info}/top_level.txt +0 -0
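
The two largest additions in this release are a new MLX inference backend (xinference/model/llm/mlx/core.py, +408) and the vendored DeepSeek-VL Gradio serving code under xinference/thirdparty/deepseek_vl/serve/, four files of which are reproduced below. For orientation, here is a minimal sketch of launching and querying a model through xinference's RESTful client; the endpoint, model name, and engine are illustrative assumptions, not values taken from this diff:

    # Sketch only: the endpoint, model name, and engine are assumptions,
    # not values taken from this diff.
    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")
    # Launch a chat model; available engines/formats depend on the install.
    model_uid = client.launch_model(
        model_name="qwen1.5-chat", model_engine="transformers"
    )
    model = client.get_model(model_uid)
    print(model.chat("Hello!"))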
xinference/thirdparty/deepseek_vl/serve/app_deepseek.py
@@ -0,0 +1,510 @@
+# Copyright (c) 2023-2024 DeepSeek.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+# -*- coding:utf-8 -*-
+
+import base64
+from io import BytesIO
+
+import gradio as gr
+import torch
+from app_modules.gradio_utils import (
+    cancel_outputing,
+    delete_last_conversation,
+    reset_state,
+    reset_textbox,
+    transfer_input,
+    wrap_gen_fn,
+)
+from app_modules.overwrites import reload_javascript
+from app_modules.presets import CONCURRENT_COUNT, description, description_top, title
+from app_modules.utils import configure_logger, is_variable_assigned, strip_stop_words
+
+from ..utils.conversation import SeparatorStyle
+from .inference import convert_conversation_to_prompts, deepseek_generate, load_model
+
+
+def load_models():
+    models = {
+        "DeepSeek-VL 7B": "deepseek-ai/deepseek-vl-7b-chat",
+    }
+
+    for model_name in models:
+        models[model_name] = load_model(models[model_name])
+
+    return models
+
+
+logger = configure_logger()
+models = load_models()
+MODELS = sorted(list(models.keys()))
+
+
+def generate_prompt_with_history(
+    text, image, history, vl_chat_processor, tokenizer, max_length=2048
+):
+    """
+    Generate a prompt with history for the deepseek application.
+
+    Args:
+        text (str): The text prompt.
+        image (str): The image prompt.
+        history (list): List of previous conversation messages.
+        tokenizer: The tokenizer used for encoding the prompt.
+        max_length (int): The maximum length of the prompt.
+
+    Returns:
+        tuple: A tuple containing the generated prompt, image list, conversation, and conversation copy. If the prompt could not be generated within the max_length limit, returns None.
+    """
+
+    sft_format = "deepseek"
+    user_role_ind = 0
+    bot_role_ind = 1
+
+    # Initialize conversation
+    conversation = vl_chat_processor.new_chat_template()
+
+    if history:
+        conversation.messages = history
+
+    if image is not None:
+        if "<image_placeholder>" not in text:
+            text = (
+                "<image_placeholder>" + "\n" + text
+            )  # append the <image_placeholder> in a new line after the text prompt
+        text = (text, image)
+
+    conversation.append_message(conversation.roles[user_role_ind], text)
+    conversation.append_message(conversation.roles[bot_role_ind], "")
+
+    # Create a copy of the conversation to avoid history truncation in the UI
+    conversation_copy = conversation.copy()
+    logger.info("=" * 80)
+    logger.info(get_prompt(conversation))
+
+    rounds = len(conversation.messages) // 2
+
+    for _ in range(rounds):
+        current_prompt = get_prompt(conversation)
+        current_prompt = (
+            current_prompt.replace("</s>", "")
+            if sft_format == "deepseek"
+            else current_prompt
+        )
+
+        if torch.tensor(tokenizer.encode(current_prompt)).size(-1) <= max_length:
+            return conversation_copy
+
+        if len(conversation.messages) % 2 != 0:
+            gr.Error("The messages between user and assistant are not paired.")
+            return
+
+        try:
+            for _ in range(2):  # pop out two messages in a row
+                conversation.messages.pop(0)
+        except IndexError:
+            gr.Error("Input text processing failed, unable to respond in this round.")
+            return None
+
+    gr.Error("Prompt could not be generated within max_length limit.")
+    return None
+
+
+def to_gradio_chatbot(conv):
+    """Convert the conversation to gradio chatbot format."""
+    ret = []
+    for i, (role, msg) in enumerate(conv.messages[conv.offset :]):
+        if i % 2 == 0:
+            if type(msg) is tuple:
+                msg, image = msg
+                if isinstance(image, str):
+                    with open(image, "rb") as f:
+                        data = f.read()
+                    img_b64_str = base64.b64encode(data).decode()
+                    image_str = f'<video src="data:video/mp4;base64,{img_b64_str}" controls width="426" height="240"></video>'
+                    msg = msg.replace("\n".join(["<image_placeholder>"] * 4), image_str)
+                else:
+                    max_hw, min_hw = max(image.size), min(image.size)
+                    aspect_ratio = max_hw / min_hw
+                    max_len, min_len = 800, 400
+                    shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw))
+                    longest_edge = int(shortest_edge * aspect_ratio)
+                    W, H = image.size
+                    if H > W:
+                        H, W = longest_edge, shortest_edge
+                    else:
+                        H, W = shortest_edge, longest_edge
+                    image = image.resize((W, H))
+                    buffered = BytesIO()
+                    image.save(buffered, format="JPEG")
+                    img_b64_str = base64.b64encode(buffered.getvalue()).decode()
+                    img_str = f'<img src="data:image/png;base64,{img_b64_str}" alt="user upload image" />'
+                    msg = msg.replace("<image_placeholder>", img_str)
+            ret.append([msg, None])
+        else:
+            ret[-1][-1] = msg
+    return ret
+
+
+def to_gradio_history(conv):
+    """Convert the conversation to gradio history state."""
+    return conv.messages[conv.offset :]
+
+
+def get_prompt(conv) -> str:
+    """Get the prompt for generation."""
+    system_prompt = conv.system_template.format(system_message=conv.system_message)
+    if conv.sep_style == SeparatorStyle.DeepSeek:
+        seps = [conv.sep, conv.sep2]
+        if system_prompt == "" or system_prompt is None:
+            ret = ""
+        else:
+            ret = system_prompt + seps[0]
+        for i, (role, message) in enumerate(conv.messages):
+            if message:
+                if type(message) is tuple:  # multimodal message
+                    message, _ = message
+                ret += role + ": " + message + seps[i % 2]
+            else:
+                ret += role + ":"
+        return ret
+    else:
+        return conv.get_prompt
+
+
+@wrap_gen_fn
+def predict(
+    text,
+    image,
+    chatbot,
+    history,
+    top_p,
+    temperature,
+    repetition_penalty,
+    max_length_tokens,
+    max_context_length_tokens,
+    model_select_dropdown,
+):
+    """
+    Function to predict the response based on the user's input and selected model.
+
+    Parameters:
+    user_text (str): The input text from the user.
+    user_image (str): The input image from the user.
+    chatbot (str): The chatbot's name.
+    history (str): The history of the chat.
+    top_p (float): The top-p parameter for the model.
+    temperature (float): The temperature parameter for the model.
+    max_length_tokens (int): The maximum length of tokens for the model.
+    max_context_length_tokens (int): The maximum length of context tokens for the model.
+    model_select_dropdown (str): The selected model from the dropdown.
+
+    Returns:
+    generator: A generator that yields the chatbot outputs, history, and status.
+    """
+    print("running the prediction function")
+    try:
+        tokenizer, vl_gpt, vl_chat_processor = models[model_select_dropdown]
+
+        if text == "":
+            yield chatbot, history, "Empty context."
+            return
+    except KeyError:
+        yield [[text, "No Model Found"]], [], "No Model Found"
+        return
+
+    conversation = generate_prompt_with_history(
+        text,
+        image,
+        history,
+        vl_chat_processor,
+        tokenizer,
+        max_length=max_context_length_tokens,
+    )
+    prompts = convert_conversation_to_prompts(conversation)
+
+    stop_words = conversation.stop_str
+    gradio_chatbot_output = to_gradio_chatbot(conversation)
+
+    full_response = ""
+    with torch.no_grad():
+        for x in deepseek_generate(
+            prompts=prompts,
+            vl_gpt=vl_gpt,
+            vl_chat_processor=vl_chat_processor,
+            tokenizer=tokenizer,
+            stop_words=stop_words,
+            max_length=max_length_tokens,
+            temperature=temperature,
+            repetition_penalty=repetition_penalty,
+            top_p=top_p,
+        ):
+            full_response += x
+            response = strip_stop_words(full_response, stop_words)
+            conversation.update_last_message(response)
+            gradio_chatbot_output[-1][1] = response
+            yield gradio_chatbot_output, to_gradio_history(
+                conversation
+            ), "Generating..."
+
+    print("flushed result to gradio")
+    torch.cuda.empty_cache()
+
+    if is_variable_assigned("x"):
+        print(f"{model_select_dropdown}:\n{text}\n{'-' * 80}\n{x}\n{'=' * 80}")
+        print(
+            f"temperature: {temperature}, top_p: {top_p}, repetition_penalty: {repetition_penalty}, max_length_tokens: {max_length_tokens}"
+        )
+
+    yield gradio_chatbot_output, to_gradio_history(conversation), "Generate: Success"
+
+
+def retry(
+    text,
+    image,
+    chatbot,
+    history,
+    top_p,
+    temperature,
+    repetition_penalty,
+    max_length_tokens,
+    max_context_length_tokens,
+    model_select_dropdown,
+):
+    if len(history) == 0:
+        yield (chatbot, history, "Empty context")
+        return
+
+    chatbot.pop()
+    history.pop()
+    text = history.pop()[-1]
+    if type(text) is tuple:
+        text, image = text
+
+    yield from predict(
+        text,
+        image,
+        chatbot,
+        history,
+        top_p,
+        temperature,
+        repetition_penalty,
+        max_length_tokens,
+        max_context_length_tokens,
+        model_select_dropdown,
+    )
+
+
+def build_demo(MODELS):
+    with open("deepseek_vl/serve/assets/custom.css", "r", encoding="utf-8") as f:
+        customCSS = f.read()
+
+    with gr.Blocks(theme=gr.themes.Soft()) as demo:
+        history = gr.State([])
+        input_text = gr.State()
+        input_image = gr.State()
+
+        with gr.Row():
+            gr.HTML(title)
+            status_display = gr.Markdown("Success", elem_id="status_display")
+        gr.Markdown(description_top)
+
+        with gr.Row(equal_height=True):
+            with gr.Column(scale=4):
+                with gr.Row():
+                    chatbot = gr.Chatbot(
+                        elem_id="deepseek_chatbot",
+                        show_share_button=True,
+                        likeable=True,
+                        bubble_full_width=False,
+                        height=600,
+                    )
+                with gr.Row():
+                    with gr.Column(scale=4):
+                        text_box = gr.Textbox(
+                            show_label=False, placeholder="Enter text", container=False
+                        )
+                    with gr.Column(
+                        min_width=70,
+                    ):
+                        submitBtn = gr.Button("Send")
+                    with gr.Column(
+                        min_width=70,
+                    ):
+                        cancelBtn = gr.Button("Stop")
+                with gr.Row():
+                    emptyBtn = gr.Button(
+                        "🧹 New Conversation",
+                    )
+                    retryBtn = gr.Button("🔄 Regenerate")
+                    delLastBtn = gr.Button("🗑️ Remove Last Turn")
+
+            with gr.Column():
+                image_box = gr.Image(type="pil")
+
+                with gr.Tab(label="Parameter Setting") as parameter_row:
+                    top_p = gr.Slider(
+                        minimum=-0,
+                        maximum=1.0,
+                        value=0.95,
+                        step=0.05,
+                        interactive=True,
+                        label="Top-p",
+                    )
+                    temperature = gr.Slider(
+                        minimum=0,
+                        maximum=1.0,
+                        value=0.1,
+                        step=0.1,
+                        interactive=True,
+                        label="Temperature",
+                    )
+                    repetition_penalty = gr.Slider(
+                        minimum=0.0,
+                        maximum=2.0,
+                        value=1.1,
+                        step=0.1,
+                        interactive=True,
+                        label="Repetition penalty",
+                    )
+                    max_length_tokens = gr.Slider(
+                        minimum=0,
+                        maximum=4096,
+                        value=2048,
+                        step=8,
+                        interactive=True,
+                        label="Max Generation Tokens",
+                    )
+                    max_context_length_tokens = gr.Slider(
+                        minimum=0,
+                        maximum=4096,
+                        value=4096,
+                        step=128,
+                        interactive=True,
+                        label="Max History Tokens",
+                    )
+                    model_select_dropdown = gr.Dropdown(
+                        label="Select Models",
+                        choices=MODELS,
+                        multiselect=False,
+                        value=MODELS[0],
+                        interactive=True,
+                    )
+
+        examples_list = [
+            [
+                "deepseek_vl/serve/examples/rap.jpeg",
+                "Can you write me a master rap song that rhymes very well based on this image?",
+            ],
+            [
+                "deepseek_vl/serve/examples/app.png",
+                "What is this app about?",
+            ],
+            [
+                "deepseek_vl/serve/examples/pipeline.png",
+                "Help me write a python code based on the image.",
+            ],
+            [
+                "deepseek_vl/serve/examples/chart.png",
+                "Could you help me to re-draw this picture with python codes?",
+            ],
+            [
+                "deepseek_vl/serve/examples/mirror.png",
+                "How many people are there in the image. Why?",
+            ],
+            [
+                "deepseek_vl/serve/examples/puzzle.png",
+                "Can this 2 pieces combine together?",
+            ],
+        ]
+        gr.Examples(examples=examples_list, inputs=[image_box, text_box])
+        gr.Markdown(description)
+
+        input_widgets = [
+            input_text,
+            input_image,
+            chatbot,
+            history,
+            top_p,
+            temperature,
+            repetition_penalty,
+            max_length_tokens,
+            max_context_length_tokens,
+            model_select_dropdown,
+        ]
+        output_widgets = [chatbot, history, status_display]
+
+        transfer_input_args = dict(
+            fn=transfer_input,
+            inputs=[text_box, image_box],
+            outputs=[input_text, input_image, text_box, image_box, submitBtn],
+            show_progress=True,
+        )
+
+        predict_args = dict(
+            fn=predict,
+            inputs=input_widgets,
+            outputs=output_widgets,
+            show_progress=True,
+        )
+
+        retry_args = dict(
+            fn=retry,
+            inputs=input_widgets,
+            outputs=output_widgets,
+            show_progress=True,
+        )
+
+        reset_args = dict(
+            fn=reset_textbox, inputs=[], outputs=[text_box, status_display]
+        )
+
+        predict_events = [
+            text_box.submit(**transfer_input_args).then(**predict_args),
+            submitBtn.click(**transfer_input_args).then(**predict_args),
+        ]
+
+        emptyBtn.click(reset_state, outputs=output_widgets, show_progress=True)
+        emptyBtn.click(**reset_args)
+        retryBtn.click(**retry_args)
+
+        delLastBtn.click(
+            delete_last_conversation,
+            [chatbot, history],
+            output_widgets,
+            show_progress=True,
+        )
+
+        cancelBtn.click(cancel_outputing, [], [status_display], cancels=predict_events)
+
+    return demo
+
+
+if __name__ == "__main__":
+    demo = build_demo(MODELS)
+    demo.title = "DeepSeek-VL Chatbot"
+
+    reload_javascript()
+    demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
+        share=False,
+        favicon_path="deepseek_vl/serve/assets/favicon.ico",
+        inbrowser=False,
+        server_name="0.0.0.0",
+        server_port=8122,
+    )
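
A note on the truncation strategy in generate_prompt_with_history above: it drops whole user/assistant rounds from the front of the history until the encoded prompt fits within max_length, popping two messages per iteration. A toy illustration of the same front-trimming loop, with token counting replaced by plain character length (trim_rounds and the sample history are invented for illustration):

    def trim_rounds(messages, max_len):
        # Drop the oldest (user, assistant) pair until the flattened
        # prompt fits, mirroring the pop-two-in-a-row loop above.
        msgs = list(messages)
        while msgs and len(" ".join(m for _, m in msgs)) > max_len:
            msgs.pop(0)  # oldest user turn
            msgs.pop(0)  # its assistant reply
        return msgs

    history = [("user", "hello " * 50), ("bot", "hi"), ("user", "short q"), ("bot", "")]
    print(trim_rounds(history, 40))  # keeps only the most recent round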
xinference/thirdparty/deepseek_vl/serve/app_modules/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
xinference/thirdparty/deepseek_vl/serve/app_modules/gradio_utils.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2023-2024 DeepSeek.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+from functools import wraps
+
+import gradio as gr
+
+
+def wrap_gen_fn(gen_fn):
+    @wraps(gen_fn)
+    def wrapped_gen_fn(prompt, *args, **kwargs):
+        try:
+            yield from gen_fn(prompt, *args, **kwargs)
+        except gr.Error as g_err:
+            raise g_err
+        except Exception as e:
+            raise gr.Error(f"Failed to generate text: {e}") from e
+
+    return wrapped_gen_fn
+
+
+def delete_last_conversation(chatbot, history):
+    if len(history) % 2 != 0:
+        gr.Error("history length is not even")
+        return (
+            chatbot,
+            history,
+            "Delete Done",
+        )
+
+    if len(chatbot) > 0:
+        chatbot.pop()
+
+    if len(history) > 0 and len(history) % 2 == 0:
+        history.pop()
+        history.pop()
+
+    return (
+        chatbot,
+        history,
+        "Delete Done",
+    )
+
+
+def reset_state():
+    return [], [], None, "Reset Done"
+
+
+def reset_textbox():
+    return gr.update(value=""), ""
+
+
+def cancel_outputing():
+    return "Stop Done"
+
+
+def transfer_input(input_text, input_image):
+    print("transferring input text and input image")
+    return (
+        input_text,
+        input_image,
+        gr.update(value=""),
+        gr.update(value=None),
+        gr.Button(visible=True),
+    )
+
+
+class State:
+    interrupted = False
+
+    def interrupt(self):
+        self.interrupted = True
+
+    def recover(self):
+        self.interrupted = False
+
+
+shared_state = State()
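
wrap_gen_fn above exists so that any exception raised inside a streaming generator surfaces as a gr.Error, which Gradio displays in the UI rather than terminating the event silently. A toy usage, where flaky_stream is an invented stand-in for a real generation function:

    import gradio as gr

    def flaky_stream(prompt):
        yield "partial "
        raise RuntimeError("backend died")

    wrapped = wrap_gen_fn(flaky_stream)
    # Consuming wrapped("hi") yields "partial " first, then raises
    # gr.Error("Failed to generate text: backend died") in place of RuntimeError.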
xinference/thirdparty/deepseek_vl/serve/app_modules/overwrites.py
@@ -0,0 +1,81 @@
+# Copyright (c) 2023-2024 DeepSeek.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+from __future__ import annotations
+
+import logging
+from typing import List, Tuple
+
+from .presets import gr
+from .utils import convert_asis, convert_mdtext, detect_converted_mark
+
+
+def compact_text_chunks(self, prompt, text_chunks: List[str]) -> List[str]:
+    logging.debug("Compacting text chunks...🚀🚀🚀")
+    combined_str = [c.strip() for c in text_chunks if c.strip()]
+    combined_str = [f"[{index+1}] {c}" for index, c in enumerate(combined_str)]
+    combined_str = "\n\n".join(combined_str)
+    # resplit based on self.max_chunk_overlap
+    text_splitter = self.get_text_splitter_given_prompt(prompt, 1, padding=1)
+    return text_splitter.split_text(combined_str)
+
+
+def postprocess(
+    self, y: List[Tuple[str | None, str | None]]
+) -> List[Tuple[str | None, str | None]]:
+    """
+    Parameters:
+        y: List of tuples representing the message and response pairs. Each message and response should be a string, which may be in Markdown format.
+    Returns:
+        List of tuples representing the message and response. Each message and response will be a string of HTML.
+    """
+    if y is None or y == []:
+        return []
+    temp = []
+    for x in y:
+        user, bot = x
+        if not detect_converted_mark(user):
+            user = convert_asis(user)
+        if not detect_converted_mark(bot):
+            bot = convert_mdtext(bot)
+        temp.append((user, bot))
+    return temp
+
+
+with open("deepseek_vl/serve/assets/custom.js", "r", encoding="utf-8") as f, open(
+    "deepseek_vl/serve/assets/Kelpy-Codos.js", "r", encoding="utf-8"
+) as f2:
+    customJS = f.read()
+    kelpyCodos = f2.read()
+
+
+def reload_javascript():
+    print("Reloading javascript...")
+    js = f"<script>{customJS}</script><script>{kelpyCodos}</script>"
+
+    def template_response(*args, **kwargs):
+        res = GradioTemplateResponseOriginal(*args, **kwargs)
+        res.body = res.body.replace(b"</html>", f"{js}</html>".encode("utf8"))
+        res.init_headers()
+        return res
+
+    gr.routes.templates.TemplateResponse = template_response
+
+
+GradioTemplateResponseOriginal = gr.routes.templates.TemplateResponse
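
A note on the pattern in overwrites.py: GradioTemplateResponseOriginal captures the original response class at module import time (last line above), and reload_javascript() swaps in template_response, which splices the custom script tags in just before the closing </html> of every page Gradio renders. Because it patches gr.routes.templates.TemplateResponse directly, the hook depends on Gradio internals of that era and is likely version-sensitive.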