xinference 0.7.5__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/api/oauth2/__init__.py +13 -0
- xinference/api/oauth2/common.py +14 -0
- xinference/api/oauth2/core.py +93 -0
- xinference/api/oauth2/types.py +36 -0
- xinference/api/oauth2/utils.py +44 -0
- xinference/api/restful_api.py +216 -27
- xinference/client/oscar/actor_client.py +18 -18
- xinference/client/restful/restful_client.py +96 -33
- xinference/conftest.py +63 -1
- xinference/constants.py +1 -0
- xinference/core/chat_interface.py +143 -3
- xinference/core/metrics.py +83 -0
- xinference/core/model.py +244 -181
- xinference/core/status_guard.py +86 -0
- xinference/core/supervisor.py +57 -7
- xinference/core/worker.py +134 -13
- xinference/deploy/cmdline.py +142 -16
- xinference/deploy/local.py +39 -7
- xinference/deploy/supervisor.py +2 -0
- xinference/deploy/worker.py +33 -5
- xinference/fields.py +4 -1
- xinference/model/core.py +8 -1
- xinference/model/embedding/core.py +3 -2
- xinference/model/embedding/model_spec_modelscope.json +60 -18
- xinference/model/image/stable_diffusion/core.py +4 -3
- xinference/model/llm/__init__.py +7 -0
- xinference/model/llm/ggml/llamacpp.py +3 -2
- xinference/model/llm/llm_family.json +87 -3
- xinference/model/llm/llm_family.py +15 -5
- xinference/model/llm/llm_family_modelscope.json +92 -3
- xinference/model/llm/pytorch/chatglm.py +70 -28
- xinference/model/llm/pytorch/core.py +11 -30
- xinference/model/llm/pytorch/internlm2.py +155 -0
- xinference/model/llm/pytorch/utils.py +0 -153
- xinference/model/llm/utils.py +37 -8
- xinference/model/llm/vllm/core.py +15 -3
- xinference/model/multimodal/__init__.py +15 -8
- xinference/model/multimodal/core.py +8 -1
- xinference/model/multimodal/model_spec.json +9 -0
- xinference/model/multimodal/model_spec_modelscope.json +45 -0
- xinference/model/multimodal/qwen_vl.py +5 -9
- xinference/model/utils.py +7 -2
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.b83095c2.js +3 -0
- xinference/web/ui/build/static/js/{main.236e72e7.js.LICENSE.txt → main.b83095c2.js.LICENSE.txt} +7 -0
- xinference/web/ui/build/static/js/main.b83095c2.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0a853b2fa1902551e262a2f1a4b7894341f27b3dd9587f2ef7aaea195af89518.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/101923c539819f26ad11fbcbd6f6e56436b285efbb090dcc7dd648c6e924c4a8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/193e7ba39e70d4bb2895a5cb317f6f293a5fd02e7e324c02a1eba2f83216419c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/22858de5265f2d279fca9f2f54dfb147e4b2704200dfb5d2ad3ec9769417328f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/27696db5fcd4fcf0e7974cadf1e4a2ab89690474045c3188eafd586323ad13bb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/27bcada3ee8f89d21184b359f022fc965f350ffaca52c9814c29f1fc37121173.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/27bdbe25deab8cf08f7fab8f05f8f26cf84a98809527a37986a4ab73a57ba96a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2bee7b8bd3d52976a45d6068e1333df88b943e0e679403c809e45382e3818037.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/30670751f55508ef3b861e13dd71b9e5a10d2561373357a12fc3831a0b77fd93.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3605cd3a96ff2a3b443c70a101575482279ad26847924cab0684d165ba0d2492.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3789ef437d3ecbf945bb9cea39093d1f16ebbfa32dbe6daf35abcfb6d48de6f1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4942da6bc03bf7373af068e22f916341aabc5b5df855d73c1d348c696724ce37.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4d933e35e0fe79867d3aa6c46db28804804efddf5490347cb6c2c2879762a157.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4d96f071168af43965e0fab2ded658fa0a15b8d9ca03789a5ef9c5c16a4e3cee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/52a6136cb2dbbf9c51d461724d9b283ebe74a73fb19d5df7ba8e13c42bd7174d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5c408307c982f07f9c09c85c98212d1b1c22548a9194c69548750a3016b91b88.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/663adbcb60b942e9cf094c8d9fabe57517f5e5e6e722d28b4948a40b7445a3b8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/666bb2e1b250dc731311a7e4880886177885dfa768508d2ed63e02630cc78725.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/71493aadd34d568fbe605cacaba220aa69bd09273251ee4ba27930f8d01fccd8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8b071db2a5a9ef68dc14d5f606540bd23d9785e365a11997c510656764d2dccf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8b246d79cd3f6fc78f11777e6a6acca6a2c5d4ecce7f2dd4dcf9a48126440d3c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a4d72d3b806ba061919115f0c513738726872e3c79cf258f007519d3f91d1a16.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b4e4fccaf8f2489a29081f0bf3b191656bd452fb3c8b5e3c6d92d94f680964d5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b53eb7c7967f6577bd3e678293c44204fb03ffa7fdc1dd59d3099015c68f6f7f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d06af85a84e5c5a29d3acf2dbb5b30c0cf75c8aec4ab5f975e6096f944ee4324.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d5e150bff31715977d8f537c970f06d4fe3de9909d7e8342244a83a9f6447121.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/de36e5c08fd524e341d664883dda6cb1745acc852a4f1b011a35a0b4615f72fa.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f037ffef5992af0892d6d991053c1dace364cd39a3f11f1a41f92776e8a59459.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f23ab356a8603d4a2aaa74388c2f381675c207d37c4d1c832df922e9655c9a6b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f7c23b0922f4087b9e2e3e46f15c946b772daa46c28c3a12426212ecaf481deb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f95a8bd358eeb55fa2f49f1224cc2f4f36006359856744ff09ae4bb295f59ec1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +36 -0
- xinference/web/ui/node_modules/@types/cookie/package.json +30 -0
- xinference/web/ui/node_modules/@types/hoist-non-react-statics/package.json +33 -0
- xinference/web/ui/node_modules/react-cookie/package.json +55 -0
- xinference/web/ui/node_modules/universal-cookie/package.json +48 -0
- xinference/web/ui/package-lock.json +37 -0
- xinference/web/ui/package.json +3 -2
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/METADATA +17 -6
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/RECORD +101 -66
- xinference/web/ui/build/static/js/main.236e72e7.js +0 -3
- xinference/web/ui/build/static/js/main.236e72e7.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0cccfbe5d963b8e31eb679f9d9677392839cedd04aa2956ac6b33cf19599d597.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f3b6cc71b7c83bdc85aa4835927aeb86af2ce0d2ac241917ecfbf90f75c6d27.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2f651cf60b1bde50c0601c7110f77dd44819fb6e2501ff748a631724d91445d4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/42bb623f337ad08ed076484185726e072ca52bb88e373d72c7b052db4c273342.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/57af83639c604bd3362d0f03f7505e81c6f67ff77bee7c6bb31f6e5523eba185.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/667753ce39ce1d4bcbf9a5f1a103d653be1d19d42f4e1fbaceb9b507679a52c7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/66ed1bd4c06748c1b176a625c25c856997edc787856c73162f82f2b465c5d956.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/78f2521da2e2a98b075a2666cb782c7e2c019cd3c72199eecd5901c82d8655df.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8d2b0b3c6988d1894694dcbbe708ef91cfe62d62dac317031f09915ced637953.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9427ae7f1e94ae8dcd2333fb361e381f4054fde07394fe5448658e3417368476.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bcee2b4e76b07620f9087989eb86d43c645ba3c7a74132cf926260af1164af0e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/cc2ddd02ccc1dad1a2737ac247c79e6f6ed2c7836c6b68e511e3048f666b64af.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d2e8e6665a7efc832b43907dadf4e3c896a59eaf8129f9a520882466c8f2e489.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d8a42e9df7157de9f28eecefdf178fd113bf2280d28471b6e32a8a45276042df.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e26750d9556e9741912333349e4da454c53dbfddbfc6002ab49518dcf02af745.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ef42ec014d7bc373b874b2a1ff0dcd785490f125e913698bc049b0bd778e4d66.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fe3eb4d76c79ca98833f686d642224eeeb94cc83ad14300d281623796d087f0a.json +0 -1
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/LICENSE +0 -0
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/WHEEL +0 -0
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/top_level.txt +0 -0
xinference/core/chat_interface.py

@@ -12,11 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import base64
 import logging
 import os
-from
+from io import BytesIO
+from typing import Generator, List, Optional
 
 import gradio as gr
+import PIL.Image
 from gradio.components import Markdown, Textbox
 from gradio.layouts import Accordion, Column, Row
 
@@ -24,39 +27,48 @@ from ..client.restful.restful_client import (
     RESTfulChatglmCppChatModelHandle,
     RESTfulChatModelHandle,
     RESTfulGenerateModelHandle,
+    RESTfulMultimodalModelHandle,
 )
 from ..types import ChatCompletionMessage
 
 logger = logging.getLogger(__name__)
 
 
-class LLMInterface:
+class GradioInterface:
     def __init__(
         self,
         endpoint: str,
         model_uid: str,
         model_name: str,
         model_size_in_billions: int,
+        model_type: str,
         model_format: str,
         quantization: str,
         context_length: int,
         model_ability: List[str],
         model_description: str,
         model_lang: List[str],
+        access_token: Optional[str],
     ):
         self.endpoint = endpoint
         self.model_uid = model_uid
         self.model_name = model_name
         self.model_size_in_billions = model_size_in_billions
+        self.model_type = model_type
         self.model_format = model_format
         self.quantization = quantization
         self.context_length = context_length
         self.model_ability = model_ability
         self.model_description = model_description
         self.model_lang = model_lang
+        self._access_token = (
+            access_token.replace("Bearer ", "") if access_token is not None else None
+        )
 
     def build(self) -> "gr.Blocks":
-        if "chat" in self.model_ability:
+        if self.model_type == "multimodal":
+            interface = self.build_chat_vl_interface()
+        elif "chat" in self.model_ability:
             interface = self.build_chat_interface()
         else:
             interface = self.build_generate_interface()
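The class rename from LLMInterface to GradioInterface comes with two new constructor parameters, model_type and access_token. A minimal sketch of constructing it, where the endpoint, uid, and model metadata are illustrative placeholders rather than values from the diff:

from xinference.core.chat_interface import GradioInterface

# All values below are hypothetical; in practice the web UI fills them in
# from the launched model's spec.
demo = GradioInterface(
    endpoint="http://127.0.0.1:9997",
    model_uid="my-model-uid",
    model_name="qwen-vl-chat",
    model_size_in_billions=7,
    model_type="multimodal",  # new: routes build() to build_chat_vl_interface()
    model_format="pytorch",
    quantization="none",
    context_length=2048,
    model_ability=["chat"],
    model_description="placeholder description",
    model_lang=["en", "zh"],
    access_token=None,  # new: a leading "Bearer " prefix is stripped if present
)
blocks = demo.build()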
@@ -102,6 +114,7 @@ class LLMInterface:
             from ..client import RESTfulClient
 
             client = RESTfulClient(self.endpoint)
+            client._set_token(self._access_token)
             model = client.get_model(self.model_uid)
             assert isinstance(
                 model, (RESTfulChatModelHandle, RESTfulChatglmCppChatModelHandle)
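Each Gradio callback now creates a per-request RESTfulClient and attaches the stripped token before fetching the model handle. The equivalent standalone usage, as a sketch (endpoint, token, and uid are placeholders; _set_token is the same private helper the diff itself calls):

from xinference.client import RESTfulClient

client = RESTfulClient("http://127.0.0.1:9997")
# Pass the bare token, mirroring the diff: the "Bearer " prefix has already
# been stripped in GradioInterface.__init__.
client._set_token("sk-placeholder-token")
model = client.get_model("my-model-uid")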
@@ -168,6 +181,131 @@ class LLMInterface:
             analytics_enabled=False,
         )
 
+    def build_chat_vl_interface(
+        self,
+    ) -> "gr.Blocks":
+        def predict(history, bot):
+            logger.debug("Predict model: %s, history: %s", self.model_uid, history)
+            from ..client import RESTfulClient
+
+            client = RESTfulClient(self.endpoint)
+            client._set_token(self._access_token)
+            model = client.get_model(self.model_uid)
+            assert isinstance(model, RESTfulMultimodalModelHandle)
+
+            prompt = history[-1]
+            assert prompt["role"] == "user"
+            prompt = prompt["content"]
+            # multimodal chat does not support stream.
+            response = model.chat(prompt=prompt, chat_history=history[:-1])
+            history.append(response["choices"][0]["message"])
+            bot[-1][1] = history[-1]["content"]
+            return history, bot
+
+        def add_text(history, bot, text, image):
+            logger.debug("Add text, text: %s, image: %s", text, image)
+            if image:
+                buffered = BytesIO()
+                with PIL.Image.open(image) as img:
+                    img.thumbnail((500, 500))
+                    img.save(buffered, format="JPEG")
+                img_b64_str = base64.b64encode(buffered.getvalue()).decode()
+                display_content = f'<img src="data:image/png;base64,{img_b64_str}" alt="user upload image" />\n{text}'
+                message = {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": text},
+                        {"type": "image_url", "image_url": {"url": image}},
+                    ],
+                }
+            else:
+                display_content = text
+                message = {"role": "user", "content": text}
+            history = history + [message]
+            bot = bot + [(display_content, None)]
+            return history, bot, "", None
+
+        def clear_history():
+            logger.debug("Clear history.")
+            return [], None, "", None
+
+        def update_button(text):
+            return gr.update(interactive=bool(text))
+
+        with gr.Blocks(
+            title=f"🚀 Xinference Chat Bot : {self.model_name} 🚀",
+            css="""
+            .center{
+                display: flex;
+                justify-content: center;
+                align-items: center;
+                padding: 0px;
+                color: #9ea4b0 !important;
+            }
+            """,
+            analytics_enabled=False,
+        ) as chat_vl_interface:
+            Markdown(
+                f"""
+                <h1 style='text-align: center; margin-bottom: 1rem'>🚀 Xinference Chat Bot : {self.model_name} 🚀</h1>
+                """
+            )
+            Markdown(
+                f"""
+                <div class="center">
+                Model ID: {self.model_uid}
+                </div>
+                <div class="center">
+                Model Size: {self.model_size_in_billions} Billion Parameters
+                </div>
+                <div class="center">
+                Model Format: {self.model_format}
+                </div>
+                <div class="center">
+                Model Quantization: {self.quantization}
+                </div>
+                """
+            )
+
+            state = gr.State([])
+            with gr.Row():
+                chatbot = gr.Chatbot(
+                    elem_id="chatbot", label=self.model_name, height=550, scale=7
+                )
+                with gr.Column(scale=3):
+                    imagebox = gr.Image(type="filepath")
+                    textbox = gr.Textbox(
+                        show_label=False,
+                        placeholder="Enter text and press ENTER",
+                        container=False,
+                    )
+                    submit_btn = gr.Button(
+                        value="Send", variant="primary", interactive=False
+                    )
+                    clear_btn = gr.Button(value="Clear")
+
+            textbox.change(update_button, [textbox], [submit_btn], queue=False)
+
+            textbox.submit(
+                add_text,
+                [state, chatbot, textbox, imagebox],
+                [state, chatbot, textbox, imagebox],
+                queue=False,
+            ).then(predict, [state, chatbot], [state, chatbot])
+
+            submit_btn.click(
+                add_text,
+                [state, chatbot, textbox, imagebox],
+                [state, chatbot, textbox, imagebox],
+                queue=False,
+            ).then(predict, [state, chatbot], [state, chatbot])
+
+            clear_btn.click(
+                clear_history, None, [state, chatbot, textbox, imagebox], queue=False
+            )
+
+        return chat_vl_interface
+
     def build_generate_interface(
         self,
     ):
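The add_text handler above builds an OpenAI-style message whose content is a list of text and image_url parts. Outside Gradio, the same shape can be sent straight to the multimodal handle. A sketch assuming a multimodal model is already launched; the uid and image path are placeholders, and while the UI passes a local file path as the url, a base64 data URL is shown here on the assumption the server accepts it:

import base64

from xinference.client import RESTfulClient

client = RESTfulClient("http://127.0.0.1:9997")
model = client.get_model("my-qwen-vl-uid")  # hypothetical uid

with open("example.jpg", "rb") as f:
    b64 = base64.b64encode(f.read()).decode()

prompt = [
    {"type": "text", "text": "What is in this picture?"},
    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
]
# As the comment in predict() notes, multimodal chat here does not stream.
response = model.chat(prompt=prompt, chat_history=[])
print(response["choices"][0]["message"]["content"])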
@@ -198,6 +336,7 @@ class LLMInterface:
             from ..client import RESTfulClient
 
             client = RESTfulClient(self.endpoint)
+            client._set_token(self._access_token)
             model = client.get_model(self.model_uid)
             assert isinstance(model, RESTfulGenerateModelHandle)
 
@@ -234,6 +373,7 @@ class LLMInterface:
             from ..client import RESTfulClient
 
             client = RESTfulClient(self.endpoint)
+            client._set_token(self._access_token)
             model = client.get_model(self.model_uid)
             assert isinstance(model, RESTfulGenerateModelHandle)
 
xinference/core/metrics.py (new file)

@@ -0,0 +1,83 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+
+import uvicorn
+from aioprometheus import Counter, Gauge
+from aioprometheus.asgi.starlette import metrics
+from fastapi import FastAPI
+from fastapi.responses import RedirectResponse
+
+DEFAULT_METRICS_SERVER_LOG_LEVEL = "warning"
+
+
+generate_throughput = Gauge(
+    "xinference:generate_tokens_per_s", "Generate throughput in tokens/s."
+)
+# Latency
+time_to_first_token = Gauge(
+    "xinference:time_to_first_token_ms", "First token latency in ms."
+)
+# Tokens counter
+input_tokens_total_counter = Counter(
+    "xinference:input_tokens_total_counter", "Total number of input tokens."
+)
+output_tokens_total_counter = Counter(
+    "xinference:output_tokens_total_counter", "Total number of output tokens."
+)
+
+
+def record_metrics(name, op, kwargs):
+    collector = globals().get(name)
+    getattr(collector, op)(**kwargs)
+
+
+def launch_metrics_export_server(q, host=None, port=None):
+    app = FastAPI()
+    app.add_route("/metrics", metrics)
+
+    @app.get("/")
+    async def root():
+        response = RedirectResponse(url="/metrics")
+        return response
+
+    async def main():
+        if host is not None and port is not None:
+            config = uvicorn.Config(
+                app, host=host, port=port, log_level=DEFAULT_METRICS_SERVER_LOG_LEVEL
+            )
+        elif host is not None:
+            config = uvicorn.Config(
+                app, host=host, port=0, log_level=DEFAULT_METRICS_SERVER_LOG_LEVEL
+            )
+        elif port is not None:
+            config = uvicorn.Config(
+                app, port=port, log_level=DEFAULT_METRICS_SERVER_LOG_LEVEL
+            )
+        else:
+            config = uvicorn.Config(app, log_level=DEFAULT_METRICS_SERVER_LOG_LEVEL)
+
+        server = uvicorn.Server(config)
+        task = asyncio.create_task(server.serve())
+
+        while not server.started and not task.done():
+            await asyncio.sleep(0.1)
+
+        for server in server.servers:
+            for socket in server.sockets:
+                q.put(socket.getsockname())
+        await task
+
+    asyncio.run(main())