PyPI - xinference - Versions diffs - 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl - Mend

xinference 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (48) hide show

xinference/_version.py CHANGED Viewed

@@ -8,11 +8,11 @@ import json
 version_json = '''
 {
- "date": "2024-03-08T13:28:03+0800",
+ "date": "2024-03-21T14:58:01+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "29f4c10a854cfec684dcf8398a0974f64bf8ce2b",
- "version": "0.9.2"
+ "full-revisionid": "2c9465ade7f358d57d4bc087277882d896a8de15",
+ "version": "0.9.4"
 }
 '''  # END VERSION_JSON

xinference/api/restful_api.py CHANGED Viewed

@@ -22,7 +22,7 @@ import pprint
 import sys
 import time
 import warnings
-from typing import Any, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 import gradio as gr
 import xoscar as xo
@@ -59,6 +59,7 @@ from ..core.utils import json_dumps
 from ..types import (
     SPECIAL_TOOL_PROMPT,
     ChatCompletion,
+    ChatCompletionMessage,
     Completion,
     CreateChatCompletion,
     CreateCompletion,
@@ -135,6 +136,15 @@ class BuildGradioInterfaceRequest(BaseModel):
     model_lang: List[str]
+class BuildGradioImageInterfaceRequest(BaseModel):
+    model_type: str
+    model_name: str
+    model_family: str
+    model_id: str
+    controlnet: Union[None, List[Dict[str, Union[str, None]]]]
+    model_revision: str
 class RESTfulAPI:
     def __init__(
         self,
@@ -246,6 +256,16 @@ class RESTfulAPI:
                 else None
             ),
         )
+        self._router.add_api_route(
+            "/v1/ui/images/{model_uid}",
+            self.build_gradio_images_interface,
+            methods=["POST"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
         self._router.add_api_route(
             "/token", self.login_for_access_token, methods=["POST"]
         )
@@ -584,8 +604,22 @@ class RESTfulAPI:
     async def list_models(self) -> JSONResponse:
         try:
-            data = await (await self._get_supervisor_ref()).list_models()
-            return JSONResponse(content=data)
+            models = await (await self._get_supervisor_ref()).list_models()
+            model_list = []
+            for model_id, model_info in models.items():
+                model_list.append(
+                    {
+                        "id": model_id,
+                        "object": "model",
+                        "created": 0,
+                        "owned_by": "xinference",
+                        **model_info,
+                    }
+                )
+            response = {"object": "list", "data": model_list}
+            return JSONResponse(content=response)
         except Exception as e:
             logger.error(e, exc_info=True)
             raise HTTPException(status_code=500, detail=str(e))
@@ -822,6 +856,56 @@ class RESTfulAPI:
         return JSONResponse(content={"model_uid": model_uid})
+    async def build_gradio_images_interface(
+        self, model_uid: str, request: Request
+    ) -> JSONResponse:
+        """
+        Build a Gradio interface for image processing models.
+        """
+        payload = await request.json()
+        body = BuildGradioImageInterfaceRequest.parse_obj(payload)
+        assert self._app is not None
+        assert body.model_type == "image"
+        # asyncio.Lock() behaves differently in 3.9 than 3.10+
+        # A event loop is required in 3.9 but not 3.10+
+        if sys.version_info < (3, 10):
+            try:
+                asyncio.get_event_loop()
+            except RuntimeError:
+                warnings.warn(
+                    "asyncio.Lock() requires an event loop in Python 3.9"
+                    + "a placeholder event loop has been created"
+                )
+                asyncio.set_event_loop(asyncio.new_event_loop())
+        from ..core.image_interface import ImageInterface
+        try:
+            access_token = request.headers.get("Authorization")
+            internal_host = "localhost" if self._host == "0.0.0.0" else self._host
+            interface = ImageInterface(
+                endpoint=f"http://{internal_host}:{self._port}",
+                model_uid=model_uid,
+                model_family=body.model_family,
+                model_name=body.model_name,
+                model_id=body.model_id,
+                model_revision=body.model_revision,
+                controlnet=body.controlnet,
+                access_token=access_token,
+            ).build()
+            gr.mount_gradio_app(self._app, interface, f"/{model_uid}")
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+        return JSONResponse(content={"model_uid": model_uid})
     async def terminate_model(self, model_uid: str) -> JSONResponse:
         try:
             assert self._app is not None
@@ -891,11 +975,17 @@ class RESTfulAPI:
                         self.handle_request_limit_error(re)
                     async for item in iterator:
                         yield item
+                except asyncio.CancelledError:
+                    logger.info(
+                        f"Disconnected from client (via refresh/close) {request.client} during generate."
+                    )
+                    return
                 except Exception as ex:
                     logger.exception("Completion stream got an error: %s", ex)
                     await self._report_error_event(model_uid, str(ex))
                     # https://github.com/openai/openai-python/blob/e0aafc6c1a45334ac889fe3e54957d309c3af93f/src/openai/_streaming.py#L107
                     yield dict(data=json.dumps({"error": str(ex)}))
+                    return
             return EventSourceResponse(stream_results())
         else:
@@ -1169,25 +1259,21 @@ class RESTfulAPI:
                 status_code=400, detail="Invalid input. Please specify the prompt."
             )
-        system_messages = []
+        system_messages: List["ChatCompletionMessage"] = []
+        system_messages_contents = []
         non_system_messages = []
         for msg in messages:
             assert (
                 msg.get("content") != SPECIAL_TOOL_PROMPT
             ), f"Invalid message content {SPECIAL_TOOL_PROMPT}"
             if msg["role"] == "system":
-                system_messages.append(msg)
+                system_messages_contents.append(msg["content"])
             else:
                 non_system_messages.append(msg)
+        system_messages.append(
+            {"role": "system", "content": ". ".join(system_messages_contents)}
+        )
-        if len(system_messages) > 1:
-            raise HTTPException(
-                status_code=400, detail="Multiple system messages are not supported."
-            )
-        if len(system_messages) == 1 and messages[0]["role"] != "system":
-            raise HTTPException(
-                status_code=400, detail="System message should be the first one."
-            )
         assert non_system_messages
         has_tool_message = messages[-1].get("role") == "tool"
@@ -1273,11 +1359,23 @@ class RESTfulAPI:
                     async for item in iterator:
                         yield item
                     yield "[DONE]"
+                # Note that asyncio.CancelledError does not inherit from Exception.
+                # When the user uses ctrl+c to cancel the streaming chat, asyncio.CancelledError would be triggered.
+                # See https://github.com/sysid/sse-starlette/blob/main/examples/example.py#L48
+                except asyncio.CancelledError:
+                    logger.info(
+                        f"Disconnected from client (via refresh/close) {request.client} during chat."
+                    )
+                    # See https://github.com/sysid/sse-starlette/blob/main/examples/error_handling.py#L13
+                    # Use return to stop the generator from continuing.
+                    # TODO: Cannot yield here. Yield here would leads to error for the next streaming request.
+                    return
                 except Exception as ex:
                     logger.exception("Chat completion stream got an error: %s", ex)
                     await self._report_error_event(model_uid, str(ex))
                     # https://github.com/openai/openai-python/blob/e0aafc6c1a45334ac889fe3e54957d309c3af93f/src/openai/_streaming.py#L107
                     yield dict(data=json.dumps({"error": str(ex)}))
+                    return
             return EventSourceResponse(stream_results())
         else:

xinference/client/restful/restful_client.py CHANGED Viewed

@@ -732,7 +732,8 @@ class Client:
             )
         response_data = response.json()
-        return response_data
+        model_list = response_data["data"]
+        return {item["id"]: item for item in model_list}
     def launch_speculative_llm(
         self,

xinference/conftest.py CHANGED Viewed

@@ -208,10 +208,11 @@ def setup():
     if not api_health_check(endpoint, max_attempts=10, sleep_interval=5):
         raise RuntimeError("Endpoint is not available after multiple attempts")
-    yield f"http://localhost:{port}", supervisor_addr
-    local_cluster_proc.terminate()
-    restful_api_proc.terminate()
+    try:
+        yield f"http://localhost:{port}", supervisor_addr
+    finally:
+        local_cluster_proc.kill()
+        restful_api_proc.kill()
 @pytest.fixture
@@ -239,10 +240,11 @@ def setup_with_file_logging():
     if not api_health_check(endpoint, max_attempts=3, sleep_interval=5):
         raise RuntimeError("Endpoint is not available after multiple attempts")
-    yield f"http://localhost:{port}", supervisor_addr, TEST_LOG_FILE_PATH
-    local_cluster_proc.terminate()
-    restful_api_proc.terminate()
+    try:
+        yield f"http://localhost:{port}", supervisor_addr, TEST_LOG_FILE_PATH
+    finally:
+        local_cluster_proc.kill()
+        restful_api_proc.kill()
 @pytest.fixture
@@ -290,11 +292,12 @@ def setup_with_auth():
     if not api_health_check(endpoint, max_attempts=10, sleep_interval=5):
         raise RuntimeError("Endpoint is not available after multiple attempts")
-    yield f"http://localhost:{port}", supervisor_addr
-    local_cluster_proc.terminate()
-    restful_api_proc.terminate()
     try:
-        os.remove(auth_file)
-    except:
-        pass
+        yield f"http://localhost:{port}", supervisor_addr
+    finally:
+        local_cluster_proc.kill()
+        restful_api_proc.kill()
+        try:
+            os.remove(auth_file)
+        except:
+            pass

xinference/constants.py CHANGED Viewed

@@ -25,6 +25,7 @@ XINFERENCE_ENV_HEALTH_CHECK_INTERVAL = "XINFERENCE_HEALTH_CHECK_INTERVAL"
 XINFERENCE_ENV_HEALTH_CHECK_TIMEOUT = "XINFERENCE_HEALTH_CHECK_TIMEOUT"
 XINFERENCE_ENV_DISABLE_HEALTH_CHECK = "XINFERENCE_DISABLE_HEALTH_CHECK"
 XINFERENCE_ENV_DISABLE_VLLM = "XINFERENCE_DISABLE_VLLM"
+XINFERENCE_ENV_ENABLE_SGLANG = "XINFERENCE_ENABLE_SGLANG"
 def get_xinference_home() -> str:
@@ -64,3 +65,4 @@ XINFERENCE_DISABLE_HEALTH_CHECK = bool(
     int(os.environ.get(XINFERENCE_ENV_DISABLE_HEALTH_CHECK, 0))
 )
 XINFERENCE_DISABLE_VLLM = bool(int(os.environ.get(XINFERENCE_ENV_DISABLE_VLLM, 0)))
+XINFERENCE_ENABLE_SGLANG = bool(int(os.environ.get(XINFERENCE_ENV_ENABLE_SGLANG, 0)))

xinference/core/image_interface.py ADDED Viewed

@@ -0,0 +1,252 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import base64
+import io
+import logging
+import os
+from typing import Dict, List, Optional, Union
+import gradio as gr
+import PIL.Image
+from gradio import Markdown
+from ..client.restful.restful_client import RESTfulImageModelHandle
+logger = logging.getLogger(__name__)
+class ImageInterface:
+    def __init__(
+        self,
+        endpoint: str,
+        model_uid: str,
+        model_family: str,
+        model_name: str,
+        model_id: str,
+        model_revision: str,
+        controlnet: Union[None, List[Dict[str, Union[str, None]]]],
+        access_token: Optional[str],
+    ):
+        self.endpoint = endpoint
+        self.model_uid = model_uid
+        self.model_family = model_family
+        self.model_name = model_name
+        self.model_id = model_id
+        self.model_revision = model_revision
+        self.controlnet = controlnet
+        self.access_token = (
+            access_token.replace("Bearer ", "") if access_token is not None else None
+        )
+    def build(self) -> gr.Blocks:
+        assert "stable_diffusion" in self.model_family
+        interface = self.build_main_interface()
+        interface.queue()
+        # Gradio initiates the queue during a startup event, but since the app has already been
+        # started, that event will not run, so manually invoke the startup events.
+        # See: https://github.com/gradio-app/gradio/issues/5228
+        interface.startup_events()
+        favicon_path = os.path.join(
+            os.path.dirname(os.path.abspath(__file__)),
+            os.path.pardir,
+            "web",
+            "ui",
+            "public",
+            "favicon.svg",
+        )
+        interface.favicon_path = favicon_path
+        return interface
+    def text2image_interface(self) -> "gr.Blocks":
+        def text_generate_image(
+            prompt: str,
+            n: int,
+            size_width: int,
+            size_height: int,
+            negative_prompt: Optional[str] = None,
+        ) -> PIL.Image.Image:
+            from ..client import RESTfulClient
+            client = RESTfulClient(self.endpoint)
+            client._set_token(self.access_token)
+            model = client.get_model(self.model_uid)
+            assert isinstance(model, RESTfulImageModelHandle)
+            size = f"{int(size_width)}*{int(size_height)}"
+            response = model.text_to_image(
+                prompt=prompt,
+                n=n,
+                size=size,
+                negative_prompt=negative_prompt,
+                response_format="b64_json",
+            )
+            images = []
+            for image_dict in response["data"]:
+                assert image_dict["b64_json"] is not None
+                image_data = base64.b64decode(image_dict["b64_json"])
+                image = PIL.Image.open(io.BytesIO(image_data))
+                images.append(image)
+            return images
+        with gr.Blocks() as text2image_vl_interface:
+            with gr.Column():
+                with gr.Row():
+                    with gr.Column(scale=10):
+                        prompt = gr.Textbox(
+                            label="Prompt",
+                            show_label=True,
+                            placeholder="Enter prompt here...",
+                        )
+                        negative_prompt = gr.Textbox(
+                            label="Negative prompt",
+                            show_label=True,
+                            placeholder="Enter negative prompt here...",
+                        )
+                    with gr.Column(scale=1):
+                        generate_button = gr.Button("Generate")
+                with gr.Row():
+                    n = gr.Number(label="Number of Images", value=1)
+                    size_width = gr.Number(label="Width", value=1024)
+                    size_height = gr.Number(label="Height", value=1024)
+                with gr.Column():
+                    image_output = gr.Gallery()
+            generate_button.click(
+                text_generate_image,
+                inputs=[prompt, n, size_width, size_height, negative_prompt],
+                outputs=image_output,
+            )
+        return text2image_vl_interface
+    def image2image_interface(self) -> "gr.Blocks":
+        def image_generate_image(
+            prompt: str,
+            negative_prompt: str,
+            image: PIL.Image.Image,
+            n: int,
+            size_width: int,
+            size_height: int,
+        ) -> PIL.Image.Image:
+            from ..client import RESTfulClient
+            client = RESTfulClient(self.endpoint)
+            client._set_token(self.access_token)
+            model = client.get_model(self.model_uid)
+            assert isinstance(model, RESTfulImageModelHandle)
+            size = f"{int(size_width)}*{int(size_height)}"
+            bio = io.BytesIO()
+            image.save(bio, format="png")
+            response = model.image_to_image(
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                n=n,
+                image=bio.getvalue(),
+                size=size,
+                response_format="b64_json",
+            )
+            images = []
+            for image_dict in response["data"]:
+                assert image_dict["b64_json"] is not None
+                image_data = base64.b64decode(image_dict["b64_json"])
+                image = PIL.Image.open(io.BytesIO(image_data))
+                images.append(image)
+            return images
+        with gr.Blocks() as image2image_inteface:
+            with gr.Column():
+                with gr.Row():
+                    with gr.Column(scale=10):
+                        prompt = gr.Textbox(
+                            label="Prompt",
+                            show_label=True,
+                            placeholder="Enter prompt here...",
+                        )
+                        negative_prompt = gr.Textbox(
+                            label="Negative Prompt",
+                            show_label=True,
+                            placeholder="Enter negative prompt here...",
+                        )
+                    with gr.Column(scale=1):
+                        generate_button = gr.Button("Generate")
+                with gr.Row():
+                    n = gr.Number(label="Number of image", value=1)
+                    size_width = gr.Number(label="Width", value=512)
+                    size_height = gr.Number(label="Height", value=512)
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        uploaded_image = gr.Image(type="pil", label="Upload Image")
+                    with gr.Column(scale=1):
+                        output_gallery = gr.Gallery()
+            generate_button.click(
+                image_generate_image,
+                inputs=[
+                    prompt,
+                    negative_prompt,
+                    uploaded_image,
+                    n,
+                    size_width,
+                    size_height,
+                ],
+                outputs=output_gallery,
+            )
+        return image2image_inteface
+    def build_main_interface(self) -> "gr.Blocks":
+        with gr.Blocks(
+            title=f"🎨 Xinference Stable Diffusion: {self.model_name} 🎨",
+            css="""
+                    .center{
+                        display: flex;
+                        justify-content: center;
+                        align-items: center;
+                        padding: 0px;
+                        color: #9ea4b0 !important;
+                    }
+                    """,
+            analytics_enabled=False,
+        ) as app:
+            Markdown(
+                f"""
+                    <h1 class="center" style='text-align: center; margin-bottom: 1rem'>🎨 Xinference Stable Diffusion: {self.model_name} 🎨</h1>
+                    """
+            )
+            Markdown(
+                f"""
+                    <div class="center">
+                    Model ID: {self.model_uid}
+                    </div>
+                    """
+            )
+            with gr.Tab("Text to Image"):
+                self.text2image_interface()
+            with gr.Tab("Image to Image"):
+                self.image2image_interface()
+        return app

xinference/core/supervisor.py CHANGED Viewed

@@ -722,17 +722,10 @@ class SupervisorActor(xo.StatelessActor):
         if model_uid is None:
             model_uid = self._gen_model_uid(model_name)
+        model_size = str(model_size_in_billions) if model_size_in_billions else ""
         logger.debug(
-            (
-                f"Enter launch_builtin_model, model_uid: %s, model_name: %s, model_size: %s, "
-                f"model_format: %s, quantization: %s, replica: %s"
-            ),
-            model_uid,
-            model_name,
-            str(model_size_in_billions) if model_size_in_billions else "",
-            model_format,
-            quantization,
-            replica,
+            f"Enter launch_builtin_model, model_uid: {model_uid}, model_name: {model_name}, model_size: {model_size}, "
+            f"model_format: {model_format}, quantization: {quantization}, replica: {replica}"
         )
         async def _launch_one_model(_replica_model_uid):

xinference 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl

Potentially problematic release.

xinference 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl