xinference 0.15.3__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/__init__.py +0 -4
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +29 -2
- xinference/client/restful/restful_client.py +10 -0
- xinference/constants.py +7 -3
- xinference/core/image_interface.py +76 -23
- xinference/core/model.py +158 -46
- xinference/core/progress_tracker.py +187 -0
- xinference/core/scheduler.py +10 -7
- xinference/core/supervisor.py +11 -0
- xinference/core/utils.py +9 -0
- xinference/core/worker.py +1 -0
- xinference/deploy/supervisor.py +4 -0
- xinference/model/__init__.py +4 -0
- xinference/model/audio/chattts.py +2 -1
- xinference/model/audio/core.py +0 -2
- xinference/model/audio/model_spec.json +8 -0
- xinference/model/audio/model_spec_modelscope.json +9 -0
- xinference/model/image/core.py +6 -7
- xinference/model/image/scheduler/__init__.py +13 -0
- xinference/model/image/scheduler/flux.py +533 -0
- xinference/model/image/sdapi.py +35 -4
- xinference/model/image/stable_diffusion/core.py +215 -110
- xinference/model/image/utils.py +39 -3
- xinference/model/llm/__init__.py +2 -0
- xinference/model/llm/llm_family.json +185 -17
- xinference/model/llm/llm_family_modelscope.json +124 -12
- xinference/model/llm/transformers/chatglm.py +104 -0
- xinference/model/llm/transformers/cogvlm2.py +2 -1
- xinference/model/llm/transformers/cogvlm2_video.py +2 -0
- xinference/model/llm/transformers/core.py +43 -113
- xinference/model/llm/transformers/deepseek_v2.py +0 -226
- xinference/model/llm/transformers/deepseek_vl.py +2 -0
- xinference/model/llm/transformers/glm4v.py +2 -1
- xinference/model/llm/transformers/intern_vl.py +2 -0
- xinference/model/llm/transformers/internlm2.py +3 -95
- xinference/model/llm/transformers/minicpmv25.py +2 -0
- xinference/model/llm/transformers/minicpmv26.py +2 -0
- xinference/model/llm/transformers/omnilmm.py +2 -0
- xinference/model/llm/transformers/opt.py +68 -0
- xinference/model/llm/transformers/qwen2_audio.py +11 -4
- xinference/model/llm/transformers/qwen2_vl.py +2 -28
- xinference/model/llm/transformers/qwen_vl.py +2 -1
- xinference/model/llm/transformers/utils.py +36 -283
- xinference/model/llm/transformers/yi_vl.py +2 -0
- xinference/model/llm/utils.py +60 -16
- xinference/model/llm/vllm/core.py +68 -9
- xinference/model/llm/vllm/utils.py +0 -1
- xinference/model/utils.py +7 -4
- xinference/model/video/core.py +0 -2
- xinference/utils.py +2 -3
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.e51a356d.js → main.f7da0140.js} +3 -3
- xinference/web/ui/build/static/js/main.f7da0140.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +1 -0
- {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/METADATA +38 -6
- {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/RECORD +63 -59
- xinference/web/ui/build/static/js/main.e51a356d.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4385c1095eefbff0a8ec3b2964ba6e5a66a05ab31be721483ca2f43e2a91f6ff.json +0 -1
- /xinference/web/ui/build/static/js/{main.e51a356d.js.LICENSE.txt → main.f7da0140.js.LICENSE.txt} +0 -0
- {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/LICENSE +0 -0
- {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/WHEEL +0 -0
- {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/entry_points.txt +0 -0
- {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/top_level.txt +0 -0
xinference/__init__.py CHANGED

```diff
@@ -26,13 +26,9 @@ except:
 def _install():
     from xoscar.backends.router import Router
 
-    from .model import _install as install_model
-
     default_router = Router.get_instance_or_empty()
     Router.set_instance(default_router)
 
-    install_model()
-
 
 _install()
 del _install
```
xinference/_version.py CHANGED

```diff
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2024-…",
+ "date": "2024-10-18T12:49:02+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "…",
- "version": "0.15.3"
+ "full-revisionid": "5f7dea44832a1c41f887b9a01377191894550057",
+ "version": "0.16.0"
 }
 ''' # END VERSION_JSON
 
```
xinference/api/restful_api.py CHANGED

```diff
@@ -524,6 +524,16 @@ class RESTfulAPI:
                 else None
             ),
         )
+        self._router.add_api_route(
+            "/v1/requests/{request_id}/progress",
+            self.get_progress,
+            methods=["get"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
         self._router.add_api_route(
             "/v1/images/generations",
             self.create_images,
@@ -1486,6 +1496,17 @@ class RESTfulAPI:
             await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
 
+    async def get_progress(self, request_id: str) -> JSONResponse:
+        try:
+            supervisor_ref = await self._get_supervisor_ref()
+            result = {"progress": await supervisor_ref.get_progress(request_id)}
+            return JSONResponse(content=result)
+        except KeyError as e:
+            raise HTTPException(status_code=400, detail=str(e))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+
     async def create_images(self, request: Request) -> Response:
         body = TextToImageRequest.parse_obj(await request.json())
         model_uid = body.model
@@ -1853,10 +1874,16 @@
             await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
 
-        from ..model.llm.utils import …
+        from ..model.llm.utils import (
+            GLM4_TOOL_CALL_FAMILY,
+            LLAMA3_TOOL_CALL_FAMILY,
+            QWEN_TOOL_CALL_FAMILY,
+        )
 
         model_family = desc.get("model_family", "")
-        function_call_models = …
+        function_call_models = (
+            QWEN_TOOL_CALL_FAMILY + GLM4_TOOL_CALL_FAMILY + LLAMA3_TOOL_CALL_FAMILY
+        )
 
         if model_family not in function_call_models:
             if body.tools:
```
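Once registered, the route can be polled with a plain HTTP GET and returns a JSON body shaped by the `get_progress` handler above. A minimal sketch (the host, port, and request_id values are illustrative assumptions, not values from this diff):

```python
# Minimal sketch of polling the new endpoint; host, port, and request_id
# are illustrative assumptions.
import requests

base_url = "http://127.0.0.1:9997"  # assumption: default local endpoint
request_id = "my-request-id"  # the id passed along with the generation call

resp = requests.get(f"{base_url}/v1/requests/{request_id}/progress")
if resp.status_code == 200:
    print(resp.json()["progress"])  # a float reported by the supervisor
else:
    # 400 for an unknown request_id (KeyError), 500 for anything else
    print(resp.status_code, resp.text)
```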
xinference/client/restful/restful_client.py CHANGED

```diff
@@ -1385,6 +1385,16 @@ class Client:
         response_json = response.json()
         return response_json
 
+    def get_progress(self, request_id: str):
+        url = f"{self.base_url}/v1/requests/{request_id}/progress"
+        response = requests.get(url, headers=self._headers)
+        if response.status_code != 200:
+            raise RuntimeError(
+                f"Failed to get progress, detail: {_get_error_string(response)}"
+            )
+        response_json = response.json()
+        return response_json
+
     def abort_cluster(self):
         url = f"{self.base_url}/v1/clusters"
         response = requests.delete(url, headers=self._headers)
```
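The client method above is a thin wrapper over that endpoint. A hedged sketch of the intended polling pattern, assuming a running server and an image model already launched with model uid "sd" (both assumptions); it mirrors the background-thread loop added to image_interface.py further down:

```python
# Sketch only: the endpoint and the "sd" model uid are assumptions.
import threading
import time
import uuid

from xinference.client import RESTfulClient

client = RESTfulClient("http://127.0.0.1:9997")
model = client.get_model("sd")
request_id = str(uuid.uuid4())

# Run the blocking generation call in a background thread so the main
# thread is free to poll progress under the same request_id.
t = threading.Thread(
    target=model.text_to_image,
    kwargs=dict(prompt="a red panda", request_id=request_id),
)
t.start()
while t.is_alive():
    try:
        print(client.get_progress(request_id))  # e.g. {"progress": 0.42}
    except RuntimeError:
        pass  # request not registered with the tracker yet
    time.sleep(1)
t.join()
```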
xinference/constants.py CHANGED

```diff
@@ -27,7 +27,8 @@ XINFERENCE_ENV_HEALTH_CHECK_INTERVAL = "XINFERENCE_HEALTH_CHECK_INTERVAL"
 XINFERENCE_ENV_HEALTH_CHECK_TIMEOUT = "XINFERENCE_HEALTH_CHECK_TIMEOUT"
 XINFERENCE_ENV_DISABLE_HEALTH_CHECK = "XINFERENCE_DISABLE_HEALTH_CHECK"
 XINFERENCE_ENV_DISABLE_METRICS = "XINFERENCE_DISABLE_METRICS"
-
+XINFERENCE_ENV_DOWNLOAD_MAX_ATTEMPTS = "XINFERENCE_DOWNLOAD_MAX_ATTEMPTS"
+XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE = "XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE"
 
 
 def get_xinference_home() -> str:
@@ -79,6 +80,9 @@ XINFERENCE_DISABLE_HEALTH_CHECK = bool(
 XINFERENCE_DISABLE_METRICS = bool(
     int(os.environ.get(XINFERENCE_ENV_DISABLE_METRICS, 0))
 )
-
-
+XINFERENCE_DOWNLOAD_MAX_ATTEMPTS = int(
+    os.environ.get(XINFERENCE_ENV_DOWNLOAD_MAX_ATTEMPTS, 3)
+)
+XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE = os.environ.get(
+    XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE, None
 )
```
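Both knobs are read once when constants.py is imported, so they must be set before xinference starts. A short sketch (the values are illustrative; per core/model.py later in this diff, the batching size must exactly match the size argument of every batched request, and only the FLUX.1 models qualify):

```python
# Sketch: set the new knobs before xinference is imported or launched.
import os

# Retry model downloads up to 5 times instead of the default 3.
os.environ["XINFERENCE_DOWNLOAD_MAX_ATTEMPTS"] = "5"

# Opt in to text-to-image batching. The value stays a raw string and every
# batched request must ask for exactly this size (see core/model.py below).
os.environ["XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE"] = "1024*1024"
```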
xinference/core/image_interface.py CHANGED

```diff
@@ -16,6 +16,9 @@ import base64
 import io
 import logging
 import os
+import threading
+import time
+import uuid
 from typing import Dict, List, Optional, Union
 
 import gradio as gr
@@ -84,6 +87,7 @@ class ImageInterface:
         num_inference_steps: int,
         negative_prompt: Optional[str] = None,
         sampler_name: Optional[str] = None,
+        progress=gr.Progress(),
     ) -> PIL.Image.Image:
         from ..client import RESTfulClient
 
@@ -99,19 +103,43 @@ class ImageInterface:
         )
         sampler_name = None if sampler_name == "default" else sampler_name
 
-        response = model.text_to_image(…)
+        response = None
+        exc = None
+        request_id = str(uuid.uuid4())
+
+        def run_in_thread():
+            nonlocal exc, response
+            try:
+                response = model.text_to_image(
+                    request_id=request_id,
+                    prompt=prompt,
+                    n=n,
+                    size=size,
+                    num_inference_steps=num_inference_steps,
+                    guidance_scale=guidance_scale,
+                    negative_prompt=negative_prompt,
+                    sampler_name=sampler_name,
+                    response_format="b64_json",
+                )
+            except Exception as e:
+                exc = e
+
+        t = threading.Thread(target=run_in_thread)
+        t.start()
+        while t.is_alive():
+            try:
+                cur_progress = client.get_progress(request_id)["progress"]
+            except (KeyError, RuntimeError):
+                cur_progress = 0.0
+
+            progress(cur_progress, desc="Generating images")
+            time.sleep(1)
+
+        if exc:
+            raise exc
 
         images = []
-        for image_dict in response["data"]:
+        for image_dict in response["data"]:  # type: ignore
             assert image_dict["b64_json"] is not None
             image_data = base64.b64decode(image_dict["b64_json"])
             image = PIL.Image.open(io.BytesIO(image_data))
@@ -184,6 +212,7 @@ class ImageInterface:
         num_inference_steps: int,
         padding_image_to_multiple: int,
         sampler_name: Optional[str] = None,
+        progress=gr.Progress(),
     ) -> PIL.Image.Image:
         from ..client import RESTfulClient
 
@@ -205,20 +234,44 @@ class ImageInterface:
         bio = io.BytesIO()
         image.save(bio, format="png")
 
-        response = model.image_to_image(…)
+        response = None
+        exc = None
+        request_id = str(uuid.uuid4())
+
+        def run_in_thread():
+            nonlocal exc, response
+            try:
+                response = model.image_to_image(
+                    request_id=request_id,
+                    prompt=prompt,
+                    negative_prompt=negative_prompt,
+                    n=n,
+                    image=bio.getvalue(),
+                    size=size,
+                    response_format="b64_json",
+                    num_inference_steps=num_inference_steps,
+                    padding_image_to_multiple=padding_image_to_multiple,
+                    sampler_name=sampler_name,
+                )
+            except Exception as e:
+                exc = e
+
+        t = threading.Thread(target=run_in_thread)
+        t.start()
+        while t.is_alive():
+            try:
+                cur_progress = client.get_progress(request_id)["progress"]
+            except (KeyError, RuntimeError):
+                cur_progress = 0.0
+
+            progress(cur_progress, desc="Generating images")
+            time.sleep(1)
+
+        if exc:
+            raise exc
 
         images = []
-        for image_dict in response["data"]:
+        for image_dict in response["data"]:  # type: ignore
             assert image_dict["b64_json"] is not None
             image_data = base64.b64decode(image_dict["b64_json"])
             image = PIL.Image.open(io.BytesIO(image_data))
```
xinference/core/model.py CHANGED

```diff
@@ -41,9 +41,10 @@ from typing import (
 import sse_starlette.sse
 import xoscar as xo
 
-from ..constants import …
+from ..constants import XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE
 
 if TYPE_CHECKING:
+    from .progress_tracker import ProgressTrackerActor
     from .worker import WorkerActor
     from ..model.llm.core import LLM
     from ..model.core import ModelDescription
@@ -73,6 +74,8 @@ XINFERENCE_BATCHING_ALLOWED_VISION_MODELS = [
     "MiniCPM-V-2.6",
 ]
 
+XINFERENCE_TEXT_TO_IMAGE_BATCHING_ALLOWED_MODELS = ["FLUX.1-dev", "FLUX.1-schnell"]
+
 
 def request_limit(fn):
     """
@@ -152,6 +155,16 @@
                 f"Destroy scheduler actor failed, address: {self.address}, error: {e}"
             )
 
+        if self.allow_batching_for_text_to_image():
+            try:
+                assert self._text_to_image_scheduler_ref is not None
+                await xo.destroy_actor(self._text_to_image_scheduler_ref)
+                del self._text_to_image_scheduler_ref
+            except Exception as e:
+                logger.debug(
+                    f"Destroy text_to_image scheduler actor failed, address: {self.address}, error: {e}"
+                )
+
         if hasattr(self._model, "stop") and callable(self._model.stop):
             self._model.stop()
 
@@ -177,6 +190,7 @@
 
     def __init__(
         self,
+        supervisor_address: str,
         worker_address: str,
         model: "LLM",
         model_description: Optional["ModelDescription"] = None,
@@ -188,6 +202,7 @@
         from ..model.llm.transformers.core import PytorchModel
         from ..model.llm.vllm.core import VLLMModel
 
+        self._supervisor_address = supervisor_address
         self._worker_address = worker_address
         self._model = model
         self._model_description = (
@@ -205,6 +220,7 @@
             else asyncio.locks.Lock()
         )
         self._worker_ref = None
+        self._progress_tracker_ref = None
         self._serve_count = 0
         self._metrics_labels = {
             "type": self._model_description.get("model_type", "unknown"),
@@ -216,6 +232,7 @@
         self._loop: Optional[asyncio.AbstractEventLoop] = None
 
         self._scheduler_ref = None
+        self._text_to_image_scheduler_ref = None
 
     async def __post_create__(self):
        self._loop = asyncio.get_running_loop()
@@ -229,6 +246,15 @@
                 uid=SchedulerActor.gen_uid(self.model_uid(), self._model.rep_id),
             )
 
+        if self.allow_batching_for_text_to_image():
+            from ..model.image.scheduler.flux import FluxBatchSchedulerActor
+
+            self._text_to_image_scheduler_ref = await xo.create_actor(
+                FluxBatchSchedulerActor,
+                address=self.address,
+                uid=FluxBatchSchedulerActor.gen_uid(self.model_uid()),
+            )
+
     async def _record_completion_metrics(
         self, duration, completion_tokens, prompt_tokens
     ):
@@ -275,6 +301,28 @@
         )
         return self._worker_ref
 
+    async def _get_progress_tracker_ref(
+        self,
+    ) -> xo.ActorRefType["ProgressTrackerActor"]:
+        from .progress_tracker import ProgressTrackerActor
+
+        if self._progress_tracker_ref is None:
+            self._progress_tracker_ref = await xo.actor_ref(
+                address=self._supervisor_address, uid=ProgressTrackerActor.default_uid()
+            )
+        return self._progress_tracker_ref
+
+    async def _get_progressor(self, request_id: str):
+        from .progress_tracker import Progressor
+
+        progressor = Progressor(
+            request_id,
+            await self._get_progress_tracker_ref(),
+            asyncio.get_running_loop(),
+        )
+        await progressor.start()
+        return progressor
+
     def is_vllm_backend(self) -> bool:
         from ..model.llm.vllm.core import VLLMModel
 
@@ -285,10 +333,8 @@
 
         model_ability = self._model_description.get("model_ability", [])
 
-        condition = …
-            …
-        )
-        if condition and "vision" in model_ability:
+        condition = isinstance(self._model, PytorchModel)
+        if condition and ("vision" in model_ability or "audio" in model_ability):
             if (
                 self._model.model_family.model_name
                 in XINFERENCE_BATCHING_ALLOWED_VISION_MODELS
@@ -305,6 +351,26 @@
             return False
         return condition
 
+    def allow_batching_for_text_to_image(self) -> bool:
+        from ..model.image.stable_diffusion.core import DiffusionModel
+
+        condition = XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE is not None and isinstance(
+            self._model, DiffusionModel
+        )
+
+        if condition:
+            model_name = self._model._model_spec.model_name  # type: ignore
+            if model_name in XINFERENCE_TEXT_TO_IMAGE_BATCHING_ALLOWED_MODELS:
+                return True
+            else:
+                logger.warning(
+                    f"Currently for image models with text_to_image ability, "
+                    f"xinference only supports {', '.join(XINFERENCE_TEXT_TO_IMAGE_BATCHING_ALLOWED_MODELS)} for batching. "
+                    f"Your model {model_name} is disqualified."
+                )
+                return False
+        return condition
+
     async def load(self):
         self._model.load()
         if self.allow_batching():
@@ -312,6 +378,11 @@
             logger.debug(
                 f"Batching enabled for model: {self.model_uid()}, max_num_seqs: {self._model.get_max_num_seqs()}"
             )
+        if self.allow_batching_for_text_to_image():
+            await self._text_to_image_scheduler_ref.set_model(self._model)
+            logger.debug(
+                f"Batching enabled for model: {self.model_uid()}, max_num_images: {self._model.get_max_num_images_for_batching()}"
+            )
 
     def model_uid(self):
         return (
@@ -591,12 +662,16 @@
         )
 
     async def abort_request(self, request_id: str) -> str:
-        from .… import AbortRequestMessage
+        from .utils import AbortRequestMessage
 
         if self.allow_batching():
             if self._scheduler_ref is None:
                 return AbortRequestMessage.NOT_FOUND.name
             return await self._scheduler_ref.abort_request(request_id)
+        elif self.allow_batching_for_text_to_image():
+            if self._text_to_image_scheduler_ref is None:
+                return AbortRequestMessage.NOT_FOUND.name
+            return await self._text_to_image_scheduler_ref.abort_request(request_id)
         return AbortRequestMessage.NO_OP.name
 
     @request_limit
@@ -721,6 +796,22 @@
             f"Model {self._model.model_spec} is not for creating speech."
         )
 
+    async def handle_image_batching_request(self, unique_id, *args, **kwargs):
+        size = args[2]
+        if XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE != size:
+            raise RuntimeError(
+                f"The image size: {size} of text_to_image for batching "
+                f"must be the same as the environment variable: {XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE} you set."
+            )
+        assert self._loop is not None
+        future = ConcurrentFuture()
+        await self._text_to_image_scheduler_ref.add_request(
+            unique_id, future, *args, **kwargs
+        )
+        fut = asyncio.wrap_future(future, loop=self._loop)
+        result = await fut
+        return await asyncio.to_thread(json_dumps, result)
+
     @request_limit
     @log_async(logger=logger)
     async def text_to_image(
@@ -732,17 +823,26 @@
         *args,
         **kwargs,
     ):
-        kwargs.pop("request_id", None)
         if hasattr(self._model, "text_to_image"):
-            return await self._call_wrapper_json(
-                self._model.text_to_image,
-                prompt,
-                n,
-                size,
-                response_format,
-                *args,
-                **kwargs,
-            )
+            if self.allow_batching_for_text_to_image():
+                unique_id = kwargs.pop("request_id", None)
+                return await self.handle_image_batching_request(
+                    unique_id, prompt, n, size, response_format, *args, **kwargs
+                )
+            else:
+                progressor = kwargs["progressor"] = await self._get_progressor(
+                    kwargs.pop("request_id", None)
+                )
+                with progressor:
+                    return await self._call_wrapper_json(
+                        self._model.text_to_image,
+                        prompt,
+                        n,
+                        size,
+                        response_format,
+                        *args,
+                        **kwargs,
+                    )
         raise AttributeError(
             f"Model {self._model.model_spec} is not for creating image."
         )
@@ -753,12 +853,15 @@
         self,
         **kwargs,
     ):
-        kwargs.pop("request_id", None)
         if hasattr(self._model, "txt2img"):
-            return await self._call_wrapper_json(
-                self._model.txt2img,
-                **kwargs,
+            progressor = kwargs["progressor"] = await self._get_progressor(
+                kwargs.pop("request_id", None)
             )
+            with progressor:
+                return await self._call_wrapper_json(
+                    self._model.txt2img,
+                    **kwargs,
+                )
         raise AttributeError(f"Model {self._model.model_spec} is not for txt2img.")
 
     @log_async(
@@ -776,19 +879,22 @@
         *args,
         **kwargs,
     ):
-        kwargs.pop("request_id", None)
         kwargs["negative_prompt"] = negative_prompt
         if hasattr(self._model, "image_to_image"):
-            return await self._call_wrapper_json(
-                self._model.image_to_image,
-                image,
-                prompt,
-                n,
-                size,
-                response_format,
-                *args,
-                **kwargs,
+            progressor = kwargs["progressor"] = await self._get_progressor(
+                kwargs.pop("request_id", None)
             )
+            with progressor:
+                return await self._call_wrapper_json(
+                    self._model.image_to_image,
+                    image,
+                    prompt,
+                    n,
+                    size,
+                    response_format,
+                    *args,
+                    **kwargs,
+                )
         raise AttributeError(
             f"Model {self._model.model_spec} is not for creating image."
         )
@@ -799,12 +905,15 @@
         self,
         **kwargs,
     ):
-        kwargs.pop("request_id", None)
         if hasattr(self._model, "img2img"):
-            return await self._call_wrapper_json(
-                self._model.img2img,
-                **kwargs,
+            progressor = kwargs["progressor"] = await self._get_progressor(
+                kwargs.pop("request_id", None)
            )
+            with progressor:
+                return await self._call_wrapper_json(
+                    self._model.img2img,
+                    **kwargs,
+                )
         raise AttributeError(f"Model {self._model.model_spec} is not for img2img.")
 
     @log_async(
@@ -823,20 +932,23 @@
         *args,
         **kwargs,
     ):
-        kwargs.pop("request_id", None)
+        kwargs["negative_prompt"] = negative_prompt
         if hasattr(self._model, "inpainting"):
-            return await self._call_wrapper_json(
-                self._model.inpainting,
-                image,
-                mask_image,
-                prompt,
-                negative_prompt,
-                n,
-                size,
-                response_format,
-                *args,
-                **kwargs,
+            progressor = kwargs["progressor"] = await self._get_progressor(
+                kwargs.pop("request_id", None)
             )
+            with progressor:
+                return await self._call_wrapper_json(
+                    self._model.inpainting,
+                    image,
+                    mask_image,
+                    prompt,
+                    n,
+                    size,
+                    response_format,
+                    *args,
+                    **kwargs,
+                )
         raise AttributeError(
             f"Model {self._model.model_spec} is not for creating image."
         )
```