PyPI - xinference - Versions diffs - 0.16.3__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

xinference 0.16.3__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (54) hide show

xinference/_version.py CHANGED Viewed

@@ -8,11 +8,11 @@ import json
 version_json = '''
 {
- "date": "2024-11-07T16:55:36+0800",
+ "date": "2024-11-15T17:33:11+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "85ab86bf1c0967e45fbec995534cd5a0c9a9c439",
- "version": "0.16.3"
+ "full-revisionid": "4c96475b8f90e354aa1b47856fda4db098b62b65",
+ "version": "1.0.0"
 }
 '''  # END VERSION_JSON

xinference/api/restful_api.py CHANGED Viewed

@@ -52,10 +52,14 @@ from xoscar.utils import get_next_port
 from .._compat import BaseModel, Field
 from .._version import get_versions
-from ..constants import XINFERENCE_DEFAULT_ENDPOINT_PORT, XINFERENCE_DISABLE_METRICS
+from ..constants import (
+    XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
+    XINFERENCE_DEFAULT_ENDPOINT_PORT,
+    XINFERENCE_DISABLE_METRICS,
+)
 from ..core.event import Event, EventCollectorActor, EventType
 from ..core.supervisor import SupervisorActor
-from ..core.utils import json_dumps
+from ..core.utils import CancelMixin, json_dumps
 from ..types import (
     ChatCompletion,
     Completion,
@@ -111,6 +115,7 @@ class RerankRequest(BaseModel):
     return_documents: Optional[bool] = False
     return_len: Optional[bool] = False
     max_chunks_per_doc: Optional[int] = None
+    kwargs: Optional[str] = None
 class TextToImageRequest(BaseModel):
@@ -206,7 +211,7 @@ class BuildGradioImageInterfaceRequest(BaseModel):
     model_ability: List[str]
-class RESTfulAPI:
+class RESTfulAPI(CancelMixin):
     def __init__(
         self,
         supervisor_address: str,
@@ -1311,11 +1316,6 @@ class RESTfulAPI:
         payload = await request.json()
         body = RerankRequest.parse_obj(payload)
         model_uid = body.model
-        kwargs = {
-            key: value
-            for key, value in payload.items()
-            if key not in RerankRequest.__annotations__.keys()
-        }
         try:
             model = await (await self._get_supervisor_ref()).get_model(model_uid)
@@ -1329,6 +1329,10 @@ class RESTfulAPI:
             raise HTTPException(status_code=500, detail=str(e))
         try:
+            if body.kwargs is not None:
+                parsed_kwargs = json.loads(body.kwargs)
+            else:
+                parsed_kwargs = {}
             scores = await model.rerank(
                 body.documents,
                 body.query,
@@ -1336,7 +1340,7 @@ class RESTfulAPI:
                 max_chunks_per_doc=body.max_chunks_per_doc,
                 return_documents=body.return_documents,
                 return_len=body.return_len,
-                **kwargs,
+                **parsed_kwargs,
             )
             return Response(scores, media_type="application/json")
         except RuntimeError as re:
@@ -1531,8 +1535,11 @@ class RESTfulAPI:
             await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
+        request_id = None
         try:
             kwargs = json.loads(body.kwargs) if body.kwargs else {}
+            request_id = kwargs.get("request_id")
+            self._add_running_task(request_id)
             image_list = await model.text_to_image(
                 prompt=body.prompt,
                 n=body.n,
@@ -1541,6 +1548,11 @@ class RESTfulAPI:
                 **kwargs,
             )
             return Response(content=image_list, media_type="application/json")
+        except asyncio.CancelledError:
+            err_str = f"The request has been cancelled: {request_id}"
+            logger.error(err_str)
+            await self._report_error_event(model_uid, err_str)
+            raise HTTPException(status_code=409, detail=err_str)
         except RuntimeError as re:
             logger.error(re, exc_info=True)
             await self._report_error_event(model_uid, str(re))
@@ -1686,11 +1698,14 @@ class RESTfulAPI:
             await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
+        request_id = None
         try:
             if kwargs is not None:
                 parsed_kwargs = json.loads(kwargs)
             else:
                 parsed_kwargs = {}
+            request_id = parsed_kwargs.get("request_id")
+            self._add_running_task(request_id)
             image_list = await model_ref.image_to_image(
                 image=Image.open(image.file),
                 prompt=prompt,
@@ -1701,6 +1716,11 @@ class RESTfulAPI:
                 **parsed_kwargs,
             )
             return Response(content=image_list, media_type="application/json")
+        except asyncio.CancelledError:
+            err_str = f"The request has been cancelled: {request_id}"
+            logger.error(err_str)
+            await self._report_error_event(model_uid, err_str)
+            raise HTTPException(status_code=409, detail=err_str)
         except RuntimeError as re:
             logger.error(re, exc_info=True)
             await self._report_error_event(model_uid, str(re))
@@ -1734,11 +1754,14 @@ class RESTfulAPI:
             await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
+        request_id = None
         try:
             if kwargs is not None:
                 parsed_kwargs = json.loads(kwargs)
             else:
                 parsed_kwargs = {}
+            request_id = parsed_kwargs.get("request_id")
+            self._add_running_task(request_id)
             im = Image.open(image.file)
             mask_im = Image.open(mask_image.file)
             if not size:
@@ -1755,6 +1778,11 @@ class RESTfulAPI:
                 **parsed_kwargs,
             )
             return Response(content=image_list, media_type="application/json")
+        except asyncio.CancelledError:
+            err_str = f"The request has been cancelled: {request_id}"
+            logger.error(err_str)
+            await self._report_error_event(model_uid, err_str)
+            raise HTTPException(status_code=409, detail=err_str)
         except RuntimeError as re:
             logger.error(re, exc_info=True)
             await self._report_error_event(model_uid, str(re))
@@ -1782,17 +1810,25 @@ class RESTfulAPI:
             await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
+        request_id = None
         try:
             if kwargs is not None:
                 parsed_kwargs = json.loads(kwargs)
             else:
                 parsed_kwargs = {}
+            request_id = parsed_kwargs.get("request_id")
+            self._add_running_task(request_id)
             im = Image.open(image.file)
             text = await model_ref.ocr(
                 image=im,
                 **parsed_kwargs,
             )
             return Response(content=text, media_type="text/plain")
+        except asyncio.CancelledError:
+            err_str = f"The request has been cancelled: {request_id}"
+            logger.error(err_str)
+            await self._report_error_event(model_uid, err_str)
+            raise HTTPException(status_code=409, detail=err_str)
         except RuntimeError as re:
             logger.error(re, exc_info=True)
             await self._report_error_event(model_uid, str(re))
@@ -2111,10 +2147,25 @@ class RESTfulAPI:
             logger.error(e, exc_info=True)
             raise HTTPException(status_code=500, detail=str(e))
-    async def abort_request(self, model_uid: str, request_id: str) -> JSONResponse:
+    async def abort_request(
+        self, request: Request, model_uid: str, request_id: str
+    ) -> JSONResponse:
         try:
+            payload = await request.json()
+            block_duration = payload.get(
+                "block_duration", XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION
+            )
+            logger.info(
+                "Abort request with model uid: %s, request id: %s, block duration: %s",
+                model_uid,
+                request_id,
+                block_duration,
+            )
             supervisor_ref = await self._get_supervisor_ref()
-            res = await supervisor_ref.abort_request(model_uid, request_id)
+            res = await supervisor_ref.abort_request(
+                model_uid, request_id, block_duration
+            )
+            self._cancel_running_task(request_id, block_duration)
             return JSONResponse(content=res)
         except Exception as e:
             logger.error(e, exc_info=True)

xinference/client/restful/restful_client.py CHANGED Viewed

@@ -174,6 +174,7 @@ class RESTfulRerankModelHandle(RESTfulModelHandle):
             "max_chunks_per_doc": max_chunks_per_doc,
             "return_documents": return_documents,
             "return_len": return_len,
+            "kwargs": json.dumps(kwargs),
         }
         request_body.update(kwargs)
         response = requests.post(url, json=request_body, headers=self.auth_headers)
@@ -1357,7 +1358,7 @@ class Client:
         response_data = response.json()
         return response_data
-    def abort_request(self, model_uid: str, request_id: str):
+    def abort_request(self, model_uid: str, request_id: str, block_duration: int = 30):
         """
         Abort a request.
         Abort a submitted request. If the request is finished or not found, this method will be a no-op.
@@ -1369,13 +1370,18 @@ class Client:
             Model uid.
         request_id: str
             Request id.
+        block_duration: int
+            The duration to make the request id abort. If set to 0, the abort_request will be immediate, which may
+            prevent it from taking effect if it arrives before the request operation.
         Returns
         -------
         Dict
             Return empty dict.
         """
         url = f"{self.base_url}/v1/models/{model_uid}/requests/{request_id}/abort"
-        response = requests.post(url, headers=self._headers)
+        response = requests.post(
+            url, headers=self._headers, json={"block_duration": block_duration}
+        )
         if response.status_code != 200:
             raise RuntimeError(
                 f"Failed to abort request, detail: {_get_error_string(response)}"

xinference/constants.py CHANGED Viewed

@@ -88,3 +88,4 @@ XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE = os.environ.get(
     XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE, None
 )
 XINFERENCE_LAUNCH_MODEL_RETRY = 3
+XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION = 30

xinference/core/model.py CHANGED Viewed

@@ -41,6 +41,7 @@ import sse_starlette.sse
 import xoscar as xo
 from ..constants import (
+    XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
     XINFERENCE_LAUNCH_MODEL_RETRY,
     XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE,
 )
@@ -57,7 +58,7 @@ import logging
 logger = logging.getLogger(__name__)
 from ..device_utils import empty_cache
-from .utils import json_dumps, log_async
+from .utils import CancelMixin, json_dumps, log_async
 try:
     from torch.cuda import OutOfMemoryError
@@ -136,7 +137,7 @@ def oom_check(fn):
         return _wrapper
-class ModelActor(xo.StatelessActor):
+class ModelActor(xo.StatelessActor, CancelMixin):
     _replica_model_uid: Optional[str]
     @classmethod
@@ -553,6 +554,7 @@ class ModelActor(xo.StatelessActor):
     @oom_check
     async def _call_wrapper(self, output_type: str, fn: Callable, *args, **kwargs):
+        self._add_running_task(kwargs.get("request_id"))
         if self._lock is None:
             if inspect.iscoroutinefunction(fn):
                 ret = await fn(*args, **kwargs)
@@ -761,9 +763,14 @@ class ModelActor(xo.StatelessActor):
                     prompt_tokens,
                 )
-    async def abort_request(self, request_id: str) -> str:
+    async def abort_request(
+        self,
+        request_id: str,
+        block_duration: int = XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
+    ) -> str:
         from .utils import AbortRequestMessage
+        self._cancel_running_task(request_id, block_duration)
         if self.allow_batching():
             if self._scheduler_ref is None:
                 return AbortRequestMessage.NOT_FOUND.name

xinference/core/supervisor.py CHANGED Viewed

@@ -35,6 +35,7 @@ from typing import (
 import xoscar as xo
 from ..constants import (
+    XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
     XINFERENCE_DISABLE_HEALTH_CHECK,
     XINFERENCE_HEALTH_CHECK_FAILURE_THRESHOLD,
     XINFERENCE_HEALTH_CHECK_INTERVAL,
@@ -1213,7 +1214,12 @@ class SupervisorActor(xo.StatelessActor):
         return cached_models
     @log_async(logger=logger)
-    async def abort_request(self, model_uid: str, request_id: str) -> Dict:
+    async def abort_request(
+        self,
+        model_uid: str,
+        request_id: str,
+        block_duration: int = XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
+    ) -> Dict:
         from .scheduler import AbortRequestMessage
         res = {"msg": AbortRequestMessage.NO_OP.name}
@@ -1228,7 +1234,7 @@ class SupervisorActor(xo.StatelessActor):
             if worker_ref is None:
                 continue
             model_ref = await worker_ref.get_model(model_uid=rep_mid)
-            result_info = await model_ref.abort_request(request_id)
+            result_info = await model_ref.abort_request(request_id, block_duration)
             res["msg"] = result_info
             if result_info == AbortRequestMessage.DONE.name:
                 break

xinference/core/utils.py CHANGED Viewed

@@ -11,11 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import asyncio
 import logging
 import os
 import random
 import string
 import uuid
+import weakref
 from enum import Enum
 from typing import Dict, Generator, List, Optional, Tuple, Union
@@ -23,7 +25,10 @@ import orjson
 from pynvml import nvmlDeviceGetCount, nvmlInit, nvmlShutdown
 from .._compat import BaseModel
-from ..constants import XINFERENCE_LOG_ARG_MAX_LENGTH
+from ..constants import (
+    XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
+    XINFERENCE_LOG_ARG_MAX_LENGTH,
+)
 logger = logging.getLogger(__name__)
@@ -49,13 +54,20 @@ def log_async(
 ):
     import time
     from functools import wraps
+    from inspect import signature
     def decorator(func):
         func_name = func.__name__
+        sig = signature(func)
         @wraps(func)
         async def wrapped(*args, **kwargs):
-            request_id_str = kwargs.get("request_id", "")
+            try:
+                bound_args = sig.bind_partial(*args, **kwargs)
+                arguments = bound_args.arguments
+            except TypeError:
+                arguments = {}
+            request_id_str = arguments.get("request_id", "")
             if not request_id_str:
                 request_id_str = uuid.uuid1()
                 if func_name == "text_to_image":
@@ -269,3 +281,56 @@ def assign_replica_gpu(
     if isinstance(gpu_idx, list) and gpu_idx:
         return gpu_idx[rep_id::replica]
     return gpu_idx
+class CancelMixin:
+    _CANCEL_TASK_NAME = "abort_block"
+    def __init__(self):
+        self._running_tasks: weakref.WeakValueDictionary[
+            str, asyncio.Task
+        ] = weakref.WeakValueDictionary()
+    def _add_running_task(self, request_id: Optional[str]):
+        """Add current asyncio task to the running task.
+        :param request_id: The corresponding request id.
+        """
+        if request_id is None:
+            return
+        running_task = self._running_tasks.get(request_id)
+        if running_task is not None:
+            if running_task.get_name() == self._CANCEL_TASK_NAME:
+                raise Exception(f"The request has been aborted: {request_id}")
+            raise Exception(f"Duplicate request id: {request_id}")
+        current_task = asyncio.current_task()
+        assert current_task is not None
+        self._running_tasks[request_id] = current_task
+    def _cancel_running_task(
+        self,
+        request_id: Optional[str],
+        block_duration: int = XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
+    ):
+        """Cancel the running asyncio task.
+        :param request_id: The request id to cancel.
+        :param block_duration: The duration seconds to ensure the request can't be executed.
+        """
+        if request_id is None:
+            return
+        running_task = self._running_tasks.pop(request_id, None)
+        if running_task is not None:
+            running_task.cancel()
+        async def block_task():
+            """This task is for blocking the request for a duration."""
+            try:
+                await asyncio.sleep(block_duration)
+                logger.info("Abort block end for request: %s", request_id)
+            except asyncio.CancelledError:
+                logger.info("Abort block is cancelled for request: %s", request_id)
+        if block_duration > 0:
+            logger.info("Abort block start for request: %s", request_id)
+            self._running_tasks[request_id] = asyncio.create_task(
+                block_task(), name=self._CANCEL_TASK_NAME
+            )

xinference/model/audio/model_spec.json CHANGED Viewed

@@ -159,7 +159,7 @@
     "model_name": "FishSpeech-1.4",
     "model_family": "FishAudio",
     "model_id": "fishaudio/fish-speech-1.4",
-    "model_revision": "3c49651b8e583b6b13f55e375432e0d57e1aa84d",
+    "model_revision": "069c573759936b35191d3380deb89183c0656f59",
     "model_ability": "text-to-audio",
     "multilingual": true
   }

xinference/model/image/stable_diffusion/core.py CHANGED Viewed

@@ -17,9 +17,11 @@ import gc
 import inspect
 import itertools
 import logging
+import os
 import re
 import sys
 import warnings
+from glob import glob
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 import PIL.Image
@@ -194,8 +196,9 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         if sys.platform != "darwin" and torch_dtype is None:
             # The following params crashes on Mac M2
             self._torch_dtype = self._kwargs["torch_dtype"] = torch.float16
-            self._kwargs["variant"] = "fp16"
-            self._kwargs["use_safetensors"] = True
+            self._kwargs["use_safetensors"] = any(
+                glob(os.path.join(self._model_path, "*/*.safetensors"))
+            )
         if isinstance(torch_dtype, str):
             self._kwargs["torch_dtype"] = getattr(torch, torch_dtype)

xinference 0.16.3__py3-none-any.whl → 1.0.0__py3-none-any.whl

Potentially problematic release.

xinference 0.16.3__py3-none-any.whl → 1.0.0__py3-none-any.whl