xinference 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their respective registries, and is provided for informational purposes only.

This version of xinference might be problematic.
Files changed (132)
  1. xinference/_compat.py +1 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +54 -1
  4. xinference/client/restful/restful_client.py +82 -2
  5. xinference/constants.py +3 -0
  6. xinference/core/chat_interface.py +297 -83
  7. xinference/core/model.py +24 -3
  8. xinference/core/progress_tracker.py +16 -8
  9. xinference/core/supervisor.py +51 -1
  10. xinference/core/worker.py +315 -47
  11. xinference/deploy/cmdline.py +33 -1
  12. xinference/model/audio/core.py +11 -1
  13. xinference/model/audio/megatts.py +105 -0
  14. xinference/model/audio/model_spec.json +24 -1
  15. xinference/model/audio/model_spec_modelscope.json +26 -1
  16. xinference/model/core.py +14 -0
  17. xinference/model/embedding/core.py +6 -1
  18. xinference/model/flexible/core.py +6 -1
  19. xinference/model/image/core.py +6 -1
  20. xinference/model/image/model_spec.json +17 -1
  21. xinference/model/image/model_spec_modelscope.json +17 -1
  22. xinference/model/llm/__init__.py +4 -6
  23. xinference/model/llm/core.py +5 -0
  24. xinference/model/llm/llama_cpp/core.py +46 -17
  25. xinference/model/llm/llm_family.json +530 -85
  26. xinference/model/llm/llm_family.py +24 -1
  27. xinference/model/llm/llm_family_modelscope.json +572 -1
  28. xinference/model/llm/mlx/core.py +16 -2
  29. xinference/model/llm/reasoning_parser.py +3 -3
  30. xinference/model/llm/sglang/core.py +111 -13
  31. xinference/model/llm/transformers/__init__.py +14 -0
  32. xinference/model/llm/transformers/core.py +31 -6
  33. xinference/model/llm/transformers/deepseek_vl.py +1 -1
  34. xinference/model/llm/transformers/deepseek_vl2.py +287 -0
  35. xinference/model/llm/transformers/gemma3.py +17 -2
  36. xinference/model/llm/transformers/intern_vl.py +28 -18
  37. xinference/model/llm/transformers/minicpmv26.py +21 -2
  38. xinference/model/llm/transformers/qwen-omni.py +308 -0
  39. xinference/model/llm/transformers/qwen2_audio.py +1 -1
  40. xinference/model/llm/transformers/qwen2_vl.py +20 -4
  41. xinference/model/llm/utils.py +37 -15
  42. xinference/model/llm/vllm/core.py +184 -8
  43. xinference/model/llm/vllm/distributed_executor.py +320 -0
  44. xinference/model/rerank/core.py +22 -12
  45. xinference/model/utils.py +118 -1
  46. xinference/model/video/core.py +6 -1
  47. xinference/thirdparty/deepseek_vl2/__init__.py +31 -0
  48. xinference/thirdparty/deepseek_vl2/models/__init__.py +26 -0
  49. xinference/thirdparty/deepseek_vl2/models/configuration_deepseek.py +210 -0
  50. xinference/thirdparty/deepseek_vl2/models/conversation.py +310 -0
  51. xinference/thirdparty/deepseek_vl2/models/modeling_deepseek.py +1975 -0
  52. xinference/thirdparty/deepseek_vl2/models/modeling_deepseek_vl_v2.py +697 -0
  53. xinference/thirdparty/deepseek_vl2/models/processing_deepseek_vl_v2.py +675 -0
  54. xinference/thirdparty/deepseek_vl2/models/siglip_vit.py +661 -0
  55. xinference/thirdparty/deepseek_vl2/serve/__init__.py +0 -0
  56. xinference/thirdparty/deepseek_vl2/serve/app_modules/__init__.py +0 -0
  57. xinference/thirdparty/deepseek_vl2/serve/app_modules/gradio_utils.py +83 -0
  58. xinference/thirdparty/deepseek_vl2/serve/app_modules/overwrites.py +81 -0
  59. xinference/thirdparty/deepseek_vl2/serve/app_modules/presets.py +115 -0
  60. xinference/thirdparty/deepseek_vl2/serve/app_modules/utils.py +333 -0
  61. xinference/thirdparty/deepseek_vl2/serve/assets/Kelpy-Codos.js +100 -0
  62. xinference/thirdparty/deepseek_vl2/serve/assets/avatar.png +0 -0
  63. xinference/thirdparty/deepseek_vl2/serve/assets/custom.css +355 -0
  64. xinference/thirdparty/deepseek_vl2/serve/assets/custom.js +22 -0
  65. xinference/thirdparty/deepseek_vl2/serve/assets/favicon.ico +0 -0
  66. xinference/thirdparty/deepseek_vl2/serve/assets/simsun.ttc +0 -0
  67. xinference/thirdparty/deepseek_vl2/serve/inference.py +197 -0
  68. xinference/thirdparty/deepseek_vl2/utils/__init__.py +18 -0
  69. xinference/thirdparty/deepseek_vl2/utils/io.py +80 -0
  70. xinference/thirdparty/megatts3/__init__.py +0 -0
  71. xinference/thirdparty/megatts3/tts/frontend_function.py +175 -0
  72. xinference/thirdparty/megatts3/tts/gradio_api.py +93 -0
  73. xinference/thirdparty/megatts3/tts/infer_cli.py +277 -0
  74. xinference/thirdparty/megatts3/tts/modules/aligner/whisper_small.py +318 -0
  75. xinference/thirdparty/megatts3/tts/modules/ar_dur/ar_dur_predictor.py +362 -0
  76. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/layers.py +64 -0
  77. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/nar_tts_modules.py +73 -0
  78. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rel_transformer.py +403 -0
  79. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rot_transformer.py +649 -0
  80. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/seq_utils.py +342 -0
  81. xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/transformer.py +767 -0
  82. xinference/thirdparty/megatts3/tts/modules/llm_dit/cfm.py +309 -0
  83. xinference/thirdparty/megatts3/tts/modules/llm_dit/dit.py +180 -0
  84. xinference/thirdparty/megatts3/tts/modules/llm_dit/time_embedding.py +44 -0
  85. xinference/thirdparty/megatts3/tts/modules/llm_dit/transformer.py +230 -0
  86. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/diag_gaussian.py +67 -0
  87. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/hifigan_modules.py +283 -0
  88. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/seanet_encoder.py +38 -0
  89. xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/wavvae_v3.py +60 -0
  90. xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/conv.py +154 -0
  91. xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/lstm.py +51 -0
  92. xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/seanet.py +126 -0
  93. xinference/thirdparty/megatts3/tts/utils/audio_utils/align.py +36 -0
  94. xinference/thirdparty/megatts3/tts/utils/audio_utils/io.py +95 -0
  95. xinference/thirdparty/megatts3/tts/utils/audio_utils/plot.py +90 -0
  96. xinference/thirdparty/megatts3/tts/utils/commons/ckpt_utils.py +171 -0
  97. xinference/thirdparty/megatts3/tts/utils/commons/hparams.py +215 -0
  98. xinference/thirdparty/megatts3/tts/utils/text_utils/dict.json +1 -0
  99. xinference/thirdparty/megatts3/tts/utils/text_utils/ph_tone_convert.py +94 -0
  100. xinference/thirdparty/megatts3/tts/utils/text_utils/split_text.py +90 -0
  101. xinference/thirdparty/megatts3/tts/utils/text_utils/text_encoder.py +280 -0
  102. xinference/types.py +10 -0
  103. xinference/utils.py +54 -0
  104. xinference/web/ui/build/asset-manifest.json +6 -6
  105. xinference/web/ui/build/index.html +1 -1
  106. xinference/web/ui/build/static/css/main.0f6523be.css +2 -0
  107. xinference/web/ui/build/static/css/main.0f6523be.css.map +1 -0
  108. xinference/web/ui/build/static/js/main.58bd483c.js +3 -0
  109. xinference/web/ui/build/static/js/main.58bd483c.js.map +1 -0
  110. xinference/web/ui/node_modules/.cache/babel-loader/3bff8cbe9141f937f4d98879a9771b0f48e0e4e0dbee8e647adbfe23859e7048.json +1 -0
  111. xinference/web/ui/node_modules/.cache/babel-loader/4500b1a622a031011f0a291701e306b87e08cbc749c50e285103536b85b6a914.json +1 -0
  112. xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +1 -0
  113. xinference/web/ui/node_modules/.cache/babel-loader/69081049f0c7447544b7cfd73dd13d8846c02fe5febe4d81587e95c89a412d5b.json +1 -0
  114. xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +1 -0
  115. xinference/web/ui/node_modules/.cache/babel-loader/bf2b211b0d1b6465eff512d64c869d748f803c5651a7c24e48de6ea3484a7bfe.json +1 -0
  116. xinference/web/ui/src/locales/en.json +2 -1
  117. xinference/web/ui/src/locales/zh.json +2 -1
  118. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/METADATA +128 -115
  119. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/RECORD +124 -63
  120. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/WHEEL +1 -1
  121. xinference/web/ui/build/static/css/main.b494ae7e.css +0 -2
  122. xinference/web/ui/build/static/css/main.b494ae7e.css.map +0 -1
  123. xinference/web/ui/build/static/js/main.3cea968e.js +0 -3
  124. xinference/web/ui/build/static/js/main.3cea968e.js.map +0 -1
  125. xinference/web/ui/node_modules/.cache/babel-loader/27bcada3ee8f89d21184b359f022fc965f350ffaca52c9814c29f1fc37121173.json +0 -1
  126. xinference/web/ui/node_modules/.cache/babel-loader/7f59e45e3f268ab8a4788b6fb024cf8dab088736dff22f5a3a39c122a83ab930.json +0 -1
  127. xinference/web/ui/node_modules/.cache/babel-loader/dcd60488509450bfff37bfff56de2c096d51de17dd00ec60d4db49c8b483ada1.json +0 -1
  128. xinference/web/ui/node_modules/.cache/babel-loader/e547bbb18abb4a474b675a8d5782d25617566bea0af8caa9b836ce5649e2250a.json +0 -1
  129. /xinference/web/ui/build/static/js/{main.3cea968e.js.LICENSE.txt → main.58bd483c.js.LICENSE.txt} +0 -0
  130. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/entry_points.txt +0 -0
  131. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info/licenses}/LICENSE +0 -0
  132. {xinference-1.4.0.dist-info → xinference-1.5.0.dist-info}/top_level.txt +0 -0
xinference/_compat.py CHANGED
@@ -102,6 +102,7 @@ class CreateChatCompletionOpenAI(BaseModel):
     frequency_penalty: Optional[float]
     logit_bias: Optional[Dict[str, int]]
     logprobs: Optional[bool]
+    max_completion_tokens: Optional[int]
     max_tokens: Optional[int]
     n: Optional[int]
     parallel_tool_calls: Optional[bool]
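The new `max_completion_tokens` field brings the Pydantic model in line with OpenAI's current chat-completions API; a companion change in restful_api.py below maps it onto the existing `max_tokens` handling. A minimal sketch of exercising it through an OpenAI-compatible client (the endpoint URL and model UID are placeholders, and this assumes an openai-python version recent enough to accept the parameter):

```python
# Hypothetical usage; endpoint URL and model UID are placeholders.
import openai

client = openai.OpenAI(base_url="http://127.0.0.1:9997/v1", api_key="not-used")
resp = client.chat.completions.create(
    model="my-llm",  # placeholder model UID
    messages=[{"role": "user", "content": "Hello!"}],
    # 1.4.0 only honored max_tokens; 1.5.0 also accepts max_completion_tokens
    # and maps it onto max_tokens server-side (see restful_api.py below).
    max_completion_tokens=128,
)
print(resp.choices[0].message.content)
```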
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2025-03-21T14:33:52+0800",
+ "date": "2025-04-19T20:32:22+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "ac88d425e3d5fc12166e22c4032286327871f5f2",
- "version": "1.4.0"
+ "full-revisionid": "ee8d025e1c046b22b3b148e5e97c0e107c979ee3",
+ "version": "1.5.0"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py CHANGED
@@ -468,6 +468,26 @@ class RESTfulAPI(CancelMixin):
                 else None
             ),
         )
+        self._router.add_api_route(
+            "/v1/models/{model_uid}/progress",
+            self.get_launch_model_progress,
+            methods=["GET"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/v1/models/{model_uid}/cancel",
+            self.cancel_launch_model,
+            methods=["POST"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:stop"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
         self._router.add_api_route(
             "/v1/completions",
             self.create_completion,
@@ -1024,6 +1044,10 @@ class RESTfulAPI(CancelMixin):
         except RuntimeError as re:
             logger.error(str(re), exc_info=True)
             raise HTTPException(status_code=503, detail=str(re))
+        except asyncio.CancelledError as ce:
+            # cancelled by user
+            logger.error(str(ce), exc_info=True)
+            raise HTTPException(status_code=499, detail=str(ce))
         except Exception as e:
             logger.error(str(e), exc_info=True)
             raise HTTPException(status_code=500, detail=str(e))
@@ -1044,6 +1068,26 @@ class RESTfulAPI(CancelMixin):
             raise HTTPException(status_code=500, detail=str(e))
         return JSONResponse(content=infos)
 
+    async def get_launch_model_progress(self, model_uid: str) -> JSONResponse:
+        try:
+            progress = await (
+                await self._get_supervisor_ref()
+            ).get_launch_builtin_model_progress(model_uid)
+        except Exception as e:
+            logger.error(str(e), exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+        return JSONResponse(content={"progress": progress})
+
+    async def cancel_launch_model(self, model_uid: str) -> JSONResponse:
+        try:
+            await (await self._get_supervisor_ref()).cancel_launch_builtin_model(
+                model_uid
+            )
+        except Exception as e:
+            logger.error(str(e), exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+        return JSONResponse(content=None)
+
     async def launch_model_by_version(
         self, request: Request, wait_ready: bool = Query(True)
     ) -> JSONResponse:
@@ -1514,8 +1558,11 @@ class RESTfulAPI(CancelMixin):
         prompt_speech: Optional[UploadFile] = File(
             None, media_type="application/octet-stream"
         ),
+        prompt_latent: Optional[UploadFile] = File(
+            None, media_type="application/octet-stream"
+        ),
     ) -> Response:
-        if prompt_speech:
+        if prompt_speech or prompt_latent:
            f = await request.form()
         else:
            f = await request.json()
@@ -1539,6 +1586,8 @@ class RESTfulAPI(CancelMixin):
         parsed_kwargs = {}
         if prompt_speech is not None:
             parsed_kwargs["prompt_speech"] = await prompt_speech.read()
+        if prompt_latent is not None:
+            parsed_kwargs["prompt_latent"] = await prompt_latent.read()
         out = await model.speech(
             input=body.input,
             voice=body.voice,
@@ -1952,6 +2001,7 @@ class RESTfulAPI(CancelMixin):
             "logit_bias",
             "logit_bias_type",
             "user",
+            "max_completion_tokens",
         }
 
         raw_kwargs = {k: v for k, v in raw_body.items() if k not in exclude}
@@ -1964,6 +2014,9 @@ class RESTfulAPI(CancelMixin):
         if body.max_tokens is None:
             kwargs["max_tokens"] = max_tokens_field.default
 
+        if body.max_completion_tokens is not None:
+            kwargs["max_tokens"] = body.max_completion_tokens
+
         if body.logit_bias is not None:
             raise HTTPException(status_code=501, detail="Not implemented")
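Together, the new `/progress` and `/cancel` routes make model launches observable and interruptible over plain HTTP. A hypothetical polling loop against them (host, port, and model UID are placeholders; the progress payload is assumed to be a float in [0, 1]):

```python
# Hypothetical polling loop against the two new routes; host, port and
# model_uid are placeholders, not values from the diff.
import time

import requests

base = "http://127.0.0.1:9997"
model_uid = "my-model"

for _ in range(60):
    r = requests.get(f"{base}/v1/models/{model_uid}/progress")
    r.raise_for_status()
    if r.json()["progress"] >= 1.0:  # progress assumed to be a float in [0, 1]
        break
    time.sleep(2)
else:
    # Launch took too long: abort it through the new cancel route.
    requests.post(f"{base}/v1/models/{model_uid}/cancel")
```

Note the 499 status code above: a launch aborted through `/cancel` surfaces to the original launch request as `asyncio.CancelledError`, which the new handler translates into HTTP 499 (client closed request).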
xinference/client/restful/restful_client.py CHANGED
@@ -723,6 +723,7 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
         speed: float = 1.0,
         stream: bool = False,
         prompt_speech: Optional[bytes] = None,
+        prompt_latent: Optional[bytes] = None,
         **kwargs,
     ):
         """
@@ -743,6 +744,8 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
             Use stream or not.
         prompt_speech: bytes
             The audio bytes to be provided to the model.
+        prompt_latent: bytes
+            The latent bytes to be provided to the model.
 
         Returns
         -------
@@ -759,14 +762,22 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
             "stream": stream,
             "kwargs": json.dumps(kwargs),
         }
+        files: List[Any] = []
         if prompt_speech:
-            files: List[Any] = []
             files.append(
                 (
                     "prompt_speech",
                     ("prompt_speech", prompt_speech, "application/octet-stream"),
                 )
             )
+        if prompt_latent:
+            files.append(
+                (
+                    "prompt_latent",
+                    ("prompt_latent", prompt_latent, "application/octet-stream"),
+                )
+            )
+        if files:
             response = requests.post(
                 url, data=params, files=files, headers=self.auth_headers, stream=stream
             )
@@ -999,10 +1010,17 @@ class Client:
             "model_path": model_path,
         }
 
+        wait_ready = kwargs.pop("wait_ready", True)
+
         for key, value in kwargs.items():
             payload[str(key)] = value
 
-        response = requests.post(url, json=payload, headers=self._headers)
+        if wait_ready:
+            response = requests.post(url, json=payload, headers=self._headers)
+        else:
+            response = requests.post(
+                url, json=payload, headers=self._headers, params={"wait_ready": False}
+            )
         if response.status_code != 200:
             raise RuntimeError(
                 f"Failed to launch model, detail: {_get_error_string(response)}"
@@ -1035,6 +1053,68 @@ class Client:
                 f"Failed to terminate model, detail: {_get_error_string(response)}"
             )
 
+    def get_launch_model_progress(self, model_uid: str) -> dict:
+        """
+        Get progress of the specific model.
+
+        Parameters
+        ----------
+        model_uid: str
+            The unique id that identify the model we want.
+
+        Returns
+        -------
+        result: dict
+            Result that contains progress.
+
+        Raises
+        ------
+        RuntimeError
+            Report failure to get the wanted model with given model_uid. Provide details of failure through error message.
+        """
+        url = f"{self.base_url}/v1/models/{model_uid}/progress"
+
+        response = requests.get(url, headers=self._headers)
+        if response.status_code != 200:
+            raise RuntimeError(
+                f"Fail to get model launching progress, detail: {_get_error_string(response)}"
+            )
+        return response.json()
+
+    def cancel_launch_model(self, model_uid: str):
+        """
+        Cancel launching model.
+
+        Parameters
+        ----------
+        model_uid: str
+            The unique id that identify the model we want.
+
+        Raises
+        ------
+        RuntimeError
+            Report failure to get the wanted model with given model_uid. Provide details of failure through error message.
+        """
+        url = f"{self.base_url}/v1/models/{model_uid}/cancel"
+
+        response = requests.post(url, headers=self._headers)
+        if response.status_code != 200:
+            raise RuntimeError(
+                f"Fail to cancel launching model, detail: {_get_error_string(response)}"
+            )
+
+    def get_instance_info(self, model_name: str, model_uid: str):
+        url = f"{self.base_url}/v1/models/instances"
+        response = requests.get(
+            url,
+            headers=self._headers,
+            params={"model_name": model_name, "model_uid": model_uid},
+        )
+        if response.status_code != 200:
+            raise RuntimeError("Failed to get instance info")
+        response_data = response.json()
+        return response_data
+
     def _get_supervisor_internal_address(self):
         url = f"{self.base_url}/v1/address"
         response = requests.get(url, headers=self._headers)
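On the client side, the `wait_ready` keyword (popped from `kwargs` above) enables a non-blocking launch flow when combined with the two new methods. A sketch, assuming `launch_model` still returns the model UID when `wait_ready=False` and that the progress value is a float in [0, 1] (the model name is illustrative):

```python
# Sketch of the new non-blocking launch flow; the model name is illustrative.
import time

from xinference.client import RESTfulClient

client = RESTfulClient("http://127.0.0.1:9997")

# New in 1.5.0: wait_ready=False is forwarded as a query parameter, so the
# call returns before the model has finished loading.
model_uid = client.launch_model(
    model_name="qwen2.5-instruct",  # placeholder
    model_type="LLM",
    wait_ready=False,
)

# Poll until loading completes...
while client.get_launch_model_progress(model_uid)["progress"] < 1.0:
    time.sleep(2)

# ...or abort instead:
# client.cancel_launch_model(model_uid)
```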
xinference/constants.py CHANGED
@@ -29,6 +29,7 @@ XINFERENCE_ENV_DISABLE_HEALTH_CHECK = "XINFERENCE_DISABLE_HEALTH_CHECK"
 XINFERENCE_ENV_DISABLE_METRICS = "XINFERENCE_DISABLE_METRICS"
 XINFERENCE_ENV_DOWNLOAD_MAX_ATTEMPTS = "XINFERENCE_DOWNLOAD_MAX_ATTEMPTS"
 XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE = "XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE"
+XINFERENCE_ENV_VIRTUAL_ENV = "XINFERENCE_EANBLE_VIRTUAL_ENV"
 
 
 def get_xinference_home() -> str:
@@ -55,6 +56,7 @@ XINFERENCE_LOG_DIR = os.path.join(XINFERENCE_HOME, "logs")
 XINFERENCE_IMAGE_DIR = os.path.join(XINFERENCE_HOME, "image")
 XINFERENCE_VIDEO_DIR = os.path.join(XINFERENCE_HOME, "video")
 XINFERENCE_AUTH_DIR = os.path.join(XINFERENCE_HOME, "auth")
+XINFERENCE_VIRTUAL_ENV_DIR = os.path.join(XINFERENCE_HOME, "virtualenv")
 XINFERENCE_CSG_ENDPOINT = str(
     os.environ.get(XINFERENCE_ENV_CSG_ENDPOINT, "https://hub-stg.opencsg.com/")
 )
@@ -89,3 +91,4 @@ XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE = os.environ.get(
 )
 XINFERENCE_LAUNCH_MODEL_RETRY = 3
 XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION = 30
+XINFERENCE_ENABLE_VIRTUAL_ENV = bool(int(os.getenv(XINFERENCE_ENV_VIRTUAL_ENV, "0")))
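These three additions wire up a single opt-in flag: the environment variable named by `XINFERENCE_ENV_VIRTUAL_ENV` (spelled `XINFERENCE_EANBLE_VIRTUAL_ENV` in this release, copied verbatim from the hunk above and apparently a typo for "ENABLE") toggles `XINFERENCE_ENABLE_VIRTUAL_ENV`, with `XINFERENCE_VIRTUAL_ENV_DIR` as the backing directory. A sketch of flipping it on, relying on the constants being evaluated at import time:

```python
# Sketch only: the env var name is copied verbatim from the constant above.
import os

os.environ["XINFERENCE_EANBLE_VIRTUAL_ENV"] = "1"  # any non-zero integer enables it

from xinference import constants  # module-level constants read the env var here

assert constants.XINFERENCE_ENABLE_VIRTUAL_ENV
print(constants.XINFERENCE_VIRTUAL_ENV_DIR)  # <XINFERENCE_HOME>/virtualenv
```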