xinference 1.5.0.post2__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +107 -11
- xinference/client/restful/restful_client.py +51 -11
- xinference/constants.py +5 -1
- xinference/core/media_interface.py +758 -0
- xinference/core/model.py +49 -9
- xinference/core/supervisor.py +1 -1
- xinference/core/utils.py +1 -1
- xinference/core/worker.py +33 -39
- xinference/deploy/cmdline.py +17 -0
- xinference/deploy/utils.py +0 -3
- xinference/model/audio/__init__.py +16 -27
- xinference/model/audio/core.py +2 -1
- xinference/model/audio/cosyvoice.py +4 -2
- xinference/model/audio/model_spec.json +63 -46
- xinference/model/audio/model_spec_modelscope.json +31 -14
- xinference/model/embedding/__init__.py +16 -24
- xinference/model/image/__init__.py +15 -25
- xinference/model/llm/__init__.py +40 -115
- xinference/model/llm/core.py +29 -6
- xinference/model/llm/llama_cpp/core.py +30 -347
- xinference/model/llm/llm_family.json +1674 -2203
- xinference/model/llm/llm_family.py +71 -7
- xinference/model/llm/llm_family_csghub.json +0 -32
- xinference/model/llm/llm_family_modelscope.json +1838 -2016
- xinference/model/llm/llm_family_openmind_hub.json +19 -325
- xinference/model/llm/lmdeploy/core.py +7 -2
- xinference/model/llm/mlx/core.py +23 -7
- xinference/model/llm/reasoning_parser.py +281 -5
- xinference/model/llm/sglang/core.py +39 -11
- xinference/model/llm/transformers/chatglm.py +9 -2
- xinference/model/llm/transformers/cogagent.py +10 -12
- xinference/model/llm/transformers/cogvlm2.py +6 -3
- xinference/model/llm/transformers/cogvlm2_video.py +3 -6
- xinference/model/llm/transformers/core.py +58 -60
- xinference/model/llm/transformers/deepseek_v2.py +4 -2
- xinference/model/llm/transformers/deepseek_vl.py +10 -4
- xinference/model/llm/transformers/deepseek_vl2.py +9 -4
- xinference/model/llm/transformers/gemma3.py +4 -5
- xinference/model/llm/transformers/glm4v.py +3 -21
- xinference/model/llm/transformers/glm_edge_v.py +3 -20
- xinference/model/llm/transformers/intern_vl.py +3 -6
- xinference/model/llm/transformers/internlm2.py +1 -1
- xinference/model/llm/transformers/minicpmv25.py +4 -2
- xinference/model/llm/transformers/minicpmv26.py +5 -3
- xinference/model/llm/transformers/omnilmm.py +1 -1
- xinference/model/llm/transformers/opt.py +1 -1
- xinference/model/llm/transformers/ovis2.py +302 -0
- xinference/model/llm/transformers/qwen-omni.py +8 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +5 -1
- xinference/model/llm/transformers/qwen_vl.py +5 -2
- xinference/model/llm/utils.py +96 -45
- xinference/model/llm/vllm/core.py +108 -24
- xinference/model/llm/vllm/distributed_executor.py +8 -7
- xinference/model/llm/vllm/xavier/allocator.py +1 -1
- xinference/model/llm/vllm/xavier/block_manager.py +1 -1
- xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
- xinference/model/llm/vllm/xavier/executor.py +1 -1
- xinference/model/llm/vllm/xavier/test/test_xavier.py +2 -11
- xinference/model/rerank/__init__.py +13 -24
- xinference/model/video/__init__.py +15 -25
- xinference/model/video/core.py +3 -3
- xinference/model/video/diffusers.py +157 -13
- xinference/model/video/model_spec.json +100 -0
- xinference/model/video/model_spec_modelscope.json +104 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +5 -4
- xinference/thirdparty/cosyvoice/bin/export_jit.py +50 -20
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +136 -51
- xinference/thirdparty/cosyvoice/bin/inference.py +15 -5
- xinference/thirdparty/cosyvoice/bin/train.py +7 -2
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +72 -52
- xinference/thirdparty/cosyvoice/cli/frontend.py +58 -58
- xinference/thirdparty/cosyvoice/cli/model.py +140 -155
- xinference/thirdparty/cosyvoice/dataset/processor.py +9 -5
- xinference/thirdparty/cosyvoice/flow/decoder.py +656 -54
- xinference/thirdparty/cosyvoice/flow/flow.py +69 -11
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +167 -63
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +1 -0
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +91 -1
- xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/generator.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +2 -2
- xinference/thirdparty/cosyvoice/llm/llm.py +198 -18
- xinference/thirdparty/cosyvoice/transformer/embedding.py +12 -4
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +124 -21
- xinference/thirdparty/cosyvoice/utils/class_utils.py +13 -0
- xinference/thirdparty/cosyvoice/utils/common.py +1 -1
- xinference/thirdparty/cosyvoice/utils/file_utils.py +40 -2
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +7 -0
- xinference/thirdparty/cosyvoice/utils/mask.py +4 -0
- xinference/thirdparty/cosyvoice/utils/train_utils.py +5 -1
- xinference/thirdparty/matcha/hifigan/xutils.py +3 -3
- xinference/types.py +2 -71
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
- xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
- xinference/web/ui/build/static/js/main.ae579a97.js +3 -0
- xinference/web/ui/build/static/js/main.ae579a97.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0196a4b09e3264614e54360d5f832c46b31d964ec58296765ebff191ace6adbf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/18fa271456b31cded36c05c4c71c6b2b1cf4e4128c1e32f0e45d8b9f21764397.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
- xinference/web/ui/src/locales/en.json +7 -4
- xinference/web/ui/src/locales/zh.json +7 -4
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/METADATA +56 -36
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/RECORD +120 -121
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/WHEEL +1 -1
- xinference/core/image_interface.py +0 -377
- xinference/model/llm/transformers/compression.py +0 -258
- xinference/model/llm/transformers/yi_vl.py +0 -239
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +0 -9
- xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
- xinference/web/ui/build/static/js/main.4b67a723.js +0 -3
- xinference/web/ui/build/static/js/main.4b67a723.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e4ba658c6b3b0490910acdae0c535a892257efb61539a24adf8038fc653bd22f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
- /xinference/web/ui/build/static/js/{main.4b67a723.js.LICENSE.txt → main.ae579a97.js.LICENSE.txt} +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
-    "date": "2025-…
+    "date": "2025-05-16T20:05:54+0800",
     "dirty": false,
     "error": null,
-    "full-revisionid": "…
-    "version": "1.…
+    "full-revisionid": "81a24f4646ace8f41c85a810237491d9c0ad5282",
+    "version": "1.6.0"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py
CHANGED
@@ -56,6 +56,7 @@ from ..constants import (
     XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION,
     XINFERENCE_DEFAULT_ENDPOINT_PORT,
     XINFERENCE_DISABLE_METRICS,
+    XINFERENCE_SSE_PING_ATTEMPTS_SECONDS,
 )
 from ..core.event import Event, EventCollectorActor, EventType
 from ..core.supervisor import SupervisorActor
@@ -201,13 +202,13 @@ class BuildGradioInterfaceRequest(BaseModel):
     model_lang: List[str]
 
 
-class …
+class BuildGradioMediaInterfaceRequest(BaseModel):
     model_type: str
     model_name: str
     model_family: str
     model_id: str
     controlnet: Union[None, List[Dict[str, Union[str, dict, None]]]]
-    model_revision: str
+    model_revision: Optional[str]
     model_ability: List[str]
@@ -352,7 +353,27 @@ class RESTfulAPI(CancelMixin):
         )
         self._router.add_api_route(
             "/v1/ui/images/{model_uid}",
-            self.…
+            self.build_gradio_media_interface,
+            methods=["POST"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/v1/ui/audios/{model_uid}",
+            self.build_gradio_media_interface,
+            methods=["POST"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/v1/ui/videos/{model_uid}",
+            self.build_gradio_media_interface,
             methods=["POST"],
             dependencies=(
                 [Security(self._auth_service, scopes=["models:read"])]
@@ -676,6 +697,17 @@ class RESTfulAPI(CancelMixin):
                 else None
             ),
         )
+        self._router.add_api_route(
+            "/v1/video/generations/image",
+            self.create_videos_from_images,
+            methods=["POST"],
+            response_model=VideoList,
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
         self._router.add_api_route(
             "/v1/chat/completions",
             self.create_chat_completion,
@@ -1183,16 +1215,16 @@ class RESTfulAPI(CancelMixin):
 
         return JSONResponse(content={"model_uid": model_uid})
 
-    async def …
+    async def build_gradio_media_interface(
         self, model_uid: str, request: Request
     ) -> JSONResponse:
         """
         Build a Gradio interface for image processing models.
         """
         payload = await request.json()
-        body = …
+        body = BuildGradioMediaInterfaceRequest.parse_obj(payload)
         assert self._app is not None
-        assert body.model_type…
+        assert body.model_type in ("image", "video", "audio")
 
         # asyncio.Lock() behaves differently in 3.9 than 3.10+
         # A event loop is required in 3.9 but not 3.10+
@@ -1206,12 +1238,12 @@ class RESTfulAPI(CancelMixin):
         )
         asyncio.set_event_loop(asyncio.new_event_loop())
 
-        from ..core.…
+        from ..core.media_interface import MediaInterface
 
         try:
             access_token = request.headers.get("Authorization")
             internal_host = "localhost" if self._host == "0.0.0.0" else self._host
-            interface = …
+            interface = MediaInterface(
                 endpoint=f"http://{internal_host}:{self._port}",
                 model_uid=model_uid,
                 model_family=body.model_family,
@@ -1221,6 +1253,7 @@ class RESTfulAPI(CancelMixin):
                 controlnet=body.controlnet,
                 access_token=access_token,
                 model_ability=body.model_ability,
+                model_type=body.model_type,
             ).build()
 
             gr.mount_gradio_app(self._app, interface, f"/{model_uid}")
@@ -1338,7 +1371,9 @@ class RESTfulAPI(CancelMixin):
             finally:
                 await model.decrease_serve_count()
 
-            return EventSourceResponse(…
+            return EventSourceResponse(
+                stream_results(), ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS
+            )
         else:
             try:
                 data = await model.generate(body.prompt, kwargs, raw_params=raw_kwargs)
@@ -1606,7 +1641,9 @@ class RESTfulAPI(CancelMixin):
                 await model.decrease_serve_count()
 
             return EventSourceResponse(
-                media_type="application/octet-stream",…
+                media_type="application/octet-stream",
+                content=stream_results(),
+                ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS,
             )
         else:
             return Response(media_type="application/octet-stream", content=out)
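The streaming hunks above attach a ping interval to the SSE responses: sse-starlette emits a comment-style keep-alive event on that cadence, so reverse proxies do not sever connections that stay quiet during long generations. A minimal sketch of the mechanism, assuming sse-starlette's documented `ping` parameter (the stream below is illustrative, not xinference code):

```python
import asyncio

from sse_starlette.sse import EventSourceResponse


async def slow_stream():
    # Long gaps between real chunks, as with a slow model generation.
    for chunk in ("partial", "result"):
        await asyncio.sleep(30)
        yield {"data": chunk}  # each dict becomes one SSE event


def build_response() -> EventSourceResponse:
    # With ping=600 (the XINFERENCE_SSE_PING_ATTEMPTS_SECONDS default),
    # sse-starlette sends a keep-alive ping every 600 seconds while no
    # real event is being emitted.
    return EventSourceResponse(slow_stream(), ping=600)
```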
@@ -1975,14 +2012,22 @@ class RESTfulAPI(CancelMixin):
             await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
 
+        request_id = None
         try:
             kwargs = json.loads(body.kwargs) if body.kwargs else {}
+            request_id = kwargs.get("request_id")
+            self._add_running_task(request_id)
             video_list = await model.text_to_video(
                 prompt=body.prompt,
                 n=body.n,
                 **kwargs,
             )
             return Response(content=video_list, media_type="application/json")
+        except asyncio.CancelledError:
+            err_str = f"The request has been cancelled: {request_id}"
+            logger.error(err_str)
+            await self._report_error_event(model_uid, err_str)
+            raise HTTPException(status_code=409, detail=err_str)
         except Exception as e:
             e = await self._get_model_last_error(model.uid, e)
             logger.error(e, exc_info=True)
@@ -1990,6 +2035,55 @@ class RESTfulAPI(CancelMixin):
             self.handle_request_limit_error(e)
             raise HTTPException(status_code=500, detail=str(e))
 
+    async def create_videos_from_images(
+        self,
+        model: str = Form(...),
+        image: UploadFile = File(media_type="application/octet-stream"),
+        prompt: Optional[Union[str, List[str]]] = Form(None),
+        negative_prompt: Optional[Union[str, List[str]]] = Form(None),
+        n: Optional[int] = Form(1),
+        kwargs: Optional[str] = Form(None),
+    ) -> Response:
+        model_uid = model
+        try:
+            model_ref = await (await self._get_supervisor_ref()).get_model(model_uid)
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
+        request_id = None
+        try:
+            if kwargs is not None:
+                parsed_kwargs = json.loads(kwargs)
+            else:
+                parsed_kwargs = {}
+            request_id = parsed_kwargs.get("request_id")
+            self._add_running_task(request_id)
+            video_list = await model_ref.image_to_video(
+                image=Image.open(image.file),
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                n=n,
+                **parsed_kwargs,
+            )
+            return Response(content=video_list, media_type="application/json")
+        except asyncio.CancelledError:
+            err_str = f"The request has been cancelled: {request_id}"
+            logger.error(err_str)
+            await self._report_error_event(model_uid, err_str)
+            raise HTTPException(status_code=409, detail=err_str)
+        except Exception as e:
+            e = await self._get_model_last_error(model_ref.uid, e)
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            self.handle_request_limit_error(e)
+            raise HTTPException(status_code=500, detail=str(e))
+
     async def create_chat_completion(self, request: Request) -> Response:
         raw_body = await request.json()
         body = CreateChatCompletion.parse_obj(raw_body)
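The handler above takes a multipart form, so the endpoint can be exercised directly over HTTP. A hedged sketch using `requests`; the host, port, and model uid are assumptions, and the `request_id` key inside `kwargs` is the value the handler registers through `_add_running_task` so the task becomes cancellable:

```python
import json

import requests

# Assumed local deployment and an already-launched image-to-video model uid.
resp = requests.post(
    "http://localhost:9997/v1/video/generations/image",
    data={
        "model": "my-i2v-model",
        "prompt": "a sailboat drifting at dusk",
        "n": 1,
        # Picked up by the handler and registered as a cancellable task.
        "kwargs": json.dumps({"request_id": "i2v-demo-1"}),
    },
    files={"image": ("image", open("frame.png", "rb"), "application/octet-stream")},
)
resp.raise_for_status()
print(resp.json())  # a VideoList payload
```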
@@ -2122,7 +2216,9 @@ class RESTfulAPI(CancelMixin):
             finally:
                 await model.decrease_serve_count()
 
-            return EventSourceResponse(…
+            return EventSourceResponse(
+                stream_results(), ping=XINFERENCE_SSE_PING_ATTEMPTS_SECONDS
+            )
         else:
             try:
                 data = await model.chat(
xinference/client/restful/restful_client.py
CHANGED

@@ -28,7 +28,6 @@ if TYPE_CHECKING:
     CompletionChunk,
     Embedding,
     ImageList,
-    LlamaCppGenerateConfig,
     PytorchGenerateConfig,
     VideoList,
 )
@@ -464,14 +463,59 @@ class RESTfulVideoModelHandle(RESTfulModelHandle):
         response_data = response.json()
         return response_data
 
+    def image_to_video(
+        self,
+        image: Union[str, bytes],
+        prompt: str,
+        negative_prompt: Optional[str] = None,
+        n: int = 1,
+        **kwargs,
+    ) -> "VideoList":
+        """
+        Creates a video by the input image and text.
+
+        Parameters
+        ----------
+        image: `Union[str, bytes]`
+            The input image to condition the generation on.
+        prompt: `str` or `List[str]`
+            The prompt or prompts to guide video generation. If not defined, you need to pass `prompt_embeds`.
+        negative_prompt (`str` or `List[str]`, *optional*):
+            The prompt or prompts not to guide the image generation.
+        n: `int`, defaults to 1
+            The number of videos to generate per prompt. Must be between 1 and 10.
+        Returns
+        -------
+        VideoList
+            A list of video objects.
+        """
+        url = f"{self._base_url}/v1/video/generations/image"
+        params = {
+            "model": self._model_uid,
+            "prompt": prompt,
+            "negative_prompt": negative_prompt,
+            "n": n,
+            "kwargs": json.dumps(kwargs),
+        }
+        files: List[Any] = []
+        for key, value in params.items():
+            files.append((key, (None, value)))
+        files.append(("image", ("image", image, "application/octet-stream")))
+        response = requests.post(url, files=files, headers=self.auth_headers)
+        if response.status_code != 200:
+            raise RuntimeError(
+                f"Failed to create the video from image, detail: {_get_error_string(response)}"
+            )
+
+        response_data = response.json()
+        return response_data
+
 
 class RESTfulGenerateModelHandle(RESTfulModelHandle):
     def generate(
         self,
         prompt: str,
-        generate_config: Optional[
-            Union["LlamaCppGenerateConfig", "PytorchGenerateConfig"]
-        ] = None,
+        generate_config: Optional["PytorchGenerateConfig"] = None,
     ) -> Union["Completion", Iterator["CompletionChunk"]]:
         """
         Creates a completion for the provided prompt and parameters via RESTful APIs.
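On the client side, the new `image_to_video` handle method wraps that endpoint. A usage sketch, assuming a server at localhost:9997 and a placeholder image-to-video model uid:

```python
from xinference.client import RESTfulClient

client = RESTfulClient("http://localhost:9997")
# For video models, get_model returns a RESTfulVideoModelHandle.
model = client.get_model("my-i2v-model")

with open("frame.png", "rb") as f:
    video_list = model.image_to_video(
        image=f.read(),
        prompt="a sailboat drifting at dusk",
        n=1,
    )
print(video_list)
```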
@@ -480,9 +524,8 @@ class RESTfulGenerateModelHandle(RESTfulModelHandle):
         ----------
         prompt: str
             The user's message or user's input.
-        generate_config: Optional[…
+        generate_config: Optional["PytorchGenerateConfig"]
             Additional configuration for the chat generation.
-            "LlamaCppGenerateConfig" -> Configuration for llama-cpp-python model
             "PytorchGenerateConfig" -> Configuration for pytorch model
 
         Returns
@@ -528,9 +571,7 @@ class RESTfulChatModelHandle(RESTfulGenerateModelHandle):
         self,
         messages: List[Dict],
         tools: Optional[List[Dict]] = None,
-        generate_config: Optional[
-            Union["LlamaCppGenerateConfig", "PytorchGenerateConfig"]
-        ] = None,
+        generate_config: Optional["PytorchGenerateConfig"] = None,
     ) -> Union["ChatCompletion", Iterator["ChatCompletionChunk"]]:
         """
         Given a list of messages comprising a conversation, the model will return a response via RESTful APIs.
@@ -541,9 +582,8 @@ class RESTfulChatModelHandle(RESTfulGenerateModelHandle):
             A list of messages comprising the conversation so far.
         tools: Optional[List[Dict]]
             A tool list.
-        generate_config: Optional[…
+        generate_config: Optional["PytorchGenerateConfig"]
             Additional configuration for the chat generation.
-            "LlamaCppGenerateConfig" -> configuration for llama-cpp-python model
             "PytorchGenerateConfig" -> configuration for pytorch model
 
         Returns
xinference/constants.py
CHANGED
@@ -29,7 +29,8 @@ XINFERENCE_ENV_DISABLE_HEALTH_CHECK = "XINFERENCE_DISABLE_HEALTH_CHECK"
 XINFERENCE_ENV_DISABLE_METRICS = "XINFERENCE_DISABLE_METRICS"
 XINFERENCE_ENV_DOWNLOAD_MAX_ATTEMPTS = "XINFERENCE_DOWNLOAD_MAX_ATTEMPTS"
 XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE = "XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE"
-XINFERENCE_ENV_VIRTUAL_ENV = "…
+XINFERENCE_ENV_VIRTUAL_ENV = "XINFERENCE_ENABLE_VIRTUAL_ENV"
+XINFERENCE_ENV_SSE_PING_ATTEMPTS_SECONDS = "XINFERENCE_SSE_PING_ATTEMPTS_SECONDS"
 
 
 def get_xinference_home() -> str:
@@ -89,6 +90,9 @@ XINFERENCE_DOWNLOAD_MAX_ATTEMPTS = int(
 XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE = os.environ.get(
     XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE, None
 )
+XINFERENCE_SSE_PING_ATTEMPTS_SECONDS = int(
+    os.environ.get(XINFERENCE_ENV_SSE_PING_ATTEMPTS_SECONDS, 600)
+)
 XINFERENCE_LAUNCH_MODEL_RETRY = 3
 XINFERENCE_DEFAULT_CANCEL_BLOCK_DURATION = 30
 XINFERENCE_ENABLE_VIRTUAL_ENV = bool(int(os.getenv(XINFERENCE_ENV_VIRTUAL_ENV, "0")))
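Since the module reads the environment once at import time, an override must be in place before `xinference.constants` is imported, for example by exporting the variable before the server starts. A small illustration:

```python
import os

# Must be set before xinference.constants is imported; the module reads
# the environment only once, at import time.
os.environ["XINFERENCE_SSE_PING_ATTEMPTS_SECONDS"] = "120"

from xinference.constants import XINFERENCE_SSE_PING_ATTEMPTS_SECONDS

print(XINFERENCE_SSE_PING_ATTEMPTS_SECONDS)  # 120
```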
|