PyPI - xinference - Versions diffs - 1.10.0__py3-none-any.whl → 1.10.1__py3-none-any.whl - Mend

xinference 1.10.0__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (317) hide show

xinference/_version.py CHANGED Viewed

@@ -8,11 +8,11 @@ import json
 version_json = '''
 {
- "date": "2025-09-12T21:20:52+0800",
+ "date": "2025-09-30T23:23:16+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "b018733c97029fb59e8ffe55fadc6473232fbf23",
- "version": "1.10.0"
+ "full-revisionid": "98a3c8ad4a6debd97ef29cc05aad3514f4ba488c",
+ "version": "1.10.1"
 }
 '''  # END VERSION_JSON

xinference/api/restful_api.py CHANGED Viewed

@@ -2136,7 +2136,7 @@ class RESTfulAPI(CancelMixin):
     async def create_variations(
         self,
         model: str = Form(...),
-        image: UploadFile = File(media_type="application/octet-stream"),
+        image: List[UploadFile] = File(media_type="application/octet-stream"),
         prompt: Optional[Union[str, List[str]]] = Form(None),
         negative_prompt: Optional[Union[str, List[str]]] = Form(None),
         n: Optional[int] = Form(1),
@@ -2164,8 +2164,17 @@ class RESTfulAPI(CancelMixin):
                 parsed_kwargs = {}
             request_id = parsed_kwargs.get("request_id")
             self._add_running_task(request_id)
+            # Handle single image or multiple images
+            if len(image) == 1:
+                # Single image
+                image_data = Image.open(image[0].file)
+            else:
+                # Multiple images - convert to list of PIL Images
+                image_data = [Image.open(img.file) for img in image]
             image_list = await model_ref.image_to_image(
-                image=Image.open(image.file),
+                image=image_data,
                 prompt=prompt,
                 negative_prompt=negative_prompt,
                 n=n,
@@ -2858,19 +2867,6 @@ class RESTfulAPI(CancelMixin):
     def extract_guided_params(raw_body: dict) -> dict:
         kwargs = {}
         raw_extra_body: dict = raw_body.get("extra_body")  # type: ignore
-        # Convert OpenAI response_format to vLLM guided decoding
-        response_format = raw_body.get("response_format")
-        if response_format is not None:
-            if isinstance(response_format, dict):
-                format_type = response_format.get("type")
-                if format_type == "json_schema":
-                    json_schema = response_format.get("json_schema")
-                    if isinstance(json_schema, dict):
-                        schema = json_schema.get("schema")
-                        if schema is not None:
-                            kwargs["guided_json"] = schema
-                elif format_type == "json_object":
-                    kwargs["guided_json_object"] = True
         if raw_body.get("guided_json"):
             kwargs["guided_json"] = raw_body.get("guided_json")
         if raw_body.get("guided_regex") is not None:
@@ -2889,19 +2885,6 @@ class RESTfulAPI(CancelMixin):
             )
         # Parse OpenAI extra_body
         if raw_extra_body is not None:
-            # Convert OpenAI response_format to vLLM guided decoding
-            extra_response_format = raw_extra_body.get("response_format")
-            if extra_response_format is not None:
-                if isinstance(extra_response_format, dict):
-                    format_type = extra_response_format.get("type")
-                    if format_type == "json_schema":
-                        json_schema = extra_response_format.get("json_schema")
-                        if isinstance(json_schema, dict):
-                            schema = json_schema.get("schema")
-                            if schema is not None:
-                                kwargs["guided_json"] = schema
-                    elif format_type == "json_object":
-                        kwargs["guided_json_object"] = True
             if raw_extra_body.get("guided_json"):
                 kwargs["guided_json"] = raw_extra_body.get("guided_json")
             if raw_extra_body.get("guided_regex") is not None:

xinference/client/restful/async_restful_client.py CHANGED Viewed

@@ -285,7 +285,7 @@ class AsyncRESTfulImageModelHandle(AsyncRESTfulModelHandle):
     async def image_to_image(
         self,
-        image: Union[str, bytes],
+        image: Union[str, bytes, List[Union[str, bytes]]],
         prompt: str,
         negative_prompt: Optional[str] = None,
         n: int = 1,
@@ -298,7 +298,7 @@ class AsyncRESTfulImageModelHandle(AsyncRESTfulModelHandle):
         Parameters
         ----------
-        image: `Union[str, bytes]`
+        image: `Union[str, bytes, List[Union[str, bytes]]]`
             The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
             specified as `torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be
             accepted as an image. The dimensions of the output image defaults to `image`'s dimensions. If height
@@ -338,7 +338,24 @@ class AsyncRESTfulImageModelHandle(AsyncRESTfulModelHandle):
         files: List[Any] = []
         for key, value in params.items():
             files.append((key, (None, value)))
-        files.append(("image", ("image", image, "application/octet-stream")))
+        # Handle both single image and list of images
+        if isinstance(image, list):
+            if len(image) == 0:
+                raise ValueError("Image list cannot be empty")
+            elif len(image) == 1:
+                # Single image in list, use it directly
+                files.append(("image", ("image", image[0], "application/octet-stream")))
+            else:
+                # Multiple images - send all images with same field name
+                # FastAPI will collect them into a list
+                for img_data in image:
+                    files.append(
+                        ("image", ("image", img_data, "application/octet-stream"))
+                    )
+        else:
+            # Single image
+            files.append(("image", ("image", image, "application/octet-stream")))
         response = await self.session.post(url, files=files, headers=self.auth_headers)
         if response.status != 200:
             raise RuntimeError(

xinference/client/restful/restful_client.py CHANGED Viewed

@@ -250,7 +250,7 @@ class RESTfulImageModelHandle(RESTfulModelHandle):
     def image_to_image(
         self,
-        image: Union[str, bytes],
+        image: Union[str, bytes, List[Union[str, bytes]]],
         prompt: str,
         negative_prompt: Optional[str] = None,
         n: int = 1,
@@ -263,7 +263,7 @@ class RESTfulImageModelHandle(RESTfulModelHandle):
         Parameters
         ----------
-        image: `Union[str, bytes]`
+        image: `Union[str, bytes, List[Union[str, bytes]]]`
             The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
             specified as `torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be
             accepted as an image. The dimensions of the output image defaults to `image`'s dimensions. If height
@@ -302,7 +302,24 @@ class RESTfulImageModelHandle(RESTfulModelHandle):
         files: List[Any] = []
         for key, value in params.items():
             files.append((key, (None, value)))
-        files.append(("image", ("image", image, "application/octet-stream")))
+        # Handle both single image and list of images
+        if isinstance(image, list):
+            if len(image) == 0:
+                raise ValueError("Image list cannot be empty")
+            elif len(image) == 1:
+                # Single image in list, use it directly
+                files.append(("image", ("image", image[0], "application/octet-stream")))
+            else:
+                # Multiple images - send all images with same field name
+                # FastAPI will collect them into a list
+                for img_data in image:
+                    files.append(
+                        ("image", ("image", img_data, "application/octet-stream"))
+                    )
+        else:
+            # Single image
+            files.append(("image", ("image", image, "application/octet-stream")))
         response = self.session.post(url, files=files, headers=self.auth_headers)
         if response.status_code != 200:
             raise RuntimeError(

xinference/core/supervisor.py CHANGED Viewed

@@ -31,6 +31,7 @@ from typing import (
     Literal,
     Optional,
     Tuple,
+    Type,
     Union,
 )
@@ -406,6 +407,26 @@ class SupervisorActor(xo.StatelessActor):
             "workers": self._worker_status,
         }
+    def _get_spec_dicts(
+        self, model_family: Any, cache_manager_cls: Type
+    ) -> Tuple[List[dict], List[str]]:
+        specs = []
+        download_hubs: Dict[str, None] = dict()
+        for spec in model_family.model_specs:
+            model_hub = spec.model_hub
+            if model_hub not in download_hubs:
+                download_hubs[model_hub] = None
+            if model_hub != "huggingface":
+                # since we only need to know all specs
+                # thus filter huggingface specs only
+                continue
+            model_family.model_specs = [spec]
+            cache_manager = cache_manager_cls(model_family)
+            specs.append(
+                {**spec.dict(), "cache_status": cache_manager.get_cache_status()}
+            )
+        return specs, list(download_hubs)
     async def _to_llm_reg(
         self, llm_family: "LLMFamilyV2", is_builtin: bool
     ) -> Dict[str, Any]:
@@ -415,20 +436,15 @@ class SupervisorActor(xo.StatelessActor):
         version_cnt = await self.get_model_version_count(llm_family.model_name)
         if self.is_local_deployment():
-            specs = []
             # TODO: does not work when the supervisor and worker are running on separate nodes.
             _llm_family = llm_family.copy()
-            for spec in [
-                _spec
-                for _spec in llm_family.model_specs
-                if _spec.model_hub == "huggingface"
-            ]:
-                _llm_family.model_specs = [spec]
-                cache_manager = LLMCacheManager(_llm_family)
-                specs.append(
-                    {**spec.dict(), "cache_status": cache_manager.get_cache_status()}
-                )
-            res = {**llm_family.dict(), "is_builtin": is_builtin, "model_specs": specs}
+            specs, download_hubs = self._get_spec_dicts(_llm_family, LLMCacheManager)
+            res = {
+                **llm_family.dict(),
+                "is_builtin": is_builtin,
+                "model_specs": specs,
+                "download_hubs": download_hubs,
+            }
         else:
             res = {**llm_family.dict(), "is_builtin": is_builtin}
         res["model_version_count"] = version_cnt
@@ -445,24 +461,13 @@ class SupervisorActor(xo.StatelessActor):
         if self.is_local_deployment():
             _family = model_family.copy()
-            specs = []
             # TODO: does not work when the supervisor and worker are running on separate nodes.
-            for spec in [
-                x for x in model_family.model_specs if x.model_hub == "huggingface"
-            ]:
-                _family.model_specs = [spec]
-                specs.append(
-                    {
-                        **spec.dict(),
-                        "cache_status": EmbeddingCacheManager(
-                            _family
-                        ).get_cache_status(),
-                    }
-                )
+            specs, download_hubs = self._get_spec_dicts(_family, EmbeddingCacheManager)
             res = {
                 **model_family.dict(),
                 "is_builtin": is_builtin,
                 "model_specs": specs,
+                "download_hubs": download_hubs,
             }
         else:
             res = {
@@ -474,25 +479,26 @@ class SupervisorActor(xo.StatelessActor):
         return res
     async def _to_rerank_model_reg(
-        self, model_spec: "RerankModelFamilyV2", is_builtin: bool
+        self, model_family: "RerankModelFamilyV2", is_builtin: bool
     ) -> Dict[str, Any]:
-        from ..model.rerank.cache_manager import RerankCacheManager as CacheManager
+        from ..model.rerank.cache_manager import RerankCacheManager
-        instance_cnt = await self.get_instance_count(model_spec.model_name)
-        version_cnt = await self.get_model_version_count(model_spec.model_name)
-        cache_manager = CacheManager(model_spec)
+        instance_cnt = await self.get_instance_count(model_family.model_name)
+        version_cnt = await self.get_model_version_count(model_family.model_name)
         if self.is_local_deployment():
+            _family = model_family.copy()
             # TODO: does not work when the supervisor and worker are running on separate nodes.
-            cache_status = cache_manager.get_cache_status()
+            specs, download_hubs = self._get_spec_dicts(_family, RerankCacheManager)
             res = {
-                **model_spec.dict(),
-                "cache_status": cache_status,
+                **model_family.dict(),
                 "is_builtin": is_builtin,
+                "model_specs": specs,
+                "download_hubs": download_hubs,
             }
         else:
             res = {
-                **model_spec.dict(),
+                **model_family.dict(),
                 "is_builtin": is_builtin,
             }
         res["model_version_count"] = version_cnt
@@ -657,7 +663,9 @@ class SupervisorActor(xo.StatelessActor):
             for model_name, families in BUILTIN_IMAGE_MODELS.items():
                 if detailed:
                     family = [x for x in families if x.model_hub == "huggingface"][0]
-                    ret.append(await self._to_image_model_reg(family, is_builtin=True))
+                    info = await self._to_image_model_reg(family, is_builtin=True)
+                    info["download_hubs"] = [x.model_hub for x in families]
+                    ret.append(info)
                 else:
                     ret.append({"model_name": model_name, "is_builtin": True})
@@ -680,7 +688,9 @@ class SupervisorActor(xo.StatelessActor):
             for model_name, families in BUILTIN_AUDIO_MODELS.items():
                 if detailed:
                     family = [x for x in families if x.model_hub == "huggingface"][0]
-                    ret.append(await self._to_audio_model_reg(family, is_builtin=True))
+                    info = await self._to_audio_model_reg(family, is_builtin=True)
+                    info["download_hubs"] = [x.model_hub for x in families]
+                    ret.append(info)
                 else:
                     ret.append({"model_name": model_name, "is_builtin": True})
@@ -702,7 +712,9 @@ class SupervisorActor(xo.StatelessActor):
             for model_name, families in BUILTIN_VIDEO_MODELS.items():
                 if detailed:
                     family = [x for x in families if x.model_hub == "huggingface"][0]
-                    ret.append(await self._to_video_model_reg(family, is_builtin=True))
+                    info = await self._to_video_model_reg(family, is_builtin=True)
+                    info["download_hubs"] = [x.model_hub for x in families]
+                    ret.append(info)
                 else:
                     ret.append({"model_name": model_name, "is_builtin": True})
@@ -812,16 +824,9 @@ class SupervisorActor(xo.StatelessActor):
             from ..model.rerank import BUILTIN_RERANK_MODELS
             from ..model.rerank.custom import get_user_defined_reranks
-            if model_name in BUILTIN_RERANK_MODELS:
-                return [
-                    x
-                    for x in BUILTIN_RERANK_MODELS[model_name]
-                    if x.model_hub == "huggingface"
-                ][0]
-            else:
-                for f in get_user_defined_reranks():
-                    if f.model_name == model_name:
-                        return f
+            for f in list(BUILTIN_RERANK_MODELS.values()) + get_user_defined_reranks():
+                if f.model_name == model_name:
+                    return f
             raise ValueError(f"Model {model_name} not found")
         elif model_type == "flexible":
             from ..model.flexible import get_flexible_models
@@ -830,6 +835,16 @@ class SupervisorActor(xo.StatelessActor):
                 if f.model_name == model_name:
                     return f
             raise ValueError(f"Model {model_name} not found")
+        elif model_type == "video":
+            from ..model.video import BUILTIN_VIDEO_MODELS
+            if model_name in BUILTIN_VIDEO_MODELS:
+                return [
+                    x
+                    for x in BUILTIN_VIDEO_MODELS[model_name]
+                    if x.model_hub == "huggingface"
+                ][0]
+            raise ValueError(f"Model {model_name} not found")
         else:
             raise ValueError(f"Unsupported model type: {model_type}")
@@ -864,6 +879,26 @@ class SupervisorActor(xo.StatelessActor):
                 generate_fn,
             ) = self._custom_register_type_to_cls[model_type]
+            model_spec = model_spec_cls.parse_raw(model)
+            # check if model already registered
+            try:
+                model = await self.get_model_registration(
+                    model_type, model_spec.model_name
+                )
+                if model is not None:
+                    raise ValueError(
+                        f"Model {model_spec.model_name} already registered"
+                    )
+            except ValueError as e:
+                if "not found" in str(e):
+                    pass
+                else:
+                    raise e
+            except Exception:
+                logger.error("Get model registration failed.", exc_info=True)
+                raise
             target_ip_worker_ref = (
                 self._get_worker_ref_by_ip(worker_ip) if worker_ip is not None else None
             )
@@ -880,7 +915,6 @@ class SupervisorActor(xo.StatelessActor):
                 await target_ip_worker_ref.register_model(model_type, model, persist)
                 return
-            model_spec = model_spec_cls.parse_raw(model)
             try:
                 register_fn(model_spec, persist)
                 await self._cache_tracker_ref.record_model_version(
@@ -901,25 +935,25 @@ class SupervisorActor(xo.StatelessActor):
     async def _sync_register_model(
         self, model_type: str, model: str, persist: bool, model_name: str
     ):
-        logger.info(f"begin sync model:{model_name} to worker")
+        logger.info(f"begin sync model: {model_name} to worker")
         try:
             # Sync model to all workers.
             for name, worker in self._worker_address_to_worker.items():
-                logger.info(f"sync model:{model_name} to {name}")
+                logger.info(f"sync model: {model_name} to {name}")
                 if name == self.address:
                     # Ignore: when worker and supervisor at the same node.
                     logger.info(
-                        f"ignore sync model:{model_name} to {name} for same node"
+                        f"ignore sync model: {model_name} to {name} for same node"
                     )
                 else:
                     await worker.register_model(model_type, model, persist)
-                    logger.info(f"success sync model:{model_name} to {name}")
+                    logger.info(f"success sync model: {model_name} to {name}")
         except Exception as e:
             # If sync fails, unregister the model in all workers.
             for name, worker in self._worker_address_to_worker.items():
                 logger.warning(f"ready to unregister model for {name}")
                 await worker.unregister_model(model_type, model_name)
-                logger.warning(f"finish unregister model:{model} for {name}")
+                logger.warning(f"finish unregister model: {model} for {name}")
             raise e
     @log_async(logger=logger)

xinference/core/worker.py CHANGED Viewed

@@ -710,6 +710,16 @@ class WorkerActor(xo.StatelessActor):
             for model_spec in get_user_defined_reranks():
                 ret.append({"model_name": model_spec.model_name, "is_builtin": False})
+            ret.sort(key=sort_helper)
+            return ret
+        elif model_type == "flexible":
+            from ..model.flexible.custom import get_flexible_models
+            ret = []
+            for model_spec in get_flexible_models():
+                ret.append({"model_name": model_spec.model_name, "is_builtin": False})
             ret.sort(key=sort_helper)
             return ret
         else:

xinference/deploy/cmdline.py CHANGED Viewed

@@ -576,6 +576,21 @@ def list_model_registrations(
             ),
             file=sys.stderr,
         )
+    elif model_type == "flexible":
+        for registration in registrations:
+            model_name = registration["model_name"]
+            model_family = client.get_model_registration(model_type, model_name)
+            table.append(
+                [
+                    model_type,
+                    model_family["model_name"],
+                    registration["is_builtin"],
+                ]
+            )
+        print(
+            tabulate(table, headers=["Type", "Name", "Is-built-in"]),
+            file=sys.stderr,
+        )
     else:
         raise NotImplementedError(f"List {model_type} is not implemented.")

xinference/model/audio/core.py CHANGED Viewed

@@ -23,6 +23,7 @@ from .f5tts import F5TTSModel
 from .f5tts_mlx import F5TTSMLXModel
 from .fish_speech import FishSpeechModel
 from .funasr import FunASRModel
+from .indextts2 import Indextts2
 from .kokoro import KokoroModel
 from .kokoro_mlx import KokoroMLXModel
 from .kokoro_zh import KokoroZHModel
@@ -107,13 +108,23 @@ def match_audio(
     if model_name in BUILTIN_AUDIO_MODELS:
         model_families = BUILTIN_AUDIO_MODELS[model_name]
-        if download_hub == "modelscope" or download_from_modelscope():
-            return (
-                [x for x in model_families if x.model_hub == "modelscope"]
-                + [x for x in model_families if x.model_hub == "huggingface"]
-            )[0]
+        if download_hub is not None:
+            if download_hub == "modelscope":
+                return (
+                    [x for x in model_families if x.model_hub == "modelscope"]
+                    + [x for x in model_families if x.model_hub == "huggingface"]
+                )[0]
+            else:
+                return [x for x in model_families if x.model_hub == download_hub][0]
         else:
-            return [x for x in model_families if x.model_hub == "huggingface"][0]
+            if download_from_modelscope():
+                return (
+                    [x for x in model_families if x.model_hub == "modelscope"]
+                    + [x for x in model_families if x.model_hub == "huggingface"]
+                )[0]
+            else:
+                return [x for x in model_families if x.model_hub == "huggingface"][0]
     else:
         raise ValueError(
             f"Audio model {model_name} not found, available"
@@ -143,6 +154,7 @@ def create_audio_model_instance(
     KokoroMLXModel,
     KokoroZHModel,
     MegaTTSModel,
+    Indextts2,
 ]:
     from ..cache_manager import CacheManager
@@ -164,6 +176,7 @@ def create_audio_model_instance(
         KokoroMLXModel,
         KokoroZHModel,
         MegaTTSModel,
+        Indextts2,
     ]
     if model_spec.model_family == "whisper":
         if not model_spec.engine:
@@ -192,6 +205,8 @@ def create_audio_model_instance(
         model = KokoroMLXModel(model_uid, model_path, model_spec, **kwargs)
     elif model_spec.model_family == "MegaTTS":
         model = MegaTTSModel(model_uid, model_path, model_spec, **kwargs)
+    elif model_spec.model_family == "IndexTTS2":
+        model = Indextts2(model_uid, model_path, model_spec, **kwargs)
     else:
         raise Exception(f"Unsupported audio model family: {model_spec.model_family}")
     return model

xinference 1.10.0__py3-none-any.whl → 1.10.1__py3-none-any.whl

Potentially problematic release.

xinference 1.10.0__py3-none-any.whl → 1.10.1__py3-none-any.whl