PyPI - xinference - Versions diffs - 0.10.0__py3-none-any.whl → 0.10.2__py3-none-any.whl - Mend

xinference 0.10.0__py3-none-any.whl → 0.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (97) hide show

xinference/_version.py CHANGED Viewed

@@ -8,11 +8,11 @@ import json
 version_json = '''
 {
- "date": "2024-03-29T12:46:14+0800",
+ "date": "2024-04-19T11:39:12+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "2857ec497afbd2a6895d3658384ff3b4022b2840",
- "version": "0.10.0"
+ "full-revisionid": "f19e85be09bce966e0c0b3e01bc5690eb6016398",
+ "version": "0.10.2"
 }
 '''  # END VERSION_JSON

xinference/api/restful_api.py CHANGED Viewed

@@ -64,6 +64,7 @@ from ..types import (
     CreateChatCompletion,
     CreateCompletion,
     ImageList,
+    PeftModelConfig,
     max_tokens_field,
 )
 from .oauth2.auth_service import AuthService
@@ -692,9 +693,7 @@ class RESTfulAPI:
         replica = payload.get("replica", 1)
         n_gpu = payload.get("n_gpu", "auto")
         request_limits = payload.get("request_limits", None)
-        peft_model_path = payload.get("peft_model_path", None)
-        image_lora_load_kwargs = payload.get("image_lora_load_kwargs", None)
-        image_lora_fuse_kwargs = payload.get("image_lora_fuse_kwargs", None)
+        peft_model_config = payload.get("peft_model_config", None)
         worker_ip = payload.get("worker_ip", None)
         gpu_idx = payload.get("gpu_idx", None)
@@ -708,9 +707,7 @@ class RESTfulAPI:
             "replica",
             "n_gpu",
             "request_limits",
-            "peft_model_path",
-            "image_lora_load_kwargs",
-            "image_lora_fuse_kwargs",
+            "peft_model_config",
             "worker_ip",
             "gpu_idx",
         }
@@ -725,6 +722,11 @@ class RESTfulAPI:
                 detail="Invalid input. Please specify the model name",
             )
+        if peft_model_config is not None:
+            peft_model_config = PeftModelConfig.from_dict(peft_model_config)
+        else:
+            peft_model_config = None
         try:
             model_uid = await (await self._get_supervisor_ref()).launch_builtin_model(
                 model_uid=model_uid,
@@ -737,9 +739,7 @@ class RESTfulAPI:
                 n_gpu=n_gpu,
                 request_limits=request_limits,
                 wait_ready=wait_ready,
-                peft_model_path=peft_model_path,
-                image_lora_load_kwargs=image_lora_load_kwargs,
-                image_lora_fuse_kwargs=image_lora_fuse_kwargs,
+                peft_model_config=peft_model_config,
                 worker_ip=worker_ip,
                 gpu_idx=gpu_idx,
                 **kwargs,
@@ -1007,8 +1007,16 @@ class RESTfulAPI:
                 raise HTTPException(status_code=500, detail=str(e))
     async def create_embedding(self, request: Request) -> Response:
-        body = CreateEmbeddingRequest.parse_obj(await request.json())
+        payload = await request.json()
+        body = CreateEmbeddingRequest.parse_obj(payload)
         model_uid = body.model
+        exclude = {
+            "model",
+            "input",
+            "user",
+            "encoding_format",
+        }
+        kwargs = {key: value for key, value in payload.items() if key not in exclude}
         try:
             model = await (await self._get_supervisor_ref()).get_model(model_uid)
@@ -1022,7 +1030,7 @@ class RESTfulAPI:
             raise HTTPException(status_code=500, detail=str(e))
         try:
-            embedding = await model.create_embedding(body.input)
+            embedding = await model.create_embedding(body.input, **kwargs)
             return Response(embedding, media_type="application/json")
         except RuntimeError as re:
             logger.error(re, exc_info=True)
@@ -1035,8 +1043,15 @@ class RESTfulAPI:
             raise HTTPException(status_code=500, detail=str(e))
     async def rerank(self, request: Request) -> Response:
-        body = RerankRequest.parse_obj(await request.json())
+        payload = await request.json()
+        body = RerankRequest.parse_obj(payload)
         model_uid = body.model
+        kwargs = {
+            key: value
+            for key, value in payload.items()
+            if key not in RerankRequest.__annotations__.keys()
+        }
         try:
             model = await (await self._get_supervisor_ref()).get_model(model_uid)
         except ValueError as ve:
@@ -1055,6 +1070,7 @@ class RESTfulAPI:
                 top_n=body.top_n,
                 max_chunks_per_doc=body.max_chunks_per_doc,
                 return_documents=body.return_documents,
+                **kwargs,
             )
             return Response(scores, media_type="application/json")
         except RuntimeError as re:
@@ -1345,9 +1361,12 @@ class RESTfulAPI:
                     detail=f"Only {function_call_models} support tool messages",
                 )
         if body.tools and body.stream:
-            raise HTTPException(
-                status_code=400, detail="Tool calls does not support stream"
-            )
+            is_vllm = await model.is_vllm_backend()
+            if not is_vllm or model_family not in ["qwen-chat", "qwen1.5-chat"]:
+                raise HTTPException(
+                    status_code=400,
+                    detail="Streaming support for tool calls is available only when using vLLM backend and Qwen models.",
+                )
         if body.stream:

xinference/client/oscar/actor_client.py CHANGED Viewed

@@ -111,7 +111,7 @@ class ClientIteratorWrapper(AsyncIterator):
 class EmbeddingModelHandle(ModelHandle):
-    def create_embedding(self, input: Union[str, List[str]]) -> bytes:
+    def create_embedding(self, input: Union[str, List[str]], **kwargs) -> bytes:
         """
         Creates an embedding vector representing the input text.
@@ -128,7 +128,7 @@ class EmbeddingModelHandle(ModelHandle):
             machine learning models and algorithms.
         """
-        coro = self._model_ref.create_embedding(input)
+        coro = self._model_ref.create_embedding(input, **kwargs)
         return orjson.loads(self._isolation.call(coro))
@@ -140,6 +140,7 @@ class RerankModelHandle(ModelHandle):
         top_n: Optional[int],
         max_chunks_per_doc: Optional[int],
         return_documents: Optional[bool],
+        **kwargs,
     ):
         """
         Returns an ordered list of documents ordered by their relevance to the provided query.
@@ -163,7 +164,7 @@ class RerankModelHandle(ModelHandle):
         """
         coro = self._model_ref.rerank(
-            documents, query, top_n, max_chunks_per_doc, return_documents
+            documents, query, top_n, max_chunks_per_doc, return_documents, **kwargs
         )
         results = orjson.loads(self._isolation.call(coro))
         for r in results["results"]:

xinference/client/restful/restful_client.py CHANGED Viewed

@@ -18,6 +18,8 @@ from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Union
 import requests
+from ...model.utils import convert_float_to_int_or_str
+from ...types import LoRA, PeftModelConfig
 from ..common import streaming_response_iterator
 if TYPE_CHECKING:
@@ -80,7 +82,7 @@ class RESTfulModelHandle:
 class RESTfulEmbeddingModelHandle(RESTfulModelHandle):
-    def create_embedding(self, input: Union[str, List[str]]) -> "Embedding":
+    def create_embedding(self, input: Union[str, List[str]], **kwargs) -> "Embedding":
         """
         Create an Embedding from user input via RESTful APIs.
@@ -102,7 +104,11 @@ class RESTfulEmbeddingModelHandle(RESTfulModelHandle):
         """
         url = f"{self._base_url}/v1/embeddings"
-        request_body = {"model": self._model_uid, "input": input}
+        request_body = {
+            "model": self._model_uid,
+            "input": input,
+        }
+        request_body.update(kwargs)
         response = requests.post(url, json=request_body, headers=self.auth_headers)
         if response.status_code != 200:
             raise RuntimeError(
@@ -121,6 +127,7 @@ class RESTfulRerankModelHandle(RESTfulModelHandle):
         top_n: Optional[int] = None,
         max_chunks_per_doc: Optional[int] = None,
         return_documents: Optional[bool] = None,
+        **kwargs,
     ):
         """
         Returns an ordered list of documents ordered by their relevance to the provided query.
@@ -156,6 +163,7 @@ class RESTfulRerankModelHandle(RESTfulModelHandle):
             "max_chunks_per_doc": max_chunks_per_doc,
             "return_documents": return_documents,
         }
+        request_body.update(kwargs)
         response = requests.post(url, json=request_body, headers=self.auth_headers)
         if response.status_code != 200:
             raise RuntimeError(
@@ -740,7 +748,7 @@ class Client:
     def launch_speculative_llm(
         self,
         model_name: str,
-        model_size_in_billions: Optional[int],
+        model_size_in_billions: Optional[Union[int, str, float]],
         quantization: Optional[str],
         draft_model_name: str,
         draft_model_size_in_billions: Optional[int],
@@ -761,6 +769,10 @@ class Client:
             "`launch_speculative_llm` is an experimental feature and the API may change in the future."
         )
+        # convert float to int or string since the RESTful API does not accept float.
+        if isinstance(model_size_in_billions, float):
+            model_size_in_billions = convert_float_to_int_or_str(model_size_in_billions)
         payload = {
             "model_uid": None,
             "model_name": model_name,
@@ -788,15 +800,13 @@ class Client:
         model_name: str,
         model_type: str = "LLM",
         model_uid: Optional[str] = None,
-        model_size_in_billions: Optional[Union[int, str]] = None,
+        model_size_in_billions: Optional[Union[int, str, float]] = None,
         model_format: Optional[str] = None,
         quantization: Optional[str] = None,
         replica: int = 1,
         n_gpu: Optional[Union[int, str]] = "auto",
+        peft_model_config: Optional[Dict] = None,
         request_limits: Optional[int] = None,
-        peft_model_path: Optional[str] = None,
-        image_lora_load_kwargs: Optional[Dict] = None,
-        image_lora_fuse_kwargs: Optional[Dict] = None,
         worker_ip: Optional[str] = None,
         gpu_idx: Optional[Union[int, List[int]]] = None,
         **kwargs,
@@ -812,7 +822,7 @@ class Client:
             type of model.
         model_uid: str
             UID of model, auto generate a UUID if is None.
-        model_size_in_billions: Optional[int]
+        model_size_in_billions: Optional[Union[int, str, float]]
             The size (in billions) of the model.
         model_format: Optional[str]
             The format of the model.
@@ -823,15 +833,13 @@ class Client:
         n_gpu: Optional[Union[int, str]],
             The number of GPUs used by the model, default is "auto".
             ``n_gpu=None`` means cpu only, ``n_gpu=auto`` lets the system automatically determine the best number of GPUs to use.
+        peft_model_config: Optional[Dict]
+            - "lora_list": A List of PEFT (Parameter-Efficient Fine-Tuning) model and path.
+            - "image_lora_load_kwargs": A Dict of lora load parameters for image model
+            - "image_lora_fuse_kwargs": A Dict of lora fuse parameters for image model
         request_limits: Optional[int]
-            The number of request limits for this model， default is None.
+            The number of request limits for this model, default is None.
             ``request_limits=None`` means no limits for this model.
-        peft_model_path: Optional[str]
-            PEFT (Parameter-Efficient Fine-Tuning) model path.
-        image_lora_load_kwargs: Optional[Dict]
-            lora load parameters for image model
-        image_lora_fuse_kwargs: Optional[Dict]
-            lora fuse parameters for image model
         worker_ip: Optional[str]
             Specify the worker ip where the model is located in a distributed scenario.
         gpu_idx: Optional[Union[int, List[int]]]
@@ -848,9 +856,26 @@ class Client:
         url = f"{self.base_url}/v1/models"
+        if peft_model_config is not None:
+            lora_list = [
+                LoRA.from_dict(model) for model in peft_model_config["lora_list"]
+            ]
+            peft_model = PeftModelConfig(
+                lora_list,
+                peft_model_config["image_lora_load_kwargs"],
+                peft_model_config["image_lora_fuse_kwargs"],
+            )
+        else:
+            peft_model = None
+        # convert float to int or string since the RESTful API does not accept float.
+        if isinstance(model_size_in_billions, float):
+            model_size_in_billions = convert_float_to_int_or_str(model_size_in_billions)
         payload = {
             "model_uid": model_uid,
             "model_name": model_name,
+            "peft_model_config": peft_model.to_dict() if peft_model else None,
             "model_type": model_type,
             "model_size_in_billions": model_size_in_billions,
             "model_format": model_format,
@@ -858,9 +883,6 @@ class Client:
             "replica": replica,
             "n_gpu": n_gpu,
             "request_limits": request_limits,
-            "peft_model_path": peft_model_path,
-            "image_lora_load_kwargs": image_lora_load_kwargs,
-            "image_lora_fuse_kwargs": image_lora_fuse_kwargs,
             "worker_ip": worker_ip,
             "gpu_idx": gpu_idx,
         }

xinference/core/supervisor.py CHANGED Viewed

@@ -30,6 +30,7 @@ from ..constants import (
 )
 from ..core import ModelActor
 from ..core.status_guard import InstanceInfo, LaunchStatus
+from ..types import PeftModelConfig
 from .metrics import record_metrics
 from .resource import GPUStatus, ResourceStatus
 from .utils import (
@@ -135,6 +136,13 @@ class SupervisorActor(xo.StatelessActor):
             EventCollectorActor, address=self.address, uid=EventCollectorActor.uid()
         )
+        from ..model.audio import (
+            CustomAudioModelFamilyV1,
+            generate_audio_description,
+            get_audio_model_descriptions,
+            register_audio,
+            unregister_audio,
+        )
         from ..model.embedding import (
             CustomEmbeddingModelSpec,
             generate_embedding_description,
@@ -177,6 +185,12 @@ class SupervisorActor(xo.StatelessActor):
                 unregister_rerank,
                 generate_rerank_description,
             ),
+            "audio": (
+                CustomAudioModelFamilyV1,
+                register_audio,
+                unregister_audio,
+                generate_audio_description,
+            ),
         }
         # record model version
@@ -185,6 +199,7 @@ class SupervisorActor(xo.StatelessActor):
         model_version_infos.update(get_embedding_model_descriptions())
         model_version_infos.update(get_rerank_model_descriptions())
         model_version_infos.update(get_image_model_descriptions())
+        model_version_infos.update(get_audio_model_descriptions())
         await self._cache_tracker_ref.record_model_version(
             model_version_infos, self.address
         )
@@ -483,6 +498,7 @@ class SupervisorActor(xo.StatelessActor):
             return ret
         elif model_type == "audio":
             from ..model.audio import BUILTIN_AUDIO_MODELS
+            from ..model.audio.custom import get_user_defined_audios
             ret = []
             for model_name, family in BUILTIN_AUDIO_MODELS.items():
@@ -491,6 +507,16 @@ class SupervisorActor(xo.StatelessActor):
                 else:
                     ret.append({"model_name": model_name, "is_builtin": True})
+            for model_spec in get_user_defined_audios():
+                if detailed:
+                    ret.append(
+                        await self._to_audio_model_reg(model_spec, is_builtin=False)
+                    )
+                else:
+                    ret.append(
+                        {"model_name": model_spec.model_name, "is_builtin": False}
+                    )
             ret.sort(key=sort_helper)
             return ret
         elif model_type == "rerank":
@@ -548,8 +574,9 @@ class SupervisorActor(xo.StatelessActor):
             raise ValueError(f"Model {model_name} not found")
         elif model_type == "audio":
             from ..model.audio import BUILTIN_AUDIO_MODELS
+            from ..model.audio.custom import get_user_defined_audios
-            for f in BUILTIN_AUDIO_MODELS.values():
+            for f in list(BUILTIN_AUDIO_MODELS.values()) + get_user_defined_audios():
                 if f.model_name == model_name:
                     return f
             raise ValueError(f"Model {model_name} not found")
@@ -654,7 +681,7 @@ class SupervisorActor(xo.StatelessActor):
         self,
         model_uid: Optional[str],
         model_name: str,
-        model_size_in_billions: Optional[int],
+        model_size_in_billions: Optional[Union[int, str]],
         quantization: Optional[str],
         draft_model_name: str,
         draft_model_size_in_billions: Optional[int],
@@ -714,7 +741,7 @@ class SupervisorActor(xo.StatelessActor):
         self,
         model_uid: Optional[str],
         model_name: str,
-        model_size_in_billions: Optional[int],
+        model_size_in_billions: Optional[Union[int, str]],
         model_format: Optional[str],
         quantization: Optional[str],
         model_type: Optional[str],
@@ -723,9 +750,7 @@ class SupervisorActor(xo.StatelessActor):
         request_limits: Optional[int] = None,
         wait_ready: bool = True,
         model_version: Optional[str] = None,
-        peft_model_path: Optional[str] = None,
-        image_lora_load_kwargs: Optional[Dict] = None,
-        image_lora_fuse_kwargs: Optional[Dict] = None,
+        peft_model_config: Optional[PeftModelConfig] = None,
         worker_ip: Optional[str] = None,
         gpu_idx: Optional[Union[int, List[int]]] = None,
         **kwargs,
@@ -777,9 +802,7 @@ class SupervisorActor(xo.StatelessActor):
                 model_type=model_type,
                 n_gpu=n_gpu,
                 request_limits=request_limits,
-                peft_model_path=peft_model_path,
-                image_lora_load_kwargs=image_lora_load_kwargs,
-                image_lora_fuse_kwargs=image_lora_fuse_kwargs,
+                peft_model_config=peft_model_config,
                 gpu_idx=gpu_idx,
                 **kwargs,
             )
@@ -870,6 +893,12 @@ class SupervisorActor(xo.StatelessActor):
                             address,
                             dead_models,
                         )
+                        for replica_model_uid in dead_models:
+                            model_uid, _, _ = parse_replica_model_uid(replica_model_uid)
+                            self._model_uid_to_replica_info.pop(model_uid, None)
+                            self._replica_model_uid_to_worker.pop(
+                                replica_model_uid, None
+                            )
                         dead_nodes.append(address)
                     elif (
                         status.failure_remaining_count
@@ -979,6 +1008,16 @@ class SupervisorActor(xo.StatelessActor):
     @log_async(logger=logger)
     async def remove_worker(self, worker_address: str):
+        uids_to_remove = []
+        for model_uid in self._replica_model_uid_to_worker:
+            if self._replica_model_uid_to_worker[model_uid].address == worker_address:
+                uids_to_remove.append(model_uid)
+        for replica_model_uid in uids_to_remove:
+            model_uid, _, _ = parse_replica_model_uid(replica_model_uid)
+            self._model_uid_to_replica_info.pop(model_uid, None)
+            self._replica_model_uid_to_worker.pop(replica_model_uid, None)
         if worker_address in self._worker_address_to_worker:
             del self._worker_address_to_worker[worker_address]
             logger.debug("Worker %s has been removed successfully", worker_address)

xinference/core/worker.py CHANGED Viewed

@@ -36,6 +36,7 @@ from ..core import ModelActor
 from ..core.status_guard import LaunchStatus
 from ..device_utils import gpu_count
 from ..model.core import ModelDescription, create_model_instance
+from ..types import PeftModelConfig
 from .event import Event, EventCollectorActor, EventType
 from .metrics import launch_metrics_export_server, record_metrics
 from .resource import gather_node_info
@@ -195,6 +196,12 @@ class WorkerActor(xo.StatelessActor):
         logger.info("Purge cache directory: %s", XINFERENCE_CACHE_DIR)
         purge_dir(XINFERENCE_CACHE_DIR)
+        from ..model.audio import (
+            CustomAudioModelFamilyV1,
+            get_audio_model_descriptions,
+            register_audio,
+            unregister_audio,
+        )
         from ..model.embedding import (
             CustomEmbeddingModelSpec,
             get_embedding_model_descriptions,
@@ -223,6 +230,7 @@ class WorkerActor(xo.StatelessActor):
                 unregister_embedding,
             ),
             "rerank": (CustomRerankModelSpec, register_rerank, unregister_rerank),
+            "audio": (CustomAudioModelFamilyV1, register_audio, unregister_audio),
         }
         # record model version
@@ -231,6 +239,7 @@ class WorkerActor(xo.StatelessActor):
         model_version_infos.update(get_embedding_model_descriptions())
         model_version_infos.update(get_rerank_model_descriptions())
         model_version_infos.update(get_image_model_descriptions())
+        model_version_infos.update(get_audio_model_descriptions())
         await self._cache_tracker_ref.record_model_version(
             model_version_infos, self.address
         )
@@ -593,14 +602,12 @@ class WorkerActor(xo.StatelessActor):
         self,
         model_uid: str,
         model_name: str,
-        model_size_in_billions: Optional[int],
+        model_size_in_billions: Optional[Union[int, str]],
         model_format: Optional[str],
         quantization: Optional[str],
         model_type: str = "LLM",
         n_gpu: Optional[Union[int, str]] = "auto",
-        peft_model_path: Optional[str] = None,
-        image_lora_load_kwargs: Optional[Dict] = None,
-        image_lora_fuse_kwargs: Optional[Dict] = None,
+        peft_model_config: Optional[PeftModelConfig] = None,
         request_limits: Optional[int] = None,
         gpu_idx: Optional[Union[int, List[int]]] = None,
         **kwargs,
@@ -638,7 +645,7 @@ class WorkerActor(xo.StatelessActor):
             if isinstance(n_gpu, str) and n_gpu != "auto":
                 raise ValueError("Currently `n_gpu` only supports `auto`.")
-        if peft_model_path is not None:
+        if peft_model_config is not None:
             if model_type in ("embedding", "rerank"):
                 raise ValueError(
                     f"PEFT adaptors cannot be applied to embedding or rerank models."
@@ -669,9 +676,7 @@ class WorkerActor(xo.StatelessActor):
                 model_format,
                 model_size_in_billions,
                 quantization,
-                peft_model_path,
-                image_lora_load_kwargs,
-                image_lora_fuse_kwargs,
+                peft_model_config,
                 is_local_deployment,
                 **kwargs,
             )

xinference/deploy/cmdline.py CHANGED Viewed

@@ -640,10 +640,11 @@ def list_model_registrations(
     help='The number of GPUs used by the model, default is "auto".',
 )
 @click.option(
-    "--peft-model-path",
-    default=None,
-    type=str,
-    help="PEFT model path.",
+    "--lora-modules",
+    "-lm",
+    multiple=True,
+    type=(str, str),
+    help="LoRA module configurations in the format name=path. Multiple modules can be specified.",
 )
 @click.option(
     "--image-lora-load-kwargs",
@@ -696,7 +697,7 @@ def model_launch(
     quantization: str,
     replica: int,
     n_gpu: str,
-    peft_model_path: Optional[str],
+    lora_modules: Optional[Tuple],
     image_lora_load_kwargs: Optional[Tuple],
     image_lora_fuse_kwargs: Optional[Tuple],
     worker_ip: Optional[str],
@@ -729,6 +730,18 @@ def model_launch(
         else None
     )
+    lora_list = (
+        [{"lora_name": k, "local_path": v} for k, v in dict(lora_modules).items()]
+        if lora_modules
+        else []
+    )
+    peft_model_config = {
+        "image_lora_load_kwargs": image_lora_load_params,
+        "image_lora_fuse_kwargs": image_lora_fuse_params,
+        "lora_list": lora_list,
+    }
     _gpu_idx: Optional[List[int]] = (
         None if gpu_idx is None else [int(idx) for idx in gpu_idx.split(",")]
     )
@@ -736,7 +749,9 @@ def model_launch(
     endpoint = get_endpoint(endpoint)
     model_size: Optional[Union[str, int]] = (
         size_in_billions
-        if size_in_billions is None or "_" in size_in_billions
+        if size_in_billions is None
+        or "_" in size_in_billions
+        or "." in size_in_billions
         else int(size_in_billions)
     )
     client = RESTfulClient(base_url=endpoint, api_key=api_key)
@@ -752,9 +767,7 @@ def model_launch(
         quantization=quantization,
         replica=replica,
         n_gpu=_n_gpu,
-        peft_model_path=peft_model_path,
-        image_lora_load_kwargs=image_lora_load_params,
-        image_lora_fuse_kwargs=image_lora_fuse_params,
+        peft_model_config=peft_model_config,
         worker_ip=worker_ip,
         gpu_idx=_gpu_idx,
         trust_remote_code=trust_remote_code,

xinference/model/audio/__init__.py CHANGED Viewed

@@ -16,12 +16,51 @@ import codecs
 import json
 import os
-from .core import AudioModelFamilyV1, generate_audio_description, get_cache_status
+from .core import (
+    AUDIO_MODEL_DESCRIPTIONS,
+    MODEL_NAME_TO_REVISION,
+    AudioModelFamilyV1,
+    generate_audio_description,
+    get_audio_model_descriptions,
+    get_cache_status,
+)
+from .custom import (
+    CustomAudioModelFamilyV1,
+    get_user_defined_audios,
+    register_audio,
+    unregister_audio,
+)
 _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
 BUILTIN_AUDIO_MODELS = dict(
     (spec["model_name"], AudioModelFamilyV1(**spec))
     for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
 )
+for model_name, model_spec in BUILTIN_AUDIO_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
+# register model description after recording model revision
+for model_spec_info in [BUILTIN_AUDIO_MODELS]:
+    for model_name, model_spec in model_spec_info.items():
+        if model_spec.model_name not in AUDIO_MODEL_DESCRIPTIONS:
+            AUDIO_MODEL_DESCRIPTIONS.update(generate_audio_description(model_spec))
+from ...constants import XINFERENCE_MODEL_DIR
+# if persist=True, load them when init
+user_defined_audio_dir = os.path.join(XINFERENCE_MODEL_DIR, "audio")
+if os.path.isdir(user_defined_audio_dir):
+    for f in os.listdir(user_defined_audio_dir):
+        with codecs.open(
+            os.path.join(user_defined_audio_dir, f), encoding="utf-8"
+        ) as fd:
+            user_defined_audio_family = CustomAudioModelFamilyV1.parse_obj(
+                json.load(fd)
+            )
+            register_audio(user_defined_audio_family, persist=False)
+# register model description
+for ud_audio in get_user_defined_audios():
+    AUDIO_MODEL_DESCRIPTIONS.update(generate_audio_description(ud_audio))
 del _model_spec_json

xinference 0.10.0__py3-none-any.whl → 0.10.2__py3-none-any.whl

Potentially problematic release.

xinference 0.10.0__py3-none-any.whl → 0.10.2__py3-none-any.whl