xinference 0.14.1.post1__py3-none-any.whl → 0.14.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of xinference has been flagged as potentially problematic.

Files changed (194)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +15 -34
  3. xinference/client/restful/restful_client.py +2 -2
  4. xinference/core/chat_interface.py +45 -10
  5. xinference/core/image_interface.py +9 -0
  6. xinference/core/model.py +8 -5
  7. xinference/core/scheduler.py +1 -2
  8. xinference/core/worker.py +49 -42
  9. xinference/deploy/cmdline.py +2 -2
  10. xinference/deploy/test/test_cmdline.py +7 -7
  11. xinference/model/audio/chattts.py +24 -9
  12. xinference/model/audio/core.py +8 -2
  13. xinference/model/audio/fish_speech.py +228 -0
  14. xinference/model/audio/model_spec.json +8 -0
  15. xinference/model/embedding/core.py +23 -1
  16. xinference/model/image/model_spec.json +2 -1
  17. xinference/model/image/model_spec_modelscope.json +2 -1
  18. xinference/model/image/stable_diffusion/core.py +49 -1
  19. xinference/model/llm/__init__.py +26 -27
  20. xinference/model/llm/{ggml/llamacpp.py → llama_cpp/core.py} +2 -35
  21. xinference/model/llm/llm_family.json +606 -1266
  22. xinference/model/llm/llm_family.py +16 -139
  23. xinference/model/llm/llm_family_modelscope.json +276 -313
  24. xinference/model/llm/lmdeploy/__init__.py +0 -0
  25. xinference/model/llm/lmdeploy/core.py +557 -0
  26. xinference/model/llm/memory.py +9 -9
  27. xinference/model/llm/sglang/core.py +2 -2
  28. xinference/model/llm/{pytorch → transformers}/chatglm.py +6 -13
  29. xinference/model/llm/{pytorch → transformers}/cogvlm2.py +4 -45
  30. xinference/model/llm/transformers/cogvlm2_video.py +524 -0
  31. xinference/model/llm/{pytorch → transformers}/core.py +3 -10
  32. xinference/model/llm/{pytorch → transformers}/glm4v.py +2 -23
  33. xinference/model/llm/transformers/intern_vl.py +540 -0
  34. xinference/model/llm/{pytorch → transformers}/internlm2.py +4 -8
  35. xinference/model/llm/{pytorch → transformers}/minicpmv25.py +2 -23
  36. xinference/model/llm/{pytorch → transformers}/minicpmv26.py +66 -41
  37. xinference/model/llm/{pytorch → transformers}/utils.py +1 -2
  38. xinference/model/llm/{pytorch → transformers}/yi_vl.py +2 -24
  39. xinference/model/llm/utils.py +85 -70
  40. xinference/model/llm/vllm/core.py +110 -11
  41. xinference/model/utils.py +1 -95
  42. xinference/thirdparty/fish_speech/__init__.py +0 -0
  43. xinference/thirdparty/fish_speech/fish_speech/__init__.py +0 -0
  44. xinference/thirdparty/fish_speech/fish_speech/callbacks/__init__.py +3 -0
  45. xinference/thirdparty/fish_speech/fish_speech/callbacks/grad_norm.py +113 -0
  46. xinference/thirdparty/fish_speech/fish_speech/configs/__init__.py +0 -0
  47. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  48. xinference/thirdparty/fish_speech/fish_speech/conversation.py +2 -0
  49. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  50. xinference/thirdparty/fish_speech/fish_speech/datasets/concat_repeat.py +53 -0
  51. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  52. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_pb2.py +33 -0
  53. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_stream.py +36 -0
  54. xinference/thirdparty/fish_speech/fish_speech/datasets/semantic.py +496 -0
  55. xinference/thirdparty/fish_speech/fish_speech/datasets/vqgan.py +147 -0
  56. xinference/thirdparty/fish_speech/fish_speech/i18n/__init__.py +3 -0
  57. xinference/thirdparty/fish_speech/fish_speech/i18n/core.py +40 -0
  58. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  59. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +122 -0
  60. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +122 -0
  61. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +123 -0
  62. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +133 -0
  63. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +122 -0
  64. xinference/thirdparty/fish_speech/fish_speech/i18n/scan.py +122 -0
  65. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  66. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/__init__.py +0 -0
  67. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lit_module.py +202 -0
  68. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +779 -0
  69. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lora.py +92 -0
  70. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +3 -0
  71. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +442 -0
  72. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  73. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +44 -0
  74. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +625 -0
  75. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +139 -0
  76. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +115 -0
  77. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +225 -0
  78. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/utils.py +94 -0
  79. xinference/thirdparty/fish_speech/fish_speech/scheduler.py +40 -0
  80. xinference/thirdparty/fish_speech/fish_speech/text/__init__.py +4 -0
  81. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/__init__.py +0 -0
  82. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_class.py +172 -0
  83. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_constant.py +30 -0
  84. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_util.py +342 -0
  85. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/cardinal.py +32 -0
  86. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/date.py +75 -0
  87. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/digit.py +32 -0
  88. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/fraction.py +35 -0
  89. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/money.py +43 -0
  90. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/percentage.py +33 -0
  91. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/telephone.py +51 -0
  92. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +177 -0
  93. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +69 -0
  94. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +130 -0
  95. xinference/thirdparty/fish_speech/fish_speech/train.py +139 -0
  96. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +23 -0
  97. xinference/thirdparty/fish_speech/fish_speech/utils/braceexpand.py +217 -0
  98. xinference/thirdparty/fish_speech/fish_speech/utils/context.py +13 -0
  99. xinference/thirdparty/fish_speech/fish_speech/utils/file.py +16 -0
  100. xinference/thirdparty/fish_speech/fish_speech/utils/instantiators.py +50 -0
  101. xinference/thirdparty/fish_speech/fish_speech/utils/logger.py +55 -0
  102. xinference/thirdparty/fish_speech/fish_speech/utils/logging_utils.py +48 -0
  103. xinference/thirdparty/fish_speech/fish_speech/utils/rich_utils.py +100 -0
  104. xinference/thirdparty/fish_speech/fish_speech/utils/spectrogram.py +122 -0
  105. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +114 -0
  106. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  107. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +120 -0
  108. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1237 -0
  109. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  110. xinference/thirdparty/fish_speech/tools/api.py +495 -0
  111. xinference/thirdparty/fish_speech/tools/auto_rerank.py +159 -0
  112. xinference/thirdparty/fish_speech/tools/download_models.py +55 -0
  113. xinference/thirdparty/fish_speech/tools/extract_model.py +21 -0
  114. xinference/thirdparty/fish_speech/tools/file.py +108 -0
  115. xinference/thirdparty/fish_speech/tools/gen_ref.py +36 -0
  116. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  117. xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +169 -0
  118. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +171 -0
  119. xinference/thirdparty/fish_speech/tools/llama/generate.py +698 -0
  120. xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +95 -0
  121. xinference/thirdparty/fish_speech/tools/llama/quantize.py +497 -0
  122. xinference/thirdparty/fish_speech/tools/llama/rebuild_tokenizer.py +57 -0
  123. xinference/thirdparty/fish_speech/tools/merge_asr_files.py +55 -0
  124. xinference/thirdparty/fish_speech/tools/post_api.py +164 -0
  125. xinference/thirdparty/fish_speech/tools/sensevoice/__init__.py +0 -0
  126. xinference/thirdparty/fish_speech/tools/sensevoice/auto_model.py +573 -0
  127. xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +332 -0
  128. xinference/thirdparty/fish_speech/tools/sensevoice/vad_utils.py +61 -0
  129. xinference/thirdparty/fish_speech/tools/smart_pad.py +47 -0
  130. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  131. xinference/thirdparty/fish_speech/tools/vqgan/create_train_split.py +83 -0
  132. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +227 -0
  133. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +120 -0
  134. xinference/thirdparty/fish_speech/tools/webui.py +619 -0
  135. xinference/thirdparty/fish_speech/tools/whisper_asr.py +176 -0
  136. xinference/thirdparty/internvl/__init__.py +0 -0
  137. xinference/thirdparty/internvl/conversation.py +393 -0
  138. xinference/thirdparty/omnilmm/model/utils.py +16 -1
  139. xinference/web/ui/build/asset-manifest.json +3 -3
  140. xinference/web/ui/build/index.html +1 -1
  141. xinference/web/ui/build/static/js/main.661c7b0a.js +3 -0
  142. xinference/web/ui/build/static/js/{main.17ca0398.js.map → main.661c7b0a.js.map} +1 -1
  143. xinference/web/ui/node_modules/.cache/babel-loader/070d8c6b3b0f3485c6d3885f0b6bbfdf9643e088a468acbd5d596f2396071c16.json +1 -0
  144. xinference/web/ui/node_modules/.cache/babel-loader/213b5913e164773c2b0567455377765715f5f07225fbac77ad8e1e9dc9648a47.json +1 -0
  145. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +1 -0
  146. xinference/web/ui/node_modules/.cache/babel-loader/5391543180fead1eeef5364300301498d58a7d91d62de3841a32768b67f4552f.json +1 -0
  147. xinference/web/ui/node_modules/.cache/babel-loader/5c26a23b5eacf5b752a08531577ae3840bb247745ef9a39583dc2d05ba93a82a.json +1 -0
  148. xinference/web/ui/node_modules/.cache/babel-loader/714c37ce0ec5b5c591033f02be2f3f491fdd70da3ef568ee4a4f94689a3d5ca2.json +1 -0
  149. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +1 -0
  150. xinference/web/ui/node_modules/.cache/babel-loader/978b57d1a04a701bc3fcfebc511f5f274eed6ed7eade67f6fb76c27d5fd9ecc8.json +1 -0
  151. xinference/web/ui/node_modules/.cache/babel-loader/a797831de0dc74897f4b50b3426555d748f328b4c2cc391de709eadaf6a5f3e3.json +1 -0
  152. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +1 -0
  153. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +1 -0
  154. xinference/web/ui/node_modules/.cache/babel-loader/e91938976f229ce986b2907e51e1f00540b584ced0a315d498c172d13220739d.json +1 -0
  155. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +1 -0
  156. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/METADATA +22 -13
  157. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/RECORD +170 -79
  158. xinference/locale/utils.py +0 -39
  159. xinference/locale/zh_CN.json +0 -26
  160. xinference/model/llm/ggml/tools/__init__.py +0 -15
  161. xinference/model/llm/ggml/tools/convert_ggml_to_gguf.py +0 -498
  162. xinference/model/llm/ggml/tools/gguf.py +0 -884
  163. xinference/model/llm/pytorch/__init__.py +0 -13
  164. xinference/model/llm/pytorch/baichuan.py +0 -81
  165. xinference/model/llm/pytorch/falcon.py +0 -138
  166. xinference/model/llm/pytorch/intern_vl.py +0 -352
  167. xinference/model/llm/pytorch/vicuna.py +0 -69
  168. xinference/web/ui/build/static/js/main.17ca0398.js +0 -3
  169. xinference/web/ui/node_modules/.cache/babel-loader/1444c41a4d04494f1cbc2d8c1537df107b451cb569cb2c1fbf5159f3a4841a5f.json +0 -1
  170. xinference/web/ui/node_modules/.cache/babel-loader/2f40209b32e7e46a2eab6b8c8a355eb42c3caa8bc3228dd929f32fd2b3940294.json +0 -1
  171. xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +0 -1
  172. xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +0 -1
  173. xinference/web/ui/node_modules/.cache/babel-loader/6450605fac003812485f6251b9f0caafbf2e5bfc3bbe2f000050d9e2fdb8dcd3.json +0 -1
  174. xinference/web/ui/node_modules/.cache/babel-loader/71684495d995c7e266eecc6a0ad8ea0284cc785f80abddf863789c57a6134969.json +0 -1
  175. xinference/web/ui/node_modules/.cache/babel-loader/80acd1edf31542ab1dcccfad02cb4b38f3325cff847a781fcce97500cfd6f878.json +0 -1
  176. xinference/web/ui/node_modules/.cache/babel-loader/8a9742ddd8ba8546ef42dc14caca443f2b4524fabed7bf269e0eff3b7b64ee7d.json +0 -1
  177. xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +0 -1
  178. xinference/web/ui/node_modules/.cache/babel-loader/d93730e2b5d7e8c957b4d0965d2ed1dac9045a649adbd47c220d11f255d4b1e0.json +0 -1
  179. xinference/web/ui/node_modules/.cache/babel-loader/e656dc00b4d8b387f0a81ba8fc558767df1601c66369e2eb86a5ef27cf080572.json +0 -1
  180. xinference/web/ui/node_modules/.cache/babel-loader/f28b83886159d83b84f099b05d607a822dca4dd7f2d8aa6d56fe08bab0b5b086.json +0 -1
  181. xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +0 -1
  182. /xinference/{locale → model/llm/llama_cpp}/__init__.py +0 -0
  183. /xinference/model/llm/{ggml → transformers}/__init__.py +0 -0
  184. /xinference/model/llm/{pytorch → transformers}/compression.py +0 -0
  185. /xinference/model/llm/{pytorch → transformers}/deepseek_vl.py +0 -0
  186. /xinference/model/llm/{pytorch → transformers}/llama_2.py +0 -0
  187. /xinference/model/llm/{pytorch → transformers}/omnilmm.py +0 -0
  188. /xinference/model/llm/{pytorch → transformers}/qwen_vl.py +0 -0
  189. /xinference/model/llm/{pytorch → transformers}/tensorizer_utils.py +0 -0
  190. /xinference/web/ui/build/static/js/{main.17ca0398.js.LICENSE.txt → main.661c7b0a.js.LICENSE.txt} +0 -0
  191. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/LICENSE +0 -0
  192. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/WHEEL +0 -0
  193. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/entry_points.txt +0 -0
  194. {xinference-0.14.1.post1.dist-info → xinference-0.14.3.dist-info}/top_level.txt +0 -0
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2024-08-12T12:36:32+0800",
+ "date": "2024-08-23T18:14:53+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "9afee766a3c5cc53e6035490400a4291b78e72ff",
- "version": "0.14.1.post1"
+ "full-revisionid": "b5002242e04634bca7e75cac9df0cdc6c0bf407a",
+ "version": "0.14.3"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py CHANGED
@@ -1682,18 +1682,9 @@ class RESTfulAPI:
 
         model_family = desc.get("model_family", "")
         function_call_models = (
-            ["chatglm3", "gorilla-openfunctions-v1"]
-            + QWEN_TOOL_CALL_FAMILY
-            + GLM4_TOOL_CALL_FAMILY
+            ["gorilla-openfunctions-v1"] + QWEN_TOOL_CALL_FAMILY + GLM4_TOOL_CALL_FAMILY
         )
 
-        is_qwen = desc.get("model_format") == "ggmlv3" and "qwen-chat" == model_family
-
-        if is_qwen and system_prompt is not None:
-            raise HTTPException(
-                status_code=400, detail="Qwen ggml does not have system prompt"
-            )
-
         if model_family not in function_call_models:
             if body.tools:
                 raise HTTPException(
@@ -1724,18 +1715,13 @@
             iterator = None
             try:
                 try:
-                    if is_qwen:
-                        iterator = await model.chat(
-                            prompt, chat_history, kwargs, raw_params=raw_kwargs
-                        )
-                    else:
-                        iterator = await model.chat(
-                            prompt,
-                            system_prompt,
-                            chat_history,
-                            kwargs,
-                            raw_params=raw_kwargs,
-                        )
+                    iterator = await model.chat(
+                        prompt,
+                        system_prompt,
+                        chat_history,
+                        kwargs,
+                        raw_params=raw_kwargs,
+                    )
                 except RuntimeError as re:
                     await self._report_error_event(model_uid, str(re))
                     self.handle_request_limit_error(re)
@@ -1763,18 +1749,13 @@
             return EventSourceResponse(stream_results())
         else:
             try:
-                if is_qwen:
-                    data = await model.chat(
-                        prompt, chat_history, kwargs, raw_params=raw_kwargs
-                    )
-                else:
-                    data = await model.chat(
-                        prompt,
-                        system_prompt,
-                        chat_history,
-                        kwargs,
-                        raw_params=raw_kwargs,
-                    )
+                data = await model.chat(
+                    prompt,
+                    system_prompt,
+                    chat_history,
+                    kwargs,
+                    raw_params=raw_kwargs,
+                )
                 return Response(content=data, media_type="application/json")
             except Exception as e:
                 logger.error(e, exc_info=True)
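With the ggml special case gone, every chat model goes through the same model.chat(prompt, system_prompt, chat_history, ...) path, and tool calls remain gated by function_call_models. A minimal sketch of a tool call through the RESTful client, assuming the 0.14-era client signature (the endpoint, model uid, and tool schema are hypothetical, not taken from this diff):

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://localhost:9997")  # assumed local endpoint
    model = client.get_model("my-qwen-chat")  # hypothetical uid of a QWEN_TOOL_CALL_FAMILY model

    # OpenAI-style tool schema; name and parameters are illustrative only.
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Look up the current weather for a city",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }
    ]
    response = model.chat("What's the weather in Paris?", tools=tools)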
xinference/client/restful/restful_client.py CHANGED
@@ -426,7 +426,7 @@ class RESTfulGenerateModelHandle(RESTfulModelHandle):
             The user's message or user's input.
         generate_config: Optional[Union["LlamaCppGenerateConfig", "PytorchGenerateConfig"]]
             Additional configuration for the chat generation.
-            "LlamaCppGenerateConfig" -> Configuration for ggml model
+            "LlamaCppGenerateConfig" -> Configuration for llama-cpp-python model
             "PytorchGenerateConfig" -> Configuration for pytorch model
 
         Returns
@@ -493,7 +493,7 @@ class RESTfulChatModelHandle(RESTfulGenerateModelHandle):
             A tool list.
         generate_config: Optional[Union["LlamaCppGenerateConfig", "PytorchGenerateConfig"]]
             Additional configuration for the chat generation.
-            "LlamaCppGenerateConfig" -> configuration for ggml model
+            "LlamaCppGenerateConfig" -> configuration for llama-cpp-python model
             "PytorchGenerateConfig" -> configuration for pytorch model
 
         Returns
xinference/core/chat_interface.py CHANGED
@@ -236,8 +236,8 @@ class GradioInterface:
                 bot[-1][1] = history[-1]["content"]
                 yield history, bot
 
-        def add_text(history, bot, text, image):
-            logger.debug("Add text, text: %s, image: %s", text, image)
+        def add_text(history, bot, text, image, video):
+            logger.debug("Add text, text: %s, image: %s, video: %s", text, image, video)
             if image:
                 buffered = BytesIO()
                 with PIL.Image.open(image) as img:
@@ -257,16 +257,47 @@ class GradioInterface:
                         },
                     ],
                 }
+            elif video:
+
+                def video_to_base64(video_path):
+                    with open(video_path, "rb") as video_file:
+                        encoded_string = base64.b64encode(video_file.read()).decode(
+                            "utf-8"
+                        )
+                    return encoded_string
+
+                def generate_html_video(video_path):
+                    base64_video = video_to_base64(video_path)
+                    video_format = video_path.split(".")[-1]
+                    html_code = f"""
+                    <video controls>
+                        <source src="data:video/{video_format};base64,{base64_video}" type="video/{video_format}">
+                        Your browser does not support the video tag.
+                    </video>
+                    """
+                    return html_code
+
+                display_content = f"{generate_html_video(video)}\n{text}"
+                message = {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": text},
+                        {
+                            "type": "video_url",
+                            "video_url": {"url": video},
+                        },
+                    ],
+                }
             else:
                 display_content = text
                 message = {"role": "user", "content": text}
             history = history + [message]
             bot = bot + [[display_content, None]]
-            return history, bot, "", None
+            return history, bot, "", None, None
 
         def clear_history():
             logger.debug("Clear history.")
-            return [], None, "", None
+            return [], None, "", None, None
 
         def update_button(text):
             return gr.update(interactive=bool(text))
@@ -309,10 +340,11 @@ class GradioInterface:
             state = gr.State([])
             with gr.Row():
                 chatbot = gr.Chatbot(
-                    elem_id="chatbot", label=self.model_name, height=550, scale=7
+                    elem_id="chatbot", label=self.model_name, height=700, scale=7
                 )
                 with gr.Column(scale=3):
                     imagebox = gr.Image(type="filepath")
+                    videobox = gr.Video()
                     textbox = gr.Textbox(
                         show_label=False,
                         placeholder="Enter text and press ENTER",
@@ -340,8 +372,8 @@ class GradioInterface:
 
             textbox.submit(
                 add_text,
-                [state, chatbot, textbox, imagebox],
-                [state, chatbot, textbox, imagebox],
+                [state, chatbot, textbox, imagebox, videobox],
+                [state, chatbot, textbox, imagebox, videobox],
                 queue=False,
             ).then(
                 predict,
@@ -351,8 +383,8 @@ class GradioInterface:
 
             submit_btn.click(
                 add_text,
-                [state, chatbot, textbox, imagebox],
-                [state, chatbot, textbox, imagebox],
+                [state, chatbot, textbox, imagebox, videobox],
+                [state, chatbot, textbox, imagebox, videobox],
                 queue=False,
             ).then(
                 predict,
@@ -361,7 +393,10 @@ class GradioInterface:
             )
 
             clear_btn.click(
-                clear_history, None, [state, chatbot, textbox, imagebox], queue=False
+                clear_history,
+                None,
+                [state, chatbot, textbox, imagebox, videobox],
+                queue=False,
             )
 
             return chat_vl_interface
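The new videobox feeds video files into the same message format the predict path already consumes: a content list with a video_url part alongside the text. A minimal sketch of sending the equivalent payload through the RESTful client, assuming the client accepts the same content-list prompt the web UI sends (endpoint, uid, and file-path handling are assumptions; a video-capable model such as the new cogvlm2-video family would be required):

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://localhost:9997")  # assumed local endpoint
    model = client.get_model("my-video-model")  # hypothetical uid

    # Mirrors the message add_text() builds: a text part plus a video_url part.
    prompt = [
        {"type": "text", "text": "Describe what happens in this clip."},
        {"type": "video_url", "video_url": {"url": "/path/to/clip.mp4"}},
    ]
    response = model.chat(prompt)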
xinference/core/image_interface.py CHANGED
@@ -163,6 +163,7 @@ class ImageInterface:
         size_width: int,
         size_height: int,
         num_inference_steps: int,
+        padding_image_to_multiple: int,
     ) -> PIL.Image.Image:
         from ..client import RESTfulClient
 
@@ -178,6 +179,7 @@ class ImageInterface:
         num_inference_steps = (
             None if num_inference_steps == -1 else num_inference_steps  # type: ignore
         )
+        padding_image_to_multiple = None if padding_image_to_multiple == -1 else padding_image_to_multiple  # type: ignore
 
         bio = io.BytesIO()
         image.save(bio, format="png")
@@ -190,6 +192,7 @@ class ImageInterface:
             size=size,
             response_format="b64_json",
             num_inference_steps=num_inference_steps,
+            padding_image_to_multiple=padding_image_to_multiple,
         )
 
         images = []
@@ -222,9 +225,14 @@ class ImageInterface:
                 n = gr.Number(label="Number of image", value=1)
                 size_width = gr.Number(label="Width", value=-1)
                 size_height = gr.Number(label="Height", value=-1)
+
+            with gr.Row():
                 num_inference_steps = gr.Number(
                     label="Inference Step Number", value=-1
                 )
+                padding_image_to_multiple = gr.Number(
+                    label="Padding image to multiple", value=-1
+                )
 
             with gr.Row():
                 with gr.Column(scale=1):
@@ -242,6 +250,7 @@ class ImageInterface:
                     size_width,
                     size_height,
                     num_inference_steps,
+                    padding_image_to_multiple,
                 ],
                 outputs=output_gallery,
             )
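The new control is forwarded verbatim as a keyword argument, so the same option can be set programmatically. A minimal sketch against the RESTful client (endpoint and uid are hypothetical; padding to a multiple helps diffusion models that require dimensions divisible by a fixed block size):

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://localhost:9997")  # assumed local endpoint
    model = client.get_model("my-image-model")  # hypothetical uid of an image model

    with open("input.png", "rb") as f:
        result = model.image_to_image(
            image=f.read(),
            prompt="a watercolor rendition",
            num_inference_steps=25,
            # Pad the input so each side is a multiple of 64; -1/None disables
            # it, matching the Gradio control above.
            padding_image_to_multiple=64,
        )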
xinference/core/model.py CHANGED
@@ -132,8 +132,8 @@ class ModelActor(xo.StatelessActor):
 
     async def __pre_destroy__(self):
         from ..model.embedding.core import EmbeddingModel
-        from ..model.llm.pytorch.core import PytorchModel as LLMPytorchModel
         from ..model.llm.sglang.core import SGLANGModel
+        from ..model.llm.transformers.core import PytorchModel as LLMPytorchModel
         from ..model.llm.vllm.core import VLLMModel as LLMVLLMModel
 
         if self.allow_batching():
@@ -177,8 +177,9 @@ class ModelActor(xo.StatelessActor):
         request_limits: Optional[int] = None,
     ):
         super().__init__()
-        from ..model.llm.pytorch.core import PytorchModel
+        from ..model.llm.lmdeploy.core import LMDeployModel
         from ..model.llm.sglang.core import SGLANGModel
+        from ..model.llm.transformers.core import PytorchModel
         from ..model.llm.vllm.core import VLLMModel
 
         self._worker_address = worker_address
@@ -192,7 +193,9 @@ class ModelActor(xo.StatelessActor):
         self._current_generator = lambda: None
         self._lock = (
             None
-            if isinstance(self._model, (PytorchModel, VLLMModel, SGLANGModel))
+            if isinstance(
+                self._model, (PytorchModel, VLLMModel, SGLANGModel, LMDeployModel)
+            )
             else asyncio.locks.Lock()
         )
         self._worker_ref = None
@@ -272,7 +275,7 @@ class ModelActor(xo.StatelessActor):
         return isinstance(self._model, VLLMModel)
 
     def allow_batching(self) -> bool:
-        from ..model.llm.pytorch.core import PytorchModel
+        from ..model.llm.transformers.core import PytorchModel
 
         model_ability = self._model_description.get("model_ability", [])
 
@@ -415,7 +418,7 @@ class ModelActor(xo.StatelessActor):
             ret = await asyncio.to_thread(fn, *args, **kwargs)
 
         if self._lock is not None and self._current_generator():
-            raise Exception("Parallel generation is not supported by ggml.")
+            raise Exception("Parallel generation is not supported by llama-cpp-python.")
         if inspect.isgenerator(ret):
             gen = self._to_generator(output_type, ret)
 
xinference/core/scheduler.py CHANGED
@@ -24,7 +24,6 @@ import xoscar as xo
 
 logger = logging.getLogger(__name__)
 
-XINFERENCE_BATCHING_CLEAN_CACHE_INTERVAL = 5
 XINFERENCE_STREAMING_DONE_FLAG = "<XINFERENCE_STREAMING_DONE>"
 XINFERENCE_STREAMING_ERROR_FLAG = "<XINFERENCE_STREAMING_ERROR>"
 XINFERENCE_STREAMING_ABORT_FLAG = "<XINFERENCE_STREAMING_ABORT>"
@@ -359,7 +358,7 @@ class SchedulerActor(xo.StatelessActor):
 
     @staticmethod
     def _empty_cache():
-        from ..model.llm.pytorch.utils import empty_cache
+        from ..model.llm.transformers.utils import empty_cache
 
         empty_cache()
 
xinference/core/worker.py CHANGED
@@ -39,9 +39,11 @@ from ..core.status_guard import LaunchStatus
 from ..device_utils import get_available_device_env_name, gpu_count
 from ..model.core import ModelDescription, create_model_instance
 from ..types import PeftModelConfig
+from .cache_tracker import CacheTrackerActor
 from .event import Event, EventCollectorActor, EventType
 from .metrics import launch_metrics_export_server, record_metrics
 from .resource import gather_node_info
+from .status_guard import StatusGuardActor
 from .utils import log_async, log_sync, parse_replica_model_uid, purge_dir
 
 logger = getLogger(__name__)
@@ -71,6 +73,15 @@ class WorkerActor(xo.StatelessActor):
         self._supervisor_ref: Optional[xo.ActorRefType] = None
         self._main_pool = main_pool
         self._main_pool.recover_sub_pool = self.recover_sub_pool
+        self._status_guard_ref: xo.ActorRefType[  # type: ignore
+            "StatusGuardActor"
+        ] = None
+        self._event_collector_ref: xo.ActorRefType[  # type: ignore
+            EventCollectorActor
+        ] = None
+        self._cache_tracker_ref: xo.ActorRefType[  # type: ignore
+            CacheTrackerActor
+        ] = None
 
         # internal states.
         # temporary placeholder during model launch process:
@@ -308,56 +319,50 @@ class WorkerActor(xo.StatelessActor):
         Params:
             add_worker: By default will call supervisor.add_worker after first connect
         """
-        from .status_guard import StatusGuardActor
         from .supervisor import SupervisorActor
 
         if self._supervisor_ref is not None:
             return self._supervisor_ref
-        self._supervisor_ref: xo.ActorRefType["SupervisorActor"] = await xo.actor_ref(  # type: ignore
+        supervisor_ref = await xo.actor_ref(  # type: ignore
             address=self._supervisor_address, uid=SupervisorActor.uid()
         )
+        # Prevent concurrent operations leads to double initialization, check again.
+        if self._supervisor_ref is not None:
+            return self._supervisor_ref
+        self._supervisor_ref = supervisor_ref
         if add_worker and len(self._model_uid_to_model) == 0:
             # Newly started (or restarted), has no model, notify supervisor
             await self._supervisor_ref.add_worker(self.address)
             logger.info("Connected to supervisor as a fresh worker")
 
-        self._status_guard_ref: xo.ActorRefType[  # type: ignore
-            "StatusGuardActor"
-        ] = await xo.actor_ref(
-            address=self._supervisor_address, uid=StatusGuardActor.uid()
-        )
-
-        self._event_collector_ref: xo.ActorRefType[  # type: ignore
-            EventCollectorActor
-        ] = await xo.actor_ref(
-            address=self._supervisor_address, uid=EventCollectorActor.uid()
-        )
-        from .cache_tracker import CacheTrackerActor
-
-        self._cache_tracker_ref: xo.ActorRefType[  # type: ignore
-            "CacheTrackerActor"
-        ] = await xo.actor_ref(
-            address=self._supervisor_address, uid=CacheTrackerActor.uid()
-        )
-        # cache_tracker is on supervisor
-        from ..model.audio import get_audio_model_descriptions
-        from ..model.embedding import get_embedding_model_descriptions
-        from ..model.flexible import get_flexible_model_descriptions
-        from ..model.image import get_image_model_descriptions
-        from ..model.llm import get_llm_model_descriptions
-        from ..model.rerank import get_rerank_model_descriptions
-
-        # record model version
-        model_version_infos: Dict[str, List[Dict]] = {}  # type: ignore
-        model_version_infos.update(get_llm_model_descriptions())
-        model_version_infos.update(get_embedding_model_descriptions())
-        model_version_infos.update(get_rerank_model_descriptions())
-        model_version_infos.update(get_image_model_descriptions())
-        model_version_infos.update(get_audio_model_descriptions())
-        model_version_infos.update(get_flexible_model_descriptions())
-        await self._cache_tracker_ref.record_model_version(
-            model_version_infos, self.address
-        )
+        self._status_guard_ref = await xo.actor_ref(
+            address=self._supervisor_address, uid=StatusGuardActor.uid()
+        )
+        self._event_collector_ref = await xo.actor_ref(
+            address=self._supervisor_address, uid=EventCollectorActor.uid()
+        )
+        self._cache_tracker_ref = await xo.actor_ref(
+            address=self._supervisor_address, uid=CacheTrackerActor.uid()
+        )
+        # cache_tracker is on supervisor
+        from ..model.audio import get_audio_model_descriptions
+        from ..model.embedding import get_embedding_model_descriptions
+        from ..model.flexible import get_flexible_model_descriptions
+        from ..model.image import get_image_model_descriptions
+        from ..model.llm import get_llm_model_descriptions
+        from ..model.rerank import get_rerank_model_descriptions
+
+        # record model version
+        model_version_infos: Dict[str, List[Dict]] = {}  # type: ignore
+        model_version_infos.update(get_llm_model_descriptions())
+        model_version_infos.update(get_embedding_model_descriptions())
+        model_version_infos.update(get_rerank_model_descriptions())
+        model_version_infos.update(get_image_model_descriptions())
+        model_version_infos.update(get_audio_model_descriptions())
+        model_version_infos.update(get_flexible_model_descriptions())
+        await self._cache_tracker_ref.record_model_version(
+            model_version_infos, self.address
+        )
         return self._supervisor_ref
 
     @staticmethod
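The supervisor_ref re-check above is the usual guard for lazy initialization in async code: the await suspends the coroutine, so a concurrent caller may finish initializing first. A generic sketch of the pattern (names are hypothetical, not xinference API):

    import asyncio

    class LazyRef:
        # Resolve a reference once, even with concurrent async callers.
        def __init__(self):
            self._ref = None

        async def get(self):
            if self._ref is not None:
                return self._ref
            ref = await self._resolve()  # suspension point: other coroutines may run
            if self._ref is not None:  # re-check: another caller may have won
                return self._ref
            self._ref = ref
            return self._ref

        async def _resolve(self):
            await asyncio.sleep(0)  # stand-in for the real actor lookup
            return object()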
@@ -734,7 +739,7 @@ class WorkerActor(xo.StatelessActor):
         elif model_type == "image":
             return ["text_to_image"]
         elif model_type == "audio":
-            return ["audio_to_text"]
+            return [model._model_spec.ability]
         elif model_type == "video":
             return ["text_to_video"]
         elif model_type == "flexible":
@@ -793,6 +798,7 @@ class WorkerActor(xo.StatelessActor):
             logger.exception(e)
             raise
         try:
+            _ = await self.get_supervisor_ref()
             if self._event_collector_ref is not None:
                 await self._event_collector_ref.report_event(
                     origin_uid,
@@ -830,7 +836,7 @@ class WorkerActor(xo.StatelessActor):
                 raise ValueError(
                     f"PEFT adaptors cannot be applied to embedding or rerank models."
                 )
-            if model_type == "LLM" and model_format in ("ggufv2", "ggmlv3"):
+            if model_type == "LLM" and model_format in ("ggufv2",):
                 raise ValueError(
                     f"PEFT adaptors can only be applied to pytorch-like models"
                 )
@@ -914,6 +920,7 @@ class WorkerActor(xo.StatelessActor):
             raise ValueError(f"{model_uid} is launching")
         origin_uid, _, __ = parse_replica_model_uid(model_uid)
         try:
+            _ = await self.get_supervisor_ref()
             if self._event_collector_ref is not None:
                 await self._event_collector_ref.report_event(
                     origin_uid,
@@ -1081,7 +1088,7 @@ class WorkerActor(xo.StatelessActor):
         paths.update([os.path.realpath(path) for path in paths])
 
         # get tensorizer path
-        from ..model.llm.pytorch.tensorizer_utils import get_tensorizer_dir
+        from ..model.llm.transformers.tensorizer_utils import get_tensorizer_dir
 
         tensorizer_path = get_tensorizer_dir(path)
         if os.path.isdir(tensorizer_path):
xinference/deploy/cmdline.py CHANGED
@@ -750,7 +750,7 @@ def remove_cache(
     "-f",
     default=None,
     type=str,
-    help="Specify the format of the model, e.g. pytorch, ggmlv3, etc.",
+    help="Specify the format of the model, e.g. pytorch, ggufv2, etc.",
 )
 @click.option(
     "--quantization",
@@ -1516,7 +1516,7 @@ def query_engine_by_model_name(
     "-f",
     type=str,
     required=True,
-    help="Specify the format of the model, e.g. pytorch, ggmlv3, etc.",
+    help="Specify the format of the model, e.g. pytorch, ggufv2, etc.",
 )
 @click.option(
     "--quantization",
xinference/deploy/test/test_cmdline.py CHANGED
@@ -66,10 +66,10 @@ def test_cmdline(setup, stream, model_uid):
     replica = 1
     original_model_uid = model_uid
     model_uid = client.launch_model(
-        model_name="orca",
+        model_name="qwen1.5-chat",
         model_engine="llama.cpp",
         model_uid=model_uid,
-        model_size_in_billions=3,
+        model_size_in_billions="0_5",
         quantization="q4_0",
         replica=replica,
     )
@@ -249,10 +249,10 @@ def test_rotate_logs(setup_with_file_logging):
     runner = CliRunner()
     replica = 1 if os.name == "nt" else 2
     model_uid = client.launch_model(
-        model_name="orca",
+        model_name="qwen1.5-chat",
         model_engine="llama.cpp",
         model_uid=None,
-        model_size_in_billions=3,
+        model_size_in_billions="0_5",
         quantization="q4_0",
         replica=replica,
     )
@@ -288,7 +288,7 @@ def test_list_cached_models(setup):
 
     result = runner.invoke(
         list_cached_models,
-        ["--endpoint", endpoint, "--model_name", "orca"],
+        ["--endpoint", endpoint, "--model_name", "qwen1.5-chat"],
     )
     assert "model_name" in result.stdout
     assert "model_format" in result.stdout
@@ -305,9 +305,9 @@ def test_remove_cache(setup):
 
     result = runner.invoke(
         remove_cache,
-        ["--endpoint", endpoint, "--model_version", "orca"],
+        ["--endpoint", endpoint, "--model_version", "qwen1.5-chat"],
         input="y\n",
     )
 
     assert result.exit_code == 0
-    assert "Cache directory orca has been deleted."
+    assert "Cache directory qwen1.5-chat has been deleted."
xinference/model/audio/chattts.py CHANGED
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import base64
 import logging
 from io import BytesIO
 from typing import TYPE_CHECKING, Optional
@@ -61,16 +62,31 @@ class ChatTTSModel:
         import torchaudio
         import xxhash
 
-        seed = xxhash.xxh32_intdigest(voice)
+        rnd_spk_emb = None
 
-        torch.manual_seed(seed)
-        np.random.seed(seed)
-        torch.cuda.manual_seed(seed)
-        torch.backends.cudnn.deterministic = True
-        torch.backends.cudnn.benchmark = False
+        if len(voice) > 400:
+            try:
+                assert self._model is not None
+                b = base64.b64decode(voice)
+                bio = BytesIO(b)
+                tensor = torch.load(bio, map_location="cpu")
+                rnd_spk_emb = self._model._encode_spk_emb(tensor)
+                logger.info("Speech by input speaker")
+            except Exception as e:
+                logger.info("Fallback to random speaker due to %s", e)
 
-        assert self._model is not None
-        rnd_spk_emb = self._model.sample_random_speaker()
+        if rnd_spk_emb is None:
+            seed = xxhash.xxh32_intdigest(voice)
+
+            torch.manual_seed(seed)
+            np.random.seed(seed)
+            torch.cuda.manual_seed(seed)
+            torch.backends.cudnn.deterministic = True
+            torch.backends.cudnn.benchmark = False
+
+            assert self._model is not None
+            rnd_spk_emb = self._model.sample_random_speaker()
+            logger.info("Speech by voice %s", voice)
 
         default = 5
         infer_speed = int(default * speed)
@@ -100,7 +116,6 @@ class ChatTTSModel:
         if new_last_pos != last_pos:
             out.seek(last_pos)
             encoded_bytes = out.read()
-            print(len(encoded_bytes))
             yield encoded_bytes
             last_pos = new_last_pos
 
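The voice parameter is now overloaded: anything longer than 400 characters is treated as a base64-encoded, torch-serialized speaker embedding, with a fallback to the old seeded random speaker. A hedged sketch of exercising the new path through the client (endpoint and uid are hypothetical, and the tensor shape below is a stand-in, not the real ChatTTS embedding shape):

    import base64
    from io import BytesIO

    import torch
    from xinference.client import RESTfulClient

    client = RESTfulClient("http://localhost:9997")  # assumed local endpoint
    model = client.get_model("my-chattts")  # hypothetical uid of a ChatTTS model

    spk_tensor = torch.randn(768)  # placeholder; the real shape is model-specific
    buf = BytesIO()
    torch.save(spk_tensor, buf)  # serialized the way the server-side torch.load expects
    voice = base64.b64encode(buf.getvalue()).decode("utf-8")  # well over 400 chars

    audio = model.speech("Hello from a pinned speaker.", voice=voice)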
xinference/model/audio/core.py CHANGED
@@ -21,6 +21,7 @@ from ..core import CacheableModelSpec, ModelDescription
 from ..utils import valid_model_revision
 from .chattts import ChatTTSModel
 from .cosyvoice import CosyVoiceModel
+from .fish_speech import FishSpeechModel
 from .funasr import FunASRModel
 from .whisper import WhisperModel
 
@@ -46,6 +47,7 @@ class AudioModelFamilyV1(CacheableModelSpec):
     model_id: str
     model_revision: str
     multilingual: bool
+    ability: str
     default_model_config: Optional[Dict[str, Any]]
     default_transcription_config: Optional[Dict[str, Any]]
 
@@ -156,13 +158,15 @@ def create_audio_model_instance(
     model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[
-    Union[WhisperModel, FunASRModel, ChatTTSModel, CosyVoiceModel],
+    Union[WhisperModel, FunASRModel, ChatTTSModel, CosyVoiceModel, FishSpeechModel],
     AudioModelDescription,
 ]:
     model_spec = match_audio(model_name, download_hub)
     if model_path is None:
         model_path = cache(model_spec)
-    model: Union[WhisperModel, FunASRModel, ChatTTSModel, CosyVoiceModel]
+    model: Union[
+        WhisperModel, FunASRModel, ChatTTSModel, CosyVoiceModel, FishSpeechModel
+    ]
     if model_spec.model_family == "whisper":
         model = WhisperModel(model_uid, model_path, model_spec, **kwargs)
     elif model_spec.model_family == "funasr":
@@ -171,6 +175,8 @@ def create_audio_model_instance(
         model = ChatTTSModel(model_uid, model_path, model_spec, **kwargs)
     elif model_spec.model_family == "CosyVoice":
         model = CosyVoiceModel(model_uid, model_path, model_spec, **kwargs)
+    elif model_spec.model_family == "FishAudio":
+        model = FishSpeechModel(model_uid, model_path, model_spec, **kwargs)
     else:
         raise Exception(f"Unsupported audio model family: {model_spec.model_family}")
     model_description = AudioModelDescription(
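With the FishAudio family registered, the new TTS model should be launchable like any other audio model. A hedged sketch (the model name is an assumption drawn from the new model_spec.json entry, not confirmed by this diff; check `xinference registrations --model-type audio` for the exact name):

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://localhost:9997")  # assumed local endpoint
    uid = client.launch_model(
        model_name="FishSpeech-1.2-SFT",  # hypothetical name
        model_type="audio",
    )
    model = client.get_model(uid)
    with open("out.mp3", "wb") as f:
        f.write(model.speech("Fish speech is now a supported audio family."))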