PyPI - xinference - Versions diffs - 1.4.1__py3-none-any.whl → 1.5.0.post1__py3-none-any.whl - Mend

xinference 1.4.1__py3-none-any.whl → 1.5.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (104) hide show

xinference/core/worker.py CHANGED Viewed

@@ -22,9 +22,20 @@ import signal
 import threading
 import time
 from collections import defaultdict
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from logging import getLogger
-from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union, no_type_check
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    Set,
+    Tuple,
+    Union,
+    no_type_check,
+)
 import xoscar as xo
 from async_timeout import timeout
@@ -34,13 +45,17 @@ from ..constants import (
     XINFERENCE_CACHE_DIR,
     XINFERENCE_DISABLE_HEALTH_CHECK,
     XINFERENCE_DISABLE_METRICS,
+    XINFERENCE_ENABLE_VIRTUAL_ENV,
     XINFERENCE_HEALTH_CHECK_INTERVAL,
+    XINFERENCE_VIRTUAL_ENV_DIR,
 )
 from ..core.model import ModelActor
 from ..core.status_guard import LaunchStatus
 from ..device_utils import get_available_device_env_name, gpu_count
-from ..model.core import ModelDescription, create_model_instance
+from ..model.core import ModelDescription, VirtualEnvSettings, create_model_instance
+from ..model.utils import CancellableDownloader
 from ..types import PeftModelConfig
+from ..utils import get_pip_config_args, get_real_path
 from .cache_tracker import CacheTrackerActor
 from .event import Event, EventCollectorActor, EventType
 from .metrics import launch_metrics_export_server, record_metrics
@@ -48,6 +63,14 @@ from .resource import gather_node_info
 from .status_guard import StatusGuardActor
 from .utils import log_async, log_sync, parse_replica_model_uid, purge_dir
+try:
+    from xoscar.virtualenv import VirtualEnvManager
+except ImportError:
+    VirtualEnvManager = None
+if TYPE_CHECKING:
+    from .progress_tracker import Progressor
 logger = getLogger(__name__)
@@ -64,6 +87,17 @@ class ModelStatus:
     last_error: str = ""
+@dataclass
+class LaunchInfo:
+    cancel_event: threading.Event = field(default_factory=threading.Event)
+    # virtualenv manager
+    virtual_env_manager: Optional["VirtualEnvManager"] = None
+    # downloader, report progress or cancel entire download
+    downloader: Optional[CancellableDownloader] = None
+    # sub pools created for the model
+    sub_pools: Optional[List[str]] = None
 class WorkerActor(xo.StatelessActor):
     def __init__(
         self,
@@ -92,7 +126,7 @@ class WorkerActor(xo.StatelessActor):
         # internal states.
         # temporary placeholder during model launch process:
-        self._model_uid_launching_guard: Dict[str, bool] = {}
+        self._model_uid_launching_guard: Dict[str, LaunchInfo] = {}
         # attributes maintained after model launched:
         self._model_uid_to_model: Dict[str, xo.ActorRefType["ModelActor"]] = {}
         self._model_uid_to_model_spec: Dict[str, ModelDescription] = {}
@@ -352,6 +386,7 @@ class WorkerActor(xo.StatelessActor):
         self._cache_tracker_ref = await xo.actor_ref(
             address=self._supervisor_address, uid=CacheTrackerActor.default_uid()
         )
+        self._progress_tracker_ref = None
         # cache_tracker is on supervisor
         from ..model.audio import get_audio_model_descriptions
         from ..model.embedding import get_embedding_model_descriptions
@@ -548,8 +583,9 @@ class WorkerActor(xo.StatelessActor):
         model_type: Optional[str] = None,
         n_gpu: Optional[Union[int, str]] = "auto",
         gpu_idx: Optional[List[int]] = None,
+        env: Optional[Dict[str, str]] = None,
     ) -> Tuple[str, List[str]]:
-        env = {}
+        env = {} if env is None else env
         devices = []
         env_name = get_available_device_env_name() or "CUDA_VISIBLE_DEVICES"
         if gpu_idx is None:
@@ -778,6 +814,96 @@ class WorkerActor(xo.StatelessActor):
                 version_info["model_file_location"],
             )
+    @classmethod
+    def _create_virtual_env_manager(
+        cls,
+        enable_virtual_env: Optional[bool],
+        virtual_env_name: Optional[str],
+        env_path: str,
+    ) -> Optional[VirtualEnvManager]:
+        if enable_virtual_env is None:
+            enable_virtual_env = XINFERENCE_ENABLE_VIRTUAL_ENV
+        if not enable_virtual_env:
+            # skip preparing virtualenv
+            return None
+        from xoscar.virtualenv import get_virtual_env_manager
+        virtual_env_manager: VirtualEnvManager = get_virtual_env_manager(
+            virtual_env_name or "uv", env_path
+        )
+        return virtual_env_manager
+    @classmethod
+    def _prepare_virtual_env(
+        cls,
+        virtual_env_manager: "VirtualEnvManager",
+        settings: Optional[VirtualEnvSettings],
+    ):
+        if not settings or not settings.packages:
+            # no settings or no packages
+            return
+        # create env
+        virtual_env_manager.create_env()
+        if settings.inherit_pip_config:
+            # inherit pip config
+            pip_config = get_pip_config_args()
+            for k, v in pip_config.items():
+                if hasattr(settings, k) and not getattr(settings, k):
+                    setattr(settings, k, v)
+        packages = settings.packages
+        index_url = settings.index_url
+        extra_index_url = settings.extra_index_url
+        find_links = settings.find_links
+        trusted_host = settings.trusted_host
+        logger.info(
+            "Installing packages %s in virtual env %s, with settings(index_url=%s)",
+            packages,
+            virtual_env_manager.env_path,
+            index_url,
+        )
+        virtual_env_manager.install_packages(
+            packages,
+            index_url=index_url,
+            extra_index_url=extra_index_url,
+            find_links=find_links,
+            trusted_host=trusted_host,
+        )
+    async def _get_progressor(self, request_id: str):
+        from .progress_tracker import Progressor, ProgressTrackerActor
+        progress_tracker_ref = self._progress_tracker_ref
+        if progress_tracker_ref is None:
+            progress_tracker_ref = self._progress_tracker_ref = await xo.actor_ref(
+                address=self._supervisor_address, uid=ProgressTrackerActor.default_uid()
+            )
+        progressor = Progressor(
+            request_id,
+            progress_tracker_ref,
+            asyncio.get_running_loop(),
+        )
+        await progressor.start()
+        progressor.set_progress(0.0, "start to launch model")
+        return progressor
+    @classmethod
+    def _upload_download_progress(
+        cls, progressor: "Progressor", downloader: CancellableDownloader
+    ):
+        while not downloader.done:
+            progress = downloader.get_progress()
+            progressor.set_progress(progress)
+            downloader.wait(1)
+        progressor.set_progress(1.0, "Start to load model")
     @log_async(logger=logger, level=logging.INFO)
     async def launch_builtin_model(
         self,
@@ -870,9 +996,27 @@ class WorkerActor(xo.StatelessActor):
             raise ValueError(f"{model_uid} is running")
         try:
-            self._model_uid_launching_guard[model_uid] = True
+            self._model_uid_launching_guard[model_uid] = launch_info = LaunchInfo()
+            # virtualenv
+            enable_virtual_env = kwargs.pop("enable_virtual_env", None)
+            virtual_env_name = kwargs.pop("virtual_env_name", None)
+            virtual_env_path = os.path.join(XINFERENCE_VIRTUAL_ENV_DIR, model_name)
+            virtual_env_manager = await asyncio.to_thread(
+                self._create_virtual_env_manager,
+                enable_virtual_env,
+                virtual_env_name,
+                virtual_env_path,
+            )
+            # setting os.environ if virtualenv created
+            env = (
+                {"PYTHONPATH": virtual_env_manager.get_lib_path()}
+                if virtual_env_manager
+                else None
+            )
             subpool_address, devices = await self._create_subpool(
-                model_uid, model_type, n_gpu=n_gpu, gpu_idx=gpu_idx
+                model_uid, model_type, n_gpu=n_gpu, gpu_idx=gpu_idx, env=env
             )
             all_subpool_addresses = [subpool_address]
             try:
@@ -891,23 +1035,62 @@ class WorkerActor(xo.StatelessActor):
                             driver_info=driver_info,
                         )
                     )
-                model, model_description = await asyncio.to_thread(
-                    create_model_instance,
-                    subpool_address,
-                    devices,
-                    model_uid,
-                    model_type,
-                    model_name,
-                    model_engine,
-                    model_format,
-                    model_size_in_billions,
-                    quantization,
-                    peft_model_config,
-                    download_hub,
-                    model_path,
-                    **model_kwargs,
-                )
-                await self.update_cache_status(model_name, model_description)
+                with CancellableDownloader(
+                    cancelled_event=launch_info.cancel_event
+                ) as downloader:
+                    launch_info.downloader = downloader
+                    progressor = await self._get_progressor("launching-" + model_uid)
+                    # split into download and launch
+                    progressor.split_stages(2, stage_weight=[0, 0.8, 1.0])
+                    with progressor:
+                        upload_progress_task = asyncio.create_task(
+                            asyncio.to_thread(
+                                self._upload_download_progress, progressor, downloader
+                            )
+                        )
+                        model, model_description = await asyncio.to_thread(
+                            create_model_instance,
+                            subpool_address,
+                            devices,
+                            model_uid,
+                            model_type,
+                            model_name,
+                            model_engine,
+                            model_format,
+                            model_size_in_billions,
+                            quantization,
+                            peft_model_config,
+                            download_hub,
+                            model_path,
+                            **model_kwargs,
+                        )
+                    await self.update_cache_status(model_name, model_description)
+                def check_cancel():
+                    # check downloader first, sometimes download finished
+                    # cancelled already
+                    if downloader.cancelled:
+                        with progressor:
+                            # just report progress
+                            pass
+                        downloader.raise_error(error_msg="Launch cancelled")
+                # check cancel before prepare virtual env
+                check_cancel()
+                # install packages in virtual env
+                if virtual_env_manager:
+                    await asyncio.to_thread(
+                        self._prepare_virtual_env,
+                        virtual_env_manager,
+                        model_description.spec.virtualenv,
+                    )
+                    launch_info.virtual_env_manager = virtual_env_manager
+                # check before creating model actor
+                check_cancel()
                 model_ref = await xo.create_actor(
                     ModelActor,
                     address=subpool_address,
@@ -939,12 +1122,28 @@ class WorkerActor(xo.StatelessActor):
                     pool_addresses = await asyncio.gather(*coros)
                     all_subpool_addresses.extend(pool_addresses)
                     await model_ref.set_pool_addresses(pool_addresses)
-                await model_ref.load()
+                # check before loading
+                check_cancel()
+                # set all subpool addresses
+                # when cancelled, all subpool addresses need to be destroyed
+                launch_info.sub_pools = all_subpool_addresses
+                with progressor:
+                    try:
+                        await model_ref.load()
+                    except xo.ServerClosed:
+                        check_cancel()
+                        raise
             except:
                 logger.error(f"Failed to load model {model_uid}", exc_info=True)
                 self.release_devices(model_uid=model_uid)
                 for addr in all_subpool_addresses:
-                    await self._main_pool.remove_sub_pool(addr)
+                    try:
+                        await self._main_pool.remove_sub_pool(addr)
+                    except KeyError:
+                        continue
                 raise
             self._model_uid_to_model[model_uid] = model_ref
             self._model_uid_to_model_spec[model_uid] = model_description
@@ -978,6 +1177,39 @@ class WorkerActor(xo.StatelessActor):
         model_ref = self._model_uid_to_model[model_uid]
         await model_ref.wait_for_load()
+    @log_sync(logger=logger, level=logging.INFO)
+    async def cancel_launch_model(self, model_uid: str):
+        try:
+            launch_info = self._model_uid_launching_guard[model_uid]
+            # downloader shared same cancel event
+            # sometimes cancel happens very early before downloader
+            # even if users cancel at this time,
+            # downloader will know and stop everything
+            launch_info.cancel_event.set()
+            if launch_info.downloader:
+                logger.debug("Try to cancel download, %s")
+                launch_info.downloader.cancel()
+            if launch_info.virtual_env_manager:
+                launch_info.virtual_env_manager.cancel_install()
+            if launch_info.sub_pools:
+                logger.debug("Try to stop sub pools: %s", launch_info.sub_pools)
+                coros = []
+                for addr in launch_info.sub_pools:
+                    coros.append(self._main_pool.remove_sub_pool(addr, force=True))
+                await asyncio.gather(*coros)
+            if self._status_guard_ref is not None:
+                await self._status_guard_ref.update_instance_info(
+                    parse_replica_model_uid(model_uid)[0],
+                    {"status": LaunchStatus.ERROR.name},
+                )
+        except KeyError:
+            logger.error("Fail to cancel launching", exc_info=True)
+            raise RuntimeError(
+                "Model is not launching, may have launched or not launched yet"
+            )
     @log_async(logger=logger, level=logging.INFO)
     async def terminate_model(self, model_uid: str, is_model_die=False):
         # Terminate model while its launching is not allow
@@ -1157,16 +1389,9 @@ class WorkerActor(xo.StatelessActor):
             }
             path = list.get("model_file_location")
             cached_model["path"] = path
-            # parsing soft links
-            if os.path.isdir(path):
-                files = os.listdir(path)
-                # dir has files
-                if files:
-                    resolved_file = os.path.realpath(os.path.join(path, files[0]))
-                    if resolved_file:
-                        cached_model["real_path"] = os.path.dirname(resolved_file)
-            else:
-                cached_model["real_path"] = os.path.realpath(path)
+            real_path = get_real_path(path)
+            if real_path:
+                cached_model["real_path"] = real_path
             cached_model["actor_ip_address"] = self.address
             cached_models.append(cached_model)
         return cached_models
@@ -1267,7 +1492,7 @@ class WorkerActor(xo.StatelessActor):
         # Note that `store_port` needs to be generated on the worker,
         # as the TCP store is on rank 0, not on the supervisor.
         store_port = xo.utils.get_next_port()
-        self._model_uid_launching_guard[rep_model_uid] = True
+        self._model_uid_launching_guard[rep_model_uid] = LaunchInfo()
         try:
             try:
                 xavier_config["rank_address"] = subpool_address

xinference/deploy/cmdline.py CHANGED Viewed

@@ -16,10 +16,12 @@ import asyncio
 import logging
 import os
 import sys
+import time
 import warnings
 from typing import Dict, List, Optional, Sequence, Tuple, Union
 import click
+from tqdm.auto import tqdm
 from xoscar.utils import get_next_port
 from .. import __version__
@@ -925,6 +927,9 @@ def model_launch(
     if api_key is None:
         client._set_token(get_stored_token(endpoint, client))
+    # do not wait for launching.
+    kwargs["wait_ready"] = False
     model_uid = client.launch_model(
         model_name=model_name,
         model_type=model_type,
@@ -943,8 +948,35 @@ def model_launch(
         model_path=model_path,
         **kwargs,
     )
+    try:
+        with tqdm(
+            total=100, desc="Launching model", bar_format="{l_bar}{bar} | {n:.1f}%"
+        ) as pbar:
+            while True:
+                status = client.get_instance_info(model_name, model_uid)
+                if all(s["status"] in ["READY", "ERROR", "TERMINATED"] for s in status):
+                    break
+                progress = client.get_launch_model_progress(model_uid)["progress"]
+                percent = max(round(progress * 100, 1), pbar.n)
-    print(f"Model uid: {model_uid}", file=sys.stderr)
+                pbar.update(percent - pbar.n)
+                time.sleep(0.5)
+            # setting to 100%
+            pbar.update(pbar.total - pbar.n)
+        print(f"Model uid: {model_uid}", file=sys.stderr)
+    except KeyboardInterrupt:
+        user_input = (
+            input("Do you want to cancel model launching? (y/[n]): ").strip().lower()
+        )
+        if user_input == "y":
+            client.cancel_launch_model(model_uid)
+            print(f"Cancel request sent: {model_uid}")
+        else:
+            print("Skip cancel, launching model will be running still.")
 @cli.command(

xinference/model/audio/core.py CHANGED Viewed

@@ -17,7 +17,7 @@ from collections import defaultdict
 from typing import Any, Dict, List, Literal, Optional, Tuple, Union
 from ...constants import XINFERENCE_CACHE_DIR
-from ..core import CacheableModelSpec, ModelDescription
+from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
 from ..utils import valid_model_revision
 from .chattts import ChatTTSModel
 from .cosyvoice import CosyVoiceModel
@@ -26,6 +26,7 @@ from .f5tts_mlx import F5TTSMLXModel
 from .fish_speech import FishSpeechModel
 from .funasr import FunASRModel
 from .kokoro import KokoroModel
+from .megatts import MegaTTSModel
 from .melotts import MeloTTSModel
 from .whisper import WhisperModel
 from .whisper_mlx import WhisperMLXModel
@@ -55,6 +56,7 @@ class AudioModelFamilyV1(CacheableModelSpec):
     default_model_config: Optional[Dict[str, Any]]
     default_transcription_config: Optional[Dict[str, Any]]
     engine: Optional[str]
+    virtualenv: Optional[VirtualEnvSettings]
 class AudioModelDescription(ModelDescription):
@@ -68,6 +70,10 @@ class AudioModelDescription(ModelDescription):
         super().__init__(address, devices, model_path=model_path)
         self._model_spec = model_spec
+    @property
+    def spec(self):
+        return self._model_spec
     def to_dict(self):
         return {
             "model_type": "audio",
@@ -178,6 +184,7 @@ def create_audio_model_instance(
         F5TTSMLXModel,
         MeloTTSModel,
         KokoroModel,
+        MegaTTSModel,
     ],
     AudioModelDescription,
 ]:
@@ -195,6 +202,7 @@ def create_audio_model_instance(
         F5TTSMLXModel,
         MeloTTSModel,
         KokoroModel,
+        MegaTTSModel,
     ]
     if model_spec.model_family == "whisper":
         if not model_spec.engine:
@@ -217,6 +225,8 @@ def create_audio_model_instance(
         model = MeloTTSModel(model_uid, model_path, model_spec, **kwargs)
     elif model_spec.model_family == "Kokoro":
         model = KokoroModel(model_uid, model_path, model_spec, **kwargs)
+    elif model_spec.model_family == "MegaTTS":
+        model = MegaTTSModel(model_uid, model_path, model_spec, **kwargs)
     else:
         raise Exception(f"Unsupported audio model family: {model_spec.model_family}")
     model_description = AudioModelDescription(

xinference/model/audio/megatts.py ADDED Viewed

@@ -0,0 +1,105 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import io
+import logging
+from io import BytesIO
+from typing import TYPE_CHECKING, Optional
+if TYPE_CHECKING:
+    from .core import AudioModelFamilyV1
+logger = logging.getLogger(__name__)
+class MegaTTSModel:
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: str,
+        model_spec: "AudioModelFamilyV1",
+        device: Optional[str] = None,
+        **kwargs,
+    ):
+        self._model_uid = model_uid
+        self._model_path = model_path
+        self._model_spec = model_spec
+        self._device = device
+        self._model = None
+        self._vocoder = None
+        self._kwargs = kwargs
+    @property
+    def model_ability(self):
+        return self._model_spec.model_ability
+    def load(self):
+        import os
+        import sys
+        # The yaml config loaded from model has hard-coded the import paths. please refer to: load_hyperpyyaml
+        sys.path.insert(
+            0, os.path.join(os.path.dirname(__file__), "../../thirdparty/megatts3")
+        )
+        # For whisper
+        sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../thirdparty"))
+        from tts.infer_cli import MegaTTS3DiTInfer
+        self._model = MegaTTS3DiTInfer(ckpt_root=self._model_path)
+    def speech(
+        self,
+        input: str,
+        voice: str,
+        response_format: str = "mp3",
+        speed: float = 1.0,
+        stream: bool = False,
+        **kwargs,
+    ):
+        import soundfile
+        if stream:
+            raise Exception("MegaTTS3 does not support stream generation.")
+        if voice:
+            raise Exception(
+                "MegaTTS3 does not support voice, please specify prompt_speech and prompt_latent."
+            )
+        prompt_speech: Optional[bytes] = kwargs.pop("prompt_speech", None)
+        prompt_latent: Optional[bytes] = kwargs.pop("prompt_latent", None)
+        if not prompt_speech:
+            raise Exception("Please set prompt_speech for MegaTTS3.")
+        if not prompt_latent:
+            raise Exception("Please set prompt_latent for MegaTTS3.")
+        assert self._model is not None
+        with io.BytesIO(prompt_latent) as prompt_latent_io:
+            resource_context = self._model.preprocess(
+                prompt_speech, latent_file=prompt_latent_io
+            )
+        wav_bytes = self._model.forward(
+            resource_context,
+            input,
+            time_step=kwargs.get("time_step", 32),
+            p_w=kwargs.get("p_w", 1.6),
+            t_w=kwargs.get("t_w", 2.5),
+        )
+        # Save the generated audio
+        with BytesIO() as out:
+            with soundfile.SoundFile(
+                out, "w", self._model.sr, 1, format=response_format.upper()
+            ) as f:
+                f.write(wav_bytes)
+            return out.getvalue()

xinference/model/audio/model_spec.json CHANGED Viewed

@@ -203,6 +203,21 @@
       "merge_length_s": 15
     }
   },
+  {
+    "model_name": "paraformer-zh",
+    "model_family": "funasr",
+    "model_id": "funasr/paraformer-zh",
+    "model_revision": "5ed094cdfc8f6a9b6b022bd08bc904ef862bc79e",
+    "model_ability": "audio-to-text",
+    "multilingual": false,
+    "default_model_config": {
+      "vad_model": "fsmn-vad",
+      "punc_model": "ct-punc"
+    },
+    "default_transcription_config": {
+      "batch_size_s": 300
+    }
+  },
   {
     "model_name": "ChatTTS",
     "model_family": "ChatTTS",
@@ -216,7 +231,7 @@
     "model_family": "CosyVoice",
     "model_id": "FunAudioLLM/CosyVoice-300M",
     "model_revision": "39c4e13d46bd4dfb840d214547623e5fcd2428e2",
-    "model_ability": "audio-to-audio",
+    "model_ability": "text-to-audio",
     "multilingual": true
   },
   {
@@ -346,5 +361,13 @@
     "model_revision": "7884269d6fd3f9beabc271b6f1308e5699281fa9",
     "model_ability": "text-to-audio",
     "multilingual": true
+  },
+  {
+    "model_name": "MegaTTS3",
+    "model_family": "MegaTTS",
+    "model_id": "ByteDance/MegaTTS3",
+    "model_revision": "409a7002b006d80f0730fca6f80441b08c10e738",
+    "model_ability": "text-to-audio",
+    "multilingual": true
   }
 ]

xinference 1.4.1__py3-none-any.whl → 1.5.0.post1__py3-none-any.whl

Potentially problematic release.

xinference 1.4.1__py3-none-any.whl → 1.5.0.post1__py3-none-any.whl