PyPI - xinference - Versions diffs - 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl - Mend

xinference-1.2.0-py3-none-any.whl → xinference-1.2.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (124) hide show

xinference/_version.py +3 -3
xinference/api/restful_api.py +4 -7
xinference/client/handlers.py +3 -0
xinference/core/chat_interface.py +6 -1
xinference/core/model.py +2 -0
xinference/core/scheduler.py +4 -7
xinference/core/supervisor.py +114 -23
xinference/core/worker.py +70 -4
xinference/deploy/local.py +2 -1
xinference/model/audio/core.py +11 -0
xinference/model/audio/cosyvoice.py +16 -5
xinference/model/audio/kokoro.py +139 -0
xinference/model/audio/melotts.py +110 -0
xinference/model/audio/model_spec.json +80 -0
xinference/model/audio/model_spec_modelscope.json +18 -0
xinference/model/audio/whisper.py +35 -10
xinference/model/llm/llama_cpp/core.py +21 -14
xinference/model/llm/llm_family.json +527 -1
xinference/model/llm/llm_family.py +4 -1
xinference/model/llm/llm_family_modelscope.json +495 -3
xinference/model/llm/memory.py +1 -1
xinference/model/llm/mlx/core.py +24 -6
xinference/model/llm/transformers/core.py +9 -1
xinference/model/llm/transformers/qwen2_audio.py +3 -1
xinference/model/llm/transformers/qwen2_vl.py +20 -3
xinference/model/llm/transformers/utils.py +22 -11
xinference/model/llm/utils.py +115 -1
xinference/model/llm/vllm/core.py +14 -4
xinference/model/llm/vllm/xavier/block.py +3 -4
xinference/model/llm/vllm/xavier/block_tracker.py +71 -58
xinference/model/llm/vllm/xavier/collective.py +74 -0
xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
xinference/model/llm/vllm/xavier/executor.py +18 -16
xinference/model/llm/vllm/xavier/scheduler.py +79 -63
xinference/model/llm/vllm/xavier/test/test_xavier.py +60 -35
xinference/model/llm/vllm/xavier/transfer.py +53 -32
xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
xinference/thirdparty/melo/__init__.py +0 -0
xinference/thirdparty/melo/api.py +135 -0
xinference/thirdparty/melo/app.py +61 -0
xinference/thirdparty/melo/attentions.py +459 -0
xinference/thirdparty/melo/commons.py +160 -0
xinference/thirdparty/melo/configs/config.json +94 -0
xinference/thirdparty/melo/data/example/metadata.list +20 -0
xinference/thirdparty/melo/data_utils.py +413 -0
xinference/thirdparty/melo/download_utils.py +67 -0
xinference/thirdparty/melo/infer.py +25 -0
xinference/thirdparty/melo/init_downloads.py +14 -0
xinference/thirdparty/melo/losses.py +58 -0
xinference/thirdparty/melo/main.py +36 -0
xinference/thirdparty/melo/mel_processing.py +174 -0
xinference/thirdparty/melo/models.py +1030 -0
xinference/thirdparty/melo/modules.py +598 -0
xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
xinference/thirdparty/melo/monotonic_align/core.py +46 -0
xinference/thirdparty/melo/preprocess_text.py +135 -0
xinference/thirdparty/melo/split_utils.py +174 -0
xinference/thirdparty/melo/text/__init__.py +35 -0
xinference/thirdparty/melo/text/chinese.py +199 -0
xinference/thirdparty/melo/text/chinese_bert.py +107 -0
xinference/thirdparty/melo/text/chinese_mix.py +253 -0
xinference/thirdparty/melo/text/cleaner.py +36 -0
xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
xinference/thirdparty/melo/text/cmudict.rep +129530 -0
xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
xinference/thirdparty/melo/text/english.py +284 -0
xinference/thirdparty/melo/text/english_bert.py +39 -0
xinference/thirdparty/melo/text/english_utils/__init__.py +0 -0
xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
xinference/thirdparty/melo/text/es_phonemizer/__init__.py +0 -0
xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
xinference/thirdparty/melo/text/fr_phonemizer/__init__.py +0 -0
xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
xinference/thirdparty/melo/text/french.py +94 -0
xinference/thirdparty/melo/text/french_bert.py +39 -0
xinference/thirdparty/melo/text/japanese.py +647 -0
xinference/thirdparty/melo/text/japanese_bert.py +49 -0
xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
xinference/thirdparty/melo/text/korean.py +192 -0
xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
xinference/thirdparty/melo/text/spanish.py +122 -0
xinference/thirdparty/melo/text/spanish_bert.py +39 -0
xinference/thirdparty/melo/text/symbols.py +290 -0
xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
xinference/thirdparty/melo/train.py +635 -0
xinference/thirdparty/melo/train.sh +19 -0
xinference/thirdparty/melo/transforms.py +209 -0
xinference/thirdparty/melo/utils.py +424 -0
xinference/types.py +2 -0
xinference/web/ui/build/asset-manifest.json +3 -3
xinference/web/ui/build/index.html +1 -1
xinference/web/ui/build/static/js/{main.1eb206d1.js → main.b0936c54.js} +3 -3
xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
{xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/METADATA +37 -27
{xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/RECORD +122 -45
xinference/web/ui/build/static/js/main.1eb206d1.js.map +0 -1
xinference/web/ui/node_modules/.cache/babel-loader/2213d49de260e1f67c888081b18f120f5225462b829ae57c9e05a05cec83689d.json +0 -1
/xinference/web/ui/build/static/js/{main.1eb206d1.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
{xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/LICENSE +0 -0
{xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/WHEEL +0 -0
{xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/entry_points.txt +0 -0
{xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/top_level.txt +0 -0

xinference/_version.py CHANGED Viewed

@@ -8,11 +8,11 @@ import json
 version_json = '''
 {
- "date": "2025-01-10T17:24:10+0800",
+ "date": "2025-02-08T17:06:47+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "df45f11115051929d6296a0c138b99472abf497f",
- "version": "1.2.0"
+ "full-revisionid": "ac97a13a831de6debda52e6fdb8c1bf9366be57c",
+ "version": "1.2.2"
 }
 '''  # END VERSION_JSON

xinference/api/restful_api.py CHANGED Viewed

@@ -2000,25 +2000,22 @@ class RESTfulAPI(CancelMixin):
         from ..model.llm.utils import (
             GLM4_TOOL_CALL_FAMILY,
-            LLAMA3_TOOL_CALL_FAMILY,
             QWEN_TOOL_CALL_FAMILY,
+            TOOL_CALL_FAMILY,
         )
         model_family = desc.get("model_family", "")
-        function_call_models = (
-            QWEN_TOOL_CALL_FAMILY + GLM4_TOOL_CALL_FAMILY + LLAMA3_TOOL_CALL_FAMILY
-        )
-        if model_family not in function_call_models:
+        if model_family not in TOOL_CALL_FAMILY:
             if body.tools:
                 raise HTTPException(
                     status_code=400,
-                    detail=f"Only {function_call_models} support tool calls",
+                    detail=f"Only {TOOL_CALL_FAMILY} support tool calls",
                 )
             if has_tool_message:
                 raise HTTPException(
                     status_code=400,
-                    detail=f"Only {function_call_models} support tool messages",
+                    detail=f"Only {TOOL_CALL_FAMILY} support tool messages",
                 )
         if body.tools and body.stream:
             is_vllm = await model.is_vllm_backend()

xinference/client/handlers.py CHANGED Viewed

@@ -13,3 +13,6 @@ from .restful.restful_client import (  # noqa: F401
 from .restful.restful_client import (  # noqa: F401
     RESTfulImageModelHandle as ImageModelHandle,
 )
+from .restful.restful_client import (  # noqa: F401
+    RESTfulVideoModelHandle as VideoModelHandle,
+)

xinference/core/chat_interface.py CHANGED Viewed

@@ -13,6 +13,7 @@
 # limitations under the License.
 import base64
+import html
 import logging
 import os
 from io import BytesIO
@@ -137,7 +138,11 @@ class GradioInterface:
                 if "content" not in delta:
                     continue
                 else:
-                    response_content += delta["content"]
+                    # some model like deepseek-r1-distill-qwen
+                    # will generate <think>...</think> ...
+                    # in gradio, no output will be rendered,
+                    # thus escape html tags in advance
+                    response_content += html.escape(delta["content"])
                     yield response_content
             yield response_content

xinference/core/model.py CHANGED Viewed

@@ -35,6 +35,7 @@ from typing import (
     List,
     Optional,
     Union,
+    no_type_check,
 )
 import sse_starlette.sse
@@ -302,6 +303,7 @@ class ModelActor(xo.StatelessActor, CancelMixin):
     def decrease_serve_count(self):
         self._serve_count -= 1
+    @no_type_check
     async def start_transfer_for_vllm(self, rank_addresses: List[str]):
         from ..model.llm.vllm.core import VLLMModel
         from ..model.llm.vllm.xavier.transfer import TransferActor

xinference/core/scheduler.py CHANGED Viewed

@@ -269,16 +269,13 @@ class InferenceRequest:
         )
-def _get_valid_batch_kv_cache(data, skipped_indexes: Set[int]):
-    from transformers.cache_utils import DynamicCache
-    cache = DynamicCache.from_legacy_cache(data)
+def _get_valid_batch_kv_cache(cache, skipped_indexes: Set[int]):
     batch_size = cache.key_cache[0].shape[0]
     batch_slices = [num for num in range(batch_size) if num not in skipped_indexes]
     for idx in range(len(cache)):
-        cache.key_cache[idx] = cache.key_cache[idx][batch_slices, ::]
-        cache.value_cache[idx] = cache.value_cache[idx][batch_slices, ::]
-    return cache.to_legacy_cache()
+        cache.key_cache[idx] = cache.key_cache[idx][batch_slices, ::].contiguous()
+        cache.value_cache[idx] = cache.value_cache[idx][batch_slices, ::].contiguous()
+    return cache
 class SchedulerActor(xo.StatelessActor):

xinference/core/supervisor.py CHANGED Viewed

@@ -268,8 +268,12 @@ class SupervisorActor(xo.StatelessActor):
             )
         from ..model.llm.vllm.xavier.block_tracker import VLLMBlockTracker
+        from ..model.llm.vllm.xavier.collective_manager import CollectiveManager
-        self._block_tracker: Optional[xo.ActorRefType[VLLMBlockTracker]] = None
+        self._block_tracker_mapping: Dict[str, xo.ActorRefType[VLLMBlockTracker]] = {}
+        self._collective_manager_mapping: Dict[
+            str, xo.ActorRefType[CollectiveManager]
+        ] = {}
     @typing.no_type_check
     async def get_cluster_device_info(self, detailed: bool = False) -> List:
@@ -960,26 +964,40 @@ class SupervisorActor(xo.StatelessActor):
         ]:
             raise ValueError("Tensorizer is not supported for %s." % model_name)
+        if model_uid is None:
+            model_uid = self._gen_model_uid(model_name)
+        # Xavier-related
         enable_xavier: bool = (
             bool(kwargs.pop("enable_xavier", False))
             and model_engine is not None
             and model_engine.lower() == "vllm"
         )
+        store_address = None
+        store_port = None
+        world_size = None
         if enable_xavier:
             if replica <= 1:
                 logger.warning(f"Enabling xavier when `replica<=1` is meaningless.")
                 enable_xavier = False
             else:
                 from ..model.llm.vllm.xavier.block_tracker import VLLMBlockTracker
+                from ..model.llm.vllm.xavier.collective_manager import CollectiveManager
-                self._block_tracker = await xo.create_actor(
+                self._block_tracker_mapping[model_uid] = await xo.create_actor(
                     VLLMBlockTracker,
                     address=self.address,
-                    uid=VLLMBlockTracker.default_uid(),
+                    uid=f"{VLLMBlockTracker.default_uid()}-{model_uid}",
                 )
-        if model_uid is None:
-            model_uid = self._gen_model_uid(model_name)
+                world_size = replica + 1
+                logger.info(f"Going to start xavier with world size: {world_size}")
+                self._collective_manager_mapping[model_uid] = await xo.create_actor(
+                    CollectiveManager,
+                    address=self.address,
+                    uid=f"{CollectiveManager.default_uid()}-{model_uid}",
+                    model_uid=model_uid,
+                )
+                logger.info(f"Start collective manager for {model_uid} done.")
         model_size = str(model_size_in_billions) if model_size_in_billions else ""
         logger.debug(
@@ -988,13 +1006,38 @@ class SupervisorActor(xo.StatelessActor):
             f"kwargs: {kwargs}"
         )
-        async def _launch_one_model(
-            worker_ref, _replica_model_uid, rank: int, store_port: int
-        ):
+        async def _launch_one_model(worker_ref, _replica_model_uid, rank: int):
             if _replica_model_uid in self._replica_model_uid_to_worker:
                 raise ValueError(
                     f"Model is already in the model list, uid: {_replica_model_uid}"
                 )
+            nonlocal store_address
+            nonlocal store_port
+            xavier_config = (
+                {
+                    "block_tracker_uid": self._block_tracker_mapping[model_uid].uid,
+                    "block_tracker_address": self._block_tracker_mapping[
+                        model_uid
+                    ].address,
+                    "rank": rank,
+                    "world_size": world_size,
+                    "store_address": store_address,
+                    "store_port": store_port,
+                }
+                if enable_xavier
+                else None
+            )
+            if enable_xavier and rank == 0:
+                rank0_address, _port = await worker_ref.launch_rank0_model(
+                    _replica_model_uid, xavier_config
+                )
+                self._replica_model_uid_to_worker[_replica_model_uid] = worker_ref
+                store_address = rank0_address.split(":")[0]
+                store_port = _port
+                return rank0_address
             replica_gpu_idx = assign_replica_gpu(_replica_model_uid, replica, gpu_idx)
             nonlocal model_type
@@ -1014,17 +1057,7 @@ class SupervisorActor(xo.StatelessActor):
                 gpu_idx=replica_gpu_idx,
                 download_hub=download_hub,
                 model_path=model_path,
-                xavier_config={
-                    "block_tracker_address": self._block_tracker.address
-                    if self._block_tracker is not None
-                    else None,
-                    "rank": rank,
-                    "world_size": replica,
-                    "store_address": self.address.split(":")[0],
-                    "store_port": store_port,
-                }
-                if enable_xavier
-                else None,
+                xavier_config=xavier_config,
                 **kwargs,
             )
             self._replica_model_uid_to_worker[_replica_model_uid] = worker_ref
@@ -1032,10 +1065,9 @@ class SupervisorActor(xo.StatelessActor):
         async def _launch_model():
             try:
-                store_port = xo.utils.get_next_port()
                 worker_refs = []
                 rank_addresses = []
-                for rank, rep_model_uid in enumerate(
+                for _idx, rep_model_uid in enumerate(
                     iter_replica_model_uid(model_uid, replica)
                 ):
                     worker_ref = (
@@ -1043,8 +1075,18 @@ class SupervisorActor(xo.StatelessActor):
                         if target_ip_worker_ref is not None
                         else await self._choose_worker()
                     )
+                    if enable_xavier and _idx == 0:
+                        """
+                        Start the rank 0 model actor on the worker that holds the rank 1 replica,
+                        solely for constructing the collective communication world.
+                        """
+                        _uid = model_uid + "-rank0"
+                        rank0_address = await _launch_one_model(worker_ref, _uid, 0)
+                        worker_refs.append((worker_ref, _uid))
+                        rank_addresses.append(rank0_address)
                     subpool_address = await _launch_one_model(
-                        worker_ref, rep_model_uid, rank, store_port
+                        worker_ref, rep_model_uid, _idx + 1
                     )
                     worker_refs.append((worker_ref, rep_model_uid))
                     rank_addresses.append(subpool_address)
@@ -1054,6 +1096,7 @@ class SupervisorActor(xo.StatelessActor):
                 # because the transfer actor needs all the rank addresses used for collective communication
                 if enable_xavier:
                     logger.debug(f"Init transfer component for xavier...")
+                    collective_manager_ref = self._collective_manager_mapping[model_uid]
                     tasks = []
                     for worker_ref, rep_model_uid in worker_refs:
                         tasks.append(
@@ -1064,6 +1107,13 @@ class SupervisorActor(xo.StatelessActor):
                     # Here you must use asyncio.gather, not a for loop,
                     # or you will get stuck.
                     await asyncio.gather(*tasks)
+                    # init collective_manager
+                    for idx, addr in enumerate(rank_addresses):
+                        await collective_manager_ref.register_rank(
+                            idx, addr, update=False
+                        )
                     logger.debug(f"Init transfer component for xavier done.")
             except Exception:
                 # terminate_model will remove the replica info.
@@ -1193,6 +1243,38 @@ class SupervisorActor(xo.StatelessActor):
                     raise
         self._model_uid_to_replica_info.pop(model_uid, None)
+        # clear for xavier
+        rank0_uid = model_uid + "-rank0"
+        if rank0_uid in self._replica_model_uid_to_worker:
+            await _terminate_one_model(rank0_uid)
+        collective_manager_ref = self._collective_manager_mapping.pop(model_uid, None)
+        if collective_manager_ref is not None:
+            try:
+                await xo.destroy_actor(collective_manager_ref)
+            except Exception as e:
+                logger.debug(
+                    "Destroy collective_manager_ref failed, model uid: %s, error: %s",
+                    model_uid,
+                    e,
+                )
+            finally:
+                logger.debug(
+                    f"Destroy collective_manager_ref done. model uid: {model_uid}"
+                )
+        block_tracker_ref = self._block_tracker_mapping.pop(model_uid, None)
+        if block_tracker_ref is not None:
+            try:
+                await xo.destroy_actor(block_tracker_ref)
+            except Exception as e:
+                logger.debug(
+                    "Destroy block_tracker_ref failed, model uid: %s, error: %s",
+                    model_uid,
+                    e,
+                )
+            finally:
+                logger.debug(f"Destroy block_tracker_ref done. model uid: {model_uid}")
     @log_async(logger=logger)
     async def get_model(self, model_uid: str) -> xo.ActorRefType["ModelActor"]:
         replica_info = self._model_uid_to_replica_info.get(model_uid, None)
@@ -1448,3 +1530,12 @@ class SupervisorActor(xo.StatelessActor):
     async def get_progress(self, request_id: str) -> float:
         return await self._progress_tracker.get_progress(request_id)
+    async def call_collective_manager(
+        self, model_uid: str, func_name: str, *args, **kwargs
+    ):
+        """
+        Used by worker.
+        """
+        collective_manager_ref = self._collective_manager_mapping[model_uid]
+        await getattr(collective_manager_ref, func_name)(*args, **kwargs)

xinference/core/worker.py CHANGED Viewed

@@ -24,7 +24,7 @@ import time
 from collections import defaultdict
 from dataclasses import dataclass
 from logging import getLogger
-from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union
+from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union, no_type_check
 import xoscar as xo
 from async_timeout import timeout
@@ -184,12 +184,12 @@ class WorkerActor(xo.StatelessActor):
                             self._model_uid_to_recover_count[model_uid] = (
                                 recover_count - 1
                             )
-                            await self.launch_builtin_model(**launch_args)
+                            await self.recover_model(launch_args)
                         else:
                             logger.warning("Stop recreating model actor.")
                     else:
                         logger.warning("Recreating model actor %s ...", model_uid)
-                        await self.launch_builtin_model(**launch_args)
+                        await self.recover_model(launch_args)
                 break
     @classmethod
@@ -940,7 +940,11 @@ class WorkerActor(xo.StatelessActor):
         # Terminate model while its launching is not allow
         if model_uid in self._model_uid_launching_guard:
             raise ValueError(f"{model_uid} is launching")
-        origin_uid, _ = parse_replica_model_uid(model_uid)
+        # In special cases, if the suffix is `-rank0`, this is the Xavier's rank 0 model actor.
+        if model_uid.endswith("-rank0"):
+            origin_uid = model_uid.removesuffix("-rank0")
+        else:
+            origin_uid, _ = parse_replica_model_uid(model_uid)
         try:
             _ = await self.get_supervisor_ref()
             if self._event_collector_ref is not None:
@@ -1173,3 +1177,65 @@ class WorkerActor(xo.StatelessActor):
     ):
         model_ref = self._model_uid_to_model[rep_model_uid]
         await model_ref.start_transfer_for_vllm(rank_addresses)
+    @log_async(logger=logger, level=logging.INFO)
+    async def launch_rank0_model(
+        self, rep_model_uid: str, xavier_config: Dict[str, Any]
+    ) -> Tuple[str, int]:
+        from ..model.llm.vllm.xavier.collective_manager import Rank0ModelActor
+        if os.name != "nt" and platform.system() != "Darwin":
+            # Linux
+            start_method = "forkserver"
+        else:
+            # Windows and macOS
+            start_method = "spawn"
+        subpool_address = await self._main_pool.append_sub_pool(
+            start_method=start_method
+        )
+        store_address = subpool_address.split(":")[0]
+        # Note that `store_port` needs to be generated on the worker,
+        # as the TCP store is on rank 0, not on the supervisor.
+        store_port = xo.utils.get_next_port()
+        self._model_uid_launching_guard[rep_model_uid] = True
+        try:
+            try:
+                xavier_config["rank_address"] = subpool_address
+                xavier_config["store_address"] = store_address
+                xavier_config["store_port"] = store_port
+                model_ref = await xo.create_actor(
+                    Rank0ModelActor,
+                    address=subpool_address,
+                    uid=rep_model_uid,
+                    xavier_config=xavier_config,
+                )
+            except:
+                await self._main_pool.remove_sub_pool(subpool_address)
+                raise
+            self._model_uid_to_model[rep_model_uid] = model_ref
+            self._model_uid_to_addr[rep_model_uid] = subpool_address
+        finally:
+            del self._model_uid_launching_guard[rep_model_uid]
+        return subpool_address, store_port
+    @no_type_check
+    async def recover_model(self, launch_args: Dict[str, Any]):
+        rep_model_uid = launch_args.get("model_uid")
+        origin_uid, _ = parse_replica_model_uid(rep_model_uid)
+        xavier_config: Optional[Dict[str, Any]] = launch_args.get("xavier_config", None)
+        is_xavier: bool = xavier_config is not None
+        supervisor_ref = await self.get_supervisor_ref(add_worker=False)
+        if is_xavier:
+            rank = xavier_config.get("rank")
+            await supervisor_ref.call_collective_manager(
+                origin_uid, "unregister_rank", rank
+            )
+        subpool_address = await self.launch_builtin_model(**launch_args)
+        if is_xavier:
+            model_ref = self._model_uid_to_model[rep_model_uid]
+            await model_ref.start_transfer_for_vllm([])
+            rank = xavier_config.get("rank")
+            await supervisor_ref.call_collective_manager(
+                origin_uid, "register_rank", rank, subpool_address, update=True
+            )

xinference/deploy/local.py CHANGED Viewed

@@ -41,7 +41,8 @@ async def _start_local_cluster(
 ):
     from .utils import create_worker_actor_pool
-    logging.config.dictConfig(logging_conf)  # type: ignore
+    if logging_conf:
+        logging.config.dictConfig(logging_conf)  # type: ignore
     pool = None
     try:

xinference/model/audio/core.py CHANGED Viewed

@@ -25,6 +25,8 @@ from .f5tts import F5TTSModel
 from .f5tts_mlx import F5TTSMLXModel
 from .fish_speech import FishSpeechModel
 from .funasr import FunASRModel
+from .kokoro import KokoroModel
+from .melotts import MeloTTSModel
 from .whisper import WhisperModel
 from .whisper_mlx import WhisperMLXModel
@@ -48,6 +50,7 @@ class AudioModelFamilyV1(CacheableModelSpec):
     model_id: str
     model_revision: Optional[str]
     multilingual: bool
+    language: Optional[str]
     model_ability: Optional[str]
     default_model_config: Optional[Dict[str, Any]]
     default_transcription_config: Optional[Dict[str, Any]]
@@ -173,6 +176,8 @@ def create_audio_model_instance(
         FishSpeechModel,
         F5TTSModel,
         F5TTSMLXModel,
+        MeloTTSModel,
+        KokoroModel,
     ],
     AudioModelDescription,
 ]:
@@ -188,6 +193,8 @@ def create_audio_model_instance(
         FishSpeechModel,
         F5TTSModel,
         F5TTSMLXModel,
+        MeloTTSModel,
+        KokoroModel,
     ]
     if model_spec.model_family == "whisper":
         if not model_spec.engine:
@@ -206,6 +213,10 @@ def create_audio_model_instance(
         model = F5TTSModel(model_uid, model_path, model_spec, **kwargs)
     elif model_spec.model_family == "F5-TTS-MLX":
         model = F5TTSMLXModel(model_uid, model_path, model_spec, **kwargs)
+    elif model_spec.model_family == "MeloTTS":
+        model = MeloTTSModel(model_uid, model_path, model_spec, **kwargs)
+    elif model_spec.model_family == "Kokoro":
+        model = KokoroModel(model_uid, model_path, model_spec, **kwargs)
     else:
         raise Exception(f"Unsupported audio model family: {model_spec.model_family}")
     model_description = AudioModelDescription(

xinference/model/audio/cosyvoice.py CHANGED Viewed

@@ -49,8 +49,11 @@ class CosyVoiceModel:
         import os
         import sys
+        import torch
         # The yaml config loaded from model has hard-coded the import paths. please refer to: load_hyperpyyaml
-        sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../thirdparty"))
+        thirdparty_dir = os.path.join(os.path.dirname(__file__), "../../thirdparty")
+        sys.path.insert(0, thirdparty_dir)
         if "CosyVoice2" in self._model_spec.model_name:
             from cosyvoice.cli.cosyvoice import CosyVoice2 as CosyVoice
@@ -61,9 +64,17 @@ class CosyVoiceModel:
             self._is_cosyvoice2 = False
-        self._model = CosyVoice(
-            self._model_path, load_jit=self._kwargs.get("load_jit", False)
+        # Unify this configuration name as 'compile' to be compatible with the name 'load_jit'.
+        load_jit = self._kwargs.get("load_jit", False) or self._kwargs.get(
+            "compile", False
         )
+        logger.info("Loading CosyVoice model, compile=%s...", load_jit)
+        self._model = CosyVoice(self._model_path, load_jit=load_jit)
+        if self._is_cosyvoice2:
+            spk2info_file = os.path.join(thirdparty_dir, "cosyvoice/bin/spk2info.pt")
+            self._model.frontend.spk2info = torch.load(
+                spk2info_file, map_location=self._device
+            )
     def _speech_handle(
         self,
@@ -101,10 +112,10 @@ class CosyVoiceModel:
                     input, prompt_speech_16k, stream=stream
                 )
         else:
-            assert not self._is_cosyvoice2
             available_speakers = self._model.list_avaliable_spks()
             if not voice:
                 voice = available_speakers[0]
+                logger.info("Auto select speaker: %s", voice)
             else:
                 assert (
                     voice in available_speakers
@@ -184,7 +195,7 @@ class CosyVoiceModel:
                 prompt_text is None
             ), "CosyVoice Instruct model does not support prompt_text"
         elif self._is_cosyvoice2:
-            assert prompt_speech is not None, "CosyVoice2 requires prompt_speech"
+            pass
         else:
             # inference_zero_shot
             # inference_cross_lingual

xinference 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

Potentially problematic release.

xinference-1.2.0-py3-none-any.whl → xinference-1.2.2-py3-none-any.whl