xinference 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff shows the changes between publicly released versions of this package as they appear in their respective public registries; it is provided for informational purposes only.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +4 -7
- xinference/client/handlers.py +3 -0
- xinference/core/chat_interface.py +6 -1
- xinference/core/model.py +2 -0
- xinference/core/scheduler.py +4 -7
- xinference/core/supervisor.py +114 -23
- xinference/core/worker.py +70 -4
- xinference/deploy/local.py +2 -1
- xinference/model/audio/core.py +11 -0
- xinference/model/audio/cosyvoice.py +16 -5
- xinference/model/audio/kokoro.py +139 -0
- xinference/model/audio/melotts.py +110 -0
- xinference/model/audio/model_spec.json +80 -0
- xinference/model/audio/model_spec_modelscope.json +18 -0
- xinference/model/audio/whisper.py +35 -10
- xinference/model/llm/llama_cpp/core.py +21 -14
- xinference/model/llm/llm_family.json +527 -1
- xinference/model/llm/llm_family.py +4 -1
- xinference/model/llm/llm_family_modelscope.json +495 -3
- xinference/model/llm/memory.py +1 -1
- xinference/model/llm/mlx/core.py +24 -6
- xinference/model/llm/transformers/core.py +9 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +20 -3
- xinference/model/llm/transformers/utils.py +22 -11
- xinference/model/llm/utils.py +115 -1
- xinference/model/llm/vllm/core.py +14 -4
- xinference/model/llm/vllm/xavier/block.py +3 -4
- xinference/model/llm/vllm/xavier/block_tracker.py +71 -58
- xinference/model/llm/vllm/xavier/collective.py +74 -0
- xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
- xinference/model/llm/vllm/xavier/executor.py +18 -16
- xinference/model/llm/vllm/xavier/scheduler.py +79 -63
- xinference/model/llm/vllm/xavier/test/test_xavier.py +60 -35
- xinference/model/llm/vllm/xavier/transfer.py +53 -32
- xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
- xinference/thirdparty/melo/__init__.py +0 -0
- xinference/thirdparty/melo/api.py +135 -0
- xinference/thirdparty/melo/app.py +61 -0
- xinference/thirdparty/melo/attentions.py +459 -0
- xinference/thirdparty/melo/commons.py +160 -0
- xinference/thirdparty/melo/configs/config.json +94 -0
- xinference/thirdparty/melo/data/example/metadata.list +20 -0
- xinference/thirdparty/melo/data_utils.py +413 -0
- xinference/thirdparty/melo/download_utils.py +67 -0
- xinference/thirdparty/melo/infer.py +25 -0
- xinference/thirdparty/melo/init_downloads.py +14 -0
- xinference/thirdparty/melo/losses.py +58 -0
- xinference/thirdparty/melo/main.py +36 -0
- xinference/thirdparty/melo/mel_processing.py +174 -0
- xinference/thirdparty/melo/models.py +1030 -0
- xinference/thirdparty/melo/modules.py +598 -0
- xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
- xinference/thirdparty/melo/monotonic_align/core.py +46 -0
- xinference/thirdparty/melo/preprocess_text.py +135 -0
- xinference/thirdparty/melo/split_utils.py +174 -0
- xinference/thirdparty/melo/text/__init__.py +35 -0
- xinference/thirdparty/melo/text/chinese.py +199 -0
- xinference/thirdparty/melo/text/chinese_bert.py +107 -0
- xinference/thirdparty/melo/text/chinese_mix.py +253 -0
- xinference/thirdparty/melo/text/cleaner.py +36 -0
- xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
- xinference/thirdparty/melo/text/cmudict.rep +129530 -0
- xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
- xinference/thirdparty/melo/text/english.py +284 -0
- xinference/thirdparty/melo/text/english_bert.py +39 -0
- xinference/thirdparty/melo/text/english_utils/__init__.py +0 -0
- xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
- xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
- xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
- xinference/thirdparty/melo/text/es_phonemizer/__init__.py +0 -0
- xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
- xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
- xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
- xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
- xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
- xinference/thirdparty/melo/text/fr_phonemizer/__init__.py +0 -0
- xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
- xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
- xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
- xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
- xinference/thirdparty/melo/text/french.py +94 -0
- xinference/thirdparty/melo/text/french_bert.py +39 -0
- xinference/thirdparty/melo/text/japanese.py +647 -0
- xinference/thirdparty/melo/text/japanese_bert.py +49 -0
- xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
- xinference/thirdparty/melo/text/korean.py +192 -0
- xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
- xinference/thirdparty/melo/text/spanish.py +122 -0
- xinference/thirdparty/melo/text/spanish_bert.py +39 -0
- xinference/thirdparty/melo/text/symbols.py +290 -0
- xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
- xinference/thirdparty/melo/train.py +635 -0
- xinference/thirdparty/melo/train.sh +19 -0
- xinference/thirdparty/melo/transforms.py +209 -0
- xinference/thirdparty/melo/utils.py +424 -0
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.1eb206d1.js → main.b0936c54.js} +3 -3
- xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/METADATA +37 -27
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/RECORD +122 -45
- xinference/web/ui/build/static/js/main.1eb206d1.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2213d49de260e1f67c888081b18f120f5225462b829ae57c9e05a05cec83689d.json +0 -1
- /xinference/web/ui/build/static/js/{main.1eb206d1.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/LICENSE +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/WHEEL +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/entry_points.txt +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/top_level.txt +0 -0
xinference/model/llm/vllm/xavier/transfer.py

@@ -23,6 +23,8 @@ from vllm.core.scheduler import Scheduler
 from vllm.utils import TORCH_DTYPE_TO_NUMPY_DTYPE, Device
 from vllm.worker.cache_engine import CacheEngine
 
+from .collective import CollectiveRank
+
 logger = logging.getLogger(__name__)
 
 
@@ -89,7 +91,7 @@ class BufferTransferMixin:
         return TypeMappingGloo[TORCH_DTYPE_TO_NUMPY_DTYPE[input_dtype]]
 
 
-class TransferActor(xo.StatelessActor, BufferTransferMixin):
+class TransferActor(xo.StatelessActor, BufferTransferMixin, CollectiveRank):
     @classmethod
     def default_uid(cls):
         return f"vllm-transfer-actor"
@@ -104,38 +106,21 @@ class TransferActor(xo.StatelessActor, BufferTransferMixin):
         world_addresses: List[str],
     ):
         super().__init__()
-
-
-
-
-
-
-
+        CollectiveRank.__init__(
+            self,
+            rank,
+            world_size,
+            rank_address,
+            store_address,
+            store_port,
+            world_addresses,
+        )
         self._cache_engine: Optional[List[CacheEngine]] = None
         self._scheduler: Optional[List[Scheduler]] = None
         self._swap_stream = torch.cuda.Stream()
 
     async def __post_create__(self):
-
-
-        context = xp.rendezvous.Context(self._rank, self._world_size)
-
-        attr = xp.transport.tcp.attr(self._rank_address.split(":")[0])
-        dev = xp.transport.tcp.CreateDevice(attr)
-
-        opt = xp.rendezvous.TCPStoreOptions()
-        opt.port = self._store_port
-        opt.numWorkers = self._world_size
-        opt.isServer = self._rank == 0
-
-        store = xp.rendezvous.TCPStore(self._store_address, opt)
-        store = xp.rendezvous.PrefixStore(str(self._world_size), store)
-
-        context.connectFullMesh(store, dev)
-        self._context = context
-        logger.debug(
-            f"Rank {self._rank} arrives successfully, world addresses: {self._world_addresses}"
-        )
+        self.init_rank()
 
     def setup(
         self,
@@ -153,6 +138,9 @@ class TransferActor(xo.StatelessActor, BufferTransferMixin):
             num_buffer, buffer_shape, buffer_dtype, buffer_device, pin_memory
         )
 
+    async def __pre_destroy__(self):
+        self._context.closeConnections()
+
     def _get_cache_engine(self, virtual_engine: int) -> CacheEngine:
         return self._cache_engine[virtual_engine]  # type: ignore
 
@@ -281,18 +269,51 @@ class TransferActor(xo.StatelessActor, BufferTransferMixin):
         self.free_buffer_index(cpu_buf_index)
 
     async def recv(
-        self, virtual_engine: int,
+        self, virtual_engine: int, from_rank: int, src_to_dst: Dict[int, int]
     ):
         """
        This is the external entry point for the call.
        The transfer logic is as follows:
        the receiver requests the sender to send the data directly to itself in a point-to-point manner.
        """
-
+        from_address = self._world_addresses[from_rank]
         sender_ref = await xo.actor_ref(
-            address=from_address, uid=f"{TransferActor.default_uid()}-{
+            address=from_address, uid=f"{TransferActor.default_uid()}-{from_rank}"
         )
         await asyncio.gather(
             sender_ref.do_send(virtual_engine, self._rank, src_to_dst),
-            self.do_recv(virtual_engine,
+            self.do_recv(virtual_engine, from_rank, src_to_dst),
         )
+
+
+class Rank0TransferActor(xo.StatelessActor, CollectiveRank):
+    """
+    The Rank 0 transfer actor is only used for constructing the collective communication world,
+    so it only needs to inherit the `CollectiveWorld` class.
+    """
+
+    @classmethod
+    def default_uid(cls):
+        return f"vllm-transfer-actor"
+
+    def __init__(
+        self,
+        rank: int,
+        world_size: int,
+        rank_address: str,
+        store_address: str,
+        store_port: int,
+        world_addresses: List[str],
+    ):
+        CollectiveRank.__init__(
+            self,
+            rank,
+            world_size,
+            rank_address,
+            store_address,
+            store_port,
+            world_addresses,
+        )
+
+    async def __post_create__(self):
+        self.init_rank()
xinference/thirdparty/cosyvoice/bin/spk2info.pt: binary file, contents not shown.

xinference/thirdparty/melo/__init__.py: file without changes.
xinference/thirdparty/melo/api.py

@@ -0,0 +1,135 @@
+import os
+import re
+import json
+import torch
+import librosa
+import soundfile
+import torchaudio
+import numpy as np
+import torch.nn as nn
+from tqdm import tqdm
+import torch
+
+from . import utils
+from . import commons
+from .models import SynthesizerTrn
+from .split_utils import split_sentence
+from .mel_processing import spectrogram_torch, spectrogram_torch_conv
+from .download_utils import load_or_download_config, load_or_download_model
+
+class TTS(nn.Module):
+    def __init__(self,
+                language,
+                device='auto',
+                use_hf=True,
+                config_path=None,
+                ckpt_path=None):
+        super().__init__()
+        if device == 'auto':
+            device = 'cpu'
+            if torch.cuda.is_available(): device = 'cuda'
+            if torch.backends.mps.is_available(): device = 'mps'
+        if 'cuda' in device:
+            assert torch.cuda.is_available()
+
+        # config_path =
+        hps = load_or_download_config(language, use_hf=use_hf, config_path=config_path)
+
+        num_languages = hps.num_languages
+        num_tones = hps.num_tones
+        symbols = hps.symbols
+
+        model = SynthesizerTrn(
+            len(symbols),
+            hps.data.filter_length // 2 + 1,
+            hps.train.segment_size // hps.data.hop_length,
+            n_speakers=hps.data.n_speakers,
+            num_tones=num_tones,
+            num_languages=num_languages,
+            **hps.model,
+        ).to(device)
+
+        model.eval()
+        self.model = model
+        self.symbol_to_id = {s: i for i, s in enumerate(symbols)}
+        self.hps = hps
+        self.device = device
+
+        # load state_dict
+        checkpoint_dict = load_or_download_model(language, device, use_hf=use_hf, ckpt_path=ckpt_path)
+        self.model.load_state_dict(checkpoint_dict['model'], strict=True)
+
+        language = language.split('_')[0]
+        self.language = 'ZH_MIX_EN' if language == 'ZH' else language # we support a ZH_MIX_EN model
+
+    @staticmethod
+    def audio_numpy_concat(segment_data_list, sr, speed=1.):
+        audio_segments = []
+        for segment_data in segment_data_list:
+            audio_segments += segment_data.reshape(-1).tolist()
+            audio_segments += [0] * int((sr * 0.05) / speed)
+        audio_segments = np.array(audio_segments).astype(np.float32)
+        return audio_segments
+
+    @staticmethod
+    def split_sentences_into_pieces(text, language, quiet=False):
+        texts = split_sentence(text, language_str=language)
+        if not quiet:
+            print(" > Text split to sentences.")
+            print('\n'.join(texts))
+            print(" > ===========================")
+        return texts
+
+    def tts_to_file(self, text, speaker_id, output_path=None, sdp_ratio=0.2, noise_scale=0.6, noise_scale_w=0.8, speed=1.0, pbar=None, format=None, position=None, quiet=False,):
+        language = self.language
+        texts = self.split_sentences_into_pieces(text, language, quiet)
+        audio_list = []
+        if pbar:
+            tx = pbar(texts)
+        else:
+            if position:
+                tx = tqdm(texts, position=position)
+            elif quiet:
+                tx = texts
+            else:
+                tx = tqdm(texts)
+        for t in tx:
+            if language in ['EN', 'ZH_MIX_EN']:
+                t = re.sub(r'([a-z])([A-Z])', r'\1 \2', t)
+            device = self.device
+            bert, ja_bert, phones, tones, lang_ids = utils.get_text_for_tts_infer(t, language, self.hps, device, self.symbol_to_id)
+            with torch.no_grad():
+                x_tst = phones.to(device).unsqueeze(0)
+                tones = tones.to(device).unsqueeze(0)
+                lang_ids = lang_ids.to(device).unsqueeze(0)
+                bert = bert.to(device).unsqueeze(0)
+                ja_bert = ja_bert.to(device).unsqueeze(0)
+                x_tst_lengths = torch.LongTensor([phones.size(0)]).to(device)
+                del phones
+                speakers = torch.LongTensor([speaker_id]).to(device)
+                audio = self.model.infer(
+                        x_tst,
+                        x_tst_lengths,
+                        speakers,
+                        tones,
+                        lang_ids,
+                        bert,
+                        ja_bert,
+                        sdp_ratio=sdp_ratio,
+                        noise_scale=noise_scale,
+                        noise_scale_w=noise_scale_w,
+                        length_scale=1. / speed,
+                    )[0][0, 0].data.cpu().float().numpy()
+                del x_tst, tones, lang_ids, bert, ja_bert, x_tst_lengths, speakers
+                #
+            audio_list.append(audio)
+        torch.cuda.empty_cache()
+        audio = self.audio_numpy_concat(audio_list, sr=self.hps.data.sampling_rate, speed=speed)
+
+        if output_path is None:
+            return audio
+        else:
+            if format:
+                soundfile.write(output_path, audio, self.hps.data.sampling_rate, format=format)
+            else:
+                soundfile.write(output_path, audio, self.hps.data.sampling_rate)
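For reference, the vendored `TTS` class above is what xinference's new MeloTTS audio model (see `xinference/model/audio/melotts.py` in the listing) builds on. Below is a minimal hedged usage sketch, assuming the English checkpoint can be downloaded via `use_hf=True` and that `'EN-US'` is present in `hps.data.spk2id` (the WebUI below makes the same assumption); the output path is hypothetical.

# Hedged sketch, not part of the diff: synthesizing speech with the vendored API.
from xinference.thirdparty.melo.api import TTS

model = TTS(language='EN', device='auto')      # 'auto' picks cuda/mps/cpu
speaker_id = model.hps.data.spk2id['EN-US']    # speaker table ships with the config
model.tts_to_file(
    'The field of text-to-speech has seen rapid development recently.',
    speaker_id,
    'en_us.wav',   # hypothetical output path; pass None to get a numpy array back
    speed=1.0,
)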
xinference/thirdparty/melo/app.py

@@ -0,0 +1,61 @@
+# WebUI by mrfakename <X @realmrfakename / HF @mrfakename>
+# Demo also available on HF Spaces: https://huggingface.co/spaces/mrfakename/MeloTTS
+import gradio as gr
+import os, torch, io
+# os.system('python -m unidic download')
+print("Make sure you've downloaded unidic (python -m unidic download) for this WebUI to work.")
+from melo.api import TTS
+speed = 1.0
+import tempfile
+import click
+device = 'auto'
+models = {
+    'EN': TTS(language='EN', device=device),
+    'ES': TTS(language='ES', device=device),
+    'FR': TTS(language='FR', device=device),
+    'ZH': TTS(language='ZH', device=device),
+    'JP': TTS(language='JP', device=device),
+    'KR': TTS(language='KR', device=device),
+}
+speaker_ids = models['EN'].hps.data.spk2id
+
+default_text_dict = {
+    'EN': 'The field of text-to-speech has seen rapid development recently.',
+    'ES': 'El campo de la conversión de texto a voz ha experimentado un rápido desarrollo recientemente.',
+    'FR': 'Le domaine de la synthèse vocale a connu un développement rapide récemment',
+    'ZH': 'text-to-speech 领域近年来发展迅速',
+    'JP': 'テキスト読み上げの分野は最近急速な発展を遂げています',
+    'KR': '최근 텍스트 음성 변환 분야가 급속도로 발전하고 있습니다.',
+}
+
+def synthesize(speaker, text, speed, language, progress=gr.Progress()):
+    bio = io.BytesIO()
+    models[language].tts_to_file(text, models[language].hps.data.spk2id[speaker], bio, speed=speed, pbar=progress.tqdm, format='wav')
+    return bio.getvalue()
+def load_speakers(language, text):
+    if text in list(default_text_dict.values()):
+        newtext = default_text_dict[language]
+    else:
+        newtext = text
+    return gr.update(value=list(models[language].hps.data.spk2id.keys())[0], choices=list(models[language].hps.data.spk2id.keys())), newtext
+with gr.Blocks() as demo:
+    gr.Markdown('# MeloTTS WebUI\n\nA WebUI for MeloTTS.')
+    with gr.Group():
+        speaker = gr.Dropdown(speaker_ids.keys(), interactive=True, value='EN-US', label='Speaker')
+        language = gr.Radio(['EN', 'ES', 'FR', 'ZH', 'JP', 'KR'], label='Language', value='EN')
+        speed = gr.Slider(label='Speed', minimum=0.1, maximum=10.0, value=1.0, interactive=True, step=0.1)
+        text = gr.Textbox(label="Text to speak", value=default_text_dict['EN'])
+        language.input(load_speakers, inputs=[language, text], outputs=[speaker, text])
+        btn = gr.Button('Synthesize', variant='primary')
+        aud = gr.Audio(interactive=False)
+        btn.click(synthesize, inputs=[speaker, text, speed, language], outputs=[aud])
+    gr.Markdown('WebUI by [mrfakename](https://twitter.com/realmrfakename).')
+@click.command()
+@click.option('--share', '-s', is_flag=True, show_default=True, default=False, help="Expose a publicly-accessible shared Gradio link usable by anyone with the link. Only share the link with people you trust.")
+@click.option('--host', '-h', default=None)
+@click.option('--port', '-p', type=int, default=None)
+def main(share, host, port):
+    demo.queue(api_open=False).launch(show_api=False, share=share, server_name=host, server_port=port)
+
+if __name__ == "__main__":
+    main()