PyPI - xinference - Versions diffs - 0.14.4.post1__py3-none-any.whl → 0.15.1__py3-none-any.whl - Mend

xinference 0.14.4.post1__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (194) hide show

xinference/model/llm/transformers/llama_2.py DELETED Viewed

@@ -1,108 +0,0 @@
-# Copyright 2022-2023 XProbe Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from typing import List, Optional
-from ....types import LoRA
-from ..llm_family import LLMFamilyV1, LLMSpecV1
-from .core import PytorchChatModel, PytorchModel, PytorchModelConfig
-class LlamaPytorchModel(PytorchModel):
-    def __init__(
-        self,
-        model_uid: str,
-        model_family: "LLMFamilyV1",
-        model_spec: "LLMSpecV1",
-        quantization: str,
-        model_path: str,
-        pytorch_model_config: Optional[PytorchModelConfig] = None,
-        peft_model: Optional[List[LoRA]] = None,
-    ):
-        super().__init__(
-            model_uid,
-            model_family,
-            model_spec,
-            quantization,
-            model_path,
-            pytorch_model_config=pytorch_model_config,
-            peft_model=peft_model,
-        )
-    def _load_model(self, **kwargs):
-        model, tokenizer = super()._load_model(**kwargs)
-        # Llama has no pad token by default
-        # https://github.com/huggingface/transformers/blob/07998ef39926b76d3f6667025535d0859eed61c3/docs/source/en/llm_tutorial.md?plain=1#L125
-        tokenizer.pad_token = tokenizer.eos_token
-        model.config.eos_token_id = tokenizer.eos_token_id
-        model.config.pad_token_id = tokenizer.pad_token_id
-        return model, tokenizer
-    @classmethod
-    def match(
-        cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
-    ) -> bool:
-        if llm_spec.model_format != "pytorch":
-            return False
-        model_family = llm_family.model_family or llm_family.model_name
-        if "llama-2" not in model_family:
-            return False
-        if "generate" not in llm_family.model_ability:
-            return False
-        return True
-class LlamaPytorchChatModel(PytorchChatModel):
-    def __init__(
-        self,
-        model_uid: str,
-        model_family: "LLMFamilyV1",
-        model_spec: "LLMSpecV1",
-        quantization: str,
-        model_path: str,
-        pytorch_model_config: Optional["PytorchModelConfig"] = None,
-        peft_model: Optional[List[LoRA]] = None,
-    ):
-        super().__init__(
-            model_uid,
-            model_family,
-            model_spec,
-            quantization,
-            model_path,
-            peft_model=peft_model,
-            pytorch_model_config=pytorch_model_config,
-        )
-        self._use_fast_tokenizer = False
-    def _load_model(self, **kwargs):
-        model, tokenizer = super()._load_model(**kwargs)
-        # Llama has no pad token by default
-        # https://github.com/huggingface/transformers/blob/07998ef39926b76d3f6667025535d0859eed61c3/docs/source/en/llm_tutorial.md?plain=1#L125
-        tokenizer.pad_token = tokenizer.eos_token
-        model.config.eos_token_id = tokenizer.eos_token_id
-        model.config.pad_token_id = tokenizer.pad_token_id
-        return model, tokenizer
-    @classmethod
-    def match(
-        cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
-    ) -> bool:
-        if llm_spec.model_format != "pytorch":
-            return False
-        model_family = llm_family.model_family or llm_family.model_name
-        if "llama-2" not in model_family:
-            return False
-        if "chat" not in llm_family.model_ability:
-            return False
-        return True

xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py DELETED Viewed

@@ -1,442 +0,0 @@
-import itertools
-import math
-from typing import Any, Callable
-import lightning as L
-import torch
-import torch.nn.functional as F
-# import wandb
-from lightning.pytorch.loggers import TensorBoardLogger, WandbLogger
-from matplotlib import pyplot as plt
-from torch import nn
-from fish_speech.models.vqgan.modules.discriminator import Discriminator
-from fish_speech.models.vqgan.modules.wavenet import WaveNet
-from fish_speech.models.vqgan.utils import avg_with_mask, plot_mel, sequence_mask
-class VQGAN(L.LightningModule):
-    def __init__(
-        self,
-        optimizer: Callable,
-        lr_scheduler: Callable,
-        encoder: WaveNet,
-        quantizer: nn.Module,
-        decoder: WaveNet,
-        discriminator: Discriminator,
-        vocoder: nn.Module,
-        encode_mel_transform: nn.Module,
-        gt_mel_transform: nn.Module,
-        weight_adv: float = 1.0,
-        weight_vq: float = 1.0,
-        weight_mel: float = 1.0,
-        sampling_rate: int = 44100,
-        freeze_encoder: bool = False,
-    ):
-        super().__init__()
-        # Model parameters
-        self.optimizer_builder = optimizer
-        self.lr_scheduler_builder = lr_scheduler
-        # Modules
-        self.encoder = encoder
-        self.quantizer = quantizer
-        self.decoder = decoder
-        self.vocoder = vocoder
-        self.discriminator = discriminator
-        self.encode_mel_transform = encode_mel_transform
-        self.gt_mel_transform = gt_mel_transform
-        # A simple linear layer to project quality to condition channels
-        self.quality_projection = nn.Linear(1, 768)
-        # Freeze vocoder
-        for param in self.vocoder.parameters():
-            param.requires_grad = False
-        # Loss weights
-        self.weight_adv = weight_adv
-        self.weight_vq = weight_vq
-        self.weight_mel = weight_mel
-        # Other parameters
-        self.sampling_rate = sampling_rate
-        # Disable strict loading
-        self.strict_loading = False
-        # If encoder is frozen
-        if freeze_encoder:
-            for param in self.encoder.parameters():
-                param.requires_grad = False
-            for param in self.quantizer.parameters():
-                param.requires_grad = False
-        self.automatic_optimization = False
-    def on_save_checkpoint(self, checkpoint):
-        # Do not save vocoder
-        state_dict = checkpoint["state_dict"]
-        for name in list(state_dict.keys()):
-            if "vocoder" in name:
-                state_dict.pop(name)
-    def configure_optimizers(self):
-        optimizer_generator = self.optimizer_builder(
-            itertools.chain(
-                self.encoder.parameters(),
-                self.quantizer.parameters(),
-                self.decoder.parameters(),
-                self.quality_projection.parameters(),
-            )
-        )
-        optimizer_discriminator = self.optimizer_builder(
-            self.discriminator.parameters()
-        )
-        lr_scheduler_generator = self.lr_scheduler_builder(optimizer_generator)
-        lr_scheduler_discriminator = self.lr_scheduler_builder(optimizer_discriminator)
-        return (
-            {
-                "optimizer": optimizer_generator,
-                "lr_scheduler": {
-                    "scheduler": lr_scheduler_generator,
-                    "interval": "step",
-                    "name": "optimizer/generator",
-                },
-            },
-            {
-                "optimizer": optimizer_discriminator,
-                "lr_scheduler": {
-                    "scheduler": lr_scheduler_discriminator,
-                    "interval": "step",
-                    "name": "optimizer/discriminator",
-                },
-            },
-        )
-    def training_step(self, batch, batch_idx):
-        optim_g, optim_d = self.optimizers()
-        audios, audio_lengths = batch["audios"], batch["audio_lengths"]
-        audios = audios.float()
-        audios = audios[:, None, :]
-        with torch.no_grad():
-            encoded_mels = self.encode_mel_transform(audios)
-            gt_mels = self.gt_mel_transform(audios)
-            quality = ((gt_mels.mean(-1) > -8).sum(-1) - 90) / 10
-            quality = quality.unsqueeze(-1)
-        mel_lengths = audio_lengths // self.gt_mel_transform.hop_length
-        mel_masks = sequence_mask(mel_lengths, gt_mels.shape[2])
-        mel_masks_float_conv = mel_masks[:, None, :].float()
-        gt_mels = gt_mels * mel_masks_float_conv
-        encoded_mels = encoded_mels * mel_masks_float_conv
-        # Encode
-        encoded_features = self.encoder(encoded_mels) * mel_masks_float_conv
-        # Quantize
-        vq_result = self.quantizer(encoded_features)
-        loss_vq = getattr("vq_result", "loss", 0.0)
-        vq_recon_features = vq_result.z * mel_masks_float_conv
-        vq_recon_features = (
-            vq_recon_features + self.quality_projection(quality)[:, :, None]
-        )
-        # VQ Decode
-        gen_mel = (
-            self.decoder(
-                torch.randn_like(vq_recon_features) * mel_masks_float_conv,
-                condition=vq_recon_features,
-            )
-            * mel_masks_float_conv
-        )
-        # Discriminator
-        real_logits = self.discriminator(gt_mels)
-        fake_logits = self.discriminator(gen_mel.detach())
-        d_mask = F.interpolate(
-            mel_masks_float_conv, size=(real_logits.shape[2],), mode="nearest"
-        )
-        loss_real = avg_with_mask((real_logits - 1) ** 2, d_mask)
-        loss_fake = avg_with_mask(fake_logits**2, d_mask)
-        loss_d = loss_real + loss_fake
-        self.log(
-            "train/discriminator/loss",
-            loss_d,
-            on_step=True,
-            on_epoch=False,
-            prog_bar=True,
-            logger=True,
-        )
-        # Discriminator backward
-        optim_d.zero_grad()
-        self.manual_backward(loss_d)
-        self.clip_gradients(
-            optim_d, gradient_clip_val=1000.0, gradient_clip_algorithm="norm"
-        )
-        optim_d.step()
-        # Mel Loss, applying l1, using a weighted sum
-        mel_distance = (
-            gen_mel - gt_mels
-        ).abs()  # * 0.5 + self.ssim(gen_mel, gt_mels) * 0.5
-        loss_mel_low_freq = avg_with_mask(mel_distance[:, :40, :], mel_masks_float_conv)
-        loss_mel_mid_freq = avg_with_mask(
-            mel_distance[:, 40:70, :], mel_masks_float_conv
-        )
-        loss_mel_high_freq = avg_with_mask(
-            mel_distance[:, 70:, :], mel_masks_float_conv
-        )
-        loss_mel = (
-            loss_mel_low_freq * 0.6 + loss_mel_mid_freq * 0.3 + loss_mel_high_freq * 0.1
-        )
-        # Adversarial Loss
-        fake_logits = self.discriminator(gen_mel)
-        loss_adv = avg_with_mask((fake_logits - 1) ** 2, d_mask)
-        # Total loss
-        loss = (
-            self.weight_vq * loss_vq
-            + self.weight_mel * loss_mel
-            + self.weight_adv * loss_adv
-        )
-        # Log losses
-        self.log(
-            "train/generator/loss",
-            loss,
-            on_step=True,
-            on_epoch=False,
-            prog_bar=True,
-            logger=True,
-        )
-        self.log(
-            "train/generator/loss_vq",
-            loss_vq,
-            on_step=True,
-            on_epoch=False,
-            prog_bar=False,
-            logger=True,
-        )
-        self.log(
-            "train/generator/loss_mel",
-            loss_mel,
-            on_step=True,
-            on_epoch=False,
-            prog_bar=False,
-            logger=True,
-        )
-        self.log(
-            "train/generator/loss_adv",
-            loss_adv,
-            on_step=True,
-            on_epoch=False,
-            prog_bar=False,
-            logger=True,
-        )
-        # Generator backward
-        optim_g.zero_grad()
-        self.manual_backward(loss)
-        self.clip_gradients(
-            optim_g, gradient_clip_val=1000.0, gradient_clip_algorithm="norm"
-        )
-        optim_g.step()
-        scheduler_g, scheduler_d = self.lr_schedulers()
-        scheduler_g.step()
-        scheduler_d.step()
-    def validation_step(self, batch: Any, batch_idx: int):
-        audios, audio_lengths = batch["audios"], batch["audio_lengths"]
-        audios = audios.float()
-        audios = audios[:, None, :]
-        encoded_mels = self.encode_mel_transform(audios)
-        gt_mels = self.gt_mel_transform(audios)
-        mel_lengths = audio_lengths // self.gt_mel_transform.hop_length
-        mel_masks = sequence_mask(mel_lengths, gt_mels.shape[2])
-        mel_masks_float_conv = mel_masks[:, None, :].float()
-        gt_mels = gt_mels * mel_masks_float_conv
-        encoded_mels = encoded_mels * mel_masks_float_conv
-        # Encode
-        encoded_features = self.encoder(encoded_mels) * mel_masks_float_conv
-        # Quantize
-        vq_recon_features = self.quantizer(encoded_features).z * mel_masks_float_conv
-        vq_recon_features = (
-            vq_recon_features
-            + self.quality_projection(
-                torch.ones(
-                    vq_recon_features.shape[0], 1, device=vq_recon_features.device
-                )
-                * 2
-            )[:, :, None]
-        )
-        # VQ Decode
-        gen_aux_mels = (
-            self.decoder(
-                torch.randn_like(vq_recon_features) * mel_masks_float_conv,
-                condition=vq_recon_features,
-            )
-            * mel_masks_float_conv
-        )
-        loss_mel = avg_with_mask((gen_aux_mels - gt_mels).abs(), mel_masks_float_conv)
-        self.log(
-            "val/loss_mel",
-            loss_mel,
-            on_step=False,
-            on_epoch=True,
-            prog_bar=False,
-            logger=True,
-            sync_dist=True,
-        )
-        recon_audios = self.vocoder(gt_mels)
-        gen_aux_audios = self.vocoder(gen_aux_mels)
-        # only log the first batch
-        if batch_idx != 0:
-            return
-        for idx, (
-            gt_mel,
-            gen_aux_mel,
-            audio,
-            gen_aux_audio,
-            recon_audio,
-            audio_len,
-        ) in enumerate(
-            zip(
-                gt_mels,
-                gen_aux_mels,
-                audios.cpu().float(),
-                gen_aux_audios.cpu().float(),
-                recon_audios.cpu().float(),
-                audio_lengths,
-            )
-        ):
-            if idx > 4:
-                break
-            mel_len = audio_len // self.gt_mel_transform.hop_length
-            image_mels = plot_mel(
-                [
-                    gt_mel[:, :mel_len],
-                    gen_aux_mel[:, :mel_len],
-                ],
-                [
-                    "Ground-Truth",
-                    "Auxiliary",
-                ],
-            )
-            if isinstance(self.logger, WandbLogger):
-                self.logger.experiment.log(
-                    {
-                        "reconstruction_mel": wandb.Image(image_mels, caption="mels"),
-                        "wavs": [
-                            wandb.Audio(
-                                audio[0, :audio_len],
-                                sample_rate=self.sampling_rate,
-                                caption="gt",
-                            ),
-                            wandb.Audio(
-                                gen_aux_audio[0, :audio_len],
-                                sample_rate=self.sampling_rate,
-                                caption="aux",
-                            ),
-                            wandb.Audio(
-                                recon_audio[0, :audio_len],
-                                sample_rate=self.sampling_rate,
-                                caption="recon",
-                            ),
-                        ],
-                    },
-                )
-            if isinstance(self.logger, TensorBoardLogger):
-                self.logger.experiment.add_figure(
-                    f"sample-{idx}/mels",
-                    image_mels,
-                    global_step=self.global_step,
-                )
-                self.logger.experiment.add_audio(
-                    f"sample-{idx}/wavs/gt",
-                    audio[0, :audio_len],
-                    self.global_step,
-                    sample_rate=self.sampling_rate,
-                )
-                self.logger.experiment.add_audio(
-                    f"sample-{idx}/wavs/gen",
-                    gen_aux_audio[0, :audio_len],
-                    self.global_step,
-                    sample_rate=self.sampling_rate,
-                )
-                self.logger.experiment.add_audio(
-                    f"sample-{idx}/wavs/recon",
-                    recon_audio[0, :audio_len],
-                    self.global_step,
-                    sample_rate=self.sampling_rate,
-                )
-            plt.close(image_mels)
-    def encode(self, audios, audio_lengths):
-        audios = audios.float()
-        mels = self.encode_mel_transform(audios)
-        mel_lengths = audio_lengths // self.encode_mel_transform.hop_length
-        mel_masks = sequence_mask(mel_lengths, mels.shape[2])
-        mel_masks_float_conv = mel_masks[:, None, :].float()
-        mels = mels * mel_masks_float_conv
-        # Encode
-        encoded_features = self.encoder(mels) * mel_masks_float_conv
-        feature_lengths = mel_lengths // math.prod(self.quantizer.downsample_factor)
-        return self.quantizer.encode(encoded_features), feature_lengths
-    def decode(self, indices, feature_lengths, return_audios=False):
-        factor = math.prod(self.quantizer.downsample_factor)
-        mel_masks = sequence_mask(feature_lengths * factor, indices.shape[2] * factor)
-        mel_masks_float_conv = mel_masks[:, None, :].float()
-        z = self.quantizer.decode(indices) * mel_masks_float_conv
-        z = (
-            z
-            + self.quality_projection(torch.ones(z.shape[0], 1, device=z.device) * 2)[
-                :, :, None
-            ]
-        )
-        gen_mel = (
-            self.decoder(
-                torch.randn_like(z) * mel_masks_float_conv,
-                condition=z,
-            )
-            * mel_masks_float_conv
-        )
-        if return_audios:
-            return self.vocoder(gen_mel)
-        return gen_mel

xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py DELETED Viewed

@@ -1,44 +0,0 @@
-import torch
-from torch import nn
-from torch.nn.utils.parametrizations import weight_norm
-class Discriminator(nn.Module):
-    def __init__(self):
-        super().__init__()
-        blocks = []
-        convs = [
-            (1, 64, (3, 9), 1, (1, 4)),
-            (64, 128, (3, 9), (1, 2), (1, 4)),
-            (128, 256, (3, 9), (1, 2), (1, 4)),
-            (256, 512, (3, 9), (1, 2), (1, 4)),
-            (512, 1024, (3, 3), 1, (1, 1)),
-            (1024, 1, (3, 3), 1, (1, 1)),
-        ]
-        for idx, (in_channels, out_channels, kernel_size, stride, padding) in enumerate(
-            convs
-        ):
-            blocks.append(
-                weight_norm(
-                    nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
-                )
-            )
-            if idx != len(convs) - 1:
-                blocks.append(nn.SiLU(inplace=True))
-        self.blocks = nn.Sequential(*blocks)
-    def forward(self, x):
-        return self.blocks(x[:, None])[:, 0]
-if __name__ == "__main__":
-    model = Discriminator()
-    print(sum(p.numel() for p in model.parameters()) / 1_000_000)
-    x = torch.randn(1, 128, 1024)
-    y = model(x)
-    print(y.shape)
-    print(y)

xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py DELETED Viewed

@@ -1,115 +0,0 @@
-from typing import Optional
-import torch
-import torch.nn.functional as F
-from torch import nn
-from fish_speech.utils import autocast_exclude_mps
-from .wavenet import WaveNet
-class ReferenceEncoder(WaveNet):
-    def __init__(
-        self,
-        input_channels: Optional[int] = None,
-        output_channels: Optional[int] = None,
-        residual_channels: int = 512,
-        residual_layers: int = 20,
-        dilation_cycle: Optional[int] = 4,
-        num_heads: int = 8,
-        latent_len: int = 4,
-    ):
-        super().__init__(
-            input_channels=input_channels,
-            residual_channels=residual_channels,
-            residual_layers=residual_layers,
-            dilation_cycle=dilation_cycle,
-        )
-        self.head_dim = residual_channels // num_heads
-        self.num_heads = num_heads
-        self.latent_len = latent_len
-        self.latent = nn.Parameter(torch.zeros(1, self.latent_len, residual_channels))
-        self.q = nn.Linear(residual_channels, residual_channels, bias=True)
-        self.kv = nn.Linear(residual_channels, residual_channels * 2, bias=True)
-        self.q_norm = nn.LayerNorm(self.head_dim)
-        self.k_norm = nn.LayerNorm(self.head_dim)
-        self.proj = nn.Linear(residual_channels, residual_channels)
-        self.proj_drop = nn.Dropout(0.1)
-        self.norm = nn.LayerNorm(residual_channels)
-        self.mlp = nn.Sequential(
-            nn.Linear(residual_channels, residual_channels * 4),
-            nn.SiLU(),
-            nn.Linear(residual_channels * 4, residual_channels),
-        )
-        self.output_projection_attn = nn.Linear(residual_channels, output_channels)
-        torch.nn.init.trunc_normal_(self.latent, std=0.02)
-        self.apply(self.init_weights)
-    def init_weights(self, m):
-        if isinstance(m, nn.Linear):
-            torch.nn.init.trunc_normal_(m.weight, std=0.02)
-            if m.bias is not None:
-                torch.nn.init.constant_(m.bias, 0)
-    def forward(self, x, attn_mask=None):
-        x = super().forward(x).mT
-        B, N, C = x.shape
-        # Calculate mask
-        if attn_mask is not None:
-            assert attn_mask.shape == (B, N) and attn_mask.dtype == torch.bool
-            attn_mask = attn_mask[:, None, None, :].expand(
-                B, self.num_heads, self.latent_len, N
-            )
-        q_latent = self.latent.expand(B, -1, -1)
-        q = (
-            self.q(q_latent)
-            .reshape(B, self.latent_len, self.num_heads, self.head_dim)
-            .transpose(1, 2)
-        )
-        kv = (
-            self.kv(x)
-            .reshape(B, N, 2, self.num_heads, self.head_dim)
-            .permute(2, 0, 3, 1, 4)
-        )
-        k, v = kv.unbind(0)
-        q, k = self.q_norm(q), self.k_norm(k)
-        x = F.scaled_dot_product_attention(q, k, v, attn_mask=attn_mask)
-        x = x.transpose(1, 2).reshape(B, self.latent_len, C)
-        x = self.proj(x)
-        x = self.proj_drop(x)
-        x = x + self.mlp(self.norm(x))
-        x = self.output_projection_attn(x)
-        x = x.mean(1)
-        return x
-if __name__ == "__main__":
-    with autocast_exclude_mps(device_type="cpu", dtype=torch.bfloat16):
-        model = ReferenceEncoder(
-            input_channels=128,
-            output_channels=64,
-            residual_channels=384,
-            residual_layers=20,
-            dilation_cycle=4,
-            num_heads=8,
-        )
-        x = torch.randn(4, 128, 64)
-        mask = torch.ones(4, 64, dtype=torch.bool)
-        y = model(x, mask)
-        print(y.shape)
-        loss = F.mse_loss(y, torch.randn(4, 64))
-        loss.backward()

xinference 0.14.4.post1__py3-none-any.whl → 0.15.1__py3-none-any.whl

Potentially problematic release.

xinference 0.14.4.post1__py3-none-any.whl → 0.15.1__py3-none-any.whl