PyPI - xinference - Versions diffs - 1.10.0__py3-none-any.whl → 1.11.0__py3-none-any.whl - Mend - Supply Chain Defender

xinference 1.10.0py3-none-any.whl → 1.11.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (328) hide show

xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py ADDED Viewed

@@ -0,0 +1,70 @@
+# Copyright 3D-Speaker (https://github.com/alibaba-damo-academy/3D-Speaker). All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from modules.campplus.layers import DenseLayer
+class CosineClassifier(nn.Module):
+    def __init__(
+        self,
+        input_dim,
+        num_blocks=0,
+        inter_dim=512,
+        out_neurons=1000,
+    ):
+        super().__init__()
+        self.blocks = nn.ModuleList()
+        for index in range(num_blocks):
+            self.blocks.append(
+                DenseLayer(input_dim, inter_dim, config_str='batchnorm')
+            )
+            input_dim = inter_dim
+        self.weight = nn.Parameter(
+            torch.FloatTensor(out_neurons, input_dim)
+        )
+        nn.init.xavier_uniform_(self.weight)
+    def forward(self, x):
+        # x: [B, dim]
+        for layer in self.blocks:
+            x = layer(x)
+        # normalized
+        x = F.linear(F.normalize(x), F.normalize(self.weight))
+        return x
+class LinearClassifier(nn.Module):
+    def __init__(
+        self,
+        input_dim,
+        num_blocks=0,
+        inter_dim=512,
+        out_neurons=1000,
+    ):
+        super().__init__()
+        self.blocks = nn.ModuleList()
+        self.nonlinear = nn.ReLU(inplace=True)
+        for index in range(num_blocks):
+            self.blocks.append(
+                DenseLayer(input_dim, inter_dim, bias=True)
+            )
+            input_dim = inter_dim
+        self.linear = nn.Linear(input_dim, out_neurons, bias=True)
+    def forward(self, x):
+        # x: [B, dim]
+        x = self.nonlinear(x)
+        for layer in self.blocks:
+            x = layer(x)
+        x = self.linear(x)
+        return x

xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py ADDED Viewed

@@ -0,0 +1,253 @@
+# Copyright 3D-Speaker (https://github.com/alibaba-damo-academy/3D-Speaker). All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+import torch
+import torch.nn.functional as F
+import torch.utils.checkpoint as cp
+from torch import nn
+def get_nonlinear(config_str, channels):
+    nonlinear = nn.Sequential()
+    for name in config_str.split('-'):
+        if name == 'relu':
+            nonlinear.add_module('relu', nn.ReLU(inplace=True))
+        elif name == 'prelu':
+            nonlinear.add_module('prelu', nn.PReLU(channels))
+        elif name == 'batchnorm':
+            nonlinear.add_module('batchnorm', nn.BatchNorm1d(channels))
+        elif name == 'batchnorm_':
+            nonlinear.add_module('batchnorm',
+                                 nn.BatchNorm1d(channels, affine=False))
+        else:
+            raise ValueError('Unexpected module ({}).'.format(name))
+    return nonlinear
+def statistics_pooling(x, dim=-1, keepdim=False, unbiased=True, eps=1e-2):
+    mean = x.mean(dim=dim)
+    std = x.std(dim=dim, unbiased=unbiased)
+    stats = torch.cat([mean, std], dim=-1)
+    if keepdim:
+        stats = stats.unsqueeze(dim=dim)
+    return stats
+class StatsPool(nn.Module):
+    def forward(self, x):
+        return statistics_pooling(x)
+class TDNNLayer(nn.Module):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 bias=False,
+                 config_str='batchnorm-relu'):
+        super(TDNNLayer, self).__init__()
+        if padding < 0:
+            assert kernel_size % 2 == 1, 'Expect equal paddings, but got even kernel size ({})'.format(
+                kernel_size)
+            padding = (kernel_size - 1) // 2 * dilation
+        self.linear = nn.Conv1d(in_channels,
+                                out_channels,
+                                kernel_size,
+                                stride=stride,
+                                padding=padding,
+                                dilation=dilation,
+                                bias=bias)
+        self.nonlinear = get_nonlinear(config_str, out_channels)
+    def forward(self, x):
+        x = self.linear(x)
+        x = self.nonlinear(x)
+        return x
+class CAMLayer(nn.Module):
+    def __init__(self,
+                bn_channels,
+                out_channels,
+                kernel_size,
+                stride,
+                padding,
+                dilation,
+                bias,
+                reduction=2):
+        super(CAMLayer, self).__init__()
+        self.linear_local = nn.Conv1d(bn_channels,
+                                 out_channels,
+                                 kernel_size,
+                                 stride=stride,
+                                 padding=padding,
+                                 dilation=dilation,
+                                 bias=bias)
+        self.linear1 = nn.Conv1d(bn_channels, bn_channels // reduction, 1)
+        self.relu = nn.ReLU(inplace=True)
+        self.linear2 = nn.Conv1d(bn_channels // reduction, out_channels, 1)
+        self.sigmoid = nn.Sigmoid()
+    def forward(self, x):
+        y = self.linear_local(x)
+        context = x.mean(-1, keepdim=True)+self.seg_pooling(x)
+        context = self.relu(self.linear1(context))
+        m = self.sigmoid(self.linear2(context))
+        return y*m
+    def seg_pooling(self, x, seg_len=100, stype='avg'):
+        if stype == 'avg':
+            seg = F.avg_pool1d(x, kernel_size=seg_len, stride=seg_len, ceil_mode=True)
+        elif stype == 'max':
+            seg = F.max_pool1d(x, kernel_size=seg_len, stride=seg_len, ceil_mode=True)
+        else:
+            raise ValueError('Wrong segment pooling type.')
+        shape = seg.shape
+        seg = seg.unsqueeze(-1).expand(*shape, seg_len).reshape(*shape[:-1], -1)
+        seg = seg[..., :x.shape[-1]]
+        return seg
+class CAMDenseTDNNLayer(nn.Module):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 bn_channels,
+                 kernel_size,
+                 stride=1,
+                 dilation=1,
+                 bias=False,
+                 config_str='batchnorm-relu',
+                 memory_efficient=False):
+        super(CAMDenseTDNNLayer, self).__init__()
+        assert kernel_size % 2 == 1, 'Expect equal paddings, but got even kernel size ({})'.format(
+            kernel_size)
+        padding = (kernel_size - 1) // 2 * dilation
+        self.memory_efficient = memory_efficient
+        self.nonlinear1 = get_nonlinear(config_str, in_channels)
+        self.linear1 = nn.Conv1d(in_channels, bn_channels, 1, bias=False)
+        self.nonlinear2 = get_nonlinear(config_str, bn_channels)
+        self.cam_layer = CAMLayer(bn_channels,
+                                out_channels,
+                                kernel_size,
+                                stride=stride,
+                                padding=padding,
+                                dilation=dilation,
+                                bias=bias)
+    def bn_function(self, x):
+        return self.linear1(self.nonlinear1(x))
+    def forward(self, x):
+        if self.training and self.memory_efficient:
+            x = cp.checkpoint(self.bn_function, x)
+        else:
+            x = self.bn_function(x)
+        x = self.cam_layer(self.nonlinear2(x))
+        return x
+class CAMDenseTDNNBlock(nn.ModuleList):
+    def __init__(self,
+                 num_layers,
+                 in_channels,
+                 out_channels,
+                 bn_channels,
+                 kernel_size,
+                 stride=1,
+                 dilation=1,
+                 bias=False,
+                 config_str='batchnorm-relu',
+                 memory_efficient=False):
+        super(CAMDenseTDNNBlock, self).__init__()
+        for i in range(num_layers):
+            layer = CAMDenseTDNNLayer(in_channels=in_channels + i * out_channels,
+                                   out_channels=out_channels,
+                                   bn_channels=bn_channels,
+                                   kernel_size=kernel_size,
+                                   stride=stride,
+                                   dilation=dilation,
+                                   bias=bias,
+                                   config_str=config_str,
+                                   memory_efficient=memory_efficient)
+            self.add_module('tdnnd%d' % (i + 1), layer)
+    def forward(self, x):
+        for layer in self:
+            x = torch.cat([x, layer(x)], dim=1)
+        return x
+class TransitLayer(nn.Module):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 bias=True,
+                 config_str='batchnorm-relu'):
+        super(TransitLayer, self).__init__()
+        self.nonlinear = get_nonlinear(config_str, in_channels)
+        self.linear = nn.Conv1d(in_channels, out_channels, 1, bias=bias)
+    def forward(self, x):
+        x = self.nonlinear(x)
+        x = self.linear(x)
+        return x
+class DenseLayer(nn.Module):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 bias=False,
+                 config_str='batchnorm-relu'):
+        super(DenseLayer, self).__init__()
+        self.linear = nn.Conv1d(in_channels, out_channels, 1, bias=bias)
+        self.nonlinear = get_nonlinear(config_str, out_channels)
+    def forward(self, x):
+        if len(x.shape) == 2:
+            x = self.linear(x.unsqueeze(dim=-1)).squeeze(dim=-1)
+        else:
+            x = self.linear(x)
+        x = self.nonlinear(x)
+        return x
+class BasicResBlock(nn.Module):
+    expansion = 1
+    def __init__(self, in_planes, planes, stride=1):
+        super(BasicResBlock, self).__init__()
+        self.conv1 = nn.Conv2d(in_planes,
+                               planes,
+                               kernel_size=3,
+                               stride=(stride, 1),
+                               padding=1,
+                               bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes,
+                               planes,
+                               kernel_size=3,
+                               stride=1,
+                               padding=1,
+                               bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+        self.shortcut = nn.Sequential()
+        if stride != 1 or in_planes != self.expansion * planes:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes,
+                          self.expansion * planes,
+                          kernel_size=1,
+                          stride=(stride, 1),
+                          bias=False),
+                nn.BatchNorm2d(self.expansion * planes))
+    def forward(self, x):
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = self.bn2(self.conv2(out))
+        out += self.shortcut(x)
+        out = F.relu(out)
+        return out