xinference 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +4 -7
- xinference/client/handlers.py +3 -0
- xinference/core/chat_interface.py +6 -1
- xinference/core/model.py +2 -0
- xinference/core/scheduler.py +4 -7
- xinference/core/supervisor.py +114 -23
- xinference/core/worker.py +70 -4
- xinference/deploy/local.py +2 -1
- xinference/model/audio/core.py +11 -0
- xinference/model/audio/cosyvoice.py +16 -5
- xinference/model/audio/kokoro.py +139 -0
- xinference/model/audio/melotts.py +110 -0
- xinference/model/audio/model_spec.json +80 -0
- xinference/model/audio/model_spec_modelscope.json +18 -0
- xinference/model/audio/whisper.py +35 -10
- xinference/model/llm/llama_cpp/core.py +21 -14
- xinference/model/llm/llm_family.json +527 -1
- xinference/model/llm/llm_family.py +4 -1
- xinference/model/llm/llm_family_modelscope.json +495 -3
- xinference/model/llm/memory.py +1 -1
- xinference/model/llm/mlx/core.py +24 -6
- xinference/model/llm/transformers/core.py +9 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +20 -3
- xinference/model/llm/transformers/utils.py +22 -11
- xinference/model/llm/utils.py +115 -1
- xinference/model/llm/vllm/core.py +14 -4
- xinference/model/llm/vllm/xavier/block.py +3 -4
- xinference/model/llm/vllm/xavier/block_tracker.py +71 -58
- xinference/model/llm/vllm/xavier/collective.py +74 -0
- xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
- xinference/model/llm/vllm/xavier/executor.py +18 -16
- xinference/model/llm/vllm/xavier/scheduler.py +79 -63
- xinference/model/llm/vllm/xavier/test/test_xavier.py +60 -35
- xinference/model/llm/vllm/xavier/transfer.py +53 -32
- xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
- xinference/thirdparty/melo/__init__.py +0 -0
- xinference/thirdparty/melo/api.py +135 -0
- xinference/thirdparty/melo/app.py +61 -0
- xinference/thirdparty/melo/attentions.py +459 -0
- xinference/thirdparty/melo/commons.py +160 -0
- xinference/thirdparty/melo/configs/config.json +94 -0
- xinference/thirdparty/melo/data/example/metadata.list +20 -0
- xinference/thirdparty/melo/data_utils.py +413 -0
- xinference/thirdparty/melo/download_utils.py +67 -0
- xinference/thirdparty/melo/infer.py +25 -0
- xinference/thirdparty/melo/init_downloads.py +14 -0
- xinference/thirdparty/melo/losses.py +58 -0
- xinference/thirdparty/melo/main.py +36 -0
- xinference/thirdparty/melo/mel_processing.py +174 -0
- xinference/thirdparty/melo/models.py +1030 -0
- xinference/thirdparty/melo/modules.py +598 -0
- xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
- xinference/thirdparty/melo/monotonic_align/core.py +46 -0
- xinference/thirdparty/melo/preprocess_text.py +135 -0
- xinference/thirdparty/melo/split_utils.py +174 -0
- xinference/thirdparty/melo/text/__init__.py +35 -0
- xinference/thirdparty/melo/text/chinese.py +199 -0
- xinference/thirdparty/melo/text/chinese_bert.py +107 -0
- xinference/thirdparty/melo/text/chinese_mix.py +253 -0
- xinference/thirdparty/melo/text/cleaner.py +36 -0
- xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
- xinference/thirdparty/melo/text/cmudict.rep +129530 -0
- xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
- xinference/thirdparty/melo/text/english.py +284 -0
- xinference/thirdparty/melo/text/english_bert.py +39 -0
- xinference/thirdparty/melo/text/english_utils/__init__.py +0 -0
- xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
- xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
- xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
- xinference/thirdparty/melo/text/es_phonemizer/__init__.py +0 -0
- xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
- xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
- xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
- xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
- xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
- xinference/thirdparty/melo/text/fr_phonemizer/__init__.py +0 -0
- xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
- xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
- xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
- xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
- xinference/thirdparty/melo/text/french.py +94 -0
- xinference/thirdparty/melo/text/french_bert.py +39 -0
- xinference/thirdparty/melo/text/japanese.py +647 -0
- xinference/thirdparty/melo/text/japanese_bert.py +49 -0
- xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
- xinference/thirdparty/melo/text/korean.py +192 -0
- xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
- xinference/thirdparty/melo/text/spanish.py +122 -0
- xinference/thirdparty/melo/text/spanish_bert.py +39 -0
- xinference/thirdparty/melo/text/symbols.py +290 -0
- xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
- xinference/thirdparty/melo/train.py +635 -0
- xinference/thirdparty/melo/train.sh +19 -0
- xinference/thirdparty/melo/transforms.py +209 -0
- xinference/thirdparty/melo/utils.py +424 -0
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.1eb206d1.js → main.b0936c54.js} +3 -3
- xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/METADATA +37 -27
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/RECORD +122 -45
- xinference/web/ui/build/static/js/main.1eb206d1.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2213d49de260e1f67c888081b18f120f5225462b829ae57c9e05a05cec83689d.json +0 -1
- /xinference/web/ui/build/static/js/{main.1eb206d1.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/LICENSE +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/WHEEL +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/entry_points.txt +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
import torch.utils.data
|
|
3
|
+
import librosa
|
|
4
|
+
from librosa.filters import mel as librosa_mel_fn
|
|
5
|
+
|
|
6
|
+
# 32768.0 == 2**15; presumably the full-scale value of 16-bit PCM audio (TODO confirm).
# Not referenced by any function visible in this section.
MAX_WAV_VALUE = 32768.0
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def dynamic_range_compression_torch(x, C=1, clip_val=1e-5):
    """Log-compress ``x`` after flooring it at ``clip_val``.

    Args:
        x: Input tensor.
        C: Compression factor multiplied into the floored input before the log.
        clip_val: Lower bound applied to ``x`` so the logarithm stays finite.

    Returns:
        ``log(clamp(x, min=clip_val) * C)`` computed element-wise.
    """
    floored = torch.clamp(x, min=clip_val)
    scaled = floored * C
    return torch.log(scaled)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def dynamic_range_decompression_torch(x, C=1):
    """Undo log compression: exponentiate ``x`` and divide by ``C``.

    Args:
        x: Log-compressed tensor.
        C: Compression factor that was used when compressing.

    Returns:
        ``exp(x) / C`` computed element-wise.
    """
    expanded = torch.exp(x)
    return expanded / C
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def spectral_normalize_torch(magnitudes):
    """Apply ``dynamic_range_compression_torch`` with its default settings."""
    return dynamic_range_compression_torch(magnitudes)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def spectral_de_normalize_torch(magnitudes):
    """Apply ``dynamic_range_decompression_torch`` with its default settings."""
    return dynamic_range_decompression_torch(magnitudes)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Module-level cache of mel filterbank tensors, filled lazily by the mel
# functions below and keyed by a string derived from the call's parameters
# plus the tensor dtype and device.
mel_basis = {}
|
|
38
|
+
# Module-level cache of Hann window tensors, filled lazily by the spectrogram
# functions below and keyed by a string built from window size, dtype and device.
hann_window = {}
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False):
    """Compute a linear magnitude spectrogram of ``y`` with ``torch.stft``.

    The waveform is reflect-padded by ``int((n_fft - hop_size) / 2)`` samples
    on both sides, transformed with a cached Hann window, and reduced to
    ``sqrt(re**2 + im**2 + 1e-6)``.

    Args:
        y: Waveform tensor; presumably shaped (batch, samples) since it is
            unsqueezed at dim 1 for reflect padding -- TODO confirm with callers.
        n_fft: FFT size.
        sampling_rate: Unused here; kept so the signature matches the other
            spectrogram helpers.
        hop_size: Hop length between frames, in samples.
        win_size: Window length, in samples.
        center: Forwarded to ``torch.stft``.

    Returns:
        Magnitude of the one-sided STFT.
    """
    # Diagnostics only: values outside [-1.1, 1.1] are reported, not rejected.
    y_min = torch.min(y)
    if y_min < -1.1:
        print("min value is ", y_min)
    y_max = torch.max(y)
    if y_max > 1.1:
        print("max value is ", y_max)

    # Key format is shared with the other functions that use this cache.
    window_key = str(win_size) + "_" + str(y.dtype) + "_" + str(y.device)
    if window_key not in hann_window:
        hann_window[window_key] = torch.hann_window(win_size).to(
            dtype=y.dtype, device=y.device
        )

    pad = int((n_fft - hop_size) / 2)
    y = torch.nn.functional.pad(y.unsqueeze(1), (pad, pad), mode="reflect")
    y = y.squeeze(1)

    # return_complex=False is deprecated in torch.stft; request the complex
    # result and view it as (..., 2) real/imag pairs, which is the same layout.
    spec = torch.stft(
        y,
        n_fft,
        hop_length=hop_size,
        win_length=win_size,
        window=hann_window[window_key],
        center=center,
        pad_mode="reflect",
        normalized=False,
        onesided=True,
        return_complex=True,
    )
    spec = torch.view_as_real(spec)

    # The small epsilon keeps the sqrt (and its gradient) finite at zero energy.
    spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)
    return spec
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def spectrogram_torch_conv(y, n_fft, sampling_rate, hop_size, win_size, center=False):
    """Compute a magnitude spectrogram via a 1-D convolution with a Fourier basis.

    The STFT is expressed as ``conv1d`` of the reflect-padded waveform with a
    Hann-windowed real/imaginary Fourier basis. The result is cross-checked
    against ``torch.stft`` on every call before the magnitude is returned.

    Args:
        y: Waveform tensor; presumably shaped (batch, samples) since it is
            unsqueezed at dim 1 to form a single-channel conv input -- TODO confirm.
        n_fft: FFT size.
        sampling_rate: Unused here; kept for signature compatibility.
        hop_size: Hop length (convolution stride), in samples.
        win_size: Window length, in samples.
        center: Must be ``False``; centered framing is not implemented.

    Returns:
        ``sqrt(re**2 + im**2 + 1e-6)`` of the convolution-based STFT.

    Raises:
        AssertionError: If ``center`` is not ``False``, or if the convolution
            result differs from ``torch.stft`` (the latter check is an
            ``assert`` and is skipped under ``python -O``).
    """
    # Explicit raise (same exception type as the former assert) so the guard
    # is not stripped when Python runs with -O.
    if center is not False:
        raise AssertionError("spectrogram_torch_conv only supports center=False")

    # Key format is shared with the other functions that use this cache.
    window_key = str(win_size) + "_" + str(y.dtype) + "_" + str(y.device)
    if window_key not in hann_window:
        hann_window[window_key] = torch.hann_window(win_size).to(
            dtype=y.dtype, device=y.device
        )

    pad = int((n_fft - hop_size) / 2)
    y = torch.nn.functional.pad(y.unsqueeze(1), (pad, pad), mode="reflect")

    # ---- STFT as a convolution ----
    # Rows of the DFT matrix up to Nyquist, split into real and imaginary
    # parts and stacked as 2 * freq_cutoff single-channel conv filters.
    freq_cutoff = n_fft // 2 + 1
    fourier_basis = torch.view_as_real(torch.fft.fft(torch.eye(n_fft)))
    forward_basis = fourier_basis[:freq_cutoff].permute(2, 0, 1).reshape(
        -1, 1, fourier_basis.shape[1]
    )
    # Hann window zero-padded (centered) to n_fft, applied to every filter.
    padded_window = librosa.util.pad_center(torch.hann_window(win_size), size=n_fft)
    forward_basis = forward_basis * torch.as_tensor(padded_window).float()

    forward_transform = torch.nn.functional.conv1d(
        y, forward_basis.to(y.device), stride=hop_size
    )
    # First half of the channels is the real part, second half the imaginary part.
    spec2 = torch.stack(
        [
            forward_transform[:, :freq_cutoff, :],
            forward_transform[:, freq_cutoff:, :],
        ],
        dim=-1,
    )

    # ---- Verification against torch.stft ----
    # return_complex=False is deprecated; view_as_real gives the same layout.
    spec1 = torch.view_as_real(
        torch.stft(
            y.squeeze(1),
            n_fft,
            hop_length=hop_size,
            win_length=win_size,
            window=hann_window[window_key],
            center=center,
            pad_mode="reflect",
            normalized=False,
            onesided=True,
            return_complex=True,
        )
    )
    assert torch.allclose(spec1, spec2, atol=1e-4)

    spec = torch.sqrt(spec2.pow(2).sum(-1) + 1e-6)
    return spec
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def spec_to_mel_torch(spec, n_fft, num_mels, sampling_rate, fmin, fmax):
    """Project a linear spectrogram onto a mel filterbank and log-compress it.

    The filterbank comes from ``librosa.filters.mel`` and is cached in the
    module-level ``mel_basis`` dict.

    Args:
        spec: Linear spectrogram tensor; its frequency axis must be the
            second-to-last dimension so ``matmul(filterbank, spec)`` is valid.
        n_fft: FFT size the spectrogram was computed with.
        num_mels: Number of mel bands.
        sampling_rate: Audio sampling rate passed to librosa as ``sr``.
        fmin: Lowest filterbank frequency.
        fmax: Highest filterbank frequency.

    Returns:
        The mel spectrogram after ``spectral_normalize_torch``.
    """
    # The key covers every parameter that shapes the filterbank. Keying on
    # fmax/dtype/device alone would hand back a stale filterbank when the
    # function is called again with a different n_fft, num_mels, fmin or rate.
    basis_key = "_".join(
        str(part)
        for part in (
            sampling_rate,
            n_fft,
            num_mels,
            fmin,
            fmax,
            spec.dtype,
            spec.device,
        )
    )
    if basis_key not in mel_basis:
        mel = librosa_mel_fn(
            sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax
        )
        mel_basis[basis_key] = torch.from_numpy(mel).to(
            dtype=spec.dtype, device=spec.device
        )

    spec = torch.matmul(mel_basis[basis_key], spec)
    spec = spectral_normalize_torch(spec)
    return spec
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def mel_spectrogram_torch(
    y, n_fft, num_mels, sampling_rate, hop_size, win_size, fmin, fmax, center=False
):
    """Compute a log-compressed mel spectrogram directly from a waveform.

    Steps: reflect-pad the waveform by ``int((n_fft - hop_size) / 2)`` on both
    sides, take the one-sided STFT with a cached Hann window, reduce it to
    ``sqrt(re**2 + im**2 + 1e-6)``, project onto a cached librosa mel
    filterbank, and apply ``spectral_normalize_torch``.

    Args:
        y: Waveform tensor; presumably shaped (batch, samples) since it is
            unsqueezed at dim 1 for reflect padding -- TODO confirm with callers.
        n_fft: FFT size.
        num_mels: Number of mel bands.
        sampling_rate: Audio sampling rate passed to librosa as ``sr``.
        hop_size: Hop length between frames, in samples.
        win_size: Window length, in samples.
        fmin: Lowest filterbank frequency.
        fmax: Highest filterbank frequency.
        center: Forwarded to ``torch.stft``.

    Returns:
        The normalized mel spectrogram tensor.
    """
    dtype_device = str(y.dtype) + "_" + str(y.device)

    # The key covers every parameter that shapes the filterbank. Keying on
    # fmax/dtype/device alone would hand back a stale filterbank when the
    # function is called again with a different n_fft, num_mels, fmin or rate.
    basis_key = "_".join(
        str(part) for part in (sampling_rate, n_fft, num_mels, fmin, fmax)
    ) + "_" + dtype_device
    if basis_key not in mel_basis:
        mel = librosa_mel_fn(
            sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax
        )
        mel_basis[basis_key] = torch.from_numpy(mel).to(
            dtype=y.dtype, device=y.device
        )

    # A Hann window is fully determined by size, dtype and device, so this key
    # keeps the format shared with the other functions that use the cache.
    window_key = str(win_size) + "_" + dtype_device
    if window_key not in hann_window:
        hann_window[window_key] = torch.hann_window(win_size).to(
            dtype=y.dtype, device=y.device
        )

    pad = int((n_fft - hop_size) / 2)
    y = torch.nn.functional.pad(y.unsqueeze(1), (pad, pad), mode="reflect")
    y = y.squeeze(1)

    # return_complex=False is deprecated in torch.stft; request the complex
    # result and view it as (..., 2) real/imag pairs, which is the same layout.
    spec = torch.stft(
        y,
        n_fft,
        hop_length=hop_size,
        win_length=win_size,
        window=hann_window[window_key],
        center=center,
        pad_mode="reflect",
        normalized=False,
        onesided=True,
        return_complex=True,
    )
    spec = torch.view_as_real(spec)

    # The small epsilon keeps the sqrt (and its gradient) finite at zero energy.
    spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)

    spec = torch.matmul(mel_basis[basis_key], spec)
    spec = spectral_normalize_torch(spec)

    return spec
|