torchaudio 2.7.1__cp310-cp310-win_amd64.whl → 2.9.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of torchaudio might be problematic. Click here for more details.
- torchaudio/__init__.py +184 -33
- torchaudio/_extension/__init__.py +1 -14
- torchaudio/_extension/utils.py +0 -47
- torchaudio/_internal/module_utils.py +68 -10
- torchaudio/_torchcodec.py +340 -0
- torchaudio/datasets/cmuarctic.py +1 -1
- torchaudio/datasets/utils.py +1 -1
- torchaudio/functional/__init__.py +6 -3
- torchaudio/functional/_alignment.py +1 -1
- torchaudio/functional/filtering.py +70 -55
- torchaudio/functional/functional.py +31 -61
- torchaudio/lib/_torchaudio.pyd +0 -0
- torchaudio/lib/libtorchaudio.pyd +0 -0
- torchaudio/models/decoder/__init__.py +19 -1
- torchaudio/models/decoder/_ctc_decoder.py +6 -6
- torchaudio/models/decoder/_cuda_ctc_decoder.py +1 -1
- torchaudio/models/squim/objective.py +2 -2
- torchaudio/pipelines/_source_separation_pipeline.py +1 -1
- torchaudio/pipelines/_squim_pipeline.py +2 -2
- torchaudio/pipelines/_tts/utils.py +3 -1
- torchaudio/pipelines/rnnt_pipeline.py +4 -4
- torchaudio/transforms/__init__.py +4 -1
- torchaudio/transforms/_transforms.py +4 -3
- torchaudio/utils/__init__.py +2 -9
- torchaudio/utils/download.py +1 -1
- torchaudio/version.py +2 -2
- {torchaudio-2.7.1.dist-info → torchaudio-2.9.0.dist-info}/METADATA +15 -7
- torchaudio-2.9.0.dist-info/RECORD +85 -0
- {torchaudio-2.7.1.dist-info → torchaudio-2.9.0.dist-info}/top_level.txt +0 -1
- torchaudio/_backend/__init__.py +0 -61
- torchaudio/_backend/backend.py +0 -53
- torchaudio/_backend/common.py +0 -52
- torchaudio/_backend/ffmpeg.py +0 -334
- torchaudio/_backend/soundfile.py +0 -54
- torchaudio/_backend/soundfile_backend.py +0 -457
- torchaudio/_backend/sox.py +0 -91
- torchaudio/_backend/utils.py +0 -317
- torchaudio/backend/__init__.py +0 -8
- torchaudio/backend/_no_backend.py +0 -25
- torchaudio/backend/_sox_io_backend.py +0 -294
- torchaudio/backend/common.py +0 -13
- torchaudio/backend/no_backend.py +0 -14
- torchaudio/backend/soundfile_backend.py +0 -14
- torchaudio/backend/sox_io_backend.py +0 -14
- torchaudio/io/__init__.py +0 -13
- torchaudio/io/_effector.py +0 -347
- torchaudio/io/_playback.py +0 -72
- torchaudio/kaldi_io.py +0 -144
- torchaudio/prototype/__init__.py +0 -0
- torchaudio/prototype/datasets/__init__.py +0 -4
- torchaudio/prototype/datasets/musan.py +0 -67
- torchaudio/prototype/functional/__init__.py +0 -26
- torchaudio/prototype/functional/_dsp.py +0 -433
- torchaudio/prototype/functional/_rir.py +0 -379
- torchaudio/prototype/functional/functional.py +0 -190
- torchaudio/prototype/models/__init__.py +0 -36
- torchaudio/prototype/models/_conformer_wav2vec2.py +0 -794
- torchaudio/prototype/models/_emformer_hubert.py +0 -333
- torchaudio/prototype/models/conv_emformer.py +0 -525
- torchaudio/prototype/models/hifi_gan.py +0 -336
- torchaudio/prototype/models/rnnt.py +0 -711
- torchaudio/prototype/models/rnnt_decoder.py +0 -399
- torchaudio/prototype/pipelines/__init__.py +0 -12
- torchaudio/prototype/pipelines/_vggish/__init__.py +0 -3
- torchaudio/prototype/pipelines/_vggish/_vggish_impl.py +0 -233
- torchaudio/prototype/pipelines/_vggish/_vggish_pipeline.py +0 -82
- torchaudio/prototype/pipelines/hifigan_pipeline.py +0 -228
- torchaudio/prototype/pipelines/rnnt_pipeline.py +0 -58
- torchaudio/prototype/transforms/__init__.py +0 -9
- torchaudio/prototype/transforms/_transforms.py +0 -456
- torchaudio/sox_effects/__init__.py +0 -10
- torchaudio/sox_effects/sox_effects.py +0 -272
- torchaudio/utils/ffmpeg_utils.py +0 -11
- torchaudio/utils/sox_utils.py +0 -99
- torchaudio-2.7.1.dist-info/RECORD +0 -144
- torio/__init__.py +0 -8
- torio/_extension/__init__.py +0 -13
- torio/_extension/utils.py +0 -147
- torio/io/__init__.py +0 -9
- torio/io/_streaming_media_decoder.py +0 -978
- torio/io/_streaming_media_encoder.py +0 -502
- torio/lib/__init__.py +0 -0
- torio/lib/_torio_ffmpeg4.pyd +0 -0
- torio/lib/_torio_ffmpeg5.pyd +0 -0
- torio/lib/_torio_ffmpeg6.pyd +0 -0
- torio/lib/libtorio_ffmpeg4.pyd +0 -0
- torio/lib/libtorio_ffmpeg5.pyd +0 -0
- torio/lib/libtorio_ffmpeg6.pyd +0 -0
- torio/utils/__init__.py +0 -4
- torio/utils/ffmpeg_utils.py +0 -247
- {torchaudio-2.7.1.dist-info → torchaudio-2.9.0.dist-info}/WHEEL +0 -0
- {torchaudio-2.7.1.dist-info → torchaudio-2.9.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
|
|
3
3
|
import math
|
|
4
|
-
import tempfile
|
|
5
4
|
import warnings
|
|
6
5
|
from collections.abc import Sequence
|
|
7
6
|
from typing import List, Optional, Tuple, Union
|
|
@@ -9,7 +8,7 @@ from typing import List, Optional, Tuple, Union
|
|
|
9
8
|
import torch
|
|
10
9
|
import torchaudio
|
|
11
10
|
from torch import Tensor
|
|
12
|
-
from torchaudio._internal.module_utils import
|
|
11
|
+
from torchaudio._internal.module_utils import dropping_support
|
|
13
12
|
|
|
14
13
|
from .filtering import highpass_biquad, treble_biquad
|
|
15
14
|
|
|
@@ -33,7 +32,6 @@ __all__ = [
|
|
|
33
32
|
"mask_along_axis_iid",
|
|
34
33
|
"sliding_window_cmn",
|
|
35
34
|
"spectral_centroid",
|
|
36
|
-
"apply_codec",
|
|
37
35
|
"resample",
|
|
38
36
|
"edit_distance",
|
|
39
37
|
"loudness",
|
|
@@ -816,7 +814,7 @@ def _get_mask_param(mask_param: int, p: float, axis_length: int) -> int:
|
|
|
816
814
|
def mask_along_axis_iid(
|
|
817
815
|
specgrams: Tensor,
|
|
818
816
|
mask_param: int,
|
|
819
|
-
mask_value: float,
|
|
817
|
+
mask_value: Union[float, Tensor],
|
|
820
818
|
axis: int,
|
|
821
819
|
p: float = 1.0,
|
|
822
820
|
) -> Tensor:
|
|
@@ -873,7 +871,12 @@ def mask_along_axis_iid(
|
|
|
873
871
|
|
|
874
872
|
# Per batch example masking
|
|
875
873
|
specgrams = specgrams.transpose(axis, -1)
|
|
876
|
-
|
|
874
|
+
# this aims to avoid CPU-GPU sync from upstream
|
|
875
|
+
specgrams = (
|
|
876
|
+
torch.where((mask >= mask_start) & (mask < mask_end), mask_value.repeat(specgrams.shape), specgrams)
|
|
877
|
+
if isinstance(mask_value, Tensor)
|
|
878
|
+
else specgrams.masked_fill((mask >= mask_start) & (mask < mask_end), mask_value)
|
|
879
|
+
)
|
|
877
880
|
specgrams = specgrams.transpose(axis, -1)
|
|
878
881
|
|
|
879
882
|
return specgrams
|
|
@@ -1295,51 +1298,6 @@ def spectral_centroid(
|
|
|
1295
1298
|
return (freqs * specgram).sum(dim=freq_dim) / specgram.sum(dim=freq_dim)
|
|
1296
1299
|
|
|
1297
1300
|
|
|
1298
|
-
@deprecated("Please migrate to :py:class:`torchaudio.io.AudioEffector`.", remove=False)
|
|
1299
|
-
def apply_codec(
|
|
1300
|
-
waveform: Tensor,
|
|
1301
|
-
sample_rate: int,
|
|
1302
|
-
format: str,
|
|
1303
|
-
channels_first: bool = True,
|
|
1304
|
-
compression: Optional[float] = None,
|
|
1305
|
-
encoding: Optional[str] = None,
|
|
1306
|
-
bits_per_sample: Optional[int] = None,
|
|
1307
|
-
) -> Tensor:
|
|
1308
|
-
r"""
|
|
1309
|
-
Apply codecs as a form of augmentation.
|
|
1310
|
-
|
|
1311
|
-
.. devices:: CPU
|
|
1312
|
-
|
|
1313
|
-
Args:
|
|
1314
|
-
waveform (Tensor): Audio data. Must be 2 dimensional. See also ```channels_first```.
|
|
1315
|
-
sample_rate (int): Sample rate of the audio waveform.
|
|
1316
|
-
format (str): File format.
|
|
1317
|
-
channels_first (bool, optional):
|
|
1318
|
-
When True, both the input and output Tensor have dimension `(channel, time)`.
|
|
1319
|
-
Otherwise, they have dimension `(time, channel)`.
|
|
1320
|
-
compression (float or None, optional): Used for formats other than WAV.
|
|
1321
|
-
For more details see :py:func:`torchaudio.backend.sox_io_backend.save`.
|
|
1322
|
-
encoding (str or None, optional): Changes the encoding for the supported formats.
|
|
1323
|
-
For more details see :py:func:`torchaudio.backend.sox_io_backend.save`.
|
|
1324
|
-
bits_per_sample (int or None, optional): Changes the bit depth for the supported formats.
|
|
1325
|
-
For more details see :py:func:`torchaudio.backend.sox_io_backend.save`.
|
|
1326
|
-
|
|
1327
|
-
Returns:
|
|
1328
|
-
Tensor: Resulting Tensor.
|
|
1329
|
-
If ``channels_first=True``, it has `(channel, time)` else `(time, channel)`.
|
|
1330
|
-
"""
|
|
1331
|
-
from torchaudio.backend import _sox_io_backend
|
|
1332
|
-
|
|
1333
|
-
with tempfile.NamedTemporaryFile() as f:
|
|
1334
|
-
torchaudio.backend._sox_io_backend.save(
|
|
1335
|
-
f.name, waveform, sample_rate, channels_first, compression, format, encoding, bits_per_sample
|
|
1336
|
-
)
|
|
1337
|
-
augmented, sr = _sox_io_backend.load(f.name, channels_first=channels_first, format=format)
|
|
1338
|
-
if sr != sample_rate:
|
|
1339
|
-
augmented = resample(augmented, sr, sample_rate)
|
|
1340
|
-
return augmented
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
1301
|
_CPU = torch.device("cpu")
|
|
1344
1302
|
|
|
1345
1303
|
|
|
@@ -1760,7 +1718,22 @@ def _fix_waveform_shape(
|
|
|
1760
1718
|
return waveform_shift
|
|
1761
1719
|
|
|
1762
1720
|
|
|
1763
|
-
|
|
1721
|
+
class RnntLoss(torch.autograd.Function):
|
|
1722
|
+
@staticmethod
|
|
1723
|
+
def forward(ctx, *args):
|
|
1724
|
+
output, saved = torch.ops.torchaudio.rnnt_loss_forward(*args)
|
|
1725
|
+
ctx.save_for_backward(saved)
|
|
1726
|
+
return output
|
|
1727
|
+
|
|
1728
|
+
@staticmethod
|
|
1729
|
+
def backward(ctx, dy):
|
|
1730
|
+
grad = ctx.saved_tensors[0]
|
|
1731
|
+
grad_out = dy.view((-1, 1, 1, 1))
|
|
1732
|
+
result = grad * grad_out
|
|
1733
|
+
return (result, None, None, None, None, None, None, None)
|
|
1734
|
+
|
|
1735
|
+
|
|
1736
|
+
def _rnnt_loss(
|
|
1764
1737
|
logits: Tensor,
|
|
1765
1738
|
targets: Tensor,
|
|
1766
1739
|
logit_lengths: Tensor,
|
|
@@ -1802,15 +1775,7 @@ def rnnt_loss(
|
|
|
1802
1775
|
if blank < 0: # reinterpret blank index if blank < 0.
|
|
1803
1776
|
blank = logits.shape[-1] + blank
|
|
1804
1777
|
|
|
1805
|
-
costs
|
|
1806
|
-
logits=logits,
|
|
1807
|
-
targets=targets,
|
|
1808
|
-
logit_lengths=logit_lengths,
|
|
1809
|
-
target_lengths=target_lengths,
|
|
1810
|
-
blank=blank,
|
|
1811
|
-
clamp=clamp,
|
|
1812
|
-
fused_log_softmax=fused_log_softmax,
|
|
1813
|
-
)
|
|
1778
|
+
costs = RnntLoss.apply(logits, targets, logit_lengths, target_lengths, blank, clamp, fused_log_softmax)
|
|
1814
1779
|
|
|
1815
1780
|
if reduction == "mean":
|
|
1816
1781
|
return costs.mean()
|
|
@@ -1865,6 +1830,11 @@ def psd(
|
|
|
1865
1830
|
return psd
|
|
1866
1831
|
|
|
1867
1832
|
|
|
1833
|
+
# Expose both deprecated wrapper as well as original because torchscript breaks on
|
|
1834
|
+
# wrapped functions.
|
|
1835
|
+
rnnt_loss = dropping_support(_rnnt_loss)
|
|
1836
|
+
|
|
1837
|
+
|
|
1868
1838
|
def _compute_mat_trace(input: torch.Tensor, dim1: int = -1, dim2: int = -2) -> torch.Tensor:
|
|
1869
1839
|
r"""Compute the trace of a Tensor along ``dim1`` and ``dim2`` dimensions.
|
|
1870
1840
|
|
|
@@ -2494,7 +2464,7 @@ def deemphasis(waveform, coeff: float = 0.97) -> torch.Tensor:
|
|
|
2494
2464
|
"""
|
|
2495
2465
|
a_coeffs = torch.tensor([1.0, -coeff], dtype=waveform.dtype, device=waveform.device)
|
|
2496
2466
|
b_coeffs = torch.tensor([1.0, 0.0], dtype=waveform.dtype, device=waveform.device)
|
|
2497
|
-
return torchaudio.functional.lfilter(waveform, a_coeffs=a_coeffs, b_coeffs=b_coeffs)
|
|
2467
|
+
return torchaudio.functional.filtering.lfilter(waveform, a_coeffs=a_coeffs, b_coeffs=b_coeffs)
|
|
2498
2468
|
|
|
2499
2469
|
|
|
2500
2470
|
def frechet_distance(mu_x, sigma_x, mu_y, sigma_y):
|
torchaudio/lib/_torchaudio.pyd
CHANGED
|
Binary file
|
torchaudio/lib/libtorchaudio.pyd
CHANGED
|
Binary file
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
|
|
3
|
+
from torchaudio._internal.module_utils import dropping_class_support, dropping_support
|
|
4
|
+
|
|
1
5
|
_CTC_DECODERS = [
|
|
2
6
|
"CTCHypothesis",
|
|
3
7
|
"CTCDecoder",
|
|
@@ -33,7 +37,21 @@ def __getattr__(name: str):
|
|
|
33
37
|
"To use CUCTC decoder, please set BUILD_CUDA_CTC_DECODER=1 when building from source."
|
|
34
38
|
) from err
|
|
35
39
|
|
|
36
|
-
|
|
40
|
+
# TODO: when all unsupported classes are removed, replace the
|
|
41
|
+
# following if-else block with
|
|
42
|
+
# item = getattr(_cuda_ctc_decoder, name)
|
|
43
|
+
orig_item = getattr(_cuda_ctc_decoder, name)
|
|
44
|
+
if inspect.isclass(orig_item) or (
|
|
45
|
+
# workaround a failure to detect type instances
|
|
46
|
+
# after sphinx autodoc mocking, required for
|
|
47
|
+
# building docs
|
|
48
|
+
getattr(orig_item, "__sphinx_mock__", False)
|
|
49
|
+
and inspect.isclass(orig_item.__class__)
|
|
50
|
+
):
|
|
51
|
+
item = dropping_class_support(orig_item)
|
|
52
|
+
else:
|
|
53
|
+
item = dropping_support(orig_item)
|
|
54
|
+
|
|
37
55
|
globals()[name] = item
|
|
38
56
|
return item
|
|
39
57
|
raise AttributeError(f"module {__name__} has no attribute {name}")
|
|
@@ -25,7 +25,7 @@ from flashlight.lib.text.dictionary import (
|
|
|
25
25
|
Dictionary as _Dictionary,
|
|
26
26
|
load_words as _load_words,
|
|
27
27
|
)
|
|
28
|
-
from torchaudio.utils import
|
|
28
|
+
from torchaudio.utils import _download_asset
|
|
29
29
|
|
|
30
30
|
try:
|
|
31
31
|
from flashlight.lib.text.decoder.kenlm import KenLM as _KenLM
|
|
@@ -69,7 +69,7 @@ def _get_word_dict(lexicon, lm, lm_dict, tokens_dict, unk_word):
|
|
|
69
69
|
|
|
70
70
|
if lexicon and word_dict is None:
|
|
71
71
|
word_dict = _create_word_dict(lexicon)
|
|
72
|
-
elif not lexicon and word_dict is None and type(lm)
|
|
72
|
+
elif not lexicon and word_dict is None and type(lm) is str:
|
|
73
73
|
d = {tokens_dict.get_entry(i): [[tokens_dict.get_entry(i)]] for i in range(tokens_dict.index_size())}
|
|
74
74
|
d[unk_word] = [[unk_word]]
|
|
75
75
|
word_dict = _create_word_dict(d)
|
|
@@ -499,7 +499,7 @@ def ctc_decoder(
|
|
|
499
499
|
# construct word dict and language model
|
|
500
500
|
word_dict = _get_word_dict(lexicon, lm, lm_dict, tokens_dict, unk_word)
|
|
501
501
|
|
|
502
|
-
if type(lm)
|
|
502
|
+
if type(lm) is str:
|
|
503
503
|
if _KenLM is None:
|
|
504
504
|
raise RuntimeError(
|
|
505
505
|
"flashlight-text is installed, but KenLM is not installed. "
|
|
@@ -554,10 +554,10 @@ def download_pretrained_files(model: str) -> _PretrainedFiles:
|
|
|
554
554
|
"""
|
|
555
555
|
|
|
556
556
|
files = _get_filenames(model)
|
|
557
|
-
lexicon_file =
|
|
558
|
-
tokens_file =
|
|
557
|
+
lexicon_file = _download_asset(files.lexicon)
|
|
558
|
+
tokens_file = _download_asset(files.tokens)
|
|
559
559
|
if files.lm is not None:
|
|
560
|
-
lm_file =
|
|
560
|
+
lm_file = _download_asset(files.lm)
|
|
561
561
|
else:
|
|
562
562
|
lm_file = None
|
|
563
563
|
|
|
@@ -181,7 +181,7 @@ def cuda_ctc_decoder(
|
|
|
181
181
|
>>> )
|
|
182
182
|
>>> results = decoder(log_probs, encoder_out_lens) # List of shape (B, nbest) of Hypotheses
|
|
183
183
|
"""
|
|
184
|
-
if type(tokens)
|
|
184
|
+
if type(tokens) is str:
|
|
185
185
|
tokens = _get_vocab_list(tokens)
|
|
186
186
|
|
|
187
187
|
return CUCTCDecoder(vocab_list=tokens, beam_size=beam_size, nbest=nbest, blank_skip_threshold=blank_skip_threshold)
|
|
@@ -285,7 +285,7 @@ def squim_objective_model(
|
|
|
285
285
|
chunk_size: int,
|
|
286
286
|
chunk_stride: Optional[int] = None,
|
|
287
287
|
) -> SquimObjective:
|
|
288
|
-
"""Build a custome :class:`torchaudio.
|
|
288
|
+
"""Build a custome :class:`torchaudio.models.squim.SquimObjective` model.
|
|
289
289
|
|
|
290
290
|
Args:
|
|
291
291
|
feat_dim (int, optional): The feature dimension after Encoder module.
|
|
@@ -313,7 +313,7 @@ def squim_objective_model(
|
|
|
313
313
|
|
|
314
314
|
|
|
315
315
|
def squim_objective_base() -> SquimObjective:
|
|
316
|
-
"""Build :class:`torchaudio.
|
|
316
|
+
"""Build :class:`torchaudio.models.squim.SquimObjective` model with default arguments."""
|
|
317
317
|
return squim_objective_model(
|
|
318
318
|
feat_dim=256,
|
|
319
319
|
win_len=64,
|
|
@@ -52,7 +52,7 @@ class SourceSeparationBundle:
|
|
|
52
52
|
def get_model(self) -> torch.nn.Module:
|
|
53
53
|
"""Construct the model and load the pretrained weight."""
|
|
54
54
|
model = self._model_factory_func()
|
|
55
|
-
path = torchaudio.utils.
|
|
55
|
+
path = torchaudio.utils._download_asset(self._model_path)
|
|
56
56
|
state_dict = torch.load(path)
|
|
57
57
|
model.load_state_dict(state_dict)
|
|
58
58
|
model.eval()
|
|
@@ -50,7 +50,7 @@ class SquimObjectiveBundle:
|
|
|
50
50
|
Variation of :py:class:`~torchaudio.models.SquimObjective`.
|
|
51
51
|
"""
|
|
52
52
|
model = squim_objective_base()
|
|
53
|
-
path = torchaudio.utils.
|
|
53
|
+
path = torchaudio.utils._download_asset(f"models/{self._path}")
|
|
54
54
|
state_dict = torch.load(path, weights_only=True)
|
|
55
55
|
model.load_state_dict(state_dict)
|
|
56
56
|
model.eval()
|
|
@@ -125,7 +125,7 @@ class SquimSubjectiveBundle:
|
|
|
125
125
|
Variation of :py:class:`~torchaudio.models.SquimObjective`.
|
|
126
126
|
"""
|
|
127
127
|
model = squim_subjective_base()
|
|
128
|
-
path = torchaudio.utils.
|
|
128
|
+
path = torchaudio.utils._download_asset(f"models/{self._path}")
|
|
129
129
|
state_dict = torch.load(path, weights_only=True)
|
|
130
130
|
model.load_state_dict(state_dict)
|
|
131
131
|
model.eval()
|
|
@@ -161,6 +161,7 @@ def _load_phonemizer(file, dl_kwargs):
|
|
|
161
161
|
raise RuntimeError("DeepPhonemizer is not installed. Please install it.")
|
|
162
162
|
|
|
163
163
|
from dp.phonemizer import Phonemizer
|
|
164
|
+
from dp.preprocessing.text import LanguageTokenizer, Preprocessor, SequenceTokenizer
|
|
164
165
|
|
|
165
166
|
# By default, dp issues DEBUG level log.
|
|
166
167
|
logger = logging.getLogger("dp")
|
|
@@ -174,7 +175,8 @@ def _load_phonemizer(file, dl_kwargs):
|
|
|
174
175
|
if not os.path.exists(path):
|
|
175
176
|
dl_kwargs = {} if dl_kwargs is None else dl_kwargs
|
|
176
177
|
download_url_to_file(url, path, **dl_kwargs)
|
|
177
|
-
|
|
178
|
+
with torch.serialization.safe_globals([Preprocessor, LanguageTokenizer, SequenceTokenizer]):
|
|
179
|
+
return Phonemizer.from_checkpoint(path)
|
|
178
180
|
finally:
|
|
179
181
|
logger.setLevel(orig_level)
|
|
180
182
|
|
|
@@ -244,7 +244,7 @@ class RNNTBundle:
|
|
|
244
244
|
|
|
245
245
|
def _get_model(self) -> RNNT:
|
|
246
246
|
model = self._rnnt_factory_func()
|
|
247
|
-
path = torchaudio.utils.
|
|
247
|
+
path = torchaudio.utils._download_asset(self._rnnt_path)
|
|
248
248
|
state_dict = torch.load(path)
|
|
249
249
|
model.load_state_dict(state_dict)
|
|
250
250
|
model.eval()
|
|
@@ -313,7 +313,7 @@ class RNNTBundle:
|
|
|
313
313
|
Returns:
|
|
314
314
|
FeatureExtractor
|
|
315
315
|
"""
|
|
316
|
-
local_path = torchaudio.utils.
|
|
316
|
+
local_path = torchaudio.utils._download_asset(self._global_stats_path)
|
|
317
317
|
return _ModuleFeatureExtractor(
|
|
318
318
|
torch.nn.Sequential(
|
|
319
319
|
torchaudio.transforms.MelSpectrogram(
|
|
@@ -332,7 +332,7 @@ class RNNTBundle:
|
|
|
332
332
|
Returns:
|
|
333
333
|
FeatureExtractor
|
|
334
334
|
"""
|
|
335
|
-
local_path = torchaudio.utils.
|
|
335
|
+
local_path = torchaudio.utils._download_asset(self._global_stats_path)
|
|
336
336
|
return _ModuleFeatureExtractor(
|
|
337
337
|
torch.nn.Sequential(
|
|
338
338
|
torchaudio.transforms.MelSpectrogram(
|
|
@@ -350,7 +350,7 @@ class RNNTBundle:
|
|
|
350
350
|
Returns:
|
|
351
351
|
TokenProcessor
|
|
352
352
|
"""
|
|
353
|
-
local_path = torchaudio.utils.
|
|
353
|
+
local_path = torchaudio.utils._download_asset(self._sp_model_path)
|
|
354
354
|
return _SentencePieceTokenProcessor(local_path)
|
|
355
355
|
|
|
356
356
|
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from torchaudio._internal.module_utils import dropping_class_support
|
|
2
|
+
|
|
1
3
|
from ._multi_channel import MVDR, PSD, RTFMVDR, SoudenMVDR
|
|
2
4
|
from ._transforms import (
|
|
3
5
|
AddNoise,
|
|
@@ -21,7 +23,7 @@ from ._transforms import (
|
|
|
21
23
|
PitchShift,
|
|
22
24
|
Preemphasis,
|
|
23
25
|
Resample,
|
|
24
|
-
RNNTLoss,
|
|
26
|
+
RNNTLoss as _RNNTLoss,
|
|
25
27
|
SlidingWindowCmn,
|
|
26
28
|
SpecAugment,
|
|
27
29
|
SpectralCentroid,
|
|
@@ -34,6 +36,7 @@ from ._transforms import (
|
|
|
34
36
|
Vol,
|
|
35
37
|
)
|
|
36
38
|
|
|
39
|
+
RNNTLoss = dropping_class_support(_RNNTLoss)
|
|
37
40
|
|
|
38
41
|
__all__ = [
|
|
39
42
|
"AddNoise",
|
|
@@ -15,6 +15,7 @@ from torchaudio.functional.functional import (
|
|
|
15
15
|
_check_convolve_mode,
|
|
16
16
|
_fix_waveform_shape,
|
|
17
17
|
_get_sinc_resample_kernel,
|
|
18
|
+
_rnnt_loss,
|
|
18
19
|
_stretch_waveform,
|
|
19
20
|
)
|
|
20
21
|
|
|
@@ -1184,7 +1185,7 @@ class _AxisMasking(torch.nn.Module):
|
|
|
1184
1185
|
self.iid_masks = iid_masks
|
|
1185
1186
|
self.p = p
|
|
1186
1187
|
|
|
1187
|
-
def forward(self, specgram: Tensor, mask_value: float = 0.0) -> Tensor:
|
|
1188
|
+
def forward(self, specgram: Tensor, mask_value: Union[float, torch.Tensor] = 0.0) -> Tensor:
|
|
1188
1189
|
r"""
|
|
1189
1190
|
Args:
|
|
1190
1191
|
specgram (Tensor): Tensor of dimension `(..., freq, time)`.
|
|
@@ -1846,7 +1847,7 @@ class RNNTLoss(torch.nn.Module):
|
|
|
1846
1847
|
Tensor: Loss with the reduction option applied. If ``reduction`` is ``"none"``, then size (batch),
|
|
1847
1848
|
otherwise scalar.
|
|
1848
1849
|
"""
|
|
1849
|
-
return
|
|
1850
|
+
return _rnnt_loss(
|
|
1850
1851
|
logits,
|
|
1851
1852
|
targets,
|
|
1852
1853
|
logit_lengths,
|
|
@@ -2134,4 +2135,4 @@ class Deemphasis(torch.nn.Module):
|
|
|
2134
2135
|
Returns:
|
|
2135
2136
|
torch.Tensor: De-emphasized waveform, with shape `(..., N)`.
|
|
2136
2137
|
"""
|
|
2137
|
-
return F.deemphasis(waveform, coeff=self.coeff)
|
|
2138
|
+
return F.functional.deemphasis(waveform, coeff=self.coeff)
|
torchaudio/utils/__init__.py
CHANGED
torchaudio/utils/download.py
CHANGED
torchaudio/version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
__version__ = '2.
|
|
2
|
-
git_version = '
|
|
1
|
+
__version__ = '2.9.0+cpu'
|
|
2
|
+
git_version = 'eaa9e4e4dd413dca1084116581dc84fad403db3b'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: torchaudio
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.9.0
|
|
4
4
|
Summary: An audio package for PyTorch
|
|
5
5
|
Home-page: https://github.com/pytorch/audio
|
|
6
6
|
Author: Soumith Chintala, David Pollack, Sean Naren, Peter Goldsborough, Moto Hira, Caroline Chen, Jeff Hwang, Zhaoheng Ni, Xiaohui Zhang
|
|
@@ -15,17 +15,17 @@ Classifier: Operating System :: MacOS :: MacOS X
|
|
|
15
15
|
Classifier: Operating System :: Microsoft :: Windows
|
|
16
16
|
Classifier: Operating System :: POSIX
|
|
17
17
|
Classifier: Programming Language :: C++
|
|
18
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
19
18
|
Classifier: Programming Language :: Python :: 3.10
|
|
20
19
|
Classifier: Programming Language :: Python :: 3.11
|
|
21
20
|
Classifier: Programming Language :: Python :: 3.12
|
|
22
21
|
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
23
23
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
24
24
|
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
25
25
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
26
26
|
Description-Content-Type: text/markdown
|
|
27
27
|
License-File: LICENSE
|
|
28
|
-
Requires-Dist: torch==2.
|
|
28
|
+
Requires-Dist: torch==2.9.0
|
|
29
29
|
Dynamic: author
|
|
30
30
|
Dynamic: author-email
|
|
31
31
|
Dynamic: classifier
|
|
@@ -47,6 +47,17 @@ torchaudio: an audio library for PyTorch
|
|
|
47
47
|
|
|
48
48
|

|
|
49
49
|
|
|
50
|
+
> [!NOTE]
|
|
51
|
+
> **We have transitioned TorchAudio into a
|
|
52
|
+
> maintenance phase. This process removed some user-facing
|
|
53
|
+
> features. These features were deprecated from TorchAudio 2.8 and removed in 2.9.
|
|
54
|
+
> Our main goals were to reduce redundancies with the rest of the
|
|
55
|
+
> PyTorch ecosystem, make it easier to maintain, and create a version of
|
|
56
|
+
> TorchAudio that is more tightly scoped to its strengths: processing audio
|
|
57
|
+
> data for ML. Please see
|
|
58
|
+
> [our community message](https://github.com/pytorch/audio/issues/3902)
|
|
59
|
+
> for more details.**
|
|
60
|
+
|
|
50
61
|
The aim of torchaudio is to apply [PyTorch](https://github.com/pytorch/pytorch) to
|
|
51
62
|
the audio domain. By supporting PyTorch, torchaudio follows the same philosophy
|
|
52
63
|
of providing strong GPU acceleration, having a focus on trainable features through
|
|
@@ -56,9 +67,6 @@ processing library. The benefits of PyTorch can be seen in torchaudio through
|
|
|
56
67
|
having all the computations be through PyTorch operations which makes it easy
|
|
57
68
|
to use and feel like a natural extension.
|
|
58
69
|
|
|
59
|
-
- [Support audio I/O (Load files, Save files)](http://pytorch.org/audio/main/)
|
|
60
|
-
- Load a variety of audio formats, such as `wav`, `mp3`, `ogg`, `flac`, `opus`, `sphere`, into a torch Tensor using SoX
|
|
61
|
-
- [Kaldi (ark/scp)](http://pytorch.org/audio/main/kaldi_io.html)
|
|
62
70
|
- [Dataloaders for common audio datasets](http://pytorch.org/audio/main/datasets.html)
|
|
63
71
|
- Audio and speech processing functions
|
|
64
72
|
- [forced_align](https://pytorch.org/audio/main/generated/torchaudio.functional.forced_align.html)
|
|
@@ -99,7 +107,7 @@ If you find this package useful, please cite as:
|
|
|
99
107
|
|
|
100
108
|
```bibtex
|
|
101
109
|
@misc{hwang2023torchaudio,
|
|
102
|
-
title={TorchAudio 2.1: Advancing speech recognition, self-supervised learning, and audio processing components for PyTorch},
|
|
110
|
+
title={TorchAudio 2.1: Advancing speech recognition, self-supervised learning, and audio processing components for PyTorch},
|
|
103
111
|
author={Jeff Hwang and Moto Hira and Caroline Chen and Xiaohui Zhang and Zhaoheng Ni and Guangzhi Sun and Pingchuan Ma and Ruizhe Huang and Vineel Pratap and Yuekai Zhang and Anurag Kumar and Chin-Yun Yu and Chuang Zhu and Chunxi Liu and Jacob Kahn and Mirco Ravanelli and Peng Sun and Shinji Watanabe and Yangyang Shi and Yumeng Tao and Robin Scheibler and Samuele Cornell and Sean Kim and Stavros Petridis},
|
|
104
112
|
year={2023},
|
|
105
113
|
eprint={2310.17864},
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
torchaudio/__init__.py,sha256=-yWZZVblWA06HQ4cUS8sRsaSZugqacqw0A7vQELxjYE,8082
|
|
2
|
+
torchaudio/_torchcodec.py,sha256=2saifA0BdhE12Zb51vgS6zrwsE1ir7mmvmJ1lFOWtLI,13764
|
|
3
|
+
torchaudio/version.py,sha256=UZrgLp-AqA1uCSaLgWb8hq-wCqJ8Pz6Pe5opbKJNYKQ,85
|
|
4
|
+
torchaudio/_extension/__init__.py,sha256=j7wdZTgwGv6PcQgS1kMisbDA-M4emX3gheOSmjq_jWs,1966
|
|
5
|
+
torchaudio/_extension/utils.py,sha256=kQ_PyLToNuPjLKOQa_-tT1LpFowcGQ0lpcuzrRPrmb8,5059
|
|
6
|
+
torchaudio/_internal/__init__.py,sha256=80cpJfTS8977YYrU3q5p4DRAGAkqEJrmG9Lq2hEDpoo,251
|
|
7
|
+
torchaudio/_internal/module_utils.py,sha256=sXO16_5rS9c67LlADALR16k3HcZo9dHyZ-y_L0zFnnY,5400
|
|
8
|
+
torchaudio/compliance/__init__.py,sha256=JNH_-dTQVmm55YwcVMuVvUYFWdXhGn4C__9S8IUsNoU,53
|
|
9
|
+
torchaudio/compliance/kaldi.py,sha256=bS7qJgS3k8FK1RkMiNEoP3q0xhjeV_V4RHQ9jo_rqOM,37479
|
|
10
|
+
torchaudio/datasets/__init__.py,sha256=hdHldm3OzoQLbI0kHj8tLxqwDhzMfedq0_t1kAK7ORg,1218
|
|
11
|
+
torchaudio/datasets/cmuarctic.py,sha256=c7c75817_brmb7cvFO6_Bj249cJDph9LDBOqs8aUyhM,7238
|
|
12
|
+
torchaudio/datasets/cmudict.py,sha256=_9vTz7_8BFVrcHeA61_-h2XLOl6IsdWCptkMWziOW7U,6176
|
|
13
|
+
torchaudio/datasets/commonvoice.py,sha256=OcFn-nG4YfBIz0YIpH91xH9rFka8yFJmrxy4vFZkC4I,2849
|
|
14
|
+
torchaudio/datasets/dr_vctk.py,sha256=Ayf85prDNr1LcWQ4bysVWdRVPry2JALjv6Mtq-6iBpY,4498
|
|
15
|
+
torchaudio/datasets/fluentcommands.py,sha256=KnmH1Y28k5PhqQX6eV-75MqwTRxiHSUUcvAsa-K954s,3353
|
|
16
|
+
torchaudio/datasets/gtzan.py,sha256=kt25Ly9qDGuiiVXgsXhS05tGi6laRhRko81-BQ4sZ-w,25475
|
|
17
|
+
torchaudio/datasets/iemocap.py,sha256=ZMMG_FpcWcMHEbhuRYRQaUWi_DoegjxCrnVyCg5EEVE,5077
|
|
18
|
+
torchaudio/datasets/librilight_limited.py,sha256=iwZBlSKVLrXzhZvaqjuVRGO6czxX4fpdzd8wWe5feWQ,4290
|
|
19
|
+
torchaudio/datasets/librimix.py,sha256=AncE671AOl04dRPsajNZW-ZxxI_PwA2sjBftdBg4Q-k,5249
|
|
20
|
+
torchaudio/datasets/librispeech.py,sha256=ys769I0UzG07UEmyZ_KDwATh4yc08hFUuCayK8tYIGg,6482
|
|
21
|
+
torchaudio/datasets/librispeech_biasing.py,sha256=KEGplRU_wpgb0VqrT-t42kvtC7lg4uMssZcosVvvPhg,7147
|
|
22
|
+
torchaudio/datasets/libritts.py,sha256=91Ep2Mq3OySre25GniXBLmRzTwEPiKmMaqXnzirn0xY,6038
|
|
23
|
+
torchaudio/datasets/ljspeech.py,sha256=l09BSBQH76I-LhYkIRF0u18tTi-4yysaF4gj2GSZaxw,3601
|
|
24
|
+
torchaudio/datasets/musdb_hq.py,sha256=FVlKsGEBHiT50y9GLswnt2QFph2PjiI6yCy1MxiG6f8,5214
|
|
25
|
+
torchaudio/datasets/quesst14.py,sha256=3y6H3T3g78jkDqca8jORQBOViZhH1RhlsfuY8HJ2OcU,4591
|
|
26
|
+
torchaudio/datasets/snips.py,sha256=mwVc5KsbMlPQJ87eyYgjnQ5S4EFXoQvm13dO0rXpJuE,5165
|
|
27
|
+
torchaudio/datasets/speechcommands.py,sha256=_wmrKSiEe0COO7uk0JVXypBmNxu0urnceHuFQ6zMOk0,7664
|
|
28
|
+
torchaudio/datasets/tedlium.py,sha256=UQZUaeUqmFntZWcH9HXOpGeW6tsCcG81bPjX2_CWxbg,8916
|
|
29
|
+
torchaudio/datasets/utils.py,sha256=mpg4t0hFitRGj9Ow7MXwCFNKGTnVsErVLpxfsbP7FE8,1757
|
|
30
|
+
torchaudio/datasets/vctk.py,sha256=vN_VzxTLyHW11I_rzfzMVA3h5JW917FaU3NCnR-zcL0,5842
|
|
31
|
+
torchaudio/datasets/voxceleb1.py,sha256=JlYkbyYOAFUFhGLULe3lgucANWf_G7qGqw47YjiX2IM,12034
|
|
32
|
+
torchaudio/datasets/yesno.py,sha256=B3hRNUazvB8V8SwOUlQzliB9vI9gMkl9SEl-dZ4PEaw,3115
|
|
33
|
+
torchaudio/functional/__init__.py,sha256=do2OUOUhg_8Z7TPUQ1HHpoWjNAPrwgxDIemk718TWO0,2581
|
|
34
|
+
torchaudio/functional/_alignment.py,sha256=P2ehTZ7IwuMFWVNqrhYjc1imBKNykwC03D7uvbgxBCA,4867
|
|
35
|
+
torchaudio/functional/filtering.py,sha256=piUbVknBOBdILrd1M9bzk2A9UCCn4qzKXNEgv7IYD7Q,64010
|
|
36
|
+
torchaudio/functional/functional.py,sha256=c-jSGnLx54qnQk5efZiLrioi5x1-2LIQF3P2lvLPPPo,97236
|
|
37
|
+
torchaudio/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
|
+
torchaudio/lib/_torchaudio.pyd,sha256=Vcw2IS4O7DRE-FXSTmY1eECrMj89X6yQOjOh0xcFC9Y,826880
|
|
39
|
+
torchaudio/lib/libtorchaudio.pyd,sha256=irBrmf1jbLJZtkxCK2tEcEppSR4JxFbNaZYdjTk7lKA,923648
|
|
40
|
+
torchaudio/models/__init__.py,sha256=Gi3UQvxjwTLW9wfKlF42O3Vup70d0bk2x-rZS89ASwI,2080
|
|
41
|
+
torchaudio/models/_hdemucs.py,sha256=ipAj7965PO_WEZqQwW1om9gQj90UhQOeU6HU3Lpvzwo,39250
|
|
42
|
+
torchaudio/models/conformer.py,sha256=gVrOYeJkPlVaX-4eZpVzNUe_r3k7g1Y6NaaQ8JZP-r4,10361
|
|
43
|
+
torchaudio/models/conv_tasnet.py,sha256=D7Y10sOzLe03gygfN1J5R73SIHkIGVQOkqKQ6Ni3o_s,12870
|
|
44
|
+
torchaudio/models/deepspeech.py,sha256=nVYc2xwWpFO6gu5CR0mbqLiAzJn8lAfHcdcP92i22mo,2830
|
|
45
|
+
torchaudio/models/emformer.py,sha256=WbaeZcrPFOOLn4igqweE0AfuF_SQZpqg7XPGEhl7C8c,38650
|
|
46
|
+
torchaudio/models/rnnt.py,sha256=PNJpZd3vH6wRq8TEf4UlPtVHbte9wOJ-bRMEug6gp08,36357
|
|
47
|
+
torchaudio/models/rnnt_decoder.py,sha256=CBBMZhhq5Bgax0_3p3SZD-Os3S1LFHB91oTgVED4bmY,13178
|
|
48
|
+
torchaudio/models/tacotron2.py,sha256=mZ5lLSa75oqc0hgkc3sIm5_gK-knhtgX3dmg9-oLQao,46960
|
|
49
|
+
torchaudio/models/wav2letter.py,sha256=oetxpH5RG0TadYB75IOmYOrnraaPvSlcSNpRZb2FE_A,3350
|
|
50
|
+
torchaudio/models/wavernn.py,sha256=LRgL36jA6WzI1PAzBY6P52oCMGSTOraXB8fEgkwpSxw,15855
|
|
51
|
+
torchaudio/models/decoder/__init__.py,sha256=PonG1Rg0CRBBbmRLZZQ1n2rXiDhivAAU9x67_G15seI,1963
|
|
52
|
+
torchaudio/models/decoder/_ctc_decoder.py,sha256=zKsOdPNrUn7v2QJmluC3kOp90RQaP3CSmQSurc1nAFw,20654
|
|
53
|
+
torchaudio/models/decoder/_cuda_ctc_decoder.py,sha256=4JKcQak4Ke6Id0EJEDJEx1yLTXKbJpIDNiu7QSe3gWU,7373
|
|
54
|
+
torchaudio/models/squim/__init__.py,sha256=eQox8kPviOthKulpzZvPK0a66NHW7MzYE4aOF7va_kU,357
|
|
55
|
+
torchaudio/models/squim/objective.py,sha256=FCYu0i2OXY3e6Z-BO2p-rc6rU0PvpJZ0gA-CPZZA9fw,12607
|
|
56
|
+
torchaudio/models/squim/subjective.py,sha256=1_gK9O3nvrjiikpP46IdsMzKduSTt91kKklA69wQqiw,5947
|
|
57
|
+
torchaudio/models/wav2vec2/__init__.py,sha256=j5FdQFfuIpdIKYwoMLop4Ba70GGoS-lK61tU-oNG5wg,972
|
|
58
|
+
torchaudio/models/wav2vec2/components.py,sha256=EzmuGc5qHVPrHCGqYVHTvdjqP2gCrBfnHSoTK9GsZ1w,48244
|
|
59
|
+
torchaudio/models/wav2vec2/model.py,sha256=kP6QKsF1PjleyUMhaPjydi0pCRy4GGUArRWBzfDJmdE,61671
|
|
60
|
+
torchaudio/models/wav2vec2/wavlm_attention.py,sha256=iYde9grsb_RaEs87FI5ykyN3z0Ix1plqpsMNvakAiWM,11058
|
|
61
|
+
torchaudio/models/wav2vec2/utils/__init__.py,sha256=1eowaOEKRbp7JajFNv_r47REJqnMmXidukS7Mrwp_5Q,188
|
|
62
|
+
torchaudio/models/wav2vec2/utils/import_fairseq.py,sha256=so7T-otDNCsTUtzJRUFFGWyd0caWl3RY_UbFMxJ4DJE,9411
|
|
63
|
+
torchaudio/models/wav2vec2/utils/import_huggingface.py,sha256=NMK6YrAIDfOw8j1tV-3XTwx_mwbJHvg8ldTrAWRztIM,6080
|
|
64
|
+
torchaudio/pipelines/__init__.py,sha256=oMwOu-1T_ugJmhdaoI5NrCDrUAGrpDOlJQO8h-bLAW4,2847
|
|
65
|
+
torchaudio/pipelines/_source_separation_pipeline.py,sha256=ttHqjcwCmCPWLj0YeDsTa1-XetuyjPDZ9D2deE3FmkA,4334
|
|
66
|
+
torchaudio/pipelines/_squim_pipeline.py,sha256=eYdrKVXUru3VdfpaDnMN5qCuKHNveEd_jwGqtemV9ls,6438
|
|
67
|
+
torchaudio/pipelines/rnnt_pipeline.py,sha256=16OMN_4yY1TEKLWjqkzFSMKByITxLobj6X1uk78pwQI,14133
|
|
68
|
+
torchaudio/pipelines/_tts/__init__.py,sha256=WKc5c06b_M9MvEohJZghJJWAL7vXvfwRIkdy85UCh04,442
|
|
69
|
+
torchaudio/pipelines/_tts/impl.py,sha256=wwrTyTEEkew22AnzB_ZklapGaAstJSUBawhA7bOcGXM,15759
|
|
70
|
+
torchaudio/pipelines/_tts/interface.py,sha256=y1mU0446Vy2hHpCwMqRZt1UI4ZXl-C4tJp92EylwHh0,10479
|
|
71
|
+
torchaudio/pipelines/_tts/utils.py,sha256=tuiEA5eqoBNgt46TxGA7lOEqljbuECL0-pc_uSco0xo,5040
|
|
72
|
+
torchaudio/pipelines/_wav2vec2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
73
|
+
torchaudio/pipelines/_wav2vec2/aligner.py,sha256=HOcthFgup97QMx9ZXCmkv6jdw_zxdRT-e_SilXEujNU,2796
|
|
74
|
+
torchaudio/pipelines/_wav2vec2/impl.py,sha256=I6htNo4Wt5LPxX9Z8rmxarFE8BZOZBUFIU9T9k1k2Po,67260
|
|
75
|
+
torchaudio/pipelines/_wav2vec2/utils.py,sha256=CVawfXmVGWY8mj-_6r4KO907BpF67WAVWHEHhycFIaM,7317
|
|
76
|
+
torchaudio/transforms/__init__.py,sha256=TsmUD7pXQO940uG0GhFTuMB48PT6uOklN5ptd-Yut14,1476
|
|
77
|
+
torchaudio/transforms/_multi_channel.py,sha256=Musw7dTu25HNjKeIcKHUDuqBmj_GC2e3TaakqJcffW8,22688
|
|
78
|
+
torchaudio/transforms/_transforms.py,sha256=g-E3nGgCEcKeWqEtyrDquSKfecHMD8olJRUMnqHHWYI,89057
|
|
79
|
+
torchaudio/utils/__init__.py,sha256=yNMWIjoGd68FPxV6PhDdjO1oRemlM0QPJsu_k6iVaGQ,74
|
|
80
|
+
torchaudio/utils/download.py,sha256=rf_yS18i7n4JYbIGpWiWc0ipe4sGv3Rvivv6p0DaZgU,2972
|
|
81
|
+
torchaudio-2.9.0.dist-info/METADATA,sha256=Nu1BcomsFTqaoMK8arCV6-o4AEnU86n_TsLPFOzn--A,6911
|
|
82
|
+
torchaudio-2.9.0.dist-info/WHEEL,sha256=wtBQCQglBFOecpGS5EGnKK_Vb7B0KxTvBpQoiSy-jL0,101
|
|
83
|
+
torchaudio-2.9.0.dist-info/top_level.txt,sha256=mPKWMIRWWW2JwbJN6wRckeN1gpbjhifapAF0Z9t7SMo,11
|
|
84
|
+
torchaudio-2.9.0.dist-info/RECORD,,
|
|
85
|
+
torchaudio-2.9.0.dist-info/licenses/LICENSE,sha256=MmOOF5kxv-VR6r9nsOZ6E7SD4Wa1jdcmNjSrf4nzlvU,1363
|
torchaudio/_backend/__init__.py
DELETED
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
from typing import List, Optional
|
|
2
|
-
|
|
3
|
-
from torchaudio._internal.module_utils import deprecated
|
|
4
|
-
|
|
5
|
-
from . import utils
|
|
6
|
-
from .common import AudioMetaData
|
|
7
|
-
|
|
8
|
-
__all__ = [
|
|
9
|
-
"AudioMetaData",
|
|
10
|
-
"load",
|
|
11
|
-
"info",
|
|
12
|
-
"save",
|
|
13
|
-
"list_audio_backends",
|
|
14
|
-
"get_audio_backend",
|
|
15
|
-
"set_audio_backend",
|
|
16
|
-
]
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
info = utils.get_info_func()
|
|
20
|
-
load = utils.get_load_func()
|
|
21
|
-
save = utils.get_save_func()
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def list_audio_backends() -> List[str]:
|
|
25
|
-
"""List available backends
|
|
26
|
-
|
|
27
|
-
Returns:
|
|
28
|
-
list of str: The list of available backends.
|
|
29
|
-
|
|
30
|
-
The possible values are; ``"ffmpeg"``, ``"sox"`` and ``"soundfile"``.
|
|
31
|
-
"""
|
|
32
|
-
|
|
33
|
-
return list(utils.get_available_backends().keys())
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
# Temporary until global backend is removed
|
|
37
|
-
@deprecated("With dispatcher enabled, this function is no-op. You can remove the function call.")
|
|
38
|
-
def get_audio_backend() -> Optional[str]:
|
|
39
|
-
"""Get the name of the current global backend
|
|
40
|
-
|
|
41
|
-
Returns:
|
|
42
|
-
str or None:
|
|
43
|
-
If dispatcher mode is enabled, returns ``None`` otherwise,
|
|
44
|
-
the name of current backend or ``None`` (no backend is set).
|
|
45
|
-
"""
|
|
46
|
-
return None
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
# Temporary until global backend is removed
|
|
50
|
-
@deprecated("With dispatcher enabled, this function is no-op. You can remove the function call.")
|
|
51
|
-
def set_audio_backend(backend: Optional[str]): # noqa
|
|
52
|
-
"""Set the global backend.
|
|
53
|
-
|
|
54
|
-
This is a no-op when dispatcher mode is enabled.
|
|
55
|
-
|
|
56
|
-
Args:
|
|
57
|
-
backend (str or None): Name of the backend.
|
|
58
|
-
One of ``"sox_io"`` or ``"soundfile"`` based on availability
|
|
59
|
-
of the system. If ``None`` is provided the current backend is unassigned.
|
|
60
|
-
"""
|
|
61
|
-
pass
|