PyPI - torchaudio - Versions diffs - 2.8.0__cp313-cp313-win_amd64.whl → 2.9.0__cp313-cp313-win_amd64.whl - Mend

torchaudio 2.8.0__cp313-cp313-win_amd64.whl → 2.9.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of torchaudio might be problematic. Click here for more details.

Files changed (92)

torchaudio/__init__.py +179 -39
torchaudio/_extension/__init__.py +1 -14
torchaudio/_extension/utils.py +0 -47
torchaudio/_internal/module_utils.py +12 -3
torchaudio/_torchcodec.py +73 -85
torchaudio/datasets/cmuarctic.py +1 -1
torchaudio/datasets/utils.py +1 -1
torchaudio/functional/__init__.py +0 -2
torchaudio/functional/_alignment.py +1 -1
torchaudio/functional/filtering.py +70 -55
torchaudio/functional/functional.py +26 -60
torchaudio/lib/_torchaudio.pyd +0 -0
torchaudio/lib/libtorchaudio.pyd +0 -0
torchaudio/models/decoder/__init__.py +14 -2
torchaudio/models/decoder/_ctc_decoder.py +6 -6
torchaudio/models/decoder/_cuda_ctc_decoder.py +1 -1
torchaudio/models/squim/objective.py +2 -2
torchaudio/pipelines/_source_separation_pipeline.py +1 -1
torchaudio/pipelines/_squim_pipeline.py +2 -2
torchaudio/pipelines/_tts/utils.py +1 -1
torchaudio/pipelines/rnnt_pipeline.py +4 -4
torchaudio/transforms/__init__.py +1 -0
torchaudio/transforms/_transforms.py +2 -2
torchaudio/utils/__init__.py +2 -9
torchaudio/utils/download.py +1 -3
torchaudio/version.py +2 -2
{torchaudio-2.8.0.dist-info → torchaudio-2.9.0.dist-info}/METADATA +8 -11
torchaudio-2.9.0.dist-info/RECORD +85 -0
{torchaudio-2.8.0.dist-info → torchaudio-2.9.0.dist-info}/top_level.txt +0 -1
torchaudio/_backend/__init__.py +0 -61
torchaudio/_backend/backend.py +0 -53
torchaudio/_backend/common.py +0 -52
torchaudio/_backend/ffmpeg.py +0 -334
torchaudio/_backend/soundfile.py +0 -54
torchaudio/_backend/soundfile_backend.py +0 -457
torchaudio/_backend/sox.py +0 -91
torchaudio/_backend/utils.py +0 -350
torchaudio/backend/__init__.py +0 -8
torchaudio/backend/_no_backend.py +0 -25
torchaudio/backend/_sox_io_backend.py +0 -294
torchaudio/backend/common.py +0 -13
torchaudio/backend/no_backend.py +0 -14
torchaudio/backend/soundfile_backend.py +0 -14
torchaudio/backend/sox_io_backend.py +0 -14
torchaudio/io/__init__.py +0 -20
torchaudio/io/_effector.py +0 -347
torchaudio/io/_playback.py +0 -72
torchaudio/kaldi_io.py +0 -150
torchaudio/prototype/__init__.py +0 -0
torchaudio/prototype/datasets/__init__.py +0 -4
torchaudio/prototype/datasets/musan.py +0 -68
torchaudio/prototype/functional/__init__.py +0 -26
torchaudio/prototype/functional/_dsp.py +0 -441
torchaudio/prototype/functional/_rir.py +0 -382
torchaudio/prototype/functional/functional.py +0 -193
torchaudio/prototype/models/__init__.py +0 -39
torchaudio/prototype/models/_conformer_wav2vec2.py +0 -801
torchaudio/prototype/models/_emformer_hubert.py +0 -337
torchaudio/prototype/models/conv_emformer.py +0 -529
torchaudio/prototype/models/hifi_gan.py +0 -342
torchaudio/prototype/models/rnnt.py +0 -717
torchaudio/prototype/models/rnnt_decoder.py +0 -402
torchaudio/prototype/pipelines/__init__.py +0 -21
torchaudio/prototype/pipelines/_vggish/__init__.py +0 -7
torchaudio/prototype/pipelines/_vggish/_vggish_impl.py +0 -236
torchaudio/prototype/pipelines/_vggish/_vggish_pipeline.py +0 -83
torchaudio/prototype/pipelines/hifigan_pipeline.py +0 -233
torchaudio/prototype/pipelines/rnnt_pipeline.py +0 -58
torchaudio/prototype/transforms/__init__.py +0 -9
torchaudio/prototype/transforms/_transforms.py +0 -461
torchaudio/sox_effects/__init__.py +0 -10
torchaudio/sox_effects/sox_effects.py +0 -275
torchaudio/utils/ffmpeg_utils.py +0 -11
torchaudio/utils/sox_utils.py +0 -118
torchaudio-2.8.0.dist-info/RECORD +0 -145
torio/__init__.py +0 -8
torio/_extension/__init__.py +0 -13
torio/_extension/utils.py +0 -147
torio/io/__init__.py +0 -9
torio/io/_streaming_media_decoder.py +0 -977
torio/io/_streaming_media_encoder.py +0 -502
torio/lib/__init__.py +0 -0
torio/lib/_torio_ffmpeg4.pyd +0 -0
torio/lib/_torio_ffmpeg5.pyd +0 -0
torio/lib/_torio_ffmpeg6.pyd +0 -0
torio/lib/libtorio_ffmpeg4.pyd +0 -0
torio/lib/libtorio_ffmpeg5.pyd +0 -0
torio/lib/libtorio_ffmpeg6.pyd +0 -0
torio/utils/__init__.py +0 -4
torio/utils/ffmpeg_utils.py +0 -275
{torchaudio-2.8.0.dist-info → torchaudio-2.9.0.dist-info}/WHEEL +0 -0
{torchaudio-2.8.0.dist-info → torchaudio-2.9.0.dist-info}/licenses/LICENSE +0 -0

torchaudio/functional/functional.py CHANGED Viewed

@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 import math
-import tempfile
 import warnings
 from collections.abc import Sequence
 from typing import List, Optional, Tuple, Union
@@ -9,8 +8,7 @@ from typing import List, Optional, Tuple, Union
 import torch
 import torchaudio
 from torch import Tensor
-from torchaudio._internal.module_utils import deprecated, dropping_support
+from torchaudio._internal.module_utils import dropping_support
 from .filtering import highpass_biquad, treble_biquad
@@ -34,7 +32,6 @@ __all__ = [
     "mask_along_axis_iid",
     "sliding_window_cmn",
     "spectral_centroid",
-    "apply_codec",
     "resample",
     "edit_distance",
     "loudness",
@@ -817,7 +814,7 @@ def _get_mask_param(mask_param: int, p: float, axis_length: int) -> int:
 def mask_along_axis_iid(
     specgrams: Tensor,
     mask_param: int,
-    mask_value: float,
+    mask_value: Union[float, Tensor],
     axis: int,
     p: float = 1.0,
 ) -> Tensor:
@@ -874,7 +871,12 @@ def mask_along_axis_iid(
     # Per batch example masking
     specgrams = specgrams.transpose(axis, -1)
-    specgrams = specgrams.masked_fill((mask >= mask_start) & (mask < mask_end), mask_value)
+    # this aims to avoid CPU-GPU sync from upstream
+    specgrams = (
+        torch.where((mask >= mask_start) & (mask < mask_end), mask_value.repeat(specgrams.shape), specgrams)
+        if isinstance(mask_value, Tensor)
+        else specgrams.masked_fill((mask >= mask_start) & (mask < mask_end), mask_value)
+    )
     specgrams = specgrams.transpose(axis, -1)
     return specgrams
@@ -1296,51 +1298,6 @@ def spectral_centroid(
     return (freqs * specgram).sum(dim=freq_dim) / specgram.sum(dim=freq_dim)
-@deprecated("Please migrate to :py:class:`torchaudio.io.AudioEffector`.", remove=False)
-def apply_codec(
-    waveform: Tensor,
-    sample_rate: int,
-    format: str,
-    channels_first: bool = True,
-    compression: Optional[float] = None,
-    encoding: Optional[str] = None,
-    bits_per_sample: Optional[int] = None,
-) -> Tensor:
-    r"""
-    Apply codecs as a form of augmentation.
-    .. devices:: CPU
-    Args:
-        waveform (Tensor): Audio data. Must be 2 dimensional. See also ```channels_first```.
-        sample_rate (int): Sample rate of the audio waveform.
-        format (str): File format.
-        channels_first (bool, optional):
-            When True, both the input and output Tensor have dimension `(channel, time)`.
-            Otherwise, they have dimension `(time, channel)`.
-        compression (float or None, optional): Used for formats other than WAV.
-            For more details see :py:func:`torchaudio.backend.sox_io_backend.save`.
-        encoding (str or None, optional): Changes the encoding for the supported formats.
-            For more details see :py:func:`torchaudio.backend.sox_io_backend.save`.
-        bits_per_sample (int or None, optional): Changes the bit depth for the supported formats.
-            For more details see :py:func:`torchaudio.backend.sox_io_backend.save`.
-    Returns:
-        Tensor: Resulting Tensor.
-        If ``channels_first=True``, it has `(channel, time)` else `(time, channel)`.
-    """
-    from torchaudio.backend import _sox_io_backend
-    with tempfile.NamedTemporaryFile() as f:
-        torchaudio.backend._sox_io_backend.save(
-            f.name, waveform, sample_rate, channels_first, compression, format, encoding, bits_per_sample
-        )
-        augmented, sr = _sox_io_backend.load(f.name, channels_first=channels_first, format=format)
-    if sr != sample_rate:
-        augmented = resample(augmented, sr, sample_rate)
-    return augmented
 _CPU = torch.device("cpu")
@@ -1761,6 +1718,21 @@ def _fix_waveform_shape(
     return waveform_shift
+class RnntLoss(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, *args):
+        output, saved = torch.ops.torchaudio.rnnt_loss_forward(*args)
+        ctx.save_for_backward(saved)
+        return output
+    @staticmethod
+    def backward(ctx, dy):
+        grad = ctx.saved_tensors[0]
+        grad_out = dy.view((-1, 1, 1, 1))
+        result = grad * grad_out
+        return (result, None, None, None, None, None, None, None)
 def _rnnt_loss(
     logits: Tensor,
     targets: Tensor,
@@ -1803,15 +1775,7 @@ def _rnnt_loss(
     if blank < 0:  # reinterpret blank index if blank < 0.
         blank = logits.shape[-1] + blank
-    costs, _ = torch.ops.torchaudio.rnnt_loss(
-        logits=logits,
-        targets=targets,
-        logit_lengths=logit_lengths,
-        target_lengths=target_lengths,
-        blank=blank,
-        clamp=clamp,
-        fused_log_softmax=fused_log_softmax,
-    )
+    costs = RnntLoss.apply(logits, targets, logit_lengths, target_lengths, blank, clamp, fused_log_softmax)
     if reduction == "mean":
         return costs.mean()
@@ -1865,10 +1829,12 @@ def psd(
     psd = psd.sum(dim=-3)
     return psd
 # Expose both deprecated wrapper as well as original because torchscript breaks on
 # wrapped functions.
 rnnt_loss = dropping_support(_rnnt_loss)
 def _compute_mat_trace(input: torch.Tensor, dim1: int = -1, dim2: int = -2) -> torch.Tensor:
     r"""Compute the trace of a Tensor along ``dim1`` and ``dim2`` dimensions.

torchaudio/lib/_torchaudio.pyd CHANGED Viewed

Binary file

torchaudio/lib/libtorchaudio.pyd CHANGED Viewed

Binary file

torchaudio/models/decoder/__init__.py CHANGED Viewed

@@ -1,5 +1,7 @@
-from torchaudio._internal.module_utils import dropping_support, dropping_class_support
 import inspect
+from torchaudio._internal.module_utils import dropping_class_support, dropping_support
 _CTC_DECODERS = [
     "CTCHypothesis",
     "CTCDecoder",
@@ -35,11 +37,21 @@ def __getattr__(name: str):
                 "To use CUCTC decoder, please set BUILD_CUDA_CTC_DECODER=1 when building from source."
             ) from err
+        # TODO: when all unsupported classes are removed, replace the
+        # following if-else block with
+        #  item = getattr(_cuda_ctc_decoder, name)
         orig_item = getattr(_cuda_ctc_decoder, name)
-        if inspect.isclass(orig_item):
+        if inspect.isclass(orig_item) or (
+            # workaround a failure to detect type instances
+            # after sphinx autodoc mocking, required for
+            # building docs
+            getattr(orig_item, "__sphinx_mock__", False)
+            and inspect.isclass(orig_item.__class__)
+        ):
             item = dropping_class_support(orig_item)
         else:
             item = dropping_support(orig_item)
         globals()[name] = item
         return item
     raise AttributeError(f"module {__name__} has no attribute {name}")

torchaudio/models/decoder/_ctc_decoder.py CHANGED Viewed

@@ -25,7 +25,7 @@ from flashlight.lib.text.dictionary import (
     Dictionary as _Dictionary,
     load_words as _load_words,
 )
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset
 try:
     from flashlight.lib.text.decoder.kenlm import KenLM as _KenLM
@@ -69,7 +69,7 @@ def _get_word_dict(lexicon, lm, lm_dict, tokens_dict, unk_word):
     if lexicon and word_dict is None:
         word_dict = _create_word_dict(lexicon)
-    elif not lexicon and word_dict is None and type(lm) == str:
+    elif not lexicon and word_dict is None and type(lm) is str:
         d = {tokens_dict.get_entry(i): [[tokens_dict.get_entry(i)]] for i in range(tokens_dict.index_size())}
         d[unk_word] = [[unk_word]]
         word_dict = _create_word_dict(d)
@@ -499,7 +499,7 @@ def ctc_decoder(
     # construct word dict and language model
     word_dict = _get_word_dict(lexicon, lm, lm_dict, tokens_dict, unk_word)
-    if type(lm) == str:
+    if type(lm) is str:
         if _KenLM is None:
             raise RuntimeError(
                 "flashlight-text is installed, but KenLM is not installed. "
@@ -554,10 +554,10 @@ def download_pretrained_files(model: str) -> _PretrainedFiles:
     """
     files = _get_filenames(model)
-    lexicon_file = download_asset(files.lexicon)
-    tokens_file = download_asset(files.tokens)
+    lexicon_file = _download_asset(files.lexicon)
+    tokens_file = _download_asset(files.tokens)
     if files.lm is not None:
-        lm_file = download_asset(files.lm)
+        lm_file = _download_asset(files.lm)
     else:
         lm_file = None

torchaudio/models/decoder/_cuda_ctc_decoder.py CHANGED Viewed

@@ -181,7 +181,7 @@ def cuda_ctc_decoder(
         >>> )
         >>> results = decoder(log_probs, encoder_out_lens) # List of shape (B, nbest) of Hypotheses
     """
-    if type(tokens) == str:
+    if type(tokens) is str:
         tokens = _get_vocab_list(tokens)
     return CUCTCDecoder(vocab_list=tokens, beam_size=beam_size, nbest=nbest, blank_skip_threshold=blank_skip_threshold)

torchaudio/models/squim/objective.py CHANGED Viewed

@@ -285,7 +285,7 @@ def squim_objective_model(
     chunk_size: int,
     chunk_stride: Optional[int] = None,
 ) -> SquimObjective:
-    """Build a custome :class:`torchaudio.prototype.models.SquimObjective` model.
+    """Build a custome :class:`torchaudio.models.squim.SquimObjective` model.
     Args:
         feat_dim (int, optional): The feature dimension after Encoder module.
@@ -313,7 +313,7 @@ def squim_objective_model(
 def squim_objective_base() -> SquimObjective:
-    """Build :class:`torchaudio.prototype.models.SquimObjective` model with default arguments."""
+    """Build :class:`torchaudio.models.squim.SquimObjective` model with default arguments."""
     return squim_objective_model(
         feat_dim=256,
         win_len=64,

torchaudio/pipelines/_source_separation_pipeline.py CHANGED Viewed

@@ -52,7 +52,7 @@ class SourceSeparationBundle:
     def get_model(self) -> torch.nn.Module:
         """Construct the model and load the pretrained weight."""
         model = self._model_factory_func()
-        path = torchaudio.utils.download_asset(self._model_path)
+        path = torchaudio.utils._download_asset(self._model_path)
         state_dict = torch.load(path)
         model.load_state_dict(state_dict)
         model.eval()

torchaudio/pipelines/_squim_pipeline.py CHANGED Viewed

@@ -50,7 +50,7 @@ class SquimObjectiveBundle:
             Variation of :py:class:`~torchaudio.models.SquimObjective`.
         """
         model = squim_objective_base()
-        path = torchaudio.utils.download_asset(f"models/{self._path}")
+        path = torchaudio.utils._download_asset(f"models/{self._path}")
         state_dict = torch.load(path, weights_only=True)
         model.load_state_dict(state_dict)
         model.eval()
@@ -125,7 +125,7 @@ class SquimSubjectiveBundle:
             Variation of :py:class:`~torchaudio.models.SquimObjective`.
         """
         model = squim_subjective_base()
-        path = torchaudio.utils.download_asset(f"models/{self._path}")
+        path = torchaudio.utils._download_asset(f"models/{self._path}")
         state_dict = torch.load(path, weights_only=True)
         model.load_state_dict(state_dict)
         model.eval()

torchaudio/pipelines/_tts/utils.py CHANGED Viewed

@@ -161,7 +161,7 @@ def _load_phonemizer(file, dl_kwargs):
         raise RuntimeError("DeepPhonemizer is not installed. Please install it.")
     from dp.phonemizer import Phonemizer
-    from dp.preprocessing.text import Preprocessor, LanguageTokenizer, SequenceTokenizer
+    from dp.preprocessing.text import LanguageTokenizer, Preprocessor, SequenceTokenizer
     # By default, dp issues DEBUG level log.
     logger = logging.getLogger("dp")

torchaudio/pipelines/rnnt_pipeline.py CHANGED Viewed

@@ -244,7 +244,7 @@ class RNNTBundle:
     def _get_model(self) -> RNNT:
         model = self._rnnt_factory_func()
-        path = torchaudio.utils.download_asset(self._rnnt_path)
+        path = torchaudio.utils._download_asset(self._rnnt_path)
         state_dict = torch.load(path)
         model.load_state_dict(state_dict)
         model.eval()
@@ -313,7 +313,7 @@ class RNNTBundle:
         Returns:
             FeatureExtractor
         """
-        local_path = torchaudio.utils.download_asset(self._global_stats_path)
+        local_path = torchaudio.utils._download_asset(self._global_stats_path)
         return _ModuleFeatureExtractor(
             torch.nn.Sequential(
                 torchaudio.transforms.MelSpectrogram(
@@ -332,7 +332,7 @@ class RNNTBundle:
         Returns:
             FeatureExtractor
         """
-        local_path = torchaudio.utils.download_asset(self._global_stats_path)
+        local_path = torchaudio.utils._download_asset(self._global_stats_path)
         return _ModuleFeatureExtractor(
             torch.nn.Sequential(
                 torchaudio.transforms.MelSpectrogram(
@@ -350,7 +350,7 @@ class RNNTBundle:
         Returns:
             TokenProcessor
         """
-        local_path = torchaudio.utils.download_asset(self._sp_model_path)
+        local_path = torchaudio.utils._download_asset(self._sp_model_path)
         return _SentencePieceTokenProcessor(local_path)

torchaudio/transforms/__init__.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from torchaudio._internal.module_utils import dropping_class_support
 from ._multi_channel import MVDR, PSD, RTFMVDR, SoudenMVDR
 from ._transforms import (
     AddNoise,

torchaudio/transforms/_transforms.py CHANGED Viewed

@@ -10,12 +10,12 @@ from torch.nn.modules.lazy import LazyModuleMixin
 from torch.nn.parameter import UninitializedParameter
 from torchaudio import functional as F
-from torchaudio.functional.functional import _rnnt_loss
 from torchaudio.functional.functional import (
     _apply_sinc_resample_kernel,
     _check_convolve_mode,
     _fix_waveform_shape,
     _get_sinc_resample_kernel,
+    _rnnt_loss,
     _stretch_waveform,
 )
@@ -1185,7 +1185,7 @@ class _AxisMasking(torch.nn.Module):
         self.iid_masks = iid_masks
         self.p = p
-    def forward(self, specgram: Tensor, mask_value: float = 0.0) -> Tensor:
+    def forward(self, specgram: Tensor, mask_value: Union[float, torch.Tensor] = 0.0) -> Tensor:
         r"""
         Args:
             specgram (Tensor): Tensor of dimension `(..., freq, time)`.

torchaudio/utils/__init__.py CHANGED Viewed

@@ -1,11 +1,4 @@
-from torio.utils import ffmpeg_utils
+from .download import _download_asset
-from . import sox_utils
-from .download import download_asset
-__all__ = [
-    "download_asset",
-    "sox_utils",
-    "ffmpeg_utils",
-]
+__all__ = ["_download_asset"]

torchaudio/utils/download.py CHANGED Viewed

@@ -30,10 +30,8 @@ def _get_hash(path, hash, chunk_size=1028):
             data = file.read(chunk_size)
     return m.hexdigest()
-from torchaudio._internal.module_utils import dropping_support
-@dropping_support
-def download_asset(
+def _download_asset(
     key: str,
     hash: str = "",
     path: Union[str, PathLike] = "",

torchaudio/version.py CHANGED Viewed

@@ -1,2 +1,2 @@
-__version__ = '2.8.0+cpu'
-git_version = '6e1c7fe9ff6d82b8665d0a46d859d3357d2ebaaa'
+__version__ = '2.9.0+cpu'
+git_version = 'eaa9e4e4dd413dca1084116581dc84fad403db3b'

{torchaudio-2.8.0.dist-info → torchaudio-2.9.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: torchaudio
-Version: 2.8.0
+Version: 2.9.0
 Summary: An audio package for PyTorch
 Home-page: https://github.com/pytorch/audio
 Author: Soumith Chintala, David Pollack, Sean Naren, Peter Goldsborough, Moto Hira, Caroline Chen, Jeff Hwang, Zhaoheng Ni, Xiaohui Zhang
@@ -15,17 +15,17 @@ Classifier: Operating System :: MacOS :: MacOS X
 Classifier: Operating System :: Microsoft :: Windows
 Classifier: Operating System :: POSIX
 Classifier: Programming Language :: C++
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Topic :: Multimedia :: Sound/Audio
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: torch==2.8.0
+Requires-Dist: torch==2.9.0
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
@@ -48,10 +48,10 @@ torchaudio: an audio library for PyTorch
 ![TorchAudio Logo](docs/source/_static/img/logo.png)
 > [!NOTE]
-> **We are in the process of refactoring TorchAudio and transitioning it into a
->  maintenance phase. This process will include removing some user-facing
->  features: those features are deprecated from TorchAudio 2.8 and will be removed in 2.9.
->  Our main goals are to reduce redundancies with the rest of the
+> **We have transitioned TorchAudio into a
+>  maintenance phase. This process removed some user-facing
+>  features. These features were deprecated from TorchAudio 2.8 and removed in 2.9.
+>  Our main goals were to reduce redundancies with the rest of the
 >  PyTorch ecosystem, make it easier to maintain, and create a version of
 >  TorchAudio that is more tightly scoped to its strengths: processing audio
 >  data for ML. Please see
@@ -67,9 +67,6 @@ processing library. The benefits of PyTorch can be seen in torchaudio through
 having all the computations be through PyTorch operations which makes it easy
 to use and feel like a natural extension.
-- [Support audio I/O (Load files, Save files)](http://pytorch.org/audio/main/)
-  - Load a variety of audio formats, such as `wav`, `mp3`, `ogg`, `flac`, `opus`, `sphere`, into a torch Tensor using SoX
-  - [Kaldi (ark/scp)](http://pytorch.org/audio/main/kaldi_io.html)
 - [Dataloaders for common audio datasets](http://pytorch.org/audio/main/datasets.html)
 - Audio and speech processing functions
   - [forced_align](https://pytorch.org/audio/main/generated/torchaudio.functional.forced_align.html)
@@ -110,7 +107,7 @@ If you find this package useful, please cite as:
 ```bibtex
 @misc{hwang2023torchaudio,
-      title={TorchAudio 2.1: Advancing speech recognition, self-supervised learning, and audio processing components for PyTorch},
+      title={TorchAudio 2.1: Advancing speech recognition, self-supervised learning, and audio processing components for PyTorch},
       author={Jeff Hwang and Moto Hira and Caroline Chen and Xiaohui Zhang and Zhaoheng Ni and Guangzhi Sun and Pingchuan Ma and Ruizhe Huang and Vineel Pratap and Yuekai Zhang and Anurag Kumar and Chin-Yun Yu and Chuang Zhu and Chunxi Liu and Jacob Kahn and Mirco Ravanelli and Peng Sun and Shinji Watanabe and Yangyang Shi and Yumeng Tao and Robin Scheibler and Samuele Cornell and Sean Kim and Stavros Petridis},
       year={2023},
       eprint={2310.17864},

torchaudio-2.9.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,85 @@
+torchaudio/__init__.py,sha256=-yWZZVblWA06HQ4cUS8sRsaSZugqacqw0A7vQELxjYE,8082
+torchaudio/_torchcodec.py,sha256=2saifA0BdhE12Zb51vgS6zrwsE1ir7mmvmJ1lFOWtLI,13764
+torchaudio/version.py,sha256=UZrgLp-AqA1uCSaLgWb8hq-wCqJ8Pz6Pe5opbKJNYKQ,85
+torchaudio/_extension/__init__.py,sha256=j7wdZTgwGv6PcQgS1kMisbDA-M4emX3gheOSmjq_jWs,1966
+torchaudio/_extension/utils.py,sha256=kQ_PyLToNuPjLKOQa_-tT1LpFowcGQ0lpcuzrRPrmb8,5059
+torchaudio/_internal/__init__.py,sha256=80cpJfTS8977YYrU3q5p4DRAGAkqEJrmG9Lq2hEDpoo,251
+torchaudio/_internal/module_utils.py,sha256=sXO16_5rS9c67LlADALR16k3HcZo9dHyZ-y_L0zFnnY,5400
+torchaudio/compliance/__init__.py,sha256=JNH_-dTQVmm55YwcVMuVvUYFWdXhGn4C__9S8IUsNoU,53
+torchaudio/compliance/kaldi.py,sha256=bS7qJgS3k8FK1RkMiNEoP3q0xhjeV_V4RHQ9jo_rqOM,37479
+torchaudio/datasets/__init__.py,sha256=hdHldm3OzoQLbI0kHj8tLxqwDhzMfedq0_t1kAK7ORg,1218
+torchaudio/datasets/cmuarctic.py,sha256=c7c75817_brmb7cvFO6_Bj249cJDph9LDBOqs8aUyhM,7238
+torchaudio/datasets/cmudict.py,sha256=_9vTz7_8BFVrcHeA61_-h2XLOl6IsdWCptkMWziOW7U,6176
+torchaudio/datasets/commonvoice.py,sha256=OcFn-nG4YfBIz0YIpH91xH9rFka8yFJmrxy4vFZkC4I,2849
+torchaudio/datasets/dr_vctk.py,sha256=Ayf85prDNr1LcWQ4bysVWdRVPry2JALjv6Mtq-6iBpY,4498
+torchaudio/datasets/fluentcommands.py,sha256=KnmH1Y28k5PhqQX6eV-75MqwTRxiHSUUcvAsa-K954s,3353
+torchaudio/datasets/gtzan.py,sha256=kt25Ly9qDGuiiVXgsXhS05tGi6laRhRko81-BQ4sZ-w,25475
+torchaudio/datasets/iemocap.py,sha256=ZMMG_FpcWcMHEbhuRYRQaUWi_DoegjxCrnVyCg5EEVE,5077
+torchaudio/datasets/librilight_limited.py,sha256=iwZBlSKVLrXzhZvaqjuVRGO6czxX4fpdzd8wWe5feWQ,4290
+torchaudio/datasets/librimix.py,sha256=AncE671AOl04dRPsajNZW-ZxxI_PwA2sjBftdBg4Q-k,5249
+torchaudio/datasets/librispeech.py,sha256=ys769I0UzG07UEmyZ_KDwATh4yc08hFUuCayK8tYIGg,6482
+torchaudio/datasets/librispeech_biasing.py,sha256=KEGplRU_wpgb0VqrT-t42kvtC7lg4uMssZcosVvvPhg,7147
+torchaudio/datasets/libritts.py,sha256=91Ep2Mq3OySre25GniXBLmRzTwEPiKmMaqXnzirn0xY,6038
+torchaudio/datasets/ljspeech.py,sha256=l09BSBQH76I-LhYkIRF0u18tTi-4yysaF4gj2GSZaxw,3601
+torchaudio/datasets/musdb_hq.py,sha256=FVlKsGEBHiT50y9GLswnt2QFph2PjiI6yCy1MxiG6f8,5214
+torchaudio/datasets/quesst14.py,sha256=3y6H3T3g78jkDqca8jORQBOViZhH1RhlsfuY8HJ2OcU,4591
+torchaudio/datasets/snips.py,sha256=mwVc5KsbMlPQJ87eyYgjnQ5S4EFXoQvm13dO0rXpJuE,5165
+torchaudio/datasets/speechcommands.py,sha256=_wmrKSiEe0COO7uk0JVXypBmNxu0urnceHuFQ6zMOk0,7664
+torchaudio/datasets/tedlium.py,sha256=UQZUaeUqmFntZWcH9HXOpGeW6tsCcG81bPjX2_CWxbg,8916
+torchaudio/datasets/utils.py,sha256=mpg4t0hFitRGj9Ow7MXwCFNKGTnVsErVLpxfsbP7FE8,1757
+torchaudio/datasets/vctk.py,sha256=vN_VzxTLyHW11I_rzfzMVA3h5JW917FaU3NCnR-zcL0,5842
+torchaudio/datasets/voxceleb1.py,sha256=JlYkbyYOAFUFhGLULe3lgucANWf_G7qGqw47YjiX2IM,12034
+torchaudio/datasets/yesno.py,sha256=B3hRNUazvB8V8SwOUlQzliB9vI9gMkl9SEl-dZ4PEaw,3115
+torchaudio/functional/__init__.py,sha256=do2OUOUhg_8Z7TPUQ1HHpoWjNAPrwgxDIemk718TWO0,2581
+torchaudio/functional/_alignment.py,sha256=P2ehTZ7IwuMFWVNqrhYjc1imBKNykwC03D7uvbgxBCA,4867
+torchaudio/functional/filtering.py,sha256=piUbVknBOBdILrd1M9bzk2A9UCCn4qzKXNEgv7IYD7Q,64010
+torchaudio/functional/functional.py,sha256=c-jSGnLx54qnQk5efZiLrioi5x1-2LIQF3P2lvLPPPo,97236
+torchaudio/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+torchaudio/lib/_torchaudio.pyd,sha256=Rxb-7sUaNsFBbVYhvFro2iof-_xXcUqgFWOCUghOGrc,835584
+torchaudio/lib/libtorchaudio.pyd,sha256=rRAWMlUmFFnBF5kFihd33UgTVJ-vJE-FMFPREK8Qx3g,923648
+torchaudio/models/__init__.py,sha256=Gi3UQvxjwTLW9wfKlF42O3Vup70d0bk2x-rZS89ASwI,2080
+torchaudio/models/_hdemucs.py,sha256=ipAj7965PO_WEZqQwW1om9gQj90UhQOeU6HU3Lpvzwo,39250
+torchaudio/models/conformer.py,sha256=gVrOYeJkPlVaX-4eZpVzNUe_r3k7g1Y6NaaQ8JZP-r4,10361
+torchaudio/models/conv_tasnet.py,sha256=D7Y10sOzLe03gygfN1J5R73SIHkIGVQOkqKQ6Ni3o_s,12870
+torchaudio/models/deepspeech.py,sha256=nVYc2xwWpFO6gu5CR0mbqLiAzJn8lAfHcdcP92i22mo,2830
+torchaudio/models/emformer.py,sha256=WbaeZcrPFOOLn4igqweE0AfuF_SQZpqg7XPGEhl7C8c,38650
+torchaudio/models/rnnt.py,sha256=PNJpZd3vH6wRq8TEf4UlPtVHbte9wOJ-bRMEug6gp08,36357
+torchaudio/models/rnnt_decoder.py,sha256=CBBMZhhq5Bgax0_3p3SZD-Os3S1LFHB91oTgVED4bmY,13178
+torchaudio/models/tacotron2.py,sha256=mZ5lLSa75oqc0hgkc3sIm5_gK-knhtgX3dmg9-oLQao,46960
+torchaudio/models/wav2letter.py,sha256=oetxpH5RG0TadYB75IOmYOrnraaPvSlcSNpRZb2FE_A,3350
+torchaudio/models/wavernn.py,sha256=LRgL36jA6WzI1PAzBY6P52oCMGSTOraXB8fEgkwpSxw,15855
+torchaudio/models/decoder/__init__.py,sha256=PonG1Rg0CRBBbmRLZZQ1n2rXiDhivAAU9x67_G15seI,1963
+torchaudio/models/decoder/_ctc_decoder.py,sha256=zKsOdPNrUn7v2QJmluC3kOp90RQaP3CSmQSurc1nAFw,20654
+torchaudio/models/decoder/_cuda_ctc_decoder.py,sha256=4JKcQak4Ke6Id0EJEDJEx1yLTXKbJpIDNiu7QSe3gWU,7373
+torchaudio/models/squim/__init__.py,sha256=eQox8kPviOthKulpzZvPK0a66NHW7MzYE4aOF7va_kU,357
+torchaudio/models/squim/objective.py,sha256=FCYu0i2OXY3e6Z-BO2p-rc6rU0PvpJZ0gA-CPZZA9fw,12607
+torchaudio/models/squim/subjective.py,sha256=1_gK9O3nvrjiikpP46IdsMzKduSTt91kKklA69wQqiw,5947
+torchaudio/models/wav2vec2/__init__.py,sha256=j5FdQFfuIpdIKYwoMLop4Ba70GGoS-lK61tU-oNG5wg,972
+torchaudio/models/wav2vec2/components.py,sha256=EzmuGc5qHVPrHCGqYVHTvdjqP2gCrBfnHSoTK9GsZ1w,48244
+torchaudio/models/wav2vec2/model.py,sha256=kP6QKsF1PjleyUMhaPjydi0pCRy4GGUArRWBzfDJmdE,61671
+torchaudio/models/wav2vec2/wavlm_attention.py,sha256=iYde9grsb_RaEs87FI5ykyN3z0Ix1plqpsMNvakAiWM,11058
+torchaudio/models/wav2vec2/utils/__init__.py,sha256=1eowaOEKRbp7JajFNv_r47REJqnMmXidukS7Mrwp_5Q,188
+torchaudio/models/wav2vec2/utils/import_fairseq.py,sha256=so7T-otDNCsTUtzJRUFFGWyd0caWl3RY_UbFMxJ4DJE,9411
+torchaudio/models/wav2vec2/utils/import_huggingface.py,sha256=NMK6YrAIDfOw8j1tV-3XTwx_mwbJHvg8ldTrAWRztIM,6080
+torchaudio/pipelines/__init__.py,sha256=oMwOu-1T_ugJmhdaoI5NrCDrUAGrpDOlJQO8h-bLAW4,2847
+torchaudio/pipelines/_source_separation_pipeline.py,sha256=ttHqjcwCmCPWLj0YeDsTa1-XetuyjPDZ9D2deE3FmkA,4334
+torchaudio/pipelines/_squim_pipeline.py,sha256=eYdrKVXUru3VdfpaDnMN5qCuKHNveEd_jwGqtemV9ls,6438
+torchaudio/pipelines/rnnt_pipeline.py,sha256=16OMN_4yY1TEKLWjqkzFSMKByITxLobj6X1uk78pwQI,14133
+torchaudio/pipelines/_tts/__init__.py,sha256=WKc5c06b_M9MvEohJZghJJWAL7vXvfwRIkdy85UCh04,442
+torchaudio/pipelines/_tts/impl.py,sha256=wwrTyTEEkew22AnzB_ZklapGaAstJSUBawhA7bOcGXM,15759
+torchaudio/pipelines/_tts/interface.py,sha256=y1mU0446Vy2hHpCwMqRZt1UI4ZXl-C4tJp92EylwHh0,10479
+torchaudio/pipelines/_tts/utils.py,sha256=tuiEA5eqoBNgt46TxGA7lOEqljbuECL0-pc_uSco0xo,5040
+torchaudio/pipelines/_wav2vec2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+torchaudio/pipelines/_wav2vec2/aligner.py,sha256=HOcthFgup97QMx9ZXCmkv6jdw_zxdRT-e_SilXEujNU,2796
+torchaudio/pipelines/_wav2vec2/impl.py,sha256=I6htNo4Wt5LPxX9Z8rmxarFE8BZOZBUFIU9T9k1k2Po,67260
+torchaudio/pipelines/_wav2vec2/utils.py,sha256=CVawfXmVGWY8mj-_6r4KO907BpF67WAVWHEHhycFIaM,7317
+torchaudio/transforms/__init__.py,sha256=TsmUD7pXQO940uG0GhFTuMB48PT6uOklN5ptd-Yut14,1476
+torchaudio/transforms/_multi_channel.py,sha256=Musw7dTu25HNjKeIcKHUDuqBmj_GC2e3TaakqJcffW8,22688
+torchaudio/transforms/_transforms.py,sha256=g-E3nGgCEcKeWqEtyrDquSKfecHMD8olJRUMnqHHWYI,89057
+torchaudio/utils/__init__.py,sha256=yNMWIjoGd68FPxV6PhDdjO1oRemlM0QPJsu_k6iVaGQ,74
+torchaudio/utils/download.py,sha256=rf_yS18i7n4JYbIGpWiWc0ipe4sGv3Rvivv6p0DaZgU,2972
+torchaudio-2.9.0.dist-info/METADATA,sha256=Nu1BcomsFTqaoMK8arCV6-o4AEnU86n_TsLPFOzn--A,6911
+torchaudio-2.9.0.dist-info/WHEEL,sha256=yC3OVe9skFE0rAd70upJxuH5WUo8L-vbuVSibQ-iR4c,101
+torchaudio-2.9.0.dist-info/top_level.txt,sha256=mPKWMIRWWW2JwbJN6wRckeN1gpbjhifapAF0Z9t7SMo,11
+torchaudio-2.9.0.dist-info/RECORD,,
+torchaudio-2.9.0.dist-info/licenses/LICENSE,sha256=MmOOF5kxv-VR6r9nsOZ6E7SD4Wa1jdcmNjSrf4nzlvU,1363

{torchaudio-2.8.0.dist-info → torchaudio-2.9.0.dist-info}/top_level.txt RENAMED Viewed

@@ -1,2 +1 @@
 torchaudio
-torio

torchaudio/_backend/__init__.py DELETED Viewed

@@ -1,61 +0,0 @@
-from typing import List, Optional
-from torchaudio._internal.module_utils import deprecated
-from . import utils
-from .common import AudioMetaData
-__all__ = [
-    "AudioMetaData",
-    "load",
-    "info",
-    "save",
-    "list_audio_backends",
-    "get_audio_backend",
-    "set_audio_backend",
-]
-info = utils.get_info_func()
-load = utils.get_load_func()
-save = utils.get_save_func()
-def list_audio_backends() -> List[str]:
-    """List available backends
-    Returns:
-        list of str: The list of available backends.
-        The possible values are; ``"ffmpeg"``, ``"sox"`` and ``"soundfile"``.
-    """
-    return list(utils.get_available_backends().keys())
-# Temporary until global backend is removed
-@deprecated("With dispatcher enabled, this function is no-op. You can remove the function call.")
-def get_audio_backend() -> Optional[str]:
-    """Get the name of the current global backend
-    Returns:
-        str or None:
-            If dispatcher mode is enabled, returns ``None`` otherwise,
-            the name of current backend or ``None`` (no backend is set).
-    """
-    return None
-# Temporary until global backend is removed
-@deprecated("With dispatcher enabled, this function is no-op. You can remove the function call.")
-def set_audio_backend(backend: Optional[str]):  # noqa
-    """Set the global backend.
-    This is a no-op when dispatcher mode is enabled.
-    Args:
-        backend (str or None): Name of the backend.
-            One of ``"sox_io"`` or ``"soundfile"`` based on availability
-            of the system. If ``None`` is provided the  current backend is unassigned.
-    """
-    pass