torchaudio 2.8.0__cp313-cp313-win_amd64.whl → 2.9.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchaudio might be problematic. Click here for more details.

Files changed (92) hide show
  1. torchaudio/__init__.py +179 -39
  2. torchaudio/_extension/__init__.py +1 -14
  3. torchaudio/_extension/utils.py +0 -47
  4. torchaudio/_internal/module_utils.py +12 -3
  5. torchaudio/_torchcodec.py +73 -85
  6. torchaudio/datasets/cmuarctic.py +1 -1
  7. torchaudio/datasets/utils.py +1 -1
  8. torchaudio/functional/__init__.py +0 -2
  9. torchaudio/functional/_alignment.py +1 -1
  10. torchaudio/functional/filtering.py +70 -55
  11. torchaudio/functional/functional.py +26 -60
  12. torchaudio/lib/_torchaudio.pyd +0 -0
  13. torchaudio/lib/libtorchaudio.pyd +0 -0
  14. torchaudio/models/decoder/__init__.py +14 -2
  15. torchaudio/models/decoder/_ctc_decoder.py +6 -6
  16. torchaudio/models/decoder/_cuda_ctc_decoder.py +1 -1
  17. torchaudio/models/squim/objective.py +2 -2
  18. torchaudio/pipelines/_source_separation_pipeline.py +1 -1
  19. torchaudio/pipelines/_squim_pipeline.py +2 -2
  20. torchaudio/pipelines/_tts/utils.py +1 -1
  21. torchaudio/pipelines/rnnt_pipeline.py +4 -4
  22. torchaudio/transforms/__init__.py +1 -0
  23. torchaudio/transforms/_transforms.py +2 -2
  24. torchaudio/utils/__init__.py +2 -9
  25. torchaudio/utils/download.py +1 -3
  26. torchaudio/version.py +2 -2
  27. {torchaudio-2.8.0.dist-info → torchaudio-2.9.0.dist-info}/METADATA +8 -11
  28. torchaudio-2.9.0.dist-info/RECORD +85 -0
  29. {torchaudio-2.8.0.dist-info → torchaudio-2.9.0.dist-info}/top_level.txt +0 -1
  30. torchaudio/_backend/__init__.py +0 -61
  31. torchaudio/_backend/backend.py +0 -53
  32. torchaudio/_backend/common.py +0 -52
  33. torchaudio/_backend/ffmpeg.py +0 -334
  34. torchaudio/_backend/soundfile.py +0 -54
  35. torchaudio/_backend/soundfile_backend.py +0 -457
  36. torchaudio/_backend/sox.py +0 -91
  37. torchaudio/_backend/utils.py +0 -350
  38. torchaudio/backend/__init__.py +0 -8
  39. torchaudio/backend/_no_backend.py +0 -25
  40. torchaudio/backend/_sox_io_backend.py +0 -294
  41. torchaudio/backend/common.py +0 -13
  42. torchaudio/backend/no_backend.py +0 -14
  43. torchaudio/backend/soundfile_backend.py +0 -14
  44. torchaudio/backend/sox_io_backend.py +0 -14
  45. torchaudio/io/__init__.py +0 -20
  46. torchaudio/io/_effector.py +0 -347
  47. torchaudio/io/_playback.py +0 -72
  48. torchaudio/kaldi_io.py +0 -150
  49. torchaudio/prototype/__init__.py +0 -0
  50. torchaudio/prototype/datasets/__init__.py +0 -4
  51. torchaudio/prototype/datasets/musan.py +0 -68
  52. torchaudio/prototype/functional/__init__.py +0 -26
  53. torchaudio/prototype/functional/_dsp.py +0 -441
  54. torchaudio/prototype/functional/_rir.py +0 -382
  55. torchaudio/prototype/functional/functional.py +0 -193
  56. torchaudio/prototype/models/__init__.py +0 -39
  57. torchaudio/prototype/models/_conformer_wav2vec2.py +0 -801
  58. torchaudio/prototype/models/_emformer_hubert.py +0 -337
  59. torchaudio/prototype/models/conv_emformer.py +0 -529
  60. torchaudio/prototype/models/hifi_gan.py +0 -342
  61. torchaudio/prototype/models/rnnt.py +0 -717
  62. torchaudio/prototype/models/rnnt_decoder.py +0 -402
  63. torchaudio/prototype/pipelines/__init__.py +0 -21
  64. torchaudio/prototype/pipelines/_vggish/__init__.py +0 -7
  65. torchaudio/prototype/pipelines/_vggish/_vggish_impl.py +0 -236
  66. torchaudio/prototype/pipelines/_vggish/_vggish_pipeline.py +0 -83
  67. torchaudio/prototype/pipelines/hifigan_pipeline.py +0 -233
  68. torchaudio/prototype/pipelines/rnnt_pipeline.py +0 -58
  69. torchaudio/prototype/transforms/__init__.py +0 -9
  70. torchaudio/prototype/transforms/_transforms.py +0 -461
  71. torchaudio/sox_effects/__init__.py +0 -10
  72. torchaudio/sox_effects/sox_effects.py +0 -275
  73. torchaudio/utils/ffmpeg_utils.py +0 -11
  74. torchaudio/utils/sox_utils.py +0 -118
  75. torchaudio-2.8.0.dist-info/RECORD +0 -145
  76. torio/__init__.py +0 -8
  77. torio/_extension/__init__.py +0 -13
  78. torio/_extension/utils.py +0 -147
  79. torio/io/__init__.py +0 -9
  80. torio/io/_streaming_media_decoder.py +0 -977
  81. torio/io/_streaming_media_encoder.py +0 -502
  82. torio/lib/__init__.py +0 -0
  83. torio/lib/_torio_ffmpeg4.pyd +0 -0
  84. torio/lib/_torio_ffmpeg5.pyd +0 -0
  85. torio/lib/_torio_ffmpeg6.pyd +0 -0
  86. torio/lib/libtorio_ffmpeg4.pyd +0 -0
  87. torio/lib/libtorio_ffmpeg5.pyd +0 -0
  88. torio/lib/libtorio_ffmpeg6.pyd +0 -0
  89. torio/utils/__init__.py +0 -4
  90. torio/utils/ffmpeg_utils.py +0 -275
  91. {torchaudio-2.8.0.dist-info → torchaudio-2.9.0.dist-info}/WHEEL +0 -0
  92. {torchaudio-2.8.0.dist-info → torchaudio-2.9.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,7 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
 
3
3
  import math
4
- import tempfile
5
4
  import warnings
6
5
  from collections.abc import Sequence
7
6
  from typing import List, Optional, Tuple, Union
@@ -9,8 +8,7 @@ from typing import List, Optional, Tuple, Union
9
8
  import torch
10
9
  import torchaudio
11
10
  from torch import Tensor
12
- from torchaudio._internal.module_utils import deprecated, dropping_support
13
-
11
+ from torchaudio._internal.module_utils import dropping_support
14
12
 
15
13
  from .filtering import highpass_biquad, treble_biquad
16
14
 
@@ -34,7 +32,6 @@ __all__ = [
34
32
  "mask_along_axis_iid",
35
33
  "sliding_window_cmn",
36
34
  "spectral_centroid",
37
- "apply_codec",
38
35
  "resample",
39
36
  "edit_distance",
40
37
  "loudness",
@@ -817,7 +814,7 @@ def _get_mask_param(mask_param: int, p: float, axis_length: int) -> int:
817
814
  def mask_along_axis_iid(
818
815
  specgrams: Tensor,
819
816
  mask_param: int,
820
- mask_value: float,
817
+ mask_value: Union[float, Tensor],
821
818
  axis: int,
822
819
  p: float = 1.0,
823
820
  ) -> Tensor:
@@ -874,7 +871,12 @@ def mask_along_axis_iid(
874
871
 
875
872
  # Per batch example masking
876
873
  specgrams = specgrams.transpose(axis, -1)
877
- specgrams = specgrams.masked_fill((mask >= mask_start) & (mask < mask_end), mask_value)
874
+ # this aims to avoid CPU-GPU sync from upstream
875
+ specgrams = (
876
+ torch.where((mask >= mask_start) & (mask < mask_end), mask_value.repeat(specgrams.shape), specgrams)
877
+ if isinstance(mask_value, Tensor)
878
+ else specgrams.masked_fill((mask >= mask_start) & (mask < mask_end), mask_value)
879
+ )
878
880
  specgrams = specgrams.transpose(axis, -1)
879
881
 
880
882
  return specgrams
@@ -1296,51 +1298,6 @@ def spectral_centroid(
1296
1298
  return (freqs * specgram).sum(dim=freq_dim) / specgram.sum(dim=freq_dim)
1297
1299
 
1298
1300
 
1299
- @deprecated("Please migrate to :py:class:`torchaudio.io.AudioEffector`.", remove=False)
1300
- def apply_codec(
1301
- waveform: Tensor,
1302
- sample_rate: int,
1303
- format: str,
1304
- channels_first: bool = True,
1305
- compression: Optional[float] = None,
1306
- encoding: Optional[str] = None,
1307
- bits_per_sample: Optional[int] = None,
1308
- ) -> Tensor:
1309
- r"""
1310
- Apply codecs as a form of augmentation.
1311
-
1312
- .. devices:: CPU
1313
-
1314
- Args:
1315
- waveform (Tensor): Audio data. Must be 2 dimensional. See also ```channels_first```.
1316
- sample_rate (int): Sample rate of the audio waveform.
1317
- format (str): File format.
1318
- channels_first (bool, optional):
1319
- When True, both the input and output Tensor have dimension `(channel, time)`.
1320
- Otherwise, they have dimension `(time, channel)`.
1321
- compression (float or None, optional): Used for formats other than WAV.
1322
- For more details see :py:func:`torchaudio.backend.sox_io_backend.save`.
1323
- encoding (str or None, optional): Changes the encoding for the supported formats.
1324
- For more details see :py:func:`torchaudio.backend.sox_io_backend.save`.
1325
- bits_per_sample (int or None, optional): Changes the bit depth for the supported formats.
1326
- For more details see :py:func:`torchaudio.backend.sox_io_backend.save`.
1327
-
1328
- Returns:
1329
- Tensor: Resulting Tensor.
1330
- If ``channels_first=True``, it has `(channel, time)` else `(time, channel)`.
1331
- """
1332
- from torchaudio.backend import _sox_io_backend
1333
-
1334
- with tempfile.NamedTemporaryFile() as f:
1335
- torchaudio.backend._sox_io_backend.save(
1336
- f.name, waveform, sample_rate, channels_first, compression, format, encoding, bits_per_sample
1337
- )
1338
- augmented, sr = _sox_io_backend.load(f.name, channels_first=channels_first, format=format)
1339
- if sr != sample_rate:
1340
- augmented = resample(augmented, sr, sample_rate)
1341
- return augmented
1342
-
1343
-
1344
1301
  _CPU = torch.device("cpu")
1345
1302
 
1346
1303
 
@@ -1761,6 +1718,21 @@ def _fix_waveform_shape(
1761
1718
  return waveform_shift
1762
1719
 
1763
1720
 
1721
+ class RnntLoss(torch.autograd.Function):
1722
+ @staticmethod
1723
+ def forward(ctx, *args):
1724
+ output, saved = torch.ops.torchaudio.rnnt_loss_forward(*args)
1725
+ ctx.save_for_backward(saved)
1726
+ return output
1727
+
1728
+ @staticmethod
1729
+ def backward(ctx, dy):
1730
+ grad = ctx.saved_tensors[0]
1731
+ grad_out = dy.view((-1, 1, 1, 1))
1732
+ result = grad * grad_out
1733
+ return (result, None, None, None, None, None, None, None)
1734
+
1735
+
1764
1736
  def _rnnt_loss(
1765
1737
  logits: Tensor,
1766
1738
  targets: Tensor,
@@ -1803,15 +1775,7 @@ def _rnnt_loss(
1803
1775
  if blank < 0: # reinterpret blank index if blank < 0.
1804
1776
  blank = logits.shape[-1] + blank
1805
1777
 
1806
- costs, _ = torch.ops.torchaudio.rnnt_loss(
1807
- logits=logits,
1808
- targets=targets,
1809
- logit_lengths=logit_lengths,
1810
- target_lengths=target_lengths,
1811
- blank=blank,
1812
- clamp=clamp,
1813
- fused_log_softmax=fused_log_softmax,
1814
- )
1778
+ costs = RnntLoss.apply(logits, targets, logit_lengths, target_lengths, blank, clamp, fused_log_softmax)
1815
1779
 
1816
1780
  if reduction == "mean":
1817
1781
  return costs.mean()
@@ -1865,10 +1829,12 @@ def psd(
1865
1829
  psd = psd.sum(dim=-3)
1866
1830
  return psd
1867
1831
 
1832
+
1868
1833
  # Expose both deprecated wrapper as well as original because torchscript breaks on
1869
1834
  # wrapped functions.
1870
1835
  rnnt_loss = dropping_support(_rnnt_loss)
1871
1836
 
1837
+
1872
1838
  def _compute_mat_trace(input: torch.Tensor, dim1: int = -1, dim2: int = -2) -> torch.Tensor:
1873
1839
  r"""Compute the trace of a Tensor along ``dim1`` and ``dim2`` dimensions.
1874
1840
 
Binary file
Binary file
@@ -1,5 +1,7 @@
1
- from torchaudio._internal.module_utils import dropping_support, dropping_class_support
2
1
  import inspect
2
+
3
+ from torchaudio._internal.module_utils import dropping_class_support, dropping_support
4
+
3
5
  _CTC_DECODERS = [
4
6
  "CTCHypothesis",
5
7
  "CTCDecoder",
@@ -35,11 +37,21 @@ def __getattr__(name: str):
35
37
  "To use CUCTC decoder, please set BUILD_CUDA_CTC_DECODER=1 when building from source."
36
38
  ) from err
37
39
 
40
+ # TODO: when all unsupported classes are removed, replace the
41
+ # following if-else block with
42
+ # item = getattr(_cuda_ctc_decoder, name)
38
43
  orig_item = getattr(_cuda_ctc_decoder, name)
39
- if inspect.isclass(orig_item):
44
+ if inspect.isclass(orig_item) or (
45
+ # workaround a failure to detect type instances
46
+ # after sphinx autodoc mocking, required for
47
+ # building docs
48
+ getattr(orig_item, "__sphinx_mock__", False)
49
+ and inspect.isclass(orig_item.__class__)
50
+ ):
40
51
  item = dropping_class_support(orig_item)
41
52
  else:
42
53
  item = dropping_support(orig_item)
54
+
43
55
  globals()[name] = item
44
56
  return item
45
57
  raise AttributeError(f"module {__name__} has no attribute {name}")
@@ -25,7 +25,7 @@ from flashlight.lib.text.dictionary import (
25
25
  Dictionary as _Dictionary,
26
26
  load_words as _load_words,
27
27
  )
28
- from torchaudio.utils import download_asset
28
+ from torchaudio.utils import _download_asset
29
29
 
30
30
  try:
31
31
  from flashlight.lib.text.decoder.kenlm import KenLM as _KenLM
@@ -69,7 +69,7 @@ def _get_word_dict(lexicon, lm, lm_dict, tokens_dict, unk_word):
69
69
 
70
70
  if lexicon and word_dict is None:
71
71
  word_dict = _create_word_dict(lexicon)
72
- elif not lexicon and word_dict is None and type(lm) == str:
72
+ elif not lexicon and word_dict is None and type(lm) is str:
73
73
  d = {tokens_dict.get_entry(i): [[tokens_dict.get_entry(i)]] for i in range(tokens_dict.index_size())}
74
74
  d[unk_word] = [[unk_word]]
75
75
  word_dict = _create_word_dict(d)
@@ -499,7 +499,7 @@ def ctc_decoder(
499
499
  # construct word dict and language model
500
500
  word_dict = _get_word_dict(lexicon, lm, lm_dict, tokens_dict, unk_word)
501
501
 
502
- if type(lm) == str:
502
+ if type(lm) is str:
503
503
  if _KenLM is None:
504
504
  raise RuntimeError(
505
505
  "flashlight-text is installed, but KenLM is not installed. "
@@ -554,10 +554,10 @@ def download_pretrained_files(model: str) -> _PretrainedFiles:
554
554
  """
555
555
 
556
556
  files = _get_filenames(model)
557
- lexicon_file = download_asset(files.lexicon)
558
- tokens_file = download_asset(files.tokens)
557
+ lexicon_file = _download_asset(files.lexicon)
558
+ tokens_file = _download_asset(files.tokens)
559
559
  if files.lm is not None:
560
- lm_file = download_asset(files.lm)
560
+ lm_file = _download_asset(files.lm)
561
561
  else:
562
562
  lm_file = None
563
563
 
@@ -181,7 +181,7 @@ def cuda_ctc_decoder(
181
181
  >>> )
182
182
  >>> results = decoder(log_probs, encoder_out_lens) # List of shape (B, nbest) of Hypotheses
183
183
  """
184
- if type(tokens) == str:
184
+ if type(tokens) is str:
185
185
  tokens = _get_vocab_list(tokens)
186
186
 
187
187
  return CUCTCDecoder(vocab_list=tokens, beam_size=beam_size, nbest=nbest, blank_skip_threshold=blank_skip_threshold)
@@ -285,7 +285,7 @@ def squim_objective_model(
285
285
  chunk_size: int,
286
286
  chunk_stride: Optional[int] = None,
287
287
  ) -> SquimObjective:
288
- """Build a custome :class:`torchaudio.prototype.models.SquimObjective` model.
288
+ """Build a custome :class:`torchaudio.models.squim.SquimObjective` model.
289
289
 
290
290
  Args:
291
291
  feat_dim (int, optional): The feature dimension after Encoder module.
@@ -313,7 +313,7 @@ def squim_objective_model(
313
313
 
314
314
 
315
315
  def squim_objective_base() -> SquimObjective:
316
- """Build :class:`torchaudio.prototype.models.SquimObjective` model with default arguments."""
316
+ """Build :class:`torchaudio.models.squim.SquimObjective` model with default arguments."""
317
317
  return squim_objective_model(
318
318
  feat_dim=256,
319
319
  win_len=64,
@@ -52,7 +52,7 @@ class SourceSeparationBundle:
52
52
  def get_model(self) -> torch.nn.Module:
53
53
  """Construct the model and load the pretrained weight."""
54
54
  model = self._model_factory_func()
55
- path = torchaudio.utils.download_asset(self._model_path)
55
+ path = torchaudio.utils._download_asset(self._model_path)
56
56
  state_dict = torch.load(path)
57
57
  model.load_state_dict(state_dict)
58
58
  model.eval()
@@ -50,7 +50,7 @@ class SquimObjectiveBundle:
50
50
  Variation of :py:class:`~torchaudio.models.SquimObjective`.
51
51
  """
52
52
  model = squim_objective_base()
53
- path = torchaudio.utils.download_asset(f"models/{self._path}")
53
+ path = torchaudio.utils._download_asset(f"models/{self._path}")
54
54
  state_dict = torch.load(path, weights_only=True)
55
55
  model.load_state_dict(state_dict)
56
56
  model.eval()
@@ -125,7 +125,7 @@ class SquimSubjectiveBundle:
125
125
  Variation of :py:class:`~torchaudio.models.SquimObjective`.
126
126
  """
127
127
  model = squim_subjective_base()
128
- path = torchaudio.utils.download_asset(f"models/{self._path}")
128
+ path = torchaudio.utils._download_asset(f"models/{self._path}")
129
129
  state_dict = torch.load(path, weights_only=True)
130
130
  model.load_state_dict(state_dict)
131
131
  model.eval()
@@ -161,7 +161,7 @@ def _load_phonemizer(file, dl_kwargs):
161
161
  raise RuntimeError("DeepPhonemizer is not installed. Please install it.")
162
162
 
163
163
  from dp.phonemizer import Phonemizer
164
- from dp.preprocessing.text import Preprocessor, LanguageTokenizer, SequenceTokenizer
164
+ from dp.preprocessing.text import LanguageTokenizer, Preprocessor, SequenceTokenizer
165
165
 
166
166
  # By default, dp issues DEBUG level log.
167
167
  logger = logging.getLogger("dp")
@@ -244,7 +244,7 @@ class RNNTBundle:
244
244
 
245
245
  def _get_model(self) -> RNNT:
246
246
  model = self._rnnt_factory_func()
247
- path = torchaudio.utils.download_asset(self._rnnt_path)
247
+ path = torchaudio.utils._download_asset(self._rnnt_path)
248
248
  state_dict = torch.load(path)
249
249
  model.load_state_dict(state_dict)
250
250
  model.eval()
@@ -313,7 +313,7 @@ class RNNTBundle:
313
313
  Returns:
314
314
  FeatureExtractor
315
315
  """
316
- local_path = torchaudio.utils.download_asset(self._global_stats_path)
316
+ local_path = torchaudio.utils._download_asset(self._global_stats_path)
317
317
  return _ModuleFeatureExtractor(
318
318
  torch.nn.Sequential(
319
319
  torchaudio.transforms.MelSpectrogram(
@@ -332,7 +332,7 @@ class RNNTBundle:
332
332
  Returns:
333
333
  FeatureExtractor
334
334
  """
335
- local_path = torchaudio.utils.download_asset(self._global_stats_path)
335
+ local_path = torchaudio.utils._download_asset(self._global_stats_path)
336
336
  return _ModuleFeatureExtractor(
337
337
  torch.nn.Sequential(
338
338
  torchaudio.transforms.MelSpectrogram(
@@ -350,7 +350,7 @@ class RNNTBundle:
350
350
  Returns:
351
351
  TokenProcessor
352
352
  """
353
- local_path = torchaudio.utils.download_asset(self._sp_model_path)
353
+ local_path = torchaudio.utils._download_asset(self._sp_model_path)
354
354
  return _SentencePieceTokenProcessor(local_path)
355
355
 
356
356
 
@@ -1,4 +1,5 @@
1
1
  from torchaudio._internal.module_utils import dropping_class_support
2
+
2
3
  from ._multi_channel import MVDR, PSD, RTFMVDR, SoudenMVDR
3
4
  from ._transforms import (
4
5
  AddNoise,
@@ -10,12 +10,12 @@ from torch.nn.modules.lazy import LazyModuleMixin
10
10
  from torch.nn.parameter import UninitializedParameter
11
11
 
12
12
  from torchaudio import functional as F
13
- from torchaudio.functional.functional import _rnnt_loss
14
13
  from torchaudio.functional.functional import (
15
14
  _apply_sinc_resample_kernel,
16
15
  _check_convolve_mode,
17
16
  _fix_waveform_shape,
18
17
  _get_sinc_resample_kernel,
18
+ _rnnt_loss,
19
19
  _stretch_waveform,
20
20
  )
21
21
 
@@ -1185,7 +1185,7 @@ class _AxisMasking(torch.nn.Module):
1185
1185
  self.iid_masks = iid_masks
1186
1186
  self.p = p
1187
1187
 
1188
- def forward(self, specgram: Tensor, mask_value: float = 0.0) -> Tensor:
1188
+ def forward(self, specgram: Tensor, mask_value: Union[float, torch.Tensor] = 0.0) -> Tensor:
1189
1189
  r"""
1190
1190
  Args:
1191
1191
  specgram (Tensor): Tensor of dimension `(..., freq, time)`.
@@ -1,11 +1,4 @@
1
- from torio.utils import ffmpeg_utils
1
+ from .download import _download_asset
2
2
 
3
- from . import sox_utils
4
- from .download import download_asset
5
3
 
6
-
7
- __all__ = [
8
- "download_asset",
9
- "sox_utils",
10
- "ffmpeg_utils",
11
- ]
4
+ __all__ = ["_download_asset"]
@@ -30,10 +30,8 @@ def _get_hash(path, hash, chunk_size=1028):
30
30
  data = file.read(chunk_size)
31
31
  return m.hexdigest()
32
32
 
33
- from torchaudio._internal.module_utils import dropping_support
34
33
 
35
- @dropping_support
36
- def download_asset(
34
+ def _download_asset(
37
35
  key: str,
38
36
  hash: str = "",
39
37
  path: Union[str, PathLike] = "",
torchaudio/version.py CHANGED
@@ -1,2 +1,2 @@
1
- __version__ = '2.8.0+cpu'
2
- git_version = '6e1c7fe9ff6d82b8665d0a46d859d3357d2ebaaa'
1
+ __version__ = '2.9.0+cpu'
2
+ git_version = 'eaa9e4e4dd413dca1084116581dc84fad403db3b'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: torchaudio
3
- Version: 2.8.0
3
+ Version: 2.9.0
4
4
  Summary: An audio package for PyTorch
5
5
  Home-page: https://github.com/pytorch/audio
6
6
  Author: Soumith Chintala, David Pollack, Sean Naren, Peter Goldsborough, Moto Hira, Caroline Chen, Jeff Hwang, Zhaoheng Ni, Xiaohui Zhang
@@ -15,17 +15,17 @@ Classifier: Operating System :: MacOS :: MacOS X
15
15
  Classifier: Operating System :: Microsoft :: Windows
16
16
  Classifier: Operating System :: POSIX
17
17
  Classifier: Programming Language :: C++
18
- Classifier: Programming Language :: Python :: 3.9
19
18
  Classifier: Programming Language :: Python :: 3.10
20
19
  Classifier: Programming Language :: Python :: 3.11
21
20
  Classifier: Programming Language :: Python :: 3.12
22
21
  Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Programming Language :: Python :: 3.14
23
23
  Classifier: Programming Language :: Python :: Implementation :: CPython
24
24
  Classifier: Topic :: Multimedia :: Sound/Audio
25
25
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
26
26
  Description-Content-Type: text/markdown
27
27
  License-File: LICENSE
28
- Requires-Dist: torch==2.8.0
28
+ Requires-Dist: torch==2.9.0
29
29
  Dynamic: author
30
30
  Dynamic: author-email
31
31
  Dynamic: classifier
@@ -48,10 +48,10 @@ torchaudio: an audio library for PyTorch
48
48
  ![TorchAudio Logo](docs/source/_static/img/logo.png)
49
49
 
50
50
  > [!NOTE]
51
- > **We are in the process of refactoring TorchAudio and transitioning it into a
52
- > maintenance phase. This process will include removing some user-facing
53
- > features: those features are deprecated from TorchAudio 2.8 and will be removed in 2.9.
54
- > Our main goals are to reduce redundancies with the rest of the
51
+ > **We have transitioned TorchAudio into a
52
+ > maintenance phase. This process removed some user-facing
53
+ > features. These features were deprecated from TorchAudio 2.8 and removed in 2.9.
54
+ > Our main goals were to reduce redundancies with the rest of the
55
55
  > PyTorch ecosystem, make it easier to maintain, and create a version of
56
56
  > TorchAudio that is more tightly scoped to its strengths: processing audio
57
57
  > data for ML. Please see
@@ -67,9 +67,6 @@ processing library. The benefits of PyTorch can be seen in torchaudio through
67
67
  having all the computations be through PyTorch operations which makes it easy
68
68
  to use and feel like a natural extension.
69
69
 
70
- - [Support audio I/O (Load files, Save files)](http://pytorch.org/audio/main/)
71
- - Load a variety of audio formats, such as `wav`, `mp3`, `ogg`, `flac`, `opus`, `sphere`, into a torch Tensor using SoX
72
- - [Kaldi (ark/scp)](http://pytorch.org/audio/main/kaldi_io.html)
73
70
  - [Dataloaders for common audio datasets](http://pytorch.org/audio/main/datasets.html)
74
71
  - Audio and speech processing functions
75
72
  - [forced_align](https://pytorch.org/audio/main/generated/torchaudio.functional.forced_align.html)
@@ -110,7 +107,7 @@ If you find this package useful, please cite as:
110
107
 
111
108
  ```bibtex
112
109
  @misc{hwang2023torchaudio,
113
- title={TorchAudio 2.1: Advancing speech recognition, self-supervised learning, and audio processing components for PyTorch},
110
+ title={TorchAudio 2.1: Advancing speech recognition, self-supervised learning, and audio processing components for PyTorch},
114
111
  author={Jeff Hwang and Moto Hira and Caroline Chen and Xiaohui Zhang and Zhaoheng Ni and Guangzhi Sun and Pingchuan Ma and Ruizhe Huang and Vineel Pratap and Yuekai Zhang and Anurag Kumar and Chin-Yun Yu and Chuang Zhu and Chunxi Liu and Jacob Kahn and Mirco Ravanelli and Peng Sun and Shinji Watanabe and Yangyang Shi and Yumeng Tao and Robin Scheibler and Samuele Cornell and Sean Kim and Stavros Petridis},
115
112
  year={2023},
116
113
  eprint={2310.17864},
@@ -0,0 +1,85 @@
1
+ torchaudio/__init__.py,sha256=-yWZZVblWA06HQ4cUS8sRsaSZugqacqw0A7vQELxjYE,8082
2
+ torchaudio/_torchcodec.py,sha256=2saifA0BdhE12Zb51vgS6zrwsE1ir7mmvmJ1lFOWtLI,13764
3
+ torchaudio/version.py,sha256=UZrgLp-AqA1uCSaLgWb8hq-wCqJ8Pz6Pe5opbKJNYKQ,85
4
+ torchaudio/_extension/__init__.py,sha256=j7wdZTgwGv6PcQgS1kMisbDA-M4emX3gheOSmjq_jWs,1966
5
+ torchaudio/_extension/utils.py,sha256=kQ_PyLToNuPjLKOQa_-tT1LpFowcGQ0lpcuzrRPrmb8,5059
6
+ torchaudio/_internal/__init__.py,sha256=80cpJfTS8977YYrU3q5p4DRAGAkqEJrmG9Lq2hEDpoo,251
7
+ torchaudio/_internal/module_utils.py,sha256=sXO16_5rS9c67LlADALR16k3HcZo9dHyZ-y_L0zFnnY,5400
8
+ torchaudio/compliance/__init__.py,sha256=JNH_-dTQVmm55YwcVMuVvUYFWdXhGn4C__9S8IUsNoU,53
9
+ torchaudio/compliance/kaldi.py,sha256=bS7qJgS3k8FK1RkMiNEoP3q0xhjeV_V4RHQ9jo_rqOM,37479
10
+ torchaudio/datasets/__init__.py,sha256=hdHldm3OzoQLbI0kHj8tLxqwDhzMfedq0_t1kAK7ORg,1218
11
+ torchaudio/datasets/cmuarctic.py,sha256=c7c75817_brmb7cvFO6_Bj249cJDph9LDBOqs8aUyhM,7238
12
+ torchaudio/datasets/cmudict.py,sha256=_9vTz7_8BFVrcHeA61_-h2XLOl6IsdWCptkMWziOW7U,6176
13
+ torchaudio/datasets/commonvoice.py,sha256=OcFn-nG4YfBIz0YIpH91xH9rFka8yFJmrxy4vFZkC4I,2849
14
+ torchaudio/datasets/dr_vctk.py,sha256=Ayf85prDNr1LcWQ4bysVWdRVPry2JALjv6Mtq-6iBpY,4498
15
+ torchaudio/datasets/fluentcommands.py,sha256=KnmH1Y28k5PhqQX6eV-75MqwTRxiHSUUcvAsa-K954s,3353
16
+ torchaudio/datasets/gtzan.py,sha256=kt25Ly9qDGuiiVXgsXhS05tGi6laRhRko81-BQ4sZ-w,25475
17
+ torchaudio/datasets/iemocap.py,sha256=ZMMG_FpcWcMHEbhuRYRQaUWi_DoegjxCrnVyCg5EEVE,5077
18
+ torchaudio/datasets/librilight_limited.py,sha256=iwZBlSKVLrXzhZvaqjuVRGO6czxX4fpdzd8wWe5feWQ,4290
19
+ torchaudio/datasets/librimix.py,sha256=AncE671AOl04dRPsajNZW-ZxxI_PwA2sjBftdBg4Q-k,5249
20
+ torchaudio/datasets/librispeech.py,sha256=ys769I0UzG07UEmyZ_KDwATh4yc08hFUuCayK8tYIGg,6482
21
+ torchaudio/datasets/librispeech_biasing.py,sha256=KEGplRU_wpgb0VqrT-t42kvtC7lg4uMssZcosVvvPhg,7147
22
+ torchaudio/datasets/libritts.py,sha256=91Ep2Mq3OySre25GniXBLmRzTwEPiKmMaqXnzirn0xY,6038
23
+ torchaudio/datasets/ljspeech.py,sha256=l09BSBQH76I-LhYkIRF0u18tTi-4yysaF4gj2GSZaxw,3601
24
+ torchaudio/datasets/musdb_hq.py,sha256=FVlKsGEBHiT50y9GLswnt2QFph2PjiI6yCy1MxiG6f8,5214
25
+ torchaudio/datasets/quesst14.py,sha256=3y6H3T3g78jkDqca8jORQBOViZhH1RhlsfuY8HJ2OcU,4591
26
+ torchaudio/datasets/snips.py,sha256=mwVc5KsbMlPQJ87eyYgjnQ5S4EFXoQvm13dO0rXpJuE,5165
27
+ torchaudio/datasets/speechcommands.py,sha256=_wmrKSiEe0COO7uk0JVXypBmNxu0urnceHuFQ6zMOk0,7664
28
+ torchaudio/datasets/tedlium.py,sha256=UQZUaeUqmFntZWcH9HXOpGeW6tsCcG81bPjX2_CWxbg,8916
29
+ torchaudio/datasets/utils.py,sha256=mpg4t0hFitRGj9Ow7MXwCFNKGTnVsErVLpxfsbP7FE8,1757
30
+ torchaudio/datasets/vctk.py,sha256=vN_VzxTLyHW11I_rzfzMVA3h5JW917FaU3NCnR-zcL0,5842
31
+ torchaudio/datasets/voxceleb1.py,sha256=JlYkbyYOAFUFhGLULe3lgucANWf_G7qGqw47YjiX2IM,12034
32
+ torchaudio/datasets/yesno.py,sha256=B3hRNUazvB8V8SwOUlQzliB9vI9gMkl9SEl-dZ4PEaw,3115
33
+ torchaudio/functional/__init__.py,sha256=do2OUOUhg_8Z7TPUQ1HHpoWjNAPrwgxDIemk718TWO0,2581
34
+ torchaudio/functional/_alignment.py,sha256=P2ehTZ7IwuMFWVNqrhYjc1imBKNykwC03D7uvbgxBCA,4867
35
+ torchaudio/functional/filtering.py,sha256=piUbVknBOBdILrd1M9bzk2A9UCCn4qzKXNEgv7IYD7Q,64010
36
+ torchaudio/functional/functional.py,sha256=c-jSGnLx54qnQk5efZiLrioi5x1-2LIQF3P2lvLPPPo,97236
37
+ torchaudio/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
+ torchaudio/lib/_torchaudio.pyd,sha256=Rxb-7sUaNsFBbVYhvFro2iof-_xXcUqgFWOCUghOGrc,835584
39
+ torchaudio/lib/libtorchaudio.pyd,sha256=rRAWMlUmFFnBF5kFihd33UgTVJ-vJE-FMFPREK8Qx3g,923648
40
+ torchaudio/models/__init__.py,sha256=Gi3UQvxjwTLW9wfKlF42O3Vup70d0bk2x-rZS89ASwI,2080
41
+ torchaudio/models/_hdemucs.py,sha256=ipAj7965PO_WEZqQwW1om9gQj90UhQOeU6HU3Lpvzwo,39250
42
+ torchaudio/models/conformer.py,sha256=gVrOYeJkPlVaX-4eZpVzNUe_r3k7g1Y6NaaQ8JZP-r4,10361
43
+ torchaudio/models/conv_tasnet.py,sha256=D7Y10sOzLe03gygfN1J5R73SIHkIGVQOkqKQ6Ni3o_s,12870
44
+ torchaudio/models/deepspeech.py,sha256=nVYc2xwWpFO6gu5CR0mbqLiAzJn8lAfHcdcP92i22mo,2830
45
+ torchaudio/models/emformer.py,sha256=WbaeZcrPFOOLn4igqweE0AfuF_SQZpqg7XPGEhl7C8c,38650
46
+ torchaudio/models/rnnt.py,sha256=PNJpZd3vH6wRq8TEf4UlPtVHbte9wOJ-bRMEug6gp08,36357
47
+ torchaudio/models/rnnt_decoder.py,sha256=CBBMZhhq5Bgax0_3p3SZD-Os3S1LFHB91oTgVED4bmY,13178
48
+ torchaudio/models/tacotron2.py,sha256=mZ5lLSa75oqc0hgkc3sIm5_gK-knhtgX3dmg9-oLQao,46960
49
+ torchaudio/models/wav2letter.py,sha256=oetxpH5RG0TadYB75IOmYOrnraaPvSlcSNpRZb2FE_A,3350
50
+ torchaudio/models/wavernn.py,sha256=LRgL36jA6WzI1PAzBY6P52oCMGSTOraXB8fEgkwpSxw,15855
51
+ torchaudio/models/decoder/__init__.py,sha256=PonG1Rg0CRBBbmRLZZQ1n2rXiDhivAAU9x67_G15seI,1963
52
+ torchaudio/models/decoder/_ctc_decoder.py,sha256=zKsOdPNrUn7v2QJmluC3kOp90RQaP3CSmQSurc1nAFw,20654
53
+ torchaudio/models/decoder/_cuda_ctc_decoder.py,sha256=4JKcQak4Ke6Id0EJEDJEx1yLTXKbJpIDNiu7QSe3gWU,7373
54
+ torchaudio/models/squim/__init__.py,sha256=eQox8kPviOthKulpzZvPK0a66NHW7MzYE4aOF7va_kU,357
55
+ torchaudio/models/squim/objective.py,sha256=FCYu0i2OXY3e6Z-BO2p-rc6rU0PvpJZ0gA-CPZZA9fw,12607
56
+ torchaudio/models/squim/subjective.py,sha256=1_gK9O3nvrjiikpP46IdsMzKduSTt91kKklA69wQqiw,5947
57
+ torchaudio/models/wav2vec2/__init__.py,sha256=j5FdQFfuIpdIKYwoMLop4Ba70GGoS-lK61tU-oNG5wg,972
58
+ torchaudio/models/wav2vec2/components.py,sha256=EzmuGc5qHVPrHCGqYVHTvdjqP2gCrBfnHSoTK9GsZ1w,48244
59
+ torchaudio/models/wav2vec2/model.py,sha256=kP6QKsF1PjleyUMhaPjydi0pCRy4GGUArRWBzfDJmdE,61671
60
+ torchaudio/models/wav2vec2/wavlm_attention.py,sha256=iYde9grsb_RaEs87FI5ykyN3z0Ix1plqpsMNvakAiWM,11058
61
+ torchaudio/models/wav2vec2/utils/__init__.py,sha256=1eowaOEKRbp7JajFNv_r47REJqnMmXidukS7Mrwp_5Q,188
62
+ torchaudio/models/wav2vec2/utils/import_fairseq.py,sha256=so7T-otDNCsTUtzJRUFFGWyd0caWl3RY_UbFMxJ4DJE,9411
63
+ torchaudio/models/wav2vec2/utils/import_huggingface.py,sha256=NMK6YrAIDfOw8j1tV-3XTwx_mwbJHvg8ldTrAWRztIM,6080
64
+ torchaudio/pipelines/__init__.py,sha256=oMwOu-1T_ugJmhdaoI5NrCDrUAGrpDOlJQO8h-bLAW4,2847
65
+ torchaudio/pipelines/_source_separation_pipeline.py,sha256=ttHqjcwCmCPWLj0YeDsTa1-XetuyjPDZ9D2deE3FmkA,4334
66
+ torchaudio/pipelines/_squim_pipeline.py,sha256=eYdrKVXUru3VdfpaDnMN5qCuKHNveEd_jwGqtemV9ls,6438
67
+ torchaudio/pipelines/rnnt_pipeline.py,sha256=16OMN_4yY1TEKLWjqkzFSMKByITxLobj6X1uk78pwQI,14133
68
+ torchaudio/pipelines/_tts/__init__.py,sha256=WKc5c06b_M9MvEohJZghJJWAL7vXvfwRIkdy85UCh04,442
69
+ torchaudio/pipelines/_tts/impl.py,sha256=wwrTyTEEkew22AnzB_ZklapGaAstJSUBawhA7bOcGXM,15759
70
+ torchaudio/pipelines/_tts/interface.py,sha256=y1mU0446Vy2hHpCwMqRZt1UI4ZXl-C4tJp92EylwHh0,10479
71
+ torchaudio/pipelines/_tts/utils.py,sha256=tuiEA5eqoBNgt46TxGA7lOEqljbuECL0-pc_uSco0xo,5040
72
+ torchaudio/pipelines/_wav2vec2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
73
+ torchaudio/pipelines/_wav2vec2/aligner.py,sha256=HOcthFgup97QMx9ZXCmkv6jdw_zxdRT-e_SilXEujNU,2796
74
+ torchaudio/pipelines/_wav2vec2/impl.py,sha256=I6htNo4Wt5LPxX9Z8rmxarFE8BZOZBUFIU9T9k1k2Po,67260
75
+ torchaudio/pipelines/_wav2vec2/utils.py,sha256=CVawfXmVGWY8mj-_6r4KO907BpF67WAVWHEHhycFIaM,7317
76
+ torchaudio/transforms/__init__.py,sha256=TsmUD7pXQO940uG0GhFTuMB48PT6uOklN5ptd-Yut14,1476
77
+ torchaudio/transforms/_multi_channel.py,sha256=Musw7dTu25HNjKeIcKHUDuqBmj_GC2e3TaakqJcffW8,22688
78
+ torchaudio/transforms/_transforms.py,sha256=g-E3nGgCEcKeWqEtyrDquSKfecHMD8olJRUMnqHHWYI,89057
79
+ torchaudio/utils/__init__.py,sha256=yNMWIjoGd68FPxV6PhDdjO1oRemlM0QPJsu_k6iVaGQ,74
80
+ torchaudio/utils/download.py,sha256=rf_yS18i7n4JYbIGpWiWc0ipe4sGv3Rvivv6p0DaZgU,2972
81
+ torchaudio-2.9.0.dist-info/METADATA,sha256=Nu1BcomsFTqaoMK8arCV6-o4AEnU86n_TsLPFOzn--A,6911
82
+ torchaudio-2.9.0.dist-info/WHEEL,sha256=yC3OVe9skFE0rAd70upJxuH5WUo8L-vbuVSibQ-iR4c,101
83
+ torchaudio-2.9.0.dist-info/top_level.txt,sha256=mPKWMIRWWW2JwbJN6wRckeN1gpbjhifapAF0Z9t7SMo,11
84
+ torchaudio-2.9.0.dist-info/RECORD,,
85
+ torchaudio-2.9.0.dist-info/licenses/LICENSE,sha256=MmOOF5kxv-VR6r9nsOZ6E7SD4Wa1jdcmNjSrf4nzlvU,1363
@@ -1,61 +0,0 @@
1
- from typing import List, Optional
2
-
3
- from torchaudio._internal.module_utils import deprecated
4
-
5
- from . import utils
6
- from .common import AudioMetaData
7
-
8
- __all__ = [
9
- "AudioMetaData",
10
- "load",
11
- "info",
12
- "save",
13
- "list_audio_backends",
14
- "get_audio_backend",
15
- "set_audio_backend",
16
- ]
17
-
18
-
19
- info = utils.get_info_func()
20
- load = utils.get_load_func()
21
- save = utils.get_save_func()
22
-
23
-
24
- def list_audio_backends() -> List[str]:
25
- """List available backends
26
-
27
- Returns:
28
- list of str: The list of available backends.
29
-
30
- The possible values are; ``"ffmpeg"``, ``"sox"`` and ``"soundfile"``.
31
- """
32
-
33
- return list(utils.get_available_backends().keys())
34
-
35
-
36
- # Temporary until global backend is removed
37
- @deprecated("With dispatcher enabled, this function is no-op. You can remove the function call.")
38
- def get_audio_backend() -> Optional[str]:
39
- """Get the name of the current global backend
40
-
41
- Returns:
42
- str or None:
43
- If dispatcher mode is enabled, returns ``None`` otherwise,
44
- the name of current backend or ``None`` (no backend is set).
45
- """
46
- return None
47
-
48
-
49
- # Temporary until global backend is removed
50
- @deprecated("With dispatcher enabled, this function is no-op. You can remove the function call.")
51
- def set_audio_backend(backend: Optional[str]): # noqa
52
- """Set the global backend.
53
-
54
- This is a no-op when dispatcher mode is enabled.
55
-
56
- Args:
57
- backend (str or None): Name of the backend.
58
- One of ``"sox_io"`` or ``"soundfile"`` based on availability
59
- of the system. If ``None`` is provided the current backend is unassigned.
60
- """
61
- pass