torchaudio 2.7.0__cp312-cp312-macosx_11_0_arm64.whl → 2.8.0__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchaudio might be problematic. Click here for more details.

Files changed (55) hide show
  1. torchaudio/.dylibs/libc++.1.0.dylib +0 -0
  2. torchaudio/__init__.py +16 -5
  3. torchaudio/_backend/sox.py +2 -2
  4. torchaudio/_backend/utils.py +33 -0
  5. torchaudio/_internal/module_utils.py +59 -10
  6. torchaudio/_torchcodec.py +352 -0
  7. torchaudio/backend/no_backend.py +2 -2
  8. torchaudio/backend/soundfile_backend.py +2 -2
  9. torchaudio/backend/sox_io_backend.py +2 -2
  10. torchaudio/functional/__init__.py +6 -1
  11. torchaudio/functional/functional.py +7 -3
  12. torchaudio/io/__init__.py +10 -3
  13. torchaudio/kaldi_io.py +6 -0
  14. torchaudio/lib/_torchaudio.so +0 -0
  15. torchaudio/lib/_torchaudio_sox.so +0 -0
  16. torchaudio/lib/libtorchaudio.so +0 -0
  17. torchaudio/lib/libtorchaudio_sox.so +0 -0
  18. torchaudio/models/decoder/__init__.py +7 -1
  19. torchaudio/pipelines/_tts/utils.py +3 -1
  20. torchaudio/prototype/datasets/musan.py +2 -1
  21. torchaudio/prototype/functional/_dsp.py +8 -0
  22. torchaudio/prototype/functional/_rir.py +3 -0
  23. torchaudio/prototype/functional/functional.py +3 -0
  24. torchaudio/prototype/models/__init__.py +4 -1
  25. torchaudio/prototype/models/_conformer_wav2vec2.py +7 -0
  26. torchaudio/prototype/models/_emformer_hubert.py +4 -0
  27. torchaudio/prototype/models/conv_emformer.py +4 -0
  28. torchaudio/prototype/models/hifi_gan.py +6 -0
  29. torchaudio/prototype/models/rnnt.py +6 -0
  30. torchaudio/prototype/models/rnnt_decoder.py +3 -0
  31. torchaudio/prototype/pipelines/__init__.py +11 -2
  32. torchaudio/prototype/pipelines/_vggish/__init__.py +5 -1
  33. torchaudio/prototype/pipelines/_vggish/_vggish_impl.py +4 -1
  34. torchaudio/prototype/pipelines/_vggish/_vggish_pipeline.py +3 -2
  35. torchaudio/prototype/pipelines/hifigan_pipeline.py +5 -0
  36. torchaudio/prototype/transforms/_transforms.py +6 -1
  37. torchaudio/sox_effects/sox_effects.py +4 -1
  38. torchaudio/transforms/__init__.py +3 -1
  39. torchaudio/transforms/_transforms.py +3 -2
  40. torchaudio/utils/download.py +2 -0
  41. torchaudio/utils/sox_utils.py +19 -0
  42. torchaudio/version.py +2 -2
  43. {torchaudio-2.7.0.dist-info → torchaudio-2.8.0.dist-info}/METADATA +15 -3
  44. {torchaudio-2.7.0.dist-info → torchaudio-2.8.0.dist-info}/RECORD +55 -54
  45. {torchaudio-2.7.0.dist-info → torchaudio-2.8.0.dist-info}/WHEEL +1 -1
  46. torio/io/_streaming_media_decoder.py +0 -1
  47. torio/lib/_torio_ffmpeg4.so +0 -0
  48. torio/lib/_torio_ffmpeg5.so +0 -0
  49. torio/lib/_torio_ffmpeg6.so +0 -0
  50. torio/lib/libtorio_ffmpeg4.so +0 -0
  51. torio/lib/libtorio_ffmpeg5.so +0 -0
  52. torio/lib/libtorio_ffmpeg6.so +0 -0
  53. torio/utils/ffmpeg_utils.py +28 -0
  54. {torchaudio-2.7.0.dist-info → torchaudio-2.8.0.dist-info/licenses}/LICENSE +0 -0
  55. {torchaudio-2.7.0.dist-info → torchaudio-2.8.0.dist-info}/top_level.txt +0 -0
Binary file
torchaudio/__init__.py CHANGED
@@ -1,14 +1,23 @@
1
+ from torchaudio._internal.module_utils import dropping_io_support, dropping_class_io_support
2
+
1
3
  # Initialize extension and backend first
2
4
  from . import _extension # noqa # usort: skip
3
5
  from ._backend import ( # noqa # usort: skip
4
- AudioMetaData,
5
- get_audio_backend,
6
- info,
7
- list_audio_backends,
6
+ AudioMetaData as _AudioMetaData,
7
+ get_audio_backend as _get_audio_backend,
8
+ info as _info,
9
+ list_audio_backends as _list_audio_backends,
8
10
  load,
9
11
  save,
10
- set_audio_backend,
12
+ set_audio_backend as _set_audio_backend,
11
13
  )
14
+ from ._torchcodec import load_with_torchcodec, save_with_torchcodec
15
+
16
+ AudioMetaData = dropping_class_io_support(_AudioMetaData)
17
+ get_audio_backend = dropping_io_support(_get_audio_backend)
18
+ info = dropping_io_support(_info)
19
+ list_audio_backends = dropping_io_support(_list_audio_backends)
20
+ set_audio_backend = dropping_io_support(_set_audio_backend)
12
21
 
13
22
  from . import ( # noqa: F401
14
23
  compliance,
@@ -35,6 +44,8 @@ except ImportError:
35
44
  __all__ = [
36
45
  "AudioMetaData",
37
46
  "load",
47
+ "load_with_torchcodec",
48
+ "save_with_torchcodec",
38
49
  "info",
39
50
  "save",
40
51
  "io",
@@ -41,7 +41,7 @@ class SoXBackend(Backend):
41
41
  "Please use an alternative backend that does support loading from file-like objects, e.g. FFmpeg.",
42
42
  )
43
43
  else:
44
- ret = sox_ext.load_audio_file(uri, frame_offset, num_frames, normalize, channels_first, format)
44
+ ret = sox_ext.load_audio_file(str(uri), frame_offset, num_frames, normalize, channels_first, format)
45
45
  if not ret:
46
46
  raise RuntimeError(f"Failed to load audio from {uri}.")
47
47
  return ret
@@ -70,7 +70,7 @@ class SoXBackend(Backend):
70
70
  )
71
71
  else:
72
72
  sox_ext.save_audio_file(
73
- uri,
73
+ str(uri),
74
74
  src,
75
75
  sample_rate,
76
76
  channels_first,
@@ -1,6 +1,7 @@
1
1
  import os
2
2
  from functools import lru_cache
3
3
  from typing import BinaryIO, Dict, Optional, Tuple, Type, Union
4
+ import warnings
4
5
 
5
6
  import torch
6
7
 
@@ -127,6 +128,14 @@ def get_load_func():
127
128
  ) -> Tuple[torch.Tensor, int]:
128
129
  """Load audio data from source.
129
130
 
131
+ .. warning::
132
+ In 2.9, this function's implementation will be changed to use
133
+ :func:`~torchaudio.load_with_torchcodec` under the hood. Some
134
+ parameters like ``normalize``, ``format``, ``buffer_size``, and
135
+ ``backend`` will be ignored. We recommend that you port your code to
136
+ rely directly on TorchCodec's decoder instead:
137
+ https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.decoders.AudioDecoder.html#torchcodec.decoders.AudioDecoder.
138
+
130
139
  By default (``normalize=True``, ``channels_first=True``), this function returns Tensor with
131
140
  ``float32`` dtype, and the shape of `[channel, time]`.
132
141
 
@@ -201,6 +210,14 @@ def get_load_func():
201
210
  integer type, else ``float32`` type. If ``channels_first=True``, it has
202
211
  `[channel, time]` else `[time, channel]`.
203
212
  """
213
+ warnings.warn(
214
+ "In 2.9, this function's implementation will be changed to use "
215
+ "torchaudio.load_with_torchcodec` under the hood. Some "
216
+ "parameters like ``normalize``, ``format``, ``buffer_size``, and "
217
+ "``backend`` will be ignored. We recommend that you port your code to "
218
+ "rely directly on TorchCodec's decoder instead: "
219
+ "https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.decoders.AudioDecoder.html#torchcodec.decoders.AudioDecoder."
220
+ )
204
221
  backend = dispatcher(uri, format, backend)
205
222
  return backend.load(uri, frame_offset, num_frames, normalize, channels_first, format, buffer_size)
206
223
 
@@ -235,6 +252,14 @@ def get_save_func():
235
252
  ):
236
253
  """Save audio data to file.
237
254
 
255
+ .. warning::
256
+ In 2.9, this function's implementation will be changed to use
257
+ :func:`~torchaudio.save_with_torchcodec` under the hood. Some
258
+ parameters like format, encoding, bits_per_sample, buffer_size, and
259
+ ``backend`` will be ignored. We recommend that you port your code to
260
+ rely directly on TorchCodec's encoder instead:
261
+ https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.encoders.AudioEncoder
262
+
238
263
  Note:
239
264
  The formats this function can handle depend on the availability of backends.
240
265
  Please use the following functions to fetch the supported formats.
@@ -309,6 +334,14 @@ def get_save_func():
309
334
  Refer to http://sox.sourceforge.net/soxformat.html for more details.
310
335
 
311
336
  """
337
+ warnings.warn(
338
+ "In 2.9, this function's implementation will be changed to use "
339
+ "torchaudio.save_with_torchcodec` under the hood. Some "
340
+ "parameters like format, encoding, bits_per_sample, buffer_size, and "
341
+ "``backend`` will be ignored. We recommend that you port your code to "
342
+ "rely directly on TorchCodec's encoder instead: "
343
+ "https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.encoders.AudioEncoder"
344
+ )
312
345
  backend = dispatcher(uri, format, backend)
313
346
  return backend.save(
314
347
  uri, src, sample_rate, channels_first, format, encoding, bits_per_sample, buffer_size, compression
@@ -1,7 +1,7 @@
1
1
  import importlib.util
2
2
  import os
3
3
  import warnings
4
- from functools import wraps
4
+ from functools import wraps, partial
5
5
  from typing import Optional
6
6
 
7
7
 
@@ -59,6 +59,17 @@ def requires_module(*modules: str):
59
59
 
60
60
  return decorator
61
61
 
62
+ UNSUPPORTED = []
63
+
64
+ def wrap_deprecated(func, name, direction: str, version: Optional[str] = None, remove: bool = False):
65
+ @wraps(func)
66
+ def wrapped(*args, **kwargs):
67
+ message = f"{name} has been deprecated. {direction}"
68
+ if remove:
69
+ message += f' It will be removed from {"a future" if version is None else "the " + str(version)} release. '
70
+ warnings.warn(message, stacklevel=2)
71
+ return func(*args, **kwargs)
72
+ return wrapped
62
73
 
63
74
  def deprecated(direction: str, version: Optional[str] = None, remove: bool = False):
64
75
  """Decorator to add deprecation message
@@ -70,30 +81,68 @@ def deprecated(direction: str, version: Optional[str] = None, remove: bool = Fal
70
81
  """
71
82
 
72
83
  def decorator(func):
73
- @wraps(func)
74
- def wrapped(*args, **kwargs):
75
- message = f"{func.__module__}.{func.__name__} has been deprecated. {direction}"
76
- if remove:
77
- message += f' It will be removed from {"future" if version is None else version} release. '
78
- warnings.warn(message, stacklevel=2)
79
- return func(*args, **kwargs)
84
+ wrapped = wrap_deprecated(func, f"{func.__module__}.{func.__name__}", direction, version=version, remove=remove)
80
85
 
81
86
  message = "This function has been deprecated. "
82
87
  if remove:
83
88
  message += f'It will be removed from {"future" if version is None else version} release. '
84
89
 
85
- wrapped.__doc__ = f"""DEPRECATED: {func.__doc__}
90
+ wrapped.__doc__ = f"""DEPRECATED
86
91
 
87
92
  .. warning::
88
93
 
89
94
  {message}
90
95
  {direction}
91
- """
92
96
 
97
+ {func.__doc__}
98
+ """
99
+
100
+ UNSUPPORTED.append(wrapped)
93
101
  return wrapped
94
102
 
95
103
  return decorator
96
104
 
105
+ DEPRECATION_MSG = (
106
+ "This deprecation is part of a large refactoring effort to transition TorchAudio into a maintenance phase. "
107
+ "Please see https://github.com/pytorch/audio/issues/3902 for more information."
108
+ )
109
+
110
+ IO_DEPRECATION_MSG = (
111
+ "This deprecation is part of a large refactoring effort to transition TorchAudio into a maintenance phase. "
112
+ "The decoding and encoding capabilities of PyTorch for both audio and video are being consolidated into TorchCodec. "
113
+ "Please see https://github.com/pytorch/audio/issues/3902 for more information."
114
+ )
115
+
116
+ dropping_support = deprecated(DEPRECATION_MSG, version="2.9", remove=True)
117
+
118
+ def dropping_class_support(c, msg=DEPRECATION_MSG):
119
+ c.__init__ = wrap_deprecated(c.__init__, f"{c.__module__}.{c.__name__}", msg, version="2.9", remove=True)
120
+ c.__doc__ = f"""DEPRECATED
121
+
122
+ .. warning::
123
+
124
+ This class is deprecated from version 2.8. It will be removed in the 2.9 release.
125
+ {msg}
126
+ {c.__doc__}
127
+ """
128
+
129
+ UNSUPPORTED.append(c)
130
+ return c
131
+
132
+ def dropping_const_support(c, msg=DEPRECATION_MSG, name=None):
133
+ c.__doc__ = f"""[DEPRECATED]
134
+
135
+ .. warning::
136
+
137
+ This object is deprecated deprecated from version 2.8. It will be removed in the 2.9 release.
138
+ {msg}
139
+ {c.__doc__}
140
+ """
141
+ return c
142
+
143
+ dropping_class_io_support = partial(dropping_class_support, msg=IO_DEPRECATION_MSG)
144
+
145
+ dropping_io_support = deprecated(IO_DEPRECATION_MSG, version="2.9", remove=True)
97
146
 
98
147
  def fail_with_message(message):
99
148
  """Generate decorator to give users message about missing TorchAudio extension."""
@@ -0,0 +1,352 @@
1
+ """TorchCodec integration for TorchAudio."""
2
+
3
+ import os
4
+ from typing import BinaryIO, Optional, Tuple, Union
5
+
6
+ import torch
7
+
8
+
9
+ def load_with_torchcodec(
10
+ uri: Union[BinaryIO, str, os.PathLike],
11
+ frame_offset: int = 0,
12
+ num_frames: int = -1,
13
+ normalize: bool = True,
14
+ channels_first: bool = True,
15
+ format: Optional[str] = None,
16
+ buffer_size: int = 4096,
17
+ backend: Optional[str] = None,
18
+ ) -> Tuple[torch.Tensor, int]:
19
+ """Load audio data from source using TorchCodec's AudioDecoder.
20
+
21
+ .. note::
22
+
23
+ This function supports the same API as :func:`~torchaudio.load`, and
24
+ relies on TorchCodec's decoding capabilities under the hood. It is
25
+ provided for convenience, but we do recommend that you port your code to
26
+ natively use ``torchcodec``'s ``AudioDecoder`` class for better
27
+ performance:
28
+ https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.decoders.AudioDecoder.
29
+ In TorchAudio 2.9, :func:`~torchaudio.load` will be relying on
30
+ :func:`~torchaudio.load_with_torchcodec`. Note that some parameters of
31
+ :func:`~torchaudio.load`, like ``normalize``, ``buffer_size``, and
32
+ ``backend``, are ignored by :func:`~torchaudio.load_with_torchcodec`.
33
+
34
+
35
+ Args:
36
+ uri (path-like object or file-like object):
37
+ Source of audio data. The following types are accepted:
38
+
39
+ * ``path-like``: File path or URL.
40
+ * ``file-like``: Object with ``read(size: int) -> bytes`` method.
41
+
42
+ frame_offset (int, optional):
43
+ Number of samples to skip before start reading data.
44
+ num_frames (int, optional):
45
+ Maximum number of samples to read. ``-1`` reads all the remaining samples,
46
+ starting from ``frame_offset``.
47
+ normalize (bool, optional):
48
+ TorchCodec always returns normalized float32 samples. This parameter
49
+ is ignored and a warning is issued if set to False.
50
+ Default: ``True``.
51
+ channels_first (bool, optional):
52
+ When True, the returned Tensor has dimension `[channel, time]`.
53
+ Otherwise, the returned Tensor's dimension is `[time, channel]`.
54
+ format (str or None, optional):
55
+ Not used by TorchCodec AudioDecoder. Provided for API compatibility;
56
+ a warning is issued if provided. (Default: ``None``)
57
+ buffer_size (int, optional):
58
+ Not used by TorchCodec AudioDecoder. Provided for API compatibility.
59
+ backend (str or None, optional):
60
+ Not used by TorchCodec AudioDecoder. Provided for API compatibility.
61
+
62
+ Returns:
63
+ (torch.Tensor, int): Resulting Tensor and sample rate.
64
+ Always returns float32 tensors. If ``channels_first=True``, shape is
65
+ `[channel, time]`, otherwise `[time, channel]`.
66
+
67
+ Raises:
68
+ ImportError: If torchcodec is not available.
69
+ ValueError: If unsupported parameters are used.
70
+ RuntimeError: If TorchCodec fails to decode the audio.
71
+
72
+ Note:
73
+ - TorchCodec always returns normalized float32 samples, so the ``normalize``
74
+ parameter has no effect.
75
+ - The ``buffer_size`` and ``backend`` parameters are ignored.
76
+ - Not all audio formats supported by torchaudio backends may be supported
77
+ by TorchCodec.
78
+ """
79
+ # Import torchcodec here to provide clear error if not available
80
+ try:
81
+ from torchcodec.decoders import AudioDecoder
82
+ except ImportError as e:
83
+ raise ImportError(
84
+ "TorchCodec is required for load_with_torchcodec. "
85
+ "Please install torchcodec to use this function."
86
+ ) from e
87
+
88
+ # Parameter validation and warnings
89
+ if not normalize:
90
+ import warnings
91
+ warnings.warn(
92
+ "TorchCodec AudioDecoder always returns normalized float32 samples. "
93
+ "The 'normalize=False' parameter is ignored.",
94
+ UserWarning,
95
+ stacklevel=2
96
+ )
97
+
98
+ if buffer_size != 4096:
99
+ import warnings
100
+ warnings.warn(
101
+ "The 'buffer_size' parameter is not used by TorchCodec AudioDecoder.",
102
+ UserWarning,
103
+ stacklevel=2
104
+ )
105
+
106
+ if backend is not None:
107
+ import warnings
108
+ warnings.warn(
109
+ "The 'backend' parameter is not used by TorchCodec AudioDecoder.",
110
+ UserWarning,
111
+ stacklevel=2
112
+ )
113
+
114
+ if format is not None:
115
+ import warnings
116
+ warnings.warn(
117
+ "The 'format' parameter is not supported by TorchCodec AudioDecoder.",
118
+ UserWarning,
119
+ stacklevel=2
120
+ )
121
+
122
+ # Create AudioDecoder
123
+ try:
124
+ decoder = AudioDecoder(uri)
125
+ except Exception as e:
126
+ raise RuntimeError(f"Failed to create AudioDecoder for {uri}: {e}") from e
127
+
128
+ # Get sample rate from metadata
129
+ sample_rate = decoder.metadata.sample_rate
130
+ if sample_rate is None:
131
+ raise RuntimeError("Unable to determine sample rate from audio metadata")
132
+
133
+ # Decode the entire file first, then subsample manually
134
+ # This is the simplest approach since torchcodec uses time-based indexing
135
+ try:
136
+ audio_samples = decoder.get_all_samples()
137
+ except Exception as e:
138
+ raise RuntimeError(f"Failed to decode audio samples: {e}") from e
139
+
140
+ data = audio_samples.data
141
+
142
+ # Apply frame_offset and num_frames (which are actually sample offsets)
143
+ if frame_offset > 0:
144
+ if frame_offset >= data.shape[1]:
145
+ # Return empty tensor if offset is beyond available data
146
+ empty_shape = (data.shape[0], 0) if channels_first else (0, data.shape[0])
147
+ return torch.zeros(empty_shape, dtype=torch.float32), sample_rate
148
+ data = data[:, frame_offset:]
149
+
150
+ if num_frames == 0:
151
+ # Return empty tensor if num_frames is 0
152
+ empty_shape = (data.shape[0], 0) if channels_first else (0, data.shape[0])
153
+ return torch.zeros(empty_shape, dtype=torch.float32), sample_rate
154
+ elif num_frames > 0:
155
+ data = data[:, :num_frames]
156
+
157
+ # TorchCodec returns data in [channel, time] format by default
158
+ # Handle channels_first parameter
159
+ if not channels_first:
160
+ data = data.transpose(0, 1) # [channel, time] -> [time, channel]
161
+
162
+ return data, sample_rate
163
+
164
+
165
+ def save_with_torchcodec(
166
+ uri: Union[str, os.PathLike],
167
+ src: torch.Tensor,
168
+ sample_rate: int,
169
+ channels_first: bool = True,
170
+ format: Optional[str] = None,
171
+ encoding: Optional[str] = None,
172
+ bits_per_sample: Optional[int] = None,
173
+ buffer_size: int = 4096,
174
+ backend: Optional[str] = None,
175
+ compression: Optional[Union[float, int]] = None,
176
+ ) -> None:
177
+ """Save audio data to file using TorchCodec's AudioEncoder.
178
+
179
+ .. note::
180
+
181
+ This function supports the same API as :func:`~torchaudio.save`, and
182
+ relies on TorchCodec's encoding capabilities under the hood. It is
183
+ provided for convenience, but we do recommend that you port your code to
184
+ natively use ``torchcodec``'s ``AudioEncoder`` class for better
185
+ performance:
186
+ https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.encoders.AudioEncoder.
187
+ In TorchAudio 2.9, :func:`~torchaudio.save` will be relying on
188
+ :func:`~torchaudio.save_with_torchcodec`. Note that some parameters of
189
+ :func:`~torchaudio.save`, like ``format``, ``encoding``,
190
+ ``bits_per_sample``, ``buffer_size``, and ``backend``,
191
+ are ignored by :func:`~torchaudio.save_with_torchcodec`.
192
+
193
+ This function provides a TorchCodec-based alternative to torchaudio.save
194
+ with the same API. TorchCodec's AudioEncoder provides efficient encoding
195
+ with FFmpeg under the hood.
196
+
197
+ Args:
198
+ uri (path-like object):
199
+ Path to save the audio file. The file extension determines the format.
200
+
201
+ src (torch.Tensor):
202
+ Audio data to save. Must be a 1D or 2D tensor with float32 values
203
+ in the range [-1, 1]. If 2D, shape should be [channel, time] when
204
+ channels_first=True, or [time, channel] when channels_first=False.
205
+
206
+ sample_rate (int):
207
+ Sample rate of the audio data.
208
+
209
+ channels_first (bool, optional):
210
+ Indicates whether the input tensor has channels as the first dimension.
211
+ If True, expects [channel, time]. If False, expects [time, channel].
212
+ Default: True.
213
+
214
+ format (str or None, optional):
215
+ Audio format hint. Not used by TorchCodec (format is determined by
216
+ file extension). A warning is issued if provided.
217
+ Default: None.
218
+
219
+ encoding (str or None, optional):
220
+ Audio encoding. Not fully supported by TorchCodec AudioEncoder.
221
+ A warning is issued if provided. Default: None.
222
+
223
+ bits_per_sample (int or None, optional):
224
+ Bits per sample. Not directly supported by TorchCodec AudioEncoder.
225
+ A warning is issued if provided. Default: None.
226
+
227
+ buffer_size (int, optional):
228
+ Not used by TorchCodec AudioEncoder. Provided for API compatibility.
229
+ A warning is issued if not default value. Default: 4096.
230
+
231
+ backend (str or None, optional):
232
+ Not used by TorchCodec AudioEncoder. Provided for API compatibility.
233
+ A warning is issued if provided. Default: None.
234
+
235
+ compression (float, int or None, optional):
236
+ Compression level or bit rate. Maps to bit_rate parameter in
237
+ TorchCodec AudioEncoder. Default: None.
238
+
239
+ Raises:
240
+ ImportError: If torchcodec is not available.
241
+ ValueError: If input parameters are invalid.
242
+ RuntimeError: If TorchCodec fails to encode the audio.
243
+
244
+ Note:
245
+ - TorchCodec AudioEncoder expects float32 samples in [-1, 1] range.
246
+ - Some parameters (format, encoding, bits_per_sample, buffer_size, backend)
247
+ are not used by TorchCodec but are provided for API compatibility.
248
+ - The output format is determined by the file extension in the uri.
249
+ - TorchCodec uses FFmpeg under the hood for encoding.
250
+ """
251
+ # Import torchcodec here to provide clear error if not available
252
+ try:
253
+ from torchcodec.encoders import AudioEncoder
254
+ except ImportError as e:
255
+ raise ImportError(
256
+ "TorchCodec is required for save_with_torchcodec. "
257
+ "Please install torchcodec to use this function."
258
+ ) from e
259
+
260
+ # Parameter validation and warnings
261
+ if format is not None:
262
+ import warnings
263
+ warnings.warn(
264
+ "The 'format' parameter is not used by TorchCodec AudioEncoder. "
265
+ "Format is determined by the file extension.",
266
+ UserWarning,
267
+ stacklevel=2
268
+ )
269
+
270
+ if encoding is not None:
271
+ import warnings
272
+ warnings.warn(
273
+ "The 'encoding' parameter is not fully supported by TorchCodec AudioEncoder.",
274
+ UserWarning,
275
+ stacklevel=2
276
+ )
277
+
278
+ if bits_per_sample is not None:
279
+ import warnings
280
+ warnings.warn(
281
+ "The 'bits_per_sample' parameter is not directly supported by TorchCodec AudioEncoder.",
282
+ UserWarning,
283
+ stacklevel=2
284
+ )
285
+
286
+ if buffer_size != 4096:
287
+ import warnings
288
+ warnings.warn(
289
+ "The 'buffer_size' parameter is not used by TorchCodec AudioEncoder.",
290
+ UserWarning,
291
+ stacklevel=2
292
+ )
293
+
294
+ if backend is not None:
295
+ import warnings
296
+ warnings.warn(
297
+ "The 'backend' parameter is not used by TorchCodec AudioEncoder.",
298
+ UserWarning,
299
+ stacklevel=2
300
+ )
301
+
302
+ # Input validation
303
+ if not isinstance(src, torch.Tensor):
304
+ raise ValueError(f"Expected src to be a torch.Tensor, got {type(src)}")
305
+
306
+ if src.dtype != torch.float32:
307
+ src = src.float()
308
+
309
+ if sample_rate <= 0:
310
+ raise ValueError(f"sample_rate must be positive, got {sample_rate}")
311
+
312
+ # Handle tensor shape and channels_first
313
+ if src.ndim == 1:
314
+ # Convert to 2D: [1, time] for channels_first=True
315
+ if channels_first:
316
+ data = src.unsqueeze(0) # [1, time]
317
+ else:
318
+ # For channels_first=False, input is [time] -> reshape to [time, 1] -> transpose to [1, time]
319
+ data = src.unsqueeze(1).transpose(0, 1) # [time, 1] -> [1, time]
320
+ elif src.ndim == 2:
321
+ if channels_first:
322
+ data = src # Already [channel, time]
323
+ else:
324
+ data = src.transpose(0, 1) # [time, channel] -> [channel, time]
325
+ else:
326
+ raise ValueError(f"Expected 1D or 2D tensor, got {src.ndim}D tensor")
327
+
328
+ # Create AudioEncoder
329
+ try:
330
+ encoder = AudioEncoder(data, sample_rate=sample_rate)
331
+ except Exception as e:
332
+ raise RuntimeError(f"Failed to create AudioEncoder: {e}") from e
333
+
334
+ # Determine bit_rate from compression parameter
335
+ bit_rate = None
336
+ if compression is not None:
337
+ if isinstance(compression, (int, float)):
338
+ bit_rate = int(compression)
339
+ else:
340
+ import warnings
341
+ warnings.warn(
342
+ f"Unsupported compression type {type(compression)}. "
343
+ "TorchCodec AudioEncoder expects int or float for bit_rate.",
344
+ UserWarning,
345
+ stacklevel=2
346
+ )
347
+
348
+ # Save to file
349
+ try:
350
+ encoder.to_file(uri, bit_rate=bit_rate)
351
+ except Exception as e:
352
+ raise RuntimeError(f"Failed to save audio to {uri}: {e}") from e
@@ -2,10 +2,10 @@ def __getattr__(name: str):
2
2
  import warnings
3
3
 
4
4
  warnings.warn(
5
- "Torchaudio's I/O functions now support par-call bakcend dispatch. "
5
+ "Torchaudio's I/O functions now support per-call backend dispatch. "
6
6
  "Importing backend implementation directly is no longer guaranteed to work. "
7
7
  "Please use `backend` keyword with load/save/info function, instead of "
8
- "calling the udnerlying implementation directly.",
8
+ "calling the underlying implementation directly.",
9
9
  stacklevel=2,
10
10
  )
11
11
 
@@ -2,10 +2,10 @@ def __getattr__(name: str):
2
2
  import warnings
3
3
 
4
4
  warnings.warn(
5
- "Torchaudio's I/O functions now support par-call bakcend dispatch. "
5
+ "Torchaudio's I/O functions now support per-call backend dispatch. "
6
6
  "Importing backend implementation directly is no longer guaranteed to work. "
7
7
  "Please use `backend` keyword with load/save/info function, instead of "
8
- "calling the udnerlying implementation directly.",
8
+ "calling the underlying implementation directly.",
9
9
  stacklevel=2,
10
10
  )
11
11
 
@@ -2,10 +2,10 @@ def __getattr__(name: str):
2
2
  import warnings
3
3
 
4
4
  warnings.warn(
5
- "Torchaudio's I/O functions now support par-call bakcend dispatch. "
5
+ "Torchaudio's I/O functions now support per-call backend dispatch. "
6
6
  "Importing backend implementation directly is no longer guaranteed to work. "
7
7
  "Please use `backend` keyword with load/save/info function, instead of "
8
- "calling the udnerlying implementation directly.",
8
+ "calling the underlying implementation directly.",
9
9
  stacklevel=2,
10
10
  )
11
11
 
@@ -1,4 +1,6 @@
1
- from ._alignment import forced_align, merge_tokens, TokenSpan
1
+ from torchaudio._internal.module_utils import dropping_support
2
+
3
+ from ._alignment import forced_align as _forced_align, merge_tokens, TokenSpan
2
4
  from .filtering import (
3
5
  allpass_biquad,
4
6
  band_biquad,
@@ -23,6 +25,9 @@ from .filtering import (
23
25
  treble_biquad,
24
26
  vad,
25
27
  )
28
+
29
+ forced_align = dropping_support(_forced_align)
30
+
26
31
  from .functional import (
27
32
  add_noise,
28
33
  amplitude_to_DB,
@@ -9,7 +9,8 @@ from typing import List, Optional, Tuple, Union
9
9
  import torch
10
10
  import torchaudio
11
11
  from torch import Tensor
12
- from torchaudio._internal.module_utils import deprecated
12
+ from torchaudio._internal.module_utils import deprecated, dropping_support
13
+
13
14
 
14
15
  from .filtering import highpass_biquad, treble_biquad
15
16
 
@@ -1760,7 +1761,7 @@ def _fix_waveform_shape(
1760
1761
  return waveform_shift
1761
1762
 
1762
1763
 
1763
- def rnnt_loss(
1764
+ def _rnnt_loss(
1764
1765
  logits: Tensor,
1765
1766
  targets: Tensor,
1766
1767
  logit_lengths: Tensor,
@@ -1864,6 +1865,9 @@ def psd(
1864
1865
  psd = psd.sum(dim=-3)
1865
1866
  return psd
1866
1867
 
1868
+ # Expose both the deprecated wrapper and the original because TorchScript breaks on
1869
+ # wrapped functions.
1870
+ rnnt_loss = dropping_support(_rnnt_loss)
1867
1871
 
1868
1872
  def _compute_mat_trace(input: torch.Tensor, dim1: int = -1, dim2: int = -2) -> torch.Tensor:
1869
1873
  r"""Compute the trace of a Tensor along ``dim1`` and ``dim2`` dimensions.
@@ -2494,7 +2498,7 @@ def deemphasis(waveform, coeff: float = 0.97) -> torch.Tensor:
2494
2498
  """
2495
2499
  a_coeffs = torch.tensor([1.0, -coeff], dtype=waveform.dtype, device=waveform.device)
2496
2500
  b_coeffs = torch.tensor([1.0, 0.0], dtype=waveform.dtype, device=waveform.device)
2497
- return torchaudio.functional.lfilter(waveform, a_coeffs=a_coeffs, b_coeffs=b_coeffs)
2501
+ return torchaudio.functional.filtering.lfilter(waveform, a_coeffs=a_coeffs, b_coeffs=b_coeffs)
2498
2502
 
2499
2503
 
2500
2504
  def frechet_distance(mu_x, sigma_x, mu_y, sigma_y):