torchaudio 2.0.2__cp311-cp311-win_amd64.whl → 2.1.1__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchaudio might be problematic. Click here for more details.

Files changed (88)
  1. torchaudio/__init__.py +22 -3
  2. torchaudio/_backend/__init__.py +55 -4
  3. torchaudio/_backend/backend.py +53 -0
  4. torchaudio/_backend/common.py +52 -0
  5. torchaudio/_backend/ffmpeg.py +373 -0
  6. torchaudio/_backend/soundfile.py +54 -0
  7. torchaudio/_backend/soundfile_backend.py +457 -0
  8. torchaudio/_backend/sox.py +91 -0
  9. torchaudio/_backend/utils.py +81 -323
  10. torchaudio/_extension/__init__.py +55 -36
  11. torchaudio/_extension/utils.py +109 -17
  12. torchaudio/_internal/__init__.py +4 -1
  13. torchaudio/_internal/module_utils.py +37 -6
  14. torchaudio/backend/__init__.py +7 -11
  15. torchaudio/backend/_no_backend.py +24 -0
  16. torchaudio/backend/_sox_io_backend.py +297 -0
  17. torchaudio/backend/common.py +12 -52
  18. torchaudio/backend/no_backend.py +11 -21
  19. torchaudio/backend/soundfile_backend.py +11 -448
  20. torchaudio/backend/sox_io_backend.py +11 -435
  21. torchaudio/backend/utils.py +9 -18
  22. torchaudio/datasets/__init__.py +2 -0
  23. torchaudio/datasets/cmuarctic.py +1 -1
  24. torchaudio/datasets/cmudict.py +61 -62
  25. torchaudio/datasets/dr_vctk.py +1 -1
  26. torchaudio/datasets/gtzan.py +1 -1
  27. torchaudio/datasets/librilight_limited.py +1 -1
  28. torchaudio/datasets/librispeech.py +1 -1
  29. torchaudio/datasets/librispeech_biasing.py +189 -0
  30. torchaudio/datasets/libritts.py +1 -1
  31. torchaudio/datasets/ljspeech.py +1 -1
  32. torchaudio/datasets/musdb_hq.py +1 -1
  33. torchaudio/datasets/quesst14.py +1 -1
  34. torchaudio/datasets/speechcommands.py +1 -1
  35. torchaudio/datasets/tedlium.py +1 -1
  36. torchaudio/datasets/vctk.py +1 -1
  37. torchaudio/datasets/voxceleb1.py +1 -1
  38. torchaudio/datasets/yesno.py +1 -1
  39. torchaudio/functional/__init__.py +6 -2
  40. torchaudio/functional/_alignment.py +128 -0
  41. torchaudio/functional/filtering.py +69 -92
  42. torchaudio/functional/functional.py +99 -148
  43. torchaudio/io/__init__.py +4 -1
  44. torchaudio/io/_effector.py +347 -0
  45. torchaudio/io/_stream_reader.py +158 -90
  46. torchaudio/io/_stream_writer.py +196 -10
  47. torchaudio/lib/_torchaudio.pyd +0 -0
  48. torchaudio/lib/_torchaudio_ffmpeg4.pyd +0 -0
  49. torchaudio/lib/_torchaudio_ffmpeg5.pyd +0 -0
  50. torchaudio/lib/_torchaudio_ffmpeg6.pyd +0 -0
  51. torchaudio/lib/libtorchaudio.pyd +0 -0
  52. torchaudio/lib/libtorchaudio_ffmpeg4.pyd +0 -0
  53. torchaudio/lib/libtorchaudio_ffmpeg5.pyd +0 -0
  54. torchaudio/lib/libtorchaudio_ffmpeg6.pyd +0 -0
  55. torchaudio/models/__init__.py +14 -0
  56. torchaudio/models/decoder/__init__.py +22 -7
  57. torchaudio/models/decoder/_ctc_decoder.py +123 -69
  58. torchaudio/models/decoder/_cuda_ctc_decoder.py +187 -0
  59. torchaudio/models/rnnt_decoder.py +10 -14
  60. torchaudio/models/squim/__init__.py +11 -0
  61. torchaudio/models/squim/objective.py +326 -0
  62. torchaudio/models/squim/subjective.py +150 -0
  63. torchaudio/models/wav2vec2/components.py +6 -10
  64. torchaudio/pipelines/__init__.py +9 -0
  65. torchaudio/pipelines/_squim_pipeline.py +176 -0
  66. torchaudio/pipelines/_wav2vec2/aligner.py +87 -0
  67. torchaudio/pipelines/_wav2vec2/impl.py +198 -68
  68. torchaudio/pipelines/_wav2vec2/utils.py +120 -0
  69. torchaudio/sox_effects/sox_effects.py +7 -30
  70. torchaudio/transforms/__init__.py +2 -0
  71. torchaudio/transforms/_transforms.py +99 -54
  72. torchaudio/utils/download.py +2 -2
  73. torchaudio/utils/ffmpeg_utils.py +20 -15
  74. torchaudio/utils/sox_utils.py +8 -9
  75. torchaudio/version.py +2 -2
  76. torchaudio-2.1.1.dist-info/METADATA +113 -0
  77. torchaudio-2.1.1.dist-info/RECORD +115 -0
  78. {torchaudio-2.0.2.dist-info → torchaudio-2.1.1.dist-info}/WHEEL +1 -1
  79. torchaudio/io/_compat.py +0 -241
  80. torchaudio/lib/_torchaudio_ffmpeg.pyd +0 -0
  81. torchaudio/lib/flashlight_lib_text_decoder.pyd +0 -0
  82. torchaudio/lib/flashlight_lib_text_dictionary.pyd +0 -0
  83. torchaudio/lib/libflashlight-text.pyd +0 -0
  84. torchaudio/lib/libtorchaudio_ffmpeg.pyd +0 -0
  85. torchaudio-2.0.2.dist-info/METADATA +0 -26
  86. torchaudio-2.0.2.dist-info/RECORD +0 -98
  87. {torchaudio-2.0.2.dist-info → torchaudio-2.1.1.dist-info}/LICENSE +0 -0
  88. {torchaudio-2.0.2.dist-info → torchaudio-2.1.1.dist-info}/top_level.txt +0 -0
@@ -1,438 +1,14 @@
1
- import os
2
- import warnings
3
- from typing import Optional, Tuple
4
-
5
- import torch
6
- import torchaudio
7
- from torchaudio.utils.sox_utils import get_buffer_size
8
-
9
- from .common import AudioMetaData
10
-
11
-
12
- # Note: need to comply TorchScript syntax -- need annotation and no f-string
13
- def _fail_info(filepath: str, format: Optional[str]) -> AudioMetaData:
14
- raise RuntimeError("Failed to fetch metadata from {}".format(filepath))
15
-
16
-
17
- def _fail_info_fileobj(fileobj, format: Optional[str], buffer_size: int) -> AudioMetaData:
18
- raise RuntimeError("Failed to fetch metadata from {}".format(fileobj))
19
-
20
-
21
- # Note: need to comply TorchScript syntax -- need annotation and no f-string
22
- def _fail_load(
23
- filepath: str,
24
- frame_offset: int = 0,
25
- num_frames: int = -1,
26
- normalize: bool = True,
27
- channels_first: bool = True,
28
- format: Optional[str] = None,
29
- ) -> Tuple[torch.Tensor, int]:
30
- raise RuntimeError("Failed to load audio from {}".format(filepath))
31
-
32
-
33
- def _fail_load_fileobj(fileobj, *args, **kwargs):
34
- raise RuntimeError(f"Failed to load audio from {fileobj}")
35
-
36
-
37
- if torchaudio._extension._FFMPEG_INITIALIZED:
38
- import torchaudio.io._compat as _compat
39
-
40
- _fallback_info = _compat.info_audio
41
- _fallback_info_fileobj = _compat.info_audio_fileobj
42
- _fallback_load = _compat.load_audio
43
- _fallback_load_fileobj = _compat.load_audio_fileobj
44
- else:
45
- _fallback_info = _fail_info
46
- _fallback_info_fileobj = _fail_info_fileobj
47
- _fallback_load = _fail_load
48
- _fallback_load_fileobj = _fail_load_fileobj
49
-
50
-
51
- _deprecation_message = (
52
- "File-like object support in sox_io backend is deprecated, "
53
- "and will be removed in v2.1. "
54
- "See https://github.com/pytorch/audio/issues/2950 for the detail."
55
- "Please migrate to the new dispatcher, or use soundfile backend."
56
- )
57
-
58
-
59
- @torchaudio._extension.fail_if_no_sox
60
- def info(
61
- filepath: str,
62
- format: Optional[str] = None,
63
- ) -> AudioMetaData:
64
- """Get signal information of an audio file.
65
-
66
- Args:
67
- filepath (path-like object or file-like object):
68
- Source of audio data. When the function is not compiled by TorchScript,
69
- (e.g. ``torch.jit.script``), the following types are accepted;
70
-
71
- * ``path-like``: file path
72
- * ``file-like``: Object with ``read(size: int) -> bytes`` method,
73
- which returns byte string of at most ``size`` length.
74
-
75
- When the function is compiled by TorchScript, only ``str`` type is allowed.
76
-
77
- Note:
78
-
79
- * When the input type is file-like object, this function cannot
80
- get the correct length (``num_samples``) for certain formats,
81
- such as ``vorbis``.
82
- In this case, the value of ``num_samples`` is ``0``.
83
- * This argument is intentionally annotated as ``str`` only due to
84
- TorchScript compiler compatibility.
85
-
86
- format (str or None, optional):
87
- Override the format detection with the given format.
88
- Providing the argument might help when libsox can not infer the format
89
- from header or extension.
90
-
91
- Returns:
92
- AudioMetaData: Metadata of the given audio.
93
- """
94
- if not torch.jit.is_scripting():
95
- if hasattr(filepath, "read"):
96
- # Special case for Backward compatibility
97
- # v0.11 -> v0.12, mp3 handling is moved to FFmpeg.
98
- # file-like objects are not necessarily fallback-able
99
- # when they are not seekable.
100
- # The previous libsox-based implementation required `format="mp3"`
101
- # because internally libsox does not auto-detect the format.
102
- # For the special BC for mp3, we handle mp3 differently.
103
- buffer_size = get_buffer_size()
104
- if format == "mp3":
105
- return _fallback_info_fileobj(filepath, format, buffer_size)
106
- warnings.warn(_deprecation_message)
107
- sinfo = torchaudio.lib._torchaudio_sox.get_info_fileobj(filepath, format)
108
- if sinfo is not None:
109
- return AudioMetaData(*sinfo)
110
- return _fallback_info_fileobj(filepath, format, buffer_size)
111
- filepath = os.fspath(filepath)
112
- sinfo = torch.ops.torchaudio.sox_io_get_info(filepath, format)
113
- if sinfo is not None:
114
- return AudioMetaData(*sinfo)
115
- return _fallback_info(filepath, format)
116
-
117
-
118
- @torchaudio._extension.fail_if_no_sox
119
- def load(
120
- filepath: str,
121
- frame_offset: int = 0,
122
- num_frames: int = -1,
123
- normalize: bool = True,
124
- channels_first: bool = True,
125
- format: Optional[str] = None,
126
- ) -> Tuple[torch.Tensor, int]:
127
- """Load audio data from file.
128
-
129
- Note:
130
- This function can handle all the codecs that underlying libsox can handle,
131
- however it is tested on the following formats;
132
-
133
- * WAV, AMB
134
-
135
- * 32-bit floating-point
136
- * 32-bit signed integer
137
- * 24-bit signed integer
138
- * 16-bit signed integer
139
- * 8-bit unsigned integer (WAV only)
140
-
141
- * MP3
142
- * FLAC
143
- * OGG/VORBIS
144
- * OPUS
145
- * SPHERE
146
- * AMR-NB
147
-
148
- To load ``MP3``, ``FLAC``, ``OGG/VORBIS``, ``OPUS`` and other codecs ``libsox`` does not
149
- handle natively, your installation of ``torchaudio`` has to be linked to ``libsox``
150
- and corresponding codec libraries such as ``libmad`` or ``libmp3lame`` etc.
151
-
152
- By default (``normalize=True``, ``channels_first=True``), this function returns Tensor with
153
- ``float32`` dtype, and the shape of `[channel, time]`.
154
-
155
- .. warning::
156
-
157
- ``normalize`` argument does not perform volume normalization.
158
- It only converts the sample type to `torch.float32` from the native sample
159
- type.
160
-
161
- When the input format is WAV with integer type, such as 32-bit signed integer, 16-bit
162
- signed integer, 24-bit signed integer, and 8-bit unsigned integer, by providing ``normalize=False``,
163
- this function can return integer Tensor, where the samples are expressed within the whole range
164
- of the corresponding dtype, that is, ``int32`` tensor for 32-bit signed PCM,
165
- ``int16`` for 16-bit signed PCM and ``uint8`` for 8-bit unsigned PCM. Since torch does not
166
- support ``int24`` dtype, 24-bit signed PCM are converted to ``int32`` tensors.
167
-
168
- ``normalize`` argument has no effect on 32-bit floating-point WAV and other formats, such as
169
- ``flac`` and ``mp3``.
170
-
171
- For these formats, this function always returns ``float32`` Tensor with values.
172
-
173
- Args:
174
- filepath (path-like object or file-like object):
175
- Source of audio data. When the function is not compiled by TorchScript,
176
- (e.g. ``torch.jit.script``), the following types are accepted;
177
-
178
- * ``path-like``: file path
179
- * ``file-like``: Object with ``read(size: int) -> bytes`` method,
180
- which returns byte string of at most ``size`` length.
181
-
182
- When the function is compiled by TorchScript, only ``str`` type is allowed.
183
-
184
- Note: This argument is intentionally annotated as ``str`` only due to
185
- TorchScript compiler compatibility.
186
- frame_offset (int):
187
- Number of frames to skip before start reading data.
188
- num_frames (int, optional):
189
- Maximum number of frames to read. ``-1`` reads all the remaining samples,
190
- starting from ``frame_offset``.
191
- This function may return the less number of frames if there is not enough
192
- frames in the given file.
193
- normalize (bool, optional):
194
- When ``True``, this function converts the native sample type to ``float32``.
195
- Default: ``True``.
196
-
197
- If input file is integer WAV, giving ``False`` will change the resulting Tensor type to
198
- integer type.
199
- This argument has no effect for formats other than integer WAV type.
200
-
201
- channels_first (bool, optional):
202
- When True, the returned Tensor has dimension `[channel, time]`.
203
- Otherwise, the returned Tensor's dimension is `[time, channel]`.
204
- format (str or None, optional):
205
- Override the format detection with the given format.
206
- Providing the argument might help when libsox can not infer the format
207
- from header or extension.
208
-
209
- Returns:
210
- (torch.Tensor, int): Resulting Tensor and sample rate.
211
- If the input file has integer wav format and ``normalize=False``, then it has
212
- integer type, else ``float32`` type. If ``channels_first=True``, it has
213
- `[channel, time]` else `[time, channel]`.
214
- """
215
- if not torch.jit.is_scripting():
216
- if hasattr(filepath, "read"):
217
- # Special case for Backward compatibility
218
- # v0.11 -> v0.12, mp3 handling is moved to FFmpeg.
219
- # file-like objects are not necessarily fallback-able
220
- # when they are not seekable.
221
- # The previous libsox-based implementation required `format="mp3"`
222
- # because internally libsox does not auto-detect the format.
223
- # For the special BC for mp3, we handle mp3 differently.
224
- buffer_size = get_buffer_size()
225
- if format == "mp3":
226
- return _fallback_load_fileobj(
227
- filepath,
228
- frame_offset,
229
- num_frames,
230
- normalize,
231
- channels_first,
232
- format,
233
- buffer_size,
234
- )
235
- warnings.warn(_deprecation_message)
236
- ret = torchaudio.lib._torchaudio_sox.load_audio_fileobj(
237
- filepath, frame_offset, num_frames, normalize, channels_first, format
238
- )
239
- if ret is not None:
240
- return ret
241
- return _fallback_load_fileobj(
242
- filepath,
243
- frame_offset,
244
- num_frames,
245
- normalize,
246
- channels_first,
247
- format,
248
- buffer_size,
249
- )
250
- filepath = os.fspath(filepath)
251
- ret = torch.ops.torchaudio.sox_io_load_audio_file(
252
- filepath, frame_offset, num_frames, normalize, channels_first, format
1
+ def __getattr__(name: str):
2
+ import warnings
3
+
4
+ warnings.warn(
5
+ "Torchaudio's I/O functions now support par-call bakcend dispatch. "
6
+ "Importing backend implementation directly is no longer guaranteed to work. "
7
+ "Please use `backend` keyword with load/save/info function, instead of "
8
+ "calling the udnerlying implementation directly.",
9
+ stacklevel=2,
253
10
  )
254
- if ret is not None:
255
- return ret
256
- return _fallback_load(filepath, frame_offset, num_frames, normalize, channels_first, format)
257
-
258
-
259
- @torchaudio._extension.fail_if_no_sox
260
- def save(
261
- filepath: str,
262
- src: torch.Tensor,
263
- sample_rate: int,
264
- channels_first: bool = True,
265
- compression: Optional[float] = None,
266
- format: Optional[str] = None,
267
- encoding: Optional[str] = None,
268
- bits_per_sample: Optional[int] = None,
269
- ):
270
- """Save audio data to file.
271
-
272
- Args:
273
- filepath (str or pathlib.Path): Path to save file.
274
- This function also handles ``pathlib.Path`` objects, but is annotated
275
- as ``str`` for TorchScript compiler compatibility.
276
- src (torch.Tensor): Audio data to save. must be 2D tensor.
277
- sample_rate (int): sampling rate
278
- channels_first (bool, optional): If ``True``, the given tensor is interpreted as `[channel, time]`,
279
- otherwise `[time, channel]`.
280
- compression (float or None, optional): Used for formats other than WAV.
281
- This corresponds to ``-C`` option of ``sox`` command.
282
-
283
- ``"mp3"``
284
- Either bitrate (in ``kbps``) with quality factor, such as ``128.2``, or
285
- VBR encoding with quality factor such as ``-4.2``. Default: ``-4.5``.
286
-
287
- ``"flac"``
288
- Whole number from ``0`` to ``8``. ``8`` is default and highest compression.
289
-
290
- ``"ogg"``, ``"vorbis"``
291
- Number from ``-1`` to ``10``; ``-1`` is the highest compression
292
- and lowest quality. Default: ``3``.
293
-
294
- See the detail at http://sox.sourceforge.net/soxformat.html.
295
- format (str or None, optional): Override the audio format.
296
- When ``filepath`` argument is path-like object, audio format is infered from
297
- file extension. If file extension is missing or different, you can specify the
298
- correct format with this argument.
299
-
300
- When ``filepath`` argument is file-like object, this argument is required.
301
-
302
- Valid values are ``"wav"``, ``"mp3"``, ``"ogg"``, ``"vorbis"``, ``"amr-nb"``,
303
- ``"amb"``, ``"flac"``, ``"sph"``, ``"gsm"``, and ``"htk"``.
304
-
305
- encoding (str or None, optional): Changes the encoding for the supported formats.
306
- This argument is effective only for supported formats, such as ``"wav"``, ``""amb"``
307
- and ``"sph"``. Valid values are;
308
-
309
- - ``"PCM_S"`` (signed integer Linear PCM)
310
- - ``"PCM_U"`` (unsigned integer Linear PCM)
311
- - ``"PCM_F"`` (floating point PCM)
312
- - ``"ULAW"`` (mu-law)
313
- - ``"ALAW"`` (a-law)
314
-
315
- Default values
316
- If not provided, the default value is picked based on ``format`` and ``bits_per_sample``.
317
-
318
- ``"wav"``, ``"amb"``
319
- - | If both ``encoding`` and ``bits_per_sample`` are not provided, the ``dtype`` of the
320
- | Tensor is used to determine the default value.
321
-
322
- - ``"PCM_U"`` if dtype is ``uint8``
323
- - ``"PCM_S"`` if dtype is ``int16`` or ``int32``
324
- - ``"PCM_F"`` if dtype is ``float32``
325
-
326
- - ``"PCM_U"`` if ``bits_per_sample=8``
327
- - ``"PCM_S"`` otherwise
328
11
 
329
- ``"sph"`` format;
330
- - the default value is ``"PCM_S"``
12
+ from . import _sox_io_backend
331
13
 
332
- bits_per_sample (int or None, optional): Changes the bit depth for the supported formats.
333
- When ``format`` is one of ``"wav"``, ``"flac"``, ``"sph"``, or ``"amb"``, you can change the
334
- bit depth. Valid values are ``8``, ``16``, ``32`` and ``64``.
335
-
336
- Default Value;
337
- If not provided, the default values are picked based on ``format`` and ``"encoding"``;
338
-
339
- ``"wav"``, ``"amb"``;
340
- - | If both ``encoding`` and ``bits_per_sample`` are not provided, the ``dtype`` of the
341
- | Tensor is used.
342
-
343
- - ``8`` if dtype is ``uint8``
344
- - ``16`` if dtype is ``int16``
345
- - ``32`` if dtype is ``int32`` or ``float32``
346
-
347
- - ``8`` if ``encoding`` is ``"PCM_U"``, ``"ULAW"`` or ``"ALAW"``
348
- - ``16`` if ``encoding`` is ``"PCM_S"``
349
- - ``32`` if ``encoding`` is ``"PCM_F"``
350
-
351
- ``"flac"`` format;
352
- - the default value is ``24``
353
-
354
- ``"sph"`` format;
355
- - ``16`` if ``encoding`` is ``"PCM_U"``, ``"PCM_S"``, ``"PCM_F"`` or not provided.
356
- - ``8`` if ``encoding`` is ``"ULAW"`` or ``"ALAW"``
357
-
358
- ``"amb"`` format;
359
- - ``8`` if ``encoding`` is ``"PCM_U"``, ``"ULAW"`` or ``"ALAW"``
360
- - ``16`` if ``encoding`` is ``"PCM_S"`` or not provided.
361
- - ``32`` if ``encoding`` is ``"PCM_F"``
362
-
363
- Supported formats/encodings/bit depth/compression are;
364
-
365
- ``"wav"``, ``"amb"``
366
- - 32-bit floating-point PCM
367
- - 32-bit signed integer PCM
368
- - 24-bit signed integer PCM
369
- - 16-bit signed integer PCM
370
- - 8-bit unsigned integer PCM
371
- - 8-bit mu-law
372
- - 8-bit a-law
373
-
374
- Note: Default encoding/bit depth is determined by the dtype of the input Tensor.
375
-
376
- ``"mp3"``
377
- Fixed bit rate (such as 128kHz) and variable bit rate compression.
378
- Default: VBR with high quality.
379
-
380
- ``"flac"``
381
- - 8-bit
382
- - 16-bit
383
- - 24-bit (default)
384
-
385
- ``"ogg"``, ``"vorbis"``
386
- - Different quality level. Default: approx. 112kbps
387
-
388
- ``"sph"``
389
- - 8-bit signed integer PCM
390
- - 16-bit signed integer PCM
391
- - 24-bit signed integer PCM
392
- - 32-bit signed integer PCM (default)
393
- - 8-bit mu-law
394
- - 8-bit a-law
395
- - 16-bit a-law
396
- - 24-bit a-law
397
- - 32-bit a-law
398
-
399
- ``"amr-nb"``
400
- Bitrate ranging from 4.75 kbit/s to 12.2 kbit/s. Default: 4.75 kbit/s
401
-
402
- ``"gsm"``
403
- Lossy Speech Compression, CPU intensive.
404
-
405
- ``"htk"``
406
- Uses a default single-channel 16-bit PCM format.
407
-
408
- Note:
409
- To save into formats that ``libsox`` does not handle natively, (such as ``"mp3"``,
410
- ``"flac"``, ``"ogg"`` and ``"vorbis"``), your installation of ``torchaudio`` has
411
- to be linked to ``libsox`` and corresponding codec libraries such as ``libmad``
412
- or ``libmp3lame`` etc.
413
- """
414
- if not torch.jit.is_scripting():
415
- if hasattr(filepath, "write"):
416
- warnings.warn(_deprecation_message)
417
- torchaudio.lib._torchaudio_sox.save_audio_fileobj(
418
- filepath,
419
- src,
420
- sample_rate,
421
- channels_first,
422
- compression,
423
- format,
424
- encoding,
425
- bits_per_sample,
426
- )
427
- return
428
- filepath = os.fspath(filepath)
429
- torch.ops.torchaudio.sox_io_save_audio_file(
430
- filepath,
431
- src,
432
- sample_rate,
433
- channels_first,
434
- compression,
435
- format,
436
- encoding,
437
- bits_per_sample,
438
- )
14
+ return getattr(_sox_io_backend, name)
@@ -1,12 +1,12 @@
1
1
  """Defines utilities for switching audio backends"""
2
- import os
3
2
  import warnings
4
3
  from typing import List, Optional
5
4
 
6
5
  import torchaudio
6
+ from torchaudio._backend import soundfile_backend
7
7
  from torchaudio._internal import module_utils as _mod_utils
8
8
 
9
- from . import no_backend, soundfile_backend, sox_io_backend
9
+ from . import _no_backend as no_backend, _sox_io_backend as sox_io_backend
10
10
 
11
11
  __all__ = [
12
12
  "list_audio_backends",
@@ -15,19 +15,12 @@ __all__ = [
15
15
  ]
16
16
 
17
17
 
18
- def _is_backend_dispatcher_enabled() -> bool:
19
- return os.getenv("TORCHAUDIO_USE_BACKEND_DISPATCHER") == "1"
20
-
21
-
22
18
  def list_audio_backends() -> List[str]:
23
19
  """List available backends
24
20
 
25
21
  Returns:
26
22
  List[str]: The list of available backends.
27
23
  """
28
- if _is_backend_dispatcher_enabled():
29
- warnings.warn("list_audio_backend's return value is irrelevant when the I/O backend dispatcher is enabled.")
30
-
31
24
  backends = []
32
25
  if _mod_utils.is_module_available("soundfile"):
33
26
  backends.append("soundfile")
@@ -44,10 +37,6 @@ def set_audio_backend(backend: Optional[str]):
44
37
  One of ``"sox_io"`` or ``"soundfile"`` based on availability
45
38
  of the system. If ``None`` is provided the current backend is unassigned.
46
39
  """
47
- if _is_backend_dispatcher_enabled():
48
- warnings.warn("set_audio_backend is a no-op when the I/O backend dispatcher is enabled.")
49
- return
50
-
51
40
  if backend is not None and backend not in list_audio_backends():
52
41
  raise RuntimeError(f'Backend "{backend}" is not one of ' f"available backends: {list_audio_backends()}.")
53
42
 
@@ -64,14 +53,19 @@ def set_audio_backend(backend: Optional[str]):
64
53
  setattr(torchaudio, func, getattr(module, func))
65
54
 
66
55
 
67
- def _init_audio_backend():
56
+ def _init_backend():
57
+ warnings.warn(
58
+ "TorchAudio's global backend is now deprecated. "
59
+ "Please enable distpatcher by setting `TORCHAUDIO_USE_BACKEND_DISPATCHER=1`, "
60
+ "and specify backend when calling load/info/save function.",
61
+ stacklevel=3,
62
+ )
68
63
  backends = list_audio_backends()
69
64
  if "sox_io" in backends:
70
65
  set_audio_backend("sox_io")
71
66
  elif "soundfile" in backends:
72
67
  set_audio_backend("soundfile")
73
68
  else:
74
- warnings.warn("No audio backend is available.")
75
69
  set_audio_backend(None)
76
70
 
77
71
 
@@ -81,9 +75,6 @@ def get_audio_backend() -> Optional[str]:
81
75
  Returns:
82
76
  Optional[str]: The name of the current backend or ``None`` if no backend is assigned.
83
77
  """
84
- if _is_backend_dispatcher_enabled():
85
- warnings.warn("get_audio_backend's return value is irrelevant when the I/O backend dispatcher is enabled.")
86
-
87
78
  if torchaudio.load == no_backend.load:
88
79
  return None
89
80
  if torchaudio.load == sox_io_backend.load:
@@ -8,6 +8,7 @@ from .iemocap import IEMOCAP
8
8
  from .librilight_limited import LibriLightLimited
9
9
  from .librimix import LibriMix
10
10
  from .librispeech import LIBRISPEECH
11
+ from .librispeech_biasing import LibriSpeechBiasing
11
12
  from .libritts import LIBRITTS
12
13
  from .ljspeech import LJSPEECH
13
14
  from .musdb_hq import MUSDB_HQ
@@ -23,6 +24,7 @@ from .yesno import YESNO
23
24
  __all__ = [
24
25
  "COMMONVOICE",
25
26
  "LIBRISPEECH",
27
+ "LibriSpeechBiasing",
26
28
  "LibriLightLimited",
27
29
  "SPEECHCOMMANDS",
28
30
  "VCTK_092",
@@ -5,8 +5,8 @@ from typing import Tuple, Union
5
5
 
6
6
  import torchaudio
7
7
  from torch import Tensor
8
- from torch.hub import download_url_to_file
9
8
  from torch.utils.data import Dataset
9
+ from torchaudio._internal import download_url_to_file
10
10
  from torchaudio.datasets.utils import _extract_tar
11
11
 
12
12
  URL = "aew"