torchaudio 2.0.2__cp311-cp311-manylinux1_x86_64.whl → 2.1.1__cp311-cp311-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchaudio might be problematic. Click here for more details.

Files changed (92) hide show
  1. torchaudio/__init__.py +22 -3
  2. torchaudio/_backend/__init__.py +55 -4
  3. torchaudio/_backend/backend.py +53 -0
  4. torchaudio/_backend/common.py +52 -0
  5. torchaudio/_backend/ffmpeg.py +373 -0
  6. torchaudio/_backend/soundfile.py +54 -0
  7. torchaudio/_backend/soundfile_backend.py +457 -0
  8. torchaudio/_backend/sox.py +91 -0
  9. torchaudio/_backend/utils.py +81 -323
  10. torchaudio/_extension/__init__.py +55 -36
  11. torchaudio/_extension/utils.py +109 -17
  12. torchaudio/_internal/__init__.py +4 -1
  13. torchaudio/_internal/module_utils.py +37 -6
  14. torchaudio/backend/__init__.py +7 -11
  15. torchaudio/backend/_no_backend.py +24 -0
  16. torchaudio/backend/_sox_io_backend.py +297 -0
  17. torchaudio/backend/common.py +12 -52
  18. torchaudio/backend/no_backend.py +11 -21
  19. torchaudio/backend/soundfile_backend.py +11 -448
  20. torchaudio/backend/sox_io_backend.py +11 -435
  21. torchaudio/backend/utils.py +9 -18
  22. torchaudio/datasets/__init__.py +2 -0
  23. torchaudio/datasets/cmuarctic.py +1 -1
  24. torchaudio/datasets/cmudict.py +61 -62
  25. torchaudio/datasets/dr_vctk.py +1 -1
  26. torchaudio/datasets/gtzan.py +1 -1
  27. torchaudio/datasets/librilight_limited.py +1 -1
  28. torchaudio/datasets/librispeech.py +1 -1
  29. torchaudio/datasets/librispeech_biasing.py +189 -0
  30. torchaudio/datasets/libritts.py +1 -1
  31. torchaudio/datasets/ljspeech.py +1 -1
  32. torchaudio/datasets/musdb_hq.py +1 -1
  33. torchaudio/datasets/quesst14.py +1 -1
  34. torchaudio/datasets/speechcommands.py +1 -1
  35. torchaudio/datasets/tedlium.py +1 -1
  36. torchaudio/datasets/vctk.py +1 -1
  37. torchaudio/datasets/voxceleb1.py +1 -1
  38. torchaudio/datasets/yesno.py +1 -1
  39. torchaudio/functional/__init__.py +6 -2
  40. torchaudio/functional/_alignment.py +128 -0
  41. torchaudio/functional/filtering.py +69 -92
  42. torchaudio/functional/functional.py +99 -148
  43. torchaudio/io/__init__.py +4 -1
  44. torchaudio/io/_effector.py +347 -0
  45. torchaudio/io/_stream_reader.py +158 -90
  46. torchaudio/io/_stream_writer.py +196 -10
  47. torchaudio/lib/_torchaudio.so +0 -0
  48. torchaudio/lib/_torchaudio_ffmpeg4.so +0 -0
  49. torchaudio/lib/_torchaudio_ffmpeg5.so +0 -0
  50. torchaudio/lib/_torchaudio_ffmpeg6.so +0 -0
  51. torchaudio/lib/_torchaudio_sox.so +0 -0
  52. torchaudio/lib/libctc_prefix_decoder.so +0 -0
  53. torchaudio/lib/libtorchaudio.so +0 -0
  54. torchaudio/lib/libtorchaudio_ffmpeg4.so +0 -0
  55. torchaudio/lib/libtorchaudio_ffmpeg5.so +0 -0
  56. torchaudio/lib/libtorchaudio_ffmpeg6.so +0 -0
  57. torchaudio/lib/libtorchaudio_sox.so +0 -0
  58. torchaudio/lib/pybind11_prefixctc.so +0 -0
  59. torchaudio/models/__init__.py +14 -0
  60. torchaudio/models/decoder/__init__.py +22 -7
  61. torchaudio/models/decoder/_ctc_decoder.py +123 -69
  62. torchaudio/models/decoder/_cuda_ctc_decoder.py +187 -0
  63. torchaudio/models/rnnt_decoder.py +10 -14
  64. torchaudio/models/squim/__init__.py +11 -0
  65. torchaudio/models/squim/objective.py +326 -0
  66. torchaudio/models/squim/subjective.py +150 -0
  67. torchaudio/models/wav2vec2/components.py +6 -10
  68. torchaudio/pipelines/__init__.py +9 -0
  69. torchaudio/pipelines/_squim_pipeline.py +176 -0
  70. torchaudio/pipelines/_wav2vec2/aligner.py +87 -0
  71. torchaudio/pipelines/_wav2vec2/impl.py +198 -68
  72. torchaudio/pipelines/_wav2vec2/utils.py +120 -0
  73. torchaudio/sox_effects/sox_effects.py +7 -30
  74. torchaudio/transforms/__init__.py +2 -0
  75. torchaudio/transforms/_transforms.py +99 -54
  76. torchaudio/utils/download.py +2 -2
  77. torchaudio/utils/ffmpeg_utils.py +20 -15
  78. torchaudio/utils/sox_utils.py +8 -9
  79. torchaudio/version.py +2 -2
  80. torchaudio-2.1.1.dist-info/METADATA +113 -0
  81. torchaudio-2.1.1.dist-info/RECORD +119 -0
  82. torchaudio/io/_compat.py +0 -241
  83. torchaudio/lib/_torchaudio_ffmpeg.so +0 -0
  84. torchaudio/lib/flashlight_lib_text_decoder.so +0 -0
  85. torchaudio/lib/flashlight_lib_text_dictionary.so +0 -0
  86. torchaudio/lib/libflashlight-text.so +0 -0
  87. torchaudio/lib/libtorchaudio_ffmpeg.so +0 -0
  88. torchaudio-2.0.2.dist-info/METADATA +0 -26
  89. torchaudio-2.0.2.dist-info/RECORD +0 -100
  90. {torchaudio-2.0.2.dist-info → torchaudio-2.1.1.dist-info}/LICENSE +0 -0
  91. {torchaudio-2.0.2.dist-info → torchaudio-2.1.1.dist-info}/WHEEL +0 -0
  92. {torchaudio-2.0.2.dist-info → torchaudio-2.1.1.dist-info}/top_level.txt +0 -0
@@ -6,15 +6,18 @@ Anything that depends on external state should happen in __init__.py
6
6
  """
7
7
 
8
8
 
9
+ import importlib
10
+ import logging
9
11
  import os
12
+ import platform
13
+ import warnings
10
14
  from functools import wraps
11
15
  from pathlib import Path
12
16
 
13
17
  import torch
14
-
15
18
  import torchaudio
16
- from torchaudio._internal.module_utils import is_module_available
17
19
 
20
+ _LG = logging.getLogger(__name__)
18
21
  _LIB_DIR = Path(__file__).parent.parent / "lib"
19
22
 
20
23
 
@@ -67,7 +70,7 @@ def _init_sox():
67
70
  _load_lib("libtorchaudio_sox")
68
71
  import torchaudio.lib._torchaudio_sox # noqa
69
72
 
70
- torch.ops.torchaudio.sox_utils_set_verbosity(0)
73
+ torchaudio.lib._torchaudio_sox.set_verbosity(0)
71
74
 
72
75
  import atexit
73
76
 
@@ -75,22 +78,92 @@ def _init_sox():
75
78
  atexit.register(torch.ops.torchaudio.sox_effects_shutdown_sox_effects)
76
79
 
77
80
 
78
- def _init_ffmpeg():
79
- if not is_module_available("torchaudio.lib._torchaudio_ffmpeg"):
80
- raise RuntimeError(
81
- "torchaudio is not compiled with FFmpeg integration. Please set USE_FFMPEG=1 when compiling torchaudio."
82
- )
81
def _try_access_avutil(ffmpeg_ver):
    """Probe for the libavutil shared library that pairs with ``ffmpeg_ver``.

    The platform-specific libavutil file name is built from the libavutil
    major version associated with the given FFmpeg major version, and is then
    handed to ``torchaudio.lib._torchaudio.find_avutil``.

    Args:
        ffmpeg_ver (str): FFmpeg major version; one of ``"6"``, ``"5"`` or ``"4"``.

    Raises:
        KeyError: If the current platform or ``ffmpeg_ver`` is missing from
            the lookup tables below.
    """
    filename_patterns = {
        "Linux": "libavutil.so.{ver}",
        "Darwin": "libavutil.{ver}.dylib",
        "Windows": "avutil-{ver}.dll",
    }
    # FFmpeg major version -> libavutil major version.
    avutil_majors = {"6": 58, "5": 57, "4": 56}

    pattern = filename_patterns[platform.system()]
    libavutil = pattern.format(ver=avutil_majors[ffmpeg_ver])
    # NOTE(review): presumably raises when the library cannot be located;
    # the implementation of `find_avutil` is not visible here.
    torchaudio.lib._torchaudio.find_avutil(libavutil)
90
+
91
+
92
def _find_versionsed_ffmpeg_extension(ffmpeg_ver: str):
    """Load the FFmpeg integration for one version suffix and return its module.

    Args:
        ffmpeg_ver (str): FFmpeg version suffix, e.g. ``"6"``. An empty string
            selects the un-suffixed ``_torchaudio_ffmpeg`` extension.

    Returns:
        The imported ``torchaudio.lib._torchaudio_ffmpeg{ffmpeg_ver}`` module.

    Raises:
        RuntimeError: If the extension file for ``ffmpeg_ver`` does not exist.
    """
    _LG.debug("Attempting to load FFmpeg version %s.", ffmpeg_ver)

    extension = f"_torchaudio_ffmpeg{ffmpeg_ver}"
    if not _get_lib_path(extension).exists():
        raise RuntimeError(f"FFmpeg {ffmpeg_ver} extension is not available.")

    if ffmpeg_ver:
        # Cheap FFmpeg availability probe. It is not strictly sufficient,
        # because other FFmpeg libraries could still be missing, but it is
        # usually enough.
        #
        # The probe exists because it is unclear whether a failing `_load_lib`
        # (which calls `ctypes.CDLL` under the hood) could leak handles to the
        # shared libraries of dependencies. That is, when `ctypes.CDLL("foo")`
        # fails because one dependency of `foo` is missing while `foo` and its
        # other dependencies exist, there is no known guarantee that none of
        # them stay loaded after the failure.
        _try_access_avutil(ffmpeg_ver)

    _load_lib(f"libtorchaudio_ffmpeg{ffmpeg_ver}")

    _LG.debug("Found FFmpeg version %s.", ffmpeg_ver)
    return importlib.import_module(f"torchaudio.lib.{extension}")
88
119
 
89
- import torchaudio.lib._torchaudio_ffmpeg # noqa
90
120
 
91
- torch.ops.torchaudio.ffmpeg_init()
92
- if torch.ops.torchaudio.ffmpeg_get_log_level() > 8:
93
- torch.ops.torchaudio.ffmpeg_set_log_level(8)
121
# FFmpeg version suffixes of the extension builds, newest first. The empty
# string stands for the un-suffixed extension ("single FFmpeg mode").
_FFMPEG_VERS = ["6", "5", "4", ""]


def _find_ffmpeg_extension(ffmpeg_vers, show_error):
    """Return the first FFmpeg extension module that loads successfully.

    Args:
        ffmpeg_vers (list of str): Version suffixes to try, in order.
        show_error (bool): When true, each failed attempt is logged at error
            level (with traceback); otherwise at debug level.

    Returns:
        The extension module for the first version that loads.

    Raises:
        ImportError: If none of the given versions could be loaded.
    """
    log = _LG.error if show_error else _LG.debug
    for ffmpeg_ver in ffmpeg_vers:
        try:
            return _find_versionsed_ffmpeg_extension(ffmpeg_ver)
        except Exception:
            # Deliberately broad: any failure for one version simply means
            # the next candidate is tried. The traceback goes to the log.
            log("Failed to load FFmpeg %s extension.", ffmpeg_ver, exc_info=True)
    raise ImportError(f"Failed to initialize FFmpeg extension. Tried versions: {ffmpeg_vers}")
133
+
134
+
135
def _find_available_ffmpeg_ext():
    """List the FFmpeg version suffixes whose extension file exists.

    Candidates come from the module-level ``_FFMPEG_VERS`` so that the set of
    known versions is defined in a single place.

    Returns:
        list of str: The entries of ``_FFMPEG_VERS``, in the same order, for
        which ``_get_lib_path(f"_torchaudio_ffmpeg{v}")`` exists.
    """
    return [v for v in _FFMPEG_VERS if _get_lib_path(f"_torchaudio_ffmpeg{v}").exists()]
138
+
139
+
140
def _init_ffmpeg(show_error=False):
    """Locate, load and initialise the FFmpeg extension.

    The candidate versions are the ones whose extension files exist. Setting
    the ``TORCHAUDIO_USE_FFMPEG_VERSION`` environment variable to a non-empty
    value narrows the candidates to that single version, unless only the
    un-suffixed extension is present, in which case the variable is ignored
    with a warning.

    Args:
        show_error (bool): Forwarded to ``_find_ffmpeg_extension`` to control
            the log level used for failed load attempts.

    Returns:
        The loaded extension module, after ``init()`` has been called on it
        and its log level has been lowered to 8 if it was higher.

    Raises:
        RuntimeError: If no FFmpeg extension file exists at all.
        ValueError: If the requested version is not among the available ones.
    """
    available = _find_available_ffmpeg_ext()
    if not available:
        raise RuntimeError(
            # fmt: off
            "TorchAudio is not built with FFmpeg integration. "
            "Please build torchaudio with USE_FFMPEG=1."
            # fmt: on
        )

    candidates = available
    # User override
    requested = os.environ.get("TORCHAUDIO_USE_FFMPEG_VERSION")
    if requested:
        if available == [""]:
            warnings.warn("TorchAudio is built in single FFmpeg mode. TORCHAUDIO_USE_FFMPEG_VERSION is ignored.")
        elif requested in available:
            candidates = [requested]
        else:
            selectable = [v for v in available if v]
            raise ValueError(
                f"The FFmpeg version {requested} (read from TORCHAUDIO_USE_FFMPEG_VERSION) "
                f"is not available. Available versions are {selectable}"
            )

    ext = _find_ffmpeg_extension(candidates, show_error)
    ext.init()
    if ext.get_log_level() > 8:
        ext.set_log_level(8)
    return ext
94
167
 
95
168
 
96
169
  def _init_dll_path():
@@ -124,6 +197,25 @@ def _check_cuda_version():
124
197
  return version
125
198
 
126
199
 
200
def _fail_since_no_sox(func):
    """Wrap ``func`` so that calling it reports the missing sox extension.

    Args:
        func (callable): The function that needs the sox extension.

    Returns:
        callable: A wrapper carrying ``func``'s metadata. Each call re-runs
        ``_init_sox`` and, when that raises, raises ``RuntimeError`` chained
        to the original failure.
    """

    @wraps(func)
    def wrapped(*_args, **_kwargs):
        try:
            # Initialisation is retried only so that users get to see its
            # stacktrace; it is not expected to succeed here.
            _init_sox()
        except Exception as err:
            reason = (
                f"{func.__name__} requires sox extension which is not available. "
                "Please refer to the stacktrace above for how to resolve this."
            )
            raise RuntimeError(reason) from err
        # Not expected in normal execution; reached only if the retry succeeded.
        return func(*_args, **_kwargs)

    return wrapped
217
+
218
+
127
219
  def _fail_since_no_ffmpeg(func):
128
220
  @wraps(func)
129
221
  def wrapped(*_args, **_kwargs):
@@ -131,7 +223,7 @@ def _fail_since_no_ffmpeg(func):
131
223
  # Note:
132
224
  # We run _init_ffmpeg again just to show users the stacktrace.
133
225
  # _init_ffmpeg would not succeed here.
134
- _init_ffmpeg()
226
+ _init_ffmpeg(show_error=True)
135
227
  except Exception as err:
136
228
  raise RuntimeError(
137
229
  f"{func.__name__} requires FFmpeg extension which is not available. "
@@ -1,4 +1,7 @@
1
- from torch.hub import download_url_to_file, load_state_dict_from_url
1
+ try:
2
+ from .fb import download_url_to_file, load_state_dict_from_url
3
+ except ImportError:
4
+ from torch.hub import download_url_to_file, load_state_dict_from_url
2
5
 
3
6
 
4
7
  __all__ = [
@@ -1,9 +1,29 @@
1
1
  import importlib.util
2
+ import os
2
3
  import warnings
3
4
  from functools import wraps
4
5
  from typing import Optional
5
6
 
6
7
 
8
def eval_env(var, default):
    """Interpret the environment variable ``var`` as a boolean flag.

    Args:
        var (str): Name of the environment variable.
        default: Value returned unchanged when ``var`` is not set.

    Returns:
        ``default`` when the variable is unset, ``True`` for one of the
        accepted true spellings, ``False`` for one of the accepted false
        spellings.

    Raises:
        RuntimeError: If the variable is set to any other value.
    """
    truthy = ["1", "true", "TRUE", "on", "ON", "yes", "YES"]
    falsy = ["0", "false", "FALSE", "off", "OFF", "no", "NO"]

    try:
        raw = os.environ[var]
    except KeyError:
        return default

    if raw in truthy:
        return True
    if raw in falsy:
        return False
    # fmt: off
    raise RuntimeError(
        f"Unexpected environment variable value `{var}={raw}`. "
        f"Expected one of {truthy + falsy}")
    # fmt: on
25
+
26
+
7
27
  def is_module_available(*modules: str) -> bool:
8
28
  r"""Returns if a top-level module with :attr:`name` exists *without**
9
29
  importing it. This is generally safer than try-catch block around a
@@ -40,25 +60,36 @@ def requires_module(*modules: str):
40
60
  return decorator
41
61
 
42
62
 
43
def deprecated(direction: str, version: Optional[str] = None, remove: bool = False):
    """Decorator to add deprecation message

    Calling the decorated function emits a warning, and the function's
    docstring is replaced by a "DEPRECATED" notice that embeds the original
    docstring (when there is one) followed by a ``.. warning::`` block.

    Args:
        direction (str): Migration steps to be given to users.
        version (str or int): The version when the object will be removed
        remove (bool): If enabled, append future removal message.
    """
    # The removal sentence is shared by the runtime warning and the docstring.
    removal = f'It will be removed from {"future" if version is None else version} release. '

    def decorator(func):
        @wraps(func)
        def wrapped(*args, **kwargs):
            message = f"{func.__module__}.{func.__name__} has been deprecated. {direction}"
            if remove:
                message += f" {removal}"
            # stacklevel=2 attributes the warning to the caller of the
            # deprecated function rather than to this wrapper.
            warnings.warn(message, stacklevel=2)
            return func(*args, **kwargs)

        message = "This function has been deprecated. "
        if remove:
            message += removal

        # `func.__doc__` is None for undocumented callables (and under -OO);
        # avoid rendering the literal text "None" in that case.
        original_doc = func.__doc__
        headline = "DEPRECATED" if original_doc is None else f"DEPRECATED: {original_doc}"
        wrapped.__doc__ = f"""{headline}

    .. warning::

       {message}
       {direction}
    """

        return wrapped

    return decorator
@@ -1,14 +1,10 @@
1
- # flake8: noqa
2
- import torchaudio
1
+ # NOTE:
2
+ # The entire `torchaudio.backend` module is deprecated.
3
+ # New things should be added to `torchaudio._backend`.
4
+ # Only things related to backward compatibility should be placed here.
3
5
 
4
- from . import utils
5
- from .utils import _is_backend_dispatcher_enabled, get_audio_backend, list_audio_backends, set_audio_backend
6
6
 
7
- if _is_backend_dispatcher_enabled():
8
- from torchaudio._backend.utils import get_info_func, get_load_func, get_save_func
7
+ from . import common, no_backend, soundfile_backend, sox_io_backend # noqa
8
+ from .utils import _init_backend, get_audio_backend, list_audio_backends, set_audio_backend
9
9
 
10
- torchaudio.info = get_info_func()
11
- torchaudio.load = get_load_func()
12
- torchaudio.save = get_save_func()
13
- else:
14
- utils._init_audio_backend()
10
+ __all__ = ["_init_backend", "get_audio_backend", "list_audio_backends", "set_audio_backend"]
@@ -0,0 +1,24 @@
1
+ from pathlib import Path
2
+ from typing import Callable, Optional, Tuple, Union
3
+
4
+ from torch import Tensor
5
+
6
+
7
def load(
    filepath: Union[str, Path],
    out: Optional[Tensor] = None,
    normalization: Union[bool, float, Callable] = True,
    channels_first: bool = True,
    num_frames: int = 0,
    offset: int = 0,
    filetype: Optional[str] = None,
) -> Tuple[Tensor, int]:
    """Placeholder ``load`` for when no audio I/O backend is available.

    All arguments are accepted only for signature compatibility and are
    ignored.

    Raises:
        RuntimeError: Always.
    """
    reason = "No audio I/O backend is available."
    raise RuntimeError(reason)
17
+
18
+
19
def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None:
    """Placeholder ``save`` for when no audio I/O backend is available.

    All arguments are accepted only for signature compatibility and are
    ignored.

    Raises:
        RuntimeError: Always.
    """
    reason = "No audio I/O backend is available."
    raise RuntimeError(reason)
21
+
22
+
23
def info(filepath: str) -> None:
    """Placeholder ``info`` for when no audio I/O backend is available.

    The argument is accepted only for signature compatibility and is ignored.

    Raises:
        RuntimeError: Always.
    """
    reason = "No audio I/O backend is available."
    raise RuntimeError(reason)
@@ -0,0 +1,297 @@
1
+ import os
2
+ from typing import Optional, Tuple
3
+
4
+ import torch
5
+ import torchaudio
6
+ from torchaudio import AudioMetaData
7
+
8
+
9
@torchaudio._extension.fail_if_no_sox
def info(filepath: str, format: Optional[str] = None) -> AudioMetaData:
    """Read the signal metadata of an audio file through libsox.

    Args:
        filepath (str):
            Source of audio data. Outside of TorchScript the value is passed
            through ``os.fspath``; file-like objects (anything with a ``read``
            attribute) are rejected.

        format (str or None, optional):
            Override the format detection with the given format.
            This can help when libsox cannot infer the format from the header
            or the extension.

    Returns:
        AudioMetaData: Metadata of the given audio.

    Raises:
        RuntimeError: If ``filepath`` is a file-like object.
    """
    # The guard keeps the Python-only checks out of the scripted code path.
    if not torch.jit.is_scripting():
        if hasattr(filepath, "read"):
            raise RuntimeError("sox_io backend does not support file-like object.")
        filepath = os.fspath(filepath)
    metadata_fields = torch.ops.torchaudio.sox_io_get_info(filepath, format)
    return AudioMetaData(*metadata_fields)
34
+
35
+
36
@torchaudio._extension.fail_if_no_sox
def load(
    filepath: str,
    frame_offset: int = 0,
    num_frames: int = -1,
    normalize: bool = True,
    channels_first: bool = True,
    format: Optional[str] = None,
) -> Tuple[torch.Tensor, int]:
    """Read audio data from a file through libsox.

    Note:
        Every codec that the underlying libsox can handle is accepted, but the
        function is tested on the following formats only:

        * WAV, AMB

            * 32-bit floating-point
            * 32-bit signed integer
            * 24-bit signed integer
            * 16-bit signed integer
            * 8-bit unsigned integer (WAV only)

        * MP3
        * FLAC
        * OGG/VORBIS
        * OPUS
        * SPHERE
        * AMR-NB

        Loading ``MP3``, ``FLAC``, ``OGG/VORBIS``, ``OPUS`` and other codecs that
        ``libsox`` does not handle natively requires an installation of
        ``torchaudio`` that is linked to ``libsox`` and to the corresponding codec
        libraries, such as ``libmad`` or ``libmp3lame``.

    With the defaults (``normalize=True``, ``channels_first=True``) the returned
    Tensor has ``float32`` dtype and the shape `[channel, time]`.

    .. warning::

        The ``normalize`` argument does not perform volume normalization.
        It only converts the native sample type to `torch.float32`.

        For WAV input with an integer sample type (32-bit signed, 24-bit signed,
        16-bit signed or 8-bit unsigned integer), passing ``normalize=False``
        makes this function return an integer Tensor whose samples span the
        whole range of the corresponding dtype: ``int32`` for 32-bit signed PCM,
        ``int16`` for 16-bit signed PCM and ``uint8`` for 8-bit unsigned PCM.
        Because torch has no ``int24`` dtype, 24-bit signed PCM is converted to
        ``int32`` tensors.

        ``normalize`` has no effect on 32-bit floating-point WAV and on other
        formats such as ``flac`` and ``mp3``; for these formats the result is
        always a ``float32`` Tensor.

    Args:
        filepath (path-like object): Source of audio data.
        frame_offset (int):
            Number of frames to skip before reading starts.
        num_frames (int, optional):
            Maximum number of frames to read. ``-1`` reads all the remaining
            samples, starting from ``frame_offset``.
            Fewer frames may be returned when the file does not contain enough
            of them.
        normalize (bool, optional):
            When ``True``, the native sample type is converted to ``float32``.
            Default: ``True``.

            For integer WAV input, ``False`` changes the resulting Tensor type
            to an integer type.
            The argument has no effect for formats other than integer WAV.

        channels_first (bool, optional):
            When True, the returned Tensor has dimension `[channel, time]`.
            Otherwise, the returned Tensor's dimension is `[time, channel]`.
        format (str or None, optional):
            Override the format detection with the given format.
            This can help when libsox cannot infer the format from the header
            or the extension.

    Returns:
        (torch.Tensor, int): Resulting Tensor and sample rate.
            The Tensor has an integer type when the input file is integer WAV
            and ``normalize=False``, and ``float32`` type otherwise. Its layout
            is `[channel, time]` when ``channels_first=True`` and
            `[time, channel]` otherwise.

    Raises:
        RuntimeError: If ``filepath`` is a file-like object.
    """
    # The guard keeps the Python-only checks out of the scripted code path.
    if not torch.jit.is_scripting():
        if hasattr(filepath, "read"):
            raise RuntimeError("sox_io backend does not support file-like object.")
        filepath = os.fspath(filepath)
    loaded = torch.ops.torchaudio.sox_io_load_audio_file(
        filepath, frame_offset, num_frames, normalize, channels_first, format
    )
    return loaded
129
+
130
+
131
@torchaudio._extension.fail_if_no_sox
def save(
    filepath: str,
    src: torch.Tensor,
    sample_rate: int,
    channels_first: bool = True,
    compression: Optional[float] = None,
    format: Optional[str] = None,
    encoding: Optional[str] = None,
    bits_per_sample: Optional[int] = None,
):
    """Write audio data to a file through libsox.

    Args:
        filepath (path-like object): Path to save file. File-like objects
            (anything with a ``write`` attribute) are rejected.
        src (torch.Tensor): Audio data to save. Must be a 2D tensor.
        sample_rate (int): Sampling rate.
        channels_first (bool, optional): If ``True``, the given tensor is interpreted
            as `[channel, time]`, otherwise as `[time, channel]`.
        compression (float or None, optional): Used for formats other than WAV.
            This corresponds to the ``-C`` option of the ``sox`` command.

            ``"mp3"``
                Either bitrate (in ``kbps``) with quality factor, such as ``128.2``, or
                VBR encoding with quality factor such as ``-4.2``. Default: ``-4.5``.

            ``"flac"``
                Whole number from ``0`` to ``8``. ``8`` is default and highest compression.

            ``"ogg"``, ``"vorbis"``
                Number from ``-1`` to ``10``; ``-1`` is the highest compression
                and lowest quality. Default: ``3``.

            See the detail at http://sox.sourceforge.net/soxformat.html.
        format (str or None, optional): Override the audio format.
            By default the audio format is inferred from the file extension of
            ``filepath``. If the extension is missing or different, the correct
            format can be given with this argument.

            Valid values are ``"wav"``, ``"mp3"``, ``"ogg"``, ``"vorbis"``, ``"amr-nb"``,
            ``"amb"``, ``"flac"``, ``"sph"``, ``"gsm"``, and ``"htk"``.

        encoding (str or None, optional): Changes the encoding for the supported formats.
            This argument is effective only for supported formats, such as ``"wav"``,
            ``"amb"`` and ``"sph"``. Valid values are:

                - ``"PCM_S"`` (signed integer Linear PCM)
                - ``"PCM_U"`` (unsigned integer Linear PCM)
                - ``"PCM_F"`` (floating point PCM)
                - ``"ULAW"`` (mu-law)
                - ``"ALAW"`` (a-law)

            Default values
                If not provided, the default value is picked based on ``format`` and
                ``bits_per_sample``.

                ``"wav"``, ``"amb"``
                    - | If both ``encoding`` and ``bits_per_sample`` are not provided, the
                      | ``dtype`` of the Tensor is used to determine the default value.

                        - ``"PCM_U"`` if dtype is ``uint8``
                        - ``"PCM_S"`` if dtype is ``int16`` or ``int32``
                        - ``"PCM_F"`` if dtype is ``float32``

                    - When selected from ``bits_per_sample``:

                        - ``"PCM_U"`` if ``bits_per_sample=8``
                        - ``"PCM_S"`` otherwise

                ``"sph"`` format;
                    - the default value is ``"PCM_S"``

        bits_per_sample (int or None, optional): Changes the bit depth for the supported formats.
            When ``format`` is one of ``"wav"``, ``"flac"``, ``"sph"``, or ``"amb"``, the
            bit depth can be changed. Valid values are ``8``, ``16``, ``32`` and ``64``.

            Default Value;
                If not provided, the default values are picked based on ``format`` and
                ``encoding``;

                ``"wav"``, ``"amb"``;
                    - | If both ``encoding`` and ``bits_per_sample`` are not provided, the
                      | ``dtype`` of the Tensor is used.

                        - ``8`` if dtype is ``uint8``
                        - ``16`` if dtype is ``int16``
                        - ``32`` if dtype is ``int32`` or ``float32``

                    - When selected from ``encoding``:

                        - ``8`` if ``encoding`` is ``"PCM_U"``, ``"ULAW"`` or ``"ALAW"``
                        - ``16`` if ``encoding`` is ``"PCM_S"``
                        - ``32`` if ``encoding`` is ``"PCM_F"``

                ``"flac"`` format;
                    - the default value is ``24``

                ``"sph"`` format;
                    - ``16`` if ``encoding`` is ``"PCM_U"``, ``"PCM_S"``, ``"PCM_F"`` or not provided.
                    - ``8`` if ``encoding`` is ``"ULAW"`` or ``"ALAW"``

                ``"amb"`` format;
                    - ``8`` if ``encoding`` is ``"PCM_U"``, ``"ULAW"`` or ``"ALAW"``
                    - ``16`` if ``encoding`` is ``"PCM_S"`` or not provided.
                    - ``32`` if ``encoding`` is ``"PCM_F"``

    Supported formats/encodings/bit depth/compression are;

    ``"wav"``, ``"amb"``
        - 32-bit floating-point PCM
        - 32-bit signed integer PCM
        - 24-bit signed integer PCM
        - 16-bit signed integer PCM
        - 8-bit unsigned integer PCM
        - 8-bit mu-law
        - 8-bit a-law

        Note: Default encoding/bit depth is determined by the dtype of the input Tensor.

    ``"mp3"``
        Fixed bit rate (such as 128 kbps) and variable bit rate compression.
        Default: VBR with high quality.

    ``"flac"``
        - 8-bit
        - 16-bit
        - 24-bit (default)

    ``"ogg"``, ``"vorbis"``
        - Different quality level. Default: approx. 112kbps

    ``"sph"``
        - 8-bit signed integer PCM
        - 16-bit signed integer PCM
        - 24-bit signed integer PCM
        - 32-bit signed integer PCM (default)
        - 8-bit mu-law
        - 8-bit a-law
        - 16-bit a-law
        - 24-bit a-law
        - 32-bit a-law

    ``"amr-nb"``
        Bitrate ranging from 4.75 kbit/s to 12.2 kbit/s. Default: 4.75 kbit/s

    ``"gsm"``
        Lossy Speech Compression, CPU intensive.

    ``"htk"``
        Uses a default single-channel 16-bit PCM format.

    Note:
        Saving into formats that ``libsox`` does not handle natively (such as ``"mp3"``,
        ``"flac"``, ``"ogg"`` and ``"vorbis"``) requires an installation of ``torchaudio``
        that is linked to ``libsox`` and to the corresponding codec libraries, such as
        ``libmad`` or ``libmp3lame``.

    Raises:
        RuntimeError: If ``filepath`` is a file-like object.
    """
    # The guard keeps the Python-only checks out of the scripted code path.
    if not torch.jit.is_scripting():
        if hasattr(filepath, "write"):
            raise RuntimeError("sox_io backend does not handle file-like object.")
        filepath = os.fspath(filepath)
    torch.ops.torchaudio.sox_io_save_audio_file(
        filepath, src, sample_rate, channels_first, compression, format, encoding, bits_per_sample
    )
@@ -1,53 +1,13 @@
1
- class AudioMetaData:
2
- """Return type of ``torchaudio.info`` function.
3
-
4
- This class is used by :py:mod:`"sox_io" backend<torchaudio.backends.sox_io_backend>` and
5
- :py:mod:`"soundfile" backend<torchaudio.backends.soundfile_backend>`.
6
-
7
- :ivar int sample_rate: Sample rate
8
- :ivar int num_frames: The number of frames
9
- :ivar int num_channels: The number of channels
10
- :ivar int bits_per_sample: The number of bits per sample. This is 0 for lossy formats,
11
- or when it cannot be accurately inferred.
12
- :ivar str encoding: Audio encoding
13
- The values encoding can take are one of the following:
14
-
15
- * ``PCM_S``: Signed integer linear PCM
16
- * ``PCM_U``: Unsigned integer linear PCM
17
- * ``PCM_F``: Floating point linear PCM
18
- * ``FLAC``: Flac, Free Lossless Audio Codec
19
- * ``ULAW``: Mu-law
20
- * ``ALAW``: A-law
21
- * ``MP3`` : MP3, MPEG-1 Audio Layer III
22
- * ``VORBIS``: OGG Vorbis
23
- * ``AMR_WB``: Adaptive Multi-Rate Wideband
24
- * ``AMR_NB``: Adaptive Multi-Rate Narrowband
25
- * ``OPUS``: Opus
26
- * ``HTK``: Single channel 16-bit PCM
27
- * ``UNKNOWN`` : None of above
28
- """
29
-
30
- def __init__(
31
- self,
32
- sample_rate: int,
33
- num_frames: int,
34
- num_channels: int,
35
- bits_per_sample: int,
36
- encoding: str,
37
- ):
38
- self.sample_rate = sample_rate
39
- self.num_frames = num_frames
40
- self.num_channels = num_channels
41
- self.bits_per_sample = bits_per_sample
42
- self.encoding = encoding
43
-
44
- def __str__(self):
45
- return (
46
- f"AudioMetaData("
47
- f"sample_rate={self.sample_rate}, "
48
- f"num_frames={self.num_frames}, "
49
- f"num_channels={self.num_channels}, "
50
- f"bits_per_sample={self.bits_per_sample}, "
51
- f"encoding={self.encoding}"
52
- f")"
1
def __getattr__(name: str):
    """Module-level attribute hook kept for backward compatibility.

    Only ``AudioMetaData`` is served: accessing it emits a warning that the
    class has moved to ``torchaudio.AudioMetaData`` and returns the class
    imported from ``torchaudio._backend.common``.

    Raises:
        AttributeError: For every other attribute name.
    """
    if name != "AudioMetaData":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    import warnings

    # stacklevel=2 attributes the warning to the code performing the lookup.
    warnings.warn(
        "`torchaudio.backend.common.AudioMetaData` has been moved to "
        "`torchaudio.AudioMetaData`. Please update the import path.",
        stacklevel=2,
    )
    # Imported lazily, only once the legacy name is actually requested.
    from torchaudio._backend.common import AudioMetaData

    return AudioMetaData