torchaudio 2.1.0__cp310-cp310-manylinux2014_aarch64.whl → 2.1.1__cp310-cp310-manylinux2014_aarch64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchaudio might be problematic. Click here for more details.

@@ -3,6 +3,7 @@ from abc import ABC, abstractmethod
3
3
  from typing import BinaryIO, Optional, Tuple, Union
4
4
 
5
5
  from torch import Tensor
6
+ from torchaudio.io import CodecConfig
6
7
 
7
8
  from .common import AudioMetaData
8
9
 
@@ -37,6 +38,7 @@ class Backend(ABC):
37
38
  encoding: Optional[str] = None,
38
39
  bits_per_sample: Optional[int] = None,
39
40
  buffer_size: int = 4096,
41
+ compression: Optional[Union[CodecConfig, float, int]] = None,
40
42
  ) -> None:
41
43
  raise NotImplementedError
42
44
 
@@ -253,6 +253,7 @@ def save_audio(
253
253
  encoding: Optional[str] = None,
254
254
  bits_per_sample: Optional[int] = None,
255
255
  buffer_size: int = 4096,
256
+ compression: Optional[torchaudio.io.CodecConfig] = None,
256
257
  ) -> None:
257
258
  ext = None
258
259
  if hasattr(uri, "write"):
@@ -275,6 +276,7 @@ def save_audio(
275
276
  format=_get_sample_format(src.dtype),
276
277
  encoder=encoder,
277
278
  encoder_format=enc_fmt,
279
+ codec_config=compression,
278
280
  )
279
281
  with s.open():
280
282
  s.write_audio_chunk(0, src)
@@ -343,7 +345,13 @@ class FFmpegBackend(Backend):
343
345
  encoding: Optional[str] = None,
344
346
  bits_per_sample: Optional[int] = None,
345
347
  buffer_size: int = 4096,
348
+ compression: Optional[Union[torchaudio.io.CodecConfig, float, int]] = None,
346
349
  ) -> None:
350
+ if not isinstance(compression, (torchaudio.io.CodecConfig, type(None))):
351
+ raise ValueError(
352
+ "FFmpeg backend expects non-`None` value for argument `compression` to be of ",
353
+ f"type `torchaudio.io.CodecConfig`, but received value of type {type(compression)}",
354
+ )
347
355
  save_audio(
348
356
  uri,
349
357
  src,
@@ -353,6 +361,7 @@ class FFmpegBackend(Backend):
353
361
  encoding,
354
362
  bits_per_sample,
355
363
  buffer_size,
364
+ compression,
356
365
  )
357
366
 
358
367
  @staticmethod
@@ -2,6 +2,7 @@ import os
2
2
  from typing import BinaryIO, Optional, Tuple, Union
3
3
 
4
4
  import torch
5
+ from torchaudio.io import CodecConfig
5
6
 
6
7
  from . import soundfile_backend
7
8
  from .backend import Backend
@@ -35,7 +36,11 @@ class SoundfileBackend(Backend):
35
36
  encoding: Optional[str] = None,
36
37
  bits_per_sample: Optional[int] = None,
37
38
  buffer_size: int = 4096,
39
+ compression: Optional[Union[CodecConfig, float, int]] = None,
38
40
  ) -> None:
41
+ if compression:
42
+ raise ValueError("soundfile backend does not support argument `compression`.")
43
+
39
44
  soundfile_backend.save(
40
45
  uri, src, sample_rate, channels_first, format=format, encoding=encoding, bits_per_sample=bits_per_sample
41
46
  )
@@ -2,6 +2,7 @@ import os
2
2
  from typing import BinaryIO, Optional, Tuple, Union
3
3
 
4
4
  import torch
5
+ from torchaudio.io import CodecConfig
5
6
 
6
7
  from .backend import Backend
7
8
  from .common import AudioMetaData
@@ -55,7 +56,13 @@ class SoXBackend(Backend):
55
56
  encoding: Optional[str] = None,
56
57
  bits_per_sample: Optional[int] = None,
57
58
  buffer_size: int = 4096,
59
+ compression: Optional[Union[CodecConfig, float, int]] = None,
58
60
  ) -> None:
61
+ if not isinstance(compression, (float, int, type(None))):
62
+ raise ValueError(
63
+ "SoX backend expects non-`None` value for argument `compression` to be of ",
64
+ f"type `float` or `int`, but received value of type {type(compression)}",
65
+ )
59
66
  if hasattr(uri, "write"):
60
67
  raise ValueError(
61
68
  "SoX backend does not support writing to file-like objects. ",
@@ -67,7 +74,7 @@ class SoXBackend(Backend):
67
74
  src,
68
75
  sample_rate,
69
76
  channels_first,
70
- None,
77
+ compression,
71
78
  format,
72
79
  encoding,
73
80
  bits_per_sample,
@@ -5,6 +5,7 @@ from typing import BinaryIO, Dict, Optional, Tuple, Type, Union
5
5
  import torch
6
6
 
7
7
  from torchaudio._extension import _FFMPEG_EXT, _SOX_INITIALIZED
8
+ from torchaudio.io import CodecConfig
8
9
 
9
10
  from . import soundfile_backend
10
11
 
@@ -229,6 +230,7 @@ def get_save_func():
229
230
  bits_per_sample: Optional[int] = None,
230
231
  buffer_size: int = 4096,
231
232
  backend: Optional[str] = None,
233
+ compression: Optional[Union[CodecConfig, float, int]] = None,
232
234
  ):
233
235
  """Save audio data to file.
234
236
 
@@ -283,8 +285,32 @@ def get_save_func():
283
285
 
284
286
  .. seealso::
285
287
  :ref:`backend`
288
+
289
+ compression (CodecConfig, float, int, or None, optional):
290
+ Compression configuration to apply.
291
+
292
+ If the selected backend is FFmpeg, an instance of :py:class:`CodecConfig` must be provided.
293
+
294
+ Otherwise, if the selected backend is SoX, a float or int value corresponding to option ``-C`` of the
295
+ ``sox`` command line interface must be provided. For instance:
296
+
297
+ ``"mp3"``
298
+ Either bitrate (in ``kbps``) with quality factor, such as ``128.2``, or
299
+ VBR encoding with quality factor such as ``-4.2``. Default: ``-4.5``.
300
+
301
+ ``"flac"``
302
+ Whole number from ``0`` to ``8``. ``8`` is default and highest compression.
303
+
304
+ ``"ogg"``, ``"vorbis"``
305
+ Number from ``-1`` to ``10``; ``-1`` is the highest compression
306
+ and lowest quality. Default: ``3``.
307
+
308
+ Refer to http://sox.sourceforge.net/soxformat.html for more details.
309
+
286
310
  """
287
311
  backend = dispatcher(uri, format, backend)
288
- return backend.save(uri, src, sample_rate, channels_first, format, encoding, bits_per_sample, buffer_size)
312
+ return backend.save(
313
+ uri, src, sample_rate, channels_first, format, encoding, bits_per_sample, buffer_size, compression
314
+ )
289
315
 
290
316
  return save
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -53,7 +53,7 @@ class IAligner(ABC):
53
53
  emission (Tensor): Sequence of token probability distributions in log-domain.
54
54
  Shape: `(time, tokens)`.
55
55
  tokens (list of integer sequence): Tokenized transcript.
56
- Output from :py:class:`Wav2Vec2FABundle.Tokenizer`.
56
+ Output from :py:class:`torchaudio.pipelines.Wav2Vec2FABundle.Tokenizer`.
57
57
 
58
58
  Returns:
59
59
  (list of TokenSpan sequence): Tokens with time stamps and scores.
@@ -1350,7 +1350,7 @@ WAVLM_LARGE = Wav2Vec2Bundle(
1350
1350
  "encoder_ff_interm_features": 4096,
1351
1351
  "encoder_ff_interm_dropout": 0.0,
1352
1352
  "encoder_dropout": 0.1,
1353
- "encoder_layer_norm_first": False,
1353
+ "encoder_layer_norm_first": True,
1354
1354
  "encoder_layer_drop": 0.05,
1355
1355
  "aux_num_out": None,
1356
1356
  },
torchaudio/version.py CHANGED
@@ -1,2 +1,2 @@
1
- __version__ = '2.1.0'
2
- git_version = '6ea1133706801ec6e81bb29142da2e21a8583a0a'
1
+ __version__ = '2.1.1'
2
+ git_version = '5784206b90d738de888dce4c99b8b46be213f019'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: torchaudio
3
- Version: 2.1.0
3
+ Version: 2.1.1
4
4
  Summary: An audio package for PyTorch
5
5
  Home-page: https://github.com/pytorch/audio
6
6
  Author: Soumith Chintala, David Pollack, Sean Naren, Peter Goldsborough, Moto Hira, Caroline Chen, Jeff Hwang, Zhaoheng Ni, Xiaohui Zhang
@@ -24,7 +24,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
24
24
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
25
25
  Description-Content-Type: text/markdown
26
26
  License-File: LICENSE
27
- Requires-Dist: torch (==2.1.0)
27
+ Requires-Dist: torch (==2.1.1)
28
28
 
29
29
  torchaudio: an audio library for PyTorch
30
30
  ========================================
@@ -85,6 +85,17 @@ If you find this package useful, please cite as:
85
85
  }
86
86
  ```
87
87
 
88
+ ```bibtex
89
+ @misc{hwang2023torchaudio,
90
+ title={TorchAudio 2.1: Advancing speech recognition, self-supervised learning, and audio processing components for PyTorch},
91
+ author={Jeff Hwang and Moto Hira and Caroline Chen and Xiaohui Zhang and Zhaoheng Ni and Guangzhi Sun and Pingchuan Ma and Ruizhe Huang and Vineel Pratap and Yuekai Zhang and Anurag Kumar and Chin-Yun Yu and Chuang Zhu and Chunxi Liu and Jacob Kahn and Mirco Ravanelli and Peng Sun and Shinji Watanabe and Yangyang Shi and Yumeng Tao and Robin Scheibler and Samuele Cornell and Sean Kim and Stavros Petridis},
92
+ year={2023},
93
+ eprint={2310.17864},
94
+ archivePrefix={arXiv},
95
+ primaryClass={eess.AS}
96
+ }
97
+ ```
98
+
88
99
  Disclaimer on Datasets
89
100
  ----------------------
90
101
 
@@ -1,14 +1,14 @@
1
1
  torchaudio/__init__.py,sha256=JNqWeRjt7G9AbkqH-zqgtJ41tBbuKXxzYFwSR6BBwRY,1117
2
2
  torchaudio/kaldi_io.py,sha256=TwS2YgSLlJwOXjNNsHBuXyxhKeKKpptVHLBV7QYZCas,5073
3
- torchaudio/version.py,sha256=qWqmxRpZJgrXDKiBtU2ZfWwMsRHsLYOFf5Ah8P5LnlM,79
3
+ torchaudio/version.py,sha256=DK_cu7DnZC1EF7FtSq_Hik3hHwjvRPbRNaGX9klJIys,79
4
4
  torchaudio/_backend/__init__.py,sha256=3li8KAJkJN2gkw9j9NSTFbxvXFdLiDAsaefdM72oYBU,1730
5
- torchaudio/_backend/backend.py,sha256=g63FIX949ei_u6qRWyMnGYMf1piPD86axuQdCGyzufc,1457
5
+ torchaudio/_backend/backend.py,sha256=hSrfZcj5FMzx5ZpwubN-LLMvBFb7ENyw7HvT_6pVYVU,1565
6
6
  torchaudio/_backend/common.py,sha256=55Y0r0MsdW6gvTOT_Zy60UGFXc60DfdJ7uvycJKK3is,1783
7
- torchaudio/_backend/ffmpeg.py,sha256=thqemsSahEsuNgHoQHny8z7dYuyUuUQIwvq1B8haM4I,12107
8
- torchaudio/_backend/soundfile.py,sha256=4PiQ5-aeT7Cwhvcvdz-sBD7G-EPkB6fJURw86nE6faU,1479
7
+ torchaudio/_backend/ffmpeg.py,sha256=lgof8oWCJA1IgBETLz3bzgURwGs7QphJQPTWnRkL044,12633
8
+ torchaudio/_backend/soundfile.py,sha256=n0Epw0J9rBb89xVJWTXaDfb96YFz0-i2xarXIdDd-Cw,1703
9
9
  torchaudio/_backend/soundfile_backend.py,sha256=qJHEEXU1egCkPJ2Y9uJWFvVhW3AqDZ7z7P7mkJjJJWM,17376
10
- torchaudio/_backend/sox.py,sha256=qSSI3ojVbPXY-ZLrSQCdXd8tgdrK2pehxlTy3ySTR2s,2993
11
- torchaudio/_backend/utils.py,sha256=_cL3g0R9FYsHVCGPvp-TXI7rcG39iEyNilY_sfDFJEw,12047
10
+ torchaudio/_backend/sox.py,sha256=2ZQQ62C9bEm_m1no1bPxmiziRSUlkOy6kuSx-nLfDl0,3401
11
+ torchaudio/_backend/utils.py,sha256=j1LEBoRk1mbJ5yqB1c19qklEQrRUZgKKYCeNCBjqQUM,13221
12
12
  torchaudio/_extension/__init__.py,sha256=gMNUxx7DJmCR5z1tDKlJFihpEfmmoo53JOfKI5FUI_0,4262
13
13
  torchaudio/_extension/utils.py,sha256=jhWYhsh-QTh9Y4dxX2P0w9X8HrOIghCFKwnAK0ekz5s,8679
14
14
  torchaudio/_internal/__init__.py,sha256=gjU8g9HhVd9hHrHXJM0xOlZL6cT8ktO60MN8RHI6ZbA,241
@@ -55,16 +55,16 @@ torchaudio/io/_effector.py,sha256=oeRV2338JK9iX61cE5thrihWtaq7t1l45P-W1aB0d_U,11
55
55
  torchaudio/io/_playback.py,sha256=5-YwAWkU6IHjreGu_GvjsZynCfX_VYwI5wH5qjtXYPk,2320
56
56
  torchaudio/io/_stream_reader.py,sha256=cBOHBB2gVIWSBXyJfJsPFp9fvquKv6AkrD4x1Lwr7lA,34257
57
57
  torchaudio/io/_stream_writer.py,sha256=bFNWKXHLYCwr9SU49rXkuFZHuT1hS3Er0sTiNh5x6uU,19714
58
- torchaudio/lib/_torchaudio.so,sha256=iKB79_X9IaBEWejlv7tQCtttZNxWGVw8HywMb8xqKow,168632
59
- torchaudio/lib/_torchaudio_ffmpeg4.so,sha256=jWFovG2fL2Y8Lksohg9F-5BYoYC7jW5btntf8PQjUwI,470432
60
- torchaudio/lib/_torchaudio_ffmpeg5.so,sha256=kf0falY_RK4PkFBE5slTJSDHWCwZG86ZIQhDiApDtk8,470432
61
- torchaudio/lib/_torchaudio_ffmpeg6.so,sha256=9xvyY9W-hFiuPVfrdGubuXX_Qv9wiD1BdD7NcRZPKmA,470432
62
- torchaudio/lib/_torchaudio_sox.so,sha256=j52Rw-oRK0YzIoMbwnIjpAUaDYnTML8aozsp9EykFug,173608
63
- torchaudio/lib/libtorchaudio.so,sha256=DVWdY8vaLKZQNoDktKXW88JwaYuLMR6D4rWKsp8Lw-o,553688
64
- torchaudio/lib/libtorchaudio_ffmpeg4.so,sha256=utGmAPOPhjDz7dv1flPUARBrSJgFyTz_acmRFZ-h1Ow,613952
65
- torchaudio/lib/libtorchaudio_ffmpeg5.so,sha256=IhJmROxDCuEQjR4hWMUZzs29Tqgz_zNAsXmw_iCP_gk,613952
66
- torchaudio/lib/libtorchaudio_ffmpeg6.so,sha256=YdIlY3XYc8IQNcuhxa5o02Ffib-DOXr_ksx1Jg2tmuo,613952
67
- torchaudio/lib/libtorchaudio_sox.so,sha256=DnGsdB-08X14ysqD7X8OwJ5xz6WTb-BqS8lZAMpRLbw,255216
58
+ torchaudio/lib/_torchaudio.so,sha256=u5pxgZZiqLjAqkkCqJaNwn6QFpBIboM39s-wknTr5QI,168632
59
+ torchaudio/lib/_torchaudio_ffmpeg4.so,sha256=g_t1G5_RfHIqg65ZC6XGfn_HTFWN2TyntADtdevSmEM,470432
60
+ torchaudio/lib/_torchaudio_ffmpeg5.so,sha256=yt9juLGPPSlgH20XEDba-P44gDsgSAOzh097USt07CA,470432
61
+ torchaudio/lib/_torchaudio_ffmpeg6.so,sha256=CSxoIXO-A2y8yrw9-zKffaptGhmble9zKMlokug0FwM,470432
62
+ torchaudio/lib/_torchaudio_sox.so,sha256=h6dpqIdzzi9X6KtniAzeArbPe-UsM71eOda3pgMDCvA,173608
63
+ torchaudio/lib/libtorchaudio.so,sha256=901FBmCPerJcAnR3yDRuw6_museIcwQYUXxm_znIoR4,553688
64
+ torchaudio/lib/libtorchaudio_ffmpeg4.so,sha256=00k6dctt8vhlm4nXRH_Q33yvgotDx4eB510EaJztq4A,613952
65
+ torchaudio/lib/libtorchaudio_ffmpeg5.so,sha256=hpUwhVdCAXRqu-wcB51CcOVTec6Lnks8tsc2IH0aKbU,613952
66
+ torchaudio/lib/libtorchaudio_ffmpeg6.so,sha256=SqmnP1DdR4oyHoz_hb4b7_AZYayCZpNvWuVFAwJ8jpQ,613952
67
+ torchaudio/lib/libtorchaudio_sox.so,sha256=WELUMwI4KO0UJWMl5j2-m8Hs1XdQ6TGqs0m8dABRmBE,255216
68
68
  torchaudio/models/__init__.py,sha256=BNMNGuwpJAFRsdtwHYQ6slGClkrUTu31_7mXh7FjeV4,1995
69
69
  torchaudio/models/_hdemucs.py,sha256=VPnQ73lA9lfAxRjZ85NCGJYP36mPNwTjS-TU4qelu_k,38242
70
70
  torchaudio/models/conformer.py,sha256=5IceU-jcZKofkHTTqRKoytubQ75MzZPrPlfkLsIlxeA,10068
@@ -98,8 +98,8 @@ torchaudio/pipelines/_tts/impl.py,sha256=Tig4_5sITJADwxN5eZGek7Ath_-e3sV8CTM5t6U
98
98
  torchaudio/pipelines/_tts/interface.py,sha256=yUaS0UK3PTRruYXRWFil7lAhr-1iYiyBaDBLmEnJPUQ,10224
99
99
  torchaudio/pipelines/_tts/utils.py,sha256=0rLyoFWS78n5jn9AC99pgIwAjaXSw-MVbj_pjSaOHiM,4616
100
100
  torchaudio/pipelines/_wav2vec2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
101
- torchaudio/pipelines/_wav2vec2/aligner.py,sha256=xRKPcGlIRU_uPlsD4Xy1fo_C4K_oS2eKal53klV6dOA,2688
102
- torchaudio/pipelines/_wav2vec2/impl.py,sha256=bX57aQb8LTYoRwGCDIutWl9tcghEIK-1BL7ydD_0y60,65562
101
+ torchaudio/pipelines/_wav2vec2/aligner.py,sha256=pIWRgQ-kdYUxtL8bdc0qk9wBjwRrHY1uSWL3L4e2vxs,2709
102
+ torchaudio/pipelines/_wav2vec2/impl.py,sha256=zdXFjytJO5MvnB-3aygzUUFKxCTkQGU_OX_rhUh9c0k,65561
103
103
  torchaudio/pipelines/_wav2vec2/utils.py,sha256=Q8_fWOR2JDnHu0TTRmHzRjI3BOJa0hGIAl0cjtALgsQ,6971
104
104
  torchaudio/sox_effects/__init__.py,sha256=gCxdiwHK3ldlGCeYc9VatJW5HyzjWIgw_Sz_krp_rOw,262
105
105
  torchaudio/sox_effects/sox_effects.py,sha256=aVs0GddGxQg1eU9fae4KtYYzQu3Nx6zPeJ7aso2WT-Y,11089
@@ -110,8 +110,8 @@ torchaudio/utils/__init__.py,sha256=vBly7XEzLsppIfIqKJA_pu_ZTWP4EZ4TtlAJ0H2qRzc,
110
110
  torchaudio/utils/download.py,sha256=2IFKD1rsWBFE31HTiyUgpE5y7AJh8_AUPdc-btNQuKw,2882
111
111
  torchaudio/utils/ffmpeg_utils.py,sha256=mU-Kx_kNb2v3nJ0o7mC5Vj3YEo5bAdQOrb5lZi9YUe8,8985
112
112
  torchaudio/utils/sox_utils.py,sha256=x6COHPU1JY5VOg9HeVXQ9Ibq6UG7nydsvXD7_e6c0Rs,2854
113
- torchaudio-2.1.0.dist-info/LICENSE,sha256=k6WIYahYzBCOa2uDPgjnbosqZjOeSoAHyKWowf-cQNY,1338
114
- torchaudio-2.1.0.dist-info/METADATA,sha256=Htp97OrVr2VWGqDHtidztmWcG6EfSY7qSe-0QORubeQ,5655
115
- torchaudio-2.1.0.dist-info/WHEEL,sha256=PVmDYBwAtvZeWEsFe6188JJBg8t5Y5cU1sVwJOaYqII,106
116
- torchaudio-2.1.0.dist-info/top_level.txt,sha256=mPKWMIRWWW2JwbJN6wRckeN1gpbjhifapAF0Z9t7SMo,11
117
- torchaudio-2.1.0.dist-info/RECORD,,
113
+ torchaudio-2.1.1.dist-info/LICENSE,sha256=k6WIYahYzBCOa2uDPgjnbosqZjOeSoAHyKWowf-cQNY,1338
114
+ torchaudio-2.1.1.dist-info/METADATA,sha256=XpDpZl81t3G7h2UKbfF2-bE99asIWyUafMmavF51NL0,6351
115
+ torchaudio-2.1.1.dist-info/WHEEL,sha256=PVmDYBwAtvZeWEsFe6188JJBg8t5Y5cU1sVwJOaYqII,106
116
+ torchaudio-2.1.1.dist-info/top_level.txt,sha256=mPKWMIRWWW2JwbJN6wRckeN1gpbjhifapAF0Z9t7SMo,11
117
+ torchaudio-2.1.1.dist-info/RECORD,,