torchaudio 2.7.0__cp312-cp312-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchaudio might be problematic. Click here for more details.

Files changed (148)
  1. torchaudio/__init__.py +53 -0
  2. torchaudio/_backend/__init__.py +61 -0
  3. torchaudio/_backend/backend.py +53 -0
  4. torchaudio/_backend/common.py +52 -0
  5. torchaudio/_backend/ffmpeg.py +334 -0
  6. torchaudio/_backend/soundfile.py +54 -0
  7. torchaudio/_backend/soundfile_backend.py +457 -0
  8. torchaudio/_backend/sox.py +91 -0
  9. torchaudio/_backend/utils.py +317 -0
  10. torchaudio/_extension/__init__.py +74 -0
  11. torchaudio/_extension/utils.py +180 -0
  12. torchaudio/_internal/__init__.py +10 -0
  13. torchaudio/_internal/module_utils.py +113 -0
  14. torchaudio/backend/__init__.py +8 -0
  15. torchaudio/backend/_no_backend.py +25 -0
  16. torchaudio/backend/_sox_io_backend.py +294 -0
  17. torchaudio/backend/common.py +13 -0
  18. torchaudio/backend/no_backend.py +14 -0
  19. torchaudio/backend/soundfile_backend.py +14 -0
  20. torchaudio/backend/sox_io_backend.py +14 -0
  21. torchaudio/compliance/__init__.py +5 -0
  22. torchaudio/compliance/kaldi.py +813 -0
  23. torchaudio/datasets/__init__.py +47 -0
  24. torchaudio/datasets/cmuarctic.py +157 -0
  25. torchaudio/datasets/cmudict.py +186 -0
  26. torchaudio/datasets/commonvoice.py +86 -0
  27. torchaudio/datasets/dr_vctk.py +121 -0
  28. torchaudio/datasets/fluentcommands.py +108 -0
  29. torchaudio/datasets/gtzan.py +1118 -0
  30. torchaudio/datasets/iemocap.py +147 -0
  31. torchaudio/datasets/librilight_limited.py +111 -0
  32. torchaudio/datasets/librimix.py +133 -0
  33. torchaudio/datasets/librispeech.py +174 -0
  34. torchaudio/datasets/librispeech_biasing.py +189 -0
  35. torchaudio/datasets/libritts.py +168 -0
  36. torchaudio/datasets/ljspeech.py +107 -0
  37. torchaudio/datasets/musdb_hq.py +139 -0
  38. torchaudio/datasets/quesst14.py +136 -0
  39. torchaudio/datasets/snips.py +157 -0
  40. torchaudio/datasets/speechcommands.py +183 -0
  41. torchaudio/datasets/tedlium.py +218 -0
  42. torchaudio/datasets/utils.py +54 -0
  43. torchaudio/datasets/vctk.py +143 -0
  44. torchaudio/datasets/voxceleb1.py +309 -0
  45. torchaudio/datasets/yesno.py +89 -0
  46. torchaudio/functional/__init__.py +127 -0
  47. torchaudio/functional/_alignment.py +128 -0
  48. torchaudio/functional/filtering.py +1670 -0
  49. torchaudio/functional/functional.py +2535 -0
  50. torchaudio/io/__init__.py +13 -0
  51. torchaudio/io/_effector.py +347 -0
  52. torchaudio/io/_playback.py +72 -0
  53. torchaudio/kaldi_io.py +144 -0
  54. torchaudio/lib/__init__.py +0 -0
  55. torchaudio/lib/_torchaudio.so +0 -0
  56. torchaudio/lib/_torchaudio_sox.so +0 -0
  57. torchaudio/lib/libctc_prefix_decoder.so +0 -0
  58. torchaudio/lib/libtorchaudio.so +0 -0
  59. torchaudio/lib/libtorchaudio_sox.so +0 -0
  60. torchaudio/lib/pybind11_prefixctc.so +0 -0
  61. torchaudio/models/__init__.py +85 -0
  62. torchaudio/models/_hdemucs.py +1008 -0
  63. torchaudio/models/conformer.py +293 -0
  64. torchaudio/models/conv_tasnet.py +330 -0
  65. torchaudio/models/decoder/__init__.py +46 -0
  66. torchaudio/models/decoder/_ctc_decoder.py +568 -0
  67. torchaudio/models/decoder/_cuda_ctc_decoder.py +187 -0
  68. torchaudio/models/deepspeech.py +84 -0
  69. torchaudio/models/emformer.py +884 -0
  70. torchaudio/models/rnnt.py +816 -0
  71. torchaudio/models/rnnt_decoder.py +339 -0
  72. torchaudio/models/squim/__init__.py +11 -0
  73. torchaudio/models/squim/objective.py +326 -0
  74. torchaudio/models/squim/subjective.py +150 -0
  75. torchaudio/models/tacotron2.py +1046 -0
  76. torchaudio/models/wav2letter.py +72 -0
  77. torchaudio/models/wav2vec2/__init__.py +45 -0
  78. torchaudio/models/wav2vec2/components.py +1167 -0
  79. torchaudio/models/wav2vec2/model.py +1579 -0
  80. torchaudio/models/wav2vec2/utils/__init__.py +7 -0
  81. torchaudio/models/wav2vec2/utils/import_fairseq.py +213 -0
  82. torchaudio/models/wav2vec2/utils/import_huggingface.py +134 -0
  83. torchaudio/models/wav2vec2/wavlm_attention.py +214 -0
  84. torchaudio/models/wavernn.py +409 -0
  85. torchaudio/pipelines/__init__.py +102 -0
  86. torchaudio/pipelines/_source_separation_pipeline.py +109 -0
  87. torchaudio/pipelines/_squim_pipeline.py +156 -0
  88. torchaudio/pipelines/_tts/__init__.py +16 -0
  89. torchaudio/pipelines/_tts/impl.py +385 -0
  90. torchaudio/pipelines/_tts/interface.py +255 -0
  91. torchaudio/pipelines/_tts/utils.py +228 -0
  92. torchaudio/pipelines/_wav2vec2/__init__.py +0 -0
  93. torchaudio/pipelines/_wav2vec2/aligner.py +87 -0
  94. torchaudio/pipelines/_wav2vec2/impl.py +1699 -0
  95. torchaudio/pipelines/_wav2vec2/utils.py +346 -0
  96. torchaudio/pipelines/rnnt_pipeline.py +380 -0
  97. torchaudio/prototype/__init__.py +0 -0
  98. torchaudio/prototype/datasets/__init__.py +4 -0
  99. torchaudio/prototype/datasets/musan.py +67 -0
  100. torchaudio/prototype/functional/__init__.py +26 -0
  101. torchaudio/prototype/functional/_dsp.py +433 -0
  102. torchaudio/prototype/functional/_rir.py +379 -0
  103. torchaudio/prototype/functional/functional.py +190 -0
  104. torchaudio/prototype/models/__init__.py +36 -0
  105. torchaudio/prototype/models/_conformer_wav2vec2.py +794 -0
  106. torchaudio/prototype/models/_emformer_hubert.py +333 -0
  107. torchaudio/prototype/models/conv_emformer.py +525 -0
  108. torchaudio/prototype/models/hifi_gan.py +336 -0
  109. torchaudio/prototype/models/rnnt.py +711 -0
  110. torchaudio/prototype/models/rnnt_decoder.py +399 -0
  111. torchaudio/prototype/pipelines/__init__.py +12 -0
  112. torchaudio/prototype/pipelines/_vggish/__init__.py +3 -0
  113. torchaudio/prototype/pipelines/_vggish/_vggish_impl.py +233 -0
  114. torchaudio/prototype/pipelines/_vggish/_vggish_pipeline.py +82 -0
  115. torchaudio/prototype/pipelines/hifigan_pipeline.py +228 -0
  116. torchaudio/prototype/pipelines/rnnt_pipeline.py +58 -0
  117. torchaudio/prototype/transforms/__init__.py +9 -0
  118. torchaudio/prototype/transforms/_transforms.py +456 -0
  119. torchaudio/sox_effects/__init__.py +10 -0
  120. torchaudio/sox_effects/sox_effects.py +272 -0
  121. torchaudio/transforms/__init__.py +75 -0
  122. torchaudio/transforms/_multi_channel.py +467 -0
  123. torchaudio/transforms/_transforms.py +2137 -0
  124. torchaudio/utils/__init__.py +11 -0
  125. torchaudio/utils/download.py +89 -0
  126. torchaudio/utils/ffmpeg_utils.py +11 -0
  127. torchaudio/utils/sox_utils.py +99 -0
  128. torchaudio/version.py +2 -0
  129. torchaudio-2.7.0.dist-info/LICENSE +25 -0
  130. torchaudio-2.7.0.dist-info/METADATA +124 -0
  131. torchaudio-2.7.0.dist-info/RECORD +148 -0
  132. torchaudio-2.7.0.dist-info/WHEEL +5 -0
  133. torchaudio-2.7.0.dist-info/top_level.txt +2 -0
  134. torio/__init__.py +8 -0
  135. torio/_extension/__init__.py +13 -0
  136. torio/_extension/utils.py +147 -0
  137. torio/io/__init__.py +9 -0
  138. torio/io/_streaming_media_decoder.py +978 -0
  139. torio/io/_streaming_media_encoder.py +502 -0
  140. torio/lib/__init__.py +0 -0
  141. torio/lib/_torio_ffmpeg4.so +0 -0
  142. torio/lib/_torio_ffmpeg5.so +0 -0
  143. torio/lib/_torio_ffmpeg6.so +0 -0
  144. torio/lib/libtorio_ffmpeg4.so +0 -0
  145. torio/lib/libtorio_ffmpeg5.so +0 -0
  146. torio/lib/libtorio_ffmpeg6.so +0 -0
  147. torio/utils/__init__.py +4 -0
  148. torio/utils/ffmpeg_utils.py +247 -0
@@ -0,0 +1,72 @@
1
+ from torch import nn, Tensor
2
+
3
+ __all__ = [
4
+ "Wav2Letter",
5
+ ]
6
+
7
+
8
class Wav2Letter(nn.Module):
    r"""Wav2Letter model architecture from *Wav2Letter: an End-to-End ConvNet-based Speech
    Recognition System* :cite:`collobert2016wav2letter`.

    See Also:
        * `Training example <https://github.com/pytorch/audio/tree/release/0.12/examples/pipeline_wav2letter>`__

    Args:
        num_classes (int, optional): Number of classes to be classified. (Default: ``40``)
        input_type (str, optional): Wav2Letter can use as input: ``waveform``, ``power_spectrum``
            or ``mfcc`` (Default: ``waveform``).
        num_features (int, optional): Number of input features that the network will receive (Default: ``1``).

    Raises:
        ValueError: If ``input_type`` is not ``waveform``, ``power_spectrum`` or ``mfcc``.
    """

    def __init__(self, num_classes: int = 40, input_type: str = "waveform", num_features: int = 1) -> None:
        super().__init__()

        # Fail fast: without this check an unsupported ``input_type`` would leave
        # ``self.acoustic_model`` undefined and only surface later, in ``forward``,
        # as an unrelated-looking AttributeError.
        supported_input_types = ("waveform", "power_spectrum", "mfcc")
        if input_type not in supported_input_types:
            raise ValueError(
                f"Unsupported input_type: {input_type!r}. Expected one of {supported_input_types}."
            )

        # For raw waveforms a front-end convolution (built below) produces the 250
        # channels that the acoustic model consumes; otherwise the features are fed
        # to the acoustic model directly.
        acoustic_num_features = 250 if input_type == "waveform" else num_features

        # The layer order (and therefore the Sequential indices) is kept stable so
        # that state_dict keys do not change.
        layers = [
            nn.Conv1d(in_channels=acoustic_num_features, out_channels=250, kernel_size=48, stride=2, padding=23),
            nn.ReLU(inplace=True),
        ]
        # Seven identical length-preserving convolutions (kernel 7, padding 3).
        for _ in range(7):
            layers.append(nn.Conv1d(in_channels=250, out_channels=250, kernel_size=7, stride=1, padding=3))
            layers.append(nn.ReLU(inplace=True))
        layers.extend(
            [
                nn.Conv1d(in_channels=250, out_channels=2000, kernel_size=32, stride=1, padding=16),
                nn.ReLU(inplace=True),
                nn.Conv1d(in_channels=2000, out_channels=2000, kernel_size=1, stride=1, padding=0),
                nn.ReLU(inplace=True),
                nn.Conv1d(in_channels=2000, out_channels=num_classes, kernel_size=1, stride=1, padding=0),
                nn.ReLU(inplace=True),
            ]
        )
        acoustic_model = nn.Sequential(*layers)

        if input_type == "waveform":
            waveform_model = nn.Sequential(
                nn.Conv1d(in_channels=num_features, out_channels=250, kernel_size=250, stride=160, padding=45),
                nn.ReLU(inplace=True),
            )
            self.acoustic_model = nn.Sequential(waveform_model, acoustic_model)
        else:
            # "power_spectrum" or "mfcc": no waveform front-end is needed.
            self.acoustic_model = acoustic_model

    def forward(self, x: Tensor) -> Tensor:
        r"""
        Args:
            x (torch.Tensor): Tensor of dimension (batch_size, num_features, input_length).

        Returns:
            Tensor: Predictor tensor of dimension (batch_size, number_of_classes, output_length)
            holding log-probabilities over the classes (log-softmax along dimension 1).
            ``output_length`` generally differs from ``input_length`` because of the strided
            convolutions in the network.
        """

        x = self.acoustic_model(x)
        x = nn.functional.log_softmax(x, dim=1)
        return x
@@ -0,0 +1,45 @@
1
+ from . import utils
2
+ from .model import (
3
+ hubert_base,
4
+ hubert_large,
5
+ hubert_pretrain_base,
6
+ hubert_pretrain_large,
7
+ hubert_pretrain_model,
8
+ hubert_pretrain_xlarge,
9
+ hubert_xlarge,
10
+ HuBERTPretrainModel,
11
+ wav2vec2_base,
12
+ wav2vec2_large,
13
+ wav2vec2_large_lv60k,
14
+ wav2vec2_model,
15
+ wav2vec2_xlsr_1b,
16
+ wav2vec2_xlsr_2b,
17
+ wav2vec2_xlsr_300m,
18
+ Wav2Vec2Model,
19
+ wavlm_base,
20
+ wavlm_large,
21
+ wavlm_model,
22
+ )
23
+
24
+ __all__ = [
25
+ "Wav2Vec2Model",
26
+ "HuBERTPretrainModel",
27
+ "wavlm_model",
28
+ "wavlm_base",
29
+ "wavlm_large",
30
+ "wav2vec2_model",
31
+ "wav2vec2_base",
32
+ "wav2vec2_large",
33
+ "wav2vec2_large_lv60k",
34
+ "hubert_base",
35
+ "hubert_large",
36
+ "hubert_xlarge",
37
+ "hubert_pretrain_model",
38
+ "hubert_pretrain_base",
39
+ "hubert_pretrain_large",
40
+ "hubert_pretrain_xlarge",
41
+ "utils",
42
+ "wav2vec2_xlsr_300m",
43
+ "wav2vec2_xlsr_1b",
44
+ "wav2vec2_xlsr_2b",
45
+ ]