torchaudio-2.9.0-cp314-cp314-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of torchaudio might be problematic.

Files changed (86)
  1. torchaudio/.dylibs/libc++.1.0.dylib +0 -0
  2. torchaudio/__init__.py +204 -0
  3. torchaudio/_extension/__init__.py +61 -0
  4. torchaudio/_extension/utils.py +133 -0
  5. torchaudio/_internal/__init__.py +10 -0
  6. torchaudio/_internal/module_utils.py +171 -0
  7. torchaudio/_torchcodec.py +340 -0
  8. torchaudio/compliance/__init__.py +5 -0
  9. torchaudio/compliance/kaldi.py +813 -0
  10. torchaudio/datasets/__init__.py +47 -0
  11. torchaudio/datasets/cmuarctic.py +157 -0
  12. torchaudio/datasets/cmudict.py +186 -0
  13. torchaudio/datasets/commonvoice.py +86 -0
  14. torchaudio/datasets/dr_vctk.py +121 -0
  15. torchaudio/datasets/fluentcommands.py +108 -0
  16. torchaudio/datasets/gtzan.py +1118 -0
  17. torchaudio/datasets/iemocap.py +147 -0
  18. torchaudio/datasets/librilight_limited.py +111 -0
  19. torchaudio/datasets/librimix.py +133 -0
  20. torchaudio/datasets/librispeech.py +174 -0
  21. torchaudio/datasets/librispeech_biasing.py +189 -0
  22. torchaudio/datasets/libritts.py +168 -0
  23. torchaudio/datasets/ljspeech.py +107 -0
  24. torchaudio/datasets/musdb_hq.py +139 -0
  25. torchaudio/datasets/quesst14.py +136 -0
  26. torchaudio/datasets/snips.py +157 -0
  27. torchaudio/datasets/speechcommands.py +183 -0
  28. torchaudio/datasets/tedlium.py +218 -0
  29. torchaudio/datasets/utils.py +54 -0
  30. torchaudio/datasets/vctk.py +143 -0
  31. torchaudio/datasets/voxceleb1.py +309 -0
  32. torchaudio/datasets/yesno.py +89 -0
  33. torchaudio/functional/__init__.py +130 -0
  34. torchaudio/functional/_alignment.py +128 -0
  35. torchaudio/functional/filtering.py +1685 -0
  36. torchaudio/functional/functional.py +2505 -0
  37. torchaudio/lib/__init__.py +0 -0
  38. torchaudio/lib/_torchaudio.so +0 -0
  39. torchaudio/lib/libtorchaudio.so +0 -0
  40. torchaudio/models/__init__.py +85 -0
  41. torchaudio/models/_hdemucs.py +1008 -0
  42. torchaudio/models/conformer.py +293 -0
  43. torchaudio/models/conv_tasnet.py +330 -0
  44. torchaudio/models/decoder/__init__.py +64 -0
  45. torchaudio/models/decoder/_ctc_decoder.py +568 -0
  46. torchaudio/models/decoder/_cuda_ctc_decoder.py +187 -0
  47. torchaudio/models/deepspeech.py +84 -0
  48. torchaudio/models/emformer.py +884 -0
  49. torchaudio/models/rnnt.py +816 -0
  50. torchaudio/models/rnnt_decoder.py +339 -0
  51. torchaudio/models/squim/__init__.py +11 -0
  52. torchaudio/models/squim/objective.py +326 -0
  53. torchaudio/models/squim/subjective.py +150 -0
  54. torchaudio/models/tacotron2.py +1046 -0
  55. torchaudio/models/wav2letter.py +72 -0
  56. torchaudio/models/wav2vec2/__init__.py +45 -0
  57. torchaudio/models/wav2vec2/components.py +1167 -0
  58. torchaudio/models/wav2vec2/model.py +1579 -0
  59. torchaudio/models/wav2vec2/utils/__init__.py +7 -0
  60. torchaudio/models/wav2vec2/utils/import_fairseq.py +213 -0
  61. torchaudio/models/wav2vec2/utils/import_huggingface.py +134 -0
  62. torchaudio/models/wav2vec2/wavlm_attention.py +214 -0
  63. torchaudio/models/wavernn.py +409 -0
  64. torchaudio/pipelines/__init__.py +102 -0
  65. torchaudio/pipelines/_source_separation_pipeline.py +109 -0
  66. torchaudio/pipelines/_squim_pipeline.py +156 -0
  67. torchaudio/pipelines/_tts/__init__.py +16 -0
  68. torchaudio/pipelines/_tts/impl.py +385 -0
  69. torchaudio/pipelines/_tts/interface.py +255 -0
  70. torchaudio/pipelines/_tts/utils.py +230 -0
  71. torchaudio/pipelines/_wav2vec2/__init__.py +0 -0
  72. torchaudio/pipelines/_wav2vec2/aligner.py +87 -0
  73. torchaudio/pipelines/_wav2vec2/impl.py +1699 -0
  74. torchaudio/pipelines/_wav2vec2/utils.py +346 -0
  75. torchaudio/pipelines/rnnt_pipeline.py +380 -0
  76. torchaudio/transforms/__init__.py +78 -0
  77. torchaudio/transforms/_multi_channel.py +467 -0
  78. torchaudio/transforms/_transforms.py +2138 -0
  79. torchaudio/utils/__init__.py +4 -0
  80. torchaudio/utils/download.py +89 -0
  81. torchaudio/version.py +2 -0
  82. torchaudio-2.9.0.dist-info/LICENSE +25 -0
  83. torchaudio-2.9.0.dist-info/METADATA +122 -0
  84. torchaudio-2.9.0.dist-info/RECORD +86 -0
  85. torchaudio-2.9.0.dist-info/WHEEL +5 -0
  86. torchaudio-2.9.0.dist-info/top_level.txt +1 -0
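
Since the listing includes torchaudio/version.py and the wheel is tagged 2.9.0 for CPython 3.14 on macOS arm64, a quick sanity check of an installed copy is to print the reported version. A minimal sketch, not part of the diff; the expected string is inferred from the wheel name above:

    import torchaudio

    # Expected to print "2.9.0" (possibly with a local build suffix)
    # if this wheel is the installed build.
    print(torchaudio.__version__)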
torchaudio/datasets/fluentcommands.py
@@ -0,0 +1,108 @@
+import csv
+import os
+from pathlib import Path
+from typing import Tuple, Union
+
+from torch import Tensor
+from torch.utils.data import Dataset
+from torchaudio.datasets.utils import _load_waveform
+
+SAMPLE_RATE = 16000
+
+
+class FluentSpeechCommands(Dataset):
+    """*Fluent Speech Commands* :cite:`fluent` dataset
+
+    Args:
+        root (str or Path): Path to the directory where the dataset is found.
+        subset (str, optional): subset of the dataset to use.
+            Options: [``"train"``, ``"valid"``, ``"test"``].
+            (Default: ``"train"``)
+    """
+
+    def __init__(self, root: Union[str, Path], subset: str = "train"):
+        if subset not in ["train", "valid", "test"]:
+            raise ValueError("`subset` must be one of ['train', 'valid', 'test']")
+
+        root = os.fspath(root)
+        self._path = os.path.join(root, "fluent_speech_commands_dataset")
+
+        if not os.path.isdir(self._path):
+            raise RuntimeError("Dataset not found.")
+
+        subset_path = os.path.join(self._path, "data", f"{subset}_data.csv")
+        with open(subset_path) as subset_csv:
+            subset_reader = csv.reader(subset_csv)
+            data = list(subset_reader)
+
+        self.header = data[0]
+        self.data = data[1:]
+
+    def get_metadata(self, n: int) -> Tuple[str, int, str, int, str, str, str, str]:
+        """Get metadata for the n-th sample from the dataset. Returns filepath instead of waveform,
+        but otherwise returns the same fields as :py:func:`__getitem__`.
+
+        Args:
+            n (int): The index of the sample to be loaded
+
+        Returns:
+            Tuple of the following items;
+
+            str:
+                Path to audio
+            int:
+                Sample rate
+            str:
+                File name
+            int:
+                Speaker ID
+            str:
+                Transcription
+            str:
+                Action
+            str:
+                Object
+            str:
+                Location
+        """
+        sample = self.data[n]
+
+        file_name = sample[self.header.index("path")].split("/")[-1]
+        file_name = file_name.split(".")[0]
+        speaker_id, transcription, action, obj, location = sample[2:]
+        file_path = os.path.join("wavs", "speakers", speaker_id, f"{file_name}.wav")
+
+        return file_path, SAMPLE_RATE, file_name, speaker_id, transcription, action, obj, location
+
+    def __len__(self) -> int:
+        return len(self.data)
+
+    def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, str, str, str, str]:
+        """Load the n-th sample from the dataset.
+
+        Args:
+            n (int): The index of the sample to be loaded
+
+        Returns:
+            Tuple of the following items;
+
+            Tensor:
+                Waveform
+            int:
+                Sample rate
+            str:
+                File name
+            int:
+                Speaker ID
+            str:
+                Transcription
+            str:
+                Action
+            str:
+                Object
+            str:
+                Location
+        """
+        metadata = self.get_metadata(n)
+        waveform = _load_waveform(self._path, metadata[0], metadata[1])
+        return (waveform,) + metadata[1:]
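
For context, the FluentSpeechCommands class added above is used like other torchaudio datasets once the data is on disk. A minimal usage sketch, assuming the Fluent Speech Commands archive has already been downloaded and extracted under ./data/fluent_speech_commands_dataset (the class does not download anything and raises RuntimeError if the directory is missing; in this 2.9.0 build, decoding the waveform may additionally rely on the TorchCodec-backed loader wrapped by torchaudio/_torchcodec.py):

    from torchaudio.datasets import FluentSpeechCommands

    # "./data" is an assumed location; the class only appends
    # "fluent_speech_commands_dataset" to whatever root is given.
    dataset = FluentSpeechCommands("./data", subset="valid")

    # Each item is the 8-tuple documented in __getitem__ above.
    (waveform, sample_rate, file_name, speaker_id,
     transcription, action, obj, location) = dataset[0]
    print(waveform.shape, sample_rate, transcription, action, obj, location)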