torchaudio-2.9.0-cp314-cp314-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of torchaudio might be problematic; see the advisory on the registry page for more details.
- torchaudio/.dylibs/libc++.1.0.dylib +0 -0
- torchaudio/__init__.py +204 -0
- torchaudio/_extension/__init__.py +61 -0
- torchaudio/_extension/utils.py +133 -0
- torchaudio/_internal/__init__.py +10 -0
- torchaudio/_internal/module_utils.py +171 -0
- torchaudio/_torchcodec.py +340 -0
- torchaudio/compliance/__init__.py +5 -0
- torchaudio/compliance/kaldi.py +813 -0
- torchaudio/datasets/__init__.py +47 -0
- torchaudio/datasets/cmuarctic.py +157 -0
- torchaudio/datasets/cmudict.py +186 -0
- torchaudio/datasets/commonvoice.py +86 -0
- torchaudio/datasets/dr_vctk.py +121 -0
- torchaudio/datasets/fluentcommands.py +108 -0
- torchaudio/datasets/gtzan.py +1118 -0
- torchaudio/datasets/iemocap.py +147 -0
- torchaudio/datasets/librilight_limited.py +111 -0
- torchaudio/datasets/librimix.py +133 -0
- torchaudio/datasets/librispeech.py +174 -0
- torchaudio/datasets/librispeech_biasing.py +189 -0
- torchaudio/datasets/libritts.py +168 -0
- torchaudio/datasets/ljspeech.py +107 -0
- torchaudio/datasets/musdb_hq.py +139 -0
- torchaudio/datasets/quesst14.py +136 -0
- torchaudio/datasets/snips.py +157 -0
- torchaudio/datasets/speechcommands.py +183 -0
- torchaudio/datasets/tedlium.py +218 -0
- torchaudio/datasets/utils.py +54 -0
- torchaudio/datasets/vctk.py +143 -0
- torchaudio/datasets/voxceleb1.py +309 -0
- torchaudio/datasets/yesno.py +89 -0
- torchaudio/functional/__init__.py +130 -0
- torchaudio/functional/_alignment.py +128 -0
- torchaudio/functional/filtering.py +1685 -0
- torchaudio/functional/functional.py +2505 -0
- torchaudio/lib/__init__.py +0 -0
- torchaudio/lib/_torchaudio.so +0 -0
- torchaudio/lib/libtorchaudio.so +0 -0
- torchaudio/models/__init__.py +85 -0
- torchaudio/models/_hdemucs.py +1008 -0
- torchaudio/models/conformer.py +293 -0
- torchaudio/models/conv_tasnet.py +330 -0
- torchaudio/models/decoder/__init__.py +64 -0
- torchaudio/models/decoder/_ctc_decoder.py +568 -0
- torchaudio/models/decoder/_cuda_ctc_decoder.py +187 -0
- torchaudio/models/deepspeech.py +84 -0
- torchaudio/models/emformer.py +884 -0
- torchaudio/models/rnnt.py +816 -0
- torchaudio/models/rnnt_decoder.py +339 -0
- torchaudio/models/squim/__init__.py +11 -0
- torchaudio/models/squim/objective.py +326 -0
- torchaudio/models/squim/subjective.py +150 -0
- torchaudio/models/tacotron2.py +1046 -0
- torchaudio/models/wav2letter.py +72 -0
- torchaudio/models/wav2vec2/__init__.py +45 -0
- torchaudio/models/wav2vec2/components.py +1167 -0
- torchaudio/models/wav2vec2/model.py +1579 -0
- torchaudio/models/wav2vec2/utils/__init__.py +7 -0
- torchaudio/models/wav2vec2/utils/import_fairseq.py +213 -0
- torchaudio/models/wav2vec2/utils/import_huggingface.py +134 -0
- torchaudio/models/wav2vec2/wavlm_attention.py +214 -0
- torchaudio/models/wavernn.py +409 -0
- torchaudio/pipelines/__init__.py +102 -0
- torchaudio/pipelines/_source_separation_pipeline.py +109 -0
- torchaudio/pipelines/_squim_pipeline.py +156 -0
- torchaudio/pipelines/_tts/__init__.py +16 -0
- torchaudio/pipelines/_tts/impl.py +385 -0
- torchaudio/pipelines/_tts/interface.py +255 -0
- torchaudio/pipelines/_tts/utils.py +230 -0
- torchaudio/pipelines/_wav2vec2/__init__.py +0 -0
- torchaudio/pipelines/_wav2vec2/aligner.py +87 -0
- torchaudio/pipelines/_wav2vec2/impl.py +1699 -0
- torchaudio/pipelines/_wav2vec2/utils.py +346 -0
- torchaudio/pipelines/rnnt_pipeline.py +380 -0
- torchaudio/transforms/__init__.py +78 -0
- torchaudio/transforms/_multi_channel.py +467 -0
- torchaudio/transforms/_transforms.py +2138 -0
- torchaudio/utils/__init__.py +4 -0
- torchaudio/utils/download.py +89 -0
- torchaudio/version.py +2 -0
- torchaudio-2.9.0.dist-info/LICENSE +25 -0
- torchaudio-2.9.0.dist-info/METADATA +122 -0
- torchaudio-2.9.0.dist-info/RECORD +86 -0
- torchaudio-2.9.0.dist-info/WHEEL +5 -0
- torchaudio-2.9.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import csv
|
|
2
|
+
import os
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Tuple, Union
|
|
5
|
+
|
|
6
|
+
from torch import Tensor
|
|
7
|
+
from torch.utils.data import Dataset
|
|
8
|
+
from torchaudio.datasets.utils import _load_waveform
|
|
9
|
+
|
|
10
|
+
SAMPLE_RATE = 16000  # sample rate reported for every clip by get_metadata/__getitem__ below
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class FluentSpeechCommands(Dataset):
    """*Fluent Speech Commands* :cite:`fluent` dataset.

    Loads the per-subset metadata CSV eagerly at construction time; audio is
    loaded lazily, one clip per :py:func:`__getitem__` call.

    Args:
        root (str or Path): Path to the directory where the dataset is found.
        subset (str, optional): subset of the dataset to use.
            Options: [``"train"``, ``"valid"``, ``"test"``].
            (Default: ``"train"``)

    Raises:
        ValueError: If ``subset`` is not one of the allowed options.
        RuntimeError: If the dataset directory is not found under ``root``.
    """

    def __init__(self, root: Union[str, Path], subset: str = "train"):
        if subset not in ["train", "valid", "test"]:
            raise ValueError("`subset` must be one of ['train', 'valid', 'test']")

        root = os.fspath(root)
        self._path = os.path.join(root, "fluent_speech_commands_dataset")

        if not os.path.isdir(self._path):
            raise RuntimeError("Dataset not found.")

        subset_path = os.path.join(self._path, "data", f"{subset}_data.csv")
        # newline="" is the csv-module-documented way to open a CSV file;
        # without it, quoted fields containing newlines are mis-parsed.
        with open(subset_path, newline="") as subset_csv:
            subset_reader = csv.reader(subset_csv)
            data = list(subset_reader)

        self.header = data[0]  # CSV column names
        self.data = data[1:]  # one metadata row (list of str) per utterance

    def get_metadata(self, n: int) -> Tuple[str, int, str, str, str, str, str, str]:
        """Get metadata for the n-th sample from the dataset. Returns filepath instead of waveform,
        but otherwise returns the same fields as :py:func:`__getitem__`.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            str:
                Path to audio
            int:
                Sample rate
            str:
                File name
            str:
                Speaker ID
            str:
                Transcription
            str:
                Action
            str:
                Object
            str:
                Location
        """
        sample = self.data[n]

        # Strip the directory part and the ".wav" extension from the stored
        # path, e.g. "wavs/speakers/<id>/<name>.wav" -> "<name>".
        file_name = sample[self.header.index("path")].split("/")[-1]
        file_name = file_name.split(".")[0]
        # csv.reader yields str for every field, so speaker_id is a str
        # (the previous `int` annotation for it was incorrect).
        speaker_id, transcription, action, obj, location = sample[2:]
        # Rebuild the audio path relative to the dataset root.
        file_path = os.path.join("wavs", "speakers", speaker_id, f"{file_name}.wav")

        return file_path, SAMPLE_RATE, file_name, speaker_id, transcription, action, obj, location

    def __len__(self) -> int:
        """Return the number of samples in the selected subset."""
        return len(self.data)

    def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, str, str, str, str]:
        """Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            str:
                File name
            str:
                Speaker ID
            str:
                Transcription
            str:
                Action
            str:
                Object
            str:
                Location
        """
        metadata = self.get_metadata(n)
        waveform = _load_waveform(self._path, metadata[0], metadata[1])
        return (waveform,) + metadata[1:]
|