sonusai 0.19.10__py3-none-any.whl → 0.20.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/data/genmixdb.yml +4 -2
- sonusai/doc/doc.py +14 -0
- sonusai/ir_metric.py +555 -0
- sonusai/metrics/calc_audio_stats.py +277 -1
- sonusai/metrics_summary.py +5 -3
- sonusai/mixture/__init__.py +4 -1
- sonusai/mixture/audio.py +103 -12
- sonusai/mixture/augmentation.py +199 -84
- sonusai/mixture/config.py +9 -4
- sonusai/mixture/constants.py +0 -1
- sonusai/mixture/datatypes.py +19 -10
- sonusai/mixture/generation.py +11 -12
- sonusai/mixture/helpers.py +20 -23
- sonusai/mixture/ir_delay.py +63 -0
- sonusai/mixture/mixdb.py +103 -19
- sonusai/mixture/targets.py +3 -6
- sonusai/utils/__init__.py +2 -0
- sonusai/utils/temp_seed.py +13 -0
- {sonusai-0.19.10.dist-info → sonusai-0.20.3.dist-info}/METADATA +2 -2
- {sonusai-0.19.10.dist-info → sonusai-0.20.3.dist-info}/RECORD +22 -24
- {sonusai-0.19.10.dist-info → sonusai-0.20.3.dist-info}/WHEEL +1 -1
- sonusai/mixture/soundfile_audio.py +0 -130
- sonusai/mixture/sox_audio.py +0 -476
- sonusai/mixture/sox_augmentation.py +0 -136
- sonusai/mixture/torchaudio_audio.py +0 -106
- sonusai/mixture/torchaudio_augmentation.py +0 -109
- {sonusai-0.19.10.dist-info → sonusai-0.20.3.dist-info}/entry_points.txt +0 -0
@@ -1,3 +1,8 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
from sox import Transformer as SoxTransformer
|
5
|
+
|
1
6
|
from sonusai.mixture.datatypes import AudioStatsMetrics
|
2
7
|
from sonusai.mixture.datatypes import AudioT
|
3
8
|
|
@@ -12,7 +17,6 @@ def _convert_str_with_factors_to_int(x: str) -> int:
|
|
12
17
|
|
13
18
|
def calc_audio_stats(audio: AudioT, win_len: float | None = None) -> AudioStatsMetrics:
|
14
19
|
from sonusai.mixture import SAMPLE_RATE
|
15
|
-
from sonusai.mixture import Transformer
|
16
20
|
|
17
21
|
args = ["stats"]
|
18
22
|
if win_len is not None:
|
@@ -53,3 +57,275 @@ def calc_audio_stats(audio: AudioT, win_len: float | None = None) -> AudioStatsM
|
|
53
57
|
fl=float(stats["Flat factor"]),
|
54
58
|
pkc=_convert_str_with_factors_to_int(stats["Pk count"]),
|
55
59
|
)
|
60
|
+
|
61
|
+
|
62
|
+
class Transformer(SoxTransformer):
|
63
|
+
"""Override certain sox.Transformer methods"""
|
64
|
+
|
65
|
+
def build( # pyright: ignore [reportIncompatibleMethodOverride]
|
66
|
+
self,
|
67
|
+
input_filepath: str | Path | None = None,
|
68
|
+
output_filepath: str | Path | None = None,
|
69
|
+
input_array: np.ndarray | None = None,
|
70
|
+
sample_rate_in: float | None = None,
|
71
|
+
extra_args: list[str] | None = None,
|
72
|
+
return_output: bool = False,
|
73
|
+
) -> tuple[bool, str | None, str | None]:
|
74
|
+
"""Given an input file or array, creates an output_file on disk by
|
75
|
+
executing the current set of commands. This function returns True on
|
76
|
+
success. If return_output is True, this function returns a triple of
|
77
|
+
(status, out, err), giving the success state, along with stdout and
|
78
|
+
stderr returned by sox.
|
79
|
+
|
80
|
+
Parameters
|
81
|
+
----------
|
82
|
+
input_filepath : str or None
|
83
|
+
Either path to input audio file or None for array input.
|
84
|
+
output_filepath : str
|
85
|
+
Path to desired output file. If a file already exists at
|
86
|
+
the given path, the file will be overwritten.
|
87
|
+
If '-n', no file is created.
|
88
|
+
input_array : np.ndarray or None
|
89
|
+
An np.ndarray of an waveform with shape (n_samples, n_channels).
|
90
|
+
sample_rate_in must also be provided.
|
91
|
+
If None, input_filepath must be specified.
|
92
|
+
sample_rate_in : int
|
93
|
+
Sample rate of input_array.
|
94
|
+
This argument is ignored if input_array is None.
|
95
|
+
extra_args : list or None, default=None
|
96
|
+
If a list is given, these additional arguments are passed to SoX
|
97
|
+
at the end of the list of effects.
|
98
|
+
Don't use this argument unless you know exactly what you're doing!
|
99
|
+
return_output : bool, default=False
|
100
|
+
If True, returns the status and information sent to stderr and
|
101
|
+
stdout as a tuple (status, stdout, stderr).
|
102
|
+
If output_filepath is None, return_output=True by default.
|
103
|
+
If False, returns True on success.
|
104
|
+
|
105
|
+
Returns
|
106
|
+
-------
|
107
|
+
status : bool
|
108
|
+
True on success.
|
109
|
+
out : str (optional)
|
110
|
+
This is not returned unless return_output is True.
|
111
|
+
When returned, captures the stdout produced by sox.
|
112
|
+
err : str (optional)
|
113
|
+
This is not returned unless return_output is True.
|
114
|
+
When returned, captures the stderr produced by sox.
|
115
|
+
|
116
|
+
Examples
|
117
|
+
--------
|
118
|
+
> import numpy as np
|
119
|
+
> import sox
|
120
|
+
> tfm = sox.Transformer()
|
121
|
+
> sample_rate = 44100
|
122
|
+
> y = np.sin(2 * np.pi * 440.0 * np.arange(sample_rate * 1.0) / sample_rate)
|
123
|
+
|
124
|
+
file in, file out - basic usage
|
125
|
+
|
126
|
+
> status = tfm.build('path/to/input.wav', 'path/to/output.mp3')
|
127
|
+
|
128
|
+
file in, file out - equivalent usage
|
129
|
+
|
130
|
+
> status = tfm.build(
|
131
|
+
input_filepath='path/to/input.wav',
|
132
|
+
output_filepath='path/to/output.mp3'
|
133
|
+
)
|
134
|
+
|
135
|
+
array in, file out
|
136
|
+
|
137
|
+
> status = tfm.build(
|
138
|
+
input_array=y, sample_rate_in=sample_rate,
|
139
|
+
output_filepath='path/to/output.mp3'
|
140
|
+
)
|
141
|
+
|
142
|
+
"""
|
143
|
+
from sox import file_info
|
144
|
+
from sox.core import SoxError
|
145
|
+
from sox.core import sox
|
146
|
+
from sox.log import logger
|
147
|
+
|
148
|
+
input_format, input_filepath = self._parse_inputs(input_filepath, input_array, sample_rate_in)
|
149
|
+
|
150
|
+
if output_filepath is None:
|
151
|
+
raise ValueError("output_filepath is not specified!")
|
152
|
+
|
153
|
+
# set output parameters
|
154
|
+
if input_filepath == output_filepath:
|
155
|
+
raise ValueError("input_filepath must be different from output_filepath.")
|
156
|
+
file_info.validate_output_file(output_filepath)
|
157
|
+
|
158
|
+
args = []
|
159
|
+
args.extend(self.globals)
|
160
|
+
args.extend(self._input_format_args(input_format))
|
161
|
+
args.append(input_filepath)
|
162
|
+
args.extend(self._output_format_args(self.output_format))
|
163
|
+
args.append(output_filepath)
|
164
|
+
args.extend(self.effects)
|
165
|
+
|
166
|
+
if extra_args is not None:
|
167
|
+
if not isinstance(extra_args, list):
|
168
|
+
raise ValueError("extra_args must be a list.")
|
169
|
+
args.extend(extra_args)
|
170
|
+
|
171
|
+
status, out, err = sox(args, input_array, True)
|
172
|
+
if status != 0:
|
173
|
+
raise SoxError(f"Stdout: {out}\nStderr: {err}")
|
174
|
+
|
175
|
+
logger.info("Created %s with effects: %s", output_filepath, " ".join(self.effects_log))
|
176
|
+
|
177
|
+
if return_output:
|
178
|
+
return status, out, err # pyright: ignore [reportReturnType]
|
179
|
+
|
180
|
+
return True, None, None
|
181
|
+
|
182
|
+
def build_array( # pyright: ignore [reportIncompatibleMethodOverride]
|
183
|
+
self,
|
184
|
+
input_filepath: str | Path | None = None,
|
185
|
+
input_array: np.ndarray | None = None,
|
186
|
+
sample_rate_in: int | None = None,
|
187
|
+
extra_args: list[str] | None = None,
|
188
|
+
) -> np.ndarray:
|
189
|
+
"""Given an input file or array, returns the output as a numpy array
|
190
|
+
by executing the current set of commands. By default, the array will
|
191
|
+
have the same sample rate as the input file unless otherwise specified
|
192
|
+
using set_output_format. Functions such as channels and convert
|
193
|
+
will be ignored!
|
194
|
+
|
195
|
+
The SonusAI override does not generate a warning for rate transforms.
|
196
|
+
|
197
|
+
Parameters
|
198
|
+
----------
|
199
|
+
input_filepath : str, Path or None
|
200
|
+
Either path to input audio file or None.
|
201
|
+
input_array : np.ndarray or None
|
202
|
+
A np.ndarray of a waveform with shape (n_samples, n_channels).
|
203
|
+
If this argument is passed, sample_rate_in must also be provided.
|
204
|
+
If None, input_filepath must be specified.
|
205
|
+
sample_rate_in : int
|
206
|
+
Sample rate of input_array.
|
207
|
+
This argument is ignored if input_array is None.
|
208
|
+
extra_args : list or None, default=None
|
209
|
+
If a list is given, these additional arguments are passed to SoX
|
210
|
+
at the end of the list of effects.
|
211
|
+
Don't use this argument unless you know exactly what you're doing!
|
212
|
+
|
213
|
+
Returns
|
214
|
+
-------
|
215
|
+
output_array : np.ndarray
|
216
|
+
Output audio as a numpy array
|
217
|
+
|
218
|
+
Examples
|
219
|
+
--------
|
220
|
+
|
221
|
+
> import numpy as np
|
222
|
+
> import sox
|
223
|
+
> tfm = sox.Transformer()
|
224
|
+
> sample_rate = 44100
|
225
|
+
> y = np.sin(2 * np.pi * 440.0 * np.arange(sample_rate * 1.0) / sample_rate)
|
226
|
+
|
227
|
+
file in, array out
|
228
|
+
|
229
|
+
> output_array = tfm.build(input_filepath='path/to/input.wav')
|
230
|
+
|
231
|
+
array in, array out
|
232
|
+
|
233
|
+
> output_array = tfm.build(input_array=y, sample_rate_in=sample_rate)
|
234
|
+
|
235
|
+
specifying the output sample rate
|
236
|
+
|
237
|
+
> tfm.set_output_format(rate=8000)
|
238
|
+
> output_array = tfm.build(input_array=y, sample_rate_in=sample_rate)
|
239
|
+
|
240
|
+
if an effect changes the number of channels, you must explicitly
|
241
|
+
specify the number of output channels
|
242
|
+
|
243
|
+
> tfm.remix(remix_dictionary={1: [1], 2: [1], 3: [1]})
|
244
|
+
> tfm.set_output_format(channels=3)
|
245
|
+
> output_array = tfm.build(input_array=y, sample_rate_in=sample_rate)
|
246
|
+
|
247
|
+
|
248
|
+
"""
|
249
|
+
from sox.core import SoxError
|
250
|
+
from sox.core import sox
|
251
|
+
from sox.log import logger
|
252
|
+
from sox.transform import ENCODINGS_MAPPING
|
253
|
+
|
254
|
+
input_format, input_filepath = self._parse_inputs(input_filepath, input_array, sample_rate_in)
|
255
|
+
|
256
|
+
# check if any of the below commands are part of the effects chain
|
257
|
+
ignored_commands = ["channels", "convert"]
|
258
|
+
if set(ignored_commands) & set(self.effects_log):
|
259
|
+
logger.warning(
|
260
|
+
"When outputting to an array, channels and convert "
|
261
|
+
+ "effects may be ignored. Use set_output_format() to "
|
262
|
+
+ "specify output formats."
|
263
|
+
)
|
264
|
+
|
265
|
+
output_filepath = "-"
|
266
|
+
|
267
|
+
if input_format.get("file_type") is None:
|
268
|
+
encoding_out = np.int16
|
269
|
+
else:
|
270
|
+
encoding_out = next(k for k, v in ENCODINGS_MAPPING.items() if input_format["file_type"] == v)
|
271
|
+
|
272
|
+
n_bits = np.dtype(encoding_out).itemsize * 8
|
273
|
+
|
274
|
+
output_format = {
|
275
|
+
"file_type": "raw",
|
276
|
+
"rate": sample_rate_in,
|
277
|
+
"bits": n_bits,
|
278
|
+
"channels": input_format["channels"],
|
279
|
+
"encoding": None,
|
280
|
+
"comments": None,
|
281
|
+
"append_comments": True,
|
282
|
+
}
|
283
|
+
|
284
|
+
if self.output_format.get("rate") is not None:
|
285
|
+
output_format["rate"] = self.output_format["rate"]
|
286
|
+
|
287
|
+
if self.output_format.get("channels") is not None:
|
288
|
+
output_format["channels"] = self.output_format["channels"]
|
289
|
+
|
290
|
+
if self.output_format.get("bits") is not None:
|
291
|
+
n_bits = self.output_format["bits"]
|
292
|
+
output_format["bits"] = n_bits
|
293
|
+
|
294
|
+
match n_bits:
|
295
|
+
case 8:
|
296
|
+
encoding_out = np.int8 # type: ignore[assignment]
|
297
|
+
case 16:
|
298
|
+
encoding_out = np.int16
|
299
|
+
case 32:
|
300
|
+
encoding_out = np.float32 # type: ignore[assignment]
|
301
|
+
case 64:
|
302
|
+
encoding_out = np.float64 # type: ignore[assignment]
|
303
|
+
case _:
|
304
|
+
raise ValueError(f"invalid n_bits {n_bits}")
|
305
|
+
|
306
|
+
args = []
|
307
|
+
args.extend(self.globals)
|
308
|
+
args.extend(self._input_format_args(input_format))
|
309
|
+
args.append(input_filepath)
|
310
|
+
args.extend(self._output_format_args(output_format))
|
311
|
+
args.append(output_filepath)
|
312
|
+
args.extend(self.effects)
|
313
|
+
|
314
|
+
if extra_args is not None:
|
315
|
+
if not isinstance(extra_args, list):
|
316
|
+
raise ValueError("extra_args must be a list.")
|
317
|
+
args.extend(extra_args)
|
318
|
+
|
319
|
+
status, out, err = sox(args, input_array, False)
|
320
|
+
if status != 0:
|
321
|
+
raise SoxError(f"Stdout: {out}\nStderr: {err}")
|
322
|
+
|
323
|
+
out = np.frombuffer(out, dtype=encoding_out) # pyright: ignore [reportArgumentType, reportCallIssue]
|
324
|
+
if output_format["channels"] > 1:
|
325
|
+
out = out.reshape(
|
326
|
+
(output_format["channels"], int(len(out) / output_format["channels"])),
|
327
|
+
order="F",
|
328
|
+
).T
|
329
|
+
logger.info("Created array with effects: %s", " ".join(self.effects_log))
|
330
|
+
|
331
|
+
return out
|
sonusai/metrics_summary.py
CHANGED
@@ -137,7 +137,9 @@ def main() -> None:
|
|
137
137
|
print(f"Could not open SonusAI mixture database in {location}, exiting ...")
|
138
138
|
return
|
139
139
|
|
140
|
-
|
140
|
+
# Only check first and last mixture in order to save time
|
141
|
+
metrics_present = mixdb.cached_metrics([0, mixdb.num_mixtures - 1])
|
142
|
+
|
141
143
|
num_metrics_present = len(metrics_present)
|
142
144
|
if num_metrics_present < 1:
|
143
145
|
print(f"mixdb reports no pre-generated metrics are present. Nothing to summarize in {location}, exiting ...")
|
@@ -150,7 +152,7 @@ def main() -> None:
|
|
150
152
|
create_file_handler(join(location, "metrics_summary.log"))
|
151
153
|
update_console_handler(verbose)
|
152
154
|
initial_log_messages("metrics_summary")
|
153
|
-
logger.info(f"Logging summary of SonusAI mixture
|
155
|
+
logger.info(f"Logging summary of SonusAI mixture database at {location}")
|
154
156
|
else:
|
155
157
|
update_console_handler(verbose)
|
156
158
|
|
@@ -164,7 +166,7 @@ def main() -> None:
|
|
164
166
|
fsuffix = f"_s{len(mixids)}t{mixdb.num_mixtures}"
|
165
167
|
else:
|
166
168
|
logger.info(
|
167
|
-
f"Summarizing SonusAI mixture
|
169
|
+
f"Summarizing SonusAI mixture database with {mixdb.num_mixtures} mixtures "
|
168
170
|
f"and {num_metrics_present} pre-generated metrics ..."
|
169
171
|
)
|
170
172
|
fsuffix = ""
|
sonusai/mixture/__init__.py
CHANGED
@@ -5,6 +5,7 @@ from .audio import get_duration
|
|
5
5
|
from .audio import get_next_noise
|
6
6
|
from .audio import get_num_samples
|
7
7
|
from .audio import get_sample_rate
|
8
|
+
from .audio import raw_read_audio
|
8
9
|
from .audio import read_audio
|
9
10
|
from .audio import read_ir
|
10
11
|
from .audio import validate_input_file
|
@@ -53,7 +54,9 @@ from .datatypes import AudioF
|
|
53
54
|
from .datatypes import AudioStatsMetrics
|
54
55
|
from .datatypes import AudioT
|
55
56
|
from .datatypes import Augmentation
|
57
|
+
from .datatypes import AugmentationEffects
|
56
58
|
from .datatypes import AugmentationRule
|
59
|
+
from .datatypes import AugmentationRuleEffects
|
57
60
|
from .datatypes import AugmentedTarget
|
58
61
|
from .datatypes import ClassCount
|
59
62
|
from .datatypes import EnergyF
|
@@ -111,10 +114,10 @@ from .helpers import get_transform_from_audio
|
|
111
114
|
from .helpers import inverse_transform
|
112
115
|
from .helpers import mixture_metadata
|
113
116
|
from .helpers import write_mixture_metadata
|
117
|
+
from .ir_delay import get_impulse_response_delay
|
114
118
|
from .log_duration_and_sizes import log_duration_and_sizes
|
115
119
|
from .mixdb import MixtureDatabase
|
116
120
|
from .mixdb import db_file
|
117
|
-
from .sox_audio import Transformer
|
118
121
|
from .spectral_mask import apply_spectral_mask
|
119
122
|
from .target_class_balancing import balance_targets
|
120
123
|
from .targets import get_augmented_target_ids_by_class
|
sonusai/mixture/audio.py
CHANGED
@@ -58,9 +58,62 @@ def get_sample_rate(name: str | Path, use_cache: bool = True) -> int:
|
|
58
58
|
|
59
59
|
@lru_cache
|
60
60
|
def _get_sample_rate(name: str | Path) -> int:
|
61
|
-
from
|
61
|
+
"""Get sample rate from audio file using soundfile
|
62
62
|
|
63
|
-
|
63
|
+
:param name: File name
|
64
|
+
:return: Sample rate
|
65
|
+
"""
|
66
|
+
import soundfile
|
67
|
+
from pydub import AudioSegment
|
68
|
+
|
69
|
+
from .tokenized_shell_vars import tokenized_expand
|
70
|
+
|
71
|
+
expanded_name, _ = tokenized_expand(name)
|
72
|
+
|
73
|
+
try:
|
74
|
+
if expanded_name.endswith(".mp3"):
|
75
|
+
return AudioSegment.from_mp3(expanded_name).frame_rate
|
76
|
+
|
77
|
+
if expanded_name.endswith(".m4a"):
|
78
|
+
return AudioSegment.from_file(expanded_name).frame_rate
|
79
|
+
|
80
|
+
return soundfile.info(expanded_name).samplerate
|
81
|
+
except Exception as e:
|
82
|
+
if name != expanded_name:
|
83
|
+
raise OSError(f"Error reading {name} (expanded: {expanded_name}): {e}") from e
|
84
|
+
else:
|
85
|
+
raise OSError(f"Error reading {name}: {e}") from e
|
86
|
+
|
87
|
+
|
88
|
+
def raw_read_audio(name: str | Path) -> tuple[AudioT, int]:
|
89
|
+
import numpy as np
|
90
|
+
import soundfile
|
91
|
+
from pydub import AudioSegment
|
92
|
+
|
93
|
+
from .tokenized_shell_vars import tokenized_expand
|
94
|
+
|
95
|
+
expanded_name, _ = tokenized_expand(name)
|
96
|
+
|
97
|
+
try:
|
98
|
+
if expanded_name.endswith(".mp3"):
|
99
|
+
sound = AudioSegment.from_mp3(expanded_name)
|
100
|
+
raw = np.array(sound.get_array_of_samples()).astype(np.float32).reshape((-1, sound.channels))
|
101
|
+
raw = raw / 2 ** (sound.sample_width * 8 - 1)
|
102
|
+
sample_rate = sound.frame_rate
|
103
|
+
elif expanded_name.endswith(".m4a"):
|
104
|
+
sound = AudioSegment.from_file(expanded_name)
|
105
|
+
raw = np.array(sound.get_array_of_samples()).astype(np.float32).reshape((-1, sound.channels))
|
106
|
+
raw = raw / 2 ** (sound.sample_width * 8 - 1)
|
107
|
+
sample_rate = sound.frame_rate
|
108
|
+
else:
|
109
|
+
raw, sample_rate = soundfile.read(expanded_name, always_2d=True, dtype="float32")
|
110
|
+
except Exception as e:
|
111
|
+
if name != expanded_name:
|
112
|
+
raise OSError(f"Error reading {name} (expanded: {expanded_name}): {e}") from e
|
113
|
+
else:
|
114
|
+
raise OSError(f"Error reading {name}: {e}") from e
|
115
|
+
|
116
|
+
return np.squeeze(raw[:, 0].astype(np.float32)), sample_rate
|
64
117
|
|
65
118
|
|
66
119
|
def read_audio(name: str | Path, use_cache: bool = True) -> AudioT:
|
@@ -77,28 +130,45 @@ def read_audio(name: str | Path, use_cache: bool = True) -> AudioT:
|
|
77
130
|
|
78
131
|
@lru_cache
|
79
132
|
def _read_audio(name: str | Path) -> AudioT:
|
80
|
-
from
|
133
|
+
"""Read audio data from a file using soundfile
|
134
|
+
|
135
|
+
:param name: File name
|
136
|
+
:return: Array of time domain audio data
|
137
|
+
"""
|
138
|
+
import librosa
|
139
|
+
|
140
|
+
from .constants import SAMPLE_RATE
|
141
|
+
|
142
|
+
out, sample_rate = raw_read_audio(name)
|
143
|
+
out = librosa.resample(out, orig_sr=sample_rate, target_sr=SAMPLE_RATE, res_type="soxr_hq")
|
81
144
|
|
82
|
-
return
|
145
|
+
return out
|
83
146
|
|
84
147
|
|
85
|
-
def read_ir(name: str | Path, use_cache: bool = True) -> ImpulseResponseData:
|
148
|
+
def read_ir(name: str | Path, delay: int, use_cache: bool = True) -> ImpulseResponseData:
|
86
149
|
"""Read impulse response data
|
87
150
|
|
88
151
|
:param name: File name
|
152
|
+
:param delay: Delay in samples
|
89
153
|
:param use_cache: If true, use LRU caching
|
90
154
|
:return: ImpulseResponseData object
|
91
155
|
"""
|
92
156
|
if use_cache:
|
93
|
-
return _read_ir(name)
|
94
|
-
return _read_ir.__wrapped__(name)
|
157
|
+
return _read_ir(name, delay)
|
158
|
+
return _read_ir.__wrapped__(name, delay)
|
95
159
|
|
96
160
|
|
97
161
|
@lru_cache
|
98
|
-
def _read_ir(name: str | Path) -> ImpulseResponseData:
|
99
|
-
|
162
|
+
def _read_ir(name: str | Path, delay: int) -> ImpulseResponseData:
|
163
|
+
"""Read impulse response data using soundfile
|
100
164
|
|
101
|
-
|
165
|
+
:param name: File name
|
166
|
+
:param delay: Delay in samples
|
167
|
+
:return: ImpulseResponseData object
|
168
|
+
"""
|
169
|
+
out, sample_rate = raw_read_audio(name)
|
170
|
+
|
171
|
+
return ImpulseResponseData(data=out, sample_rate=sample_rate, delay=delay)
|
102
172
|
|
103
173
|
|
104
174
|
def get_num_samples(name: str | Path, use_cache: bool = True) -> int:
|
@@ -120,6 +190,27 @@ def _get_num_samples(name: str | Path) -> int:
|
|
120
190
|
:param name: File name
|
121
191
|
:return: number of samples in resampled audio
|
122
192
|
"""
|
123
|
-
|
193
|
+
import math
|
194
|
+
|
195
|
+
import soundfile
|
196
|
+
from pydub import AudioSegment
|
124
197
|
|
125
|
-
|
198
|
+
from .constants import SAMPLE_RATE
|
199
|
+
from .tokenized_shell_vars import tokenized_expand
|
200
|
+
|
201
|
+
expanded_name, _ = tokenized_expand(name)
|
202
|
+
|
203
|
+
if expanded_name.endswith(".mp3"):
|
204
|
+
sound = AudioSegment.from_mp3(expanded_name)
|
205
|
+
samples = sound.frame_count()
|
206
|
+
sample_rate = sound.frame_rate
|
207
|
+
elif expanded_name.endswith(".m4a"):
|
208
|
+
sound = AudioSegment.from_file(expanded_name)
|
209
|
+
samples = sound.frame_count()
|
210
|
+
sample_rate = sound.frame_rate
|
211
|
+
else:
|
212
|
+
info = soundfile.info(name)
|
213
|
+
samples = info.frames
|
214
|
+
sample_rate = info.samplerate
|
215
|
+
|
216
|
+
return math.ceil(SAMPLE_RATE * samples / sample_rate)
|