sonusai 1.0.16__cp311-abi3-macosx_10_12_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/__init__.py +170 -0
- sonusai/aawscd_probwrite.py +148 -0
- sonusai/audiofe.py +481 -0
- sonusai/calc_metric_spenh.py +1136 -0
- sonusai/config/__init__.py +0 -0
- sonusai/config/asr.py +21 -0
- sonusai/config/config.py +65 -0
- sonusai/config/config.yml +49 -0
- sonusai/config/constants.py +53 -0
- sonusai/config/ir.py +124 -0
- sonusai/config/ir_delay.py +62 -0
- sonusai/config/source.py +275 -0
- sonusai/config/spectral_masks.py +15 -0
- sonusai/config/truth.py +64 -0
- sonusai/constants.py +14 -0
- sonusai/data/__init__.py +0 -0
- sonusai/data/silero_vad_v5.1.jit +0 -0
- sonusai/data/silero_vad_v5.1.onnx +0 -0
- sonusai/data/speech_ma01_01.wav +0 -0
- sonusai/data/whitenoise.wav +0 -0
- sonusai/datatypes.py +383 -0
- sonusai/deprecated/gentcst.py +632 -0
- sonusai/deprecated/plot.py +519 -0
- sonusai/deprecated/tplot.py +365 -0
- sonusai/doc.py +52 -0
- sonusai/doc_strings/__init__.py +1 -0
- sonusai/doc_strings/doc_strings.py +531 -0
- sonusai/genft.py +196 -0
- sonusai/genmetrics.py +183 -0
- sonusai/genmix.py +199 -0
- sonusai/genmixdb.py +235 -0
- sonusai/ir_metric.py +551 -0
- sonusai/lsdb.py +141 -0
- sonusai/main.py +134 -0
- sonusai/metrics/__init__.py +43 -0
- sonusai/metrics/calc_audio_stats.py +42 -0
- sonusai/metrics/calc_class_weights.py +90 -0
- sonusai/metrics/calc_optimal_thresholds.py +73 -0
- sonusai/metrics/calc_pcm.py +45 -0
- sonusai/metrics/calc_pesq.py +36 -0
- sonusai/metrics/calc_phase_distance.py +43 -0
- sonusai/metrics/calc_sa_sdr.py +64 -0
- sonusai/metrics/calc_sample_weights.py +25 -0
- sonusai/metrics/calc_segsnr_f.py +82 -0
- sonusai/metrics/calc_speech.py +382 -0
- sonusai/metrics/calc_wer.py +71 -0
- sonusai/metrics/calc_wsdr.py +57 -0
- sonusai/metrics/calculate_metrics.py +395 -0
- sonusai/metrics/class_summary.py +74 -0
- sonusai/metrics/confusion_matrix_summary.py +75 -0
- sonusai/metrics/one_hot.py +283 -0
- sonusai/metrics/snr_summary.py +128 -0
- sonusai/metrics_summary.py +314 -0
- sonusai/mixture/__init__.py +15 -0
- sonusai/mixture/audio.py +187 -0
- sonusai/mixture/class_balancing.py +103 -0
- sonusai/mixture/constants.py +3 -0
- sonusai/mixture/data_io.py +173 -0
- sonusai/mixture/db.py +169 -0
- sonusai/mixture/db_datatypes.py +92 -0
- sonusai/mixture/effects.py +344 -0
- sonusai/mixture/feature.py +78 -0
- sonusai/mixture/generation.py +1116 -0
- sonusai/mixture/helpers.py +351 -0
- sonusai/mixture/ir_effects.py +77 -0
- sonusai/mixture/log_duration_and_sizes.py +23 -0
- sonusai/mixture/mixdb.py +1857 -0
- sonusai/mixture/pad_audio.py +35 -0
- sonusai/mixture/resample.py +7 -0
- sonusai/mixture/sox_effects.py +195 -0
- sonusai/mixture/sox_help.py +650 -0
- sonusai/mixture/spectral_mask.py +51 -0
- sonusai/mixture/truth.py +61 -0
- sonusai/mixture/truth_functions/__init__.py +45 -0
- sonusai/mixture/truth_functions/crm.py +105 -0
- sonusai/mixture/truth_functions/energy.py +222 -0
- sonusai/mixture/truth_functions/file.py +48 -0
- sonusai/mixture/truth_functions/metadata.py +24 -0
- sonusai/mixture/truth_functions/metrics.py +28 -0
- sonusai/mixture/truth_functions/phoneme.py +18 -0
- sonusai/mixture/truth_functions/sed.py +98 -0
- sonusai/mixture/truth_functions/target.py +142 -0
- sonusai/mkwav.py +135 -0
- sonusai/onnx_predict.py +363 -0
- sonusai/parse/__init__.py +0 -0
- sonusai/parse/expand.py +156 -0
- sonusai/parse/parse_source_directive.py +129 -0
- sonusai/parse/rand.py +214 -0
- sonusai/py.typed +0 -0
- sonusai/queries/__init__.py +0 -0
- sonusai/queries/queries.py +239 -0
- sonusai/rs.abi3.so +0 -0
- sonusai/rs.pyi +1 -0
- sonusai/rust/__init__.py +0 -0
- sonusai/speech/__init__.py +0 -0
- sonusai/speech/l2arctic.py +121 -0
- sonusai/speech/librispeech.py +102 -0
- sonusai/speech/mcgill.py +71 -0
- sonusai/speech/textgrid.py +89 -0
- sonusai/speech/timit.py +138 -0
- sonusai/speech/types.py +12 -0
- sonusai/speech/vctk.py +53 -0
- sonusai/speech/voxceleb.py +108 -0
- sonusai/utils/__init__.py +3 -0
- sonusai/utils/asl_p56.py +130 -0
- sonusai/utils/asr.py +91 -0
- sonusai/utils/asr_functions/__init__.py +3 -0
- sonusai/utils/asr_functions/aaware_whisper.py +69 -0
- sonusai/utils/audio_devices.py +50 -0
- sonusai/utils/braced_glob.py +50 -0
- sonusai/utils/calculate_input_shape.py +26 -0
- sonusai/utils/choice.py +51 -0
- sonusai/utils/compress.py +25 -0
- sonusai/utils/convert_string_to_number.py +6 -0
- sonusai/utils/create_timestamp.py +5 -0
- sonusai/utils/create_ts_name.py +14 -0
- sonusai/utils/dataclass_from_dict.py +27 -0
- sonusai/utils/db.py +16 -0
- sonusai/utils/docstring.py +53 -0
- sonusai/utils/energy_f.py +44 -0
- sonusai/utils/engineering_number.py +166 -0
- sonusai/utils/evaluate_random_rule.py +15 -0
- sonusai/utils/get_frames_per_batch.py +2 -0
- sonusai/utils/get_label_names.py +20 -0
- sonusai/utils/grouper.py +6 -0
- sonusai/utils/human_readable_size.py +7 -0
- sonusai/utils/keyboard_interrupt.py +12 -0
- sonusai/utils/load_object.py +21 -0
- sonusai/utils/max_text_width.py +9 -0
- sonusai/utils/model_utils.py +28 -0
- sonusai/utils/numeric_conversion.py +11 -0
- sonusai/utils/onnx_utils.py +155 -0
- sonusai/utils/parallel.py +162 -0
- sonusai/utils/path_info.py +7 -0
- sonusai/utils/print_mixture_details.py +60 -0
- sonusai/utils/rand.py +13 -0
- sonusai/utils/ranges.py +43 -0
- sonusai/utils/read_predict_data.py +32 -0
- sonusai/utils/reshape.py +154 -0
- sonusai/utils/seconds_to_hms.py +7 -0
- sonusai/utils/stacked_complex.py +82 -0
- sonusai/utils/stratified_shuffle_split.py +170 -0
- sonusai/utils/tokenized_shell_vars.py +143 -0
- sonusai/utils/write_audio.py +26 -0
- sonusai/utils/yes_or_no.py +8 -0
- sonusai/vars.py +47 -0
- sonusai-1.0.16.dist-info/METADATA +56 -0
- sonusai-1.0.16.dist-info/RECORD +150 -0
- sonusai-1.0.16.dist-info/WHEEL +4 -0
- sonusai-1.0.16.dist-info/entry_points.txt +3 -0
sonusai/mixture/helpers.py
@@ -0,0 +1,351 @@
+from pyaaware import ForwardTransform
+from pyaaware import InverseTransform
+
+from ..datatypes import AudioF
+from ..datatypes import AudioT
+from ..datatypes import EnergyT
+from ..datatypes import FeatureGeneratorConfig
+from ..datatypes import FeatureGeneratorInfo
+from ..datatypes import GeneralizedIDs
+from ..datatypes import Mixture
+from ..datatypes import Source
+from ..datatypes import Sources
+from ..datatypes import SpeechMetadata
+from ..datatypes import TransformConfig
+from .db_datatypes import MixtureRecord
+from .db_datatypes import SourceRecord
+from .mixdb import MixtureDatabase
+
+
+def generic_ids_to_list(num_ids: int, ids: GeneralizedIDs = "*") -> list[int]:
+    """Resolve generalized IDs to a list of integers
+
+    :param num_ids: Total number of indices
+    :param ids: Generalized IDs
+    :return: List of ID integers
+    """
+    all_ids = list(range(num_ids))
+
+    if isinstance(ids, str):
+        if ids == "*":
+            return all_ids
+
+        try:
+            result = eval(f"{all_ids}[{ids}]")  # noqa: S307
+            if isinstance(result, list):
+                return result
+            else:
+                return [result]
+        except NameError as e:
+            raise ValueError(f"Empty ids {ids}: {e}") from e
+
+    if isinstance(ids, range):
+        result = list(ids)
+    elif isinstance(ids, int):
+        result = [ids]
+    else:
+        result = ids
+
+    if not all(isinstance(x, int) and 0 <= x < num_ids for x in result):
+        raise ValueError(f"Invalid entries in ids of {ids}")
+
+    if not result:
+        raise ValueError(f"Empty ids {ids}")
+
+    return result
+
+
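Note: generic_ids_to_list accepts "*", a Python slice string (evaluated against the full ID list), a single int, a range, or an explicit list. A minimal illustration, runnable against the function above:

    generic_ids_to_list(10)               # default "*"  -> [0, 1, ..., 9]
    generic_ids_to_list(10, ":3")         # slice string -> [0, 1, 2]
    generic_ids_to_list(10, 5)            # single int   -> [5]
    generic_ids_to_list(10, range(2, 5))  # range        -> [2, 3, 4]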
+def get_feature_generator_info(fg_config: FeatureGeneratorConfig) -> FeatureGeneratorInfo:
+    from pyaaware import FeatureGenerator
+
+    from ..datatypes import TransformConfig
+
+    fg = FeatureGenerator(feature_mode=fg_config.feature_mode)
+
+    return FeatureGeneratorInfo(
+        decimation=fg.decimation,
+        stride=fg.stride,
+        step=fg.step,
+        feature_parameters=fg.feature_parameters,
+        ft_config=TransformConfig(
+            length=fg.ftransform_length,
+            overlap=fg.ftransform_overlap,
+            bin_start=fg.bin_start,
+            bin_end=fg.bin_end,
+            ttype=fg.ftransform_ttype,
+        ),
+        eft_config=TransformConfig(
+            length=fg.eftransform_length,
+            overlap=fg.eftransform_overlap,
+            bin_start=fg.bin_start,
+            bin_end=fg.bin_end,
+            ttype=fg.eftransform_ttype,
+        ),
+        it_config=TransformConfig(
+            length=fg.itransform_length,
+            overlap=fg.itransform_overlap,
+            bin_start=fg.bin_start,
+            bin_end=fg.bin_end,
+            ttype=fg.itransform_ttype,
+        ),
+    )
+
+
+def mixture_all_speech_metadata(mixdb: MixtureDatabase, mixture: Mixture) -> dict[str, dict[str, SpeechMetadata]]:
+    """Get all speech metadata for the given mixture, keyed by source category and metadata tier"""
+    from praatio.utilities.constants import Interval
+
+    from ..datatypes import SpeechMetadata
+
+    results: dict[str, dict[str, SpeechMetadata]] = {}
+    for category, source in mixture.all_sources.items():
+        data: dict[str, SpeechMetadata] = {}
+        for tier in mixdb.speaker_metadata_tiers:
+            data[tier] = mixdb.speaker(mixdb.source_file(source.file_id).speaker_id, tier)
+
+        for tier in mixdb.textgrid_metadata_tiers:
+            item = get_textgrid_tier_from_source_file(mixdb.source_file(source.file_id).name, tier)
+            if isinstance(item, list):
+                # Check for tempo effect and adjust Interval start and end data as needed
+                entries = []
+                for entry in item:
+                    entries.append(
+                        Interval(
+                            entry.start / source.pre_tempo,
+                            entry.end / source.pre_tempo,
+                            entry.label,
+                        )
+                    )
+                data[tier] = entries
+            else:
+                data[tier] = item
+        results[category] = data
+
+    return results
+
+
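Note: the Interval rescaling above maps TextGrid boundaries from the original audio timeline onto the tempo-adjusted timeline. For example, with pre_tempo = 2.0 (audio played twice as fast), an interval of (0.50 s, 1.00 s, "word") becomes (0.25 s, 0.50 s, "word").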
+def mixture_metadata(mixdb: MixtureDatabase, m_id: int | None = None, mixture: Mixture | None = None) -> str:
+    """Create a string of metadata for a Mixture
+
+    :param mixdb: Mixture database
+    :param m_id: Mixture ID
+    :param mixture: Mixture record
+    :return: String of metadata
+    """
+    if m_id is not None:
+        mixture = mixdb.mixture(m_id)
+
+    if mixture is None:
+        raise ValueError("No mixture specified.")
+
+    metadata = ""
+    speech_metadata = mixture_all_speech_metadata(mixdb, mixture)
+    metadata += f"samples: {mixture.samples}\n"
+    for category, source in mixture.all_sources.items():
+        source_file = mixdb.source_file(source.file_id)
+        metadata += f"{category} name: {source_file.name}\n"
+        metadata += f"{category} effects: {source.effects.to_dict()}\n"
+        metadata += f"{category} pre_tempo: {source.pre_tempo}\n"
+        metadata += f"{category} class indices: {source_file.class_indices}\n"
+        metadata += f"{category} start: {source.start}\n"
+        metadata += f"{category} repeat: {source.loop}\n"
+        metadata += f"{category} snr: {source.snr}\n"
+        metadata += f"{category} random_snr: {source.snr.is_random}\n"
+        metadata += f"{category} snr_gain: {source.snr_gain}\n"
+        for key in source_file.truth_configs:
+            metadata += f"{category} truth '{key}' function: {source_file.truth_configs[key].function}\n"
+            metadata += f"{category} truth '{key}' config: {source_file.truth_configs[key].config}\n"
+        for key in speech_metadata[category]:
+            metadata += f"{category} speech {key}: {speech_metadata[category][key]}\n"
+
+    return metadata
+
+
+def write_mixture_metadata(mixdb: MixtureDatabase, m_id: int | None = None, mixture: Mixture | None = None) -> None:
+    """Write mixture metadata to a text file
+
+    :param mixdb: Mixture database
+    :param m_id: Mixture ID
+    :param mixture: Mixture record
+    """
+    from os.path import join
+
+    if m_id is not None:
+        name = mixdb.mixture(m_id).name
+    elif mixture is not None:
+        name = mixture.name
+    else:
+        raise ValueError("No mixture specified.")
+
+    name = join(mixdb.location, "mixture", name, "metadata.txt")
+    with open(file=name, mode="w") as f:
+        f.write(mixture_metadata(mixdb, m_id, mixture))
+
+
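A usage sketch (the database location and the mixture-count accessor are assumptions for illustration, not confirmed package API):

    # Hypothetical: write metadata.txt for every mixture in a database
    mixdb = MixtureDatabase("/path/to/mixdb")  # constructor arguments are an assumption
    for m_id in range(mixdb.num_mixtures):     # num_mixtures is an assumption
        write_mixture_metadata(mixdb, m_id=m_id)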
+def from_mixture(mixture: Mixture) -> tuple[str, int, int, int]:
+    return mixture.name, mixture.samples, mixture.spectral_mask_id, mixture.spectral_mask_seed
+
+
+def to_mixture(entry: MixtureRecord, sources: Sources) -> Mixture:
+    return Mixture(
+        name=entry.name,
+        samples=entry.samples,
+        all_sources=sources,
+        spectral_mask_id=entry.spectral_mask_id,
+        spectral_mask_seed=entry.spectral_mask_seed,
+    )
+
+
+def from_source(source: Source) -> tuple[str, int, float, bool, float, float, bool, int]:
+    return (
+        source.effects.to_json(),
+        source.file_id,
+        source.pre_tempo,
+        source.loop,
+        source.snr,
+        source.snr_gain,
+        source.snr.is_random,
+        source.start,
+    )
+
+
+def to_source(entry: SourceRecord) -> Source:
+    import json
+
+    from ..datatypes import Effects
+    from ..datatypes import UniversalSNR
+    from ..utils.dataclass_from_dict import dataclass_from_dict
+
+    return Source(
+        file_id=entry.file_id,
+        effects=dataclass_from_dict(Effects, json.loads(entry.effects)),
+        start=entry.start,
+        loop=entry.repeat,
+        snr=UniversalSNR(entry.snr, entry.snr_random),
+        snr_gain=entry.snr_gain,
+        pre_tempo=entry.pre_tempo,
+    )
+
+
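Note: from_source and to_source are intended as inverses across the database boundary. from_source flattens a Source into the SourceRecord column values (effects as JSON, file_id, pre_tempo, loop/repeat, snr, snr_gain, snr_random, start); to_source rebuilds the dataclass, decoding the effects JSON via dataclass_from_dict and re-wrapping snr and snr_random in UniversalSNR.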
+def get_transform_from_audio(audio: AudioT, transform: ForwardTransform) -> tuple[AudioF, EnergyT]:
+    """Apply forward transform to input audio data to generate transform data
+
+    :param audio: Time domain data [samples]
+    :param transform: ForwardTransform object
+    :return: Frequency domain data [frames, bins], Energy [frames]
+    """
+    import torch
+
+    f, e = transform.execute_all(torch.from_numpy(audio))
+
+    return f.numpy(), e.numpy()
+
+
+def forward_transform(audio: AudioT, config: TransformConfig) -> AudioF:
+    """Transform time domain data into frequency domain using the forward transform config from the feature
+
+    A new transform is used for each call; i.e., state is not maintained between calls to forward_transform().
+
+    :param audio: Time domain data [samples]
+    :param config: Transform configuration
+    :return: Frequency domain data [frames, bins]
+    """
+    from pyaaware import ForwardTransform
+
+    audio_f, _ = get_transform_from_audio(
+        audio=audio,
+        transform=ForwardTransform(
+            length=config.length,
+            overlap=config.overlap,
+            bin_start=config.bin_start,
+            bin_end=config.bin_end,
+            ttype=config.ttype,
+        ),
+    )
+    return audio_f
+
+
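A minimal sketch of calling forward_transform directly; the transform parameters below are illustrative assumptions, not package defaults:

    import numpy as np

    # Illustrative values; real values come from the feature's transform config
    config = TransformConfig(length=512, overlap=256, bin_start=0, bin_end=257, ttype="stft")

    t = np.arange(16000, dtype=np.float32) / 16000.0          # 1 s at an assumed 16 kHz
    audio = np.sin(2 * np.pi * 440.0 * t).astype(np.float32)  # 440 Hz tone

    audio_f = forward_transform(audio, config)                # [frames, bins]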
+def get_audio_from_transform(data: AudioF, transform: InverseTransform) -> tuple[AudioT, EnergyT]:
+    """Apply inverse transform to input transform data to generate audio data
+
+    :param data: Frequency domain data [frames, bins]
+    :param transform: InverseTransform object
+    :return: Time domain data [samples], Energy [frames]
+    """
+    import torch
+
+    t, e = transform.execute_all(torch.from_numpy(data))
+
+    return t.numpy(), e.numpy()
+
+
+def inverse_transform(transform: AudioF, config: TransformConfig) -> AudioT:
+    """Transform frequency domain data into time domain using the inverse transform config from the feature
+
+    A new transform is used for each call; i.e., state is not maintained between calls to inverse_transform().
+
+    :param transform: Frequency domain data [frames, bins]
+    :param config: Transform configuration
+    :return: Time domain data [samples]
+    """
+    from pyaaware import InverseTransform
+
+    audio, _ = get_audio_from_transform(
+        data=transform,
+        transform=InverseTransform(
+            length=config.length,
+            overlap=config.overlap,
+            bin_start=config.bin_start,
+            bin_end=config.bin_end,
+            ttype=config.ttype,
+            gain=1,
+        ),
+    )
+    return audio
+
+
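Continuing the sketch above, a forward/inverse round trip should approximately reconstruct the signal, since both calls are stateless and share the same configuration:

    audio_rt = inverse_transform(audio_f, config)
    # audio_rt approximates the original audio, up to overlap-add edge effects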
+def check_audio_files_exist(mixdb: MixtureDatabase) -> None:
+    """Walk through all the noise and target audio files in a mixture database ensuring that they exist"""
+    from os.path import exists
+
+    from ..utils.tokenized_shell_vars import tokenized_expand
+
+    for source_files in mixdb.source_files.values():
+        for source_file in source_files:
+            file_name, _ = tokenized_expand(source_file.name)
+            if not exists(file_name):
+                raise OSError(f"Could not find {file_name}")
+
+
+def get_textgrid_tier_from_source_file(source_file: str, tier: str) -> SpeechMetadata | None:
+    from pathlib import Path
+
+    from praatio import textgrid
+    from praatio.utilities.constants import Interval
+
+    from ..utils.tokenized_shell_vars import tokenized_expand
+
+    textgrid_file = Path(tokenized_expand(source_file)[0]).with_suffix(".TextGrid")
+    if not textgrid_file.exists():
+        return None
+
+    tg = textgrid.openTextgrid(str(textgrid_file), includeEmptyIntervals=False)
+
+    if tier not in tg.tierNames:
+        return None
+
+    entries = tg.getTier(tier).entries
+    if len(entries) > 1:
+        return [entry for entry in entries if isinstance(entry, Interval)]
+
+    if len(entries) == 1:
+        return entries[0].label
+
+    return None
+
+
+def frames_from_samples(samples: int, step_samples: int) -> int:
+    import numpy as np
+
+    return int(np.ceil(samples / step_samples))
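Note: frames_from_samples rounds up, so a partial final step still counts as a frame: frames_from_samples(16000, 160) == 100, while frames_from_samples(16001, 160) == 101.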
sonusai/mixture/ir_effects.py
@@ -0,0 +1,77 @@
+from functools import lru_cache
+from pathlib import Path
+
+from ..datatypes import AudioT
+from ..datatypes import ImpulseResponseData
+from .audio import raw_read_audio
+
+
+def apply_ir(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
+    """Apply impulse response to audio data using scipy
+
+    :param audio: Audio
+    :param ir: Impulse response data
+    :return: Effected audio
+    """
+    import numpy as np
+    from librosa import resample
+    from scipy.signal import fftconvolve
+
+    from ..constants import SAMPLE_RATE
+
+    if not isinstance(audio, np.ndarray):
+        raise TypeError("audio must be a numpy array")
+
+    # Early exit if no ir or if all audio is zero
+    if ir is None or not audio.any():
+        return audio
+
+    pk_in = np.max(np.abs(audio))
+
+    # Convert audio to IR sample rate
+    audio_in = resample(audio, orig_sr=SAMPLE_RATE, target_sr=ir.sample_rate, res_type="soxr_hq")
+
+    # Apply IR
+    audio_out = fftconvolve(audio_in, ir.data, mode="full")
+
+    # Delay compensation
+    audio_out = audio_out[ir.delay :]
+
+    # Convert back to global sample rate
+    audio_out = resample(audio_out, orig_sr=ir.sample_rate, target_sr=SAMPLE_RATE, res_type="soxr_hq")
+
+    # Trim to length
+    audio_out = audio_out[: len(audio)]
+
+    # Gain compensation
+    pk_out = np.max(np.abs(audio_out))
+    pk_gain = pk_in / pk_out
+    audio_out = audio_out * pk_gain
+
+    return audio_out
+
+
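A sanity-check sketch for the delay and gain compensation: convolving with a one-tap identity IR whose sample rate matches the global rate (assumed 16 kHz here) should return the input essentially unchanged:

    import numpy as np

    # Hypothetical identity IR; if the package SAMPLE_RATE is 16 kHz, both resamples are no-ops
    ir = ImpulseResponseData(data=np.array([1.0], dtype=np.float32), sample_rate=16000, delay=0)

    audio = np.random.default_rng(0).normal(size=16000).astype(np.float32)
    out = apply_ir(audio, ir)  # same length and peak level as audio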
+def read_ir(name: str | Path, delay: int, use_cache: bool = True) -> ImpulseResponseData:
+    """Read impulse response data
+
+    :param name: File name
+    :param delay: Delay in samples
+    :param use_cache: If true, use LRU caching
+    :return: ImpulseResponseData object
+    """
+    if use_cache:
+        return _read_ir(name, delay)
+    return _read_ir.__wrapped__(name, delay)
+
+
+@lru_cache
+def _read_ir(name: str | Path, delay: int) -> ImpulseResponseData:
+    """Read impulse response data using soundfile
+
+    :param name: File name
+    :param delay: Delay in samples
+    :return: ImpulseResponseData object
+    """
+    out, sample_rate = raw_read_audio(name)
+
+    return ImpulseResponseData(data=out, sample_rate=sample_rate, delay=delay)
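Note: _read_ir is wrapped with functools.lru_cache, so repeated reads of the same (name, delay) pair are served from memory; read_ir(..., use_cache=False) calls _read_ir.__wrapped__ to bypass the cache and force a fresh read from disk.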
sonusai/mixture/log_duration_and_sizes.py
@@ -0,0 +1,23 @@
+def log_duration_and_sizes(
+    total_duration: float,
+    feature_step_samples: int,
+    feature_parameters: int,
+    stride: int,
+    desc: str,
+) -> None:
+    from .. import logger
+    from ..constants import FLOAT_BYTES
+    from ..constants import SAMPLE_BYTES
+    from ..constants import SAMPLE_RATE
+    from ..utils.human_readable_size import human_readable_size
+    from ..utils.seconds_to_hms import seconds_to_hms
+
+    total_samples = int(total_duration * SAMPLE_RATE)
+    mixture_bytes = total_samples * SAMPLE_BYTES
+    feature_bytes = total_samples / feature_step_samples * stride * feature_parameters * FLOAT_BYTES
+
+    logger.info("")
+    logger.info(f"{desc} duration: {seconds_to_hms(seconds=total_duration)}")
+    logger.info(f"{desc} sizes:")
+    logger.info(f"  mixture: {human_readable_size(mixture_bytes, 1)}")
+    logger.info(f"  feature: {human_readable_size(feature_bytes, 1)}")
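To make the size estimate concrete (with assumed constants: SAMPLE_RATE=16000, SAMPLE_BYTES=2 for 16-bit samples, FLOAT_BYTES=4 for float32): one hour of audio is 57,600,000 samples, so mixture storage is about 115.2 MB; with feature_step_samples=160, stride=1, and feature_parameters=64, the feature estimate is 57,600,000 / 160 * 1 * 64 * 4 bytes, about 92.2 MB.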