sonusai-0.20.3-py3-none-any.whl → sonusai-1.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/__init__.py +16 -3
- sonusai/audiofe.py +241 -77
- sonusai/calc_metric_spenh.py +71 -73
- sonusai/config/__init__.py +3 -0
- sonusai/config/config.py +61 -0
- sonusai/config/config.yml +20 -0
- sonusai/config/constants.py +8 -0
- sonusai/constants.py +11 -0
- sonusai/data/genmixdb.yml +21 -36
- sonusai/{mixture/datatypes.py → datatypes.py} +91 -130
- sonusai/deprecated/plot.py +4 -5
- sonusai/doc/doc.py +4 -4
- sonusai/doc.py +11 -4
- sonusai/genft.py +43 -45
- sonusai/genmetrics.py +25 -19
- sonusai/genmix.py +54 -82
- sonusai/genmixdb.py +88 -264
- sonusai/ir_metric.py +30 -34
- sonusai/lsdb.py +41 -48
- sonusai/main.py +15 -22
- sonusai/metrics/calc_audio_stats.py +4 -293
- sonusai/metrics/calc_class_weights.py +4 -4
- sonusai/metrics/calc_optimal_thresholds.py +8 -5
- sonusai/metrics/calc_pesq.py +2 -2
- sonusai/metrics/calc_segsnr_f.py +4 -4
- sonusai/metrics/calc_speech.py +25 -13
- sonusai/metrics/class_summary.py +7 -7
- sonusai/metrics/confusion_matrix_summary.py +5 -5
- sonusai/metrics/one_hot.py +4 -4
- sonusai/metrics/snr_summary.py +7 -7
- sonusai/metrics_summary.py +38 -45
- sonusai/mixture/__init__.py +4 -104
- sonusai/mixture/audio.py +10 -39
- sonusai/mixture/class_balancing.py +103 -0
- sonusai/mixture/config.py +251 -271
- sonusai/mixture/constants.py +35 -39
- sonusai/mixture/data_io.py +25 -36
- sonusai/mixture/db_datatypes.py +58 -22
- sonusai/mixture/effects.py +386 -0
- sonusai/mixture/feature.py +7 -11
- sonusai/mixture/generation.py +478 -628
- sonusai/mixture/helpers.py +82 -184
- sonusai/mixture/ir_delay.py +3 -4
- sonusai/mixture/ir_effects.py +77 -0
- sonusai/mixture/log_duration_and_sizes.py +6 -12
- sonusai/mixture/mixdb.py +910 -729
- sonusai/mixture/pad_audio.py +35 -0
- sonusai/mixture/resample.py +7 -0
- sonusai/mixture/sox_effects.py +195 -0
- sonusai/mixture/sox_help.py +650 -0
- sonusai/mixture/spectral_mask.py +2 -2
- sonusai/mixture/truth.py +17 -15
- sonusai/mixture/truth_functions/crm.py +12 -12
- sonusai/mixture/truth_functions/energy.py +22 -22
- sonusai/mixture/truth_functions/file.py +5 -5
- sonusai/mixture/truth_functions/metadata.py +4 -4
- sonusai/mixture/truth_functions/metrics.py +4 -4
- sonusai/mixture/truth_functions/phoneme.py +3 -3
- sonusai/mixture/truth_functions/sed.py +11 -13
- sonusai/mixture/truth_functions/target.py +10 -10
- sonusai/mkwav.py +26 -29
- sonusai/onnx_predict.py +240 -88
- sonusai/queries/__init__.py +2 -2
- sonusai/queries/queries.py +38 -34
- sonusai/speech/librispeech.py +1 -1
- sonusai/speech/mcgill.py +1 -1
- sonusai/speech/timit.py +2 -2
- sonusai/summarize_metric_spenh.py +10 -17
- sonusai/utils/__init__.py +7 -1
- sonusai/utils/asl_p56.py +2 -2
- sonusai/utils/asr.py +2 -2
- sonusai/utils/asr_functions/aaware_whisper.py +4 -5
- sonusai/utils/choice.py +31 -0
- sonusai/utils/compress.py +1 -1
- sonusai/utils/dataclass_from_dict.py +19 -1
- sonusai/utils/energy_f.py +3 -3
- sonusai/utils/evaluate_random_rule.py +15 -0
- sonusai/utils/keyboard_interrupt.py +12 -0
- sonusai/utils/onnx_utils.py +3 -17
- sonusai/utils/print_mixture_details.py +21 -19
- sonusai/utils/{temp_seed.py → rand.py} +3 -3
- sonusai/utils/read_predict_data.py +2 -2
- sonusai/utils/reshape.py +3 -3
- sonusai/utils/stratified_shuffle_split.py +3 -3
- sonusai/{mixture → utils}/tokenized_shell_vars.py +1 -1
- sonusai/utils/write_audio.py +2 -2
- sonusai/vars.py +11 -4
- {sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/METADATA +4 -2
- sonusai-1.0.2.dist-info/RECORD +138 -0
- sonusai/mixture/augmentation.py +0 -444
- sonusai/mixture/class_count.py +0 -15
- sonusai/mixture/eq_rule_is_valid.py +0 -45
- sonusai/mixture/target_class_balancing.py +0 -107
- sonusai/mixture/targets.py +0 -175
- sonusai-0.20.3.dist-info/RECORD +0 -128
- {sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/WHEEL +0 -0
- {sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/entry_points.txt +0 -0
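Note on import paths: several modules moved in this release (datatypes.py from sonusai/mixture to the package root, tokenized_shell_vars.py from sonusai/mixture to sonusai/utils, temp_seed.py renamed to rand.py). A minimal sketch of the downstream import update implied by the genmixdb.py diff below; the full set of public re-exports in 1.0.2 is not visible in this diff:

    # Import-path migration implied by the file moves above.
    # sonusai 0.20.3:
    #     from sonusai.mixture import Mixture
    # sonusai 1.0.2 (datatypes.py now lives at the package root):
    from sonusai.datatypes import Mixture
    from sonusai.mixture import MixtureDatabase  # unchanged between versions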
sonusai/genmixdb.py
CHANGED
@@ -5,7 +5,7 @@ usage: genmixdb [-hvmdjn] LOC
 options:
     -h, --help
     -v, --verbose Be verbose.
-    -m, --mix
+    -m, --mix     Save mixture data. [default: False].
     -d, --dryrun  Perform a dry run showing the processed config. [default: False].
     -j, --json    Save JSON version of database. [default: False].
     -n, --nopar   Do not run in parallel. [default: False].
@@ -16,120 +16,14 @@ genmixdb creates a database of training and evaluation feature and truth data ge
 choice of audio neural-network feature types that are supported by the Aaware real-time front-end and truth data that is
 synchronized frame-by-frame with the feature data.

-
-
-#### Adding target data
-Suppose you have an audio file which is an example, or target, of what you want to recognize or detect. Of course, for
-training a NN you also need truth data for that file (also called parameters/labels/classes). If you don't already have
-it, genmixdb can create truth using a variety of generation functions on each frame of the feature data. You can also
-select different feature types. Here's an example:
-
-genmixdb target_gfr32ts2
-
-where target_gfr32ts2 contains config.yml with the following inside:
----
-feature: gfr32ts2
-
-targets:
-  - name: data/target.wav
-
-target_augmentations:
-  - normalize: -3.5
-...
-
-The mixture database is written to a SQLite file (mixdb.db) in the same directory that contains the config.yml file.
-
-#### Target data mix with noise and augmentation
-
-genmixdb mix_gfr32ts2.yml
-
-where mix_gfr32ts2.yml contains:
----
-feature: gfr32ts2
-
-targets:
-  - name: data/target.wav
-
-target_augmentations:
-  - normalize: -3.5
-    pitch: [-3, 0, 3]
-    tempo: [0.8, 1, 1.2]
-
-noises:
-  - name: data/noise.wav
-
-noise_augmentations:
-  - normalize: -3.5
-
-snrs:
-  - 20
-...
-
-In this example a time-domain mixture is created and feature data is calculated as specified by 'feature: gfr32ts2'.
-Various feature types are available which vary in spectral and temporal resolution (4 ms or higher), and other feature
-algorithm parameters. The total feature size, dimension, and #frames for mixture is reported in the log file (the log
-file name is genmixdb.log).
-
-Truth (parameters/labels/classes) can be automatically created per feature output frame based on a variety of truth
-generation functions. By default, these are included with the feature data in a single HDF5 output file. By default,
-truth generation is turned on with default settings (see truth section) and a single class, i.e., detecting a single
-type of sound. The truth format is a single float per class representing the probability of activity/presence, and
-multi-class truth is possible by specifying the number of classes and either a scalar index or a vector of indices in
-which to put the truth result. For example, 'num_class: 3' and 'class_indices: [ 2 ]' adds a 1x3 vector to the feature
-data with truth put in index 2 (others would be 0) for data/target.wav being an audio clip from sound type of class 2.
-
-The mixture is created with potential data augmentation functions in the following way:
-1. apply noise augmentation rule
-2. apply target augmentation rule to each target in the mixture (multiple targets may be used in mixup)
-3. adjust noise and target gains for specified SNR
-4. add augmented noise to augmented target(s)
-
-Note: If an impulse response is part of the target augmentation, truth generation is performed on the targets before
-applying the IRs. In this way, the truth is not impacted by the IR.
-
-The mixture length is the length of the longest target in the mixture, and the noise signal is repeated if it is
-shorter, or trimmed if longer.
-
-#### Target and noise using path lists
-
-Target and noise audio is specified as a list containing text files, audio files, and file globs. Text files are
-processed with items on each line where each item can be a text file, an audio file, or a file glob. Each item will be
-searched for audio files which can be WAV, MP3, FLAC, AIFF, or OGG format with any sample rate, bit depth, or channel
-count. All audio files will be converted to 16 kHz, float32, single channel (only the first channel is used) format
-before processing.
-
-For example,
-
-genmixdb dog-bark.yml
-
-where dog-bark.yml contains:
----
-targets:
-  - name: slib/dog-outside/*.wav
-  - name: slib/dog-inside/*.wav
-
-will find all .wav files in the specified directories and process them as targets.
+For details, see sonusai doc.

 """

-import
-
-from sonusai.mixture import Mixture
+from sonusai.datatypes import Mixture
 from sonusai.mixture import MixtureDatabase


-def signal_handler(_sig, _frame):
-    import sys
-
-    from sonusai import logger
-
-    logger.info("Canceled due to keyboard interrupt")
-    sys.exit(1)
-
-
-signal.signal(signal.SIGINT, signal_handler)
-
-
 def genmixdb(
     location: str,
     save_mix: bool = False,
@@ -142,23 +36,17 @@ def genmixdb(
     from functools import partial
     from random import seed

+    import pandas as pd
     import yaml

     from sonusai import logger
-    from sonusai.
-    from sonusai.
-    from sonusai.mixture import AugmentationRule
+    from sonusai.constants import SAMPLE_BYTES
+    from sonusai.constants import SAMPLE_RATE
     from sonusai.mixture import MixtureDatabase
-    from sonusai.mixture import balance_targets
     from sonusai.mixture import generate_mixtures
-    from sonusai.mixture import
-    from sonusai.mixture import
-    from sonusai.mixture import
-    from sonusai.mixture import get_impulse_response_files
-    from sonusai.mixture import get_mixups
-    from sonusai.mixture import get_noise_files
-    from sonusai.mixture import get_target_augmentations_for_mixup
-    from sonusai.mixture import get_target_files
+    from sonusai.mixture import get_effect_rules
+    from sonusai.mixture import get_ir_files
+    from sonusai.mixture import get_source_files
     from sonusai.mixture import initialize_db
     from sonusai.mixture import load_config
     from sonusai.mixture import log_duration_and_sizes
@@ -166,22 +54,20 @@ def genmixdb(
     from sonusai.mixture import populate_class_weights_threshold_table
     from sonusai.mixture import populate_impulse_response_file_table
     from sonusai.mixture import populate_mixture_table
-    from sonusai.mixture import
+    from sonusai.mixture import populate_source_file_table
     from sonusai.mixture import populate_spectral_mask_table
-    from sonusai.mixture import populate_target_file_table
     from sonusai.mixture import populate_top_table
     from sonusai.mixture import populate_truth_parameters_table
     from sonusai.mixture import update_mixid_width
-    from sonusai.utils import dataclass_from_dict
     from sonusai.utils import human_readable_size
     from sonusai.utils import par_track
     from sonusai.utils import seconds_to_hms
     from sonusai.utils import track

     config = load_config(location)
-    initialize_db(location
+    initialize_db(location, test)

-    mixdb = MixtureDatabase(location
+    mixdb = MixtureDatabase(location, test)

     populate_top_table(location, config, test)
     populate_class_label_table(location, config, test)
@@ -197,148 +83,94 @@ def genmixdb(
         logger.debug(yaml.dump(config))

     if logging:
-        logger.info("Collecting
+        logger.info("Collecting sources")

-
-
-    if len(target_files) == 0:
-        raise RuntimeError("Canceled due to no targets")
+    source_files = get_source_files(config, show_progress)
+    logger.info("")

-
+    if len([file for file in source_files if file.category == "primary"]) == 0:
+        raise RuntimeError("Canceled due to no primary sources")

-
-        logger.debug("List of targets:")
-        logger.debug(yaml.dump([target.name for target in mixdb.target_files], default_flow_style=False))
-        logger.debug("")
+    populate_source_file_table(location, source_files, test)

     if logging:
-        logger.info("
-
-
-
-
+        logger.info("Sources summary")
+        data = {
+            "category": [],
+            "files": [],
+            "size": [],
+            "duration": [],
+        }
+        for category, source_files in mixdb.source_files.items():
+            audio_samples = sum([source.samples for source in source_files])
+            audio_duration = audio_samples / SAMPLE_RATE
+            data["category"].append(category)
+            data["files"].append(mixdb.num_source_files(category))
+            data["size"].append(human_readable_size(audio_samples * SAMPLE_BYTES, 1))
+            data["duration"].append(seconds_to_hms(seconds=audio_duration))
+
+        df = pd.DataFrame(data)
+        logger.info(df.to_string(index=False, header=False))
+        logger.info("")

-
-
-
-        logger.debug("")
+        for category, files in mixdb.source_files.items():
+            logger.debug(f"List of {category} sources:")
+            logger.debug(yaml.dump([file.name for file in files], default_flow_style=False))

     if logging:
         logger.info("Collecting impulse responses")

-
+    ir_files = get_ir_files(config, show_progress=show_progress)
+    logger.info("")

-    populate_impulse_response_file_table(location,
+    populate_impulse_response_file_table(location, ir_files, test)

     if logging:
         logger.debug("List of impulse responses:")
-
-
-                [entry.file for entry in mixdb.impulse_response_files],
-                default_flow_style=False,
-            )
-        )
+        for idx, file in enumerate(ir_files):
+            logger.debug(f"id: {idx}, name:{file.name}, delay: {file.delay}, tags: [{', '.join(file.tags)}]")
         logger.debug("")

     if logging:
-        logger.info("Collecting
-
-    target_augmentations = get_augmentation_rules(
-        rules=config["target_augmentations"], num_ir=mixdb.num_impulse_response_files
-    )
-    mixups = get_mixups(target_augmentations)
+        logger.info("Collecting effects")

-
-        for mixup in mixups:
-            logger.debug(f"Expanded list of target augmentation rules for mixup of {mixup}:")
-            for target_augmentation in get_target_augmentations_for_mixup(target_augmentations, mixup):
-                ta_dict = target_augmentation.to_dict()
-                del ta_dict["mixup"]
-                logger.debug(f"- {ta_dict}")
-        logger.debug("")
+    rules = get_effect_rules(location, config, test)

     if logging:
-        logger.info("
-
-
-
-        )
+        logger.info("")
+        for category, effect in rules.items():
+            logger.debug(f"List of {category} rules:")
+            logger.debug(yaml.dump([entry.to_dict() for entry in effect], default_flow_style=False))

     if logging:
-        logger.debug("
-        for
-
-
-
+        logger.debug("SNRS:")
+        for category, source in config["sources"].items():
+            if category != "primary":
+                logger.debug(f"  {category}")
+                for snr in source["snrs"]:
+                    logger.debug(f"  - {snr}")
+        logger.debug("")
+        logger.debug("Mix Rules:")
+        for category, source in config["sources"].items():
+            if category != "primary":
+                logger.debug(f"  {category}")
+                for mix_rule in source["mix_rules"]:
+                    logger.debug(f"  - {mix_rule}")
         logger.debug("")
-
-    if logging:
-        logger.debug(f"SNRs: {config['snrs']}\n")
-        logger.debug(f"Random SNRs: {config['random_snrs']}\n")
-        logger.debug(f"Noise mix mode: {mixdb.noise_mix_mode}\n")
         logger.debug("Spectral masks:")
         for spectral_mask in mixdb.spectral_masks:
             logger.debug(f"- {spectral_mask}")
         logger.debug("")

-    if logging:
-        logger.info("Collecting augmented targets")
-
-    augmented_targets = get_augmented_targets(target_files, target_augmentations, mixups)
-
-    if config["class_balancing"]:
-        class_balancing_augmentation = dataclass_from_dict(AugmentationRule, config["class_balancing_augmentation"])
-        augmented_targets, target_augmentations = balance_targets(
-            augmented_targets=augmented_targets,
-            targets=target_files,
-            target_augmentations=target_augmentations,
-            class_balancing_augmentation=class_balancing_augmentation,  # pyright: ignore [reportArgumentType]
-            num_classes=mixdb.num_classes,
-            num_ir=mixdb.num_impulse_response_files,
-            mixups=mixups,
-        )
-
-    target_audio_samples = sum([targets.samples for targets in mixdb.target_files])
-    target_audio_duration = target_audio_samples / SAMPLE_RATE
-    noise_audio_duration = sum([noises.duration for noises in mixdb.noise_files])
-    noise_audio_samples = noise_audio_duration * SAMPLE_RATE
-
-    if logging:
-        logger.info("")
-        logger.info(
-            f"Target audio: {mixdb.num_target_files} files, "
-            f"{human_readable_size(target_audio_samples * SAMPLE_BYTES, 1)}, "
-            f"{seconds_to_hms(seconds=target_audio_duration)}"
-        )
-        logger.info(
-            f"Noise audio: {mixdb.num_noise_files} files, "
-            f"{human_readable_size(noise_audio_samples * SAMPLE_BYTES, 1)}, "
-            f"{seconds_to_hms(seconds=noise_audio_duration)}"
-        )
-
     if logging:
         logger.info("Generating mixtures")

-
-        noise_mix_mode=mixdb.noise_mix_mode,
-        augmented_targets=augmented_targets,
-        target_files=target_files,
-        target_augmentations=target_augmentations,
-        noise_files=noise_files,
-        noise_augmentations=noise_augmentations,
-        spectral_masks=mixdb.spectral_masks,
-        all_snrs=get_all_snrs_from_config(config),
-        mixups=mixups,
-        num_classes=mixdb.num_classes,
-        feature_step_samples=mixdb.feature_step_samples,
-        num_ir=mixdb.num_impulse_response_files,
-    )
+    mixtures = generate_mixtures(location, config, rules, test)

     num_mixtures = len(mixtures)
     update_mixid_width(location, num_mixtures, test)

     if logging:
-        logger.info("")
         logger.info(f"Found {num_mixtures:,} mixtures to process")

     total_duration = float(sum([mixture.samples for mixture in mixtures])) / SAMPLE_RATE
@@ -346,7 +178,6 @@ def genmixdb(
     if logging:
         log_duration_and_sizes(
             total_duration=total_duration,
-            num_classes=mixdb.num_classes,
             feature_step_samples=mixdb.feature_step_samples,
             feature_parameters=mixdb.feature_parameters,
             stride=mixdb.fg_stride,
@@ -386,27 +217,17 @@ def genmixdb(
         show_progress=show_progress,
     )

-
-
-    total_samples = mixdb.total_samples()
-    total_duration = float(total_samples / SAMPLE_RATE)
-
-    noise_files_percent = (float(used_noise_files) / float(total_noise_files)) * 100
-    noise_samples_percent = (float(used_noise_samples) / float(noise_audio_samples)) * 100
+    total_duration = float(mixdb.total_samples() / SAMPLE_RATE)

     if logging:
         log_duration_and_sizes(
             total_duration=total_duration,
-            num_classes=mixdb.num_classes,
             feature_step_samples=mixdb.feature_step_samples,
             feature_parameters=mixdb.feature_parameters,
             stride=mixdb.fg_stride,
             desc="Actual",
         )
         logger.info("")
-        logger.info(f"Used {noise_files_percent:,.0f}% of noise files")
-        logger.info(f"Used {noise_samples_percent:,.0f}% of noise audio")
-        logger.info("")

     if not test and save_json:
         if logging:
@@ -434,12 +255,12 @@ def _process_mixture(

     if save_mix:
         write(
-            items=
-
-
-
-
-
+            items={
+                "sources": genmix_data.sources,
+                "source": genmix_data.source,
+                "noise": genmix_data.noise,
+                "mixture": genmix_data.mixture,
+            }
         )

     write_mixture_metadata(mixdb, mixture=mixture)
@@ -450,10 +271,10 @@ def _process_mixture(
 def main() -> None:
     from docopt import docopt

-    import
+    from sonusai import __version__ as sai_version
     from sonusai.utils import trim_docstring

-    args = docopt(trim_docstring(__doc__), version=
+    args = docopt(trim_docstring(__doc__), version=sai_version, options_first=True)

     import time
     from os import makedirs
@@ -498,17 +319,13 @@ def main() -> None:
     logger.info(f"Creating mixture database for {location}")
     logger.info("")

-
-
-
-
-
-
-
-        )
-    except Exception as e:
-        logger.debug(e)
-        raise
+    genmixdb(
+        location=location,
+        save_mix=save_mix,
+        show_progress=True,
+        save_json=save_json,
+        no_par=no_par,
+    )

     end_time = time.monotonic()
     logger.info(f"Completed in {seconds_to_hms(seconds=end_time - start_time)}")
@@ -516,4 +333,11 @@ def main() -> None:


 if __name__ == "__main__":
-
+    from sonusai import exception_handler
+    from sonusai.utils import register_keyboard_interrupt
+
+    register_keyboard_interrupt()
+    try:
+        main()
+    except Exception as e:
+        exception_handler(e)
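The removed per-module signal_handler is replaced by a shared pair of helpers: register_keyboard_interrupt from sonusai.utils (the new sonusai/utils/keyboard_interrupt.py listed above) and exception_handler from the sonusai package. A minimal sketch of what the interrupt helper likely looks like, inferred from the handler it replaces; the actual 1.0.2 implementation is not shown in this diff:

    import signal
    import sys


    def register_keyboard_interrupt() -> None:
        # Hypothetical sketch: install a SIGINT handler that logs and exits,
        # mirroring the signal_handler removed from genmixdb.py above.
        def _handler(_sig, _frame):
            from sonusai import logger

            logger.info("Canceled due to keyboard interrupt")
            sys.exit(1)

        signal.signal(signal.SIGINT, _handler)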
sonusai/ir_metric.py
CHANGED
@@ -1,10 +1,9 @@
 """sonusai ir_metric

-usage: ir_metric [-
+usage: ir_metric [-h] [-n NCPU] IRLOC

 options:
     -h, --help
-    -v, --verbose           Be verbose.
     -n, --num_process NCPU  Number of parallel processes to use [default: auto]

 Calculate delay and gain metrics of impulse response (IR) files <filename>.wav in IRLOC.
@@ -22,7 +21,6 @@ IRLOC directory containing impulse response data in audio files (.wav, .flac, e
 """

 import glob
-import signal
 from os.path import abspath
 from os.path import basename
 from os.path import commonprefix
@@ -42,18 +40,6 @@ from numpy import fft
 from sonusai.utils import braced_iglob


-def signal_handler(_sig, _frame):
-    import sys
-
-    from sonusai import logger
-
-    logger.info("Canceled due to keyboard interrupt")
-    sys.exit(1)
-
-
-signal.signal(signal.SIGINT, signal_handler)
-
-
 def tdoa(signal, reference, interp=1, phat=False, fs=1, t_max=None):
     """
     Estimates the shift of array signal with respect to reference
@@ -263,7 +249,7 @@ def measure_rt60(h, fs=1, decay_db=60, energy_thres=1.0, plot=False, rt60_tgt=No
     t60_decay = 3 * i_decay20db / fs
     rt60 = t60_decay - t_5db

-    # #
+    # # extrapolate to compute the rt60 decay time from decay_db decay time
     # decay_time = t_decay - t_5db
     # est_rt60 = (60 / decay_db) * decay_time

@@ -297,27 +283,30 @@ def measure_rt60(h, fs=1, decay_db=60, energy_thres=1.0, plot=False, rt60_tgt=No
     return rt60, edt, rt10, rt20, floor


-def process_path(path,
+def process_path(path: str, extensions: list[str] | None = None) -> tuple[list, str | None]:
     """
     Check path which can be a single file, a subdirectory, or a regex
     return:
     - a list of files with matching extensions to any in extlist provided (i.e. ['.wav', '.mp3', '.acc'])
     - the basedir of the path, if
     """
+    if extensions is None:
+        extensions = [".wav", ".WAV", ".flac", ".FLAC", ".mp3", ".aac"]
+
     # Check if the path is a single file, and return it as a list with the dirname
     if isfile(path):
-        if any(path.endswith(ext) for ext in
+        if any(path.endswith(ext) for ext in extensions):
             basedir = dirname(path)  # base directory
             if not basedir:
                 basedir = "./"
             return [path], basedir
-
-
+
+        return [], None

     # Check if the path is a dir, recursively find all files any of the specified extensions, return file list and dir
     if isdir(path):
         matching_files = []
-        for ext in
+        for ext in extensions:
             matching_files.extend(glob.glob(join(path, "**/*" + ext), recursive=True))
         return matching_files, path

@@ -326,11 +315,12 @@ def process_path(path, extlist=[".wav", ".WAV", ".flac", ".FLAC", ".mp3", ".aac"
     matching_files = []
     for file in braced_iglob(pathname=apath, recursive=True):
         matching_files.append(file)
+
     if matching_files:
         basedir = commonprefix(matching_files)  # Find basedir
         return matching_files, basedir
-
-
+
+    return [], None


 def _process_ir(pfile: str, irtab_col: list, basedir: str) -> pd.DataFrame:
@@ -424,20 +414,19 @@ def _process_ir(pfile: str, irtab_col: list, basedir: str) -> pd.DataFrame:
 def main():
     from docopt import docopt

-    import
-    from
+    from . import __version__ as sai_version
+    from .utils.docstring import trim_docstring

-    args = docopt(trim_docstring(__doc__), version=
+    args = docopt(trim_docstring(__doc__), version=sai_version, options_first=True)

-    verbose = args["--verbose"]
     ir_location = args["IRLOC"]
     num_proc = args["--num_process"]

     import psutil

-    from
-    from
-    from
+    from .utils.create_timestamp import create_timestamp
+    from .utils.parallel import par_track
+    from .utils.parallel import track

     # Check location, default ext are ['.wav', '.WAV', '.flac', '.FLAC', '.mp3', '.aac']
     pfiles, basedir = process_path(ir_location)
@@ -446,15 +435,15 @@ def main():
     if pfiles is None or len(pfiles) < 1:
         print(f"No IR audio files found in {ir_location}, exiting ...")
         raise SystemExit(1)
-
+    if len(pfiles) == 1:
         print(f"Found single IR audio file {ir_location} , writing to *-irmetric.txt ...")
         fbase, ext = splitext(basename(pfiles[0]))
         wlcsv_name = None
         txt_fname = str(join(basedir, fbase + "-irmetric.txt"))
-
+    else:
         print(f"Found {len(pfiles)} files under {basedir} for impulse response metric calculations")
-        txt_fname = str(join(basedir, "ir_metric_summary.txt"))
         wlcsv_name = str(join(basedir, "ir_metric_list.csv"))
+        txt_fname = str(join(basedir, "ir_metric_summary.txt"))

     num_cpu = psutil.cpu_count()
     cpu_percent = psutil.cpu_percent(interval=1)
@@ -552,4 +541,11 @@ def main():


 if __name__ == "__main__":
-
+    from sonusai import exception_handler
+    from sonusai.utils import register_keyboard_interrupt
+
+    register_keyboard_interrupt()
+    try:
+        main()
+    except Exception as e:
+        exception_handler(e)
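The process_path change above replaces a mutable default argument (extlist=[".wav", ...]) with the None-sentinel idiom (extensions: list[str] | None = None). In the old code the default list was only read, so this is mainly an idiom/lint fix, but the pitfall it guards against is real whenever a default list gets mutated. A standalone illustration (not sonusai code):

    # Mutable default: one shared list is created at function definition time.
    def bad(item, seen=[]):
        seen.append(item)
        return seen


    # None sentinel: a fresh list is created on every call unless one is passed in.
    def good(item, seen=None):
        if seen is None:
            seen = []
        seen.append(item)
        return seen


    print(bad("a"), bad("b"))    # ['a', 'b'] ['a', 'b'] -- state leaks across calls
    print(good("a"), good("b"))  # ['a'] ['b']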