sonusai 0.19.10__py3-none-any.whl → 0.20.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sonusai/data/genmixdb.yml CHANGED
@@ -23,7 +23,8 @@ truth_configs: { }
 
 asr_manifest: [ ]
 
-target_augmentations: [ ]
+target_augmentations:
+- pre:
 
 class_balancing_augmentation:
   normalize: -3.5
@@ -39,7 +40,8 @@ noises:
 - "${default_noise}"
 
 noise_augmentations:
-- normalize: -3.5
+- pre:
+    normalize: -3.5
 
 snrs:
 - 99
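
Note: augmentation rules now nest their effects under a pre: key (matching the new AugmentationRuleEffects/AugmentationEffects datatypes exported further down in this diff), presumably distinguishing pre-mixing from post-mixing effects. A plausible reading of the new rule shape (values hypothetical):

    target_augmentations:
    - pre:
        normalize: -3.5   # pre-mixing effect; -3.5 mirrors the noise rule above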
sonusai/doc/doc.py CHANGED
@@ -329,6 +329,20 @@ See 'augmentations' for details on augmentation rules.
     # fmt: on
 
 
+def doc_target_distortions() -> str:
+    import yaml
+
+    default = f"\nDefault value:\n\n{yaml.dump(get_default_config()['target_distortions'])}"
+    # fmt: off
+    return """
+'target_distortions' is a mixture database configuration parameter that
+specifies a list of distortion rules to use for each target.
+
+See 'augmentations' for details on distortion rules.
+""" + default
+    # fmt: on
+
+
 def doc_noises() -> str:
     default = f"\nDefault value: {get_default_config()['class_balancing']}"
     # fmt: off
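
A minimal sketch of what the new helper returns (assuming sonusai is installed and 'target_distortions' exists in the default config, per the code above):

    from sonusai.doc.doc import doc_target_distortions

    print(doc_target_distortions())
    # 'target_distortions' is a mixture database configuration parameter that
    # specifies a list of distortion rules to use for each target.
    # ...
    # Default value:
    # <yaml.dump of the configured default>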
sonusai/ir_metric.py ADDED
@@ -0,0 +1,555 @@
+"""sonusai ir_metric
+
+usage: ir_metric [-hv] [-n NCPU] IRLOC
+
+options:
+    -h, --help
+    -v, --verbose               Be verbose.
+    -n, --num_process NCPU      Number of parallel processes to use [default: auto]
+
+Calculate delay and gain metrics of impulse response (IR) files <filename>.wav in IRLOC.
+Metrics include gain and multiple ways to calculate the IR delay:
+    - gmax: max(abs(fft(ir)))
+    - dcc:  cross-correlation of ir with a white noise reference
+    - dmax: index of max(ir)
+    - dgd:  group delay method
+    - dcen: centroid of energy
+
+Results are written to IRLOC/ir_metric_summary.txt and IRLOC/ir_metric_list.csv
+(or <filename>-irmetric.txt for a single file).
+
+IRLOC   directory containing impulse response data in audio files (.wav, .flac, etc.).
+        Only the first channel is analyzed.
+
+"""
+
+import glob
+import signal
+from os.path import abspath
+from os.path import basename
+from os.path import commonprefix
+from os.path import dirname
+from os.path import isdir
+from os.path import isfile
+from os.path import join
+from os.path import relpath
+from os.path import splitext
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import soundfile
+from numpy import fft
+
+from sonusai.utils import braced_iglob
+
+
+def signal_handler(_sig, _frame):
+    import sys
+
+    from sonusai import logger
+
+    logger.info("Canceled due to keyboard interrupt")
+    sys.exit(1)
+
+
+signal.signal(signal.SIGINT, signal_handler)
+
+
+def tdoa(signal, reference, interp=1, phat=False, fs=1, t_max=None):
+    """
+    Estimates the shift of array signal with respect to reference
+    using generalized cross-correlation.
+
+    Parameters
+    ----------
+    signal: array_like
+        The array whose TDOA is measured
+    reference: array_like
+        The reference array
+    interp: int, optional
+        The interpolation factor for the output array, default 1.
+    phat: bool, optional
+        Apply the PHAT weighting (default False)
+    fs: int or float, optional
+        The sampling frequency of the input arrays, default 1
+
+    Returns
+    -------
+    The estimated delay between the two arrays
+    """
+
+    signal = np.array(signal)
+    reference = np.array(reference)
+
+    N1 = signal.shape[0]
+    N2 = reference.shape[0]
+
+    r_12 = correlate(signal, reference, interp=interp, phat=phat)
+
+    delay = (np.argmax(np.abs(r_12)) / interp - (N2 - 1)) / fs
+
+    return delay
+
+
+def correlate(x1, x2, interp=1, phat=False):
+    """
+    Compute the cross-correlation between x1 and x2.
+
+    Parameters
+    ----------
+    x1, x2: array_like
+        The data arrays
+    interp: int, optional
+        The interpolation factor for the output array, default 1.
+    phat: bool, optional
+        Apply the PHAT weighting (default False)
+
+    Returns
+    -------
+    The cross-correlation between the two arrays
+    """
+
+    N1 = x1.shape[0]
+    N2 = x2.shape[0]
+
+    N = N1 + N2 - 1
+
+    X1 = fft.rfft(x1, n=N)
+    X2 = fft.rfft(x2, n=N)
+
+    if phat:
+        eps1 = np.mean(np.abs(X1)) * 1e-10
+        X1 /= np.abs(X1) + eps1
+        eps2 = np.mean(np.abs(X2)) * 1e-10
+        X2 /= np.abs(X2) + eps2
+
+    m = np.minimum(N1, N2)
+
+    out = fft.irfft(X1 * np.conj(X2), n=int(N * interp))
+
+    return np.concatenate([out[-interp * (N2 - 1) :], out[: (interp * N1)]])
+
+
+def hilbert(u):
+    # N : fft length
+    # M : number of elements to zero out
+    # U : DFT of u
+    # v : IDFT of H(U)
+
+    N = len(u)
+    # take forward Fourier transform
+    U = fft.fft(u)
+    M = N - N // 2 - 1
+    # zero out negative frequency components
+    U[N // 2 + 1 :] = [0] * M
+    # double fft energy except @ DC0
+    U[1 : N // 2] = 2 * U[1 : N // 2]
+    # take inverse Fourier transform
+    v = fft.ifft(U)
+    return v
+
+
+def measure_rt60(h, fs=1, decay_db=60, energy_thres=1.0, plot=False, rt60_tgt=None):
+    """
+    RT60 measurement routine (adapted from pyroomacoustics).
+
+    Calculates the reverberation time of an impulse response using the Schroeder method [1].
+    Returns:
+        rt60: Reverberation time to -60 dB (-5 dB to -65 dB); estimated from rt20 or rt10 if noise floor > -65 dB
+        edt: Early decay time from 0 dB to -10 dB
+        rt10: Reverberation time to -10 dB (-5 dB to -15 dB)
+        rt20: Reverberation time to -20 dB (-5 dB to -25 dB); estimated from rt10 if noise floor > -25 dB
+        floor: 0 if noise floor > -10 dB or the energy curve is not a decay
+               1 if noise floor > -15 dB and edt is measured, but rt10 is estimated from the entire energy curve
+               2 if -15 dB > noise floor > -25 dB; rt20 is estimated from the measured rt10
+               3 if -25 dB > noise floor > -65 dB; rt60 is estimated from the measured rt20
+               4 if noise floor < -65 dB; rt60, edt, rt10, rt20 are all measured
+    Optionally plots some useful information.
+
+    Parameters
+    ----------
+    h: array_like
+        The impulse response.
+    fs: float or int, optional
+        The sampling frequency of h (defaults to 1, i.e., samples).
+    decay_db: float or int, optional
+        The decay in decibels for which we actually estimate the slope and time.
+        Although we want to estimate the RT60, it might not be practical. Instead,
+        we measure the RT10, RT20 or RT30 and extrapolate to RT60.
+    energy_thres: float
+        This should be a value between 0.0 and 1.0.
+        If provided, the fit will be done using a fraction energy_thres of the
+        whole energy. This is useful when there is a long noisy tail, for example.
+    plot: bool, optional
+        If set to ``True``, the power decay and different estimated values will
+        be plotted (default False).
+    rt60_tgt: float
+        This parameter can be used to indicate a target RT60 to which we want
+        to compare the estimated value.
+
+    References
+    ----------
+
+    [1] M. R. Schroeder, "New Method of Measuring Reverberation Time,"
+        J. Acoust. Soc. Am., vol. 37, no. 3, pp. 409-412, Mar. 1968.
+    """
+
+    h = np.array(h)
+    fs = float(fs)
+    h = np.abs(hilbert(h))  # envelope via the local hilbert() above
+
+    # The power of the impulse response
+    power = h**2
+    # Backward energy integration according to Schroeder
+    energy = np.cumsum(power[::-1])[::-1]
+
+    if energy_thres < 1.0:
+        assert 0.0 < energy_thres < 1.0
+        energy -= energy[0] * (1.0 - energy_thres)
+        energy = np.maximum(energy, 0.0)
+
+    # remove the possibly all-zero tail
+    i_nz = np.max(np.where(energy > 0)[0])
+    energy = energy[:i_nz]
+    energy_db = 10 * np.log10(energy)
+    energy_db -= energy_db[0]  # normalize to the first sample, assuming it's the peak
+
+    min_energy_db = -np.min(energy_db)
+    if min_energy_db - 5 < decay_db:
+        decay_db = min_energy_db
+
+    # -5 dB headroom
+    try:
+        i_5db = np.min(np.where(energy_db < -5)[0])
+    except ValueError:
+        floor = 0
+        return 0.0, 0.0, 0.0, 0.0, floor  # failed; energy curve is not a decay, or noise floor tail is above -5 dB
+    e_5db = energy_db[i_5db]
+    t_5db = i_5db / fs  # initial decay to -5 dB, used as the start of decay slope measurements
+
+    # Estimate slope from 0 dB to -10 dB - this is also known as EDT (early decay time)
+    try:
+        i_10db = np.min(np.where(energy_db < -10)[0])
+    except ValueError:
+        floor = 0
+        return 0.0, 0.0, 0.0, 0.0, floor  # failed; energy curve is not a decay, or noise floor tail is above -10 dB
+    e_10db = energy_db[i_10db]
+    edt = i_10db / fs  # this is also known as EDT (early decay time)
+
+    # after the initial decay, estimate RT10, RT20, RT60
+    try:
+        i_decay10db = np.min(np.where(energy_db < -5 - 10)[0])
+    except ValueError:
+        floor = 1
+        i_decay10db = len(energy_db)  # noise floor tail is above -15 dB, use entire curve
+    t10_decay = i_decay10db / fs
+    rt10 = t10_decay - t_5db
+
+    try:
+        i_decay20db = np.min(np.where(energy_db < -5 - 20)[0])
+    except ValueError:
+        floor = 2
+        i_decay20db = len(energy_db)  # noise floor tail is above -25 dB, use entire curve
+    t20_decay = i_decay20db / fs
+    rt20 = t20_decay - t_5db
+
+    try:
+        i_decay60db = np.min(np.where(energy_db < -5 - 60)[0])
+        t60_decay = i_decay60db / fs
+        rt60 = t60_decay - t_5db
+        floor = 4
+    except ValueError:
+        floor = 3
+        i_decay60db = len(energy_db)  # noise floor tail is above -65 dB, use t20_decay to estimate
+        t60_decay = 3 * i_decay20db / fs
+        rt60 = t60_decay - t_5db
+
+    # # extrapolate the rt60 decay time from the decay_db decay time
+    # decay_time = t_decay - t_5db
+    # est_rt60 = (60 / decay_db) * decay_time
+
+    if plot:
+        # Clip power below the minimum energy (mostly for plotting purposes)
+        energy_min = energy[-1]
+        energy_db_min = energy_db[-1]
+        power[power < energy[-1]] = energy_min
+        power_db = 10 * np.log10(power)
+        power_db -= np.max(power_db)
+
+        # time vector
+        def get_time(x, fs):
+            return np.arange(x.shape[0]) / fs - i_5db / fs
+
+        T = get_time(power_db, fs)
+
+        # plot power and energy
+        plt.plot(get_time(energy_db, fs), energy_db, label="Energy")
+
+        # now the linear fit
+        plt.plot([0, rt60], [e_5db, -65], "--", label="Linear Fit")
+        plt.plot(T, np.ones_like(T) * -60, "--", label="-60 dB")
+        plt.vlines(rt60, energy_db_min, 0, linestyles="dashed", label="Estimated RT60")
+
+        if rt60_tgt is not None:
+            plt.vlines(rt60_tgt, energy_db_min, 0, label="Target RT60")
+
+        plt.legend()
+
+    return rt60, edt, rt10, rt20, floor
+
+
+def process_path(path, extlist=[".wav", ".WAV", ".flac", ".FLAC", ".mp3", ".aac"]):
+    """
+    Check path, which can be a single file, a subdirectory, or a regex.
+
+    Returns:
+        - a list of files with extensions matching any in the provided extlist (e.g., ['.wav', '.mp3', '.aac'])
+        - the base directory of the path
+    """
+    # Check if the path is a single file, and return it as a list with the dirname
+    if isfile(path):
+        if any(path.endswith(ext) for ext in extlist):
+            basedir = dirname(path)  # base directory
+            if not basedir:
+                basedir = "./"
+            return [path], basedir
+        else:
+            return [], []
+
+    # Check if the path is a dir; recursively find all files with any of the specified extensions
+    if isdir(path):
+        matching_files = []
+        for ext in extlist:
+            matching_files.extend(glob.glob(join(path, "**/*" + ext), recursive=True))
+        return matching_files, path
+
+    # Process as a regex, return list of filenames and basedir
+    apath = abspath(path)  # join(abspath(path), "**", "*.{wav,flac,WAV}")
+    matching_files = []
+    for file in braced_iglob(pathname=apath, recursive=True):
+        matching_files.append(file)
+    if matching_files:
+        basedir = commonprefix(matching_files)  # Find basedir
+        return matching_files, basedir
+    else:
+        return [], []
+
+
+def _process_ir(pfile: str, irtab_col: list, basedir: str) -> pd.DataFrame:
+    # 1) Read IR audio file and calc basic stats
+    ir_fname = pfile[1]  # abs_path
+    irwav, sample_rate = soundfile.read(ir_fname)
+    if irwav.ndim == 2:
+        irwav = irwav[:, 0]  # Only first channel of multi-channel
+    duration = len(irwav) / sample_rate
+    srk = sample_rate / 1000
+    ir_basename = relpath(ir_fname, basedir)
+
+    # 2) Compute delay via autocorrelation (not working - always zero, use interpolated tdoa instead)
+    # ar = np.correlate(irwav, irwav, mode='same')
+    # acdelay_index = np.argmax(ar)
+    # dacc = acdelay_index - len(ar) // 2  # Center the delay around 0 of 'same' mode
+
+    # 3) Compute delay via max argument - find the peak
+    peak_index = np.argmax(irwav)
+    peak_value = irwav[peak_index]
+    dmax = peak_index
+
+    # 4) Calculate cross-correlation with white Gaussian noise ref (same as pyroomacoustics tdoa() with interp=1)
+    np.random.seed(42)
+    wgn_ref = np.random.normal(0, 0.2, int(np.ceil(0.05 * sample_rate)))  # (mean, std_dev, length)
+    wgn_conv = np.convolve(irwav, wgn_ref)
+    wgn_corr = np.correlate(wgn_conv, wgn_ref, mode="full")  # Compute cross-correlation
+    delay_index = np.argmax(np.abs(wgn_corr))  # Find the delay (abs is needed)
+    dcc = delay_index - len(wgn_ref) + 1  # Adjust for the mode='full' shift
+    # GCC with PHAT weighting is known to be best, but does seem to mismatch dcc, dmax more frequently
+    dtdoa = tdoa(wgn_conv, wgn_ref, interp=16, phat=True)
+    gdccmax = np.max(np.abs(wgn_conv)) / np.max(np.abs(wgn_ref))  # gain of max value
+
+    # # 4b) Calculate cross-correlation with chirp 20 Hz - 20 kHz
+    # t_end = 2  # 1s
+    # t = np.linspace(0, t_end, int(t_end * sample_rate))
+    # k = (20 - 20000) / t_end
+    # chrp_phase = 2 * np.pi * (20 * t + 0.5 * k * t ** 2)
+    # chrp = np.cos(chrp_phase)
+    # chrp_convout = np.convolve(irwav, chrp)
+    # chrp_corr = np.correlate(chrp_convout, chrp, mode='full')  # Compute cross-correlation
+    # chrp_delay_idx = np.argmax(np.abs(chrp_corr))
+    # dcchr = chrp_delay_idx - len(chrp) + 1
+    # dtdoachr = tdoa(chrp_convout, chrp, interp=16, phat=False)
+    # gdcchrmax = np.max(np.abs(chrp_convout)) / np.max(np.abs(chrp))
+    # #sin_ref = np.sin(2 * np.pi * 500/sample_rate * np.arange(0,sample_rate))
+
+    # # Create a pulse train alternating +1, -1, ... of width PW, spacing PS_ms
+    # PS = int(0.010 * sample_rate)  # Spacing between pulses in sec (to samples)
+    # PW = 5  # Pulse width in samples, make sure < PS
+    # PTLEN = int(1 * sample_rate)  # Length in sec (to samples)
+    # #sample_vec = np.arange(PTLEN)
+    #
+    # # Construct the pulse train
+    # ptrain_ref = np.zeros(PTLEN)
+    # polarity = 1
+    # for i in range(0, PTLEN, PS):
+    #     if polarity == 1:
+    #         ptrain_ref[i:(i + PW)] = 1
+    #         polarity = -1
+    #     else:
+    #         ptrain_ref[i:(i + PW)] = -1
+    #         polarity = 1
+    #
+    # pt_convout = np.convolve(irwav, ptrain_ref)
+    # pt_corr = np.correlate(pt_convout, ptrain_ref, mode='full')  # Compute cross-correlation
+    # pt_delay_idx = np.argmax(np.abs(pt_corr))
+    # dcc = pt_delay_idx - len(ptrain_ref) + 1
+    # dtdoa = tdoa(pt_convout, ptrain_ref, interp=16, phat=True)
+    # gdccptmax = np.max(np.abs(pt_convout)) / np.max(np.abs(ptrain_ref))
+
+    # 5) Calculate delay using group delay method
+    fft_size = len(irwav)
+    H = np.fft.fft(irwav, n=fft_size)
+    phase = np.unwrap(np.angle(H))
+    freq = np.fft.fftfreq(fft_size)  # in samples, using d=1/sampling_rate=1
+    group_delay = -np.gradient(phase) / (2 * np.pi * np.gradient(freq))
+    dagd = np.mean(group_delay[np.isfinite(group_delay)])  # Average group delay
+    gmax = max(np.abs(H))
+
+    rt60, edt, rt10, rt20, nfloor = measure_rt60(irwav, sample_rate, plot=False)
+
+    # 6) Tabulate metrics as a single row in the table of scalar metrics per file
+    # irtab_col = ["dmax", "dcc", "dccphat", "dagd", "gdccmax", "rt20", "rt60", "max", "min", "gmax", "dur", "sr", "irfile"]
+    metr1 = [dmax, dcc, dtdoa, dagd, gdccmax, rt20, rt60, peak_value, min(irwav), gmax, duration, srk, ir_basename]
+    mtab1 = pd.DataFrame([metr1], columns=irtab_col, index=[pfile[0]])  # return tuple of dataframe
+
+    return mtab1
+
+
+def main():
+    from docopt import docopt
+
+    import sonusai
+    from sonusai.utils import trim_docstring
+
+    args = docopt(trim_docstring(__doc__), version=sonusai.__version__, options_first=True)
+
+    verbose = args["--verbose"]
+    ir_location = args["IRLOC"]
+    num_proc = args["--num_process"]
+
+    import psutil
+
+    from sonusai.utils import create_timestamp
+    from sonusai.utils import par_track
+    from sonusai.utils import track
+
+    # Check location; default extensions are ['.wav', '.WAV', '.flac', '.FLAC', '.mp3', '.aac']
+    pfiles, basedir = process_path(ir_location)
+    pfiles = sorted(pfiles, key=basename)
+
+    if pfiles is None or len(pfiles) < 1:
+        print(f"No IR audio files found in {ir_location}, exiting ...")
+        raise SystemExit(1)
+    elif len(pfiles) == 1:
+        print(f"Found single IR audio file {ir_location}, writing to *-irmetric.txt ...")
+        fbase, ext = splitext(basename(pfiles[0]))
+        wlcsv_name = None
+        txt_fname = str(join(basedir, fbase + "-irmetric.txt"))
+    elif len(pfiles) > 1:
+        print(f"Found {len(pfiles)} files under {basedir} for impulse response metric calculations")
+        txt_fname = str(join(basedir, "ir_metric_summary.txt"))
+        wlcsv_name = str(join(basedir, "ir_metric_list.csv"))
+
+    num_cpu = psutil.cpu_count()
+    cpu_percent = psutil.cpu_percent(interval=1)
+    print(f"#CPUs: {num_cpu}, current CPU utilization: {cpu_percent}%")
+    print(f"Memory utilization: {psutil.virtual_memory().percent}%")
+    if num_proc == "auto":
+        use_cpu = int(num_cpu * (0.9 - cpu_percent / 100))  # by default use ~90% of the CPUs, less current utilization
+    elif num_proc == "None":
+        use_cpu = None
+    else:
+        use_cpu = min(max(int(num_proc), 1), num_cpu)
+
+    timestamp = create_timestamp()
+    # Individual mixtures use pandas print, set precision to 2 decimal places
+    # pd.set_option('float_format', '{:.2f}'.format)
+    print(f"Calculating metrics for {len(pfiles)} impulse response files using {use_cpu} parallel processes ...")
+    progress = track(total=len(pfiles))
+    if use_cpu is None or len(pfiles) == 1:
+        no_par = True
+        num_cpus = None
+    else:
+        no_par = False
+        num_cpus = use_cpu
+
+    from functools import partial
+
+    # Set up pandas table for summarizing IR metrics
+    irtab_col = [
+        "dmax",
+        "dcc",
+        "dccphat",
+        "dagd",
+        "gdccmax",
+        "rt20",
+        "rt60",
+        "max",
+        "min",
+        "gmax",
+        "dur",
+        "sr",
+        "irfile",
+    ]
+    idx = range(len(pfiles))
+    llfiles = list(zip(idx, pfiles, strict=False))
+
+    all_metrics_tables = par_track(
+        partial(
+            _process_ir,
+            irtab_col=irtab_col,
+            basedir=basedir,
+        ),
+        llfiles,
+        progress=progress,
+        num_cpus=num_cpus,
+        no_par=no_par,
+    )
+    progress.close()
+
+    # progress = tqdm(total=len(pfiles), desc='ir_metric')
+    # if use_cpu is None:
+    #     all_metrics_tab = pp_tqdm_imap(_process_mixture, pfiles, progress=progress, no_par=True)
+    # else:
+    #     all_metrics_tab = pp_tqdm_imap(_process_mixture, pfiles, progress=progress, num_cpus=use_cpu)
+    # progress.close()
+
+    header_args = {
+        "mode": "a",
+        "encoding": "utf-8",
+        "index": False,
+        "header": False,
+    }
+    table_args = {
+        "mode": "a",
+        "encoding": "utf-8",
+    }
+
+    all_metrics_tab = pd.concat([item for item in all_metrics_tables])  # already sorted by truth filename via idx
+    mtabsort = all_metrics_tab.sort_values(by=["irfile"])
+
+    # Write list to .csv
+    if wlcsv_name:
+        pd.DataFrame([["Timestamp", timestamp]]).to_csv(wlcsv_name, header=False, index=False)
+        pd.DataFrame([f"IR metric list for {ir_location}:"]).to_csv(wlcsv_name, mode="a", header=False, index=False)
+        mtabsort.round(2).to_csv(wlcsv_name, **table_args)
+
+    # Write summary and list to .txt
+    with open(txt_fname, "w") as f:
+        print(f"Timestamp: {timestamp}", file=f)
+        print(f"IR metrics stats over {len(llfiles)} files:", file=f)
+        print(mtabsort.describe().round(3).T.to_string(float_format=lambda x: f"{x:.3f}", index=True), file=f)
+        print("", file=f)
+        print("", file=f)
+        print(f"IR metric list for {ir_location}:", file=f)
+        print(mtabsort.round(3).to_string(), file=f)
+
+
+if __name__ == "__main__":
+    main()
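
A hedged usage sketch for the new module (assumption: it is importable as sonusai.ir_metric, per the file path above; the synthetic IR and its 100-sample onset are illustrative only):

    import numpy as np

    from sonusai.ir_metric import measure_rt60, tdoa

    fs = 16000
    n = int(0.5 * fs)
    t = np.arange(n) / fs
    rng = np.random.default_rng(0)

    # Noise-like IR whose envelope decays 60 dB over 0.3 s, starting 100 samples in
    onset = 100
    ir = np.zeros(n)
    ir[onset:] = rng.standard_normal(n - onset) * np.exp(-6.9 * t[: n - onset] / 0.3)

    rt60, edt, rt10, rt20, floor = measure_rt60(ir, fs=fs)
    print(f"rt60 ~ {rt60:.3f} s (constructed for ~0.3 s), floor code {floor}")

    # Delay via GCC-PHAT of the IR convolved with a noise reference, in samples
    ref = rng.normal(0.0, 0.2, int(0.05 * fs))
    print(f"delay ~ {tdoa(np.convolve(ir, ref), ref, interp=16, phat=True):.1f} samples (onset {onset})")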
@@ -137,7 +137,9 @@ def main() -> None:
         print(f"Could not open SonusAI mixture database in {location}, exiting ...")
         return
 
-    metrics_present = mixdb.cached_metrics()
+    # Only check first and last mixture in order to save time
+    metrics_present = mixdb.cached_metrics([0, mixdb.num_mixtures - 1])
+
     num_metrics_present = len(metrics_present)
     if num_metrics_present < 1:
         print(f"mixdb reports no pre-generated metrics are present. Nothing to summarize in {location}, exiting ...")
@@ -150,7 +152,7 @@ def main() -> None:
         create_file_handler(join(location, "metrics_summary.log"))
         update_console_handler(verbose)
         initial_log_messages("metrics_summary")
-        logger.info(f"Logging summary of SonusAI mixture db at {location}")
+        logger.info(f"Logging summary of SonusAI mixture database at {location}")
     else:
         update_console_handler(verbose)
 
@@ -164,7 +166,7 @@ def main() -> None:
         fsuffix = f"_s{len(mixids)}t{mixdb.num_mixtures}"
     else:
         logger.info(
-            f"Summarizing SonusAI mixture db with {mixdb.num_mixtures} mixtures "
+            f"Summarizing SonusAI mixture database with {mixdb.num_mixtures} mixtures "
             f"and {num_metrics_present} pre-generated metrics ..."
        )
         fsuffix = ""
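
The change above spot-checks only the first and last mixtures instead of scanning the whole database. A sketch of the same call (assumption: MixtureDatabase opens from a location path; the path is hypothetical):

    from sonusai.mixture import MixtureDatabase

    mixdb = MixtureDatabase("path/to/mixdb")
    present = mixdb.cached_metrics([0, mixdb.num_mixtures - 1])
    print(f"{len(present)} pre-generated metric(s): {present}")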
@@ -5,6 +5,7 @@ from .audio import get_duration
 from .audio import get_next_noise
 from .audio import get_num_samples
 from .audio import get_sample_rate
+from .audio import raw_read_audio
 from .audio import read_audio
 from .audio import read_ir
 from .audio import validate_input_file
@@ -53,7 +54,9 @@ from .datatypes import AudioF
 from .datatypes import AudioStatsMetrics
 from .datatypes import AudioT
 from .datatypes import Augmentation
+from .datatypes import AugmentationEffects
 from .datatypes import AugmentationRule
+from .datatypes import AugmentationRuleEffects
 from .datatypes import AugmentedTarget
 from .datatypes import ClassCount
 from .datatypes import EnergyF
@@ -111,10 +114,10 @@ from .helpers import get_transform_from_audio
 from .helpers import inverse_transform
 from .helpers import mixture_metadata
 from .helpers import write_mixture_metadata
+from .ir_delay import get_impulse_response_delay
 from .log_duration_and_sizes import log_duration_and_sizes
 from .mixdb import MixtureDatabase
 from .mixdb import db_file
-from .sox_audio import Transformer
 from .spectral_mask import apply_spectral_mask
 from .target_class_balancing import balance_targets
 from .targets import get_augmented_target_ids_by_class
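
A consumer-side sketch (assumption: this hunk is a sonusai.mixture package __init__, as the relative imports suggest): the new names become importable from the package, while sox_audio.Transformer is no longer re-exported here.

    from sonusai.mixture import AugmentationEffects
    from sonusai.mixture import AugmentationRuleEffects
    from sonusai.mixture import get_impulse_response_delay
    from sonusai.mixture import raw_read_audio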