sonusai 0.19.10__py3-none-any.whl → 0.20.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/data/genmixdb.yml +4 -2
- sonusai/doc/doc.py +14 -0
- sonusai/ir_metric.py +555 -0
- sonusai/metrics_summary.py +5 -3
- sonusai/mixture/__init__.py +4 -1
- sonusai/mixture/audio.py +103 -12
- sonusai/mixture/augmentation.py +199 -84
- sonusai/mixture/config.py +9 -4
- sonusai/mixture/constants.py +0 -1
- sonusai/mixture/datatypes.py +19 -10
- sonusai/mixture/generation.py +11 -12
- sonusai/mixture/helpers.py +20 -23
- sonusai/mixture/ir_delay.py +63 -0
- sonusai/mixture/mixdb.py +103 -19
- sonusai/mixture/targets.py +3 -6
- sonusai/utils/__init__.py +2 -0
- sonusai/utils/temp_seed.py +13 -0
- {sonusai-0.19.10.dist-info → sonusai-0.20.2.dist-info}/METADATA +2 -2
- {sonusai-0.19.10.dist-info → sonusai-0.20.2.dist-info}/RECORD +21 -23
- {sonusai-0.19.10.dist-info → sonusai-0.20.2.dist-info}/WHEEL +1 -1
- sonusai/mixture/soundfile_audio.py +0 -130
- sonusai/mixture/sox_audio.py +0 -476
- sonusai/mixture/sox_augmentation.py +0 -136
- sonusai/mixture/torchaudio_audio.py +0 -106
- sonusai/mixture/torchaudio_augmentation.py +0 -109
- {sonusai-0.19.10.dist-info → sonusai-0.20.2.dist-info}/entry_points.txt +0 -0
sonusai/mixture/generation.py
CHANGED
@@ -93,7 +93,8 @@ def initialize_db(location: str, test: bool = False) -> None:
|
|
93
93
|
CREATE TABLE impulse_response_file (
|
94
94
|
id INTEGER PRIMARY KEY NOT NULL,
|
95
95
|
file TEXT NOT NULL,
|
96
|
-
tags TEXT NOT NULL
|
96
|
+
tags TEXT NOT NULL,
|
97
|
+
delay INTEGER NOT NULL)
|
97
98
|
""")
|
98
99
|
|
99
100
|
con.execute("""
|
@@ -360,11 +361,12 @@ def populate_impulse_response_file_table(
|
|
360
361
|
|
361
362
|
con = db_connection(location=location, readonly=False, test=test)
|
362
363
|
con.executemany(
|
363
|
-
"INSERT INTO impulse_response_file (file, tags) VALUES (?, ?)",
|
364
|
+
"INSERT INTO impulse_response_file (file, tags, delay) VALUES (?, ?, ?)",
|
364
365
|
[
|
365
366
|
(
|
366
367
|
impulse_response_file.file,
|
367
368
|
json.dumps(impulse_response_file.tags),
|
369
|
+
impulse_response_file.delay,
|
368
370
|
)
|
369
371
|
for impulse_response_file in impulse_response_files
|
370
372
|
],
|
@@ -529,17 +531,13 @@ def update_mixture(mixdb: MixtureDatabase, mixture: Mixture, with_data: bool = F
|
|
529
531
|
|
530
532
|
def _augmented_noise_audio(mixdb: MixtureDatabase, mixture: Mixture) -> AudioT:
|
531
533
|
from .audio import read_audio
|
532
|
-
from .audio import read_ir
|
533
534
|
from .augmentation import apply_augmentation
|
534
|
-
from .augmentation import apply_impulse_response
|
535
535
|
|
536
536
|
noise = mixdb.noise_file(mixture.noise.file_id)
|
537
537
|
noise_augmentation = mixture.noise.augmentation
|
538
538
|
|
539
539
|
audio = read_audio(noise.name)
|
540
|
-
audio = apply_augmentation(audio, noise_augmentation)
|
541
|
-
if noise_augmentation.ir is not None:
|
542
|
-
audio = apply_impulse_response(audio, read_ir(mixdb.impulse_response_file(noise_augmentation.ir))) # pyright: ignore [reportArgumentType]
|
540
|
+
audio = apply_augmentation(mixdb, audio, noise_augmentation.pre)
|
543
541
|
|
544
542
|
return audio
|
545
543
|
|
@@ -553,8 +551,9 @@ def _initialize_targets_audio(mixdb: MixtureDatabase, mixture: Mixture) -> tuple
|
|
553
551
|
target_audio = mixdb.read_target_audio(target.file_id)
|
554
552
|
targets_audio.append(
|
555
553
|
apply_augmentation(
|
554
|
+
mixdb=mixdb,
|
556
555
|
audio=target_audio,
|
557
|
-
augmentation=target.augmentation,
|
556
|
+
augmentation=target.augmentation.pre,
|
558
557
|
frame_length=mixdb.feature_step_samples,
|
559
558
|
)
|
560
559
|
)
|
@@ -680,7 +679,7 @@ def _exhaustive_noise_mix(
|
|
680
679
|
noise_offset = 0
|
681
680
|
noise_length = estimate_augmented_length_from_length(
|
682
681
|
length=noise_files[noise_file_id].samples,
|
683
|
-
tempo=noise_augmentation.tempo,
|
682
|
+
tempo=noise_augmentation.pre.tempo,
|
684
683
|
)
|
685
684
|
|
686
685
|
for augmented_target_ids_for_mixup in augmented_target_ids_for_mixups:
|
@@ -922,7 +921,7 @@ def _get_next_noise_indices(
|
|
922
921
|
|
923
922
|
noise_augmentation = augmentation_from_rule(noise_augmentations[noise_augmentation_id], num_ir)
|
924
923
|
noise_length = estimate_augmented_length_from_length(
|
925
|
-
length=noise_files[noise_file_id].samples, tempo=noise_augmentation.tempo
|
924
|
+
length=noise_files[noise_file_id].samples, tempo=noise_augmentation.pre.tempo
|
926
925
|
)
|
927
926
|
return noise_file_id, noise_augmentation_id, noise_augmentation, noise_length
|
928
927
|
|
@@ -946,7 +945,7 @@ def _get_next_noise_offset(
|
|
946
945
|
|
947
946
|
noise_augmentation = augmentation_from_rule(noise_augmentations[noise_file_id], num_ir)
|
948
947
|
noise_length = estimate_augmented_length_from_length(
|
949
|
-
length=noise_files[noise_file_id].samples, tempo=noise_augmentation.tempo
|
948
|
+
length=noise_files[noise_file_id].samples, tempo=noise_augmentation.pre.tempo
|
950
949
|
)
|
951
950
|
if noise_offset + target_length >= noise_length:
|
952
951
|
if noise_offset == 0:
|
@@ -987,7 +986,7 @@ def _get_target_info(
|
|
987
986
|
target_length = max(
|
988
987
|
estimate_augmented_length_from_length(
|
989
988
|
length=target_files[tfi].samples,
|
990
|
-
tempo=target_augmentation.tempo,
|
989
|
+
tempo=target_augmentation.pre.tempo,
|
991
990
|
frame_length=feature_step_samples,
|
992
991
|
),
|
993
992
|
target_length,
|
sonusai/mixture/helpers.py
CHANGED
@@ -117,11 +117,11 @@ def mixture_all_speech_metadata(mixdb: MixtureDatabase, mixture: Mixture) -> lis
|
|
117
117
|
# Check for tempo augmentation and adjust Interval start and end data as needed
|
118
118
|
entries = []
|
119
119
|
for entry in item:
|
120
|
-
if target.augmentation.tempo is not None:
|
120
|
+
if target.augmentation.pre.tempo is not None:
|
121
121
|
entries.append(
|
122
122
|
Interval(
|
123
|
-
entry.start / target.augmentation.tempo,
|
124
|
-
entry.end / target.augmentation.tempo,
|
123
|
+
entry.start / target.augmentation.pre.tempo,
|
124
|
+
entry.end / target.augmentation.pre.tempo,
|
125
125
|
entry.label,
|
126
126
|
)
|
127
127
|
)
|
@@ -153,10 +153,8 @@ def mixture_metadata(mixdb: MixtureDatabase, m_id: int | None = None, mixture: M
|
|
153
153
|
speech_metadata = mixture_all_speech_metadata(mixdb, mixture)
|
154
154
|
for mi, target in enumerate(mixture.targets):
|
155
155
|
target_file = mixdb.target_file(target.file_id)
|
156
|
-
target_augmentation = target.augmentation
|
157
156
|
metadata += f"target {mi} name: {target_file.name}\n"
|
158
157
|
metadata += f"target {mi} augmentation: {target.augmentation.to_dict()}\n"
|
159
|
-
metadata += f"target {mi} ir: {mixdb.impulse_response_file(target_augmentation.ir)}\n"
|
160
158
|
metadata += f"target {mi} target_gain: {target.gain if not mixture.is_noise_only else 0}\n"
|
161
159
|
metadata += f"target {mi} class indices: {target_file.class_indices}\n"
|
162
160
|
for key in target_file.truth_configs:
|
@@ -168,7 +166,6 @@ def mixture_metadata(mixdb: MixtureDatabase, m_id: int | None = None, mixture: M
|
|
168
166
|
noise_augmentation = mixture.noise.augmentation
|
169
167
|
metadata += f"noise name: {noise.name}\n"
|
170
168
|
metadata += f"noise augmentation: {noise_augmentation.to_dict()}\n"
|
171
|
-
metadata += f"noise ir: {mixdb.impulse_response_file(noise_augmentation.ir)}\n"
|
172
169
|
metadata += f"noise offset: {mixture.noise_offset}\n"
|
173
170
|
metadata += f"snr: {mixture.snr}\n"
|
174
171
|
metadata += f"random_snr: {mixture.snr.is_random}\n"
|
@@ -260,33 +257,33 @@ def to_target(entry: TargetRecord) -> Target:
|
|
260
257
|
)
|
261
258
|
|
262
259
|
|
263
|
-
def get_target(mixdb: MixtureDatabase, mixture: Mixture, targets_audio: list[AudioT]
|
260
|
+
def get_target(mixdb: MixtureDatabase, mixture: Mixture, targets_audio: list[AudioT]) -> AudioT:
|
264
261
|
"""Get the augmented target audio data for the given mixture record
|
265
262
|
|
266
263
|
:param mixdb: Mixture database
|
267
264
|
:param mixture: Mixture record
|
268
265
|
:param targets_audio: List of augmented target audio data (one per target in the mixup)
|
269
|
-
:param use_cache: If true, use LRU caching
|
270
266
|
:return: Sum of augmented target audio data
|
271
267
|
"""
|
272
|
-
# Apply
|
268
|
+
# Apply post-truth augmentation effects to targets and sum
|
273
269
|
import numpy as np
|
274
270
|
|
275
|
-
from .
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
271
|
+
from .augmentation import apply_augmentation
|
272
|
+
|
273
|
+
targets_post = []
|
274
|
+
for idx, target_audio in enumerate(targets_audio):
|
275
|
+
target = mixture.targets[idx]
|
276
|
+
targets_post.append(
|
277
|
+
apply_augmentation(
|
278
|
+
mixdb=mixdb,
|
279
|
+
audio=target_audio,
|
280
|
+
augmentation=target.augmentation.post,
|
281
|
+
frame_length=mixdb.feature_step_samples,
|
284
282
|
)
|
285
|
-
|
286
|
-
targets_ir.append(target)
|
283
|
+
)
|
287
284
|
|
288
285
|
# Return sum of targets
|
289
|
-
return np.sum(
|
286
|
+
return np.sum(targets_post, axis=0)
|
290
287
|
|
291
288
|
|
292
289
|
def get_transform_from_audio(audio: AudioT, transform: ForwardTransform) -> tuple[AudioF, EnergyT]:
|
@@ -400,7 +397,7 @@ def augmented_target_samples(
|
|
400
397
|
[
|
401
398
|
estimate_augmented_length_from_length(
|
402
399
|
length=target_files[fi].samples,
|
403
|
-
tempo=target_augmentations[ai].tempo,
|
400
|
+
tempo=target_augmentations[ai].pre.tempo,
|
404
401
|
frame_length=feature_step_samples,
|
405
402
|
)
|
406
403
|
for fi, ai in it
|
@@ -420,7 +417,7 @@ def augmented_noise_samples(noise_files: list[NoiseFile], noise_augmentations: l
|
|
420
417
|
def augmented_noise_length(noise_file: NoiseFile, noise_augmentation: Augmentation) -> int:
|
421
418
|
from .augmentation import estimate_augmented_length_from_length
|
422
419
|
|
423
|
-
return estimate_augmented_length_from_length(length=noise_file.samples, tempo=noise_augmentation.tempo)
|
420
|
+
return estimate_augmented_length_from_length(length=noise_file.samples, tempo=noise_augmentation.pre.tempo)
|
424
421
|
|
425
422
|
|
426
423
|
def get_textgrid_tier_from_target_file(target_file: str, tier: str) -> SpeechMetadata | None:
|
@@ -0,0 +1,63 @@
|
|
1
|
+
import numpy as np
|
2
|
+
|
3
|
+
|
4
|
+
def get_impulse_response_delay(file: str) -> int:
    """Estimate the delay introduced by an impulse response file

    A short burst of white Gaussian noise is convolved with the impulse response, and the
    shift of the convolved signal relative to the original burst is measured with tdoa()
    using PHAT weighting and 16x interpolation. The result is rounded to the nearest integer.

    :param file: Impulse response audio file name
    :return: Estimated delay in samples (tdoa() is called with its default fs of 1)
    """
    from sonusai.utils import temp_seed

    from .audio import raw_read_audio

    ir, sample_rate = raw_read_audio(file)

    # Reference burst length is 0.05 * sample_rate samples, rounded up
    burst_samples = int(np.ceil(0.05 * sample_rate))

    # A fixed seed is used so the reference burst is the same on every call
    # (assumes temp_seed temporarily seeds NumPy's global RNG -- TODO confirm)
    with temp_seed(42):
        noise = np.random.normal(loc=0, scale=0.2, size=burst_samples)
    wgn_ref = noise.astype(np.float32)

    wgn_conv = np.convolve(ir, wgn_ref)

    shift = tdoa(wgn_conv, wgn_ref, interp=16, phat=True)
    return int(np.round(shift))
|
17
|
+
|
18
|
+
|
19
|
+
def tdoa(signal: np.ndarray, reference: np.ndarray, interp: int = 1, phat: bool = False, fs: int | float = 1) -> float:
    """Estimate the shift of signal relative to reference using generalized cross-correlation

    The cross-correlation is computed with correlate(); the position of its largest
    absolute value is converted to a lag and divided by fs.

    :param signal: The array whose tdoa is measured
    :param reference: The reference array
    :param interp: Interpolation factor for the cross-correlation
    :param phat: Apply the PHAT weighting
    :param fs: The sampling frequency of the input arrays
    :return: The estimated delay between the two arrays (in samples when fs is 1)
    """
    xcorr = correlate(signal, reference, interp=interp, phat=phat)

    # Index of the strongest correlation peak, on the interpolated grid
    peak = np.argmax(np.abs(xcorr))

    # correlate() places zero lag at (len(reference) - 1) on the non-interpolated grid
    zero_lag = reference.shape[0] - 1

    return float((peak / interp - zero_lag) / fs)
|
36
|
+
|
37
|
+
|
38
|
+
def correlate(x1: np.ndarray, x2: np.ndarray, interp: int = 1, phat: bool = False) -> np.ndarray:
    """Compute the cross-correlation between x1 and x2 using FFTs

    The result has interp * (len(x1) + len(x2) - 1) points. It starts with the
    interp * (len(x2) - 1) negative-lag points, followed by the interp * len(x1)
    points for lag zero and the positive lags.

    :param x1: Input array 1
    :param x2: Input array 2
    :param interp: Interpolation factor for the output array
    :param phat: Apply the PHAT weighting
    :return: The cross-correlation between the two arrays
    """
    n_x1 = x1.shape[0]
    n_x2 = x2.shape[0]

    # Length of the full linear cross-correlation; zero-padding to this length avoids wrap-around
    n = n_x1 + n_x2 - 1

    fft1 = np.fft.rfft(x1, n=n)
    fft2 = np.fft.rfft(x2, n=n)

    if phat:
        # PHAT: divide each spectrum by its magnitude so only phase remains; a small
        # epsilon scaled from the mean magnitude guards against division by zero
        eps1 = np.mean(np.abs(fft1)) * 1e-10
        fft1 /= np.abs(fft1) + eps1
        eps2 = np.mean(np.abs(fft2)) * 1e-10
        fft2 /= np.abs(fft2) + eps2

    # Requesting a longer inverse transform interpolates the correlation by the factor interp
    out = np.fft.irfft(fft1 * np.conj(fft2), n=int(n * interp))

    # Negative lags live at the end of the circular result. Slice from an explicit
    # non-negative index: when x2 has a single sample there are no negative lags, and
    # a negative-index slice of out[-0:] would wrongly return the entire array.
    n_negative = interp * (n_x2 - 1)
    split = out.shape[0] - n_negative

    return np.concatenate([out[split:], out[: (interp * n_x1)]])
|
sonusai/mixture/mixdb.py
CHANGED
@@ -255,6 +255,16 @@ class MixtureDatabase:
|
|
255
255
|
"Predicted rating of overall quality of mixture versus true targets",
|
256
256
|
),
|
257
257
|
MetricDoc("Mixture Metrics", "ssnr", "Segmental SNR"),
|
258
|
+
MetricDoc("Mixture Metrics", "mxdco", "Mixture DC offset"),
|
259
|
+
MetricDoc("Mixture Metrics", "mxmin", "Mixture min level"),
|
260
|
+
MetricDoc("Mixture Metrics", "mxmax", "Mixture max levl"),
|
261
|
+
MetricDoc("Mixture Metrics", "mxpkdb", "Mixture Pk lev dB"),
|
262
|
+
MetricDoc("Mixture Metrics", "mxlrms", "Mixture RMS lev dB"),
|
263
|
+
MetricDoc("Mixture Metrics", "mxpkr", "Mixture RMS Pk dB"),
|
264
|
+
MetricDoc("Mixture Metrics", "mxtr", "Mixture RMS Tr dB"),
|
265
|
+
MetricDoc("Mixture Metrics", "mxcr", "Mixture Crest factor"),
|
266
|
+
MetricDoc("Mixture Metrics", "mxfl", "Mixture Flat factor"),
|
267
|
+
MetricDoc("Mixture Metrics", "mxpkc", "Mixture Pk count"),
|
258
268
|
MetricDoc("Mixture Metrics", "mxtdco", "Mixture target DC offset"),
|
259
269
|
MetricDoc("Mixture Metrics", "mxtmin", "Mixture target min level"),
|
260
270
|
MetricDoc("Mixture Metrics", "mxtmax", "Mixture target max levl"),
|
@@ -681,7 +691,7 @@ class MixtureDatabase:
|
|
681
691
|
|
682
692
|
with self.db() as c:
|
683
693
|
return [
|
684
|
-
ImpulseResponseFile(impulse_response[1], json.loads(impulse_response[2]))
|
694
|
+
ImpulseResponseFile(impulse_response[1], json.loads(impulse_response[2]), impulse_response[3])
|
685
695
|
for impulse_response in c.execute(
|
686
696
|
"SELECT impulse_response_file.* FROM impulse_response_file"
|
687
697
|
).fetchall()
|
@@ -700,15 +710,25 @@ class MixtureDatabase:
|
|
700
710
|
]
|
701
711
|
|
702
712
|
def impulse_response_file(self, ir_id: int | None) -> str | None:
|
703
|
-
"""Get impulse response file with ID from db
|
713
|
+
"""Get impulse response file name with ID from db
|
704
714
|
|
705
715
|
:param ir_id: Impulse response file ID
|
706
|
-
:return:
|
716
|
+
:return: Impulse response file name
|
707
717
|
"""
|
708
718
|
if ir_id is None:
|
709
719
|
return None
|
710
720
|
return _impulse_response_file(self.db, ir_id, self.use_cache)
|
711
721
|
|
722
|
+
def impulse_response_delay(self, ir_id: int | None) -> int | None:
|
723
|
+
"""Get impulse response delay with ID from db
|
724
|
+
|
725
|
+
:param ir_id: Impulse response file ID
|
726
|
+
:return: Impulse response delay
|
727
|
+
"""
|
728
|
+
if ir_id is None:
|
729
|
+
return None
|
730
|
+
return _impulse_response_delay(self.db, ir_id, self.use_cache)
|
731
|
+
|
712
732
|
@cached_property
|
713
733
|
def num_impulse_response_files(self) -> int:
|
714
734
|
"""Get number of impulse response files from db
|
@@ -814,18 +834,11 @@ class MixtureDatabase:
|
|
814
834
|
:return: Augmented noise audio
|
815
835
|
"""
|
816
836
|
from .audio import read_audio
|
817
|
-
from .audio import read_ir
|
818
837
|
from .augmentation import apply_augmentation
|
819
|
-
from .augmentation import apply_impulse_response
|
820
838
|
|
821
839
|
noise = self.noise_file(mixture.noise.file_id)
|
822
840
|
audio = read_audio(noise.name, self.use_cache)
|
823
|
-
audio = apply_augmentation(audio, mixture.noise.augmentation)
|
824
|
-
if mixture.noise.augmentation.ir is not None:
|
825
|
-
audio = apply_impulse_response(
|
826
|
-
audio,
|
827
|
-
read_ir(self.impulse_response_file(mixture.noise.augmentation.ir), self.use_cache), # pyright: ignore [reportArgumentType]
|
828
|
-
)
|
841
|
+
audio = apply_augmentation(self, audio, mixture.noise.augmentation.pre)
|
829
842
|
|
830
843
|
return audio
|
831
844
|
|
@@ -859,8 +872,9 @@ class MixtureDatabase:
|
|
859
872
|
for target in mixture.targets:
|
860
873
|
target_audio = self.read_target_audio(target.file_id)
|
861
874
|
target_audio = apply_augmentation(
|
875
|
+
mixdb=self,
|
862
876
|
audio=target_audio,
|
863
|
-
augmentation=target.augmentation,
|
877
|
+
augmentation=target.augmentation.pre,
|
864
878
|
frame_length=self.feature_step_samples,
|
865
879
|
)
|
866
880
|
target_audio = apply_gain(audio=target_audio, gain=mixture.target_snr_gain)
|
@@ -1119,8 +1133,7 @@ class MixtureDatabase:
|
|
1119
1133
|
offsets = range(0, mixture.samples, self.ft_config.overlap)
|
1120
1134
|
if len(target_energy) != len(offsets):
|
1121
1135
|
raise ValueError(
|
1122
|
-
f"Number of frames in energy, {len(target_energy)},"
|
1123
|
-
f" is not number of frames in mixture, {len(offsets)}"
|
1136
|
+
f"Number of frames in energy, {len(target_energy)}, is not number of frames in mixture, {len(offsets)}"
|
1124
1137
|
)
|
1125
1138
|
|
1126
1139
|
for idx, offset in enumerate(offsets):
|
@@ -1370,11 +1383,11 @@ class MixtureDatabase:
|
|
1370
1383
|
# Check for tempo augmentation and adjust Interval start and end data as needed
|
1371
1384
|
entries = []
|
1372
1385
|
for entry in data:
|
1373
|
-
if target.augmentation.tempo is not None:
|
1386
|
+
if target.augmentation.pre.tempo is not None:
|
1374
1387
|
entries.append(
|
1375
1388
|
Interval(
|
1376
|
-
entry.start / target.augmentation.tempo,
|
1377
|
-
entry.end / target.augmentation.tempo,
|
1389
|
+
entry.start / target.augmentation.pre.tempo,
|
1390
|
+
entry.end / target.augmentation.pre.tempo,
|
1378
1391
|
entry.label,
|
1379
1392
|
)
|
1380
1393
|
)
|
@@ -1595,6 +1608,19 @@ class MixtureDatabase:
|
|
1595
1608
|
|
1596
1609
|
speech = create_speech()
|
1597
1610
|
|
1611
|
+
def create_mixture_stats() -> Callable[[], AudioStatsMetrics]:
|
1612
|
+
state: AudioStatsMetrics | None = None
|
1613
|
+
|
1614
|
+
def get() -> AudioStatsMetrics:
|
1615
|
+
nonlocal state
|
1616
|
+
if state is None:
|
1617
|
+
state = calc_audio_stats(mixture_audio(), self.fg_info.ft_config.length / SAMPLE_RATE)
|
1618
|
+
return state
|
1619
|
+
|
1620
|
+
return get
|
1621
|
+
|
1622
|
+
mixture_stats = create_mixture_stats()
|
1623
|
+
|
1598
1624
|
def create_targets_stats() -> Callable[[], list[AudioStatsMetrics]]:
|
1599
1625
|
state: list[AudioStatsMetrics] | None = None
|
1600
1626
|
|
@@ -1803,6 +1829,36 @@ class MixtureDatabase:
|
|
1803
1829
|
extended=False,
|
1804
1830
|
)
|
1805
1831
|
|
1832
|
+
if m == "mxdco":
|
1833
|
+
return mixture_stats().dco
|
1834
|
+
|
1835
|
+
if m == "mxmin":
|
1836
|
+
return mixture_stats().min
|
1837
|
+
|
1838
|
+
if m == "mxmax":
|
1839
|
+
return mixture_stats().max
|
1840
|
+
|
1841
|
+
if m == "mxpkdb":
|
1842
|
+
return mixture_stats().pkdb
|
1843
|
+
|
1844
|
+
if m == "mxlrms":
|
1845
|
+
return mixture_stats().lrms
|
1846
|
+
|
1847
|
+
if m == "mxpkr":
|
1848
|
+
return mixture_stats().pkr
|
1849
|
+
|
1850
|
+
if m == "mxtr":
|
1851
|
+
return mixture_stats().tr
|
1852
|
+
|
1853
|
+
if m == "mxcr":
|
1854
|
+
return mixture_stats().cr
|
1855
|
+
|
1856
|
+
if m == "mxfl":
|
1857
|
+
return mixture_stats().fl
|
1858
|
+
|
1859
|
+
if m == "mxpkc":
|
1860
|
+
return mixture_stats().pkc
|
1861
|
+
|
1806
1862
|
if m == "mxtdco":
|
1807
1863
|
return target_stats().dco
|
1808
1864
|
|
@@ -2042,12 +2098,12 @@ def __noise_file(db: partial, n_id: int) -> NoiseFile:
|
|
2042
2098
|
|
2043
2099
|
|
2044
2100
|
def _impulse_response_file(db: partial, ir_id: int, use_cache: bool = True) -> str:
|
2045
|
-
"""Get impulse response file with ID from db
|
2101
|
+
"""Get impulse response file name with ID from db
|
2046
2102
|
|
2047
2103
|
:param db: Database context
|
2048
2104
|
:param ir_id: Impulse response file ID
|
2049
2105
|
:param use_cache: If true, use LRU caching
|
2050
|
-
:return: Impulse response
|
2106
|
+
:return: Impulse response file name
|
2051
2107
|
"""
|
2052
2108
|
if use_cache:
|
2053
2109
|
return __impulse_response_file(db, ir_id)
|
@@ -2069,6 +2125,34 @@ def __impulse_response_file(db: partial, ir_id: int) -> str:
|
|
2069
2125
|
)
|
2070
2126
|
|
2071
2127
|
|
2128
|
+
def _impulse_response_delay(db: partial, ir_id: int, use_cache: bool = True) -> int:
    """Fetch the impulse response delay for the given ID from the db

    :param db: Database context
    :param ir_id: Impulse response file ID
    :param use_cache: If true, use LRU caching
    :return: Impulse response delay
    """
    # __wrapped__ is the undecorated function, which bypasses the LRU cache
    lookup = __impulse_response_delay if use_cache else __impulse_response_delay.__wrapped__
    return lookup(db, ir_id)
|
2139
|
+
|
2140
|
+
|
2141
|
+
@lru_cache
def __impulse_response_delay(db: partial, ir_id: int) -> int:
    """Query the delay column of impulse_response_file for one impulse response

    :param db: Database context
    :param ir_id: Impulse response file ID
    :return: Impulse response delay
    """
    query = """
        SELECT impulse_response_file.delay
        FROM impulse_response_file
        WHERE ? = impulse_response_file.id
        """
    with db() as c:
        # The row is looked up with ir_id + 1 (presumably because table ids start at 1 -- TODO confirm)
        row = c.execute(query, (ir_id + 1,)).fetchone()
        return int(row[0])
|
2154
|
+
|
2155
|
+
|
2072
2156
|
def _mixture(db: partial, m_id: int, use_cache: bool = True) -> Mixture:
|
2073
2157
|
"""Get mixture record with ID from db
|
2074
2158
|
|
sonusai/mixture/targets.py
CHANGED
@@ -16,14 +16,11 @@ def get_augmented_targets(
|
|
16
16
|
|
17
17
|
augmented_targets: list[AugmentedTarget] = []
|
18
18
|
for mixup in mixups:
|
19
|
-
|
19
|
+
target_augmentation_indices = get_augmentation_indices_for_mixup(target_augmentations, mixup)
|
20
20
|
for target_index in range(len(target_files)):
|
21
|
-
for
|
21
|
+
for target_augmentation_index in target_augmentation_indices:
|
22
22
|
augmented_targets.append(
|
23
|
-
AugmentedTarget(
|
24
|
-
target_id=target_index,
|
25
|
-
target_augmentation_id=augmentation_index,
|
26
|
-
)
|
23
|
+
AugmentedTarget(target_id=target_index, target_augmentation_id=target_augmentation_index)
|
27
24
|
)
|
28
25
|
|
29
26
|
return augmented_targets
|
sonusai/utils/__init__.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# SonusAI general utilities
|
2
2
|
# ruff: noqa: F401
|
3
|
+
|
3
4
|
from .asl_p56 import asl_p56
|
4
5
|
from .asr import ASRResult
|
5
6
|
from .asr import calc_asr
|
@@ -53,5 +54,6 @@ from .stacked_complex import stacked_complex_imag
|
|
53
54
|
from .stacked_complex import stacked_complex_real
|
54
55
|
from .stacked_complex import unstack_complex
|
55
56
|
from .stratified_shuffle_split import stratified_shuffle_split_mixid
|
57
|
+
from .temp_seed import temp_seed
|
56
58
|
from .write_audio import write_audio
|
57
59
|
from .yes_or_no import yes_or_no
|
@@ -3,7 +3,7 @@ sonusai/aawscd_probwrite.py,sha256=QZLMQrmPr3OjZ06buyYDwlnk9YPCpyr4KHkBjPsiqjU,3
|
|
3
3
|
sonusai/audiofe.py,sha256=iFdthh4UrOvziT8urjrjD7dACWZPQz9orM5bVAW3WSQ,11269
|
4
4
|
sonusai/calc_metric_spenh.py,sha256=XWa2DzLSCEQ6GzsJv-YHfnN51f_oFwcRMMgMzusAvYA,49304
|
5
5
|
sonusai/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
sonusai/data/genmixdb.yml,sha256=
|
6
|
+
sonusai/data/genmixdb.yml,sha256=qFK_VoUxHmc-EhJYZr4pAEY9tu6zRvloubq0NmrbH6I,956
|
7
7
|
sonusai/data/silero_vad_v5.1.jit,sha256=hcSOHw7LYE5dKiaPPM-5EtT36TWs3IavWj_FsK6nspo,2269612
|
8
8
|
sonusai/data/silero_vad_v5.1.onnx,sha256=JiOilT9v89LB5hdAxs23FoEzR5smff7xFKSjzFvdeI8,2327524
|
9
9
|
sonusai/data/speech_ma01_01.wav,sha256=PK0vMKg-NR6rPE3KouxHGF6PKXnJCr7AwjMqfu98LUA,76644
|
@@ -12,12 +12,13 @@ sonusai/deprecated/gentcst.py,sha256=nKbHy3aHreHqA-XnLQOzOApS8RuTNUFqnx52a8I5zLQ
|
|
12
12
|
sonusai/deprecated/plot.py,sha256=xL0w8Dtjdns2KX8tbTrdBGXviy_aoV3WUJSVKPZkQng,17423
|
13
13
|
sonusai/deprecated/tplot.py,sha256=0p238DvTaP4oU9y-dp0JdLaTV4TKrooAwbx7zdz_QAc,14641
|
14
14
|
sonusai/doc/__init__.py,sha256=KyQ26Um0RM8A3GYsb_tbFH64RwpoAw6lja2f_moUWas,33
|
15
|
-
sonusai/doc/doc.py,sha256=
|
15
|
+
sonusai/doc/doc.py,sha256=nEnvau0PIl2xbxET8AjFkwTLVShemmQ4CiTio94aLg0,19275
|
16
16
|
sonusai/doc.py,sha256=zSmXpioB0YS_5-7kqfS5cr--veSaXkxRKzldId9Hyoc,878
|
17
17
|
sonusai/genft.py,sha256=K2wjO5J48UgyhCj2Sx789nkjt0DWtYgnRDbQyNtjCSY,5591
|
18
18
|
sonusai/genmetrics.py,sha256=jORQCdf_SCrtcvDd47lgcPgQTplG956RTAqmf58Xe8Y,5689
|
19
19
|
sonusai/genmix.py,sha256=mSc5FfAYrUt3zloPSnp81dks8ntvSH6jyk-nh97wnww,6707
|
20
20
|
sonusai/genmixdb.py,sha256=SsbHRpPoJ77XzOBQRRDheucyuJzE-tucQtRoYl89ApU,17841
|
21
|
+
sonusai/ir_metric.py,sha256=n35_RssAk2jjqm1iXJ6euMtK00LV4qohdBfDAZZpNlU,19581
|
21
22
|
sonusai/lsdb.py,sha256=0HOGDDndB3LT9cz9AaxKIpt9vslAoSP4F239gply4Xg,5149
|
22
23
|
sonusai/main.py,sha256=HbnEia1B1-Z-mlHkLfojH8aj9GIpL1Btw3oH60T_CCQ,2590
|
23
24
|
sonusai/metrics/__init__.py,sha256=ssV6JEK_oklRSocsp6HMcG-GtJvV8IkRQtdKhHHmwU8,878
|
@@ -37,31 +38,27 @@ sonusai/metrics/class_summary.py,sha256=ZA7zNgwBpmTs1TP_t4jRT0pWnDnATC_up_8qE4aH
|
|
37
38
|
sonusai/metrics/confusion_matrix_summary.py,sha256=zBL_Ke7wF6oKtrKZPr0fsyF_taofdjxBlZmKodu0xUA,3143
|
38
39
|
sonusai/metrics/one_hot.py,sha256=hmuyh-9tpRjb_oyqU3WqZ14zItpRJQfcqBDKJeb5H9I,13930
|
39
40
|
sonusai/metrics/snr_summary.py,sha256=t8Fi_8WtboTi8flkZuOiHq9H3-nIELx4AKvnm-qvxLQ,5785
|
40
|
-
sonusai/metrics_summary.py,sha256=
|
41
|
-
sonusai/mixture/__init__.py,sha256=
|
42
|
-
sonusai/mixture/audio.py,sha256=
|
43
|
-
sonusai/mixture/augmentation.py,sha256=
|
41
|
+
sonusai/metrics_summary.py,sha256=DchpgBNYcBPz4t1YRindCm1CVmJLmXY7-oyaXpxBnWA,12106
|
42
|
+
sonusai/mixture/__init__.py,sha256=9TE21nlj4TOrSPopLh5Lh769v68v0kWgNkr_GGk9TEs,5300
|
43
|
+
sonusai/mixture/audio.py,sha256=MlsuhY8Zc8puBClO5utBP3mveAk8MLZtXvF6ztuisW4,6629
|
44
|
+
sonusai/mixture/augmentation.py,sha256=UYNdfVcYCsmegicKpGLTsYyvO97NQOTLoZjwI9sZROk,14646
|
44
45
|
sonusai/mixture/class_count.py,sha256=zcC3BDYMPN6wJYmO1RcOuqmrnTQIbMSznl33oN3e2sc,597
|
45
|
-
sonusai/mixture/config.py,sha256=
|
46
|
-
sonusai/mixture/constants.py,sha256=
|
46
|
+
sonusai/mixture/config.py,sha256=gZUPLGl7VKW32HfnFMEajKgJE35ZJ_edpxaA0TRPE1E,24469
|
47
|
+
sonusai/mixture/constants.py,sha256=yGXNjB87boJVSt1Q8hBTrNzOP0XVZcFf1k34u1yyUWU,1481
|
47
48
|
sonusai/mixture/data_io.py,sha256=KZGqhHd9_ucAfZEAXPIc5XL3aHYgdV5CyqaBx5_t8OM,5551
|
48
|
-
sonusai/mixture/datatypes.py,sha256=
|
49
|
+
sonusai/mixture/datatypes.py,sha256=LFWDsgGVY7Z3VPjWaB5g8q5Ss8v5bR2EfU6ygI0wZaU,10707
|
49
50
|
sonusai/mixture/db_datatypes.py,sha256=kvdUOMS6Pkkj9AmxCiq6zM8x7jbPPi933tVaXRxbTdQ,1534
|
50
51
|
sonusai/mixture/eq_rule_is_valid.py,sha256=O3gCAs_0hpxENK5b7kxxpDmOpKHlXGBWuLGT_97ARSM,1210
|
51
52
|
sonusai/mixture/feature.py,sha256=L0bPFG0RO-CrrtTStUMt_14euYsVo8_TWTP2IKSFKaA,2335
|
52
|
-
sonusai/mixture/generation.py,sha256=
|
53
|
-
sonusai/mixture/helpers.py,sha256=
|
53
|
+
sonusai/mixture/generation.py,sha256=f3DsDvVAAdpvh1lBWV-jMj5qNeZ2qmR9RA_4eI4NDcU,37954
|
54
|
+
sonusai/mixture/helpers.py,sha256=nNqK__MBp8f10telUU3A8FhkNeXYPGCx8dxxDpzCAbg,15464
|
55
|
+
sonusai/mixture/ir_delay.py,sha256=WRoYjuHpsppe0D5qQExNxsCyKbMPDfWJ4CTlr3Ps50k,2036
|
54
56
|
sonusai/mixture/log_duration_and_sizes.py,sha256=qhgl87C2KbjxLdKEpjYOoqNL6rc-8-PB4R7Gx_7UG8g,1240
|
55
|
-
sonusai/mixture/mixdb.py,sha256=
|
56
|
-
sonusai/mixture/soundfile_audio.py,sha256=At_ZC2b9pZ_9IYp1UxyPzRoBK9-1cKPCLMm74F1AjKE,4092
|
57
|
-
sonusai/mixture/sox_audio.py,sha256=7ouCLqXYS6tjG2L0v5lugVO7z5UwJmsr1VigbrXhs74,16725
|
58
|
-
sonusai/mixture/sox_augmentation.py,sha256=DtfGLPaB1BIt2wvTEA__MYkGFNU85Tuup5BFsIVrh0E,4546
|
57
|
+
sonusai/mixture/mixdb.py,sha256=oMBbi0HRiEBtN7lDup8qhPBIQ_td89CHa0bYy2PXdWQ,78744
|
59
58
|
sonusai/mixture/spectral_mask.py,sha256=U9XJ_SAoI9b67K_3SE7bNw6U8cPGFOBttaZAxMjA_Jc,2042
|
60
59
|
sonusai/mixture/target_class_balancing.py,sha256=o_TZ8kVYq10lgeXHh3GUFfflfdUvRt0FekFu2eaNkDs,4251
|
61
|
-
sonusai/mixture/targets.py,sha256=
|
60
|
+
sonusai/mixture/targets.py,sha256=oOeqdE-n-sCq_9luEt82HEP0MRCaHG_7J-p3nCftkAc,6399
|
62
61
|
sonusai/mixture/tokenized_shell_vars.py,sha256=lXTzUDutuBWGV1zIsqeIxWmy-eKm0Vx1y8-iLdsL1gQ,4921
|
63
|
-
sonusai/mixture/torchaudio_audio.py,sha256=72Hxo5TKAW7mYpRy15QFfD7AYDORBk6bVCcHENniWGw,3116
|
64
|
-
sonusai/mixture/torchaudio_augmentation.py,sha256=uFAKxIfs50J5FR-WXodsEACm2Ao-t5dZRSJ0DwTAfBg,3930
|
65
62
|
sonusai/mixture/truth.py,sha256=-CwwawFRGjqodR2yKvAMGL1XaYLct-tli7wZ2gbhLtQ,2121
|
66
63
|
sonusai/mixture/truth_functions/__init__.py,sha256=0mlOFChPnXG5BC0eKOe4n9VH17jY4iOqZFLuF6Gprdk,1505
|
67
64
|
sonusai/mixture/truth_functions/crm.py,sha256=iidcffXfqV8k9O5wt5KTWIAFaTSjmhV5ucKZPbTgpvQ,3809
|
@@ -86,7 +83,7 @@ sonusai/speech/types.py,sha256=4eKVPAktpkIrZ2qoVp2iT45zxTVNocQEGT6O_Zlub_w,214
|
|
86
83
|
sonusai/speech/vctk.py,sha256=WInvRRRkZCW6t_NcZAJffJzgCbyetal-j2w0kKX5SDw,1527
|
87
84
|
sonusai/speech/voxceleb.py,sha256=Uu1kB1krf8hess1yuvGbYfV_VgYhklEyoz4I7KfrVpw,2658
|
88
85
|
sonusai/summarize_metric_spenh.py,sha256=2w81ZgJahYvD6wCpE3DFoUFrXexLXjO44ITRVm1HJXw,1858
|
89
|
-
sonusai/utils/__init__.py,sha256=
|
86
|
+
sonusai/utils/__init__.py,sha256=D7IFq4Ozy_DRq4pC50YRxGQybqWEDXglCuAgxNIpFyU,2413
|
90
87
|
sonusai/utils/asl_p56.py,sha256=cPUVwXawF7vLJgs4zUtoRGk7Wdbe5KKti_-v_8xIU10,3862
|
91
88
|
sonusai/utils/asr.py,sha256=ubiU3E61HN3r9MhPV7ci37cnLZowll8KfjUS7os3Sho,2822
|
92
89
|
sonusai/utils/asr_functions/__init__.py,sha256=HKGRm_c48tcxlfwqH63m-MvhAoK_pCcw76lxmFmiP_U,63
|
@@ -121,10 +118,11 @@ sonusai/utils/reshape.py,sha256=Ozuh3UlmAS5NCeOK7NR8KgcQacHvgq10pys0VfCnOPU,5746
|
|
121
118
|
sonusai/utils/seconds_to_hms.py,sha256=9Ya9O97txFtTIXZUQw1K8g7b7Xx-ptvUtMUlzsIduTo,260
|
122
119
|
sonusai/utils/stacked_complex.py,sha256=JW6iAa1C-4Tuh4dD5c-D-O-yo-OY5Xm0AKVU0YsqsJU,2782
|
123
120
|
sonusai/utils/stratified_shuffle_split.py,sha256=fcGW8nkZIwUqq1qtxbK_ZH58sYULqZfv7iNBQnKGH-M,6706
|
121
|
+
sonusai/utils/temp_seed.py,sha256=Ava5TCGpvDBtaRx2l-40CuGIjhgLevu1KFfZsgr38qM,218
|
124
122
|
sonusai/utils/write_audio.py,sha256=0lKdaX57N6H-UWdioqmXCJMjwT1eBz5B-bSGqDvloAc,838
|
125
123
|
sonusai/utils/yes_or_no.py,sha256=0h1okjXmDNbJp7rZJFR2V-HFU1GJDm3YFTUVmYExkOU,263
|
126
124
|
sonusai/vars.py,sha256=kBBzuvC8szmdIZEEDA7XXmD765addZKdM2aFipeGO1w,933
|
127
|
-
sonusai-0.
|
128
|
-
sonusai-0.
|
129
|
-
sonusai-0.
|
130
|
-
sonusai-0.
|
125
|
+
sonusai-0.20.2.dist-info/METADATA,sha256=CT_z1tJfku142nH0PL22DT7EEc8bFfJj9qyWcR7v6FU,2535
|
126
|
+
sonusai-0.20.2.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
|
127
|
+
sonusai-0.20.2.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
|
128
|
+
sonusai-0.20.2.dist-info/RECORD,,
|