sonusai 0.19.6__py3-none-any.whl → 0.19.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/__init__.py +1 -1
- sonusai/aawscd_probwrite.py +1 -1
- sonusai/calc_metric_spenh.py +1 -1
- sonusai/genft.py +29 -14
- sonusai/genmetrics.py +60 -42
- sonusai/genmix.py +41 -29
- sonusai/genmixdb.py +56 -64
- sonusai/metrics/calc_class_weights.py +1 -3
- sonusai/metrics/calc_optimal_thresholds.py +2 -2
- sonusai/metrics/calc_phase_distance.py +1 -1
- sonusai/metrics/calc_speech.py +6 -6
- sonusai/metrics/class_summary.py +6 -15
- sonusai/metrics/confusion_matrix_summary.py +11 -27
- sonusai/metrics/one_hot.py +3 -3
- sonusai/metrics/snr_summary.py +7 -7
- sonusai/mixture/__init__.py +2 -17
- sonusai/mixture/augmentation.py +5 -6
- sonusai/mixture/class_count.py +1 -1
- sonusai/mixture/config.py +36 -46
- sonusai/mixture/data_io.py +30 -1
- sonusai/mixture/datatypes.py +29 -40
- sonusai/mixture/db_datatypes.py +1 -1
- sonusai/mixture/feature.py +3 -23
- sonusai/mixture/generation.py +161 -204
- sonusai/mixture/helpers.py +29 -187
- sonusai/mixture/mixdb.py +386 -159
- sonusai/mixture/soundfile_audio.py +1 -1
- sonusai/mixture/sox_audio.py +4 -4
- sonusai/mixture/sox_augmentation.py +1 -1
- sonusai/mixture/target_class_balancing.py +9 -11
- sonusai/mixture/targets.py +23 -20
- sonusai/mixture/torchaudio_audio.py +18 -7
- sonusai/mixture/torchaudio_augmentation.py +3 -4
- sonusai/mixture/truth.py +21 -34
- sonusai/mixture/truth_functions/__init__.py +6 -0
- sonusai/mixture/truth_functions/crm.py +51 -37
- sonusai/mixture/truth_functions/energy.py +95 -50
- sonusai/mixture/truth_functions/file.py +12 -8
- sonusai/mixture/truth_functions/metadata.py +24 -0
- sonusai/mixture/truth_functions/metrics.py +28 -0
- sonusai/mixture/truth_functions/phoneme.py +4 -5
- sonusai/mixture/truth_functions/sed.py +32 -23
- sonusai/mixture/truth_functions/target.py +62 -29
- sonusai/mkwav.py +20 -19
- sonusai/queries/queries.py +9 -15
- sonusai/speech/l2arctic.py +6 -2
- sonusai/summarize_metric_spenh.py +1 -1
- sonusai/utils/__init__.py +1 -0
- sonusai/utils/asr_functions/aaware_whisper.py +1 -1
- sonusai/utils/audio_devices.py +27 -18
- sonusai/utils/docstring.py +6 -3
- sonusai/utils/energy_f.py +5 -3
- sonusai/utils/human_readable_size.py +6 -6
- sonusai/utils/load_object.py +15 -0
- sonusai/utils/onnx_utils.py +2 -2
- sonusai/utils/print_mixture_details.py +3 -3
- {sonusai-0.19.6.dist-info → sonusai-0.19.9.dist-info}/METADATA +2 -2
- {sonusai-0.19.6.dist-info → sonusai-0.19.9.dist-info}/RECORD +60 -58
- sonusai/mixture/truth_functions/datatypes.py +0 -37
- {sonusai-0.19.6.dist-info → sonusai-0.19.9.dist-info}/WHEEL +0 -0
- {sonusai-0.19.6.dist-info → sonusai-0.19.9.dist-info}/entry_points.txt +0 -0
sonusai/mixture/generation.py
CHANGED
@@ -1,17 +1,15 @@
|
|
1
1
|
# ruff: noqa: S608
|
2
|
-
from .datatypes import AudiosT
|
3
2
|
from .datatypes import AudioT
|
4
3
|
from .datatypes import Augmentation
|
5
|
-
from .datatypes import
|
6
|
-
from .datatypes import
|
4
|
+
from .datatypes import AugmentationRule
|
5
|
+
from .datatypes import AugmentedTarget
|
7
6
|
from .datatypes import GenMixData
|
8
|
-
from .datatypes import
|
7
|
+
from .datatypes import ImpulseResponseFile
|
9
8
|
from .datatypes import Mixture
|
10
|
-
from .datatypes import
|
11
|
-
from .datatypes import
|
12
|
-
from .datatypes import
|
13
|
-
from .datatypes import
|
14
|
-
from .datatypes import Targets
|
9
|
+
from .datatypes import NoiseFile
|
10
|
+
from .datatypes import SpectralMask
|
11
|
+
from .datatypes import Target
|
12
|
+
from .datatypes import TargetFile
|
15
13
|
from .datatypes import UniversalSNRGenerator
|
16
14
|
from .mixdb import MixtureDatabase
|
17
15
|
|
@@ -37,7 +35,7 @@ def initialize_db(location: str, test: bool = False) -> None:
|
|
37
35
|
CREATE TABLE truth_parameters(
|
38
36
|
id INTEGER PRIMARY KEY NOT NULL,
|
39
37
|
name TEXT NOT NULL,
|
40
|
-
parameters INTEGER
|
38
|
+
parameters INTEGER)
|
41
39
|
""")
|
42
40
|
|
43
41
|
con.execute("""
|
@@ -121,8 +119,8 @@ def initialize_db(location: str, test: bool = False) -> None:
|
|
121
119
|
id INTEGER PRIMARY KEY NOT NULL,
|
122
120
|
file_id INTEGER NOT NULL,
|
123
121
|
augmentation TEXT NOT NULL,
|
124
|
-
|
125
|
-
|
122
|
+
FOREIGN KEY(file_id) REFERENCES target_file (id),
|
123
|
+
UNIQUE(file_id, augmentation))
|
126
124
|
""")
|
127
125
|
|
128
126
|
con.execute("""
|
@@ -165,11 +163,12 @@ def populate_top_table(location: str, config: dict, test: bool = False) -> None:
|
|
165
163
|
con = db_connection(location=location, readonly=False, test=test)
|
166
164
|
con.execute(
|
167
165
|
"""
|
168
|
-
INSERT INTO top (version, asr_configs, class_balancing, feature, noise_mix_mode, num_classes,
|
166
|
+
INSERT INTO top (id, version, asr_configs, class_balancing, feature, noise_mix_mode, num_classes,
|
169
167
|
seed, mixid_width, speaker_metadata_tiers, textgrid_metadata_tiers)
|
170
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
168
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
171
169
|
""",
|
172
170
|
(
|
171
|
+
1,
|
173
172
|
MIXDB_VERSION,
|
174
173
|
json.dumps(config["asr_configs"]),
|
175
174
|
config["class_balancing"],
|
@@ -271,7 +270,7 @@ def populate_truth_parameters_table(location: str, config: dict, test: bool = Fa
|
|
271
270
|
con.close()
|
272
271
|
|
273
272
|
|
274
|
-
def populate_target_file_table(location: str, target_files:
|
273
|
+
def populate_target_file_table(location: str, target_files: list[TargetFile], test: bool = False) -> None:
|
275
274
|
"""Populate target file table"""
|
276
275
|
import json
|
277
276
|
from pathlib import Path
|
@@ -331,7 +330,7 @@ def populate_target_file_table(location: str, target_files: TargetFiles, test: b
|
|
331
330
|
|
332
331
|
# Update textgrid_metadata_tiers in the top table
|
333
332
|
con.execute(
|
334
|
-
"UPDATE top SET textgrid_metadata_tiers=? WHERE top.id
|
333
|
+
"UPDATE top SET textgrid_metadata_tiers=? WHERE ? = top.id",
|
335
334
|
(json.dumps(sorted(textgrid_metadata_tiers)), 1),
|
336
335
|
)
|
337
336
|
|
@@ -339,7 +338,7 @@ def populate_target_file_table(location: str, target_files: TargetFiles, test: b
|
|
339
338
|
con.close()
|
340
339
|
|
341
340
|
|
342
|
-
def populate_noise_file_table(location: str, noise_files:
|
341
|
+
def populate_noise_file_table(location: str, noise_files: list[NoiseFile], test: bool = False) -> None:
|
343
342
|
"""Populate noise file table"""
|
344
343
|
from .mixdb import db_connection
|
345
344
|
|
@@ -353,7 +352,7 @@ def populate_noise_file_table(location: str, noise_files: NoiseFiles, test: bool
|
|
353
352
|
|
354
353
|
|
355
354
|
def populate_impulse_response_file_table(
|
356
|
-
location: str, impulse_response_files:
|
355
|
+
location: str, impulse_response_files: list[ImpulseResponseFile], test: bool = False
|
357
356
|
) -> None:
|
358
357
|
"""Populate impulse response file table"""
|
359
358
|
import json
|
@@ -383,79 +382,115 @@ def update_mixid_width(location: str, num_mixtures: int, test: bool = False) ->
|
|
383
382
|
|
384
383
|
con = db_connection(location=location, readonly=False, test=test)
|
385
384
|
con.execute(
|
386
|
-
"UPDATE top SET mixid_width=? WHERE top.id
|
385
|
+
"UPDATE top SET mixid_width=? WHERE ? = top.id",
|
387
386
|
(max_text_width(num_mixtures), 1),
|
388
387
|
)
|
389
388
|
con.commit()
|
390
389
|
con.close()
|
391
390
|
|
392
391
|
|
393
|
-
def populate_mixture_table(
|
394
|
-
|
392
|
+
def populate_mixture_table(
|
393
|
+
location: str,
|
394
|
+
noise_mix_mode: str,
|
395
|
+
augmented_targets: list[AugmentedTarget],
|
396
|
+
target_files: list[TargetFile],
|
397
|
+
target_augmentations: list[AugmentationRule],
|
398
|
+
noise_files: list[NoiseFile],
|
399
|
+
noise_augmentations: list[AugmentationRule],
|
400
|
+
spectral_masks: list[SpectralMask],
|
401
|
+
all_snrs: list[UniversalSNRGenerator],
|
402
|
+
mixups: list[int],
|
403
|
+
num_classes: int,
|
404
|
+
feature_step_samples: int,
|
405
|
+
num_ir: int,
|
406
|
+
test: bool = False,
|
407
|
+
) -> tuple[int, int]:
|
408
|
+
"""Generate mixtures and populate mixture table"""
|
395
409
|
from .helpers import from_mixture
|
396
410
|
from .helpers import from_target
|
397
411
|
from .mixdb import db_connection
|
398
412
|
|
399
|
-
|
413
|
+
if noise_mix_mode == "exhaustive":
|
414
|
+
func = _exhaustive_noise_mix
|
415
|
+
elif noise_mix_mode == "non-exhaustive":
|
416
|
+
func = _non_exhaustive_noise_mix
|
417
|
+
elif noise_mix_mode == "non-combinatorial":
|
418
|
+
func = _non_combinatorial_noise_mix
|
419
|
+
else:
|
420
|
+
raise ValueError(f"invalid noise_mix_mode: {noise_mix_mode}")
|
421
|
+
|
422
|
+
used_noise_files, used_noise_samples, mixtures = func(
|
423
|
+
augmented_targets=augmented_targets,
|
424
|
+
target_files=target_files,
|
425
|
+
target_augmentations=target_augmentations,
|
426
|
+
noise_files=noise_files,
|
427
|
+
noise_augmentations=noise_augmentations,
|
428
|
+
spectral_masks=spectral_masks,
|
429
|
+
all_snrs=all_snrs,
|
430
|
+
mixups=mixups,
|
431
|
+
num_classes=num_classes,
|
432
|
+
feature_step_samples=feature_step_samples,
|
433
|
+
num_ir=num_ir,
|
434
|
+
)
|
400
435
|
|
436
|
+
con = db_connection(location=location, readonly=False, test=test)
|
401
437
|
# Populate target table
|
402
|
-
targets: list[tuple[int, str, float]] = []
|
403
438
|
for mixture in mixtures:
|
404
439
|
for target in mixture.targets:
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
440
|
+
con.execute(
|
441
|
+
"""
|
442
|
+
INSERT OR IGNORE INTO target (file_id, augmentation)
|
443
|
+
VALUES (?, ?)
|
444
|
+
""",
|
445
|
+
from_target(target),
|
446
|
+
)
|
410
447
|
|
411
448
|
# Populate mixture table
|
412
|
-
|
413
|
-
|
414
|
-
cur.execute(
|
449
|
+
for m_id, mixture in enumerate(mixtures):
|
450
|
+
con.execute(
|
415
451
|
"""
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
from_mixture(mixture),
|
452
|
+
INSERT INTO mixture (id, name, noise_file_id, noise_augmentation, noise_offset, noise_snr_gain, random_snr,
|
453
|
+
snr, samples, spectral_mask_id, spectral_mask_seed, target_snr_gain)
|
454
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
455
|
+
""",
|
456
|
+
(m_id + 1, *from_mixture(mixture)),
|
421
457
|
)
|
422
458
|
|
423
|
-
mixture_id = cur.lastrowid
|
424
459
|
for target in mixture.targets:
|
425
460
|
target_id = con.execute(
|
426
461
|
"""
|
427
|
-
|
428
|
-
|
429
|
-
|
462
|
+
SELECT target.id
|
463
|
+
FROM target
|
464
|
+
WHERE ? = target.file_id AND ? = target.augmentation
|
430
465
|
""",
|
431
466
|
from_target(target),
|
432
467
|
).fetchone()[0]
|
433
468
|
con.execute(
|
434
469
|
"INSERT INTO mixture_target (mixture_id, target_id) VALUES (?, ?)",
|
435
|
-
(
|
470
|
+
(m_id + 1, target_id),
|
436
471
|
)
|
437
472
|
|
438
473
|
con.commit()
|
439
474
|
con.close()
|
440
475
|
|
476
|
+
return used_noise_files, used_noise_samples
|
441
477
|
|
442
|
-
def update_mixture(mixdb: MixtureDatabase, mixture: Mixture, with_data: bool = False) -> tuple[Mixture, GenMixData]:
|
443
|
-
"""Update mixture record with name and gains
|
444
478
|
|
445
|
-
|
446
|
-
|
447
|
-
:param with_data: Return audio data
|
448
|
-
:return: Generated audio data (if requested)
|
449
|
-
"""
|
479
|
+
def update_mixture_table(location: str, m_id: int, with_data: bool = False, test: bool = False) -> GenMixData:
|
480
|
+
"""Update mixture record with name and gains"""
|
450
481
|
from .audio import get_next_noise
|
451
482
|
from .augmentation import apply_gain
|
452
483
|
from .datatypes import GenMixData
|
484
|
+
from .helpers import from_mixture
|
453
485
|
from .helpers import get_target
|
486
|
+
from .mixdb import db_connection
|
454
487
|
|
488
|
+
mixdb = MixtureDatabase(location, test)
|
489
|
+
mixture = mixdb.mixture(m_id)
|
455
490
|
mixture, targets_audio = _initialize_targets_audio(mixdb, mixture)
|
456
491
|
|
457
492
|
noise_audio = _augmented_noise_audio(mixdb, mixture)
|
458
|
-
noise_audio = get_next_noise(audio=noise_audio, offset=mixture.
|
493
|
+
noise_audio = get_next_noise(audio=noise_audio, offset=mixture.noise_offset, length=mixture.samples)
|
459
494
|
|
460
495
|
# Apply IR and sum targets audio before initializing the mixture SNR gains
|
461
496
|
target_audio = get_target(mixdb, mixture, targets_audio)
|
@@ -466,8 +501,29 @@ def update_mixture(mixdb: MixtureDatabase, mixture: Mixture, with_data: bool = F
|
|
466
501
|
|
467
502
|
mixture.name = f"{int(mixture.name):0{mixdb.mixid_width}}"
|
468
503
|
|
504
|
+
con = db_connection(location=location, readonly=False, test=test)
|
505
|
+
con.execute(
|
506
|
+
"""
|
507
|
+
UPDATE mixture SET name=?,
|
508
|
+
noise_file_id=?,
|
509
|
+
noise_augmentation=?,
|
510
|
+
noise_offset=?,
|
511
|
+
noise_snr_gain=?,
|
512
|
+
random_snr=?,
|
513
|
+
snr=?,
|
514
|
+
samples=?,
|
515
|
+
spectral_mask_id=?,
|
516
|
+
spectral_mask_seed=?,
|
517
|
+
target_snr_gain=?
|
518
|
+
WHERE ? = mixture.id
|
519
|
+
""",
|
520
|
+
(*from_mixture(mixture), m_id + 1),
|
521
|
+
)
|
522
|
+
con.commit()
|
523
|
+
con.close()
|
524
|
+
|
469
525
|
if not with_data:
|
470
|
-
return
|
526
|
+
return GenMixData()
|
471
527
|
|
472
528
|
# Apply SNR gains
|
473
529
|
targets_audio = [apply_gain(audio=target_audio, gain=mixture.target_snr_gain) for target_audio in targets_audio]
|
@@ -477,7 +533,7 @@ def update_mixture(mixdb: MixtureDatabase, mixture: Mixture, with_data: bool = F
|
|
477
533
|
target_audio = get_target(mixdb, mixture, targets_audio)
|
478
534
|
mixture_audio = target_audio + noise_audio
|
479
535
|
|
480
|
-
return
|
536
|
+
return GenMixData(
|
481
537
|
mixture=mixture_audio,
|
482
538
|
targets=targets_audio,
|
483
539
|
target=target_audio,
|
@@ -502,7 +558,7 @@ def _augmented_noise_audio(mixdb: MixtureDatabase, mixture: Mixture) -> AudioT:
|
|
502
558
|
return audio
|
503
559
|
|
504
560
|
|
505
|
-
def _initialize_targets_audio(mixdb: MixtureDatabase, mixture: Mixture) -> tuple[Mixture,
|
561
|
+
def _initialize_targets_audio(mixdb: MixtureDatabase, mixture: Mixture) -> tuple[Mixture, list[AudioT]]:
|
506
562
|
from .augmentation import apply_augmentation
|
507
563
|
from .augmentation import pad_audio_to_length
|
508
564
|
|
@@ -517,13 +573,6 @@ def _initialize_targets_audio(mixdb: MixtureDatabase, mixture: Mixture) -> tuple
|
|
517
573
|
)
|
518
574
|
)
|
519
575
|
|
520
|
-
# target_gain is used to back out the gain augmentation in order to return the target audio
|
521
|
-
# to its normalized level when calculating truth (if needed).
|
522
|
-
if target.augmentation.gain is not None:
|
523
|
-
target.gain = round(10 ** (target.augmentation.gain / 20), ndigits=5)
|
524
|
-
else:
|
525
|
-
target.gain = 1
|
526
|
-
|
527
576
|
mixture.samples = max([len(item) for item in targets_audio])
|
528
577
|
|
529
578
|
for idx in range(len(targets_audio)):
|
@@ -540,14 +589,11 @@ def _initialize_mixture_gains(
|
|
540
589
|
from sonusai.utils import asl_p56
|
541
590
|
from sonusai.utils import db_to_linear
|
542
591
|
|
543
|
-
if mixture.
|
592
|
+
if mixture.is_noise_only:
|
544
593
|
# Special case for zeroing out target data
|
545
594
|
mixture.target_snr_gain = 0
|
546
595
|
mixture.noise_snr_gain = 1
|
547
|
-
|
548
|
-
for target in mixture.targets:
|
549
|
-
target.gain = 0
|
550
|
-
elif mixture.snr > 96:
|
596
|
+
elif mixture.is_target_only:
|
551
597
|
# Special case for zeroing out noise data
|
552
598
|
mixture.target_snr_gain = 1
|
553
599
|
mixture.noise_snr_gain = 0
|
@@ -598,98 +644,20 @@ def _initialize_mixture_gains(
|
|
598
644
|
return mixture
|
599
645
|
|
600
646
|
|
601
|
-
def generate_mixtures(
|
602
|
-
noise_mix_mode: str,
|
603
|
-
augmented_targets: AugmentedTargets,
|
604
|
-
target_files: TargetFiles,
|
605
|
-
target_augmentations: AugmentationRules,
|
606
|
-
noise_files: NoiseFiles,
|
607
|
-
noise_augmentations: AugmentationRules,
|
608
|
-
spectral_masks: SpectralMasks,
|
609
|
-
all_snrs: list[UniversalSNRGenerator],
|
610
|
-
mixups: list[int],
|
611
|
-
num_classes: int,
|
612
|
-
feature_step_samples: int,
|
613
|
-
num_ir: int,
|
614
|
-
) -> tuple[int, int, Mixtures]:
|
615
|
-
"""Generate mixtures
|
616
|
-
|
617
|
-
:param noise_mix_mode: Noise mix mode
|
618
|
-
:param augmented_targets: List of augmented targets
|
619
|
-
:param target_files: List of target files
|
620
|
-
:param target_augmentations: List of target augmentations
|
621
|
-
:param noise_files: List of noise files
|
622
|
-
:param noise_augmentations: List of noise augmentations
|
623
|
-
:param spectral_masks: List of spectral masks
|
624
|
-
:param all_snrs: List of all SNRs
|
625
|
-
:param mixups: List of mixup values
|
626
|
-
:param num_classes: Number of classes
|
627
|
-
:param feature_step_samples: Number of samples in a feature step
|
628
|
-
:param num_ir: Number of impulse response files
|
629
|
-
:return: (Number of noise files used, number of noise samples used, list of mixture records)
|
630
|
-
"""
|
631
|
-
if noise_mix_mode == "exhaustive":
|
632
|
-
return _exhaustive_noise_mix(
|
633
|
-
augmented_targets=augmented_targets,
|
634
|
-
target_files=target_files,
|
635
|
-
target_augmentations=target_augmentations,
|
636
|
-
noise_files=noise_files,
|
637
|
-
noise_augmentations=noise_augmentations,
|
638
|
-
spectral_masks=spectral_masks,
|
639
|
-
all_snrs=all_snrs,
|
640
|
-
mixups=mixups,
|
641
|
-
num_classes=num_classes,
|
642
|
-
feature_step_samples=feature_step_samples,
|
643
|
-
num_ir=num_ir,
|
644
|
-
)
|
645
|
-
|
646
|
-
if noise_mix_mode == "non-exhaustive":
|
647
|
-
return _non_exhaustive_noise_mix(
|
648
|
-
augmented_targets=augmented_targets,
|
649
|
-
target_files=target_files,
|
650
|
-
target_augmentations=target_augmentations,
|
651
|
-
noise_files=noise_files,
|
652
|
-
noise_augmentations=noise_augmentations,
|
653
|
-
spectral_masks=spectral_masks,
|
654
|
-
all_snrs=all_snrs,
|
655
|
-
mixups=mixups,
|
656
|
-
num_classes=num_classes,
|
657
|
-
feature_step_samples=feature_step_samples,
|
658
|
-
num_ir=num_ir,
|
659
|
-
)
|
660
|
-
|
661
|
-
if noise_mix_mode == "non-combinatorial":
|
662
|
-
return _non_combinatorial_noise_mix(
|
663
|
-
augmented_targets=augmented_targets,
|
664
|
-
target_files=target_files,
|
665
|
-
target_augmentations=target_augmentations,
|
666
|
-
noise_files=noise_files,
|
667
|
-
noise_augmentations=noise_augmentations,
|
668
|
-
spectral_masks=spectral_masks,
|
669
|
-
all_snrs=all_snrs,
|
670
|
-
mixups=mixups,
|
671
|
-
num_classes=num_classes,
|
672
|
-
feature_step_samples=feature_step_samples,
|
673
|
-
num_ir=num_ir,
|
674
|
-
)
|
675
|
-
|
676
|
-
raise ValueError(f"invalid noise_mix_mode: {noise_mix_mode}")
|
677
|
-
|
678
|
-
|
679
647
|
def _exhaustive_noise_mix(
|
680
|
-
augmented_targets:
|
681
|
-
target_files:
|
682
|
-
target_augmentations:
|
683
|
-
noise_files:
|
684
|
-
noise_augmentations:
|
685
|
-
spectral_masks:
|
648
|
+
augmented_targets: list[AugmentedTarget],
|
649
|
+
target_files: list[TargetFile],
|
650
|
+
target_augmentations: list[AugmentationRule],
|
651
|
+
noise_files: list[NoiseFile],
|
652
|
+
noise_augmentations: list[AugmentationRule],
|
653
|
+
spectral_masks: list[SpectralMask],
|
686
654
|
all_snrs: list[UniversalSNRGenerator],
|
687
655
|
mixups: list[int],
|
688
656
|
num_classes: int,
|
689
657
|
feature_step_samples: int,
|
690
658
|
num_ir: int,
|
691
|
-
) -> tuple[int, int,
|
692
|
-
"""Use every noise/augmentation with every target/augmentation"""
|
659
|
+
) -> tuple[int, int, list[Mixture]]:
|
660
|
+
"""Use every noise/augmentation with every target/augmentation+interferences/augmentation"""
|
693
661
|
from random import randint
|
694
662
|
|
695
663
|
import numpy as np
|
@@ -697,12 +665,10 @@ def _exhaustive_noise_mix(
|
|
697
665
|
from .augmentation import augmentation_from_rule
|
698
666
|
from .augmentation import estimate_augmented_length_from_length
|
699
667
|
from .datatypes import Mixture
|
700
|
-
from .datatypes import Mixtures
|
701
668
|
from .datatypes import Noise
|
702
669
|
from .datatypes import UniversalSNR
|
703
670
|
from .targets import get_augmented_target_ids_for_mixup
|
704
671
|
|
705
|
-
mixtures: Mixtures = []
|
706
672
|
m_id = 0
|
707
673
|
used_noise_files = len(noise_files) * len(noise_augmentations)
|
708
674
|
used_noise_samples = 0
|
@@ -717,6 +683,8 @@ def _exhaustive_noise_mix(
|
|
717
683
|
)
|
718
684
|
for mixup in mixups
|
719
685
|
]
|
686
|
+
|
687
|
+
mixtures: list[Mixture] = []
|
720
688
|
for noise_file_id in range(len(noise_files)):
|
721
689
|
for noise_augmentation_rule in noise_augmentations:
|
722
690
|
noise_augmentation = augmentation_from_rule(noise_augmentation_rule, num_ir)
|
@@ -743,11 +711,8 @@ def _exhaustive_noise_mix(
|
|
743
711
|
Mixture(
|
744
712
|
targets=targets,
|
745
713
|
name=str(m_id),
|
746
|
-
noise=Noise(
|
747
|
-
|
748
|
-
augmentation=noise_augmentation,
|
749
|
-
offset=noise_offset,
|
750
|
-
),
|
714
|
+
noise=Noise(file_id=noise_file_id + 1, augmentation=noise_augmentation),
|
715
|
+
noise_offset=noise_offset,
|
751
716
|
samples=target_length,
|
752
717
|
snr=UniversalSNR(value=snr.value, is_random=snr.is_random),
|
753
718
|
spectral_mask_id=spectral_mask_id + 1,
|
@@ -763,32 +728,30 @@ def _exhaustive_noise_mix(
|
|
763
728
|
|
764
729
|
|
765
730
|
def _non_exhaustive_noise_mix(
|
766
|
-
augmented_targets:
|
767
|
-
target_files:
|
768
|
-
target_augmentations:
|
769
|
-
noise_files:
|
770
|
-
noise_augmentations:
|
771
|
-
spectral_masks:
|
731
|
+
augmented_targets: list[AugmentedTarget],
|
732
|
+
target_files: list[TargetFile],
|
733
|
+
target_augmentations: list[AugmentationRule],
|
734
|
+
noise_files: list[NoiseFile],
|
735
|
+
noise_augmentations: list[AugmentationRule],
|
736
|
+
spectral_masks: list[SpectralMask],
|
772
737
|
all_snrs: list[UniversalSNRGenerator],
|
773
738
|
mixups: list[int],
|
774
739
|
num_classes: int,
|
775
740
|
feature_step_samples: int,
|
776
741
|
num_ir: int,
|
777
|
-
) -> tuple[int, int,
|
778
|
-
"""Cycle through every target/augmentation without necessarily using all
|
779
|
-
(reduced data set).
|
742
|
+
) -> tuple[int, int, list[Mixture]]:
|
743
|
+
"""Cycle through every target/augmentation+interferences/augmentation without necessarily using all
|
744
|
+
noise/augmentation combinations (reduced data set).
|
780
745
|
"""
|
781
746
|
from random import randint
|
782
747
|
|
783
748
|
import numpy as np
|
784
749
|
|
785
750
|
from .datatypes import Mixture
|
786
|
-
from .datatypes import Mixtures
|
787
751
|
from .datatypes import Noise
|
788
752
|
from .datatypes import UniversalSNR
|
789
753
|
from .targets import get_augmented_target_ids_for_mixup
|
790
754
|
|
791
|
-
mixtures: Mixtures = []
|
792
755
|
m_id = 0
|
793
756
|
used_noise_files = set()
|
794
757
|
used_noise_samples = 0
|
@@ -806,6 +769,8 @@ def _non_exhaustive_noise_mix(
|
|
806
769
|
)
|
807
770
|
for mixup in mixups
|
808
771
|
]
|
772
|
+
|
773
|
+
mixtures: list[Mixture] = []
|
809
774
|
for mixup in augmented_target_indices_for_mixups:
|
810
775
|
for augmented_target_indices in mixup:
|
811
776
|
targets, target_length = _get_target_info(
|
@@ -841,11 +806,8 @@ def _non_exhaustive_noise_mix(
|
|
841
806
|
Mixture(
|
842
807
|
targets=targets,
|
843
808
|
name=str(m_id),
|
844
|
-
noise=Noise(
|
845
|
-
|
846
|
-
augmentation=noise_augmentation,
|
847
|
-
offset=noise_offset,
|
848
|
-
),
|
809
|
+
noise=Noise(file_id=noise_file_id + 1, augmentation=noise_augmentation),
|
810
|
+
noise_offset=noise_offset,
|
849
811
|
samples=target_length,
|
850
812
|
snr=UniversalSNR(value=snr.value, is_random=snr.is_random),
|
851
813
|
spectral_mask_id=spectral_mask_id + 1,
|
@@ -858,21 +820,21 @@ def _non_exhaustive_noise_mix(
|
|
858
820
|
|
859
821
|
|
860
822
|
def _non_combinatorial_noise_mix(
|
861
|
-
augmented_targets:
|
862
|
-
target_files:
|
863
|
-
target_augmentations:
|
864
|
-
noise_files:
|
865
|
-
noise_augmentations:
|
866
|
-
spectral_masks:
|
823
|
+
augmented_targets: list[AugmentedTarget],
|
824
|
+
target_files: list[TargetFile],
|
825
|
+
target_augmentations: list[AugmentationRule],
|
826
|
+
noise_files: list[NoiseFile],
|
827
|
+
noise_augmentations: list[AugmentationRule],
|
828
|
+
spectral_masks: list[SpectralMask],
|
867
829
|
all_snrs: list[UniversalSNRGenerator],
|
868
830
|
mixups: list[int],
|
869
831
|
num_classes: int,
|
870
832
|
feature_step_samples: int,
|
871
833
|
num_ir: int,
|
872
|
-
) -> tuple[int, int,
|
873
|
-
"""Combine a target/augmentation with a single cut of a noise/augmentation
|
874
|
-
(each target/augmentation does not use each noise/augmentation).
|
875
|
-
beginning if end of noise/augmentation is reached.
|
834
|
+
) -> tuple[int, int, list[Mixture]]:
|
835
|
+
"""Combine a target/augmentation+interferences/augmentation with a single cut of a noise/augmentation
|
836
|
+
non-exhaustively (each target/augmentation+interferences/augmentation does not use each noise/augmentation).
|
837
|
+
Cut has random start and loop back to beginning if end of noise/augmentation is reached.
|
876
838
|
"""
|
877
839
|
from random import choice
|
878
840
|
from random import randint
|
@@ -880,12 +842,10 @@ def _non_combinatorial_noise_mix(
|
|
880
842
|
import numpy as np
|
881
843
|
|
882
844
|
from .datatypes import Mixture
|
883
|
-
from .datatypes import Mixtures
|
884
845
|
from .datatypes import Noise
|
885
846
|
from .datatypes import UniversalSNR
|
886
847
|
from .targets import get_augmented_target_ids_for_mixup
|
887
848
|
|
888
|
-
mixtures: Mixtures = []
|
889
849
|
m_id = 0
|
890
850
|
used_noise_files = set()
|
891
851
|
used_noise_samples = 0
|
@@ -902,6 +862,8 @@ def _non_combinatorial_noise_mix(
|
|
902
862
|
)
|
903
863
|
for mixup in mixups
|
904
864
|
]
|
865
|
+
|
866
|
+
mixtures: list[Mixture] = []
|
905
867
|
for mixup in augmented_target_indices_for_mixups:
|
906
868
|
for augmented_target_indices in mixup:
|
907
869
|
targets, target_length = _get_target_info(
|
@@ -935,11 +897,8 @@ def _non_combinatorial_noise_mix(
|
|
935
897
|
Mixture(
|
936
898
|
targets=targets,
|
937
899
|
name=str(m_id),
|
938
|
-
noise=Noise(
|
939
|
-
|
940
|
-
augmentation=noise_augmentation,
|
941
|
-
offset=choice(range(noise_length)), # noqa: S311
|
942
|
-
),
|
900
|
+
noise=Noise(file_id=noise_file_id + 1, augmentation=noise_augmentation),
|
901
|
+
noise_offset=choice(range(noise_length)), # noqa: S311
|
943
902
|
samples=target_length,
|
944
903
|
snr=UniversalSNR(value=snr.value, is_random=snr.is_random),
|
945
904
|
spectral_mask_id=spectral_mask_id + 1,
|
@@ -954,8 +913,8 @@ def _non_combinatorial_noise_mix(
|
|
954
913
|
def _get_next_noise_indices(
|
955
914
|
noise_file_id: int | None,
|
956
915
|
noise_augmentation_id: int | None,
|
957
|
-
noise_files:
|
958
|
-
noise_augmentations:
|
916
|
+
noise_files: list[NoiseFile],
|
917
|
+
noise_augmentations: list[AugmentationRule],
|
959
918
|
num_ir: int,
|
960
919
|
) -> tuple[int, int, Augmentation, int]:
|
961
920
|
from .augmentation import augmentation_from_rule
|
@@ -984,8 +943,8 @@ def _get_next_noise_offset(
|
|
984
943
|
noise_augmentation_id: int | None,
|
985
944
|
noise_offset: int | None,
|
986
945
|
target_length: int,
|
987
|
-
noise_files:
|
988
|
-
noise_augmentations:
|
946
|
+
noise_files: list[NoiseFile],
|
947
|
+
noise_augmentations: list[AugmentationRule],
|
989
948
|
num_ir: int,
|
990
949
|
) -> tuple[int, int, Augmentation, int]:
|
991
950
|
from .augmentation import augmentation_from_rule
|
@@ -1018,18 +977,16 @@ def _get_next_noise_offset(
|
|
1018
977
|
|
1019
978
|
def _get_target_info(
|
1020
979
|
augmented_target_ids: list[int],
|
1021
|
-
augmented_targets:
|
1022
|
-
target_files:
|
1023
|
-
target_augmentations:
|
980
|
+
augmented_targets: list[AugmentedTarget],
|
981
|
+
target_files: list[TargetFile],
|
982
|
+
target_augmentations: list[AugmentationRule],
|
1024
983
|
feature_step_samples: int,
|
1025
984
|
num_ir: int,
|
1026
|
-
) -> tuple[
|
985
|
+
) -> tuple[list[Target], int]:
|
1027
986
|
from .augmentation import augmentation_from_rule
|
1028
987
|
from .augmentation import estimate_augmented_length_from_length
|
1029
|
-
from .datatypes import Target
|
1030
|
-
from .datatypes import Targets
|
1031
988
|
|
1032
|
-
mixups:
|
989
|
+
mixups: list[Target] = []
|
1033
990
|
target_length = 0
|
1034
991
|
for idx in augmented_target_ids:
|
1035
992
|
tfi = augmented_targets[idx].target_id
|
@@ -1073,7 +1030,7 @@ def _get_textgrid_tiers_from_target_file(target_file: str) -> list[str]:
|
|
1073
1030
|
return sorted(tg.tierNames)
|
1074
1031
|
|
1075
1032
|
|
1076
|
-
def _populate_speaker_table(location: str, target_files:
|
1033
|
+
def _populate_speaker_table(location: str, target_files: list[TargetFile], test: bool = False) -> None:
|
1077
1034
|
"""Populate speaker table"""
|
1078
1035
|
import json
|
1079
1036
|
from pathlib import Path
|
@@ -1122,7 +1079,7 @@ def _populate_speaker_table(location: str, target_files: TargetFiles, test: bool
|
|
1122
1079
|
if description[0] not in ("id", "parent")
|
1123
1080
|
]
|
1124
1081
|
con.execute(
|
1125
|
-
"UPDATE top SET speaker_metadata_tiers=? WHERE top.id
|
1082
|
+
"UPDATE top SET speaker_metadata_tiers=? WHERE ? = top.id",
|
1126
1083
|
(json.dumps(tiers), 1),
|
1127
1084
|
)
|
1128
1085
|
|
@@ -1133,7 +1090,7 @@ def _populate_speaker_table(location: str, target_files: TargetFiles, test: bool
|
|
1133
1090
|
con.close()
|
1134
1091
|
|
1135
1092
|
|
1136
|
-
def _populate_truth_config_table(location: str, target_files:
|
1093
|
+
def _populate_truth_config_table(location: str, target_files: list[TargetFile], test: bool = False) -> None:
|
1137
1094
|
"""Populate truth_config table"""
|
1138
1095
|
import json
|
1139
1096
|
|