sonusai 0.19.6__py3-none-any.whl → 0.19.9__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (61):
  1. sonusai/__init__.py +1 -1
  2. sonusai/aawscd_probwrite.py +1 -1
  3. sonusai/calc_metric_spenh.py +1 -1
  4. sonusai/genft.py +29 -14
  5. sonusai/genmetrics.py +60 -42
  6. sonusai/genmix.py +41 -29
  7. sonusai/genmixdb.py +56 -64
  8. sonusai/metrics/calc_class_weights.py +1 -3
  9. sonusai/metrics/calc_optimal_thresholds.py +2 -2
  10. sonusai/metrics/calc_phase_distance.py +1 -1
  11. sonusai/metrics/calc_speech.py +6 -6
  12. sonusai/metrics/class_summary.py +6 -15
  13. sonusai/metrics/confusion_matrix_summary.py +11 -27
  14. sonusai/metrics/one_hot.py +3 -3
  15. sonusai/metrics/snr_summary.py +7 -7
  16. sonusai/mixture/__init__.py +2 -17
  17. sonusai/mixture/augmentation.py +5 -6
  18. sonusai/mixture/class_count.py +1 -1
  19. sonusai/mixture/config.py +36 -46
  20. sonusai/mixture/data_io.py +30 -1
  21. sonusai/mixture/datatypes.py +29 -40
  22. sonusai/mixture/db_datatypes.py +1 -1
  23. sonusai/mixture/feature.py +3 -23
  24. sonusai/mixture/generation.py +161 -204
  25. sonusai/mixture/helpers.py +29 -187
  26. sonusai/mixture/mixdb.py +386 -159
  27. sonusai/mixture/soundfile_audio.py +1 -1
  28. sonusai/mixture/sox_audio.py +4 -4
  29. sonusai/mixture/sox_augmentation.py +1 -1
  30. sonusai/mixture/target_class_balancing.py +9 -11
  31. sonusai/mixture/targets.py +23 -20
  32. sonusai/mixture/torchaudio_audio.py +18 -7
  33. sonusai/mixture/torchaudio_augmentation.py +3 -4
  34. sonusai/mixture/truth.py +21 -34
  35. sonusai/mixture/truth_functions/__init__.py +6 -0
  36. sonusai/mixture/truth_functions/crm.py +51 -37
  37. sonusai/mixture/truth_functions/energy.py +95 -50
  38. sonusai/mixture/truth_functions/file.py +12 -8
  39. sonusai/mixture/truth_functions/metadata.py +24 -0
  40. sonusai/mixture/truth_functions/metrics.py +28 -0
  41. sonusai/mixture/truth_functions/phoneme.py +4 -5
  42. sonusai/mixture/truth_functions/sed.py +32 -23
  43. sonusai/mixture/truth_functions/target.py +62 -29
  44. sonusai/mkwav.py +20 -19
  45. sonusai/queries/queries.py +9 -15
  46. sonusai/speech/l2arctic.py +6 -2
  47. sonusai/summarize_metric_spenh.py +1 -1
  48. sonusai/utils/__init__.py +1 -0
  49. sonusai/utils/asr_functions/aaware_whisper.py +1 -1
  50. sonusai/utils/audio_devices.py +27 -18
  51. sonusai/utils/docstring.py +6 -3
  52. sonusai/utils/energy_f.py +5 -3
  53. sonusai/utils/human_readable_size.py +6 -6
  54. sonusai/utils/load_object.py +15 -0
  55. sonusai/utils/onnx_utils.py +2 -2
  56. sonusai/utils/print_mixture_details.py +3 -3
  57. {sonusai-0.19.6.dist-info → sonusai-0.19.9.dist-info}/METADATA +2 -2
  58. {sonusai-0.19.6.dist-info → sonusai-0.19.9.dist-info}/RECORD +60 -58
  59. sonusai/mixture/truth_functions/datatypes.py +0 -37
  60. {sonusai-0.19.6.dist-info → sonusai-0.19.9.dist-info}/WHEEL +0 -0
  61. {sonusai-0.19.6.dist-info → sonusai-0.19.9.dist-info}/entry_points.txt +0 -0
@@ -1,17 +1,15 @@
1
1
  # ruff: noqa: S608
2
- from .datatypes import AudiosT
3
2
  from .datatypes import AudioT
4
3
  from .datatypes import Augmentation
5
- from .datatypes import AugmentationRules
6
- from .datatypes import AugmentedTargets
4
+ from .datatypes import AugmentationRule
5
+ from .datatypes import AugmentedTarget
7
6
  from .datatypes import GenMixData
8
- from .datatypes import ImpulseResponseFiles
7
+ from .datatypes import ImpulseResponseFile
9
8
  from .datatypes import Mixture
10
- from .datatypes import Mixtures
11
- from .datatypes import NoiseFiles
12
- from .datatypes import SpectralMasks
13
- from .datatypes import TargetFiles
14
- from .datatypes import Targets
9
+ from .datatypes import NoiseFile
10
+ from .datatypes import SpectralMask
11
+ from .datatypes import Target
12
+ from .datatypes import TargetFile
15
13
  from .datatypes import UniversalSNRGenerator
16
14
  from .mixdb import MixtureDatabase
17
15
 
@@ -37,7 +35,7 @@ def initialize_db(location: str, test: bool = False) -> None:
37
35
  CREATE TABLE truth_parameters(
38
36
  id INTEGER PRIMARY KEY NOT NULL,
39
37
  name TEXT NOT NULL,
40
- parameters INTEGER NOT NULL)
38
+ parameters INTEGER)
41
39
  """)
42
40
 
43
41
  con.execute("""
@@ -121,8 +119,8 @@ def initialize_db(location: str, test: bool = False) -> None:
121
119
  id INTEGER PRIMARY KEY NOT NULL,
122
120
  file_id INTEGER NOT NULL,
123
121
  augmentation TEXT NOT NULL,
124
- gain FLOAT,
125
- FOREIGN KEY(file_id) REFERENCES target_file (id))
122
+ FOREIGN KEY(file_id) REFERENCES target_file (id),
123
+ UNIQUE(file_id, augmentation))
126
124
  """)
127
125
 
128
126
  con.execute("""
@@ -165,11 +163,12 @@ def populate_top_table(location: str, config: dict, test: bool = False) -> None:
165
163
  con = db_connection(location=location, readonly=False, test=test)
166
164
  con.execute(
167
165
  """
168
- INSERT INTO top (version, asr_configs, class_balancing, feature, noise_mix_mode, num_classes,
166
+ INSERT INTO top (id, version, asr_configs, class_balancing, feature, noise_mix_mode, num_classes,
169
167
  seed, mixid_width, speaker_metadata_tiers, textgrid_metadata_tiers)
170
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
168
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
171
169
  """,
172
170
  (
171
+ 1,
173
172
  MIXDB_VERSION,
174
173
  json.dumps(config["asr_configs"]),
175
174
  config["class_balancing"],
@@ -271,7 +270,7 @@ def populate_truth_parameters_table(location: str, config: dict, test: bool = Fa
271
270
  con.close()
272
271
 
273
272
 
274
- def populate_target_file_table(location: str, target_files: TargetFiles, test: bool = False) -> None:
273
+ def populate_target_file_table(location: str, target_files: list[TargetFile], test: bool = False) -> None:
275
274
  """Populate target file table"""
276
275
  import json
277
276
  from pathlib import Path
@@ -331,7 +330,7 @@ def populate_target_file_table(location: str, target_files: TargetFiles, test: b
331
330
 
332
331
  # Update textgrid_metadata_tiers in the top table
333
332
  con.execute(
334
- "UPDATE top SET textgrid_metadata_tiers=? WHERE top.id = ?",
333
+ "UPDATE top SET textgrid_metadata_tiers=? WHERE ? = top.id",
335
334
  (json.dumps(sorted(textgrid_metadata_tiers)), 1),
336
335
  )
337
336
 
@@ -339,7 +338,7 @@ def populate_target_file_table(location: str, target_files: TargetFiles, test: b
339
338
  con.close()
340
339
 
341
340
 
342
- def populate_noise_file_table(location: str, noise_files: NoiseFiles, test: bool = False) -> None:
341
+ def populate_noise_file_table(location: str, noise_files: list[NoiseFile], test: bool = False) -> None:
343
342
  """Populate noise file table"""
344
343
  from .mixdb import db_connection
345
344
 
@@ -353,7 +352,7 @@ def populate_noise_file_table(location: str, noise_files: NoiseFiles, test: bool
353
352
 
354
353
 
355
354
  def populate_impulse_response_file_table(
356
- location: str, impulse_response_files: ImpulseResponseFiles, test: bool = False
355
+ location: str, impulse_response_files: list[ImpulseResponseFile], test: bool = False
357
356
  ) -> None:
358
357
  """Populate impulse response file table"""
359
358
  import json
@@ -383,79 +382,115 @@ def update_mixid_width(location: str, num_mixtures: int, test: bool = False) ->
383
382
 
384
383
  con = db_connection(location=location, readonly=False, test=test)
385
384
  con.execute(
386
- "UPDATE top SET mixid_width=? WHERE top.id = ?",
385
+ "UPDATE top SET mixid_width=? WHERE ? = top.id",
387
386
  (max_text_width(num_mixtures), 1),
388
387
  )
389
388
  con.commit()
390
389
  con.close()
391
390
 
392
391
 
393
- def populate_mixture_table(location: str, mixtures: Mixtures, test: bool = False) -> None:
394
- """Populate mixture table"""
392
+ def populate_mixture_table(
393
+ location: str,
394
+ noise_mix_mode: str,
395
+ augmented_targets: list[AugmentedTarget],
396
+ target_files: list[TargetFile],
397
+ target_augmentations: list[AugmentationRule],
398
+ noise_files: list[NoiseFile],
399
+ noise_augmentations: list[AugmentationRule],
400
+ spectral_masks: list[SpectralMask],
401
+ all_snrs: list[UniversalSNRGenerator],
402
+ mixups: list[int],
403
+ num_classes: int,
404
+ feature_step_samples: int,
405
+ num_ir: int,
406
+ test: bool = False,
407
+ ) -> tuple[int, int]:
408
+ """Generate mixtures and populate mixture table"""
395
409
  from .helpers import from_mixture
396
410
  from .helpers import from_target
397
411
  from .mixdb import db_connection
398
412
 
399
- con = db_connection(location=location, readonly=False, test=test)
413
+ if noise_mix_mode == "exhaustive":
414
+ func = _exhaustive_noise_mix
415
+ elif noise_mix_mode == "non-exhaustive":
416
+ func = _non_exhaustive_noise_mix
417
+ elif noise_mix_mode == "non-combinatorial":
418
+ func = _non_combinatorial_noise_mix
419
+ else:
420
+ raise ValueError(f"invalid noise_mix_mode: {noise_mix_mode}")
421
+
422
+ used_noise_files, used_noise_samples, mixtures = func(
423
+ augmented_targets=augmented_targets,
424
+ target_files=target_files,
425
+ target_augmentations=target_augmentations,
426
+ noise_files=noise_files,
427
+ noise_augmentations=noise_augmentations,
428
+ spectral_masks=spectral_masks,
429
+ all_snrs=all_snrs,
430
+ mixups=mixups,
431
+ num_classes=num_classes,
432
+ feature_step_samples=feature_step_samples,
433
+ num_ir=num_ir,
434
+ )
400
435
 
436
+ con = db_connection(location=location, readonly=False, test=test)
401
437
  # Populate target table
402
- targets: list[tuple[int, str, float]] = []
403
438
  for mixture in mixtures:
404
439
  for target in mixture.targets:
405
- entry = from_target(target)
406
- if entry not in targets:
407
- targets.append(entry)
408
-
409
- con.executemany("INSERT INTO target (file_id, augmentation, gain) VALUES (?, ?, ?)", targets)
440
+ con.execute(
441
+ """
442
+ INSERT OR IGNORE INTO target (file_id, augmentation)
443
+ VALUES (?, ?)
444
+ """,
445
+ from_target(target),
446
+ )
410
447
 
411
448
  # Populate mixture table
412
- cur = con.cursor()
413
- for mixture in mixtures:
414
- cur.execute(
449
+ for m_id, mixture in enumerate(mixtures):
450
+ con.execute(
415
451
  """
416
- INSERT INTO mixture (name, noise_file_id, noise_augmentation, noise_offset, noise_snr_gain, random_snr,
417
- snr, samples, spectral_mask_id, spectral_mask_seed, target_snr_gain)
418
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
419
- """,
420
- from_mixture(mixture),
452
+ INSERT INTO mixture (id, name, noise_file_id, noise_augmentation, noise_offset, noise_snr_gain, random_snr,
453
+ snr, samples, spectral_mask_id, spectral_mask_seed, target_snr_gain)
454
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
455
+ """,
456
+ (m_id + 1, *from_mixture(mixture)),
421
457
  )
422
458
 
423
- mixture_id = cur.lastrowid
424
459
  for target in mixture.targets:
425
460
  target_id = con.execute(
426
461
  """
427
- SELECT target.id
428
- FROM target
429
- WHERE ? = target.file_id AND ? = target.augmentation AND ? = target.gain
462
+ SELECT target.id
463
+ FROM target
464
+ WHERE ? = target.file_id AND ? = target.augmentation
430
465
  """,
431
466
  from_target(target),
432
467
  ).fetchone()[0]
433
468
  con.execute(
434
469
  "INSERT INTO mixture_target (mixture_id, target_id) VALUES (?, ?)",
435
- (mixture_id, target_id),
470
+ (m_id + 1, target_id),
436
471
  )
437
472
 
438
473
  con.commit()
439
474
  con.close()
440
475
 
476
+ return used_noise_files, used_noise_samples
441
477
 
442
- def update_mixture(mixdb: MixtureDatabase, mixture: Mixture, with_data: bool = False) -> tuple[Mixture, GenMixData]:
443
- """Update mixture record with name and gains
444
478
 
445
- :param mixdb: Mixture database
446
- :param mixture: Mixture record
447
- :param with_data: Return audio data
448
- :return: Generated audio data (if requested)
449
- """
479
+ def update_mixture_table(location: str, m_id: int, with_data: bool = False, test: bool = False) -> GenMixData:
480
+ """Update mixture record with name and gains"""
450
481
  from .audio import get_next_noise
451
482
  from .augmentation import apply_gain
452
483
  from .datatypes import GenMixData
484
+ from .helpers import from_mixture
453
485
  from .helpers import get_target
486
+ from .mixdb import db_connection
454
487
 
488
+ mixdb = MixtureDatabase(location, test)
489
+ mixture = mixdb.mixture(m_id)
455
490
  mixture, targets_audio = _initialize_targets_audio(mixdb, mixture)
456
491
 
457
492
  noise_audio = _augmented_noise_audio(mixdb, mixture)
458
- noise_audio = get_next_noise(audio=noise_audio, offset=mixture.noise.offset, length=mixture.samples)
493
+ noise_audio = get_next_noise(audio=noise_audio, offset=mixture.noise_offset, length=mixture.samples)
459
494
 
460
495
  # Apply IR and sum targets audio before initializing the mixture SNR gains
461
496
  target_audio = get_target(mixdb, mixture, targets_audio)
@@ -466,8 +501,29 @@ def update_mixture(mixdb: MixtureDatabase, mixture: Mixture, with_data: bool = F
466
501
 
467
502
  mixture.name = f"{int(mixture.name):0{mixdb.mixid_width}}"
468
503
 
504
+ con = db_connection(location=location, readonly=False, test=test)
505
+ con.execute(
506
+ """
507
+ UPDATE mixture SET name=?,
508
+ noise_file_id=?,
509
+ noise_augmentation=?,
510
+ noise_offset=?,
511
+ noise_snr_gain=?,
512
+ random_snr=?,
513
+ snr=?,
514
+ samples=?,
515
+ spectral_mask_id=?,
516
+ spectral_mask_seed=?,
517
+ target_snr_gain=?
518
+ WHERE ? = mixture.id
519
+ """,
520
+ (*from_mixture(mixture), m_id + 1),
521
+ )
522
+ con.commit()
523
+ con.close()
524
+
469
525
  if not with_data:
470
- return mixture, GenMixData()
526
+ return GenMixData()
471
527
 
472
528
  # Apply SNR gains
473
529
  targets_audio = [apply_gain(audio=target_audio, gain=mixture.target_snr_gain) for target_audio in targets_audio]
@@ -477,7 +533,7 @@ def update_mixture(mixdb: MixtureDatabase, mixture: Mixture, with_data: bool = F
477
533
  target_audio = get_target(mixdb, mixture, targets_audio)
478
534
  mixture_audio = target_audio + noise_audio
479
535
 
480
- return mixture, GenMixData(
536
+ return GenMixData(
481
537
  mixture=mixture_audio,
482
538
  targets=targets_audio,
483
539
  target=target_audio,
@@ -502,7 +558,7 @@ def _augmented_noise_audio(mixdb: MixtureDatabase, mixture: Mixture) -> AudioT:
502
558
  return audio
503
559
 
504
560
 
505
- def _initialize_targets_audio(mixdb: MixtureDatabase, mixture: Mixture) -> tuple[Mixture, AudiosT]:
561
+ def _initialize_targets_audio(mixdb: MixtureDatabase, mixture: Mixture) -> tuple[Mixture, list[AudioT]]:
506
562
  from .augmentation import apply_augmentation
507
563
  from .augmentation import pad_audio_to_length
508
564
 
@@ -517,13 +573,6 @@ def _initialize_targets_audio(mixdb: MixtureDatabase, mixture: Mixture) -> tuple
517
573
  )
518
574
  )
519
575
 
520
- # target_gain is used to back out the gain augmentation in order to return the target audio
521
- # to its normalized level when calculating truth (if needed).
522
- if target.augmentation.gain is not None:
523
- target.gain = round(10 ** (target.augmentation.gain / 20), ndigits=5)
524
- else:
525
- target.gain = 1
526
-
527
576
  mixture.samples = max([len(item) for item in targets_audio])
528
577
 
529
578
  for idx in range(len(targets_audio)):
@@ -540,14 +589,11 @@ def _initialize_mixture_gains(
540
589
  from sonusai.utils import asl_p56
541
590
  from sonusai.utils import db_to_linear
542
591
 
543
- if mixture.snr < -96:
592
+ if mixture.is_noise_only:
544
593
  # Special case for zeroing out target data
545
594
  mixture.target_snr_gain = 0
546
595
  mixture.noise_snr_gain = 1
547
- # Setting target_gain to zero will cause the truth to be all zeros.
548
- for target in mixture.targets:
549
- target.gain = 0
550
- elif mixture.snr > 96:
596
+ elif mixture.is_target_only:
551
597
  # Special case for zeroing out noise data
552
598
  mixture.target_snr_gain = 1
553
599
  mixture.noise_snr_gain = 0
@@ -598,98 +644,20 @@ def _initialize_mixture_gains(
598
644
  return mixture
599
645
 
600
646
 
601
- def generate_mixtures(
602
- noise_mix_mode: str,
603
- augmented_targets: AugmentedTargets,
604
- target_files: TargetFiles,
605
- target_augmentations: AugmentationRules,
606
- noise_files: NoiseFiles,
607
- noise_augmentations: AugmentationRules,
608
- spectral_masks: SpectralMasks,
609
- all_snrs: list[UniversalSNRGenerator],
610
- mixups: list[int],
611
- num_classes: int,
612
- feature_step_samples: int,
613
- num_ir: int,
614
- ) -> tuple[int, int, Mixtures]:
615
- """Generate mixtures
616
-
617
- :param noise_mix_mode: Noise mix mode
618
- :param augmented_targets: List of augmented targets
619
- :param target_files: List of target files
620
- :param target_augmentations: List of target augmentations
621
- :param noise_files: List of noise files
622
- :param noise_augmentations: List of noise augmentations
623
- :param spectral_masks: List of spectral masks
624
- :param all_snrs: List of all SNRs
625
- :param mixups: List of mixup values
626
- :param num_classes: Number of classes
627
- :param feature_step_samples: Number of samples in a feature step
628
- :param num_ir: Number of impulse response files
629
- :return: (Number of noise files used, number of noise samples used, list of mixture records)
630
- """
631
- if noise_mix_mode == "exhaustive":
632
- return _exhaustive_noise_mix(
633
- augmented_targets=augmented_targets,
634
- target_files=target_files,
635
- target_augmentations=target_augmentations,
636
- noise_files=noise_files,
637
- noise_augmentations=noise_augmentations,
638
- spectral_masks=spectral_masks,
639
- all_snrs=all_snrs,
640
- mixups=mixups,
641
- num_classes=num_classes,
642
- feature_step_samples=feature_step_samples,
643
- num_ir=num_ir,
644
- )
645
-
646
- if noise_mix_mode == "non-exhaustive":
647
- return _non_exhaustive_noise_mix(
648
- augmented_targets=augmented_targets,
649
- target_files=target_files,
650
- target_augmentations=target_augmentations,
651
- noise_files=noise_files,
652
- noise_augmentations=noise_augmentations,
653
- spectral_masks=spectral_masks,
654
- all_snrs=all_snrs,
655
- mixups=mixups,
656
- num_classes=num_classes,
657
- feature_step_samples=feature_step_samples,
658
- num_ir=num_ir,
659
- )
660
-
661
- if noise_mix_mode == "non-combinatorial":
662
- return _non_combinatorial_noise_mix(
663
- augmented_targets=augmented_targets,
664
- target_files=target_files,
665
- target_augmentations=target_augmentations,
666
- noise_files=noise_files,
667
- noise_augmentations=noise_augmentations,
668
- spectral_masks=spectral_masks,
669
- all_snrs=all_snrs,
670
- mixups=mixups,
671
- num_classes=num_classes,
672
- feature_step_samples=feature_step_samples,
673
- num_ir=num_ir,
674
- )
675
-
676
- raise ValueError(f"invalid noise_mix_mode: {noise_mix_mode}")
677
-
678
-
679
647
  def _exhaustive_noise_mix(
680
- augmented_targets: AugmentedTargets,
681
- target_files: TargetFiles,
682
- target_augmentations: AugmentationRules,
683
- noise_files: NoiseFiles,
684
- noise_augmentations: AugmentationRules,
685
- spectral_masks: SpectralMasks,
648
+ augmented_targets: list[AugmentedTarget],
649
+ target_files: list[TargetFile],
650
+ target_augmentations: list[AugmentationRule],
651
+ noise_files: list[NoiseFile],
652
+ noise_augmentations: list[AugmentationRule],
653
+ spectral_masks: list[SpectralMask],
686
654
  all_snrs: list[UniversalSNRGenerator],
687
655
  mixups: list[int],
688
656
  num_classes: int,
689
657
  feature_step_samples: int,
690
658
  num_ir: int,
691
- ) -> tuple[int, int, Mixtures]:
692
- """Use every noise/augmentation with every target/augmentation"""
659
+ ) -> tuple[int, int, list[Mixture]]:
660
+ """Use every noise/augmentation with every target/augmentation+interferences/augmentation"""
693
661
  from random import randint
694
662
 
695
663
  import numpy as np
@@ -697,12 +665,10 @@ def _exhaustive_noise_mix(
697
665
  from .augmentation import augmentation_from_rule
698
666
  from .augmentation import estimate_augmented_length_from_length
699
667
  from .datatypes import Mixture
700
- from .datatypes import Mixtures
701
668
  from .datatypes import Noise
702
669
  from .datatypes import UniversalSNR
703
670
  from .targets import get_augmented_target_ids_for_mixup
704
671
 
705
- mixtures: Mixtures = []
706
672
  m_id = 0
707
673
  used_noise_files = len(noise_files) * len(noise_augmentations)
708
674
  used_noise_samples = 0
@@ -717,6 +683,8 @@ def _exhaustive_noise_mix(
717
683
  )
718
684
  for mixup in mixups
719
685
  ]
686
+
687
+ mixtures: list[Mixture] = []
720
688
  for noise_file_id in range(len(noise_files)):
721
689
  for noise_augmentation_rule in noise_augmentations:
722
690
  noise_augmentation = augmentation_from_rule(noise_augmentation_rule, num_ir)
@@ -743,11 +711,8 @@ def _exhaustive_noise_mix(
743
711
  Mixture(
744
712
  targets=targets,
745
713
  name=str(m_id),
746
- noise=Noise(
747
- file_id=noise_file_id + 1,
748
- augmentation=noise_augmentation,
749
- offset=noise_offset,
750
- ),
714
+ noise=Noise(file_id=noise_file_id + 1, augmentation=noise_augmentation),
715
+ noise_offset=noise_offset,
751
716
  samples=target_length,
752
717
  snr=UniversalSNR(value=snr.value, is_random=snr.is_random),
753
718
  spectral_mask_id=spectral_mask_id + 1,
@@ -763,32 +728,30 @@ def _exhaustive_noise_mix(
763
728
 
764
729
 
765
730
  def _non_exhaustive_noise_mix(
766
- augmented_targets: AugmentedTargets,
767
- target_files: TargetFiles,
768
- target_augmentations: AugmentationRules,
769
- noise_files: NoiseFiles,
770
- noise_augmentations: AugmentationRules,
771
- spectral_masks: SpectralMasks,
731
+ augmented_targets: list[AugmentedTarget],
732
+ target_files: list[TargetFile],
733
+ target_augmentations: list[AugmentationRule],
734
+ noise_files: list[NoiseFile],
735
+ noise_augmentations: list[AugmentationRule],
736
+ spectral_masks: list[SpectralMask],
772
737
  all_snrs: list[UniversalSNRGenerator],
773
738
  mixups: list[int],
774
739
  num_classes: int,
775
740
  feature_step_samples: int,
776
741
  num_ir: int,
777
- ) -> tuple[int, int, Mixtures]:
778
- """Cycle through every target/augmentation without necessarily using all noise/augmentation combinations
779
- (reduced data set).
742
+ ) -> tuple[int, int, list[Mixture]]:
743
+ """Cycle through every target/augmentation+interferences/augmentation without necessarily using all
744
+ noise/augmentation combinations (reduced data set).
780
745
  """
781
746
  from random import randint
782
747
 
783
748
  import numpy as np
784
749
 
785
750
  from .datatypes import Mixture
786
- from .datatypes import Mixtures
787
751
  from .datatypes import Noise
788
752
  from .datatypes import UniversalSNR
789
753
  from .targets import get_augmented_target_ids_for_mixup
790
754
 
791
- mixtures: Mixtures = []
792
755
  m_id = 0
793
756
  used_noise_files = set()
794
757
  used_noise_samples = 0
@@ -806,6 +769,8 @@ def _non_exhaustive_noise_mix(
806
769
  )
807
770
  for mixup in mixups
808
771
  ]
772
+
773
+ mixtures: list[Mixture] = []
809
774
  for mixup in augmented_target_indices_for_mixups:
810
775
  for augmented_target_indices in mixup:
811
776
  targets, target_length = _get_target_info(
@@ -841,11 +806,8 @@ def _non_exhaustive_noise_mix(
841
806
  Mixture(
842
807
  targets=targets,
843
808
  name=str(m_id),
844
- noise=Noise(
845
- file_id=noise_file_id + 1,
846
- augmentation=noise_augmentation,
847
- offset=noise_offset,
848
- ),
809
+ noise=Noise(file_id=noise_file_id + 1, augmentation=noise_augmentation),
810
+ noise_offset=noise_offset,
849
811
  samples=target_length,
850
812
  snr=UniversalSNR(value=snr.value, is_random=snr.is_random),
851
813
  spectral_mask_id=spectral_mask_id + 1,
@@ -858,21 +820,21 @@ def _non_exhaustive_noise_mix(
858
820
 
859
821
 
860
822
  def _non_combinatorial_noise_mix(
861
- augmented_targets: AugmentedTargets,
862
- target_files: TargetFiles,
863
- target_augmentations: AugmentationRules,
864
- noise_files: NoiseFiles,
865
- noise_augmentations: AugmentationRules,
866
- spectral_masks: SpectralMasks,
823
+ augmented_targets: list[AugmentedTarget],
824
+ target_files: list[TargetFile],
825
+ target_augmentations: list[AugmentationRule],
826
+ noise_files: list[NoiseFile],
827
+ noise_augmentations: list[AugmentationRule],
828
+ spectral_masks: list[SpectralMask],
867
829
  all_snrs: list[UniversalSNRGenerator],
868
830
  mixups: list[int],
869
831
  num_classes: int,
870
832
  feature_step_samples: int,
871
833
  num_ir: int,
872
- ) -> tuple[int, int, Mixtures]:
873
- """Combine a target/augmentation with a single cut of a noise/augmentation non-exhaustively
874
- (each target/augmentation does not use each noise/augmentation). Cut has random start and loop back to
875
- beginning if end of noise/augmentation is reached.
834
+ ) -> tuple[int, int, list[Mixture]]:
835
+ """Combine a target/augmentation+interferences/augmentation with a single cut of a noise/augmentation
836
+ non-exhaustively (each target/augmentation+interferences/augmentation does not use each noise/augmentation).
837
+ Cut has random start and loop back to beginning if end of noise/augmentation is reached.
876
838
  """
877
839
  from random import choice
878
840
  from random import randint
@@ -880,12 +842,10 @@ def _non_combinatorial_noise_mix(
880
842
  import numpy as np
881
843
 
882
844
  from .datatypes import Mixture
883
- from .datatypes import Mixtures
884
845
  from .datatypes import Noise
885
846
  from .datatypes import UniversalSNR
886
847
  from .targets import get_augmented_target_ids_for_mixup
887
848
 
888
- mixtures: Mixtures = []
889
849
  m_id = 0
890
850
  used_noise_files = set()
891
851
  used_noise_samples = 0
@@ -902,6 +862,8 @@ def _non_combinatorial_noise_mix(
902
862
  )
903
863
  for mixup in mixups
904
864
  ]
865
+
866
+ mixtures: list[Mixture] = []
905
867
  for mixup in augmented_target_indices_for_mixups:
906
868
  for augmented_target_indices in mixup:
907
869
  targets, target_length = _get_target_info(
@@ -935,11 +897,8 @@ def _non_combinatorial_noise_mix(
935
897
  Mixture(
936
898
  targets=targets,
937
899
  name=str(m_id),
938
- noise=Noise(
939
- file_id=noise_file_id + 1,
940
- augmentation=noise_augmentation,
941
- offset=choice(range(noise_length)), # noqa: S311
942
- ),
900
+ noise=Noise(file_id=noise_file_id + 1, augmentation=noise_augmentation),
901
+ noise_offset=choice(range(noise_length)), # noqa: S311
943
902
  samples=target_length,
944
903
  snr=UniversalSNR(value=snr.value, is_random=snr.is_random),
945
904
  spectral_mask_id=spectral_mask_id + 1,
@@ -954,8 +913,8 @@ def _non_combinatorial_noise_mix(
954
913
  def _get_next_noise_indices(
955
914
  noise_file_id: int | None,
956
915
  noise_augmentation_id: int | None,
957
- noise_files: NoiseFiles,
958
- noise_augmentations: AugmentationRules,
916
+ noise_files: list[NoiseFile],
917
+ noise_augmentations: list[AugmentationRule],
959
918
  num_ir: int,
960
919
  ) -> tuple[int, int, Augmentation, int]:
961
920
  from .augmentation import augmentation_from_rule
@@ -984,8 +943,8 @@ def _get_next_noise_offset(
984
943
  noise_augmentation_id: int | None,
985
944
  noise_offset: int | None,
986
945
  target_length: int,
987
- noise_files: NoiseFiles,
988
- noise_augmentations: AugmentationRules,
946
+ noise_files: list[NoiseFile],
947
+ noise_augmentations: list[AugmentationRule],
989
948
  num_ir: int,
990
949
  ) -> tuple[int, int, Augmentation, int]:
991
950
  from .augmentation import augmentation_from_rule
@@ -1018,18 +977,16 @@ def _get_next_noise_offset(
1018
977
 
1019
978
  def _get_target_info(
1020
979
  augmented_target_ids: list[int],
1021
- augmented_targets: AugmentedTargets,
1022
- target_files: TargetFiles,
1023
- target_augmentations: AugmentationRules,
980
+ augmented_targets: list[AugmentedTarget],
981
+ target_files: list[TargetFile],
982
+ target_augmentations: list[AugmentationRule],
1024
983
  feature_step_samples: int,
1025
984
  num_ir: int,
1026
- ) -> tuple[Targets, int]:
985
+ ) -> tuple[list[Target], int]:
1027
986
  from .augmentation import augmentation_from_rule
1028
987
  from .augmentation import estimate_augmented_length_from_length
1029
- from .datatypes import Target
1030
- from .datatypes import Targets
1031
988
 
1032
- mixups: Targets = []
989
+ mixups: list[Target] = []
1033
990
  target_length = 0
1034
991
  for idx in augmented_target_ids:
1035
992
  tfi = augmented_targets[idx].target_id
@@ -1073,7 +1030,7 @@ def _get_textgrid_tiers_from_target_file(target_file: str) -> list[str]:
1073
1030
  return sorted(tg.tierNames)
1074
1031
 
1075
1032
 
1076
- def _populate_speaker_table(location: str, target_files: TargetFiles, test: bool = False) -> None:
1033
+ def _populate_speaker_table(location: str, target_files: list[TargetFile], test: bool = False) -> None:
1077
1034
  """Populate speaker table"""
1078
1035
  import json
1079
1036
  from pathlib import Path
@@ -1122,7 +1079,7 @@ def _populate_speaker_table(location: str, target_files: TargetFiles, test: bool
1122
1079
  if description[0] not in ("id", "parent")
1123
1080
  ]
1124
1081
  con.execute(
1125
- "UPDATE top SET speaker_metadata_tiers=? WHERE top.id = ?",
1082
+ "UPDATE top SET speaker_metadata_tiers=? WHERE ? = top.id",
1126
1083
  (json.dumps(tiers), 1),
1127
1084
  )
1128
1085
 
@@ -1133,7 +1090,7 @@ def _populate_speaker_table(location: str, target_files: TargetFiles, test: bool
1133
1090
  con.close()
1134
1091
 
1135
1092
 
1136
- def _populate_truth_config_table(location: str, target_files: TargetFiles, test: bool = False) -> None:
1093
+ def _populate_truth_config_table(location: str, target_files: list[TargetFile], test: bool = False) -> None:
1137
1094
  """Populate truth_config table"""
1138
1095
  import json
1139
1096