sonusai 0.19.8__py3-none-any.whl → 0.19.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sonusai/genmixdb.py CHANGED
@@ -292,7 +292,7 @@ def genmixdb(
         augmented_targets=augmented_targets,
         targets=target_files,
         target_augmentations=target_augmentations,
-        class_balancing_augmentation=class_balancing_augmentation,
+        class_balancing_augmentation=class_balancing_augmentation,  # pyright: ignore [reportArgumentType]
         num_classes=mixdb.num_classes,
         num_ir=mixdb.num_impulse_response_files,
         mixups=mixups,
@@ -354,7 +354,7 @@ def genmixdb(
     logger.info(
         f"Feature shape: "
         f"{mixdb.fg_stride} x {mixdb.feature_parameters} "
-        f"({mixdb.fg_stride * mixdb.feature_parameters} total params)"
+        f"({mixdb.fg_stride * mixdb.feature_parameters} total parameters)"
     )
     logger.info(f"Feature samples: {mixdb.feature_samples} samples ({mixdb.feature_ms} ms)")
     logger.info(f"Feature step samples: {mixdb.feature_step_samples} samples ({mixdb.feature_step_ms} ms)")

sonusai/mixture/generation.py CHANGED
@@ -406,6 +406,10 @@ def populate_mixture_table(
     test: bool = False,
 ) -> tuple[int, int]:
     """Generate mixtures and populate mixture table"""
+    from .helpers import from_mixture
+    from .helpers import from_target
+    from .mixdb import db_connection
+
     if noise_mix_mode == "exhaustive":
         func = _exhaustive_noise_mix
     elif noise_mix_mode == "non-exhaustive":
@@ -415,8 +419,7 @@ def populate_mixture_table(
     else:
         raise ValueError(f"invalid noise_mix_mode: {noise_mix_mode}")
 
-    used_noise_files, used_noise_samples = func(
-        location=location,
+    used_noise_files, used_noise_samples, mixtures = func(
         augmented_targets=augmented_targets,
         target_files=target_files,
         target_augmentations=target_augmentations,
@@ -428,9 +431,48 @@ def populate_mixture_table(
         num_classes=num_classes,
         feature_step_samples=feature_step_samples,
         num_ir=num_ir,
-        test=test,
     )
 
+    con = db_connection(location=location, readonly=False, test=test)
+    # Populate target table
+    for mixture in mixtures:
+        for target in mixture.targets:
+            con.execute(
+                """
+                INSERT OR IGNORE INTO target (file_id, augmentation)
+                VALUES (?, ?)
+                """,
+                from_target(target),
+            )
+
+    # Populate mixture table
+    for m_id, mixture in enumerate(mixtures):
+        con.execute(
+            """
+            INSERT INTO mixture (id, name, noise_file_id, noise_augmentation, noise_offset, noise_snr_gain, random_snr,
+                                 snr, samples, spectral_mask_id, spectral_mask_seed, target_snr_gain)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+            """,
+            (m_id + 1, *from_mixture(mixture)),
+        )
+
+        for target in mixture.targets:
+            target_id = con.execute(
+                """
+                SELECT target.id
+                FROM target
+                WHERE ? = target.file_id AND ? = target.augmentation
+                """,
+                from_target(target),
+            ).fetchone()[0]
+            con.execute(
+                "INSERT INTO mixture_target (mixture_id, target_id) VALUES (?, ?)",
+                (m_id + 1, target_id),
+            )
+
+    con.commit()
+    con.close()
+
     return used_noise_files, used_noise_samples
 
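The change above inlines what used to be `_insert_mixture_record`: the mix-strategy functions now only build `Mixture` objects, and `populate_mixture_table` opens one database connection, writes every record, and commits once instead of opening and committing per mixture. Below is a minimal, self-contained sketch of that batched-write pattern against an in-memory SQLite database; the dataclasses and the trimmed-down schema (only the columns needed to show the target / mixture / mixture_target relationship) are illustrative assumptions, not SonusAI's actual datatypes.

    # Sketch of the batched write pattern, with a deliberately simplified schema.
    import sqlite3
    from dataclasses import dataclass

    @dataclass(frozen=True)
    class Target:           # stand-in for SonusAI's target record
        file_id: int
        augmentation: str

    @dataclass
    class Mixture:          # stand-in for SonusAI's Mixture record
        name: str
        targets: list[Target]

    con = sqlite3.connect(":memory:")
    con.executescript(
        """
        CREATE TABLE target (id INTEGER PRIMARY KEY, file_id INTEGER, augmentation TEXT,
                             UNIQUE (file_id, augmentation));
        CREATE TABLE mixture (id INTEGER PRIMARY KEY, name TEXT);
        CREATE TABLE mixture_target (mixture_id INTEGER, target_id INTEGER);
        """
    )

    mixtures = [
        Mixture(name="0", targets=[Target(1, "none")]),
        Mixture(name="1", targets=[Target(1, "none"), Target(2, "pitch")]),
    ]

    # Insert each unique (file_id, augmentation) pair once...
    for mixture in mixtures:
        for target in mixture.targets:
            con.execute(
                "INSERT OR IGNORE INTO target (file_id, augmentation) VALUES (?, ?)",
                (target.file_id, target.augmentation),
            )

    # ...then write the mixtures and the link rows.
    for m_id, mixture in enumerate(mixtures):
        con.execute("INSERT INTO mixture (id, name) VALUES (?, ?)", (m_id + 1, mixture.name))
        for target in mixture.targets:
            target_id = con.execute(
                "SELECT id FROM target WHERE file_id = ? AND augmentation = ?",
                (target.file_id, target.augmentation),
            ).fetchone()[0]
            con.execute(
                "INSERT INTO mixture_target (mixture_id, target_id) VALUES (?, ?)",
                (m_id + 1, target_id),
            )

    con.commit()  # single commit for the whole batch
    con.close()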
 
@@ -603,7 +645,6 @@ def _initialize_mixture_gains(
 
 
 def _exhaustive_noise_mix(
-    location: str,
     augmented_targets: list[AugmentedTarget],
     target_files: list[TargetFile],
     target_augmentations: list[AugmentationRule],
@@ -615,9 +656,8 @@ def _exhaustive_noise_mix(
     num_classes: int,
     feature_step_samples: int,
     num_ir: int,
-    test: bool = False,
-) -> tuple[int, int]:
-    """Use every noise/augmentation with every target/augmentation"""
+) -> tuple[int, int, list[Mixture]]:
+    """Use every noise/augmentation with every target/augmentation+interferences/augmentation"""
     from random import randint
 
     import numpy as np
@@ -643,6 +683,8 @@ def _exhaustive_noise_mix(
         )
         for mixup in mixups
     ]
+
+    mixtures: list[Mixture] = []
     for noise_file_id in range(len(noise_files)):
         for noise_augmentation_rule in noise_augmentations:
             noise_augmentation = augmentation_from_rule(noise_augmentation_rule, num_ir)
@@ -665,10 +707,8 @@ def _exhaustive_noise_mix(
 
             for spectral_mask_id in range(len(spectral_masks)):
                 for snr in all_snrs:
-                    _insert_mixture_record(
-                        location=location,
-                        m_id=m_id,
-                        mixture=Mixture(
+                    mixtures.append(
+                        Mixture(
                             targets=targets,
                             name=str(m_id),
                             noise=Noise(file_id=noise_file_id + 1, augmentation=noise_augmentation),
@@ -677,19 +717,17 @@ def _exhaustive_noise_mix(
                             snr=UniversalSNR(value=snr.value, is_random=snr.is_random),
                             spectral_mask_id=spectral_mask_id + 1,
                             spectral_mask_seed=randint(0, np.iinfo("i").max),  # noqa: S311
-                        ),
-                        test=test,
+                        )
                     )
                     m_id += 1
 
             noise_offset = int((noise_offset + target_length) % noise_length)
             used_noise_samples += target_length
 
-    return used_noise_files, used_noise_samples
+    return used_noise_files, used_noise_samples, mixtures
 
 
 def _non_exhaustive_noise_mix(
-    location: str,
     augmented_targets: list[AugmentedTarget],
     target_files: list[TargetFile],
     target_augmentations: list[AugmentationRule],
@@ -701,10 +739,9 @@ def _non_exhaustive_noise_mix(
     num_classes: int,
     feature_step_samples: int,
     num_ir: int,
-    test: bool = False,
-) -> tuple[int, int]:
-    """Cycle through every target/augmentation without necessarily using all noise/augmentation combinations
-    (reduced data set).
+) -> tuple[int, int, list[Mixture]]:
+    """Cycle through every target/augmentation+interferences/augmentation without necessarily using all
+    noise/augmentation combinations (reduced data set).
     """
     from random import randint
 
@@ -732,6 +769,8 @@ def _non_exhaustive_noise_mix(
         )
         for mixup in mixups
     ]
+
+    mixtures: list[Mixture] = []
     for mixup in augmented_target_indices_for_mixups:
         for augmented_target_indices in mixup:
             targets, target_length = _get_target_info(
@@ -763,10 +802,8 @@ def _non_exhaustive_noise_mix(
 
                 used_noise_files.add(f"{noise_file_id}_{noise_augmentation_id}")
 
-                _insert_mixture_record(
-                    location=location,
-                    m_id=m_id,
-                    mixture=Mixture(
+                mixtures.append(
+                    Mixture(
                         targets=targets,
                         name=str(m_id),
                         noise=Noise(file_id=noise_file_id + 1, augmentation=noise_augmentation),
@@ -775,16 +812,14 @@ def _non_exhaustive_noise_mix(
                         snr=UniversalSNR(value=snr.value, is_random=snr.is_random),
                         spectral_mask_id=spectral_mask_id + 1,
                         spectral_mask_seed=randint(0, np.iinfo("i").max),  # noqa: S311
-                    ),
-                    test=test,
+                    )
                 )
                 m_id += 1
 
-    return len(used_noise_files), used_noise_samples
+    return len(used_noise_files), used_noise_samples, mixtures
 
 
 def _non_combinatorial_noise_mix(
-    location: str,
     augmented_targets: list[AugmentedTarget],
     target_files: list[TargetFile],
     target_augmentations: list[AugmentationRule],
@@ -796,11 +831,10 @@ def _non_combinatorial_noise_mix(
     num_classes: int,
     feature_step_samples: int,
     num_ir: int,
-    test: bool = False,
-) -> tuple[int, int]:
-    """Combine a target/augmentation with a single cut of a noise/augmentation non-exhaustively
-    (each target/augmentation does not use each noise/augmentation). Cut has random start and loop back to
-    beginning if end of noise/augmentation is reached.
+) -> tuple[int, int, list[Mixture]]:
+    """Combine a target/augmentation+interferences/augmentation with a single cut of a noise/augmentation
+    non-exhaustively (each target/augmentation+interferences/augmentation does not use each noise/augmentation).
+    Cut has random start and loop back to beginning if end of noise/augmentation is reached.
     """
     from random import choice
     from random import randint
@@ -828,6 +862,8 @@ def _non_combinatorial_noise_mix(
         )
         for mixup in mixups
     ]
+
+    mixtures: list[Mixture] = []
     for mixup in augmented_target_indices_for_mixups:
         for augmented_target_indices in mixup:
             targets, target_length = _get_target_info(
@@ -857,10 +893,8 @@ def _non_combinatorial_noise_mix(
 
                 used_noise_files.add(f"{noise_file_id}_{noise_augmentation_id}")
 
-                _insert_mixture_record(
-                    location=location,
-                    m_id=m_id,
-                    mixture=Mixture(
+                mixtures.append(
+                    Mixture(
                         targets=targets,
                         name=str(m_id),
                         noise=Noise(file_id=noise_file_id + 1, augmentation=noise_augmentation),
@@ -869,12 +903,11 @@ def _non_combinatorial_noise_mix(
                         snr=UniversalSNR(value=snr.value, is_random=snr.is_random),
                         spectral_mask_id=spectral_mask_id + 1,
                         spectral_mask_seed=randint(0, np.iinfo("i").max),  # noqa: S311
-                    ),
-                    test=test,
+                    )
                 )
                 m_id += 1
 
-    return len(used_noise_files), used_noise_samples
+    return len(used_noise_files), used_noise_samples, mixtures
 
 
 def _get_next_noise_indices(
@@ -973,49 +1006,6 @@ def _get_target_info(
     return mixups, target_length
 
 
-def _insert_mixture_record(location: str, m_id: int, mixture: Mixture, test: bool = False) -> None:
-    from .helpers import from_mixture
-    from .helpers import from_target
-    from .mixdb import db_connection
-
-    con = db_connection(location=location, readonly=False, test=test)
-    # Populate target table
-    for target in mixture.targets:
-        con.execute(
-            """
-            INSERT OR IGNORE INTO target (file_id, augmentation)
-            VALUES (?, ?)
-            """,
-            from_target(target),
-        )
-
-    # Populate mixture table
-    con.execute(
-        """
-        INSERT INTO mixture (id, name, noise_file_id, noise_augmentation, noise_offset, noise_snr_gain, random_snr,
-                             snr, samples, spectral_mask_id, spectral_mask_seed, target_snr_gain)
-        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-        """,
-        (m_id + 1, *from_mixture(mixture)),
-    )
-
-    for target in mixture.targets:
-        target_id = con.execute(
-            """
-            SELECT target.id
-            FROM target
-            WHERE ? = target.file_id AND ? = target.augmentation
-            """,
-            from_target(target),
-        ).fetchone()[0]
-        con.execute(
-            "INSERT INTO mixture_target (mixture_id, target_id) VALUES (?, ?)",
-            (m_id + 1, target_id),
-        )
-    con.commit()
-    con.close()
-
-
 def get_all_snrs_from_config(config: dict) -> list[UniversalSNRGenerator]:
     from .datatypes import UniversalSNRGenerator
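Taken together, these changes give all three noise-mix strategies the same shape: they no longer touch the database and instead return (used_noise_files, used_noise_samples, mixtures) for the caller to persist. A rough sketch of the dispatch this implies follows; the stub bodies and the "non-combinatorial" mode string are assumptions for illustration (only the "exhaustive" and "non-exhaustive" literals appear in this diff).

    # Sketch of the strategy dispatch implied by populate_mixture_table above.
    from typing import Any, Callable

    Mixture = dict[str, Any]  # stand-in for sonusai.mixture.datatypes.Mixture
    MixFunc = Callable[..., tuple[int, int, list[Mixture]]]

    def _exhaustive_noise_mix(**kwargs: Any) -> tuple[int, int, list[Mixture]]:
        return 0, 0, []  # placeholder: every noise/augmentation with every target

    def _non_exhaustive_noise_mix(**kwargs: Any) -> tuple[int, int, list[Mixture]]:
        return 0, 0, []  # placeholder: cycle targets without using every noise combination

    def _non_combinatorial_noise_mix(**kwargs: Any) -> tuple[int, int, list[Mixture]]:
        return 0, 0, []  # placeholder: one random-offset noise cut per target

    def pick_mix_func(noise_mix_mode: str) -> MixFunc:
        modes: dict[str, MixFunc] = {
            "exhaustive": _exhaustive_noise_mix,
            "non-exhaustive": _non_exhaustive_noise_mix,
            "non-combinatorial": _non_combinatorial_noise_mix,  # assumed mode string
        }
        if noise_mix_mode not in modes:
            raise ValueError(f"invalid noise_mix_mode: {noise_mix_mode}")
        return modes[noise_mix_mode]

    used_noise_files, used_noise_samples, mixtures = pick_mix_func("exhaustive")()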
 

sonusai/mixture/torchaudio_audio.py CHANGED
@@ -4,10 +4,16 @@ from sonusai.mixture.datatypes import AudioT
 from sonusai.mixture.datatypes import ImpulseResponseData
 
 
-def read_impulse_response(name: str | Path) -> ImpulseResponseData:
+def read_impulse_response(
+    name: str | Path,
+    delay_compensation: bool = True,
+    normalize: bool = True,
+) -> ImpulseResponseData:
     """Read impulse response data using torchaudio
 
     :param name: File name
+    :param delay_compensation: Apply delay compensation
+    :param normalize: Apply normalization
     :return: ImpulseResponseData object
     """
     import numpy as np
@@ -28,14 +34,19 @@ def read_impulse_response(name: str | Path) -> ImpulseResponseData:
         raise OSError(f"Error reading {name}: {e}") from e
 
     raw = torch.squeeze(raw[0, :])
-    offset = torch.argmax(raw)
-    raw = raw[offset:]
-    # Inexplicably, torch.linalg.vector_norm() causes multiprocessing contexts to hang.
-    # Use np.linalg.norm() instead.
-    # raw = raw / torch.linalg.vector_norm(raw)
+
+    if delay_compensation:
+        offset = torch.argmax(raw)
+        raw = raw[offset:]
 
     data = np.array(raw).astype(np.float32)
-    data = data / np.linalg.norm(data)
+
+    if normalize:
+        # Inexplicably,
+        # data = data / torch.linalg.vector_norm(data)
+        # causes multiprocessing contexts to hang.
+        # Use np.linalg.norm() instead.
+        data = data / np.linalg.norm(data)
 
     return ImpulseResponseData(name=str(name), sample_rate=sample_rate, data=data)
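The new keyword arguments make the peak-offset trimming and the energy normalization optional, while the defaults preserve the 0.19.8 behavior. A small usage sketch, assuming a hypothetical impulse-response file:

    from sonusai.mixture.torchaudio_audio import read_impulse_response

    # Default behavior matches 0.19.8: trim to the peak and normalize the energy.
    ir = read_impulse_response("ir.wav")  # "ir.wav" is a hypothetical file

    # New in 0.19.9: keep the raw impulse response untouched.
    ir_raw = read_impulse_response("ir.wav", delay_compensation=False, normalize=False)

    print(ir.sample_rate, len(ir.data), len(ir_raw.data))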
 

sonusai/mixture/torchaudio_augmentation.py CHANGED
@@ -20,10 +20,9 @@ def apply_augmentation(audio: AudioT, augmentation: Augmentation, frame_length:
 
     effects: list[list[str]] = []
 
-    # TODO
-    # Always normalize and remove normalize from list of available augmentations
-    # Normalize to globally set level (should this be a global config parameter,
-    # or hard-coded into the script?)
+    # TODO: Always normalize and remove normalize from list of available augmentations
+    # Normalize to globally set level (should this be a global config parameter, or hard-coded into the script?)
+    # TODO: Support all sox effects supported by torchaudio (torchaudio.sox_effects.effect_names())
     if augmentation.normalize is not None:
         effects.append(["norm", str(augmentation.normalize)])
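The added TODO points at torchaudio's sox-effects layer, which is also what the existing "norm" effect goes through. Below is a sketch of what enumerating and applying arbitrary effects could look like, assuming the installed torchaudio build still ships sox_effects support (it is deprecated in recent releases); the waveform and effect chain are illustrative only:

    import torch
    import torchaudio

    # Effect names the sox backend exposes; "norm" (used above) is one of them.
    print(sorted(torchaudio.sox_effects.effect_names())[:10])

    waveform = torch.zeros(1, 16000)  # hypothetical 1-second mono buffer at 16 kHz
    effects = [["norm", "-3.5"], ["pitch", "300"]]  # illustrative effect chain
    augmented, sample_rate = torchaudio.sox_effects.apply_effects_tensor(waveform, 16000, effects)
    print(augmented.shape, sample_rate)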
 

sonusai-0.19.9.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sonusai
-Version: 0.19.8
+Version: 0.19.9
 Summary: Framework for building deep neural network models for sound, speech, and voice AI
 Home-page: https://aaware.com
 License: GPL-3.0-only

sonusai-0.19.9.dist-info/RECORD CHANGED
@@ -15,7 +15,7 @@ sonusai/doc.py,sha256=zSmXpioB0YS_5-7kqfS5cr--veSaXkxRKzldId9Hyoc,878
 sonusai/genft.py,sha256=TqtmexKw7tZsugU1DbZ3fY7_YZ2hFlIU-ema6f0LRgQ,5586
 sonusai/genmetrics.py,sha256=RiEYmkRl0yVCIr9PHkKRqEG68WR77pP9WpWAYeKnMLE,5723
 sonusai/genmix.py,sha256=pugp74prksf4ZiJYMXWg2l3v48Qqi9KpP2WhPEBpZOU,6702
-sonusai/genmixdb.py,sha256=QB9iARnAutuPPxML-g-kc3C-W1Sodabr68e4GSRD21Q,18680
+sonusai/genmixdb.py,sha256=gUF_9dGfuWRCmHtCoXZRrgCMejz46A5KhWoEh31laqc,18724
 sonusai/lsdb.py,sha256=0HOGDDndB3LT9cz9AaxKIpt9vslAoSP4F239gply4Xg,5149
 sonusai/main.py,sha256=HbnEia1B1-Z-mlHkLfojH8aj9GIpL1Btw3oH60T_CCQ,2590
 sonusai/metrics/__init__.py,sha256=ssV6JEK_oklRSocsp6HMcG-GtJvV8IkRQtdKhHHmwU8,878
@@ -46,7 +46,7 @@ sonusai/mixture/datatypes.py,sha256=xNDBWFTVQ3plJ7qHKzrXyV4pffPYuf1xMVqBsR40n4o,
 sonusai/mixture/db_datatypes.py,sha256=kvdUOMS6Pkkj9AmxCiq6zM8x7jbPPi933tVaXRxbTdQ,1534
 sonusai/mixture/eq_rule_is_valid.py,sha256=O3gCAs_0hpxENK5b7kxxpDmOpKHlXGBWuLGT_97ARSM,1210
 sonusai/mixture/feature.py,sha256=L0bPFG0RO-CrrtTStUMt_14euYsVo8_TWTP2IKSFKaA,2335
-sonusai/mixture/generation.py,sha256=sPsl-r5d9mkSglHOG5_VbmoDNqOscuWTdu7F6OJa-Qw,38751
+sonusai/mixture/generation.py,sha256=Okmyc7LVVdyt7UDzLSR4XdK-Q92vSQHX0RI__RJbJM8,38551
 sonusai/mixture/helpers.py,sha256=UEN_9LttIZwvms9nOkjMr_Yf3lpb7sMPmDVUoX8D7yQ,15225
 sonusai/mixture/log_duration_and_sizes.py,sha256=qhgl87C2KbjxLdKEpjYOoqNL6rc-8-PB4R7Gx_7UG8g,1240
 sonusai/mixture/mixdb.py,sha256=W8B54U6f-8szMylXSnU0YzOR-a9M3XoCRpyqOgBIJuQ,73498
@@ -57,8 +57,8 @@ sonusai/mixture/spectral_mask.py,sha256=U9XJ_SAoI9b67K_3SE7bNw6U8cPGFOBttaZAxMjA
 sonusai/mixture/target_class_balancing.py,sha256=o_TZ8kVYq10lgeXHh3GUFfflfdUvRt0FekFu2eaNkDs,4251
 sonusai/mixture/targets.py,sha256=6emo2fxxp9ZhSpHuUM9xIjYMz8zeIHAw684jT3l7fAs,6442
 sonusai/mixture/tokenized_shell_vars.py,sha256=lXTzUDutuBWGV1zIsqeIxWmy-eKm0Vx1y8-iLdsL1gQ,4921
-sonusai/mixture/torchaudio_audio.py,sha256=169VXKEoOHc5nyiHJwaihkcQ_a1ZH_O-nnk9Gq4JtaQ,2887
-sonusai/mixture/torchaudio_augmentation.py,sha256=68QpJ4JcAH2fsL4qVck7VvBv615UbCBPEnNe4HMvA_8,3844
+sonusai/mixture/torchaudio_audio.py,sha256=72Hxo5TKAW7mYpRy15QFfD7AYDORBk6bVCcHENniWGw,3116
+sonusai/mixture/torchaudio_augmentation.py,sha256=uFAKxIfs50J5FR-WXodsEACm2Ao-t5dZRSJ0DwTAfBg,3930
 sonusai/mixture/truth.py,sha256=-CwwawFRGjqodR2yKvAMGL1XaYLct-tli7wZ2gbhLtQ,2121
 sonusai/mixture/truth_functions/__init__.py,sha256=0mlOFChPnXG5BC0eKOe4n9VH17jY4iOqZFLuF6Gprdk,1505
 sonusai/mixture/truth_functions/crm.py,sha256=iidcffXfqV8k9O5wt5KTWIAFaTSjmhV5ucKZPbTgpvQ,3809
@@ -121,7 +121,7 @@ sonusai/utils/stratified_shuffle_split.py,sha256=d7WLUirywSvgZWkt_5a0F8YvTnJjuXl
 sonusai/utils/write_audio.py,sha256=0lKdaX57N6H-UWdioqmXCJMjwT1eBz5B-bSGqDvloAc,838
 sonusai/utils/yes_or_no.py,sha256=0h1okjXmDNbJp7rZJFR2V-HFU1GJDm3YFTUVmYExkOU,263
 sonusai/vars.py,sha256=kBBzuvC8szmdIZEEDA7XXmD765addZKdM2aFipeGO1w,933
-sonusai-0.19.8.dist-info/METADATA,sha256=KQEqF0XgSi8xV6rSYzn-bync-wXfEjzGyWXgb909gDo,2535
-sonusai-0.19.8.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-sonusai-0.19.8.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
-sonusai-0.19.8.dist-info/RECORD,,
+sonusai-0.19.9.dist-info/METADATA,sha256=rCXEzWOsVKZdZB8E5iPt0fachIQ_-zkmM1UNAe8JZc4,2535
+sonusai-0.19.9.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+sonusai-0.19.9.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
+sonusai-0.19.9.dist-info/RECORD,,