sonusai 0.19.10__py3-none-any.whl → 0.20.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -93,7 +93,8 @@ def initialize_db(location: str, test: bool = False) -> None:
93
93
  CREATE TABLE impulse_response_file (
94
94
  id INTEGER PRIMARY KEY NOT NULL,
95
95
  file TEXT NOT NULL,
96
- tags TEXT NOT NULL)
96
+ tags TEXT NOT NULL,
97
+ delay INTEGER NOT NULL)
97
98
  """)
98
99
 
99
100
  con.execute("""
@@ -360,11 +361,12 @@ def populate_impulse_response_file_table(
360
361
 
361
362
  con = db_connection(location=location, readonly=False, test=test)
362
363
  con.executemany(
363
- "INSERT INTO impulse_response_file (file, tags) VALUES (?, ?)",
364
+ "INSERT INTO impulse_response_file (file, tags, delay) VALUES (?, ?, ?)",
364
365
  [
365
366
  (
366
367
  impulse_response_file.file,
367
368
  json.dumps(impulse_response_file.tags),
369
+ impulse_response_file.delay,
368
370
  )
369
371
  for impulse_response_file in impulse_response_files
370
372
  ],
@@ -529,17 +531,13 @@ def update_mixture(mixdb: MixtureDatabase, mixture: Mixture, with_data: bool = F
529
531
 
530
532
  def _augmented_noise_audio(mixdb: MixtureDatabase, mixture: Mixture) -> AudioT:
531
533
  from .audio import read_audio
532
- from .audio import read_ir
533
534
  from .augmentation import apply_augmentation
534
- from .augmentation import apply_impulse_response
535
535
 
536
536
  noise = mixdb.noise_file(mixture.noise.file_id)
537
537
  noise_augmentation = mixture.noise.augmentation
538
538
 
539
539
  audio = read_audio(noise.name)
540
- audio = apply_augmentation(audio, noise_augmentation)
541
- if noise_augmentation.ir is not None:
542
- audio = apply_impulse_response(audio, read_ir(mixdb.impulse_response_file(noise_augmentation.ir))) # pyright: ignore [reportArgumentType]
540
+ audio = apply_augmentation(mixdb, audio, noise_augmentation.pre)
543
541
 
544
542
  return audio
545
543
 
@@ -553,8 +551,9 @@ def _initialize_targets_audio(mixdb: MixtureDatabase, mixture: Mixture) -> tuple
553
551
  target_audio = mixdb.read_target_audio(target.file_id)
554
552
  targets_audio.append(
555
553
  apply_augmentation(
554
+ mixdb=mixdb,
556
555
  audio=target_audio,
557
- augmentation=target.augmentation,
556
+ augmentation=target.augmentation.pre,
558
557
  frame_length=mixdb.feature_step_samples,
559
558
  )
560
559
  )
@@ -680,7 +679,7 @@ def _exhaustive_noise_mix(
680
679
  noise_offset = 0
681
680
  noise_length = estimate_augmented_length_from_length(
682
681
  length=noise_files[noise_file_id].samples,
683
- tempo=noise_augmentation.tempo,
682
+ tempo=noise_augmentation.pre.tempo,
684
683
  )
685
684
 
686
685
  for augmented_target_ids_for_mixup in augmented_target_ids_for_mixups:
@@ -922,7 +921,7 @@ def _get_next_noise_indices(
922
921
 
923
922
  noise_augmentation = augmentation_from_rule(noise_augmentations[noise_augmentation_id], num_ir)
924
923
  noise_length = estimate_augmented_length_from_length(
925
- length=noise_files[noise_file_id].samples, tempo=noise_augmentation.tempo
924
+ length=noise_files[noise_file_id].samples, tempo=noise_augmentation.pre.tempo
926
925
  )
927
926
  return noise_file_id, noise_augmentation_id, noise_augmentation, noise_length
928
927
 
@@ -946,7 +945,7 @@ def _get_next_noise_offset(
946
945
 
947
946
  noise_augmentation = augmentation_from_rule(noise_augmentations[noise_file_id], num_ir)
948
947
  noise_length = estimate_augmented_length_from_length(
949
- length=noise_files[noise_file_id].samples, tempo=noise_augmentation.tempo
948
+ length=noise_files[noise_file_id].samples, tempo=noise_augmentation.pre.tempo
950
949
  )
951
950
  if noise_offset + target_length >= noise_length:
952
951
  if noise_offset == 0:
@@ -987,7 +986,7 @@ def _get_target_info(
987
986
  target_length = max(
988
987
  estimate_augmented_length_from_length(
989
988
  length=target_files[tfi].samples,
990
- tempo=target_augmentation.tempo,
989
+ tempo=target_augmentation.pre.tempo,
991
990
  frame_length=feature_step_samples,
992
991
  ),
993
992
  target_length,
@@ -117,11 +117,11 @@ def mixture_all_speech_metadata(mixdb: MixtureDatabase, mixture: Mixture) -> lis
117
117
  # Check for tempo augmentation and adjust Interval start and end data as needed
118
118
  entries = []
119
119
  for entry in item:
120
- if target.augmentation.tempo is not None:
120
+ if target.augmentation.pre.tempo is not None:
121
121
  entries.append(
122
122
  Interval(
123
- entry.start / target.augmentation.tempo,
124
- entry.end / target.augmentation.tempo,
123
+ entry.start / target.augmentation.pre.tempo,
124
+ entry.end / target.augmentation.pre.tempo,
125
125
  entry.label,
126
126
  )
127
127
  )
@@ -153,10 +153,8 @@ def mixture_metadata(mixdb: MixtureDatabase, m_id: int | None = None, mixture: M
153
153
  speech_metadata = mixture_all_speech_metadata(mixdb, mixture)
154
154
  for mi, target in enumerate(mixture.targets):
155
155
  target_file = mixdb.target_file(target.file_id)
156
- target_augmentation = target.augmentation
157
156
  metadata += f"target {mi} name: {target_file.name}\n"
158
157
  metadata += f"target {mi} augmentation: {target.augmentation.to_dict()}\n"
159
- metadata += f"target {mi} ir: {mixdb.impulse_response_file(target_augmentation.ir)}\n"
160
158
  metadata += f"target {mi} target_gain: {target.gain if not mixture.is_noise_only else 0}\n"
161
159
  metadata += f"target {mi} class indices: {target_file.class_indices}\n"
162
160
  for key in target_file.truth_configs:
@@ -168,7 +166,6 @@ def mixture_metadata(mixdb: MixtureDatabase, m_id: int | None = None, mixture: M
168
166
  noise_augmentation = mixture.noise.augmentation
169
167
  metadata += f"noise name: {noise.name}\n"
170
168
  metadata += f"noise augmentation: {noise_augmentation.to_dict()}\n"
171
- metadata += f"noise ir: {mixdb.impulse_response_file(noise_augmentation.ir)}\n"
172
169
  metadata += f"noise offset: {mixture.noise_offset}\n"
173
170
  metadata += f"snr: {mixture.snr}\n"
174
171
  metadata += f"random_snr: {mixture.snr.is_random}\n"
@@ -260,33 +257,33 @@ def to_target(entry: TargetRecord) -> Target:
260
257
  )
261
258
 
262
259
 
263
- def get_target(mixdb: MixtureDatabase, mixture: Mixture, targets_audio: list[AudioT], use_cache: bool = True) -> AudioT:
260
+ def get_target(mixdb: MixtureDatabase, mixture: Mixture, targets_audio: list[AudioT]) -> AudioT:
264
261
  """Get the augmented target audio data for the given mixture record
265
262
 
266
263
  :param mixdb: Mixture database
267
264
  :param mixture: Mixture record
268
265
  :param targets_audio: List of augmented target audio data (one per target in the mixup)
269
- :param use_cache: If true, use LRU caching
270
266
  :return: Sum of augmented target audio data
271
267
  """
272
- # Apply impulse responses to targets
268
+ # Apply post-truth augmentation effects to targets and sum
273
269
  import numpy as np
274
270
 
275
- from .audio import read_ir
276
- from .augmentation import apply_impulse_response
277
-
278
- targets_ir = []
279
- for idx, target in enumerate(targets_audio):
280
- ir_idx = mixture.targets[idx].augmentation.ir
281
- if ir_idx is not None:
282
- targets_ir.append(
283
- apply_impulse_response(audio=target, ir=read_ir(mixdb.impulse_response_file(int(ir_idx)), use_cache)) # pyright: ignore [reportArgumentType]
271
+ from .augmentation import apply_augmentation
272
+
273
+ targets_post = []
274
+ for idx, target_audio in enumerate(targets_audio):
275
+ target = mixture.targets[idx]
276
+ targets_post.append(
277
+ apply_augmentation(
278
+ mixdb=mixdb,
279
+ audio=target_audio,
280
+ augmentation=target.augmentation.post,
281
+ frame_length=mixdb.feature_step_samples,
284
282
  )
285
- else:
286
- targets_ir.append(target)
283
+ )
287
284
 
288
285
  # Return sum of targets
289
- return np.sum(targets_ir, axis=0)
286
+ return np.sum(targets_post, axis=0)
290
287
 
291
288
 
292
289
  def get_transform_from_audio(audio: AudioT, transform: ForwardTransform) -> tuple[AudioF, EnergyT]:
@@ -400,7 +397,7 @@ def augmented_target_samples(
400
397
  [
401
398
  estimate_augmented_length_from_length(
402
399
  length=target_files[fi].samples,
403
- tempo=target_augmentations[ai].tempo,
400
+ tempo=target_augmentations[ai].pre.tempo,
404
401
  frame_length=feature_step_samples,
405
402
  )
406
403
  for fi, ai in it
@@ -420,7 +417,7 @@ def augmented_noise_samples(noise_files: list[NoiseFile], noise_augmentations: l
420
417
  def augmented_noise_length(noise_file: NoiseFile, noise_augmentation: Augmentation) -> int:
421
418
  from .augmentation import estimate_augmented_length_from_length
422
419
 
423
- return estimate_augmented_length_from_length(length=noise_file.samples, tempo=noise_augmentation.tempo)
420
+ return estimate_augmented_length_from_length(length=noise_file.samples, tempo=noise_augmentation.pre.tempo)
424
421
 
425
422
 
426
423
  def get_textgrid_tier_from_target_file(target_file: str, tier: str) -> SpeechMetadata | None:
@@ -0,0 +1,63 @@
1
+ import numpy as np
2
+
3
+
4
def get_impulse_response_delay(file: str) -> int:
    """Estimate the delay introduced by an impulse response file

    A short burst of seeded white Gaussian noise is convolved with the impulse
    response, and the shift of the convolved burst relative to the original
    burst is measured with tdoa (PHAT weighting, 16x interpolation).

    :param file: Impulse response audio file
    :return: Estimated delay, rounded to the nearest integer
    """
    from sonusai.utils import temp_seed

    from .audio import raw_read_audio

    impulse_response, sample_rate = raw_read_audio(file)

    # Length of the reference burst: 0.05 * sample_rate, rounded up
    burst_length = int(np.ceil(0.05 * sample_rate))

    # Fixed seed so the same reference burst is generated on every call
    with temp_seed(42):
        reference = np.random.normal(loc=0, scale=0.2, size=burst_length).astype(np.float32)

    convolved = np.convolve(impulse_response, reference)
    shift = tdoa(convolved, reference, interp=16, phat=True)

    return int(np.round(shift))
17
+
18
+
19
def tdoa(signal: np.ndarray, reference: np.ndarray, interp: int = 1, phat: bool = False, fs: int | float = 1) -> float:
    """Estimate how far signal is shifted relative to reference using generalized cross-correlation.

    :param signal: The array whose tdoa is measured
    :param reference: The reference array
    :param interp: Interpolation factor for the output array
    :param phat: Apply the PHAT weighting
    :param fs: The sampling frequency of the input arrays
    :return: The estimated delay between the two arrays
    """
    # Zero lag sits this many (un-interpolated) entries into the correlation output
    zero_lag = reference.shape[0] - 1

    cross_correlation = correlate(signal, reference, interp=interp, phat=phat)

    # Position of the strongest peak on the interpolated grid
    peak_index = np.argmax(np.abs(cross_correlation))

    return float((peak_index / interp - zero_lag) / fs)
36
+
37
+
38
def correlate(x1: np.ndarray, x2: np.ndarray, interp: int = 1, phat: bool = False) -> np.ndarray:
    """Compute the full cross-correlation between x1 and x2 using FFTs

    The result is ordered from the most negative lag to the most positive lag:
    interp * (x2.shape[0] - 1) negative-lag entries followed by
    interp * x1.shape[0] non-negative-lag entries.

    :param x1: Input array 1
    :param x2: Input array 2
    :param interp: Interpolation factor for the output array
    :param phat: Apply the PHAT weighting
    :return: The cross-correlation between the two arrays
    """
    n_x1 = x1.shape[0]
    n_x2 = x2.shape[0]

    # Length of the full linear cross-correlation
    n = n_x1 + n_x2 - 1

    fft1 = np.fft.rfft(x1, n=n)
    fft2 = np.fft.rfft(x2, n=n)

    if phat:
        # Divide each spectrum by its own magnitude; the small eps keeps the
        # denominator non-zero for empty bins
        eps1 = np.mean(np.abs(fft1)) * 1e-10
        fft1 /= np.abs(fft1) + eps1
        eps2 = np.mean(np.abs(fft2)) * 1e-10
        fft2 /= np.abs(fft2) + eps2

    out = np.fft.irfft(fft1 * np.conj(fft2), n=int(n * interp))

    # Negative lags live at the tail of the circular result. Slice from an
    # explicit start index rather than a negative one: when x2 has a single
    # sample there are no negative lags, and out[-0:] would return the whole
    # array instead of an empty slice.
    n_negative = interp * (n_x2 - 1)
    n_non_negative = interp * n_x1

    return np.concatenate([out[out.shape[0] - n_negative :], out[:n_non_negative]])
sonusai/mixture/mixdb.py CHANGED
@@ -255,6 +255,16 @@ class MixtureDatabase:
255
255
  "Predicted rating of overall quality of mixture versus true targets",
256
256
  ),
257
257
  MetricDoc("Mixture Metrics", "ssnr", "Segmental SNR"),
258
+ MetricDoc("Mixture Metrics", "mxdco", "Mixture DC offset"),
259
+ MetricDoc("Mixture Metrics", "mxmin", "Mixture min level"),
260
+ MetricDoc("Mixture Metrics", "mxmax", "Mixture max levl"),
261
+ MetricDoc("Mixture Metrics", "mxpkdb", "Mixture Pk lev dB"),
262
+ MetricDoc("Mixture Metrics", "mxlrms", "Mixture RMS lev dB"),
263
+ MetricDoc("Mixture Metrics", "mxpkr", "Mixture RMS Pk dB"),
264
+ MetricDoc("Mixture Metrics", "mxtr", "Mixture RMS Tr dB"),
265
+ MetricDoc("Mixture Metrics", "mxcr", "Mixture Crest factor"),
266
+ MetricDoc("Mixture Metrics", "mxfl", "Mixture Flat factor"),
267
+ MetricDoc("Mixture Metrics", "mxpkc", "Mixture Pk count"),
258
268
  MetricDoc("Mixture Metrics", "mxtdco", "Mixture target DC offset"),
259
269
  MetricDoc("Mixture Metrics", "mxtmin", "Mixture target min level"),
260
270
  MetricDoc("Mixture Metrics", "mxtmax", "Mixture target max levl"),
@@ -681,7 +691,7 @@ class MixtureDatabase:
681
691
 
682
692
  with self.db() as c:
683
693
  return [
684
- ImpulseResponseFile(impulse_response[1], json.loads(impulse_response[2]))
694
+ ImpulseResponseFile(impulse_response[1], json.loads(impulse_response[2]), impulse_response[3])
685
695
  for impulse_response in c.execute(
686
696
  "SELECT impulse_response_file.* FROM impulse_response_file"
687
697
  ).fetchall()
@@ -700,15 +710,25 @@ class MixtureDatabase:
700
710
  ]
701
711
 
702
712
  def impulse_response_file(self, ir_id: int | None) -> str | None:
703
- """Get impulse response file with ID from db
713
+ """Get impulse response file name with ID from db
704
714
 
705
715
  :param ir_id: Impulse response file ID
706
- :return: Noise
716
+ :return: Impulse response file name
707
717
  """
708
718
  if ir_id is None:
709
719
  return None
710
720
  return _impulse_response_file(self.db, ir_id, self.use_cache)
711
721
 
722
def impulse_response_delay(self, ir_id: int | None) -> int | None:
    """Look up the impulse response delay for the given ID in the db

    :param ir_id: Impulse response file ID
    :return: Impulse response delay, or None when ir_id is None
    """
    return None if ir_id is None else _impulse_response_delay(self.db, ir_id, self.use_cache)
731
+
712
732
  @cached_property
713
733
  def num_impulse_response_files(self) -> int:
714
734
  """Get number of impulse response files from db
@@ -814,18 +834,11 @@ class MixtureDatabase:
814
834
  :return: Augmented noise audio
815
835
  """
816
836
  from .audio import read_audio
817
- from .audio import read_ir
818
837
  from .augmentation import apply_augmentation
819
- from .augmentation import apply_impulse_response
820
838
 
821
839
  noise = self.noise_file(mixture.noise.file_id)
822
840
  audio = read_audio(noise.name, self.use_cache)
823
- audio = apply_augmentation(audio, mixture.noise.augmentation)
824
- if mixture.noise.augmentation.ir is not None:
825
- audio = apply_impulse_response(
826
- audio,
827
- read_ir(self.impulse_response_file(mixture.noise.augmentation.ir), self.use_cache), # pyright: ignore [reportArgumentType]
828
- )
841
+ audio = apply_augmentation(self, audio, mixture.noise.augmentation.pre)
829
842
 
830
843
  return audio
831
844
 
@@ -859,8 +872,9 @@ class MixtureDatabase:
859
872
  for target in mixture.targets:
860
873
  target_audio = self.read_target_audio(target.file_id)
861
874
  target_audio = apply_augmentation(
875
+ mixdb=self,
862
876
  audio=target_audio,
863
- augmentation=target.augmentation,
877
+ augmentation=target.augmentation.pre,
864
878
  frame_length=self.feature_step_samples,
865
879
  )
866
880
  target_audio = apply_gain(audio=target_audio, gain=mixture.target_snr_gain)
@@ -1119,8 +1133,7 @@ class MixtureDatabase:
1119
1133
  offsets = range(0, mixture.samples, self.ft_config.overlap)
1120
1134
  if len(target_energy) != len(offsets):
1121
1135
  raise ValueError(
1122
- f"Number of frames in energy, {len(target_energy)},"
1123
- f" is not number of frames in mixture, {len(offsets)}"
1136
+ f"Number of frames in energy, {len(target_energy)}, is not number of frames in mixture, {len(offsets)}"
1124
1137
  )
1125
1138
 
1126
1139
  for idx, offset in enumerate(offsets):
@@ -1370,11 +1383,11 @@ class MixtureDatabase:
1370
1383
  # Check for tempo augmentation and adjust Interval start and end data as needed
1371
1384
  entries = []
1372
1385
  for entry in data:
1373
- if target.augmentation.tempo is not None:
1386
+ if target.augmentation.pre.tempo is not None:
1374
1387
  entries.append(
1375
1388
  Interval(
1376
- entry.start / target.augmentation.tempo,
1377
- entry.end / target.augmentation.tempo,
1389
+ entry.start / target.augmentation.pre.tempo,
1390
+ entry.end / target.augmentation.pre.tempo,
1378
1391
  entry.label,
1379
1392
  )
1380
1393
  )
@@ -1595,6 +1608,19 @@ class MixtureDatabase:
1595
1608
 
1596
1609
  speech = create_speech()
1597
1610
 
1611
+ def create_mixture_stats() -> Callable[[], AudioStatsMetrics]:
1612
+ state: AudioStatsMetrics | None = None
1613
+
1614
+ def get() -> AudioStatsMetrics:
1615
+ nonlocal state
1616
+ if state is None:
1617
+ state = calc_audio_stats(mixture_audio(), self.fg_info.ft_config.length / SAMPLE_RATE)
1618
+ return state
1619
+
1620
+ return get
1621
+
1622
+ mixture_stats = create_mixture_stats()
1623
+
1598
1624
  def create_targets_stats() -> Callable[[], list[AudioStatsMetrics]]:
1599
1625
  state: list[AudioStatsMetrics] | None = None
1600
1626
 
@@ -1803,6 +1829,36 @@ class MixtureDatabase:
1803
1829
  extended=False,
1804
1830
  )
1805
1831
 
1832
+ if m == "mxdco":
1833
+ return mixture_stats().dco
1834
+
1835
+ if m == "mxmin":
1836
+ return mixture_stats().min
1837
+
1838
+ if m == "mxmax":
1839
+ return mixture_stats().max
1840
+
1841
+ if m == "mxpkdb":
1842
+ return mixture_stats().pkdb
1843
+
1844
+ if m == "mxlrms":
1845
+ return mixture_stats().lrms
1846
+
1847
+ if m == "mxpkr":
1848
+ return mixture_stats().pkr
1849
+
1850
+ if m == "mxtr":
1851
+ return mixture_stats().tr
1852
+
1853
+ if m == "mxcr":
1854
+ return mixture_stats().cr
1855
+
1856
+ if m == "mxfl":
1857
+ return mixture_stats().fl
1858
+
1859
+ if m == "mxpkc":
1860
+ return mixture_stats().pkc
1861
+
1806
1862
  if m == "mxtdco":
1807
1863
  return target_stats().dco
1808
1864
 
@@ -2042,12 +2098,12 @@ def __noise_file(db: partial, n_id: int) -> NoiseFile:
2042
2098
 
2043
2099
 
2044
2100
  def _impulse_response_file(db: partial, ir_id: int, use_cache: bool = True) -> str:
2045
- """Get impulse response file with ID from db
2101
+ """Get impulse response file name with ID from db
2046
2102
 
2047
2103
  :param db: Database context
2048
2104
  :param ir_id: Impulse response file ID
2049
2105
  :param use_cache: If true, use LRU caching
2050
- :return: Impulse response
2106
+ :return: Impulse response file name
2051
2107
  """
2052
2108
  if use_cache:
2053
2109
  return __impulse_response_file(db, ir_id)
@@ -2069,6 +2125,34 @@ def __impulse_response_file(db: partial, ir_id: int) -> str:
2069
2125
  )
2070
2126
 
2071
2127
 
2128
def _impulse_response_delay(db: partial, ir_id: int, use_cache: bool = True) -> int:
    """Fetch the impulse response delay for the given ID from the db

    :param db: Database context
    :param ir_id: Impulse response file ID
    :param use_cache: If true, use LRU caching
    :return: Impulse response delay
    """
    # __wrapped__ is the undecorated function, which bypasses the LRU cache
    lookup = __impulse_response_delay if use_cache else __impulse_response_delay.__wrapped__
    return lookup(db, ir_id)
2139
+
2140
+
2141
@lru_cache
def __impulse_response_delay(db: partial, ir_id: int) -> int:
    """Query the db for the delay of the impulse response file with the given ID

    :param db: Database context
    :param ir_id: Impulse response file ID
    :return: Impulse response delay
    """
    # The table is queried with ir_id + 1 -- presumably because ir_id is zero-based
    # while the table's integer primary key starts at 1; confirm against the
    # code that populates the table.
    row_id = ir_id + 1

    with db() as cursor:
        row = cursor.execute(
            """
            SELECT impulse_response_file.delay
            FROM impulse_response_file
            WHERE ? = impulse_response_file.id
            """,
            (row_id,),
        ).fetchone()
        return int(row[0])
2154
+
2155
+
2072
2156
  def _mixture(db: partial, m_id: int, use_cache: bool = True) -> Mixture:
2073
2157
  """Get mixture record with ID from db
2074
2158
 
@@ -16,14 +16,11 @@ def get_augmented_targets(
16
16
 
17
17
  augmented_targets: list[AugmentedTarget] = []
18
18
  for mixup in mixups:
19
- augmentation_indices = get_augmentation_indices_for_mixup(target_augmentations, mixup)
19
+ target_augmentation_indices = get_augmentation_indices_for_mixup(target_augmentations, mixup)
20
20
  for target_index in range(len(target_files)):
21
- for augmentation_index in augmentation_indices:
21
+ for target_augmentation_index in target_augmentation_indices:
22
22
  augmented_targets.append(
23
- AugmentedTarget(
24
- target_id=target_index,
25
- target_augmentation_id=augmentation_index,
26
- )
23
+ AugmentedTarget(target_id=target_index, target_augmentation_id=target_augmentation_index)
27
24
  )
28
25
 
29
26
  return augmented_targets
sonusai/utils/__init__.py CHANGED
@@ -1,5 +1,6 @@
1
1
  # SonusAI general utilities
2
2
  # ruff: noqa: F401
3
+
3
4
  from .asl_p56 import asl_p56
4
5
  from .asr import ASRResult
5
6
  from .asr import calc_asr
@@ -53,5 +54,6 @@ from .stacked_complex import stacked_complex_imag
53
54
  from .stacked_complex import stacked_complex_real
54
55
  from .stacked_complex import unstack_complex
55
56
  from .stratified_shuffle_split import stratified_shuffle_split_mixid
57
+ from .temp_seed import temp_seed
56
58
  from .write_audio import write_audio
57
59
  from .yes_or_no import yes_or_no
@@ -0,0 +1,13 @@
1
+ import contextlib
2
+
3
+ import numpy as np
4
+
5
+
6
@contextlib.contextmanager
def temp_seed(seed):
    """Context manager that seeds NumPy's global RNG for the duration of a block

    The global RNG state captured on entry is restored on exit, including when
    the block raises.

    :param seed: Seed passed to np.random.seed
    """
    saved_state = np.random.get_state()
    np.random.seed(seed)
    try:
        yield
    finally:
        # Put the global generator back where the caller left it
        np.random.set_state(saved_state)
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: sonusai
3
- Version: 0.19.10
3
+ Version: 0.20.2
4
4
  Summary: Framework for building deep neural network models for sound, speech, and voice AI
5
5
  Home-page: https://aaware.com
6
6
  License: GPL-3.0-only
@@ -3,7 +3,7 @@ sonusai/aawscd_probwrite.py,sha256=QZLMQrmPr3OjZ06buyYDwlnk9YPCpyr4KHkBjPsiqjU,3
3
3
  sonusai/audiofe.py,sha256=iFdthh4UrOvziT8urjrjD7dACWZPQz9orM5bVAW3WSQ,11269
4
4
  sonusai/calc_metric_spenh.py,sha256=XWa2DzLSCEQ6GzsJv-YHfnN51f_oFwcRMMgMzusAvYA,49304
5
5
  sonusai/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- sonusai/data/genmixdb.yml,sha256=U_kLbE7gZ5rA7yNSB2NW7eK5dnYP5grJVMR321VMLt8,940
6
+ sonusai/data/genmixdb.yml,sha256=qFK_VoUxHmc-EhJYZr4pAEY9tu6zRvloubq0NmrbH6I,956
7
7
  sonusai/data/silero_vad_v5.1.jit,sha256=hcSOHw7LYE5dKiaPPM-5EtT36TWs3IavWj_FsK6nspo,2269612
8
8
  sonusai/data/silero_vad_v5.1.onnx,sha256=JiOilT9v89LB5hdAxs23FoEzR5smff7xFKSjzFvdeI8,2327524
9
9
  sonusai/data/speech_ma01_01.wav,sha256=PK0vMKg-NR6rPE3KouxHGF6PKXnJCr7AwjMqfu98LUA,76644
@@ -12,12 +12,13 @@ sonusai/deprecated/gentcst.py,sha256=nKbHy3aHreHqA-XnLQOzOApS8RuTNUFqnx52a8I5zLQ
12
12
  sonusai/deprecated/plot.py,sha256=xL0w8Dtjdns2KX8tbTrdBGXviy_aoV3WUJSVKPZkQng,17423
13
13
  sonusai/deprecated/tplot.py,sha256=0p238DvTaP4oU9y-dp0JdLaTV4TKrooAwbx7zdz_QAc,14641
14
14
  sonusai/doc/__init__.py,sha256=KyQ26Um0RM8A3GYsb_tbFH64RwpoAw6lja2f_moUWas,33
15
- sonusai/doc/doc.py,sha256=VZXauwbOb-VIufWw-lu0yfrd6jMRPeFeVPaaEjZNvn4,18881
15
+ sonusai/doc/doc.py,sha256=nEnvau0PIl2xbxET8AjFkwTLVShemmQ4CiTio94aLg0,19275
16
16
  sonusai/doc.py,sha256=zSmXpioB0YS_5-7kqfS5cr--veSaXkxRKzldId9Hyoc,878
17
17
  sonusai/genft.py,sha256=K2wjO5J48UgyhCj2Sx789nkjt0DWtYgnRDbQyNtjCSY,5591
18
18
  sonusai/genmetrics.py,sha256=jORQCdf_SCrtcvDd47lgcPgQTplG956RTAqmf58Xe8Y,5689
19
19
  sonusai/genmix.py,sha256=mSc5FfAYrUt3zloPSnp81dks8ntvSH6jyk-nh97wnww,6707
20
20
  sonusai/genmixdb.py,sha256=SsbHRpPoJ77XzOBQRRDheucyuJzE-tucQtRoYl89ApU,17841
21
+ sonusai/ir_metric.py,sha256=n35_RssAk2jjqm1iXJ6euMtK00LV4qohdBfDAZZpNlU,19581
21
22
  sonusai/lsdb.py,sha256=0HOGDDndB3LT9cz9AaxKIpt9vslAoSP4F239gply4Xg,5149
22
23
  sonusai/main.py,sha256=HbnEia1B1-Z-mlHkLfojH8aj9GIpL1Btw3oH60T_CCQ,2590
23
24
  sonusai/metrics/__init__.py,sha256=ssV6JEK_oklRSocsp6HMcG-GtJvV8IkRQtdKhHHmwU8,878
@@ -37,31 +38,27 @@ sonusai/metrics/class_summary.py,sha256=ZA7zNgwBpmTs1TP_t4jRT0pWnDnATC_up_8qE4aH
37
38
  sonusai/metrics/confusion_matrix_summary.py,sha256=zBL_Ke7wF6oKtrKZPr0fsyF_taofdjxBlZmKodu0xUA,3143
38
39
  sonusai/metrics/one_hot.py,sha256=hmuyh-9tpRjb_oyqU3WqZ14zItpRJQfcqBDKJeb5H9I,13930
39
40
  sonusai/metrics/snr_summary.py,sha256=t8Fi_8WtboTi8flkZuOiHq9H3-nIELx4AKvnm-qvxLQ,5785
40
- sonusai/metrics_summary.py,sha256=HVqjgCavxM1yzyoeDZSg_bJaXrifNQxNY7xYNKKva8g,12004
41
- sonusai/mixture/__init__.py,sha256=ePkmFbBltwHsx1eJDb_RDieTceZtqa1wVY1D2Pfg2rw,5162
42
- sonusai/mixture/audio.py,sha256=5iq39_Q0q9xuN_FNylvnn-gAZ8Io3Ir1Mqj60mVQeaQ,3432
43
- sonusai/mixture/augmentation.py,sha256=s8QlPHnFJOblRU59fMQ-Zqysiv4OUJ7CxLRcV81lnaA,10407
41
+ sonusai/metrics_summary.py,sha256=DchpgBNYcBPz4t1YRindCm1CVmJLmXY7-oyaXpxBnWA,12106
42
+ sonusai/mixture/__init__.py,sha256=9TE21nlj4TOrSPopLh5Lh769v68v0kWgNkr_GGk9TEs,5300
43
+ sonusai/mixture/audio.py,sha256=MlsuhY8Zc8puBClO5utBP3mveAk8MLZtXvF6ztuisW4,6629
44
+ sonusai/mixture/augmentation.py,sha256=UYNdfVcYCsmegicKpGLTsYyvO97NQOTLoZjwI9sZROk,14646
44
45
  sonusai/mixture/class_count.py,sha256=zcC3BDYMPN6wJYmO1RcOuqmrnTQIbMSznl33oN3e2sc,597
45
- sonusai/mixture/config.py,sha256=g5ZmOhFYqmEdRQYSgfDIZ9VM0QiTwBqk7vIyAvxnPMo,24211
46
- sonusai/mixture/constants.py,sha256=fXcWuSI4YZOAuncLGEUeEW9WWNZeN-6mI8LFNILwyTc,1494
46
+ sonusai/mixture/config.py,sha256=gZUPLGl7VKW32HfnFMEajKgJE35ZJ_edpxaA0TRPE1E,24469
47
+ sonusai/mixture/constants.py,sha256=yGXNjB87boJVSt1Q8hBTrNzOP0XVZcFf1k34u1yyUWU,1481
47
48
  sonusai/mixture/data_io.py,sha256=KZGqhHd9_ucAfZEAXPIc5XL3aHYgdV5CyqaBx5_t8OM,5551
48
- sonusai/mixture/datatypes.py,sha256=xNDBWFTVQ3plJ7qHKzrXyV4pffPYuf1xMVqBsR40n4o,10487
49
+ sonusai/mixture/datatypes.py,sha256=LFWDsgGVY7Z3VPjWaB5g8q5Ss8v5bR2EfU6ygI0wZaU,10707
49
50
  sonusai/mixture/db_datatypes.py,sha256=kvdUOMS6Pkkj9AmxCiq6zM8x7jbPPi933tVaXRxbTdQ,1534
50
51
  sonusai/mixture/eq_rule_is_valid.py,sha256=O3gCAs_0hpxENK5b7kxxpDmOpKHlXGBWuLGT_97ARSM,1210
51
52
  sonusai/mixture/feature.py,sha256=L0bPFG0RO-CrrtTStUMt_14euYsVo8_TWTP2IKSFKaA,2335
52
- sonusai/mixture/generation.py,sha256=yoJOcY9KPe_B1RVnENVr4ekcnXyZJMdvKMbJggpLOi4,38084
53
- sonusai/mixture/helpers.py,sha256=Bt9njNb_OZ3j02qgrVEMZiL0hX4kXtFK_tkPoGoeb4Y,15787
53
+ sonusai/mixture/generation.py,sha256=f3DsDvVAAdpvh1lBWV-jMj5qNeZ2qmR9RA_4eI4NDcU,37954
54
+ sonusai/mixture/helpers.py,sha256=nNqK__MBp8f10telUU3A8FhkNeXYPGCx8dxxDpzCAbg,15464
55
+ sonusai/mixture/ir_delay.py,sha256=WRoYjuHpsppe0D5qQExNxsCyKbMPDfWJ4CTlr3Ps50k,2036
54
56
  sonusai/mixture/log_duration_and_sizes.py,sha256=qhgl87C2KbjxLdKEpjYOoqNL6rc-8-PB4R7Gx_7UG8g,1240
55
- sonusai/mixture/mixdb.py,sha256=Yg3FQqb6oI3LsFh_00CvMeH1Rrmn2pA5waaAyJDCpfY,75912
56
- sonusai/mixture/soundfile_audio.py,sha256=At_ZC2b9pZ_9IYp1UxyPzRoBK9-1cKPCLMm74F1AjKE,4092
57
- sonusai/mixture/sox_audio.py,sha256=7ouCLqXYS6tjG2L0v5lugVO7z5UwJmsr1VigbrXhs74,16725
58
- sonusai/mixture/sox_augmentation.py,sha256=DtfGLPaB1BIt2wvTEA__MYkGFNU85Tuup5BFsIVrh0E,4546
57
+ sonusai/mixture/mixdb.py,sha256=oMBbi0HRiEBtN7lDup8qhPBIQ_td89CHa0bYy2PXdWQ,78744
59
58
  sonusai/mixture/spectral_mask.py,sha256=U9XJ_SAoI9b67K_3SE7bNw6U8cPGFOBttaZAxMjA_Jc,2042
60
59
  sonusai/mixture/target_class_balancing.py,sha256=o_TZ8kVYq10lgeXHh3GUFfflfdUvRt0FekFu2eaNkDs,4251
61
- sonusai/mixture/targets.py,sha256=6emo2fxxp9ZhSpHuUM9xIjYMz8zeIHAw684jT3l7fAs,6442
60
+ sonusai/mixture/targets.py,sha256=oOeqdE-n-sCq_9luEt82HEP0MRCaHG_7J-p3nCftkAc,6399
62
61
  sonusai/mixture/tokenized_shell_vars.py,sha256=lXTzUDutuBWGV1zIsqeIxWmy-eKm0Vx1y8-iLdsL1gQ,4921
63
- sonusai/mixture/torchaudio_audio.py,sha256=72Hxo5TKAW7mYpRy15QFfD7AYDORBk6bVCcHENniWGw,3116
64
- sonusai/mixture/torchaudio_augmentation.py,sha256=uFAKxIfs50J5FR-WXodsEACm2Ao-t5dZRSJ0DwTAfBg,3930
65
62
  sonusai/mixture/truth.py,sha256=-CwwawFRGjqodR2yKvAMGL1XaYLct-tli7wZ2gbhLtQ,2121
66
63
  sonusai/mixture/truth_functions/__init__.py,sha256=0mlOFChPnXG5BC0eKOe4n9VH17jY4iOqZFLuF6Gprdk,1505
67
64
  sonusai/mixture/truth_functions/crm.py,sha256=iidcffXfqV8k9O5wt5KTWIAFaTSjmhV5ucKZPbTgpvQ,3809
@@ -86,7 +83,7 @@ sonusai/speech/types.py,sha256=4eKVPAktpkIrZ2qoVp2iT45zxTVNocQEGT6O_Zlub_w,214
86
83
  sonusai/speech/vctk.py,sha256=WInvRRRkZCW6t_NcZAJffJzgCbyetal-j2w0kKX5SDw,1527
87
84
  sonusai/speech/voxceleb.py,sha256=Uu1kB1krf8hess1yuvGbYfV_VgYhklEyoz4I7KfrVpw,2658
88
85
  sonusai/summarize_metric_spenh.py,sha256=2w81ZgJahYvD6wCpE3DFoUFrXexLXjO44ITRVm1HJXw,1858
89
- sonusai/utils/__init__.py,sha256=z72OlzZCHpYfYHKnHn7jznj6Zt7zB-FyO6hIgFk45As,2379
86
+ sonusai/utils/__init__.py,sha256=D7IFq4Ozy_DRq4pC50YRxGQybqWEDXglCuAgxNIpFyU,2413
90
87
  sonusai/utils/asl_p56.py,sha256=cPUVwXawF7vLJgs4zUtoRGk7Wdbe5KKti_-v_8xIU10,3862
91
88
  sonusai/utils/asr.py,sha256=ubiU3E61HN3r9MhPV7ci37cnLZowll8KfjUS7os3Sho,2822
92
89
  sonusai/utils/asr_functions/__init__.py,sha256=HKGRm_c48tcxlfwqH63m-MvhAoK_pCcw76lxmFmiP_U,63
@@ -121,10 +118,11 @@ sonusai/utils/reshape.py,sha256=Ozuh3UlmAS5NCeOK7NR8KgcQacHvgq10pys0VfCnOPU,5746
121
118
  sonusai/utils/seconds_to_hms.py,sha256=9Ya9O97txFtTIXZUQw1K8g7b7Xx-ptvUtMUlzsIduTo,260
122
119
  sonusai/utils/stacked_complex.py,sha256=JW6iAa1C-4Tuh4dD5c-D-O-yo-OY5Xm0AKVU0YsqsJU,2782
123
120
  sonusai/utils/stratified_shuffle_split.py,sha256=fcGW8nkZIwUqq1qtxbK_ZH58sYULqZfv7iNBQnKGH-M,6706
121
+ sonusai/utils/temp_seed.py,sha256=Ava5TCGpvDBtaRx2l-40CuGIjhgLevu1KFfZsgr38qM,218
124
122
  sonusai/utils/write_audio.py,sha256=0lKdaX57N6H-UWdioqmXCJMjwT1eBz5B-bSGqDvloAc,838
125
123
  sonusai/utils/yes_or_no.py,sha256=0h1okjXmDNbJp7rZJFR2V-HFU1GJDm3YFTUVmYExkOU,263
126
124
  sonusai/vars.py,sha256=kBBzuvC8szmdIZEEDA7XXmD765addZKdM2aFipeGO1w,933
127
- sonusai-0.19.10.dist-info/METADATA,sha256=ibwwklSb5-vmwAJMdRhW0MBWxqQYFVsYpEx5-8oaRXI,2536
128
- sonusai-0.19.10.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
129
- sonusai-0.19.10.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
130
- sonusai-0.19.10.dist-info/RECORD,,
125
+ sonusai-0.20.2.dist-info/METADATA,sha256=CT_z1tJfku142nH0PL22DT7EEc8bFfJj9qyWcR7v6FU,2535
126
+ sonusai-0.20.2.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
127
+ sonusai-0.20.2.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
128
+ sonusai-0.20.2.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.1
2
+ Generator: poetry-core 2.0.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any