sonusai 0.15.6__py3-none-any.whl → 0.15.8__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry and is provided for informational purposes only.
@@ -433,7 +433,7 @@ def _initialize_targets_audio(mixdb: MixtureDatabase, mixture: Mixture) -> tuple
  # target_gain is used to back out the gain augmentation in order to return the target audio
  # to its normalized level when calculating truth (if needed).
  if target.augmentation.gain is not None:
- target.gain = round(10 ** (target.augmentation.gain / 20), ndigits=7)
+ target.gain = round(10 ** (target.augmentation.gain / 20), ndigits=5)
  else:
  target.gain = 1
 
@@ -507,8 +507,8 @@ def _initialize_mixture_gains(mixdb: MixtureDatabase,
  mixture.target_snr_gain *= gain_adjustment
  mixture.noise_snr_gain *= gain_adjustment
 
- mixture.target_snr_gain = round(mixture.target_snr_gain, ndigits=7)
- mixture.noise_snr_gain = round(mixture.noise_snr_gain, ndigits=7)
+ mixture.target_snr_gain = round(mixture.target_snr_gain, ndigits=5)
+ mixture.noise_snr_gain = round(mixture.noise_snr_gain, ndigits=5)
 
  return mixture
 
 
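Both hunks above only change the rounding precision of the derived gains from seven to five decimal places. A minimal sketch of the dB-to-linear conversion being rounded (the gain value is hypothetical):

```python
# Hypothetical augmentation gain in dB; mirrors the conversion and rounding above.
gain_db = -13.5
linear_gain = round(10 ** (gain_db / 20), ndigits=5)  # was ndigits=7 in 0.15.6
print(linear_gain)  # 0.21135
```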
@@ -23,6 +23,8 @@ class Data:
  num_classes=config.num_classes,
  truth_mutex=config.mutex)
 
+ self.num_bands = fg.num_bands
+ self.ttype = fg.ftransform_ttype
  self.frame_size = fg.ftransform_R
 
  if len(target_audio) % self.frame_size != 0:
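The truth Data object now also caches the feature generator's band count and forward-transform type. A rough stand-in (the class and values below are hypothetical, not pyaaware's API) showing how the truth functions read these attributes:

```python
# Hypothetical stand-in for the feature generator attributes cached above.
from dataclasses import dataclass

@dataclass
class FakeFeatureGenerator:
    num_bands: int = 514            # compared against config.num_classes by the truth functions below
    ftransform_ttype: str = 'stft'  # forward transform type, e.g. 'stft' or 'tdac-co'
    ftransform_R: int = 256         # forward transform frame size

fg = FakeFeatureGenerator()
num_bands, ttype, frame_size = fg.num_bands, fg.ftransform_ttype, fg.ftransform_R
print(num_bands, ttype, frame_size)
```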
@@ -13,12 +13,13 @@ Calculates the true transform of the target using the STFT
  configuration defined by the feature. This will include a
  forward transform window if defined by the feature.
 
- Output shape: [:, 2 * bins] (stacked real, imag)
+ Output shape: [:, num_classes]
+ (target stacked real, imag; or real only for tdac-co)
  """
 
  from sonusai import SonusAIError
 
- if data.config.num_classes != 2 * data.target_fft.bins:
+ if data.config.num_classes != data.num_bands:
  raise SonusAIError(f'Invalid num_classes for target_f truth: {data.config.num_classes}')
 
  target_freq = _execute_fft(data.target_audio, data.target_fft, len(data.offsets))
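The validation now compares num_classes against the feature generator's num_bands instead of 2 * bins. A hypothetical helper (not part of the package) capturing the relationship these changes imply between FFT bins, transform type, and truth width:

```python
# Hypothetical helper: complex transforms stack real and imaginary parts
# (2 * bins wide), while the real-valued 'tdac-co' transform keeps only
# the real part (bins wide).
def expected_truth_width(bins: int, ttype: str) -> int:
    return bins if ttype == 'tdac-co' else 2 * bins

assert expected_truth_width(bins=257, ttype='stft') == 514
assert expected_truth_width(bins=256, ttype='tdac-co') == 256
```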
@@ -28,6 +29,7 @@ Output shape: [:, 2 * bins] (stacked real, imag)
  frame_size=data.frame_size,
  zero_based_indices=data.zero_based_indices,
  bins=data.target_fft.bins,
+ ttype=data.ttype,
  start=0,
  truth=data.truth)
 
@@ -43,11 +45,13 @@ using the STFT configuration defined by the feature. This
  will include a forward transform window if defined by the
  feature.
 
- Output shape: [:, 4 * bins] (target stacked real, imag; mixture stacked real, imag)
+ Output shape: [:, 2 * num_classes]
+ (target stacked real, imag; or real only for tdac-co)
+ (mixture stacked real, imag; or real only for tdac-co)
  """
  from sonusai import SonusAIError
 
- if data.config.num_classes != 2 * data.target_fft.bins + 2 * data.mixture_fft.bins:
+ if data.config.num_classes != 2 * data.num_bands:
  raise SonusAIError(f'Invalid num_classes for target_mixture_f truth: {data.config.num_classes}')
 
  target_freq = _execute_fft(data.target_audio, data.target_fft, len(data.offsets))
@@ -59,6 +63,7 @@ Output shape: [:, 4 * bins] (target stacked real, imag; mixture stacked real, im
  frame_size=data.frame_size,
  zero_based_indices=data.zero_based_indices,
  bins=data.target_fft.bins,
+ ttype=data.ttype,
  start=0,
  truth=data.truth)
 
@@ -67,6 +72,7 @@ Output shape: [:, 4 * bins] (target stacked real, imag; mixture stacked real, im
  frame_size=data.frame_size,
  zero_based_indices=data.zero_based_indices,
  bins=data.target_fft.bins,
+ ttype=data.ttype,
  start=data.target_fft.bins * 2,
  truth=data.truth)
 
@@ -125,6 +131,7 @@ def _stack_real_imag(data: AudioF,
  frame_size: int,
  zero_based_indices: list[int],
  bins: int,
+ ttype: str,
  start: int,
  truth: Truth) -> Truth:
  import numpy as np
@@ -134,7 +141,8 @@ def _stack_real_imag(data: AudioF,
  b = _get_bin_slice(index + start, bins)
  truth[i, b] = np.real(data)
 
- b = _get_bin_slice(b.stop, bins)
- truth[i, b] = np.imag(data)
+ if ttype != 'tdac-co':
+ b = _get_bin_slice(b.stop, bins)
+ truth[i, b] = np.imag(data)
 
  return truth
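A self-contained sketch of the stacking behavior introduced above (names and shapes are illustrative, not the package's internal API): for 'tdac-co' the coefficients are stored as real values only, so the imaginary half of the truth is omitted.

```python
import numpy as np

def stack_real_imag(freq: np.ndarray, bins: int, ttype: str) -> np.ndarray:
    """Stack real/imag parts of a [frames, bins] complex array along the last axis."""
    width = bins if ttype == 'tdac-co' else 2 * bins
    truth = np.zeros((freq.shape[0], width), dtype=np.float32)
    truth[:, :bins] = np.real(freq)
    if ttype != 'tdac-co':           # same guard as the change above
        truth[:, bins:] = np.imag(freq)
    return truth

frames = np.ones((4, 5)) + 1j * np.ones((4, 5))
print(stack_real_imag(frames, bins=5, ttype='stft').shape)     # (4, 10)
print(stack_real_imag(frames, bins=5, ttype='tdac-co').shape)  # (4, 5)
```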
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sonusai
- Version: 0.15.6
+ Version: 0.15.8
  Summary: Framework for building deep neural network models for sound, speech, and voice AI
  Home-page: https://aaware.com
  License: GPL-3.0-only
@@ -16,21 +16,21 @@ Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
  Requires-Dist: aixplain (>=0.2.6,<0.3.0)
+ Requires-Dist: ctranslate2 (==4.1.0)
  Requires-Dist: dataclasses-json (>=0.6.1,<0.7.0)
  Requires-Dist: deepgram-sdk (>=3.0.0,<4.0.0)
  Requires-Dist: docopt (>=0.6.2,<0.7.0)
- Requires-Dist: faster-whisper (>=0.10.0,<0.11.0)
- Requires-Dist: ffmpeg (>=1.4,<2.0)
- Requires-Dist: greenlet (>=3.0.1,<4.0.0)
- Requires-Dist: grpcio (==1.60.0)
+ Requires-Dist: faster-whisper (>=1.0.1,<2.0.0)
+ Requires-Dist: h5py (>=3.11.0,<4.0.0)
  Requires-Dist: jiwer (>=3.0.3,<4.0.0)
- Requires-Dist: keras-tuner (>=1.4.5,<2.0.0)
+ Requires-Dist: keras (>=3.1.1,<4.0.0)
+ Requires-Dist: keras-tuner (>=1.4.7,<2.0.0)
  Requires-Dist: librosa (>=0.10.1,<0.11.0)
- Requires-Dist: lightning (>=2.1.0,<3.0.0)
+ Requires-Dist: lightning (>=2.2,<2.3)
  Requires-Dist: matplotlib (>=3.8.0,<4.0.0)
- Requires-Dist: onnx (==1.14.1)
+ Requires-Dist: onnx (>=1.14.1,<2.0.0)
  Requires-Dist: onnxruntime (>=1.16.1,<2.0.0)
- Requires-Dist: paho-mqtt (>=1.6.1,<2.0.0)
+ Requires-Dist: paho-mqtt (>=2.0.0,<3.0.0)
  Requires-Dist: pandas (>=2.1.1,<3.0.0)
  Requires-Dist: pesq (>=0.0.4,<0.0.5)
  Requires-Dist: pyaaware (>=1.5.3,<2.0.0)
@@ -38,18 +38,14 @@ Requires-Dist: pydub (>=0.25.1,<0.26.0)
  Requires-Dist: pystoi (>=0.4.0,<0.5.0)
  Requires-Dist: python-magic (>=0.4.27,<0.5.0)
  Requires-Dist: requests (>=2.31.0,<3.0.0)
- Requires-Dist: resampy (>=0.4.2,<0.5.0)
- Requires-Dist: samplerate (>=0.1.0,<0.2.0)
- Requires-Dist: scikit-learn (>=1.3.1,<2.0.0)
- Requires-Dist: sh (>=2.0.6,<3.0.0)
+ Requires-Dist: samplerate (>=0.2.1,<0.3.0)
  Requires-Dist: soundfile (>=0.12.1,<0.13.0)
  Requires-Dist: sox (>=1.4.1,<2.0.0)
- Requires-Dist: speechrecognition (>=3.10.0,<4.0.0)
+ Requires-Dist: speechrecognition (>=3.10.1,<4.0.0)
  Requires-Dist: tensorflow (>=2.15.0,<3.0.0)
- Requires-Dist: tensorflow-addons (>=0.23.0,<0.24.0)
  Requires-Dist: tf2onnx (>=1.15.1,<2.0.0)
- Requires-Dist: torch (>=2.1.0,<3.0.0)
- Requires-Dist: torchaudio (>=2.1.0,<3.0.0)
+ Requires-Dist: torch (>=2.2,<2.3)
+ Requires-Dist: torchaudio (>=2.2,<2.3)
  Requires-Dist: torchinfo (>=1.8.0,<2.0.0)
  Requires-Dist: tqdm (>=4.66.1,<5.0.0)
  Description-Content-Type: text/x-rst
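The dependency set is reshuffled in this release: ctranslate2, h5py, and keras are added; ffmpeg, greenlet, grpcio, resampy, scikit-learn, sh, and tensorflow-addons are dropped; and torch, torchaudio, and lightning move to the 2.2 series. A small, generic snippet (prefix matching only, names taken from the entries above) for checking an environment against the tighter pins:

```python
# Crude environment check against the new torch/lightning pins (prefix match only).
from importlib.metadata import PackageNotFoundError, version

for name, prefix in {"torch": "2.2", "torchaudio": "2.2", "lightning": "2.2"}.items():
    try:
        installed = version(name)
        status = "ok" if installed.startswith(prefix) else "outside pinned range"
        print(f"{name} {installed}: {status}")
    except PackageNotFoundError:
        print(f"{name}: not installed")
```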
@@ -59,7 +55,7 @@ Sonus AI: Framework for simplified creation of deep NN models for sound, speech,
  Sonus AI includes functions for pre-processing training and validation data and
  creating performance metrics reports for key types of Keras models:
  - recurrent, convolutional, or a combination (i.e. RCNNs)
- - binary, multiclass single-label, multiclass multi-label, and regresssion
+ - binary, multiclass single-label, multiclass multi-label, and regression
  - training with data augmentations: noise mixing, pitch and time stretch, etc.
 
  Sonus AI python functions are used by:
@@ -44,7 +44,7 @@ sonusai/mixture/constants.py,sha256=xjCskcQi6khqYZDf7j6z1OkeN1C6wE06kBBapcJiNI4,
  sonusai/mixture/datatypes.py,sha256=xN-GdPCEHGE2Ak_TdFbjuSyMs4x7TLRp59trbMTiYLg,8164
  sonusai/mixture/eq_rule_is_valid.py,sha256=MpQwRA5M76wSiQWEI1lW2cLFdPaMttBLcQp3tWD8efM,1243
  sonusai/mixture/feature.py,sha256=io6OiJAJ3GYvPChiUmPQuP3h0OB2onjYF8o9-AWkmqM,1996
- sonusai/mixture/generation.py,sha256=5SXV1R9os4XPN0pVm8ribG23vsXkYzsegoctRZdXQT4,39039
+ sonusai/mixture/generation.py,sha256=miUrc3QOSUNIG6mDkiMCZ6M2ulivUZxlYUAJUOVomWc,39039
  sonusai/mixture/helpers.py,sha256=XqpcB15MezEMVJwf3jxzATDJSpj_27b8Cru1TDIFD7w,21326
  sonusai/mixture/log_duration_and_sizes.py,sha256=r-wVjrLW1XBciOL4pkZSYMR7ZNADbojE95TPSQkp3kc,1329
  sonusai/mixture/mapped_snr_f.py,sha256=mlbYM1t14OXe_Zg4CjpWTuA_Zun4W0O3bSUXeodRBQs,1845
@@ -61,12 +61,12 @@ sonusai/mixture/torchaudio_augmentation.py,sha256=1vEDHI0caL1vrgoY2lAWe4CiHE2jKR
  sonusai/mixture/truth.py,sha256=Y41pZ52Xkols9LUler0NlgnilUOscBIucmw4GcxXNzU,1612
  sonusai/mixture/truth_functions/__init__.py,sha256=82lKYHhLy8KW3gHngrocoqwupGVLVsWdIXdYs3vhjOc,359
  sonusai/mixture/truth_functions/crm.py,sha256=_Vy8UMrOUQXsrM3nutvUMWCpvI8GePr01QFlyqLFd4k,2626
- sonusai/mixture/truth_functions/data.py,sha256=wUlZAAeE4AXg4Egtcvs7Qhbf5dnWur-1Qr_Dz6GoJiQ,2778
+ sonusai/mixture/truth_functions/data.py,sha256=NJNZz5fB3jnntUDlnsKJVQIeuHNUvD4x5iNaQVQlo3Y,2857
  sonusai/mixture/truth_functions/energy.py,sha256=ydMtMLjMloG76DB30ZHQ5tkBVh4dkMJ82XEhKBokmIk,4281
  sonusai/mixture/truth_functions/file.py,sha256=jOJuC_3y9BH6GGOp9eKcbVrHLVRzUA80BJq59LhcBUM,1539
  sonusai/mixture/truth_functions/phoneme.py,sha256=stYdlPuNytQK_LLT61OJLfYSqKd-sDjQZdtJKGzt5wA,479
  sonusai/mixture/truth_functions/sed.py,sha256=8cHjEFjZaH_0hIOHhPmj4AJz2GpEADM6Ys2x4NoiWSY,2469
- sonusai/mixture/truth_functions/target.py,sha256=V-bXcJZ3P83NHKqYs4RcsxxuZdO8kqzSEN5CNRa5e0k,5394
+ sonusai/mixture/truth_functions/target.py,sha256=3rPXYwU4SBiPP3uIDpOL-B2Xw1Zh3JboD_MYNEyUpuk,5746
  sonusai/mkmanifest.py,sha256=dIPVFKKhnhHdq63OGr6p__pK7fyx3OdKVtbmGUJxsR8,7078
  sonusai/mkwav.py,sha256=LZNyhq4gJEs_NtGvRsYHA2qfgkkODpt6HoH1b-Tjjuw,5266
  sonusai/onnx_predict.py,sha256=RhQbbNG3w6rCXuSFUWCaQmUH5JzSP2hmu6TG5_81IVA,9055
@@ -122,7 +122,7 @@ sonusai/utils/trim_docstring.py,sha256=dSrtiRsEN4wkkvKBp6WDr13RUypfqZzgH_jOBLs1o
  sonusai/utils/wave.py,sha256=TKE-CNPGFXNXUW626CBPzCTNgWJut8I0ZEUsgG9q4Po,586
  sonusai/utils/yes_or_no.py,sha256=eMLXBVH0cEahiXY4W2KNORmwNQ-ba10eRtldh0y4NYg,263
  sonusai/vars.py,sha256=m2AefF0m5bXWGXpJj8Pi42zWL2ydeEj7bkak3GrtMyM,940
- sonusai-0.15.6.dist-info/METADATA,sha256=bUbyCYkQJaHaxBfqMyieWqqGRewQJ3Vu0LFzGpOR3Lg,3096
- sonusai-0.15.6.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
- sonusai-0.15.6.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
- sonusai-0.15.6.dist-info/RECORD,,
+ sonusai-0.15.8.dist-info/METADATA,sha256=3eCpCJmXOfr7GV3a7HDWo0iilEVHB5ANdQqS59O0Yi0,2920
+ sonusai-0.15.8.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+ sonusai-0.15.8.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
+ sonusai-0.15.8.dist-info/RECORD,,
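Each RECORD entry above pairs a file path with its SHA-256 digest (urlsafe base64, trailing padding stripped) and size in bytes, per the standard wheel RECORD format. A short sketch of how such a line is produced (the example path is illustrative):

```python
import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    """Build a wheel RECORD line: path,sha256=<urlsafe-b64 digest>,<size in bytes>."""
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
    return f"{path},sha256={digest},{len(data)}"

# e.g. record_entry("sonusai/vars.py") run against the installed package
# should reproduce the matching line above.
```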