sonusai 0.15.6__py3-none-any.whl → 0.15.8__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry and is provided for informational purposes only.
@@ -433,7 +433,7 @@ def _initialize_targets_audio(mixdb: MixtureDatabase, mixture: Mixture) -> tuple
  # target_gain is used to back out the gain augmentation in order to return the target audio
  # to its normalized level when calculating truth (if needed).
  if target.augmentation.gain is not None:
- target.gain = round(10 ** (target.augmentation.gain / 20), ndigits=7)
+ target.gain = round(10 ** (target.augmentation.gain / 20), ndigits=5)
  else:
  target.gain = 1
 
@@ -507,8 +507,8 @@ def _initialize_mixture_gains(mixdb: MixtureDatabase,
  mixture.target_snr_gain *= gain_adjustment
  mixture.noise_snr_gain *= gain_adjustment
 
- mixture.target_snr_gain = round(mixture.target_snr_gain, ndigits=7)
- mixture.noise_snr_gain = round(mixture.noise_snr_gain, ndigits=7)
+ mixture.target_snr_gain = round(mixture.target_snr_gain, ndigits=5)
+ mixture.noise_snr_gain = round(mixture.noise_snr_gain, ndigits=5)
 
  return mixture
 
 
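Both hunks above only change the rounding precision of the derived gains from seven to five decimal places. A minimal sketch of the dB-to-linear conversion being rounded (the gain value is hypothetical):

```python
# Hypothetical augmentation gain in dB; mirrors the conversion and rounding above.
gain_db = -13.5
linear_gain = round(10 ** (gain_db / 20), ndigits=5)  # was ndigits=7 in 0.15.6
print(linear_gain)  # 0.21135
```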
@@ -23,6 +23,8 @@ class Data:
  num_classes=config.num_classes,
  truth_mutex=config.mutex)
 
+ self.num_bands = fg.num_bands
+ self.ttype = fg.ftransform_ttype
  self.frame_size = fg.ftransform_R
 
  if len(target_audio) % self.frame_size != 0:
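The truth Data object now also caches the feature generator's band count and forward-transform type. A rough stand-in (the class and values below are hypothetical, not pyaaware's API) showing how the truth functions read these attributes:

```python
# Hypothetical stand-in for the feature generator attributes cached above.
from dataclasses import dataclass

@dataclass
class FakeFeatureGenerator:
    num_bands: int = 514            # compared against config.num_classes by the truth functions below
    ftransform_ttype: str = 'stft'  # forward transform type, e.g. 'stft' or 'tdac-co'
    ftransform_R: int = 256         # forward transform frame size

fg = FakeFeatureGenerator()
num_bands, ttype, frame_size = fg.num_bands, fg.ftransform_ttype, fg.ftransform_R
print(num_bands, ttype, frame_size)
```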
@@ -13,12 +13,13 @@ Calculates the true transform of the target using the STFT
  configuration defined by the feature. This will include a
  forward transform window if defined by the feature.
 
- Output shape: [:, 2 * bins] (stacked real, imag)
+ Output shape: [:, num_classes]
+ (target stacked real, imag; or real only for tdac-co)
  """
 
  from sonusai import SonusAIError
 
- if data.config.num_classes != 2 * data.target_fft.bins:
+ if data.config.num_classes != data.num_bands:
  raise SonusAIError(f'Invalid num_classes for target_f truth: {data.config.num_classes}')
 
  target_freq = _execute_fft(data.target_audio, data.target_fft, len(data.offsets))
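The validation now compares num_classes against the feature generator's num_bands instead of 2 * bins. A hypothetical helper (not part of the package) capturing the relationship these changes imply between FFT bins, transform type, and truth width:

```python
# Hypothetical helper: complex transforms stack real and imaginary parts
# (2 * bins wide), while the real-valued 'tdac-co' transform keeps only
# the real part (bins wide).
def expected_truth_width(bins: int, ttype: str) -> int:
    return bins if ttype == 'tdac-co' else 2 * bins

assert expected_truth_width(bins=257, ttype='stft') == 514
assert expected_truth_width(bins=256, ttype='tdac-co') == 256
```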
@@ -28,6 +29,7 @@ Output shape: [:, 2 * bins] (stacked real, imag)
  frame_size=data.frame_size,
  zero_based_indices=data.zero_based_indices,
  bins=data.target_fft.bins,
+ ttype=data.ttype,
  start=0,
  truth=data.truth)
 
@@ -43,11 +45,13 @@ using the STFT configuration defined by the feature. This
  will include a forward transform window if defined by the
  feature.
 
- Output shape: [:, 4 * bins] (target stacked real, imag; mixture stacked real, imag)
+ Output shape: [:, 2 * num_classes]
+ (target stacked real, imag; or real only for tdac-co)
+ (mixture stacked real, imag; or real only for tdac-co)
  """
  from sonusai import SonusAIError
 
- if data.config.num_classes != 2 * data.target_fft.bins + 2 * data.mixture_fft.bins:
+ if data.config.num_classes != 2 * data.num_bands:
  raise SonusAIError(f'Invalid num_classes for target_mixture_f truth: {data.config.num_classes}')
 
  target_freq = _execute_fft(data.target_audio, data.target_fft, len(data.offsets))
@@ -59,6 +63,7 @@ Output shape: [:, 4 * bins] (target stacked real, imag; mixture stacked real, im
  frame_size=data.frame_size,
  zero_based_indices=data.zero_based_indices,
  bins=data.target_fft.bins,
+ ttype=data.ttype,
  start=0,
  truth=data.truth)
 
@@ -67,6 +72,7 @@ Output shape: [:, 4 * bins] (target stacked real, imag; mixture stacked real, im
  frame_size=data.frame_size,
  zero_based_indices=data.zero_based_indices,
  bins=data.target_fft.bins,
+ ttype=data.ttype,
  start=data.target_fft.bins * 2,
  truth=data.truth)
 
@@ -125,6 +131,7 @@ def _stack_real_imag(data: AudioF,
  frame_size: int,
  zero_based_indices: list[int],
  bins: int,
+ ttype: str,
  start: int,
  truth: Truth) -> Truth:
  import numpy as np
@@ -134,7 +141,8 @@ def _stack_real_imag(data: AudioF,
  b = _get_bin_slice(index + start, bins)
  truth[i, b] = np.real(data)
 
- b = _get_bin_slice(b.stop, bins)
- truth[i, b] = np.imag(data)
+ if ttype != 'tdac-co':
+ b = _get_bin_slice(b.stop, bins)
+ truth[i, b] = np.imag(data)
 
  return truth
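A self-contained sketch of the stacking behavior introduced above (names and shapes are illustrative, not the package's internal API): for 'tdac-co' the coefficients are stored as real values only, so the imaginary half of the truth is omitted.

```python
import numpy as np

def stack_real_imag(freq: np.ndarray, bins: int, ttype: str) -> np.ndarray:
    """Stack real/imag parts of a [frames, bins] complex array along the last axis."""
    width = bins if ttype == 'tdac-co' else 2 * bins
    truth = np.zeros((freq.shape[0], width), dtype=np.float32)
    truth[:, :bins] = np.real(freq)
    if ttype != 'tdac-co':           # same guard as the change above
        truth[:, bins:] = np.imag(freq)
    return truth

frames = np.ones((4, 5)) + 1j * np.ones((4, 5))
print(stack_real_imag(frames, bins=5, ttype='stft').shape)     # (4, 10)
print(stack_real_imag(frames, bins=5, ttype='tdac-co').shape)  # (4, 5)
```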
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sonusai
- Version: 0.15.6
+ Version: 0.15.8
  Summary: Framework for building deep neural network models for sound, speech, and voice AI
  Home-page: https://aaware.com
  License: GPL-3.0-only
@@ -16,21 +16,21 @@ Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
  Requires-Dist: aixplain (>=0.2.6,<0.3.0)
+ Requires-Dist: ctranslate2 (==4.1.0)
  Requires-Dist: dataclasses-json (>=0.6.1,<0.7.0)
  Requires-Dist: deepgram-sdk (>=3.0.0,<4.0.0)
  Requires-Dist: docopt (>=0.6.2,<0.7.0)
- Requires-Dist: faster-whisper (>=0.10.0,<0.11.0)
- Requires-Dist: ffmpeg (>=1.4,<2.0)
- Requires-Dist: greenlet (>=3.0.1,<4.0.0)
- Requires-Dist: grpcio (==1.60.0)
+ Requires-Dist: faster-whisper (>=1.0.1,<2.0.0)
+ Requires-Dist: h5py (>=3.11.0,<4.0.0)
  Requires-Dist: jiwer (>=3.0.3,<4.0.0)
- Requires-Dist: keras-tuner (>=1.4.5,<2.0.0)
+ Requires-Dist: keras (>=3.1.1,<4.0.0)
+ Requires-Dist: keras-tuner (>=1.4.7,<2.0.0)
  Requires-Dist: librosa (>=0.10.1,<0.11.0)
- Requires-Dist: lightning (>=2.1.0,<3.0.0)
+ Requires-Dist: lightning (>=2.2,<2.3)
  Requires-Dist: matplotlib (>=3.8.0,<4.0.0)
- Requires-Dist: onnx (==1.14.1)
+ Requires-Dist: onnx (>=1.14.1,<2.0.0)
  Requires-Dist: onnxruntime (>=1.16.1,<2.0.0)
- Requires-Dist: paho-mqtt (>=1.6.1,<2.0.0)
+ Requires-Dist: paho-mqtt (>=2.0.0,<3.0.0)
  Requires-Dist: pandas (>=2.1.1,<3.0.0)
  Requires-Dist: pesq (>=0.0.4,<0.0.5)
  Requires-Dist: pyaaware (>=1.5.3,<2.0.0)
@@ -38,18 +38,14 @@ Requires-Dist: pydub (>=0.25.1,<0.26.0)
  Requires-Dist: pystoi (>=0.4.0,<0.5.0)
  Requires-Dist: python-magic (>=0.4.27,<0.5.0)
  Requires-Dist: requests (>=2.31.0,<3.0.0)
- Requires-Dist: resampy (>=0.4.2,<0.5.0)
- Requires-Dist: samplerate (>=0.1.0,<0.2.0)
- Requires-Dist: scikit-learn (>=1.3.1,<2.0.0)
- Requires-Dist: sh (>=2.0.6,<3.0.0)
+ Requires-Dist: samplerate (>=0.2.1,<0.3.0)
  Requires-Dist: soundfile (>=0.12.1,<0.13.0)
  Requires-Dist: sox (>=1.4.1,<2.0.0)
- Requires-Dist: speechrecognition (>=3.10.0,<4.0.0)
+ Requires-Dist: speechrecognition (>=3.10.1,<4.0.0)
  Requires-Dist: tensorflow (>=2.15.0,<3.0.0)
- Requires-Dist: tensorflow-addons (>=0.23.0,<0.24.0)
  Requires-Dist: tf2onnx (>=1.15.1,<2.0.0)
- Requires-Dist: torch (>=2.1.0,<3.0.0)
- Requires-Dist: torchaudio (>=2.1.0,<3.0.0)
+ Requires-Dist: torch (>=2.2,<2.3)
+ Requires-Dist: torchaudio (>=2.2,<2.3)
  Requires-Dist: torchinfo (>=1.8.0,<2.0.0)
  Requires-Dist: tqdm (>=4.66.1,<5.0.0)
  Description-Content-Type: text/x-rst
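The dependency set is reshuffled in this release: ctranslate2, h5py, and keras are added; ffmpeg, greenlet, grpcio, resampy, scikit-learn, sh, and tensorflow-addons are dropped; and torch, torchaudio, and lightning move to the 2.2 series. A small, generic snippet (prefix matching only, names taken from the entries above) for checking an environment against the tighter pins:

```python
# Crude environment check against the new torch/lightning pins (prefix match only).
from importlib.metadata import PackageNotFoundError, version

for name, prefix in {"torch": "2.2", "torchaudio": "2.2", "lightning": "2.2"}.items():
    try:
        installed = version(name)
        status = "ok" if installed.startswith(prefix) else "outside pinned range"
        print(f"{name} {installed}: {status}")
    except PackageNotFoundError:
        print(f"{name}: not installed")
```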
@@ -59,7 +55,7 @@ Sonus AI: Framework for simplified creation of deep NN models for sound, speech,
  Sonus AI includes functions for pre-processing training and validation data and
  creating performance metrics reports for key types of Keras models:
  - recurrent, convolutional, or a combination (i.e. RCNNs)
- - binary, multiclass single-label, multiclass multi-label, and regresssion
+ - binary, multiclass single-label, multiclass multi-label, and regression
  - training with data augmentations: noise mixing, pitch and time stretch, etc.
 
  Sonus AI python functions are used by:
@@ -44,7 +44,7 @@ sonusai/mixture/constants.py,sha256=xjCskcQi6khqYZDf7j6z1OkeN1C6wE06kBBapcJiNI4,
  sonusai/mixture/datatypes.py,sha256=xN-GdPCEHGE2Ak_TdFbjuSyMs4x7TLRp59trbMTiYLg,8164
  sonusai/mixture/eq_rule_is_valid.py,sha256=MpQwRA5M76wSiQWEI1lW2cLFdPaMttBLcQp3tWD8efM,1243
  sonusai/mixture/feature.py,sha256=io6OiJAJ3GYvPChiUmPQuP3h0OB2onjYF8o9-AWkmqM,1996
- sonusai/mixture/generation.py,sha256=5SXV1R9os4XPN0pVm8ribG23vsXkYzsegoctRZdXQT4,39039
+ sonusai/mixture/generation.py,sha256=miUrc3QOSUNIG6mDkiMCZ6M2ulivUZxlYUAJUOVomWc,39039
  sonusai/mixture/helpers.py,sha256=XqpcB15MezEMVJwf3jxzATDJSpj_27b8Cru1TDIFD7w,21326
  sonusai/mixture/log_duration_and_sizes.py,sha256=r-wVjrLW1XBciOL4pkZSYMR7ZNADbojE95TPSQkp3kc,1329
  sonusai/mixture/mapped_snr_f.py,sha256=mlbYM1t14OXe_Zg4CjpWTuA_Zun4W0O3bSUXeodRBQs,1845
@@ -61,12 +61,12 @@ sonusai/mixture/torchaudio_augmentation.py,sha256=1vEDHI0caL1vrgoY2lAWe4CiHE2jKR
  sonusai/mixture/truth.py,sha256=Y41pZ52Xkols9LUler0NlgnilUOscBIucmw4GcxXNzU,1612
  sonusai/mixture/truth_functions/__init__.py,sha256=82lKYHhLy8KW3gHngrocoqwupGVLVsWdIXdYs3vhjOc,359
  sonusai/mixture/truth_functions/crm.py,sha256=_Vy8UMrOUQXsrM3nutvUMWCpvI8GePr01QFlyqLFd4k,2626
- sonusai/mixture/truth_functions/data.py,sha256=wUlZAAeE4AXg4Egtcvs7Qhbf5dnWur-1Qr_Dz6GoJiQ,2778
+ sonusai/mixture/truth_functions/data.py,sha256=NJNZz5fB3jnntUDlnsKJVQIeuHNUvD4x5iNaQVQlo3Y,2857
  sonusai/mixture/truth_functions/energy.py,sha256=ydMtMLjMloG76DB30ZHQ5tkBVh4dkMJ82XEhKBokmIk,4281
  sonusai/mixture/truth_functions/file.py,sha256=jOJuC_3y9BH6GGOp9eKcbVrHLVRzUA80BJq59LhcBUM,1539
  sonusai/mixture/truth_functions/phoneme.py,sha256=stYdlPuNytQK_LLT61OJLfYSqKd-sDjQZdtJKGzt5wA,479
  sonusai/mixture/truth_functions/sed.py,sha256=8cHjEFjZaH_0hIOHhPmj4AJz2GpEADM6Ys2x4NoiWSY,2469
- sonusai/mixture/truth_functions/target.py,sha256=V-bXcJZ3P83NHKqYs4RcsxxuZdO8kqzSEN5CNRa5e0k,5394
+ sonusai/mixture/truth_functions/target.py,sha256=3rPXYwU4SBiPP3uIDpOL-B2Xw1Zh3JboD_MYNEyUpuk,5746
  sonusai/mkmanifest.py,sha256=dIPVFKKhnhHdq63OGr6p__pK7fyx3OdKVtbmGUJxsR8,7078
  sonusai/mkwav.py,sha256=LZNyhq4gJEs_NtGvRsYHA2qfgkkODpt6HoH1b-Tjjuw,5266
  sonusai/onnx_predict.py,sha256=RhQbbNG3w6rCXuSFUWCaQmUH5JzSP2hmu6TG5_81IVA,9055
@@ -122,7 +122,7 @@ sonusai/utils/trim_docstring.py,sha256=dSrtiRsEN4wkkvKBp6WDr13RUypfqZzgH_jOBLs1o
  sonusai/utils/wave.py,sha256=TKE-CNPGFXNXUW626CBPzCTNgWJut8I0ZEUsgG9q4Po,586
  sonusai/utils/yes_or_no.py,sha256=eMLXBVH0cEahiXY4W2KNORmwNQ-ba10eRtldh0y4NYg,263
  sonusai/vars.py,sha256=m2AefF0m5bXWGXpJj8Pi42zWL2ydeEj7bkak3GrtMyM,940
- sonusai-0.15.6.dist-info/METADATA,sha256=bUbyCYkQJaHaxBfqMyieWqqGRewQJ3Vu0LFzGpOR3Lg,3096
- sonusai-0.15.6.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
- sonusai-0.15.6.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
- sonusai-0.15.6.dist-info/RECORD,,
+ sonusai-0.15.8.dist-info/METADATA,sha256=3eCpCJmXOfr7GV3a7HDWo0iilEVHB5ANdQqS59O0Yi0,2920
+ sonusai-0.15.8.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+ sonusai-0.15.8.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
+ sonusai-0.15.8.dist-info/RECORD,,
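Each RECORD entry above pairs a file path with its SHA-256 digest (urlsafe base64, trailing padding stripped) and size in bytes, per the standard wheel RECORD format. A short sketch of how such a line is produced (the example path is illustrative):

```python
import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    """Build a wheel RECORD line: path,sha256=<urlsafe-b64 digest>,<size in bytes>."""
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
    return f"{path},sha256={digest},{len(data)}"

# e.g. record_entry("sonusai/vars.py") run against the installed package
# should reproduce the matching line above.
```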