sonusai 0.15.6__py3-none-any.whl → 0.15.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/mixture/generation.py +3 -3
- sonusai/mixture/truth_functions/data.py +2 -0
- sonusai/mixture/truth_functions/target.py +14 -6
- {sonusai-0.15.6.dist-info → sonusai-0.15.8.dist-info}/METADATA +14 -18
- {sonusai-0.15.6.dist-info → sonusai-0.15.8.dist-info}/RECORD +7 -7
- {sonusai-0.15.6.dist-info → sonusai-0.15.8.dist-info}/WHEEL +0 -0
- {sonusai-0.15.6.dist-info → sonusai-0.15.8.dist-info}/entry_points.txt +0 -0
sonusai/mixture/generation.py
CHANGED
@@ -433,7 +433,7 @@ def _initialize_targets_audio(mixdb: MixtureDatabase, mixture: Mixture) -> tuple
|
|
433
433
|
# target_gain is used to back out the gain augmentation in order to return the target audio
|
434
434
|
# to its normalized level when calculating truth (if needed).
|
435
435
|
if target.augmentation.gain is not None:
|
436
|
-
target.gain = round(10 ** (target.augmentation.gain / 20), ndigits=
|
436
|
+
target.gain = round(10 ** (target.augmentation.gain / 20), ndigits=5)
|
437
437
|
else:
|
438
438
|
target.gain = 1
|
439
439
|
|
@@ -507,8 +507,8 @@ def _initialize_mixture_gains(mixdb: MixtureDatabase,
|
|
507
507
|
mixture.target_snr_gain *= gain_adjustment
|
508
508
|
mixture.noise_snr_gain *= gain_adjustment
|
509
509
|
|
510
|
-
mixture.target_snr_gain = round(mixture.target_snr_gain, ndigits=
|
511
|
-
mixture.noise_snr_gain = round(mixture.noise_snr_gain, ndigits=
|
510
|
+
mixture.target_snr_gain = round(mixture.target_snr_gain, ndigits=5)
|
511
|
+
mixture.noise_snr_gain = round(mixture.noise_snr_gain, ndigits=5)
|
512
512
|
return mixture
|
513
513
|
|
514
514
|
|
@@ -13,12 +13,13 @@ Calculates the true transform of the target using the STFT
|
|
13
13
|
configuration defined by the feature. This will include a
|
14
14
|
forward transform window if defined by the feature.
|
15
15
|
|
16
|
-
Output shape: [:,
|
16
|
+
Output shape: [:, num_classes]
|
17
|
+
(target stacked real, imag; or real only for tdac-co)
|
17
18
|
"""
|
18
19
|
|
19
20
|
from sonusai import SonusAIError
|
20
21
|
|
21
|
-
if data.config.num_classes !=
|
22
|
+
if data.config.num_classes != data.num_bands:
|
22
23
|
raise SonusAIError(f'Invalid num_classes for target_f truth: {data.config.num_classes}')
|
23
24
|
|
24
25
|
target_freq = _execute_fft(data.target_audio, data.target_fft, len(data.offsets))
|
@@ -28,6 +29,7 @@ Output shape: [:, 2 * bins] (stacked real, imag)
|
|
28
29
|
frame_size=data.frame_size,
|
29
30
|
zero_based_indices=data.zero_based_indices,
|
30
31
|
bins=data.target_fft.bins,
|
32
|
+
ttype=data.ttype,
|
31
33
|
start=0,
|
32
34
|
truth=data.truth)
|
33
35
|
|
@@ -43,11 +45,13 @@ using the STFT configuration defined by the feature. This
|
|
43
45
|
will include a forward transform window if defined by the
|
44
46
|
feature.
|
45
47
|
|
46
|
-
Output shape: [:,
|
48
|
+
Output shape: [:, 2 * num_classes]
|
49
|
+
(target stacked real, imag; or real only for tdac-co)
|
50
|
+
(mixture stacked real, imag; or real only for tdac-co)
|
47
51
|
"""
|
48
52
|
from sonusai import SonusAIError
|
49
53
|
|
50
|
-
if data.config.num_classes != 2 * data.
|
54
|
+
if data.config.num_classes != 2 * data.num_bands:
|
51
55
|
raise SonusAIError(f'Invalid num_classes for target_mixture_f truth: {data.config.num_classes}')
|
52
56
|
|
53
57
|
target_freq = _execute_fft(data.target_audio, data.target_fft, len(data.offsets))
|
@@ -59,6 +63,7 @@ Output shape: [:, 4 * bins] (target stacked real, imag; mixture stacked real, im
|
|
59
63
|
frame_size=data.frame_size,
|
60
64
|
zero_based_indices=data.zero_based_indices,
|
61
65
|
bins=data.target_fft.bins,
|
66
|
+
ttype=data.ttype,
|
62
67
|
start=0,
|
63
68
|
truth=data.truth)
|
64
69
|
|
@@ -67,6 +72,7 @@ Output shape: [:, 4 * bins] (target stacked real, imag; mixture stacked real, im
|
|
67
72
|
frame_size=data.frame_size,
|
68
73
|
zero_based_indices=data.zero_based_indices,
|
69
74
|
bins=data.target_fft.bins,
|
75
|
+
ttype=data.ttype,
|
70
76
|
start=data.target_fft.bins * 2,
|
71
77
|
truth=data.truth)
|
72
78
|
|
@@ -125,6 +131,7 @@ def _stack_real_imag(data: AudioF,
|
|
125
131
|
frame_size: int,
|
126
132
|
zero_based_indices: list[int],
|
127
133
|
bins: int,
|
134
|
+
ttype: str,
|
128
135
|
start: int,
|
129
136
|
truth: Truth) -> Truth:
|
130
137
|
import numpy as np
|
@@ -134,7 +141,8 @@ def _stack_real_imag(data: AudioF,
|
|
134
141
|
b = _get_bin_slice(index + start, bins)
|
135
142
|
truth[i, b] = np.real(data)
|
136
143
|
|
137
|
-
|
138
|
-
|
144
|
+
if ttype != 'tdac-co':
|
145
|
+
b = _get_bin_slice(b.stop, bins)
|
146
|
+
truth[i, b] = np.imag(data)
|
139
147
|
|
140
148
|
return truth
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sonusai
|
3
|
-
Version: 0.15.
|
3
|
+
Version: 0.15.8
|
4
4
|
Summary: Framework for building deep neural network models for sound, speech, and voice AI
|
5
5
|
Home-page: https://aaware.com
|
6
6
|
License: GPL-3.0-only
|
@@ -16,21 +16,21 @@ Classifier: Programming Language :: Python :: 3.10
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.11
|
17
17
|
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
18
18
|
Requires-Dist: aixplain (>=0.2.6,<0.3.0)
|
19
|
+
Requires-Dist: ctranslate2 (==4.1.0)
|
19
20
|
Requires-Dist: dataclasses-json (>=0.6.1,<0.7.0)
|
20
21
|
Requires-Dist: deepgram-sdk (>=3.0.0,<4.0.0)
|
21
22
|
Requires-Dist: docopt (>=0.6.2,<0.7.0)
|
22
|
-
Requires-Dist: faster-whisper (>=0.
|
23
|
-
Requires-Dist:
|
24
|
-
Requires-Dist: greenlet (>=3.0.1,<4.0.0)
|
25
|
-
Requires-Dist: grpcio (==1.60.0)
|
23
|
+
Requires-Dist: faster-whisper (>=1.0.1,<2.0.0)
|
24
|
+
Requires-Dist: h5py (>=3.11.0,<4.0.0)
|
26
25
|
Requires-Dist: jiwer (>=3.0.3,<4.0.0)
|
27
|
-
Requires-Dist: keras
|
26
|
+
Requires-Dist: keras (>=3.1.1,<4.0.0)
|
27
|
+
Requires-Dist: keras-tuner (>=1.4.7,<2.0.0)
|
28
28
|
Requires-Dist: librosa (>=0.10.1,<0.11.0)
|
29
|
-
Requires-Dist: lightning (>=2.
|
29
|
+
Requires-Dist: lightning (>=2.2,<2.3)
|
30
30
|
Requires-Dist: matplotlib (>=3.8.0,<4.0.0)
|
31
|
-
Requires-Dist: onnx (
|
31
|
+
Requires-Dist: onnx (>=1.14.1,<2.0.0)
|
32
32
|
Requires-Dist: onnxruntime (>=1.16.1,<2.0.0)
|
33
|
-
Requires-Dist: paho-mqtt (>=
|
33
|
+
Requires-Dist: paho-mqtt (>=2.0.0,<3.0.0)
|
34
34
|
Requires-Dist: pandas (>=2.1.1,<3.0.0)
|
35
35
|
Requires-Dist: pesq (>=0.0.4,<0.0.5)
|
36
36
|
Requires-Dist: pyaaware (>=1.5.3,<2.0.0)
|
@@ -38,18 +38,14 @@ Requires-Dist: pydub (>=0.25.1,<0.26.0)
|
|
38
38
|
Requires-Dist: pystoi (>=0.4.0,<0.5.0)
|
39
39
|
Requires-Dist: python-magic (>=0.4.27,<0.5.0)
|
40
40
|
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
41
|
-
Requires-Dist:
|
42
|
-
Requires-Dist: samplerate (>=0.1.0,<0.2.0)
|
43
|
-
Requires-Dist: scikit-learn (>=1.3.1,<2.0.0)
|
44
|
-
Requires-Dist: sh (>=2.0.6,<3.0.0)
|
41
|
+
Requires-Dist: samplerate (>=0.2.1,<0.3.0)
|
45
42
|
Requires-Dist: soundfile (>=0.12.1,<0.13.0)
|
46
43
|
Requires-Dist: sox (>=1.4.1,<2.0.0)
|
47
|
-
Requires-Dist: speechrecognition (>=3.10.
|
44
|
+
Requires-Dist: speechrecognition (>=3.10.1,<4.0.0)
|
48
45
|
Requires-Dist: tensorflow (>=2.15.0,<3.0.0)
|
49
|
-
Requires-Dist: tensorflow-addons (>=0.23.0,<0.24.0)
|
50
46
|
Requires-Dist: tf2onnx (>=1.15.1,<2.0.0)
|
51
|
-
Requires-Dist: torch (>=2.
|
52
|
-
Requires-Dist: torchaudio (>=2.
|
47
|
+
Requires-Dist: torch (>=2.2,<2.3)
|
48
|
+
Requires-Dist: torchaudio (>=2.2,<2.3)
|
53
49
|
Requires-Dist: torchinfo (>=1.8.0,<2.0.0)
|
54
50
|
Requires-Dist: tqdm (>=4.66.1,<5.0.0)
|
55
51
|
Description-Content-Type: text/x-rst
|
@@ -59,7 +55,7 @@ Sonus AI: Framework for simplified creation of deep NN models for sound, speech,
|
|
59
55
|
Sonus AI includes functions for pre-processing training and validation data and
|
60
56
|
creating performance metrics reports for key types of Keras models:
|
61
57
|
- recurrent, convolutional, or a combination (i.e. RCNNs)
|
62
|
-
- binary, multiclass single-label, multiclass multi-label, and
|
58
|
+
- binary, multiclass single-label, multiclass multi-label, and regression
|
63
59
|
- training with data augmentations: noise mixing, pitch and time stretch, etc.
|
64
60
|
|
65
61
|
Sonus AI python functions are used by:
|
@@ -44,7 +44,7 @@ sonusai/mixture/constants.py,sha256=xjCskcQi6khqYZDf7j6z1OkeN1C6wE06kBBapcJiNI4,
|
|
44
44
|
sonusai/mixture/datatypes.py,sha256=xN-GdPCEHGE2Ak_TdFbjuSyMs4x7TLRp59trbMTiYLg,8164
|
45
45
|
sonusai/mixture/eq_rule_is_valid.py,sha256=MpQwRA5M76wSiQWEI1lW2cLFdPaMttBLcQp3tWD8efM,1243
|
46
46
|
sonusai/mixture/feature.py,sha256=io6OiJAJ3GYvPChiUmPQuP3h0OB2onjYF8o9-AWkmqM,1996
|
47
|
-
sonusai/mixture/generation.py,sha256=
|
47
|
+
sonusai/mixture/generation.py,sha256=miUrc3QOSUNIG6mDkiMCZ6M2ulivUZxlYUAJUOVomWc,39039
|
48
48
|
sonusai/mixture/helpers.py,sha256=XqpcB15MezEMVJwf3jxzATDJSpj_27b8Cru1TDIFD7w,21326
|
49
49
|
sonusai/mixture/log_duration_and_sizes.py,sha256=r-wVjrLW1XBciOL4pkZSYMR7ZNADbojE95TPSQkp3kc,1329
|
50
50
|
sonusai/mixture/mapped_snr_f.py,sha256=mlbYM1t14OXe_Zg4CjpWTuA_Zun4W0O3bSUXeodRBQs,1845
|
@@ -61,12 +61,12 @@ sonusai/mixture/torchaudio_augmentation.py,sha256=1vEDHI0caL1vrgoY2lAWe4CiHE2jKR
|
|
61
61
|
sonusai/mixture/truth.py,sha256=Y41pZ52Xkols9LUler0NlgnilUOscBIucmw4GcxXNzU,1612
|
62
62
|
sonusai/mixture/truth_functions/__init__.py,sha256=82lKYHhLy8KW3gHngrocoqwupGVLVsWdIXdYs3vhjOc,359
|
63
63
|
sonusai/mixture/truth_functions/crm.py,sha256=_Vy8UMrOUQXsrM3nutvUMWCpvI8GePr01QFlyqLFd4k,2626
|
64
|
-
sonusai/mixture/truth_functions/data.py,sha256=
|
64
|
+
sonusai/mixture/truth_functions/data.py,sha256=NJNZz5fB3jnntUDlnsKJVQIeuHNUvD4x5iNaQVQlo3Y,2857
|
65
65
|
sonusai/mixture/truth_functions/energy.py,sha256=ydMtMLjMloG76DB30ZHQ5tkBVh4dkMJ82XEhKBokmIk,4281
|
66
66
|
sonusai/mixture/truth_functions/file.py,sha256=jOJuC_3y9BH6GGOp9eKcbVrHLVRzUA80BJq59LhcBUM,1539
|
67
67
|
sonusai/mixture/truth_functions/phoneme.py,sha256=stYdlPuNytQK_LLT61OJLfYSqKd-sDjQZdtJKGzt5wA,479
|
68
68
|
sonusai/mixture/truth_functions/sed.py,sha256=8cHjEFjZaH_0hIOHhPmj4AJz2GpEADM6Ys2x4NoiWSY,2469
|
69
|
-
sonusai/mixture/truth_functions/target.py,sha256=
|
69
|
+
sonusai/mixture/truth_functions/target.py,sha256=3rPXYwU4SBiPP3uIDpOL-B2Xw1Zh3JboD_MYNEyUpuk,5746
|
70
70
|
sonusai/mkmanifest.py,sha256=dIPVFKKhnhHdq63OGr6p__pK7fyx3OdKVtbmGUJxsR8,7078
|
71
71
|
sonusai/mkwav.py,sha256=LZNyhq4gJEs_NtGvRsYHA2qfgkkODpt6HoH1b-Tjjuw,5266
|
72
72
|
sonusai/onnx_predict.py,sha256=RhQbbNG3w6rCXuSFUWCaQmUH5JzSP2hmu6TG5_81IVA,9055
|
@@ -122,7 +122,7 @@ sonusai/utils/trim_docstring.py,sha256=dSrtiRsEN4wkkvKBp6WDr13RUypfqZzgH_jOBLs1o
|
|
122
122
|
sonusai/utils/wave.py,sha256=TKE-CNPGFXNXUW626CBPzCTNgWJut8I0ZEUsgG9q4Po,586
|
123
123
|
sonusai/utils/yes_or_no.py,sha256=eMLXBVH0cEahiXY4W2KNORmwNQ-ba10eRtldh0y4NYg,263
|
124
124
|
sonusai/vars.py,sha256=m2AefF0m5bXWGXpJj8Pi42zWL2ydeEj7bkak3GrtMyM,940
|
125
|
-
sonusai-0.15.
|
126
|
-
sonusai-0.15.
|
127
|
-
sonusai-0.15.
|
128
|
-
sonusai-0.15.
|
125
|
+
sonusai-0.15.8.dist-info/METADATA,sha256=3eCpCJmXOfr7GV3a7HDWo0iilEVHB5ANdQqS59O0Yi0,2920
|
126
|
+
sonusai-0.15.8.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
127
|
+
sonusai-0.15.8.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
|
128
|
+
sonusai-0.15.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|