sonusai 0.18.9__py3-none-any.whl → 0.19.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/__init__.py +20 -29
- sonusai/aawscd_probwrite.py +18 -18
- sonusai/audiofe.py +93 -80
- sonusai/calc_metric_spenh.py +395 -321
- sonusai/data/genmixdb.yml +5 -11
- sonusai/{gentcst.py → deprecated/gentcst.py} +146 -149
- sonusai/{plot.py → deprecated/plot.py} +177 -131
- sonusai/{tplot.py → deprecated/tplot.py} +124 -102
- sonusai/doc/__init__.py +1 -1
- sonusai/doc/doc.py +112 -177
- sonusai/doc.py +10 -10
- sonusai/genft.py +81 -91
- sonusai/genmetrics.py +51 -61
- sonusai/genmix.py +105 -115
- sonusai/genmixdb.py +201 -174
- sonusai/lsdb.py +56 -66
- sonusai/main.py +23 -20
- sonusai/metrics/__init__.py +2 -0
- sonusai/metrics/calc_audio_stats.py +29 -24
- sonusai/metrics/calc_class_weights.py +7 -7
- sonusai/metrics/calc_optimal_thresholds.py +5 -7
- sonusai/metrics/calc_pcm.py +3 -3
- sonusai/metrics/calc_pesq.py +10 -7
- sonusai/metrics/calc_phase_distance.py +3 -3
- sonusai/metrics/calc_sa_sdr.py +10 -8
- sonusai/metrics/calc_segsnr_f.py +16 -18
- sonusai/metrics/calc_speech.py +105 -47
- sonusai/metrics/calc_wer.py +35 -32
- sonusai/metrics/calc_wsdr.py +10 -7
- sonusai/metrics/class_summary.py +30 -27
- sonusai/metrics/confusion_matrix_summary.py +25 -22
- sonusai/metrics/one_hot.py +91 -57
- sonusai/metrics/snr_summary.py +53 -46
- sonusai/mixture/__init__.py +20 -14
- sonusai/mixture/audio.py +4 -6
- sonusai/mixture/augmentation.py +37 -43
- sonusai/mixture/class_count.py +5 -14
- sonusai/mixture/config.py +292 -225
- sonusai/mixture/constants.py +41 -30
- sonusai/mixture/data_io.py +155 -0
- sonusai/mixture/datatypes.py +111 -108
- sonusai/mixture/db_datatypes.py +54 -70
- sonusai/mixture/eq_rule_is_valid.py +6 -9
- sonusai/mixture/feature.py +40 -38
- sonusai/mixture/generation.py +522 -389
- sonusai/mixture/helpers.py +217 -272
- sonusai/mixture/log_duration_and_sizes.py +16 -13
- sonusai/mixture/mixdb.py +669 -477
- sonusai/mixture/soundfile_audio.py +12 -17
- sonusai/mixture/sox_audio.py +91 -112
- sonusai/mixture/sox_augmentation.py +8 -9
- sonusai/mixture/spectral_mask.py +4 -6
- sonusai/mixture/target_class_balancing.py +41 -36
- sonusai/mixture/targets.py +69 -67
- sonusai/mixture/tokenized_shell_vars.py +23 -23
- sonusai/mixture/torchaudio_audio.py +14 -15
- sonusai/mixture/torchaudio_augmentation.py +23 -27
- sonusai/mixture/truth.py +48 -26
- sonusai/mixture/truth_functions/__init__.py +26 -0
- sonusai/mixture/truth_functions/crm.py +56 -38
- sonusai/mixture/truth_functions/datatypes.py +37 -0
- sonusai/mixture/truth_functions/energy.py +85 -59
- sonusai/mixture/truth_functions/file.py +30 -30
- sonusai/mixture/truth_functions/phoneme.py +14 -7
- sonusai/mixture/truth_functions/sed.py +71 -45
- sonusai/mixture/truth_functions/target.py +69 -106
- sonusai/mkwav.py +58 -101
- sonusai/onnx_predict.py +46 -43
- sonusai/queries/__init__.py +3 -1
- sonusai/queries/queries.py +100 -59
- sonusai/speech/__init__.py +2 -0
- sonusai/speech/l2arctic.py +24 -23
- sonusai/speech/librispeech.py +16 -17
- sonusai/speech/mcgill.py +22 -21
- sonusai/speech/textgrid.py +32 -25
- sonusai/speech/timit.py +45 -42
- sonusai/speech/vctk.py +14 -13
- sonusai/speech/voxceleb.py +26 -20
- sonusai/summarize_metric_spenh.py +11 -10
- sonusai/utils/__init__.py +4 -3
- sonusai/utils/asl_p56.py +1 -1
- sonusai/utils/asr.py +37 -17
- sonusai/utils/asr_functions/__init__.py +2 -0
- sonusai/utils/asr_functions/aaware_whisper.py +18 -12
- sonusai/utils/audio_devices.py +12 -12
- sonusai/utils/braced_glob.py +6 -8
- sonusai/utils/calculate_input_shape.py +1 -4
- sonusai/utils/compress.py +2 -2
- sonusai/utils/convert_string_to_number.py +1 -3
- sonusai/utils/create_timestamp.py +1 -1
- sonusai/utils/create_ts_name.py +2 -2
- sonusai/utils/dataclass_from_dict.py +1 -1
- sonusai/utils/docstring.py +6 -6
- sonusai/utils/energy_f.py +9 -7
- sonusai/utils/engineering_number.py +56 -54
- sonusai/utils/get_label_names.py +8 -10
- sonusai/utils/human_readable_size.py +2 -2
- sonusai/utils/model_utils.py +3 -5
- sonusai/utils/numeric_conversion.py +2 -4
- sonusai/utils/onnx_utils.py +43 -32
- sonusai/utils/parallel.py +41 -30
- sonusai/utils/print_mixture_details.py +25 -22
- sonusai/utils/ranges.py +12 -12
- sonusai/utils/read_predict_data.py +11 -9
- sonusai/utils/reshape.py +19 -26
- sonusai/utils/seconds_to_hms.py +1 -1
- sonusai/utils/stacked_complex.py +8 -16
- sonusai/utils/stratified_shuffle_split.py +29 -27
- sonusai/utils/write_audio.py +2 -2
- sonusai/utils/yes_or_no.py +3 -3
- sonusai/vars.py +14 -14
- {sonusai-0.18.9.dist-info → sonusai-0.19.6.dist-info}/METADATA +20 -21
- sonusai-0.19.6.dist-info/RECORD +125 -0
- {sonusai-0.18.9.dist-info → sonusai-0.19.6.dist-info}/WHEEL +1 -1
- sonusai/mixture/truth_functions/data.py +0 -58
- sonusai/utils/read_mixture_data.py +0 -14
- sonusai-0.18.9.dist-info/RECORD +0 -125
- {sonusai-0.18.9.dist-info → sonusai-0.19.6.dist-info}/entry_points.txt +0 -0
sonusai/utils/ranges.py
CHANGED
@@ -2,18 +2,18 @@ def expand_range(s: str, sort: bool = True) -> list[int]:
|
|
2
2
|
"""Returns a list of integers from a string input representing a range."""
|
3
3
|
import re
|
4
4
|
|
5
|
-
clean_s = s.replace(
|
6
|
-
clean_s = clean_s.replace(
|
7
|
-
clean_s = re.sub(r
|
8
|
-
clean_s = re.sub(r
|
5
|
+
clean_s = s.replace(":", "-")
|
6
|
+
clean_s = clean_s.replace(";", ",")
|
7
|
+
clean_s = re.sub(r" +", ",", clean_s)
|
8
|
+
clean_s = re.sub(r",+", ",", clean_s)
|
9
9
|
|
10
10
|
r: list[int] = []
|
11
|
-
for i in clean_s.split(
|
12
|
-
if
|
11
|
+
for i in clean_s.split(","):
|
12
|
+
if "-" not in i:
|
13
13
|
r.append(int(i))
|
14
14
|
else:
|
15
|
-
|
16
|
-
r += range(
|
15
|
+
lo, hi = map(int, i.split("-"))
|
16
|
+
r += range(lo, hi + 1)
|
17
17
|
|
18
18
|
if sort:
|
19
19
|
r = sorted(r)
|
@@ -23,12 +23,12 @@ def expand_range(s: str, sort: bool = True) -> list[int]:
|
|
23
23
|
|
24
24
|
def consolidate_range(r: list[int]) -> str:
|
25
25
|
"""Returns a string representing a range from an input list of integers."""
|
26
|
-
from
|
26
|
+
from collections.abc import Generator
|
27
27
|
|
28
28
|
def ranges(i: list[int]) -> Generator[tuple[int, int], None, None]:
|
29
29
|
import itertools
|
30
30
|
|
31
|
-
for
|
31
|
+
for _, b in itertools.groupby(enumerate(i), lambda pair: pair[1] - pair[0]):
|
32
32
|
b_list = list(b)
|
33
33
|
yield b_list[0][1], b_list[-1][1]
|
34
34
|
|
@@ -37,7 +37,7 @@ def consolidate_range(r: list[int]) -> str:
|
|
37
37
|
for val in ls:
|
38
38
|
entry = str(val[0])
|
39
39
|
if val[0] != val[1]:
|
40
|
-
entry += f
|
40
|
+
entry += f"-{val[1]}"
|
41
41
|
result.append(entry)
|
42
42
|
|
43
|
-
return
|
43
|
+
return ", ".join(result)
|
@@ -7,13 +7,12 @@ def read_predict_data(filename: str) -> Predict:
|
|
7
7
|
"""Read predict data from given HDF5 file and return it."""
|
8
8
|
import h5py
|
9
9
|
|
10
|
-
from sonusai import SonusAIError
|
11
10
|
from sonusai import logger
|
12
11
|
|
13
|
-
logger.debug(f
|
14
|
-
with h5py.File(filename,
|
12
|
+
logger.debug(f"Reading prediction data from {filename}")
|
13
|
+
with h5py.File(filename, "r") as f:
|
15
14
|
# prediction data is either [frames, num_classes], or [frames, timesteps, num_classes]
|
16
|
-
predict = np.array(f[
|
15
|
+
predict = np.array(f["predict"])
|
17
16
|
|
18
17
|
if predict.ndim == 2:
|
19
18
|
return predict
|
@@ -21,10 +20,13 @@ def read_predict_data(filename: str) -> Predict:
|
|
21
20
|
if predict.ndim == 3:
|
22
21
|
frames, timesteps, num_classes = predict.shape
|
23
22
|
|
24
|
-
logger.debug(
|
25
|
-
|
26
|
-
|
27
|
-
|
23
|
+
logger.debug(
|
24
|
+
f"Reshaping prediction data in {filename} "
|
25
|
+
f""
|
26
|
+
f"from [{frames}, {timesteps}, {num_classes}] "
|
27
|
+
f"to [{frames * timesteps}, {num_classes}]"
|
28
|
+
)
|
29
|
+
predict = np.reshape(predict, [frames * timesteps, num_classes], order="F")
|
28
30
|
return predict
|
29
31
|
|
30
|
-
raise
|
32
|
+
raise RuntimeError(f"Invalid prediction data dimensions in {filename}")
|
sonusai/utils/reshape.py
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
from typing import Optional
|
2
|
-
|
3
1
|
import numpy as np
|
4
2
|
|
5
3
|
from sonusai.mixture.datatypes import Feature
|
@@ -11,12 +9,14 @@ def get_input_shape(feature: Feature) -> tuple[int, ...]:
|
|
11
9
|
return feature.shape[1:]
|
12
10
|
|
13
11
|
|
14
|
-
def reshape_inputs(
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
12
|
+
def reshape_inputs(
|
13
|
+
feature: Feature,
|
14
|
+
batch_size: int,
|
15
|
+
truth: Truth | None = None,
|
16
|
+
timesteps: int = 0,
|
17
|
+
flatten: bool = False,
|
18
|
+
add1ch: bool = False,
|
19
|
+
) -> tuple[Feature, Truth | None]:
|
20
20
|
"""Check SonusAI feature and truth data and reshape feature of size [frames, strides, feature_parameters] into
|
21
21
|
one of several options:
|
22
22
|
|
@@ -38,16 +38,14 @@ def reshape_inputs(feature: Feature,
|
|
38
38
|
feature reshaped feature
|
39
39
|
truth reshaped truth
|
40
40
|
"""
|
41
|
-
from sonusai import SonusAIError
|
42
|
-
|
43
41
|
frames, strides, feature_parameters = feature.shape
|
44
42
|
if truth is not None:
|
45
43
|
truth_frames, num_classes = truth.shape
|
46
44
|
# Double-check correctness of inputs
|
47
45
|
if frames != truth_frames:
|
48
|
-
raise
|
46
|
+
raise ValueError("Frames in feature and truth do not match")
|
49
47
|
else:
|
50
|
-
num_classes =
|
48
|
+
num_classes = 0
|
51
49
|
|
52
50
|
if flatten:
|
53
51
|
feature = np.reshape(feature, (frames, strides * feature_parameters))
|
@@ -64,12 +62,12 @@ def reshape_inputs(feature: Feature,
|
|
64
62
|
fr2drop = frames_rem + bf_rem
|
65
63
|
if fr2drop:
|
66
64
|
if feature.ndim == 2:
|
67
|
-
feature = feature[0:-fr2drop,
|
65
|
+
feature = feature[0:-fr2drop,] # flattened input
|
68
66
|
elif feature.ndim == 3:
|
69
|
-
feature = feature[0:-fr2drop,
|
67
|
+
feature = feature[0:-fr2drop,] # un-flattened input
|
70
68
|
|
71
69
|
if truth is not None:
|
72
|
-
truth = truth[0:-fr2drop,
|
70
|
+
truth = truth[0:-fr2drop,]
|
73
71
|
|
74
72
|
# Reshape
|
75
73
|
if feature.ndim == 2: # flattened input
|
@@ -88,9 +86,9 @@ def reshape_inputs(feature: Feature,
|
|
88
86
|
# Drop frames if remainder exists (not fitting into a multiple of new number of sequences)
|
89
87
|
fr2drop = feature.shape[0] % batch_size
|
90
88
|
if fr2drop > 0:
|
91
|
-
feature = feature[0:-fr2drop,
|
89
|
+
feature = feature[0:-fr2drop,]
|
92
90
|
if truth is not None:
|
93
|
-
truth = truth[0:-fr2drop,
|
91
|
+
truth = truth[0:-fr2drop,]
|
94
92
|
|
95
93
|
# Add channel dimension if required for input to model (i.e. for cnn type input)
|
96
94
|
if add1ch:
|
@@ -119,25 +117,20 @@ def get_num_classes_from_predict(predict: Predict, timesteps: int = 0) -> int:
|
|
119
117
|
return dims[1]
|
120
118
|
|
121
119
|
|
122
|
-
def reshape_outputs(predict: Predict,
|
123
|
-
truth: Optional[Truth] = None,
|
124
|
-
timesteps: int = 0) -> tuple[Predict, Optional[Truth]]:
|
120
|
+
def reshape_outputs(predict: Predict, truth: Truth | None = None, timesteps: int = 0) -> tuple[Predict, Truth | None]:
|
125
121
|
"""Reshape model output data.
|
126
122
|
|
127
123
|
truth and predict can be either [frames, num_classes], or [frames, timesteps, num_classes]
|
128
124
|
In binary case, num_classes dim may not exist; detect this and set num_classes to 1.
|
129
125
|
"""
|
130
|
-
|
131
|
-
|
132
|
-
if truth is not None:
|
133
|
-
if predict.shape != truth.shape:
|
134
|
-
raise SonusAIError('predict and truth shapes do not match')
|
126
|
+
if truth is not None and predict.shape != truth.shape:
|
127
|
+
raise ValueError("predict and truth shapes do not match")
|
135
128
|
|
136
129
|
ndim = predict.ndim
|
137
130
|
shape = predict.shape
|
138
131
|
|
139
132
|
if not (0 < ndim <= 3):
|
140
|
-
raise
|
133
|
+
raise ValueError(f"do not know how to reshape data with {ndim} dimensions")
|
141
134
|
|
142
135
|
if ndim == 3 or (ndim == 2 and timesteps > 0):
|
143
136
|
if ndim == 2:
|
sonusai/utils/seconds_to_hms.py
CHANGED
sonusai/utils/stacked_complex.py
CHANGED
@@ -12,14 +12,12 @@ def stack_complex(unstacked: np.ndarray) -> np.ndarray:
|
|
12
12
|
:return: A stacked array
|
13
13
|
:raises TypeError:
|
14
14
|
"""
|
15
|
-
from sonusai import SonusAIError
|
16
|
-
|
17
15
|
if not unstacked.ndim > 1:
|
18
|
-
raise
|
16
|
+
raise ValueError("unstacked must have more than 1 dimension")
|
19
17
|
|
20
18
|
shape = list(unstacked.shape)
|
21
19
|
shape[-1] = shape[-1] * 2
|
22
|
-
stacked = np.empty(shape, dtype=np.
|
20
|
+
stacked = np.empty(shape, dtype=np.float32)
|
23
21
|
half = unstacked.shape[-1]
|
24
22
|
stacked[..., :half] = np.real(unstacked)
|
25
23
|
stacked[..., half:] = np.imag(unstacked)
|
@@ -35,13 +33,11 @@ def unstack_complex(stacked: np.ndarray) -> np.ndarray:
|
|
35
33
|
:return: An unstacked complex array
|
36
34
|
:raises TypeError:
|
37
35
|
"""
|
38
|
-
from sonusai import SonusAIError
|
39
|
-
|
40
36
|
if not stacked.ndim > 1:
|
41
|
-
raise
|
37
|
+
raise ValueError("stacked must have more than 1 dimension")
|
42
38
|
|
43
39
|
if stacked.shape[-1] % 2 != 0:
|
44
|
-
raise
|
40
|
+
raise ValueError("last dimension of stacked must be a multiple of 2")
|
45
41
|
|
46
42
|
half = stacked.shape[-1] // 2
|
47
43
|
unstacked = 1j * stacked[..., half:]
|
@@ -58,13 +54,11 @@ def stacked_complex_real(stacked: np.ndarray) -> np.ndarray:
|
|
58
54
|
:return: The real elements
|
59
55
|
:raises TypeError:
|
60
56
|
"""
|
61
|
-
from sonusai import SonusAIError
|
62
|
-
|
63
57
|
if not stacked.ndim > 1:
|
64
|
-
raise
|
58
|
+
raise ValueError("stacked must have more than 1 dimension")
|
65
59
|
|
66
60
|
if stacked.shape[-1] % 2 != 0:
|
67
|
-
raise
|
61
|
+
raise ValueError("last dimension of stacked must be a multiple of 2")
|
68
62
|
|
69
63
|
half = stacked.shape[-1] // 2
|
70
64
|
return stacked[..., :half]
|
@@ -78,13 +72,11 @@ def stacked_complex_imag(stacked: np.ndarray) -> np.ndarray:
|
|
78
72
|
:return: The imaginary elements
|
79
73
|
:raises TypeError:
|
80
74
|
"""
|
81
|
-
from sonusai import SonusAIError
|
82
|
-
|
83
75
|
if not stacked.ndim > 1:
|
84
|
-
raise
|
76
|
+
raise ValueError("stacked must have more than 1 dimension")
|
85
77
|
|
86
78
|
if stacked.shape[-1] % 2 != 0:
|
87
|
-
raise
|
79
|
+
raise ValueError("last dimension of stacked must be a multiple of 2")
|
88
80
|
|
89
81
|
half = stacked.shape[-1] // 2
|
90
82
|
return stacked[..., half:]
|
@@ -1,14 +1,14 @@
|
|
1
|
-
from typing import Optional
|
2
|
-
|
3
1
|
import numpy as np
|
4
2
|
|
5
3
|
from sonusai.mixture import MixtureDatabase
|
6
4
|
|
7
5
|
|
8
|
-
def stratified_shuffle_split_mixid(
|
9
|
-
|
10
|
-
|
11
|
-
|
6
|
+
def stratified_shuffle_split_mixid(
|
7
|
+
mixdb: MixtureDatabase,
|
8
|
+
vsplit: float = 0.2,
|
9
|
+
nsplit: int = 0,
|
10
|
+
rnd_seed: int | None = 0,
|
11
|
+
) -> tuple[list[int], list[int], np.ndarray, np.ndarray]:
|
12
12
|
"""
|
13
13
|
Create a training and test/validation list of mixture IDs from all mixtures in a mixture database.
|
14
14
|
The test/validation split is specified by vsplit (0.0 to 1.0), default 0.2.
|
@@ -35,20 +35,18 @@ def stratified_shuffle_split_mixid(mixdb: MixtureDatabase,
|
|
35
35
|
import random
|
36
36
|
from copy import deepcopy
|
37
37
|
|
38
|
-
from sonusai import SonusAIError
|
39
38
|
from sonusai import logger
|
40
39
|
from sonusai.mixture import get_class_count_from_mixids
|
41
|
-
from sonusai.mixture import get_truth_indices_for_target
|
42
40
|
|
43
41
|
if vsplit < 0 or vsplit > 1:
|
44
|
-
raise
|
42
|
+
raise ValueError("vsplit must be between 0 and 1")
|
45
43
|
|
46
44
|
a_class_mixid: dict[int, list[int]] = {i + 1: [] for i in range(mixdb.num_classes)}
|
47
45
|
for mixid, mixture in enumerate(mixdb.mixtures):
|
48
46
|
class_count = get_class_count_from_mixids(mixdb, mixid)
|
49
|
-
if any(class_count)
|
50
|
-
for
|
51
|
-
a_class_mixid[
|
47
|
+
if any(class_count):
|
48
|
+
for class_index in mixdb.target_files[mixture.targets[0].file_id].class_indices:
|
49
|
+
a_class_mixid[class_index].append(mixid)
|
52
50
|
else:
|
53
51
|
# no counts and mutex mode means this is all 'other' class
|
54
52
|
a_class_mixid[mixdb.num_classes].append(mixid)
|
@@ -80,11 +78,11 @@ def stratified_shuffle_split_mixid(mixdb: MixtureDatabase,
|
|
80
78
|
# randomize order
|
81
79
|
random.shuffle(indices)
|
82
80
|
|
83
|
-
t_class_mixid[ci] = [a_class_mixid[ci + 1][ii] for ii in indices[0:t_num_mixid[ci]]]
|
84
|
-
v_class_mixid[ci] = [a_class_mixid[ci + 1][ii] for ii in indices[t_num_mixid[ci]:]]
|
81
|
+
t_class_mixid[ci] = [a_class_mixid[ci + 1][ii] for ii in indices[0 : t_num_mixid[ci]]]
|
82
|
+
v_class_mixid[ci] = [a_class_mixid[ci + 1][ii] for ii in indices[t_num_mixid[ci] :]]
|
85
83
|
|
86
84
|
if np.any(~(t_num_mixid > 0)):
|
87
|
-
logger.warning(f
|
85
|
+
logger.warning(f"Some classes have zero coverage: {np.where(~(t_num_mixid > 0))[0]}")
|
88
86
|
|
89
87
|
# Stratify over non-zero classes
|
90
88
|
nz_indices = np.where(t_num_mixid > 0)[0]
|
@@ -97,8 +95,10 @@ def stratified_shuffle_split_mixid(mixdb: MixtureDatabase,
|
|
97
95
|
# 2nd stage stratify by class_count/min(class_count-n3) n2 times
|
98
96
|
n2 = int(max(min_class - n0 - n3, 0))
|
99
97
|
|
100
|
-
logger.info(
|
101
|
-
|
98
|
+
logger.info(
|
99
|
+
f"Stratifying training, x1 cnt {n0}: x(class_count/{n2}): x1 cnt {n3} x1, "
|
100
|
+
f"for {len(nz_indices)} populated classes"
|
101
|
+
)
|
102
102
|
|
103
103
|
# initialize source list
|
104
104
|
tt = deepcopy(t_class_mixid)
|
@@ -116,13 +116,13 @@ def stratified_shuffle_split_mixid(mixdb: MixtureDatabase,
|
|
116
116
|
# which will leave approx n3 remaining
|
117
117
|
if n2 > 0:
|
118
118
|
# should always be non-zero
|
119
|
-
min_class = np.min(t_num_mixid2 - n3)
|
119
|
+
min_class = int(np.min(t_num_mixid2 - n3))
|
120
120
|
class_count = np.floor((t_num_mixid2 - n3) / min_class)
|
121
121
|
# class_count = np.maximum(np.floor((t_num_mixid2 - n3) / n2),0) # Counts per class
|
122
122
|
for _ in range(min_class):
|
123
123
|
for ci in range(mixdb.num_classes):
|
124
124
|
if class_count[ci] > 0:
|
125
|
-
for
|
125
|
+
for _ in range(int(class_count[ci])):
|
126
126
|
# append first
|
127
127
|
t_mixid.append(tt[ci][0])
|
128
128
|
del tt[ci][0]
|
@@ -133,10 +133,10 @@ def stratified_shuffle_split_mixid(mixdb: MixtureDatabase,
|
|
133
133
|
t_mixid = _extract_remaining_mixids(mixdb, t_mixid, t_num_mixid2, tt)
|
134
134
|
|
135
135
|
if len(t_mixid) != sum(t_num_mixid):
|
136
|
-
logger.warning(
|
136
|
+
logger.warning("Final stratified training list length does not match starting list length.")
|
137
137
|
|
138
138
|
if any(t_num_mixid2) or any(tt):
|
139
|
-
logger.warning(
|
139
|
+
logger.warning("Remaining training mixid list not empty.")
|
140
140
|
|
141
141
|
# Now stratify the validation list, which is probably not as important, so use simple method
|
142
142
|
# initialize source list
|
@@ -145,18 +145,20 @@ def stratified_shuffle_split_mixid(mixdb: MixtureDatabase,
|
|
145
145
|
v_mixid = _extract_remaining_mixids(mixdb, [], v_num_mixid2, vv)
|
146
146
|
|
147
147
|
if len(v_mixid) != sum(v_num_mixid):
|
148
|
-
logger.warning(
|
148
|
+
logger.warning("Final stratified validation list length does not match starting lists length.")
|
149
149
|
|
150
150
|
if any(v_num_mixid2) or any(vv):
|
151
|
-
logger.warning(
|
151
|
+
logger.warning("Remaining validation mixid list not empty.")
|
152
152
|
|
153
153
|
return t_mixid, v_mixid, t_num_mixid, v_num_mixid
|
154
154
|
|
155
155
|
|
156
|
-
def _extract_remaining_mixids(
|
157
|
-
|
158
|
-
|
159
|
-
|
156
|
+
def _extract_remaining_mixids(
|
157
|
+
mixdb: MixtureDatabase,
|
158
|
+
mixid: list[int],
|
159
|
+
num_mixid: np.ndarray,
|
160
|
+
class_mixid: list[list[int]],
|
161
|
+
) -> list[int]:
|
160
162
|
for _ in range(max(num_mixid)):
|
161
163
|
for ci in range(mixdb.num_classes):
|
162
164
|
if num_mixid[ci] > 0:
|
sonusai/utils/write_audio.py
CHANGED
@@ -3,7 +3,7 @@ from sonusai.mixture.datatypes import AudioT
|
|
3
3
|
|
4
4
|
|
5
5
|
def write_audio(name: str, audio: AudioT, sample_rate: int = SAMPLE_RATE) -> None:
|
6
|
-
"""
|
6
|
+
"""Write an audio file.
|
7
7
|
|
8
8
|
To write multiple channels, use a 2D array of shape [channels, samples].
|
9
9
|
The bits per sample and PCM/float are determined by the data type.
|
@@ -17,7 +17,7 @@ def write_audio(name: str, audio: AudioT, sample_rate: int = SAMPLE_RATE) -> Non
|
|
17
17
|
if data.dim() == 1:
|
18
18
|
data = torch.reshape(data, (1, data.shape[0]))
|
19
19
|
if data.dim() != 2:
|
20
|
-
raise ValueError(
|
20
|
+
raise ValueError("audio must be a 1D or 2D array")
|
21
21
|
|
22
22
|
# Assuming data has more samples than channels, check if array needs to be transposed
|
23
23
|
if data.shape[1] < data.shape[0]:
|
sonusai/utils/yes_or_no.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
def yes_or_no(question: str) -> bool:
|
2
2
|
"""Wait for yes or no input"""
|
3
3
|
while True:
|
4
|
-
reply = str(input(question +
|
5
|
-
if reply[:1] ==
|
4
|
+
reply = str(input(question + " (y/n)?: ")).lower().strip()
|
5
|
+
if reply[:1] == "y":
|
6
6
|
return True
|
7
|
-
if reply[:1] ==
|
7
|
+
if reply[:1] == "n":
|
8
8
|
return False
|
sonusai/vars.py
CHANGED
@@ -23,18 +23,18 @@ def main() -> None:
|
|
23
23
|
|
24
24
|
from sonusai.mixture import DEFAULT_NOISE
|
25
25
|
|
26
|
-
print(
|
27
|
-
print(
|
28
|
-
print(f
|
29
|
-
print(
|
30
|
-
print(
|
31
|
-
print(
|
32
|
-
print(f
|
33
|
-
print(f
|
34
|
-
print(
|
35
|
-
items = [
|
36
|
-
items += [item for item in environ
|
37
|
-
|
38
|
-
|
39
|
-
if __name__ ==
|
26
|
+
print("Custom SonusAI variables:")
|
27
|
+
print("")
|
28
|
+
print(f"${{default_noise}}: {DEFAULT_NOISE}")
|
29
|
+
print("")
|
30
|
+
print("SonusAI recognized environment variables:")
|
31
|
+
print("")
|
32
|
+
print(f"DEEPGRAM_API_KEY {getenv('DEEPGRAM_API_KEY')}")
|
33
|
+
print(f"GOOGLE_SPEECH_API_KEY {getenv('GOOGLE_SPEECH_API_KEY')}")
|
34
|
+
print("")
|
35
|
+
items = ["DEEPGRAM_API_KEY", "GOOGLE_SPEECH_API_KEY"]
|
36
|
+
items += [item for item in environ if item.upper().startswith("AIXP_WHISPER_")]
|
37
|
+
|
38
|
+
|
39
|
+
if __name__ == "__main__":
|
40
40
|
main()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sonusai
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.19.6
|
4
4
|
Summary: Framework for building deep neural network models for sound, speech, and voice AI
|
5
5
|
Home-page: https://aaware.com
|
6
6
|
License: GPL-3.0-only
|
@@ -8,39 +8,38 @@ Author: Chris Eddington
|
|
8
8
|
Author-email: chris@aaware.com
|
9
9
|
Maintainer: Chris Eddington
|
10
10
|
Maintainer-email: chris@aaware.com
|
11
|
-
Requires-Python: >=3.
|
11
|
+
Requires-Python: >=3.11,<3.12
|
12
12
|
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
13
13
|
Classifier: Programming Language :: Python :: 3
|
14
|
-
Classifier: Programming Language :: Python :: 3.9
|
15
|
-
Classifier: Programming Language :: Python :: 3.10
|
16
14
|
Classifier: Programming Language :: Python :: 3.11
|
17
|
-
Requires-Dist:
|
18
|
-
Requires-Dist: dataclasses-json (>=0.6.1,<0.7.0)
|
15
|
+
Requires-Dist: dataclasses-json (>=0.6.7,<0.7.0)
|
19
16
|
Requires-Dist: docopt (>=0.6.2,<0.7.0)
|
20
|
-
Requires-Dist: h5py (>=3.
|
21
|
-
Requires-Dist: jiwer (>=3.0.
|
22
|
-
Requires-Dist: librosa (>=0.10.
|
23
|
-
Requires-Dist: matplotlib (>=3.
|
17
|
+
Requires-Dist: h5py (>=3.12.1,<4.0.0)
|
18
|
+
Requires-Dist: jiwer (>=3.0.4,<4.0.0)
|
19
|
+
Requires-Dist: librosa (>=0.10.2.post1,<0.11.0)
|
20
|
+
Requires-Dist: matplotlib (>=3.9.2,<4.0.0)
|
24
21
|
Requires-Dist: mgzip (>=0.2.1,<0.3.0)
|
25
|
-
Requires-Dist: numpy (>=1
|
26
|
-
Requires-Dist: onnx (>=1.
|
27
|
-
Requires-Dist: onnxruntime (>=1.
|
28
|
-
Requires-Dist: paho-mqtt (>=2.
|
29
|
-
Requires-Dist: pandas (>=2.
|
22
|
+
Requires-Dist: numpy (>=1,<2)
|
23
|
+
Requires-Dist: onnx (>=1.17.0,<2.0.0)
|
24
|
+
Requires-Dist: onnxruntime (>=1.19.2,<2.0.0)
|
25
|
+
Requires-Dist: paho-mqtt (>=2.1.0,<3.0.0)
|
26
|
+
Requires-Dist: pandas (>=2.2.3,<3.0.0)
|
30
27
|
Requires-Dist: pesq (>=0.0.4,<0.0.5)
|
31
28
|
Requires-Dist: praatio (>=6.2.0,<7.0.0)
|
32
|
-
Requires-Dist: psutil (>=
|
33
|
-
Requires-Dist: pyaaware (>=1.5.
|
29
|
+
Requires-Dist: psutil (>=6.0.0,<7.0.0)
|
30
|
+
Requires-Dist: pyaaware (>=1.5.18,<2.0.0)
|
34
31
|
Requires-Dist: pyaudio (>=0.2.14,<0.3.0)
|
35
32
|
Requires-Dist: pydub (>=0.25.1,<0.26.0)
|
36
|
-
Requires-Dist: pystoi (>=0.4.
|
37
|
-
Requires-Dist:
|
33
|
+
Requires-Dist: pystoi (>=0.4.1,<0.5.0)
|
34
|
+
Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
|
35
|
+
Requires-Dist: requests (>=2.32.3,<3.0.0)
|
36
|
+
Requires-Dist: rich (>=13.9.4,<14.0.0)
|
38
37
|
Requires-Dist: samplerate (>=0.2.1,<0.3.0)
|
39
38
|
Requires-Dist: soundfile (>=0.12.1,<0.13.0)
|
40
|
-
Requires-Dist: sox (>=1.
|
39
|
+
Requires-Dist: sox (>=1.5.0,<2.0.0)
|
41
40
|
Requires-Dist: torch (>=2.2,<2.3)
|
42
41
|
Requires-Dist: torchaudio (>=2.2,<2.3)
|
43
|
-
Requires-Dist: tqdm (>=4.66.
|
42
|
+
Requires-Dist: tqdm (>=4.66.5,<5.0.0)
|
44
43
|
Description-Content-Type: text/x-rst
|
45
44
|
|
46
45
|
SonusAI: Framework for simplified creation of deep NN models for sound, speech, and voice AI
|