sonusai 0.18.8__py3-none-any.whl → 0.19.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/__init__.py +20 -29
- sonusai/aawscd_probwrite.py +18 -18
- sonusai/audiofe.py +93 -80
- sonusai/calc_metric_spenh.py +395 -321
- sonusai/data/genmixdb.yml +5 -11
- sonusai/{gentcst.py → deprecated/gentcst.py} +146 -149
- sonusai/{plot.py → deprecated/plot.py} +177 -131
- sonusai/{tplot.py → deprecated/tplot.py} +124 -102
- sonusai/doc/__init__.py +1 -1
- sonusai/doc/doc.py +112 -177
- sonusai/doc.py +10 -10
- sonusai/genft.py +93 -77
- sonusai/genmetrics.py +59 -46
- sonusai/genmix.py +116 -104
- sonusai/genmixdb.py +194 -153
- sonusai/lsdb.py +56 -66
- sonusai/main.py +23 -20
- sonusai/metrics/__init__.py +2 -0
- sonusai/metrics/calc_audio_stats.py +29 -24
- sonusai/metrics/calc_class_weights.py +7 -7
- sonusai/metrics/calc_optimal_thresholds.py +5 -7
- sonusai/metrics/calc_pcm.py +3 -3
- sonusai/metrics/calc_pesq.py +10 -7
- sonusai/metrics/calc_phase_distance.py +3 -3
- sonusai/metrics/calc_sa_sdr.py +10 -8
- sonusai/metrics/calc_segsnr_f.py +15 -17
- sonusai/metrics/calc_speech.py +105 -47
- sonusai/metrics/calc_wer.py +35 -32
- sonusai/metrics/calc_wsdr.py +10 -7
- sonusai/metrics/class_summary.py +30 -27
- sonusai/metrics/confusion_matrix_summary.py +25 -22
- sonusai/metrics/one_hot.py +91 -57
- sonusai/metrics/snr_summary.py +53 -46
- sonusai/mixture/__init__.py +19 -14
- sonusai/mixture/audio.py +4 -6
- sonusai/mixture/augmentation.py +37 -43
- sonusai/mixture/class_count.py +5 -14
- sonusai/mixture/config.py +292 -225
- sonusai/mixture/constants.py +41 -30
- sonusai/mixture/data_io.py +155 -0
- sonusai/mixture/datatypes.py +111 -108
- sonusai/mixture/db_datatypes.py +54 -70
- sonusai/mixture/eq_rule_is_valid.py +6 -9
- sonusai/mixture/feature.py +50 -46
- sonusai/mixture/generation.py +522 -389
- sonusai/mixture/helpers.py +217 -272
- sonusai/mixture/log_duration_and_sizes.py +16 -13
- sonusai/mixture/mixdb.py +677 -473
- sonusai/mixture/soundfile_audio.py +12 -17
- sonusai/mixture/sox_audio.py +91 -112
- sonusai/mixture/sox_augmentation.py +8 -9
- sonusai/mixture/spectral_mask.py +4 -6
- sonusai/mixture/target_class_balancing.py +41 -36
- sonusai/mixture/targets.py +69 -67
- sonusai/mixture/tokenized_shell_vars.py +23 -23
- sonusai/mixture/torchaudio_audio.py +14 -15
- sonusai/mixture/torchaudio_augmentation.py +23 -27
- sonusai/mixture/truth.py +48 -26
- sonusai/mixture/truth_functions/__init__.py +26 -0
- sonusai/mixture/truth_functions/crm.py +56 -38
- sonusai/mixture/truth_functions/datatypes.py +37 -0
- sonusai/mixture/truth_functions/energy.py +85 -59
- sonusai/mixture/truth_functions/file.py +30 -30
- sonusai/mixture/truth_functions/phoneme.py +14 -7
- sonusai/mixture/truth_functions/sed.py +71 -45
- sonusai/mixture/truth_functions/target.py +69 -106
- sonusai/mkwav.py +52 -85
- sonusai/onnx_predict.py +46 -43
- sonusai/queries/__init__.py +3 -1
- sonusai/queries/queries.py +100 -59
- sonusai/speech/__init__.py +2 -0
- sonusai/speech/l2arctic.py +24 -23
- sonusai/speech/librispeech.py +16 -17
- sonusai/speech/mcgill.py +22 -21
- sonusai/speech/textgrid.py +32 -25
- sonusai/speech/timit.py +45 -42
- sonusai/speech/vctk.py +14 -13
- sonusai/speech/voxceleb.py +26 -20
- sonusai/summarize_metric_spenh.py +11 -10
- sonusai/utils/__init__.py +4 -3
- sonusai/utils/asl_p56.py +1 -1
- sonusai/utils/asr.py +37 -17
- sonusai/utils/asr_functions/__init__.py +2 -0
- sonusai/utils/asr_functions/aaware_whisper.py +18 -12
- sonusai/utils/audio_devices.py +12 -12
- sonusai/utils/braced_glob.py +6 -8
- sonusai/utils/calculate_input_shape.py +1 -4
- sonusai/utils/compress.py +2 -2
- sonusai/utils/convert_string_to_number.py +1 -3
- sonusai/utils/create_timestamp.py +1 -1
- sonusai/utils/create_ts_name.py +2 -2
- sonusai/utils/dataclass_from_dict.py +1 -1
- sonusai/utils/docstring.py +6 -6
- sonusai/utils/energy_f.py +9 -7
- sonusai/utils/engineering_number.py +56 -54
- sonusai/utils/get_label_names.py +8 -10
- sonusai/utils/human_readable_size.py +2 -2
- sonusai/utils/model_utils.py +3 -5
- sonusai/utils/numeric_conversion.py +2 -4
- sonusai/utils/onnx_utils.py +43 -32
- sonusai/utils/parallel.py +40 -27
- sonusai/utils/print_mixture_details.py +25 -22
- sonusai/utils/ranges.py +12 -12
- sonusai/utils/read_predict_data.py +11 -9
- sonusai/utils/reshape.py +19 -26
- sonusai/utils/seconds_to_hms.py +1 -1
- sonusai/utils/stacked_complex.py +8 -16
- sonusai/utils/stratified_shuffle_split.py +29 -27
- sonusai/utils/write_audio.py +2 -2
- sonusai/utils/yes_or_no.py +3 -3
- sonusai/vars.py +14 -14
- {sonusai-0.18.8.dist-info → sonusai-0.19.5.dist-info}/METADATA +20 -21
- sonusai-0.19.5.dist-info/RECORD +125 -0
- {sonusai-0.18.8.dist-info → sonusai-0.19.5.dist-info}/WHEEL +1 -1
- sonusai/mixture/truth_functions/data.py +0 -58
- sonusai/utils/read_mixture_data.py +0 -14
- sonusai-0.18.8.dist-info/RECORD +0 -125
- {sonusai-0.18.8.dist-info → sonusai-0.19.5.dist-info}/entry_points.txt +0 -0
sonusai/mixture/db_datatypes.py
CHANGED
@@ -1,72 +1,56 @@
|
|
1
1
|
from collections import namedtuple
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
TargetFileRecord = namedtuple(
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
'gain'])
|
58
|
-
|
59
|
-
MixtureRecord = namedtuple('MixtureRecord', [
|
60
|
-
'id',
|
61
|
-
'name',
|
62
|
-
'noise_file_id',
|
63
|
-
'noise_augmentation',
|
64
|
-
'noise_offset',
|
65
|
-
'noise_snr_gain',
|
66
|
-
'random_snr',
|
67
|
-
'snr',
|
68
|
-
'samples',
|
69
|
-
'spectral_mask_id',
|
70
|
-
'spectral_mask_seed',
|
71
|
-
'target_snr_gain'
|
72
|
-
])
|
3
|
+
TruthConfigRecord = namedtuple("TruthConfigRecord", ["id", "name", "function", "stride_reduction", "config"])
|
4
|
+
|
5
|
+
TruthParametersRecord = namedtuple("TruthParametersRecord", ["id", "name", "parameters"])
|
6
|
+
|
7
|
+
TargetFileRecord = namedtuple("TargetFileRecord", ["id", "name", "samples", "class_indices", "level_type", "speaker_id"])
|
8
|
+
|
9
|
+
NoiseFileRecord = namedtuple("NoiseFileRecord", ["id", "name", "samples"])
|
10
|
+
|
11
|
+
TopRecord = namedtuple(
|
12
|
+
"TopRecord",
|
13
|
+
[
|
14
|
+
"id",
|
15
|
+
"version",
|
16
|
+
"class_balancing",
|
17
|
+
"feature",
|
18
|
+
"noise_mix_mode",
|
19
|
+
"num_classes",
|
20
|
+
"seed",
|
21
|
+
"mixid_width",
|
22
|
+
"speaker_metadata_tiers",
|
23
|
+
"textgrid_metadata_tiers",
|
24
|
+
],
|
25
|
+
)
|
26
|
+
|
27
|
+
ClassLabelRecord = namedtuple("ClassLabelRecord", ["id", "label"])
|
28
|
+
|
29
|
+
ClassWeightsThresholdRecord = namedtuple("ClassWeightsThresholdRecord", ["id", "threshold"])
|
30
|
+
|
31
|
+
ImpulseResponseFileRecord = namedtuple("ImpulseResponseFileRecord", ["id", "file"])
|
32
|
+
|
33
|
+
SpectralMaskRecord = namedtuple(
|
34
|
+
"SpectralMaskRecord",
|
35
|
+
["id", "f_max_width", "f_num", "t_max_width", "t_num", "t_max_percent"],
|
36
|
+
)
|
37
|
+
|
38
|
+
TargetRecord = namedtuple("TargetRecord", ["id", "file_id", "augmentation", "gain"])
|
39
|
+
|
40
|
+
MixtureRecord = namedtuple(
|
41
|
+
"MixtureRecord",
|
42
|
+
[
|
43
|
+
"id",
|
44
|
+
"name",
|
45
|
+
"noise_file_id",
|
46
|
+
"noise_augmentation",
|
47
|
+
"noise_offset",
|
48
|
+
"noise_snr_gain",
|
49
|
+
"random_snr",
|
50
|
+
"snr",
|
51
|
+
"samples",
|
52
|
+
"spectral_mask_id",
|
53
|
+
"spectral_mask_seed",
|
54
|
+
"target_snr_gain",
|
55
|
+
],
|
56
|
+
)
|
@@ -8,7 +8,7 @@ def eq_rule_is_valid(rule: Any) -> bool:
|
|
8
8
|
"""
|
9
9
|
|
10
10
|
# Must be a list or string equal to 'none'
|
11
|
-
if isinstance(rule, str) and rule ==
|
11
|
+
if isinstance(rule, str) and rule == "none":
|
12
12
|
return True
|
13
13
|
|
14
14
|
if not isinstance(rule, list):
|
@@ -27,22 +27,19 @@ def eq_rule_is_valid(rule: Any) -> bool:
|
|
27
27
|
if not all(isinstance(el, float | int | str) for el in r):
|
28
28
|
return False
|
29
29
|
|
30
|
-
if isinstance(r, str) and r ==
|
30
|
+
if isinstance(r, str) and r == "none":
|
31
31
|
continue
|
32
32
|
|
33
33
|
for el in r:
|
34
34
|
# If a string, item must start with 'rand'
|
35
|
-
if isinstance(el, str) and not el.startswith(
|
35
|
+
if isinstance(el, str) and not el.startswith("rand"):
|
36
36
|
return False
|
37
37
|
|
38
38
|
return True
|
39
39
|
|
40
40
|
|
41
41
|
def _check_for_none(rule: Any) -> bool:
|
42
|
-
"""Check if EQ rule is 'none'
|
43
|
-
""
|
44
|
-
if isinstance(rule, str) and rule == 'none':
|
45
|
-
return True
|
46
|
-
if isinstance(rule, list) and len(rule) == 3:
|
42
|
+
"""Check if EQ rule is 'none'"""
|
43
|
+
if isinstance(rule, str) and rule == "none":
|
47
44
|
return True
|
48
|
-
return
|
45
|
+
return bool(isinstance(rule, list) and len(rule) == 3)
|
sonusai/mixture/feature.py
CHANGED
@@ -1,46 +1,38 @@
|
|
1
|
-
from typing import Optional
|
2
|
-
|
3
1
|
from sonusai.mixture.datatypes import AudioT
|
4
2
|
from sonusai.mixture.datatypes import Feature
|
5
3
|
|
6
4
|
|
7
|
-
def get_feature_from_audio(
|
8
|
-
|
9
|
-
|
10
|
-
|
5
|
+
def get_feature_from_audio(
|
6
|
+
audio: AudioT,
|
7
|
+
feature_mode: str,
|
8
|
+
) -> Feature:
|
11
9
|
"""Apply forward transform and generate feature data from audio data
|
12
10
|
|
13
11
|
:param audio: Time domain audio data [samples]
|
14
12
|
:param feature_mode: Feature mode
|
15
|
-
:param num_classes: Number of classes
|
16
|
-
:param truth_mutex: Whether to calculate 'other' label
|
17
13
|
:return: Feature data [frames, strides, feature_parameters]
|
18
14
|
"""
|
19
15
|
import numpy as np
|
20
16
|
from pyaaware import FeatureGenerator
|
21
17
|
|
22
|
-
from .augmentation import pad_audio_to_frame
|
23
18
|
from .datatypes import TransformConfig
|
24
19
|
from .helpers import forward_transform
|
25
20
|
|
26
|
-
fg = FeatureGenerator(feature_mode=feature_mode
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
transform_frames = samples // fg.ftransform_R
|
42
|
-
feature_frames = samples // feature_step_samples
|
43
|
-
|
21
|
+
fg = FeatureGenerator(feature_mode=feature_mode)
|
22
|
+
|
23
|
+
audio_f = forward_transform(
|
24
|
+
audio=audio,
|
25
|
+
config=TransformConfig(
|
26
|
+
length=fg.ftransform_length,
|
27
|
+
overlap=fg.ftransform_overlap,
|
28
|
+
bin_start=fg.bin_start,
|
29
|
+
bin_end=fg.bin_end,
|
30
|
+
ttype=fg.ftransform_ttype,
|
31
|
+
),
|
32
|
+
)
|
33
|
+
|
34
|
+
transform_frames = audio_f.shape[0]
|
35
|
+
feature_frames = transform_frames // (fg.decimation * fg.step)
|
44
36
|
feature = np.empty((feature_frames, fg.stride, fg.feature_parameters), dtype=np.float32)
|
45
37
|
|
46
38
|
feature_frame = 0
|
@@ -54,37 +46,49 @@ def get_feature_from_audio(audio: AudioT,
|
|
54
46
|
return feature
|
55
47
|
|
56
48
|
|
57
|
-
def get_audio_from_feature(
|
58
|
-
|
59
|
-
|
60
|
-
|
49
|
+
def get_audio_from_feature(
|
50
|
+
feature: Feature,
|
51
|
+
feature_mode: str,
|
52
|
+
num_classes: int | None = 1,
|
53
|
+
truth_mutex: bool | None = False,
|
54
|
+
) -> AudioT:
|
61
55
|
"""Apply inverse transform to feature data to generate audio data
|
62
56
|
|
63
|
-
:param feature: Feature data [frames,
|
57
|
+
:param feature: Feature data [frames, stride=1, feature_parameters]
|
64
58
|
:param feature_mode: Feature mode
|
65
59
|
:param num_classes: Number of classes
|
66
60
|
:param truth_mutex: Whether to calculate 'other' label
|
67
61
|
:return: Audio data [samples]
|
68
62
|
"""
|
69
63
|
import numpy as np
|
70
|
-
|
71
64
|
from pyaaware import FeatureGenerator
|
72
65
|
|
66
|
+
from sonusai.utils.compress import power_uncompress
|
67
|
+
from sonusai.utils.stacked_complex import unstack_complex
|
68
|
+
|
73
69
|
from .datatypes import TransformConfig
|
74
70
|
from .helpers import inverse_transform
|
75
|
-
from sonusai.utils.stacked_complex import unstack_complex
|
76
|
-
from sonusai.utils.compress import power_uncompress
|
77
71
|
|
78
|
-
|
79
|
-
|
80
|
-
|
72
|
+
if feature.ndim != 3:
|
73
|
+
raise ValueError("feature must have 3 dimensions: [frames, stride=1, feature_parameters]")
|
74
|
+
|
75
|
+
if feature.shape[1] != 1:
|
76
|
+
raise ValueError("Strided feature data is not supported for audio extraction; stride must be 1.")
|
77
|
+
|
78
|
+
fg = FeatureGenerator(feature_mode=feature_mode, num_classes=num_classes, truth_mutex=truth_mutex)
|
81
79
|
|
82
|
-
feature_complex = unstack_complex(feature)
|
83
|
-
if feature_mode[0:1] ==
|
80
|
+
feature_complex = unstack_complex(feature.squeeze())
|
81
|
+
if feature_mode[0:1] == "h":
|
84
82
|
feature_complex = power_uncompress(feature_complex)
|
85
|
-
return np.squeeze(
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
83
|
+
return np.squeeze(
|
84
|
+
inverse_transform(
|
85
|
+
transform=feature_complex,
|
86
|
+
config=TransformConfig(
|
87
|
+
length=fg.itransform_length,
|
88
|
+
overlap=fg.itransform_overlap,
|
89
|
+
bin_start=fg.bin_start,
|
90
|
+
bin_end=fg.bin_end,
|
91
|
+
ttype=fg.itransform_ttype,
|
92
|
+
),
|
93
|
+
)
|
94
|
+
)
|