sonusai 0.18.9__py3-none-any.whl → 0.19.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/__init__.py +20 -29
- sonusai/aawscd_probwrite.py +18 -18
- sonusai/audiofe.py +93 -80
- sonusai/calc_metric_spenh.py +395 -321
- sonusai/data/genmixdb.yml +5 -11
- sonusai/{gentcst.py → deprecated/gentcst.py} +146 -149
- sonusai/{plot.py → deprecated/plot.py} +177 -131
- sonusai/{tplot.py → deprecated/tplot.py} +124 -102
- sonusai/doc/__init__.py +1 -1
- sonusai/doc/doc.py +112 -177
- sonusai/doc.py +10 -10
- sonusai/genft.py +81 -91
- sonusai/genmetrics.py +51 -61
- sonusai/genmix.py +105 -115
- sonusai/genmixdb.py +201 -174
- sonusai/lsdb.py +56 -66
- sonusai/main.py +23 -20
- sonusai/metrics/__init__.py +2 -0
- sonusai/metrics/calc_audio_stats.py +29 -24
- sonusai/metrics/calc_class_weights.py +7 -7
- sonusai/metrics/calc_optimal_thresholds.py +5 -7
- sonusai/metrics/calc_pcm.py +3 -3
- sonusai/metrics/calc_pesq.py +10 -7
- sonusai/metrics/calc_phase_distance.py +3 -3
- sonusai/metrics/calc_sa_sdr.py +10 -8
- sonusai/metrics/calc_segsnr_f.py +16 -18
- sonusai/metrics/calc_speech.py +105 -47
- sonusai/metrics/calc_wer.py +35 -32
- sonusai/metrics/calc_wsdr.py +10 -7
- sonusai/metrics/class_summary.py +30 -27
- sonusai/metrics/confusion_matrix_summary.py +25 -22
- sonusai/metrics/one_hot.py +91 -57
- sonusai/metrics/snr_summary.py +53 -46
- sonusai/mixture/__init__.py +20 -14
- sonusai/mixture/audio.py +4 -6
- sonusai/mixture/augmentation.py +37 -43
- sonusai/mixture/class_count.py +5 -14
- sonusai/mixture/config.py +292 -225
- sonusai/mixture/constants.py +41 -30
- sonusai/mixture/data_io.py +155 -0
- sonusai/mixture/datatypes.py +111 -108
- sonusai/mixture/db_datatypes.py +54 -70
- sonusai/mixture/eq_rule_is_valid.py +6 -9
- sonusai/mixture/feature.py +40 -38
- sonusai/mixture/generation.py +522 -389
- sonusai/mixture/helpers.py +217 -272
- sonusai/mixture/log_duration_and_sizes.py +16 -13
- sonusai/mixture/mixdb.py +669 -477
- sonusai/mixture/soundfile_audio.py +12 -17
- sonusai/mixture/sox_audio.py +91 -112
- sonusai/mixture/sox_augmentation.py +8 -9
- sonusai/mixture/spectral_mask.py +4 -6
- sonusai/mixture/target_class_balancing.py +41 -36
- sonusai/mixture/targets.py +69 -67
- sonusai/mixture/tokenized_shell_vars.py +23 -23
- sonusai/mixture/torchaudio_audio.py +14 -15
- sonusai/mixture/torchaudio_augmentation.py +23 -27
- sonusai/mixture/truth.py +48 -26
- sonusai/mixture/truth_functions/__init__.py +26 -0
- sonusai/mixture/truth_functions/crm.py +56 -38
- sonusai/mixture/truth_functions/datatypes.py +37 -0
- sonusai/mixture/truth_functions/energy.py +85 -59
- sonusai/mixture/truth_functions/file.py +30 -30
- sonusai/mixture/truth_functions/phoneme.py +14 -7
- sonusai/mixture/truth_functions/sed.py +71 -45
- sonusai/mixture/truth_functions/target.py +69 -106
- sonusai/mkwav.py +58 -101
- sonusai/onnx_predict.py +46 -43
- sonusai/queries/__init__.py +3 -1
- sonusai/queries/queries.py +100 -59
- sonusai/speech/__init__.py +2 -0
- sonusai/speech/l2arctic.py +24 -23
- sonusai/speech/librispeech.py +16 -17
- sonusai/speech/mcgill.py +22 -21
- sonusai/speech/textgrid.py +32 -25
- sonusai/speech/timit.py +45 -42
- sonusai/speech/vctk.py +14 -13
- sonusai/speech/voxceleb.py +26 -20
- sonusai/summarize_metric_spenh.py +11 -10
- sonusai/utils/__init__.py +4 -3
- sonusai/utils/asl_p56.py +1 -1
- sonusai/utils/asr.py +37 -17
- sonusai/utils/asr_functions/__init__.py +2 -0
- sonusai/utils/asr_functions/aaware_whisper.py +18 -12
- sonusai/utils/audio_devices.py +12 -12
- sonusai/utils/braced_glob.py +6 -8
- sonusai/utils/calculate_input_shape.py +1 -4
- sonusai/utils/compress.py +2 -2
- sonusai/utils/convert_string_to_number.py +1 -3
- sonusai/utils/create_timestamp.py +1 -1
- sonusai/utils/create_ts_name.py +2 -2
- sonusai/utils/dataclass_from_dict.py +1 -1
- sonusai/utils/docstring.py +6 -6
- sonusai/utils/energy_f.py +9 -7
- sonusai/utils/engineering_number.py +56 -54
- sonusai/utils/get_label_names.py +8 -10
- sonusai/utils/human_readable_size.py +2 -2
- sonusai/utils/model_utils.py +3 -5
- sonusai/utils/numeric_conversion.py +2 -4
- sonusai/utils/onnx_utils.py +43 -32
- sonusai/utils/parallel.py +41 -30
- sonusai/utils/print_mixture_details.py +25 -22
- sonusai/utils/ranges.py +12 -12
- sonusai/utils/read_predict_data.py +11 -9
- sonusai/utils/reshape.py +19 -26
- sonusai/utils/seconds_to_hms.py +1 -1
- sonusai/utils/stacked_complex.py +8 -16
- sonusai/utils/stratified_shuffle_split.py +29 -27
- sonusai/utils/write_audio.py +2 -2
- sonusai/utils/yes_or_no.py +3 -3
- sonusai/vars.py +14 -14
- {sonusai-0.18.9.dist-info → sonusai-0.19.6.dist-info}/METADATA +20 -21
- sonusai-0.19.6.dist-info/RECORD +125 -0
- {sonusai-0.18.9.dist-info → sonusai-0.19.6.dist-info}/WHEEL +1 -1
- sonusai/mixture/truth_functions/data.py +0 -58
- sonusai/utils/read_mixture_data.py +0 -14
- sonusai-0.18.9.dist-info/RECORD +0 -125
- {sonusai-0.18.9.dist-info → sonusai-0.19.6.dist-info}/entry_points.txt +0 -0
sonusai/mixture/constants.py
CHANGED
@@ -1,49 +1,60 @@
|
|
1
1
|
import re
|
2
|
-
|
3
2
|
from importlib.resources import as_file
|
4
3
|
from importlib.resources import files
|
5
4
|
|
6
5
|
REQUIRED_CONFIGS = [
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
'truth_settings',
|
6
|
+
"asr_configs",
|
7
|
+
"class_balancing",
|
8
|
+
"class_balancing_augmentation",
|
9
|
+
"class_indices",
|
10
|
+
"class_labels",
|
11
|
+
"class_weights_threshold",
|
12
|
+
"feature",
|
13
|
+
"impulse_responses",
|
14
|
+
"noise_augmentations",
|
15
|
+
"noise_mix_mode",
|
16
|
+
"noises",
|
17
|
+
"num_classes",
|
18
|
+
"random_snrs",
|
19
|
+
"seed",
|
20
|
+
"snrs",
|
21
|
+
"spectral_masks",
|
22
|
+
"target_augmentations",
|
23
|
+
"target_level_type",
|
24
|
+
"targets",
|
25
|
+
"truth_configs",
|
28
26
|
]
|
29
27
|
OPTIONAL_CONFIGS: list[str] = []
|
30
28
|
VALID_CONFIGS = REQUIRED_CONFIGS + OPTIONAL_CONFIGS
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
29
|
+
REQUIRED_TRUTH_CONFIGS = ["function", "stride_reduction"]
|
30
|
+
REQUIRED_ASR_CONFIGS = ["engine"]
|
31
|
+
VALID_AUGMENTATIONS = [
|
32
|
+
"normalize",
|
33
|
+
"gain",
|
34
|
+
"pitch",
|
35
|
+
"tempo",
|
36
|
+
"eq1",
|
37
|
+
"eq2",
|
38
|
+
"eq3",
|
39
|
+
"lpf",
|
40
|
+
"ir",
|
41
|
+
"mixup",
|
42
|
+
]
|
43
|
+
VALID_NOISE_MIX_MODES = ["exhaustive", "non-exhaustive", "non-combinatorial"]
|
44
|
+
RAND_PATTERN = re.compile(r"rand\(([-+]?(\d+(\.\d*)?|\.\d+)),\s*([-+]?(\d+(\.\d*)?|\.\d+))\)")
|
35
45
|
SAMPLE_RATE = 16000
|
36
46
|
BIT_DEPTH = 32
|
37
|
-
ENCODING =
|
47
|
+
ENCODING = "floating-point"
|
38
48
|
CHANNEL_COUNT = 1
|
39
49
|
SAMPLE_BYTES = BIT_DEPTH // 8
|
40
50
|
FLOAT_BYTES = 4
|
51
|
+
MIXDB_VERSION = 2
|
41
52
|
|
42
|
-
with as_file(files(
|
53
|
+
with as_file(files("sonusai.data").joinpath("whitenoise.wav")) as path:
|
43
54
|
DEFAULT_NOISE = str(path)
|
44
55
|
|
45
|
-
with as_file(files(
|
56
|
+
with as_file(files("sonusai.data").joinpath("genmixdb.yml")) as path:
|
46
57
|
DEFAULT_CONFIG = str(path)
|
47
58
|
|
48
|
-
with as_file(files(
|
59
|
+
with as_file(files("sonusai.data").joinpath("speech_ma01_01.wav")) as path:
|
49
60
|
DEFAULT_SPEECH = str(path)
|
@@ -0,0 +1,155 @@
|
|
1
|
+
from typing import Any
|
2
|
+
|
3
|
+
|
4
|
+
def _get_hdf5_name(location: str, index: str) -> str:
    """Build the path of the HDF5 file that stores the data for one index.

    :param location: Directory that contains the file
    :param index: Mixture, target, or noise index
    :return: ``location`` joined with ``<index>.h5``
    """
    import os.path

    file_name = index + ".h5"
    return os.path.join(location, file_name)
|
8
|
+
|
9
|
+
|
10
|
+
def _get_pickle_name(location: str, index: str, item: str) -> str:
    """Build the path of the pickle file that stores one item of one index.

    :param location: Directory that contains the per-index sub-directories
    :param index: Mixture, target, or noise index (used as the sub-directory name)
    :param item: Name of the data item
    :return: ``location`` joined with ``index`` and ``<item>.pkl``
    """
    import os.path

    file_name = item + ".pkl"
    return os.path.join(location, index, file_name)
|
14
|
+
|
15
|
+
|
16
|
+
def read_hdf5_data(location: str, index: str, items: list[str] | str) -> Any:
    """Read mixture, target, or noise data from an HDF5 file

    A single item name yields that item's value directly; a list of more
    than one name yields a list of values in the same order. Every dataset
    that is absent (or every item, when the file itself does not exist) is
    reported as None.

    :param location: Location of the file
    :param index: Mixture, target, or noise index
    :param items: String(s) of data to retrieve
    :return: Data (or list of data)
    :raises OSError: If the file exists but reading it fails
    """
    from os.path import exists
    from typing import Any

    import h5py
    import numpy as np

    def _load(handle: h5py.File, key: str) -> Any:
        # Missing datasets are reported as None rather than raising.
        if key not in handle:
            return None

        values = np.array(handle[key])
        if values.size != 1:
            return values

        # Single-element datasets are unwrapped to a Python scalar;
        # byte strings are decoded to text.
        scalar = values.item()
        if isinstance(scalar, bytes):
            return scalar.decode("utf-8")
        return scalar

    names = items if isinstance(items, list) else [items]
    h5_name = _get_hdf5_name(location, index)

    loaded: list[Any]
    if not exists(h5_name):
        loaded = [None for _ in names]
    else:
        try:
            with h5py.File(h5_name, "r") as f:
                loaded = [_load(f, key) for key in names]
        except Exception as e:
            raise OSError(f"Error reading {h5_name}: {e}") from e

    if len(names) == 1:
        return loaded[0]
    return loaded
|
58
|
+
|
59
|
+
|
60
|
+
def write_hdf5_data(location: str, index: str, items: list[tuple[str, Any]] | tuple[str, Any]) -> None:
    """Write mixture, target, or noise data to an HDF5 file

    The file is opened in append mode; a dataset that already exists under
    a given name is deleted and recreated with the new data.

    :param location: Location of the file
    :param index: Mixture, target, or noise index
    :param items: Tuple(s) of (name, data)
    """
    import h5py

    entries = items if isinstance(items, list) else [items]

    h5_name = _get_hdf5_name(location, index)
    with h5py.File(h5_name, "a") as f:
        for entry in entries:
            key = entry[0]
            # Remove any previous dataset so create_dataset does not collide.
            if key in f:
                del f[key]
            f.create_dataset(name=key, data=entry[1])
|
78
|
+
|
79
|
+
|
80
|
+
def read_pickle_data(location: str, index: str, items: list[str] | str) -> Any:
    """Read mixture, target, or noise data from a pickle file

    A single item name yields that item's value directly; a list of more
    than one name yields a list of values in the same order. Items whose
    pickle file does not exist are reported as None.

    :param location: Location of the file
    :param index: Mixture, target, or noise index
    :param items: String(s) of data to retrieve
    :return: Data (or list of data)
    """
    import pickle
    from os.path import exists
    from typing import Any

    names = items if isinstance(items, list) else [items]

    def _load(name: str) -> Any:
        pkl_name = _get_pickle_name(location, index, name)
        if not exists(pkl_name):
            return None
        # NOTE(review): unpickling executes arbitrary code; only read files
        # from a trusted source.
        with open(pkl_name, "rb") as f:
            return pickle.load(f)  # noqa: S301

    loaded: list[Any] = [_load(name) for name in names]

    if len(names) == 1:
        return loaded[0]
    return loaded
|
108
|
+
|
109
|
+
|
110
|
+
def write_pickle_data(location: str, index: str, items: list[tuple[str, Any]] | tuple[str, Any]) -> None:
    """Write mixture, target, or noise data to a pickle file

    Each (name, data) pair is written to its own ``<name>.pkl`` file inside
    the ``<location>/<index>`` directory, which is created if needed.

    :param location: Location of the file
    :param index: Mixture, target, or noise index
    :param items: Tuple(s) of (name, data)
    """
    import pickle
    from os import makedirs
    from os.path import join

    if not isinstance(items, list):
        items = [items]

    makedirs(join(location, index), exist_ok=True)
    for item in items:
        pkl_name = _get_pickle_name(location, index, item[0])
        with open(pkl_name, "wb") as f:
            # Stream straight into the file instead of first materializing
            # the whole pickle as a bytes object in memory.
            pickle.dump(item[1], f)
|
129
|
+
|
130
|
+
|
131
|
+
def read_cached_data(location: str, name: str, index: str, items: list[str] | str) -> Any:
    """Read cached data from a file

    Delegates to read_pickle_data using the ``<location>/<name>`` directory.

    :param location: Location of the mixture database
    :param name: Data name ('mixture', 'target', or 'noise')
    :param index: Data index (mixture, target, or noise ID)
    :param items: String(s) of data to retrieve
    :return: Data (or list of data), as returned by read_pickle_data
    """
    import os.path

    cache_dir = os.path.join(location, name)
    return read_pickle_data(cache_dir, index, items)
|
143
|
+
|
144
|
+
|
145
|
+
def write_cached_data(location: str, name: str, index: str, items: list[tuple[str, Any]] | tuple[str, Any]) -> None:
    """Write cached data to a file

    Delegates to write_pickle_data using the ``<location>/<name>`` directory.

    :param location: Location of the mixture database
    :param name: Data name ('mixture', 'target', or 'noise')
    :param index: Data index (mixture, target, or noise ID)
    :param items: Tuple(s) of (name, data)
    """
    import os.path

    cache_dir = os.path.join(location, name)
    write_pickle_data(cache_dir, index, items)
|
sonusai/mixture/datatypes.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
+
from collections.abc import Iterable
|
1
2
|
from dataclasses import dataclass
|
3
|
+
from dataclasses import field
|
2
4
|
from typing import Any
|
3
|
-
from typing import Iterable
|
4
5
|
from typing import NamedTuple
|
5
|
-
from typing import Optional
|
6
6
|
from typing import SupportsIndex
|
7
7
|
from typing import TypeAlias
|
8
8
|
|
@@ -17,6 +17,7 @@ AudiosT: TypeAlias = list[AudioT]
|
|
17
17
|
ListAudiosT: TypeAlias = list[AudiosT]
|
18
18
|
|
19
19
|
Truth: TypeAlias = npt.NDArray[np.float32]
|
20
|
+
TruthDict: TypeAlias = dict[str, Truth]
|
20
21
|
Segsnr: TypeAlias = npt.NDArray[np.float32]
|
21
22
|
|
22
23
|
AudioF: TypeAlias = npt.NDArray[np.complex64]
|
@@ -35,7 +36,7 @@ Json: TypeAlias = dict | list | str | int | float | bool | None
|
|
35
36
|
|
36
37
|
class DataClassSonusAIMixin(DataClassJsonMixin):
|
37
38
|
def __str__(self):
|
38
|
-
return f
|
39
|
+
return f"{self.to_dict()}"
|
39
40
|
|
40
41
|
# Override DataClassJsonMixin to remove dictionary keys with values of None
|
41
42
|
def to_dict(self, encode_json=False) -> dict[str, Json]:
|
@@ -58,22 +59,22 @@ class DataClassSonusAIMixin(DataClassJsonMixin):
|
|
58
59
|
|
59
60
|
|
60
61
|
@dataclass(frozen=True)
|
61
|
-
class
|
62
|
-
|
63
|
-
|
64
|
-
|
62
|
+
class TruthConfig(DataClassSonusAIMixin):
|
63
|
+
function: str
|
64
|
+
stride_reduction: str
|
65
|
+
config: dict = field(default_factory=dict)
|
65
66
|
|
66
67
|
def __hash__(self):
|
67
68
|
return hash(self.to_json())
|
68
69
|
|
69
70
|
def __eq__(self, other):
|
70
|
-
return isinstance(other,
|
71
|
+
return isinstance(other, TruthConfig) and hash(self) == hash(other)
|
71
72
|
|
72
73
|
|
73
|
-
|
74
|
+
TruthConfigs: TypeAlias = dict[str, TruthConfig]
|
74
75
|
NumberStr: TypeAlias = float | int | str
|
75
|
-
OptionalNumberStr: TypeAlias =
|
76
|
-
OptionalListNumberStr: TypeAlias =
|
76
|
+
OptionalNumberStr: TypeAlias = NumberStr | None
|
77
|
+
OptionalListNumberStr: TypeAlias = list[NumberStr] | None
|
77
78
|
EQ: TypeAlias = tuple[float | int, float | int, float | int]
|
78
79
|
|
79
80
|
|
@@ -88,7 +89,7 @@ class AugmentationRule(DataClassSonusAIMixin):
|
|
88
89
|
eq3: OptionalListNumberStr = None
|
89
90
|
lpf: OptionalNumberStr = None
|
90
91
|
ir: OptionalNumberStr = None
|
91
|
-
mixup:
|
92
|
+
mixup: int = 1
|
92
93
|
|
93
94
|
|
94
95
|
AugmentationRules: TypeAlias = list[AugmentationRule]
|
@@ -96,15 +97,15 @@ AugmentationRules: TypeAlias = list[AugmentationRule]
|
|
96
97
|
|
97
98
|
@dataclass
|
98
99
|
class Augmentation(DataClassSonusAIMixin):
|
99
|
-
normalize:
|
100
|
-
pitch:
|
101
|
-
tempo:
|
102
|
-
gain:
|
103
|
-
eq1:
|
104
|
-
eq2:
|
105
|
-
eq3:
|
106
|
-
lpf:
|
107
|
-
ir:
|
100
|
+
normalize: float | None = None
|
101
|
+
pitch: float | None = None
|
102
|
+
tempo: float | None = None
|
103
|
+
gain: float | None = None
|
104
|
+
eq1: EQ | None = None
|
105
|
+
eq2: EQ | None = None
|
106
|
+
eq3: EQ | None = None
|
107
|
+
lpf: float | None = None
|
108
|
+
ir: int | None = None
|
108
109
|
|
109
110
|
|
110
111
|
Augmentations: TypeAlias = list[Augmentation]
|
@@ -145,10 +146,11 @@ Speaker: TypeAlias = dict[str, str]
|
|
145
146
|
class TargetFile(DataClassSonusAIMixin):
|
146
147
|
name: str
|
147
148
|
samples: int
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
149
|
+
class_indices: list[int]
|
150
|
+
truth_configs: TruthConfigs
|
151
|
+
class_balancing_augmentation: AugmentationRule | None = None
|
152
|
+
level_type: str | None = None
|
153
|
+
speaker_id: int | None = None
|
152
154
|
|
153
155
|
@property
|
154
156
|
def duration(self) -> float:
|
@@ -187,32 +189,21 @@ ClassCount: TypeAlias = list[int]
|
|
187
189
|
GeneralizedIDs: TypeAlias = str | int | list[int] | range
|
188
190
|
|
189
191
|
|
190
|
-
@dataclass(frozen=True)
|
191
|
-
class TruthFunctionConfig(DataClassSonusAIMixin):
|
192
|
-
feature: str
|
193
|
-
mutex: bool
|
194
|
-
num_classes: int
|
195
|
-
target_gain: float
|
196
|
-
config: Optional[dict] = None
|
197
|
-
function: Optional[str] = None
|
198
|
-
index: Optional[list[int]] = None
|
199
|
-
|
200
|
-
|
201
192
|
@dataclass
|
202
193
|
class GenMixData:
|
203
|
-
targets:
|
204
|
-
target:
|
205
|
-
noise:
|
206
|
-
mixture:
|
207
|
-
truth_t:
|
208
|
-
segsnr_t:
|
194
|
+
targets: AudiosT | None = None
|
195
|
+
target: AudioT | None = None
|
196
|
+
noise: AudioT | None = None
|
197
|
+
mixture: AudioT | None = None
|
198
|
+
truth_t: TruthDict | None = None
|
199
|
+
segsnr_t: Segsnr | None = None
|
209
200
|
|
210
201
|
|
211
202
|
@dataclass
|
212
203
|
class GenFTData:
|
213
|
-
feature:
|
214
|
-
truth_f:
|
215
|
-
segsnr:
|
204
|
+
feature: Feature | None = None
|
205
|
+
truth_f: TruthDict | None = None
|
206
|
+
segsnr: Segsnr | None = None
|
216
207
|
|
217
208
|
|
218
209
|
@dataclass
|
@@ -226,7 +217,13 @@ class ImpulseResponseData:
|
|
226
217
|
return len(self.data)
|
227
218
|
|
228
219
|
|
229
|
-
|
220
|
+
@dataclass
|
221
|
+
class ImpulseResponseFile:
|
222
|
+
file: str
|
223
|
+
tags: list[str]
|
224
|
+
|
225
|
+
|
226
|
+
ImpulseResponseFiles: TypeAlias = list[ImpulseResponseFile]
|
230
227
|
|
231
228
|
|
232
229
|
@dataclass(frozen=True)
|
@@ -241,11 +238,20 @@ class SpectralMask(DataClassSonusAIMixin):
|
|
241
238
|
SpectralMasks: TypeAlias = list[SpectralMask]
|
242
239
|
|
243
240
|
|
241
|
+
@dataclass(frozen=True)
|
242
|
+
class TruthParameter(DataClassSonusAIMixin):
|
243
|
+
name: str
|
244
|
+
parameters: int
|
245
|
+
|
246
|
+
|
247
|
+
TruthParameters: TypeAlias = list[TruthParameter]
|
248
|
+
|
249
|
+
|
244
250
|
@dataclass
|
245
251
|
class Target(DataClassSonusAIMixin):
|
246
|
-
file_id:
|
247
|
-
augmentation:
|
248
|
-
gain:
|
252
|
+
file_id: int
|
253
|
+
augmentation: Augmentation
|
254
|
+
gain: float = 1.0
|
249
255
|
|
250
256
|
|
251
257
|
Targets: TypeAlias = list[Target]
|
@@ -253,22 +259,22 @@ Targets: TypeAlias = list[Target]
|
|
253
259
|
|
254
260
|
@dataclass
|
255
261
|
class Noise(DataClassSonusAIMixin):
|
256
|
-
file_id:
|
257
|
-
augmentation:
|
258
|
-
offset:
|
262
|
+
file_id: int
|
263
|
+
augmentation: Augmentation
|
264
|
+
offset: int = 0
|
259
265
|
|
260
266
|
|
261
267
|
@dataclass
|
262
268
|
class Mixture(DataClassSonusAIMixin):
|
263
|
-
name:
|
264
|
-
|
265
|
-
|
266
|
-
samples:
|
267
|
-
snr:
|
268
|
-
spectral_mask_id:
|
269
|
-
spectral_mask_seed:
|
270
|
-
target_snr_gain:
|
271
|
-
|
269
|
+
name: str
|
270
|
+
targets: Targets
|
271
|
+
noise: Noise
|
272
|
+
samples: int
|
273
|
+
snr: UniversalSNR
|
274
|
+
spectral_mask_id: int
|
275
|
+
spectral_mask_seed: int
|
276
|
+
target_snr_gain: float = 1.0
|
277
|
+
noise_snr_gain: float = 1.0
|
272
278
|
|
273
279
|
@property
|
274
280
|
def noise_id(self) -> int:
|
@@ -288,8 +294,8 @@ Mixtures: TypeAlias = list[Mixture]
|
|
288
294
|
|
289
295
|
@dataclass(frozen=True)
|
290
296
|
class TransformConfig:
|
291
|
-
|
292
|
-
|
297
|
+
length: int
|
298
|
+
overlap: int
|
293
299
|
bin_start: int
|
294
300
|
bin_end: int
|
295
301
|
ttype: str
|
@@ -298,8 +304,7 @@ class TransformConfig:
|
|
298
304
|
@dataclass(frozen=True)
|
299
305
|
class FeatureGeneratorConfig:
|
300
306
|
feature_mode: str
|
301
|
-
|
302
|
-
truth_mutex: bool
|
307
|
+
truth_parameters: dict[str, int]
|
303
308
|
|
304
309
|
|
305
310
|
@dataclass(frozen=True)
|
@@ -318,57 +323,55 @@ ASRConfigs: TypeAlias = dict[str, dict[str, Any]]
|
|
318
323
|
|
319
324
|
@dataclass
|
320
325
|
class MixtureDatabaseConfig(DataClassSonusAIMixin):
|
321
|
-
asr_configs:
|
322
|
-
class_balancing:
|
323
|
-
class_labels:
|
324
|
-
class_weights_threshold:
|
325
|
-
feature:
|
326
|
-
impulse_response_files:
|
327
|
-
mixtures:
|
328
|
-
noise_mix_mode:
|
329
|
-
noise_files:
|
330
|
-
num_classes:
|
331
|
-
spectral_masks:
|
332
|
-
target_files:
|
333
|
-
truth_mutex: Optional[bool] = None
|
334
|
-
truth_reduction_function: Optional[str] = None
|
326
|
+
asr_configs: ASRConfigs
|
327
|
+
class_balancing: bool
|
328
|
+
class_labels: list[str]
|
329
|
+
class_weights_threshold: list[float]
|
330
|
+
feature: str
|
331
|
+
impulse_response_files: ImpulseResponseFiles
|
332
|
+
mixtures: Mixtures
|
333
|
+
noise_mix_mode: str
|
334
|
+
noise_files: NoiseFiles
|
335
|
+
num_classes: int
|
336
|
+
spectral_masks: SpectralMasks
|
337
|
+
target_files: TargetFiles
|
335
338
|
|
336
339
|
|
337
340
|
SpeechMetadata: TypeAlias = str | list[Interval] | None
|
338
341
|
|
339
342
|
|
340
343
|
class SnrFMetrics(NamedTuple):
|
341
|
-
avg:
|
342
|
-
std:
|
343
|
-
db_avg:
|
344
|
-
db_std:
|
344
|
+
avg: float | None = None
|
345
|
+
std: float | None = None
|
346
|
+
db_avg: float | None = None
|
347
|
+
db_std: float | None = None
|
345
348
|
|
346
349
|
|
347
350
|
class SnrFBinMetrics(NamedTuple):
|
348
|
-
avg:
|
349
|
-
std:
|
350
|
-
db_avg:
|
351
|
-
db_std:
|
351
|
+
avg: np.ndarray | None = None
|
352
|
+
std: np.ndarray | None = None
|
353
|
+
db_avg: np.ndarray | None = None
|
354
|
+
db_std: np.ndarray | None = None
|
352
355
|
|
353
356
|
|
354
357
|
class SpeechMetrics(NamedTuple):
|
355
|
-
pesq:
|
356
|
-
csig:
|
357
|
-
cbak:
|
358
|
-
covl:
|
358
|
+
pesq: float | None = None
|
359
|
+
csig: float | None = None
|
360
|
+
cbak: float | None = None
|
361
|
+
covl: float | None = None
|
359
362
|
|
360
363
|
|
361
364
|
class AudioStatsMetrics(NamedTuple):
|
362
|
-
dco:
|
363
|
-
min:
|
364
|
-
max:
|
365
|
-
pkdb:
|
366
|
-
lrms:
|
367
|
-
pkr:
|
368
|
-
tr:
|
369
|
-
cr:
|
370
|
-
fl:
|
371
|
-
pkc:
|
365
|
+
dco: float | None = None
|
366
|
+
min: float | None = None
|
367
|
+
max: float | None = None
|
368
|
+
pkdb: float | None = None
|
369
|
+
lrms: float | None = None
|
370
|
+
pkr: float | None = None
|
371
|
+
tr: float | None = None
|
372
|
+
cr: float | None = None
|
373
|
+
fl: float | None = None
|
374
|
+
pkc: float | None = None
|
372
375
|
|
373
376
|
|
374
377
|
@dataclass
|
@@ -382,7 +385,7 @@ class MetricDocs(list[MetricDoc]):
|
|
382
385
|
def __init__(self, __iterable: Iterable[MetricDoc]) -> None:
|
383
386
|
super().__init__(item for item in __iterable)
|
384
387
|
|
385
|
-
def __setitem__(self, __key: SupportsIndex, __value: MetricDoc) -> None: # type: ignore
|
388
|
+
def __setitem__(self, __key: SupportsIndex, __value: MetricDoc) -> None: # type: ignore[override]
|
386
389
|
super().__setitem__(__key, __value)
|
387
390
|
|
388
391
|
def insert(self, __index: SupportsIndex, __object: MetricDoc) -> None:
|
@@ -406,16 +409,16 @@ class MetricDocs(list[MetricDoc]):
|
|
406
409
|
if item.category not in categories:
|
407
410
|
categories.append(item.category)
|
408
411
|
|
409
|
-
result =
|
412
|
+
result = ""
|
410
413
|
for category in categories:
|
411
|
-
result += f
|
412
|
-
result +=
|
414
|
+
result += f"{category}\n"
|
415
|
+
result += "-" * max_category_len + "\n"
|
413
416
|
for item in [sub for sub in self if sub.category == category]:
|
414
|
-
result += f
|
415
|
-
result +=
|
417
|
+
result += f" {item.name:<{max_name_len}}{item.description}\n"
|
418
|
+
result += "\n"
|
416
419
|
|
417
420
|
return result
|
418
421
|
|
419
422
|
@property
|
420
423
|
def names(self) -> set[str]:
|
421
|
-
return
|
424
|
+
return {item.name for item in self}
|