sonusai 0.18.9__py3-none-any.whl → 0.19.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. sonusai/__init__.py +20 -29
  2. sonusai/aawscd_probwrite.py +18 -18
  3. sonusai/audiofe.py +93 -80
  4. sonusai/calc_metric_spenh.py +395 -321
  5. sonusai/data/genmixdb.yml +5 -11
  6. sonusai/{gentcst.py → deprecated/gentcst.py} +146 -149
  7. sonusai/{plot.py → deprecated/plot.py} +177 -131
  8. sonusai/{tplot.py → deprecated/tplot.py} +124 -102
  9. sonusai/doc/__init__.py +1 -1
  10. sonusai/doc/doc.py +112 -177
  11. sonusai/doc.py +10 -10
  12. sonusai/genft.py +81 -91
  13. sonusai/genmetrics.py +51 -61
  14. sonusai/genmix.py +105 -115
  15. sonusai/genmixdb.py +201 -174
  16. sonusai/lsdb.py +56 -66
  17. sonusai/main.py +23 -20
  18. sonusai/metrics/__init__.py +2 -0
  19. sonusai/metrics/calc_audio_stats.py +29 -24
  20. sonusai/metrics/calc_class_weights.py +7 -7
  21. sonusai/metrics/calc_optimal_thresholds.py +5 -7
  22. sonusai/metrics/calc_pcm.py +3 -3
  23. sonusai/metrics/calc_pesq.py +10 -7
  24. sonusai/metrics/calc_phase_distance.py +3 -3
  25. sonusai/metrics/calc_sa_sdr.py +10 -8
  26. sonusai/metrics/calc_segsnr_f.py +16 -18
  27. sonusai/metrics/calc_speech.py +105 -47
  28. sonusai/metrics/calc_wer.py +35 -32
  29. sonusai/metrics/calc_wsdr.py +10 -7
  30. sonusai/metrics/class_summary.py +30 -27
  31. sonusai/metrics/confusion_matrix_summary.py +25 -22
  32. sonusai/metrics/one_hot.py +91 -57
  33. sonusai/metrics/snr_summary.py +53 -46
  34. sonusai/mixture/__init__.py +20 -14
  35. sonusai/mixture/audio.py +4 -6
  36. sonusai/mixture/augmentation.py +37 -43
  37. sonusai/mixture/class_count.py +5 -14
  38. sonusai/mixture/config.py +292 -225
  39. sonusai/mixture/constants.py +41 -30
  40. sonusai/mixture/data_io.py +155 -0
  41. sonusai/mixture/datatypes.py +111 -108
  42. sonusai/mixture/db_datatypes.py +54 -70
  43. sonusai/mixture/eq_rule_is_valid.py +6 -9
  44. sonusai/mixture/feature.py +40 -38
  45. sonusai/mixture/generation.py +522 -389
  46. sonusai/mixture/helpers.py +217 -272
  47. sonusai/mixture/log_duration_and_sizes.py +16 -13
  48. sonusai/mixture/mixdb.py +669 -477
  49. sonusai/mixture/soundfile_audio.py +12 -17
  50. sonusai/mixture/sox_audio.py +91 -112
  51. sonusai/mixture/sox_augmentation.py +8 -9
  52. sonusai/mixture/spectral_mask.py +4 -6
  53. sonusai/mixture/target_class_balancing.py +41 -36
  54. sonusai/mixture/targets.py +69 -67
  55. sonusai/mixture/tokenized_shell_vars.py +23 -23
  56. sonusai/mixture/torchaudio_audio.py +14 -15
  57. sonusai/mixture/torchaudio_augmentation.py +23 -27
  58. sonusai/mixture/truth.py +48 -26
  59. sonusai/mixture/truth_functions/__init__.py +26 -0
  60. sonusai/mixture/truth_functions/crm.py +56 -38
  61. sonusai/mixture/truth_functions/datatypes.py +37 -0
  62. sonusai/mixture/truth_functions/energy.py +85 -59
  63. sonusai/mixture/truth_functions/file.py +30 -30
  64. sonusai/mixture/truth_functions/phoneme.py +14 -7
  65. sonusai/mixture/truth_functions/sed.py +71 -45
  66. sonusai/mixture/truth_functions/target.py +69 -106
  67. sonusai/mkwav.py +58 -101
  68. sonusai/onnx_predict.py +46 -43
  69. sonusai/queries/__init__.py +3 -1
  70. sonusai/queries/queries.py +100 -59
  71. sonusai/speech/__init__.py +2 -0
  72. sonusai/speech/l2arctic.py +24 -23
  73. sonusai/speech/librispeech.py +16 -17
  74. sonusai/speech/mcgill.py +22 -21
  75. sonusai/speech/textgrid.py +32 -25
  76. sonusai/speech/timit.py +45 -42
  77. sonusai/speech/vctk.py +14 -13
  78. sonusai/speech/voxceleb.py +26 -20
  79. sonusai/summarize_metric_spenh.py +11 -10
  80. sonusai/utils/__init__.py +4 -3
  81. sonusai/utils/asl_p56.py +1 -1
  82. sonusai/utils/asr.py +37 -17
  83. sonusai/utils/asr_functions/__init__.py +2 -0
  84. sonusai/utils/asr_functions/aaware_whisper.py +18 -12
  85. sonusai/utils/audio_devices.py +12 -12
  86. sonusai/utils/braced_glob.py +6 -8
  87. sonusai/utils/calculate_input_shape.py +1 -4
  88. sonusai/utils/compress.py +2 -2
  89. sonusai/utils/convert_string_to_number.py +1 -3
  90. sonusai/utils/create_timestamp.py +1 -1
  91. sonusai/utils/create_ts_name.py +2 -2
  92. sonusai/utils/dataclass_from_dict.py +1 -1
  93. sonusai/utils/docstring.py +6 -6
  94. sonusai/utils/energy_f.py +9 -7
  95. sonusai/utils/engineering_number.py +56 -54
  96. sonusai/utils/get_label_names.py +8 -10
  97. sonusai/utils/human_readable_size.py +2 -2
  98. sonusai/utils/model_utils.py +3 -5
  99. sonusai/utils/numeric_conversion.py +2 -4
  100. sonusai/utils/onnx_utils.py +43 -32
  101. sonusai/utils/parallel.py +41 -30
  102. sonusai/utils/print_mixture_details.py +25 -22
  103. sonusai/utils/ranges.py +12 -12
  104. sonusai/utils/read_predict_data.py +11 -9
  105. sonusai/utils/reshape.py +19 -26
  106. sonusai/utils/seconds_to_hms.py +1 -1
  107. sonusai/utils/stacked_complex.py +8 -16
  108. sonusai/utils/stratified_shuffle_split.py +29 -27
  109. sonusai/utils/write_audio.py +2 -2
  110. sonusai/utils/yes_or_no.py +3 -3
  111. sonusai/vars.py +14 -14
  112. {sonusai-0.18.9.dist-info → sonusai-0.19.6.dist-info}/METADATA +20 -21
  113. sonusai-0.19.6.dist-info/RECORD +125 -0
  114. {sonusai-0.18.9.dist-info → sonusai-0.19.6.dist-info}/WHEEL +1 -1
  115. sonusai/mixture/truth_functions/data.py +0 -58
  116. sonusai/utils/read_mixture_data.py +0 -14
  117. sonusai-0.18.9.dist-info/RECORD +0 -125
  118. {sonusai-0.18.9.dist-info → sonusai-0.19.6.dist-info}/entry_points.txt +0 -0
@@ -1,49 +1,60 @@
1
1
  import re
2
-
3
2
  from importlib.resources import as_file
4
3
  from importlib.resources import files
5
4
 
6
5
  REQUIRED_CONFIGS = [
7
- 'asr_configs',
8
- 'class_balancing',
9
- 'class_balancing_augmentation',
10
- 'class_labels',
11
- 'class_weights_threshold',
12
- 'feature',
13
- 'impulse_responses',
14
- 'noise_augmentations',
15
- 'noise_mix_mode',
16
- 'noises',
17
- 'num_classes',
18
- 'random_snrs',
19
- 'seed',
20
- 'snrs',
21
- 'spectral_masks',
22
- 'target_augmentations',
23
- 'target_level_type',
24
- 'targets',
25
- 'truth_mode',
26
- 'truth_reduction_function',
27
- 'truth_settings',
6
+ "asr_configs",
7
+ "class_balancing",
8
+ "class_balancing_augmentation",
9
+ "class_indices",
10
+ "class_labels",
11
+ "class_weights_threshold",
12
+ "feature",
13
+ "impulse_responses",
14
+ "noise_augmentations",
15
+ "noise_mix_mode",
16
+ "noises",
17
+ "num_classes",
18
+ "random_snrs",
19
+ "seed",
20
+ "snrs",
21
+ "spectral_masks",
22
+ "target_augmentations",
23
+ "target_level_type",
24
+ "targets",
25
+ "truth_configs",
28
26
  ]
29
27
  OPTIONAL_CONFIGS: list[str] = []
30
28
  VALID_CONFIGS = REQUIRED_CONFIGS + OPTIONAL_CONFIGS
31
- VALID_TRUTH_SETTINGS = ['function', 'config', 'index']
32
- VALID_AUGMENTATIONS = ['normalize', 'gain', 'pitch', 'tempo', 'eq1', 'eq2', 'eq3', 'lpf', 'ir', 'mixup']
33
- VALID_NOISE_MIX_MODES = ['exhaustive', 'non-exhaustive', 'non-combinatorial']
34
- RAND_PATTERN = re.compile(r'rand\(([-+]?(\d+(\.\d*)?|\.\d+)),\s*([-+]?(\d+(\.\d*)?|\.\d+))\)')
29
+ REQUIRED_TRUTH_CONFIGS = ["function", "stride_reduction"]
30
+ REQUIRED_ASR_CONFIGS = ["engine"]
31
+ VALID_AUGMENTATIONS = [
32
+ "normalize",
33
+ "gain",
34
+ "pitch",
35
+ "tempo",
36
+ "eq1",
37
+ "eq2",
38
+ "eq3",
39
+ "lpf",
40
+ "ir",
41
+ "mixup",
42
+ ]
43
+ VALID_NOISE_MIX_MODES = ["exhaustive", "non-exhaustive", "non-combinatorial"]
44
+ RAND_PATTERN = re.compile(r"rand\(([-+]?(\d+(\.\d*)?|\.\d+)),\s*([-+]?(\d+(\.\d*)?|\.\d+))\)")
35
45
  SAMPLE_RATE = 16000
36
46
  BIT_DEPTH = 32
37
- ENCODING = 'floating-point'
47
+ ENCODING = "floating-point"
38
48
  CHANNEL_COUNT = 1
39
49
  SAMPLE_BYTES = BIT_DEPTH // 8
40
50
  FLOAT_BYTES = 4
51
+ MIXDB_VERSION = 2
41
52
 
42
- with as_file(files('sonusai.data').joinpath('whitenoise.wav')) as path:
53
+ with as_file(files("sonusai.data").joinpath("whitenoise.wav")) as path:
43
54
  DEFAULT_NOISE = str(path)
44
55
 
45
- with as_file(files('sonusai.data').joinpath('genmixdb.yml')) as path:
56
+ with as_file(files("sonusai.data").joinpath("genmixdb.yml")) as path:
46
57
  DEFAULT_CONFIG = str(path)
47
58
 
48
- with as_file(files('sonusai.data').joinpath('speech_ma01_01.wav')) as path:
59
+ with as_file(files("sonusai.data").joinpath("speech_ma01_01.wav")) as path:
49
60
  DEFAULT_SPEECH = str(path)
@@ -0,0 +1,155 @@
1
+ from typing import Any
2
+
3
+
4
+ def _get_hdf5_name(location: str, index: str) -> str:
5
+ from os.path import join
6
+
7
+ return join(location, index + ".h5")
8
+
9
+
10
+ def _get_pickle_name(location: str, index: str, item: str) -> str:
11
+ from os.path import join
12
+
13
+ return join(location, index, item + ".pkl")
14
+
15
+
16
+ def read_hdf5_data(location: str, index: str, items: list[str] | str) -> Any:
17
+ """Read mixture, target, or noise data from an HDF5 file
18
+
19
+ :param location: Location of the file
20
+ :param index: Mixture, target, or noise index
21
+ :param items: String(s) of data to retrieve
22
+ :return: Data (or tuple of data)
23
+ """
24
+ from os.path import exists
25
+ from typing import Any
26
+
27
+ import h5py
28
+ import numpy as np
29
+
30
+ def _get_dataset(file: h5py.File, d_name: str) -> Any:
31
+ if d_name in file:
32
+ data = np.array(file[d_name])
33
+ if data.size == 1:
34
+ item = data.item()
35
+ if isinstance(item, bytes):
36
+ return item.decode("utf-8")
37
+ return item
38
+ return data
39
+ return None
40
+
41
+ if not isinstance(items, list):
42
+ items = [items]
43
+
44
+ h5_name = _get_hdf5_name(location, index)
45
+ if exists(h5_name):
46
+ try:
47
+ with h5py.File(h5_name, "r") as f:
48
+ result = [_get_dataset(f, item) for item in items]
49
+ except Exception as e:
50
+ raise OSError(f"Error reading {h5_name}: {e}") from e
51
+ else:
52
+ result = [None for _ in items]
53
+
54
+ if len(items) == 1:
55
+ result = result[0]
56
+
57
+ return result
58
+
59
+
60
+ def write_hdf5_data(location: str, index: str, items: list[tuple[str, Any]] | tuple[str, Any]) -> None:
61
+ """Write mixture, target, or noise data to an HDF5 file
62
+
63
+ :param location: Location of the file
64
+ :param index: Mixture, target, or noise index
65
+ :param items: Tuple(s) of (name, data)
66
+ """
67
+ import h5py
68
+
69
+ if not isinstance(items, list):
70
+ items = [items]
71
+
72
+ h5_name = _get_hdf5_name(location, index)
73
+ with h5py.File(h5_name, "a") as f:
74
+ for item in items:
75
+ if item[0] in f:
76
+ del f[item[0]]
77
+ f.create_dataset(name=item[0], data=item[1])
78
+
79
+
80
+ def read_pickle_data(location: str, index: str, items: list[str] | str) -> Any:
81
+ """Read mixture, target, or noise data from a pickle file
82
+
83
+ :param location: Location of the file
84
+ :param index: Mixture, target, or noise index
85
+ :param items: String(s) of data to retrieve
86
+ :return: Data (or tuple of data)
87
+ """
88
+ import pickle
89
+ from os.path import exists
90
+ from typing import Any
91
+
92
+ if not isinstance(items, list):
93
+ items = [items]
94
+
95
+ result: list[Any] = []
96
+ for item in items:
97
+ pkl_name = _get_pickle_name(location, index, item)
98
+ if exists(pkl_name):
99
+ with open(pkl_name, "rb") as f:
100
+ result.append(pickle.load(f)) # noqa: S301
101
+ else:
102
+ result.append(None)
103
+
104
+ if len(items) == 1:
105
+ result = result[0]
106
+
107
+ return result
108
+
109
+
110
+ def write_pickle_data(location: str, index: str, items: list[tuple[str, Any]] | tuple[str, Any]) -> None:
111
+ """Write mixture, target, or noise data to a pickle file
112
+
113
+ :param location: Location of the file
114
+ :param index: Mixture, target, or noise index
115
+ :param items: Tuple(s) of (name, data)
116
+ """
117
+ import pickle
118
+ from os import makedirs
119
+ from os.path import join
120
+
121
+ if not isinstance(items, list):
122
+ items = [items]
123
+
124
+ makedirs(join(location, index), exist_ok=True)
125
+ for item in items:
126
+ pkl_name = _get_pickle_name(location, index, item[0])
127
+ with open(pkl_name, "wb") as f:
128
+ f.write(pickle.dumps(item[1]))
129
+
130
+
131
+ def read_cached_data(location: str, name: str, index: str, items: list[str] | str) -> Any:
132
+ """Read cached data from a file
133
+
134
+ :param location: Location of the mixture database
135
+ :param name: Data name ('mixture', 'target', or 'noise')
136
+ :param index: Data index (mixture, target, or noise ID)
137
+ :param items: String(s) of data to retrieve
138
+ :return: Data (or tuple of data)
139
+ """
140
+ from os.path import join
141
+
142
+ return read_pickle_data(join(location, name), index, items)
143
+
144
+
145
+ def write_cached_data(location: str, name: str, index: str, items: list[tuple[str, Any]] | tuple[str, Any]) -> None:
146
+ """Write mixture data to a file
147
+
148
+ :param location: Location of the mixture database
149
+ :param name: Data name ('mixture', 'target', or 'noise')
150
+ :param index: Data index (mixture, target, or noise ID)
151
+ :param items: Tuple(s) of (name, data)
152
+ """
153
+ from os.path import join
154
+
155
+ write_pickle_data(join(location, name), index, items)
@@ -1,8 +1,8 @@
1
+ from collections.abc import Iterable
1
2
  from dataclasses import dataclass
3
+ from dataclasses import field
2
4
  from typing import Any
3
- from typing import Iterable
4
5
  from typing import NamedTuple
5
- from typing import Optional
6
6
  from typing import SupportsIndex
7
7
  from typing import TypeAlias
8
8
 
@@ -17,6 +17,7 @@ AudiosT: TypeAlias = list[AudioT]
17
17
  ListAudiosT: TypeAlias = list[AudiosT]
18
18
 
19
19
  Truth: TypeAlias = npt.NDArray[np.float32]
20
+ TruthDict: TypeAlias = dict[str, Truth]
20
21
  Segsnr: TypeAlias = npt.NDArray[np.float32]
21
22
 
22
23
  AudioF: TypeAlias = npt.NDArray[np.complex64]
@@ -35,7 +36,7 @@ Json: TypeAlias = dict | list | str | int | float | bool | None
35
36
 
36
37
  class DataClassSonusAIMixin(DataClassJsonMixin):
37
38
  def __str__(self):
38
- return f'{self.to_dict()}'
39
+ return f"{self.to_dict()}"
39
40
 
40
41
  # Override DataClassJsonMixin to remove dictionary keys with values of None
41
42
  def to_dict(self, encode_json=False) -> dict[str, Json]:
@@ -58,22 +59,22 @@ class DataClassSonusAIMixin(DataClassJsonMixin):
58
59
 
59
60
 
60
61
  @dataclass(frozen=True)
61
- class TruthSetting(DataClassSonusAIMixin):
62
- config: Optional[dict] = None
63
- function: Optional[str] = None
64
- index: Optional[list[int]] = None
62
+ class TruthConfig(DataClassSonusAIMixin):
63
+ function: str
64
+ stride_reduction: str
65
+ config: dict = field(default_factory=dict)
65
66
 
66
67
  def __hash__(self):
67
68
  return hash(self.to_json())
68
69
 
69
70
  def __eq__(self, other):
70
- return isinstance(other, TruthSetting) and hash(self) == hash(other)
71
+ return isinstance(other, TruthConfig) and hash(self) == hash(other)
71
72
 
72
73
 
73
- TruthSettings: TypeAlias = list[TruthSetting]
74
+ TruthConfigs: TypeAlias = dict[str, TruthConfig]
74
75
  NumberStr: TypeAlias = float | int | str
75
- OptionalNumberStr: TypeAlias = Optional[NumberStr]
76
- OptionalListNumberStr: TypeAlias = Optional[list[NumberStr]]
76
+ OptionalNumberStr: TypeAlias = NumberStr | None
77
+ OptionalListNumberStr: TypeAlias = list[NumberStr] | None
77
78
  EQ: TypeAlias = tuple[float | int, float | int, float | int]
78
79
 
79
80
 
@@ -88,7 +89,7 @@ class AugmentationRule(DataClassSonusAIMixin):
88
89
  eq3: OptionalListNumberStr = None
89
90
  lpf: OptionalNumberStr = None
90
91
  ir: OptionalNumberStr = None
91
- mixup: Optional[int] = 1
92
+ mixup: int = 1
92
93
 
93
94
 
94
95
  AugmentationRules: TypeAlias = list[AugmentationRule]
@@ -96,15 +97,15 @@ AugmentationRules: TypeAlias = list[AugmentationRule]
96
97
 
97
98
  @dataclass
98
99
  class Augmentation(DataClassSonusAIMixin):
99
- normalize: Optional[float] = None
100
- pitch: Optional[float] = None
101
- tempo: Optional[float] = None
102
- gain: Optional[float] = None
103
- eq1: Optional[EQ] = None
104
- eq2: Optional[EQ] = None
105
- eq3: Optional[EQ] = None
106
- lpf: Optional[float] = None
107
- ir: Optional[int] = None
100
+ normalize: float | None = None
101
+ pitch: float | None = None
102
+ tempo: float | None = None
103
+ gain: float | None = None
104
+ eq1: EQ | None = None
105
+ eq2: EQ | None = None
106
+ eq3: EQ | None = None
107
+ lpf: float | None = None
108
+ ir: int | None = None
108
109
 
109
110
 
110
111
  Augmentations: TypeAlias = list[Augmentation]
@@ -145,10 +146,11 @@ Speaker: TypeAlias = dict[str, str]
145
146
  class TargetFile(DataClassSonusAIMixin):
146
147
  name: str
147
148
  samples: int
148
- truth_settings: TruthSettings
149
- class_balancing_augmentation: Optional[AugmentationRule] = None
150
- level_type: Optional[str] = None
151
- speaker_id: Optional[int] = None
149
+ class_indices: list[int]
150
+ truth_configs: TruthConfigs
151
+ class_balancing_augmentation: AugmentationRule | None = None
152
+ level_type: str | None = None
153
+ speaker_id: int | None = None
152
154
 
153
155
  @property
154
156
  def duration(self) -> float:
@@ -187,32 +189,21 @@ ClassCount: TypeAlias = list[int]
187
189
  GeneralizedIDs: TypeAlias = str | int | list[int] | range
188
190
 
189
191
 
190
- @dataclass(frozen=True)
191
- class TruthFunctionConfig(DataClassSonusAIMixin):
192
- feature: str
193
- mutex: bool
194
- num_classes: int
195
- target_gain: float
196
- config: Optional[dict] = None
197
- function: Optional[str] = None
198
- index: Optional[list[int]] = None
199
-
200
-
201
192
  @dataclass
202
193
  class GenMixData:
203
- targets: Optional[AudiosT] = None
204
- target: Optional[AudioT] = None
205
- noise: Optional[AudioT] = None
206
- mixture: Optional[AudioT] = None
207
- truth_t: Optional[Truth] = None
208
- segsnr_t: Optional[Segsnr] = None
194
+ targets: AudiosT | None = None
195
+ target: AudioT | None = None
196
+ noise: AudioT | None = None
197
+ mixture: AudioT | None = None
198
+ truth_t: TruthDict | None = None
199
+ segsnr_t: Segsnr | None = None
209
200
 
210
201
 
211
202
  @dataclass
212
203
  class GenFTData:
213
- feature: Optional[Feature] = None
214
- truth_f: Optional[Truth] = None
215
- segsnr: Optional[Segsnr] = None
204
+ feature: Feature | None = None
205
+ truth_f: TruthDict | None = None
206
+ segsnr: Segsnr | None = None
216
207
 
217
208
 
218
209
  @dataclass
@@ -226,7 +217,13 @@ class ImpulseResponseData:
226
217
  return len(self.data)
227
218
 
228
219
 
229
- ImpulseResponseFiles: TypeAlias = list[str]
220
+ @dataclass
221
+ class ImpulseResponseFile:
222
+ file: str
223
+ tags: list[str]
224
+
225
+
226
+ ImpulseResponseFiles: TypeAlias = list[ImpulseResponseFile]
230
227
 
231
228
 
232
229
  @dataclass(frozen=True)
@@ -241,11 +238,20 @@ class SpectralMask(DataClassSonusAIMixin):
241
238
  SpectralMasks: TypeAlias = list[SpectralMask]
242
239
 
243
240
 
241
+ @dataclass(frozen=True)
242
+ class TruthParameter(DataClassSonusAIMixin):
243
+ name: str
244
+ parameters: int
245
+
246
+
247
+ TruthParameters: TypeAlias = list[TruthParameter]
248
+
249
+
244
250
  @dataclass
245
251
  class Target(DataClassSonusAIMixin):
246
- file_id: Optional[int] = None
247
- augmentation: Optional[Augmentation] = None
248
- gain: Optional[float] = None
252
+ file_id: int
253
+ augmentation: Augmentation
254
+ gain: float = 1.0
249
255
 
250
256
 
251
257
  Targets: TypeAlias = list[Target]
@@ -253,22 +259,22 @@ Targets: TypeAlias = list[Target]
253
259
 
254
260
  @dataclass
255
261
  class Noise(DataClassSonusAIMixin):
256
- file_id: Optional[int] = None
257
- augmentation: Optional[Augmentation] = None
258
- offset: Optional[int] = None
262
+ file_id: int
263
+ augmentation: Augmentation
264
+ offset: int = 0
259
265
 
260
266
 
261
267
  @dataclass
262
268
  class Mixture(DataClassSonusAIMixin):
263
- name: Optional[str] = None
264
- noise: Optional[Noise] = None
265
- noise_snr_gain: Optional[float] = None
266
- samples: Optional[int] = None
267
- snr: Optional[UniversalSNR] = None
268
- spectral_mask_id: Optional[int] = None
269
- spectral_mask_seed: Optional[int] = None
270
- target_snr_gain: Optional[float] = None
271
- targets: Optional[Targets] = None
269
+ name: str
270
+ targets: Targets
271
+ noise: Noise
272
+ samples: int
273
+ snr: UniversalSNR
274
+ spectral_mask_id: int
275
+ spectral_mask_seed: int
276
+ target_snr_gain: float = 1.0
277
+ noise_snr_gain: float = 1.0
272
278
 
273
279
  @property
274
280
  def noise_id(self) -> int:
@@ -288,8 +294,8 @@ Mixtures: TypeAlias = list[Mixture]
288
294
 
289
295
  @dataclass(frozen=True)
290
296
  class TransformConfig:
291
- N: int
292
- R: int
297
+ length: int
298
+ overlap: int
293
299
  bin_start: int
294
300
  bin_end: int
295
301
  ttype: str
@@ -298,8 +304,7 @@ class TransformConfig:
298
304
  @dataclass(frozen=True)
299
305
  class FeatureGeneratorConfig:
300
306
  feature_mode: str
301
- num_classes: int
302
- truth_mutex: bool
307
+ truth_parameters: dict[str, int]
303
308
 
304
309
 
305
310
  @dataclass(frozen=True)
@@ -318,57 +323,55 @@ ASRConfigs: TypeAlias = dict[str, dict[str, Any]]
318
323
 
319
324
  @dataclass
320
325
  class MixtureDatabaseConfig(DataClassSonusAIMixin):
321
- asr_configs: Optional[ASRConfigs] = None
322
- class_balancing: Optional[bool] = False
323
- class_labels: Optional[list[str]] = None
324
- class_weights_threshold: Optional[list[float]] = None
325
- feature: Optional[str] = None
326
- impulse_response_files: Optional[ImpulseResponseFiles] = None
327
- mixtures: Optional[Mixtures] = None
328
- noise_mix_mode: Optional[str] = 'exhaustive'
329
- noise_files: Optional[NoiseFiles] = None
330
- num_classes: Optional[int] = None
331
- spectral_masks: Optional[SpectralMasks] = None
332
- target_files: Optional[TargetFiles] = None
333
- truth_mutex: Optional[bool] = None
334
- truth_reduction_function: Optional[str] = None
326
+ asr_configs: ASRConfigs
327
+ class_balancing: bool
328
+ class_labels: list[str]
329
+ class_weights_threshold: list[float]
330
+ feature: str
331
+ impulse_response_files: ImpulseResponseFiles
332
+ mixtures: Mixtures
333
+ noise_mix_mode: str
334
+ noise_files: NoiseFiles
335
+ num_classes: int
336
+ spectral_masks: SpectralMasks
337
+ target_files: TargetFiles
335
338
 
336
339
 
337
340
  SpeechMetadata: TypeAlias = str | list[Interval] | None
338
341
 
339
342
 
340
343
  class SnrFMetrics(NamedTuple):
341
- avg: Optional[float] = None
342
- std: Optional[float] = None
343
- db_avg: Optional[float] = None
344
- db_std: Optional[float] = None
344
+ avg: float | None = None
345
+ std: float | None = None
346
+ db_avg: float | None = None
347
+ db_std: float | None = None
345
348
 
346
349
 
347
350
  class SnrFBinMetrics(NamedTuple):
348
- avg: Optional[np.ndarray] = None
349
- std: Optional[np.ndarray] = None
350
- db_avg: Optional[np.ndarray] = None
351
- db_std: Optional[np.ndarray] = None
351
+ avg: np.ndarray | None = None
352
+ std: np.ndarray | None = None
353
+ db_avg: np.ndarray | None = None
354
+ db_std: np.ndarray | None = None
352
355
 
353
356
 
354
357
  class SpeechMetrics(NamedTuple):
355
- pesq: Optional[float] = None
356
- csig: Optional[float] = None
357
- cbak: Optional[float] = None
358
- covl: Optional[float] = None
358
+ pesq: float | None = None
359
+ csig: float | None = None
360
+ cbak: float | None = None
361
+ covl: float | None = None
359
362
 
360
363
 
361
364
  class AudioStatsMetrics(NamedTuple):
362
- dco: Optional[float] = None
363
- min: Optional[float] = None
364
- max: Optional[float] = None
365
- pkdb: Optional[float] = None
366
- lrms: Optional[float] = None
367
- pkr: Optional[float] = None
368
- tr: Optional[float] = None
369
- cr: Optional[float] = None
370
- fl: Optional[float] = None
371
- pkc: Optional[float] = None
365
+ dco: float | None = None
366
+ min: float | None = None
367
+ max: float | None = None
368
+ pkdb: float | None = None
369
+ lrms: float | None = None
370
+ pkr: float | None = None
371
+ tr: float | None = None
372
+ cr: float | None = None
373
+ fl: float | None = None
374
+ pkc: float | None = None
372
375
 
373
376
 
374
377
  @dataclass
@@ -382,7 +385,7 @@ class MetricDocs(list[MetricDoc]):
382
385
  def __init__(self, __iterable: Iterable[MetricDoc]) -> None:
383
386
  super().__init__(item for item in __iterable)
384
387
 
385
- def __setitem__(self, __key: SupportsIndex, __value: MetricDoc) -> None: # type: ignore
388
+ def __setitem__(self, __key: SupportsIndex, __value: MetricDoc) -> None: # type: ignore[override]
386
389
  super().__setitem__(__key, __value)
387
390
 
388
391
  def insert(self, __index: SupportsIndex, __object: MetricDoc) -> None:
@@ -406,16 +409,16 @@ class MetricDocs(list[MetricDoc]):
406
409
  if item.category not in categories:
407
410
  categories.append(item.category)
408
411
 
409
- result = ''
412
+ result = ""
410
413
  for category in categories:
411
- result += f'{category}\n'
412
- result += '-' * max_category_len + '\n'
414
+ result += f"{category}\n"
415
+ result += "-" * max_category_len + "\n"
413
416
  for item in [sub for sub in self if sub.category == category]:
414
- result += f' {item.name:<{max_name_len}}{item.description}\n'
415
- result += '\n'
417
+ result += f" {item.name:<{max_name_len}}{item.description}\n"
418
+ result += "\n"
416
419
 
417
420
  return result
418
421
 
419
422
  @property
420
423
  def names(self) -> set[str]:
421
- return set(item.name for item in self)
424
+ return {item.name for item in self}