sonusai 0.20.3__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/__init__.py +16 -3
- sonusai/audiofe.py +241 -77
- sonusai/calc_metric_spenh.py +71 -73
- sonusai/config/__init__.py +3 -0
- sonusai/config/config.py +61 -0
- sonusai/config/config.yml +20 -0
- sonusai/config/constants.py +8 -0
- sonusai/constants.py +11 -0
- sonusai/data/genmixdb.yml +21 -36
- sonusai/{mixture/datatypes.py → datatypes.py} +91 -130
- sonusai/deprecated/plot.py +4 -5
- sonusai/doc/doc.py +4 -4
- sonusai/doc.py +11 -4
- sonusai/genft.py +43 -45
- sonusai/genmetrics.py +25 -19
- sonusai/genmix.py +54 -82
- sonusai/genmixdb.py +88 -264
- sonusai/ir_metric.py +30 -34
- sonusai/lsdb.py +41 -48
- sonusai/main.py +15 -22
- sonusai/metrics/calc_audio_stats.py +4 -293
- sonusai/metrics/calc_class_weights.py +4 -4
- sonusai/metrics/calc_optimal_thresholds.py +8 -5
- sonusai/metrics/calc_pesq.py +2 -2
- sonusai/metrics/calc_segsnr_f.py +4 -4
- sonusai/metrics/calc_speech.py +25 -13
- sonusai/metrics/class_summary.py +7 -7
- sonusai/metrics/confusion_matrix_summary.py +5 -5
- sonusai/metrics/one_hot.py +4 -4
- sonusai/metrics/snr_summary.py +7 -7
- sonusai/metrics_summary.py +38 -45
- sonusai/mixture/__init__.py +4 -104
- sonusai/mixture/audio.py +10 -39
- sonusai/mixture/class_balancing.py +103 -0
- sonusai/mixture/config.py +251 -271
- sonusai/mixture/constants.py +35 -39
- sonusai/mixture/data_io.py +25 -36
- sonusai/mixture/db_datatypes.py +58 -22
- sonusai/mixture/effects.py +386 -0
- sonusai/mixture/feature.py +7 -11
- sonusai/mixture/generation.py +478 -628
- sonusai/mixture/helpers.py +82 -184
- sonusai/mixture/ir_delay.py +3 -4
- sonusai/mixture/ir_effects.py +77 -0
- sonusai/mixture/log_duration_and_sizes.py +6 -12
- sonusai/mixture/mixdb.py +910 -729
- sonusai/mixture/pad_audio.py +35 -0
- sonusai/mixture/resample.py +7 -0
- sonusai/mixture/sox_effects.py +195 -0
- sonusai/mixture/sox_help.py +650 -0
- sonusai/mixture/spectral_mask.py +2 -2
- sonusai/mixture/truth.py +17 -15
- sonusai/mixture/truth_functions/crm.py +12 -12
- sonusai/mixture/truth_functions/energy.py +22 -22
- sonusai/mixture/truth_functions/file.py +5 -5
- sonusai/mixture/truth_functions/metadata.py +4 -4
- sonusai/mixture/truth_functions/metrics.py +4 -4
- sonusai/mixture/truth_functions/phoneme.py +3 -3
- sonusai/mixture/truth_functions/sed.py +11 -13
- sonusai/mixture/truth_functions/target.py +10 -10
- sonusai/mkwav.py +26 -29
- sonusai/onnx_predict.py +240 -88
- sonusai/queries/__init__.py +2 -2
- sonusai/queries/queries.py +38 -34
- sonusai/speech/librispeech.py +1 -1
- sonusai/speech/mcgill.py +1 -1
- sonusai/speech/timit.py +2 -2
- sonusai/summarize_metric_spenh.py +10 -17
- sonusai/utils/__init__.py +7 -1
- sonusai/utils/asl_p56.py +2 -2
- sonusai/utils/asr.py +2 -2
- sonusai/utils/asr_functions/aaware_whisper.py +4 -5
- sonusai/utils/choice.py +31 -0
- sonusai/utils/compress.py +1 -1
- sonusai/utils/dataclass_from_dict.py +19 -1
- sonusai/utils/energy_f.py +3 -3
- sonusai/utils/evaluate_random_rule.py +15 -0
- sonusai/utils/keyboard_interrupt.py +12 -0
- sonusai/utils/onnx_utils.py +3 -17
- sonusai/utils/print_mixture_details.py +21 -19
- sonusai/utils/{temp_seed.py → rand.py} +3 -3
- sonusai/utils/read_predict_data.py +2 -2
- sonusai/utils/reshape.py +3 -3
- sonusai/utils/stratified_shuffle_split.py +3 -3
- sonusai/{mixture → utils}/tokenized_shell_vars.py +1 -1
- sonusai/utils/write_audio.py +2 -2
- sonusai/vars.py +11 -4
- {sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/METADATA +4 -2
- sonusai-1.0.2.dist-info/RECORD +138 -0
- sonusai/mixture/augmentation.py +0 -444
- sonusai/mixture/class_count.py +0 -15
- sonusai/mixture/eq_rule_is_valid.py +0 -45
- sonusai/mixture/target_class_balancing.py +0 -107
- sonusai/mixture/targets.py +0 -175
- sonusai-0.20.3.dist-info/RECORD +0 -128
- {sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/WHEEL +0 -0
- {sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,35 @@
|
|
1
|
+
from ..datatypes import AudioT
|
2
|
+
|
3
|
+
|
4
|
+
def pad_audio_to_frame(audio: AudioT, frame_length: int = 1) -> AudioT:
    """Extend audio so that its length is a whole number of frames

    The target length is computed by get_padded_length() and the padding
    itself is performed by pad_audio_to_length().

    :param audio: Audio
    :param frame_length: The returned audio length is a multiple of this value
    :return: Padded audio
    """
    target_length = get_padded_length(len(audio), frame_length)
    return pad_audio_to_length(audio, target_length)
|
12
|
+
|
13
|
+
|
14
|
+
def get_padded_length(length: int, frame_length: int) -> int:
    """Round a length up to the next multiple of the frame length

    Note that the value returned is the total padded length, not the number
    of samples that need to be appended.

    :param length: Length of audio
    :param frame_length: The result is a multiple of this value
    :return: Padded length (equal to length when it is already a multiple)
    """
    remainder = int(length % frame_length)

    # Only a partially filled final frame requires additional samples
    padding = 0
    if remainder:
        padding = frame_length - remainder

    return length + padding
|
24
|
+
|
25
|
+
|
26
|
+
def pad_audio_to_length(audio: AudioT, length: int) -> AudioT:
    """Append padding to the end of audio so that it reaches the given length

    Padding is delegated to numpy.pad() using its default mode.

    NOTE(review): length is presumably expected to be >= len(audio); a
    shorter length produces a negative pad width - confirm callers never do this.

    :param audio: Audio
    :param length: Length of output
    :return: Padded audio
    """
    import numpy as np

    # Nothing is added in front; everything missing is added at the end
    trailing = length - len(audio)
    return np.pad(array=audio, pad_width=(0, trailing))
|
@@ -0,0 +1,195 @@
|
|
1
|
+
from functools import lru_cache
|
2
|
+
|
3
|
+
from ..datatypes import AudioT
|
4
|
+
|
5
|
+
|
6
|
+
def validate_sox_effects(effects: list[str]) -> None:
    """Check that a list of sox effects is usable

    Each effect name (the first whitespace-separated token of the effect
    string) must be one of the names reported by list_sox_effects(). The
    effects are then run through sox on a short buffer of zeros, and any
    non-zero exit status is reported as an error.

    :param effects: List of effects
    :raises ValueError: If an effect name is not supported or sox rejects the effects
    """
    import subprocess

    import numpy as np

    # 100 float32 zeros serve as dummy input audio for the trial run
    silence = np.zeros((1, 100), dtype=np.float32)
    payload = silence.T.tobytes(order="F")

    for effect in effects:
        name = effect.split()[0]
        if name not in list_sox_effects():
            raise ValueError(f"Effect {name} is not supported.")

    for command in _build_sox_args(effects):
        completed = subprocess.run(  # noqa: S603
            command,
            input=payload,
            capture_output=True,
            check=False,
        )
        message = completed.stderr.decode("utf-8")

        if completed.returncode != 0:
            raise ValueError(f"For sox effects: {' '.join(effects)}\n{message}")
|
35
|
+
|
36
|
+
|
37
|
+
def apply_sox_effects(audio: AudioT, effects: list[str]) -> AudioT:
    """Run audio through one or more sox effect chains

    The effects are grouped into sox command lines by _build_sox_args().
    Each command receives the current audio on stdin and its stdout becomes
    the audio passed to the next command.

    NOTE(review): after at least one chain has run, the result is usually
    built by np.frombuffer() over the bytes sox returned, which is presumably
    read-only - confirm callers do not modify the returned audio in place.

    :param audio: Audio
    :param effects: List of effects
    :return: Effected audio
    :raises RuntimeError: If sox exits with a non-zero status, or a 'pitch'
        chain changes the length by more than one sample
    """
    import subprocess

    import numpy as np

    result = audio.copy()

    for command in _build_sox_args(effects):
        completed = subprocess.run(  # noqa: S603
            command,
            input=result.T.tobytes(order="F"),
            capture_output=True,
            check=False,
        )
        message = completed.stderr.decode("utf-8")

        if completed.returncode != 0:
            raise RuntimeError(message)

        expected = len(result)
        result = np.frombuffer(completed.stdout, dtype=audio.dtype)

        # Only 'pitch' chains get their length corrected
        if "pitch" not in command:
            continue

        # The length sometimes changes +/-1 with the 'pitch' effect;
        # force the output back to the original length
        actual = len(result)
        drift = actual - expected
        if abs(drift) > 1:
            raise RuntimeError(
                "Encountered unexpected length change during 'pitch' effect:\n"
                + f"{' '.join(command)}\n"
                + f"original length: {expected}, new length: {actual}"
            )

        if drift < 0:
            result = np.pad(array=result, pad_width=(0, -drift))
        elif drift > 0:
            result = result[:expected]

    return result
|
86
|
+
|
87
|
+
|
88
|
+
def _build_sox_args(effects: list[str]) -> list[list[str]]:
    """Translate effect strings into complete sox command lines

    Every command reads f32 mono audio at SAMPLE_RATE from stdin and writes
    raw 32-bit mono audio at SAMPLE_RATE to stdout. Consecutive effects share
    one command, except that each effect starting with "pitch" is placed in a
    command of its own.

    :param effects: List of effects
    :return: List of sox argument lists, in the order they must be run
    """
    from shlex import split

    from ..constants import SAMPLE_RATE

    rate = str(SAMPLE_RATE)
    prefix = [
        "sox",
        "-D",  # don't dither automatically
        "-V2",  # set verbosity to warning
        "-t",  # set input file type
        "f32",
        "-r",  # set input sample rate
        rate,
        "-c",  # set input channels
        "1",
        "-",
        "-t",  # set output file type
        "raw",
        "-r",  # set output sample rate
        rate,
        "-b",  # set output encoded sample size in bits
        "32",
        "-c",  # set output channels
        "1",
        "-",
    ]

    chains: list[list[str]] = []
    pending: list[str] = []
    for effect in effects:
        is_pitch = effect.startswith("pitch")

        # Effects collected ahead of a pitch effect are closed off as their
        # own chain before the pitch effect is added
        if is_pitch and pending:
            chains.append(prefix + pending)
            pending = []

        pending.extend(split(effect))

        # A pitch effect always ends its chain immediately; this allows
        # "fixing" the length after applying the effect
        if is_pitch:
            chains.append(prefix + pending)
            pending = []

    # Whatever remains after the last effect forms the final chain
    if pending:
        chains.append(prefix + pending)

    return chains
|
136
|
+
|
137
|
+
|
138
|
+
@lru_cache
def list_sox_effects() -> list[str]:
    """List the names of the supported sox effects

    The supported effects are the functions found in the sox_help module.
    The result is cached, so every call returns the same list object.

    :return: List of effect names
    """
    from inspect import getmembers
    from inspect import isfunction

    from . import sox_help

    functions = getmembers(sox_help, isfunction)
    return [name for name, _function in functions]
|
146
|
+
|
147
|
+
|
148
|
+
def help_sox_effects(name: str) -> str:
    """Get the help text for a sox effect

    The text is produced by calling the function of the same name in the
    sox_help module.

    :param name: Effect name
    :return: Result of the matching sox_help function
    :raises ValueError: If name is not reported by list_sox_effects()
    """
    from . import sox_help

    if name in list_sox_effects():
        helper = getattr(sox_help, name)
        return helper()

    raise ValueError(f"Effect {name} not supported.")
|
155
|
+
|
156
|
+
|
157
|
+
def sox_stats(audio: AudioT, win_len: float | None = None) -> str:
    """Run the sox 'stats' effect on audio

    The audio is sent to sox on stdin as f32 mono at SAMPLE_RATE with no
    output file (-n). The text that sox writes to stderr is returned.

    :param audio: Audio
    :param win_len: If given, passed to the 'stats' effect as its -w option
    :return: Text written to stderr by sox
    :raises RuntimeError: If sox exits with a non-zero status
    """
    import subprocess

    from ..constants import SAMPLE_RATE

    command = [
        "sox",
        "-D",
        "-V2",
        "-t",
        "f32",
        "-r",
        str(SAMPLE_RATE),
        "-c",
        "1",
        "-",
        "-n",
        "stats",
    ]
    if win_len is not None:
        command += ["-w", str(win_len)]

    completed = subprocess.run(  # noqa: S603
        command,
        input=audio.T.tobytes(order="F"),
        capture_output=True,
        check=False,
    )
    report = completed.stderr.decode("utf-8")

    if completed.returncode != 0:
        raise RuntimeError(report)

    return report
|