sonusai 0.20.3__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. sonusai/__init__.py +16 -3
  2. sonusai/audiofe.py +241 -77
  3. sonusai/calc_metric_spenh.py +71 -73
  4. sonusai/config/__init__.py +3 -0
  5. sonusai/config/config.py +61 -0
  6. sonusai/config/config.yml +20 -0
  7. sonusai/config/constants.py +8 -0
  8. sonusai/constants.py +11 -0
  9. sonusai/data/genmixdb.yml +21 -36
  10. sonusai/{mixture/datatypes.py → datatypes.py} +91 -130
  11. sonusai/deprecated/plot.py +4 -5
  12. sonusai/doc/doc.py +4 -4
  13. sonusai/doc.py +11 -4
  14. sonusai/genft.py +43 -45
  15. sonusai/genmetrics.py +25 -19
  16. sonusai/genmix.py +54 -82
  17. sonusai/genmixdb.py +88 -264
  18. sonusai/ir_metric.py +30 -34
  19. sonusai/lsdb.py +41 -48
  20. sonusai/main.py +15 -22
  21. sonusai/metrics/calc_audio_stats.py +4 -293
  22. sonusai/metrics/calc_class_weights.py +4 -4
  23. sonusai/metrics/calc_optimal_thresholds.py +8 -5
  24. sonusai/metrics/calc_pesq.py +2 -2
  25. sonusai/metrics/calc_segsnr_f.py +4 -4
  26. sonusai/metrics/calc_speech.py +25 -13
  27. sonusai/metrics/class_summary.py +7 -7
  28. sonusai/metrics/confusion_matrix_summary.py +5 -5
  29. sonusai/metrics/one_hot.py +4 -4
  30. sonusai/metrics/snr_summary.py +7 -7
  31. sonusai/metrics_summary.py +38 -45
  32. sonusai/mixture/__init__.py +4 -104
  33. sonusai/mixture/audio.py +10 -39
  34. sonusai/mixture/class_balancing.py +103 -0
  35. sonusai/mixture/config.py +251 -271
  36. sonusai/mixture/constants.py +35 -39
  37. sonusai/mixture/data_io.py +25 -36
  38. sonusai/mixture/db_datatypes.py +58 -22
  39. sonusai/mixture/effects.py +386 -0
  40. sonusai/mixture/feature.py +7 -11
  41. sonusai/mixture/generation.py +478 -628
  42. sonusai/mixture/helpers.py +82 -184
  43. sonusai/mixture/ir_delay.py +3 -4
  44. sonusai/mixture/ir_effects.py +77 -0
  45. sonusai/mixture/log_duration_and_sizes.py +6 -12
  46. sonusai/mixture/mixdb.py +910 -729
  47. sonusai/mixture/pad_audio.py +35 -0
  48. sonusai/mixture/resample.py +7 -0
  49. sonusai/mixture/sox_effects.py +195 -0
  50. sonusai/mixture/sox_help.py +650 -0
  51. sonusai/mixture/spectral_mask.py +2 -2
  52. sonusai/mixture/truth.py +17 -15
  53. sonusai/mixture/truth_functions/crm.py +12 -12
  54. sonusai/mixture/truth_functions/energy.py +22 -22
  55. sonusai/mixture/truth_functions/file.py +5 -5
  56. sonusai/mixture/truth_functions/metadata.py +4 -4
  57. sonusai/mixture/truth_functions/metrics.py +4 -4
  58. sonusai/mixture/truth_functions/phoneme.py +3 -3
  59. sonusai/mixture/truth_functions/sed.py +11 -13
  60. sonusai/mixture/truth_functions/target.py +10 -10
  61. sonusai/mkwav.py +26 -29
  62. sonusai/onnx_predict.py +240 -88
  63. sonusai/queries/__init__.py +2 -2
  64. sonusai/queries/queries.py +38 -34
  65. sonusai/speech/librispeech.py +1 -1
  66. sonusai/speech/mcgill.py +1 -1
  67. sonusai/speech/timit.py +2 -2
  68. sonusai/summarize_metric_spenh.py +10 -17
  69. sonusai/utils/__init__.py +7 -1
  70. sonusai/utils/asl_p56.py +2 -2
  71. sonusai/utils/asr.py +2 -2
  72. sonusai/utils/asr_functions/aaware_whisper.py +4 -5
  73. sonusai/utils/choice.py +31 -0
  74. sonusai/utils/compress.py +1 -1
  75. sonusai/utils/dataclass_from_dict.py +19 -1
  76. sonusai/utils/energy_f.py +3 -3
  77. sonusai/utils/evaluate_random_rule.py +15 -0
  78. sonusai/utils/keyboard_interrupt.py +12 -0
  79. sonusai/utils/onnx_utils.py +3 -17
  80. sonusai/utils/print_mixture_details.py +21 -19
  81. sonusai/utils/{temp_seed.py → rand.py} +3 -3
  82. sonusai/utils/read_predict_data.py +2 -2
  83. sonusai/utils/reshape.py +3 -3
  84. sonusai/utils/stratified_shuffle_split.py +3 -3
  85. sonusai/{mixture → utils}/tokenized_shell_vars.py +1 -1
  86. sonusai/utils/write_audio.py +2 -2
  87. sonusai/vars.py +11 -4
  88. {sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/METADATA +4 -2
  89. sonusai-1.0.2.dist-info/RECORD +138 -0
  90. sonusai/mixture/augmentation.py +0 -444
  91. sonusai/mixture/class_count.py +0 -15
  92. sonusai/mixture/eq_rule_is_valid.py +0 -45
  93. sonusai/mixture/target_class_balancing.py +0 -107
  94. sonusai/mixture/targets.py +0 -175
  95. sonusai-0.20.3.dist-info/RECORD +0 -128
  96. {sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/WHEEL +0 -0
  97. {sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,35 @@
1
+ from ..datatypes import AudioT
2
+
3
+
4
def pad_audio_to_frame(audio: AudioT, frame_length: int = 1) -> AudioT:
    """Pad audio so that its length is a whole number of frames

    :param audio: Audio
    :param frame_length: The length of the result is a multiple of this
    :return: Padded audio
    """
    # First work out the frame-aligned length, then let the length-based helper do the padding
    target_length = get_padded_length(length=len(audio), frame_length=frame_length)
    return pad_audio_to_length(audio=audio, length=target_length)
12
+
13
+
14
def get_padded_length(length: int, frame_length: int) -> int:
    """Get the length of audio after padding it to a whole number of frames

    Note that this returns the total padded length, not the number of pad
    samples that must be added.

    :param length: Length of audio
    :param frame_length: Desired length will be a multiple of this; must be positive
    :return: Padded length (the smallest multiple of frame_length that is >= length)
    :raises ValueError: If frame_length is not positive
    """
    if frame_length <= 0:
        # Zero would divide by zero below and a negative value would "pad" to a shorter length
        raise ValueError(f"frame_length must be positive, got {frame_length}")

    remainder = int(length % frame_length)
    if remainder == 0:
        # Already frame-aligned; nothing to add
        return length

    return length + (frame_length - remainder)
24
+
25
+
26
def pad_audio_to_length(audio: AudioT, length: int) -> AudioT:
    """Extend audio with padding until it reaches the requested length

    :param audio: Audio
    :param length: Length of output
    :return: Padded audio
    """
    import numpy as np

    # np.pad's default 'constant' mode fills the added samples with zeros;
    # (0, n) adds nothing before the data and n samples after it
    samples_to_add = length - len(audio)
    return np.pad(audio, (0, samples_to_add))
@@ -0,0 +1,7 @@
1
+ from ..datatypes import AudioT
2
+
3
+
4
def resample(audio: AudioT, orig_sr: int, target_sr: int) -> AudioT:
    """Convert audio from one sample rate to another using librosa

    :param audio: Audio
    :param orig_sr: Sample rate of the input audio
    :param target_sr: Sample rate of the output audio
    :return: Resampled audio
    """
    # Imported under an alias so it cannot be confused with this wrapper of the same name
    from librosa import resample as librosa_resample

    return librosa_resample(
        audio,
        orig_sr=orig_sr,
        target_sr=target_sr,
        res_type="soxr_hq",
    )
@@ -0,0 +1,195 @@
1
+ from functools import lru_cache
2
+
3
+ from ..datatypes import AudioT
4
+
5
+
6
def validate_sox_effects(effects: list[str]) -> None:
    """Check that a list of sox effects is supported and accepted by sox

    The name of each effect (its first whitespace-separated token) is checked
    against list_sox_effects(). Then every sox command built from the effects
    is run on a short buffer of zeros to confirm that sox accepts the arguments.

    :param effects: List of effects
    :raises ValueError: If an effect is blank, if an effect name is not
        supported, or if sox exits with a non-zero status for the effects
    """
    import subprocess

    import numpy as np

    supported = list_sox_effects()
    for effect in effects:
        tokens = effect.split()
        if not tokens:
            # A blank entry has no name to look up; report it as a validation
            # error rather than letting the indexing below raise IndexError
            raise ValueError("Effect is empty.")

        name = tokens[0]
        if name not in supported:
            raise ValueError(f"Effect {name} is not supported.")

    # 100 float32 zeros, matching the f32 input type the sox command line declares
    silence = np.zeros((1, 100), dtype=np.float32).T.tobytes(order="F")

    for args in _build_sox_args(effects):
        completed = subprocess.run(  # noqa: S603
            args,
            input=silence,
            capture_output=True,
            check=False,
        )

        if completed.returncode != 0:
            # Replace undecodable bytes so a decoding failure cannot hide sox's own message
            message = completed.stderr.decode("utf-8", errors="replace")
            raise ValueError(f"For sox effects: {' '.join(effects)}\n{message}")
35
+
36
+
37
def apply_sox_effects(audio: AudioT, effects: list[str]) -> AudioT:
    """Run audio through sox, one effect chain at a time

    :param audio: Audio
    :param effects: List of effects
    :return: Effected audio
    :raises RuntimeError: If sox exits with a non-zero status, or if a chain
        containing 'pitch' changes the length by more than one sample
    """
    import subprocess

    import numpy as np

    result = audio.copy()

    for command in _build_sox_args(effects):
        completed = subprocess.run(  # noqa: S603
            command,
            input=result.T.tobytes(order="F"),
            capture_output=True,
            check=False,
        )
        error_text = completed.stderr.decode("utf-8")

        if completed.returncode != 0:
            raise RuntimeError(error_text)

        expected_samples = len(result)
        result = np.frombuffer(completed.stdout, dtype=audio.dtype)

        if "pitch" not in command:
            continue

        # The length sometimes changes +/-1 with the 'pitch' effect;
        # force the output back to the original length
        actual_samples = len(result)
        excess = actual_samples - expected_samples
        if abs(excess) > 1:
            raise RuntimeError(
                "Encountered unexpected length change during 'pitch' effect:\n"
                f"{' '.join(command)}\n"
                f"original length: {expected_samples}, new length: {actual_samples}"
            )

        if excess < 0:
            result = np.pad(array=result, pad_width=(0, -excess))
        elif excess > 0:
            result = result[:expected_samples]

    return result
86
+
87
+
88
def _build_sox_args(effects: list[str]) -> list[list[str]]:
    """Build the sox command line(s) needed to apply a list of effects

    Effects are grouped into chains. Every effect that starts with "pitch" is
    placed in a command of its own so that the caller can correct the audio
    length right after it; the effects between pitch effects share a command.

    :param effects: List of effects
    :return: List of sox argument lists, in the order they must be run
    """
    from shlex import split

    from ..constants import SAMPLE_RATE

    # Options shared by every command: audio is read from stdin and written to stdout
    prefix = [
        str(token)
        for token in (
            "sox",
            "-D",  # don't dither automatically
            "-V2",  # set verbosity to warning
            "-t",  # set input file type
            "f32",
            "-r",  # set input sample rate
            SAMPLE_RATE,
            "-c",  # set input channels
            1,
            "-",
            "-t",  # set output file type
            "raw",
            "-r",  # set output sample rate
            SAMPLE_RATE,
            "-b",  # set output encoded sample size in bits
            32,
            "-c",  # set output channels
            1,
            "-",
        )
    ]

    commands: list[list[str]] = []
    pending: list[str] = []

    def flush() -> None:
        # Emit the effects collected so far as one sox command and start a new chain
        commands.append([*prefix, *pending])
        pending.clear()

    for effect in effects:
        is_pitch = effect.startswith("pitch")

        # Close off any effects that came before a pitch effect
        if is_pitch and pending:
            flush()

        pending.extend(split(effect))

        # A pitch effect always ends its own chain;
        # this allows "fixing" the length after applying the effect
        if is_pitch:
            flush()

    if pending:
        flush()

    return commands
136
+
137
+
138
@lru_cache
def list_sox_effects() -> list[str]:
    """List the names of the functions defined in the sox_help module

    These names are what the other functions in this module treat as the
    supported sox effects. The result is cached, so every call returns the
    same list object; callers should not modify it.

    :return: Names of the functions found in sox_help
    """
    import inspect

    from . import sox_help

    return [name for name, _ in inspect.getmembers(sox_help, inspect.isfunction)]
146
+
147
+
148
def help_sox_effects(name: str) -> str:
    """Get the help text for a sox effect

    :param name: Name of the effect; must be one of list_sox_effects()
    :return: Result of calling the function of the same name in sox_help
    :raises ValueError: If name is not a supported effect
    """
    from . import sox_help

    if name in list_sox_effects():
        help_function = getattr(sox_help, name)
        return help_function()

    raise ValueError(f"Effect {name} not supported.")
155
+
156
+
157
def sox_stats(audio: AudioT, win_len: float | None = None) -> str:
    """Run the sox 'stats' effect on audio and return what sox reports

    :param audio: Audio
    :param win_len: If given, passed to the stats effect as its -w option
    :return: Text that sox wrote to stderr
    :raises RuntimeError: If sox exits with a non-zero status
    """
    import subprocess

    from ..constants import SAMPLE_RATE

    # Audio is read from stdin; -n means there is no output file
    command = ["sox", "-D", "-V2", "-t", "f32", "-r", str(SAMPLE_RATE), "-c", "1", "-", "-n", "stats"]
    if win_len is not None:
        command += ["-w", str(win_len)]

    completed = subprocess.run(  # noqa: S603
        command,
        input=audio.T.tobytes(order="F"),
        capture_output=True,
        check=False,
    )
    report = completed.stderr.decode("utf-8")

    if completed.returncode != 0:
        raise RuntimeError(report)

    return report