sonusai 0.20.2__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (97)
  1. sonusai/__init__.py +16 -3
  2. sonusai/audiofe.py +240 -76
  3. sonusai/calc_metric_spenh.py +71 -73
  4. sonusai/config/__init__.py +3 -0
  5. sonusai/config/config.py +61 -0
  6. sonusai/config/config.yml +20 -0
  7. sonusai/config/constants.py +8 -0
  8. sonusai/constants.py +11 -0
  9. sonusai/data/genmixdb.yml +21 -36
  10. sonusai/{mixture/datatypes.py → datatypes.py} +91 -130
  11. sonusai/deprecated/plot.py +4 -5
  12. sonusai/doc/doc.py +4 -4
  13. sonusai/doc.py +11 -4
  14. sonusai/genft.py +43 -45
  15. sonusai/genmetrics.py +23 -19
  16. sonusai/genmix.py +54 -82
  17. sonusai/genmixdb.py +88 -264
  18. sonusai/ir_metric.py +30 -34
  19. sonusai/lsdb.py +41 -48
  20. sonusai/main.py +15 -22
  21. sonusai/metrics/calc_audio_stats.py +4 -17
  22. sonusai/metrics/calc_class_weights.py +4 -4
  23. sonusai/metrics/calc_optimal_thresholds.py +8 -5
  24. sonusai/metrics/calc_pesq.py +2 -2
  25. sonusai/metrics/calc_segsnr_f.py +4 -4
  26. sonusai/metrics/calc_speech.py +25 -13
  27. sonusai/metrics/class_summary.py +7 -7
  28. sonusai/metrics/confusion_matrix_summary.py +5 -5
  29. sonusai/metrics/one_hot.py +4 -4
  30. sonusai/metrics/snr_summary.py +7 -7
  31. sonusai/metrics_summary.py +38 -45
  32. sonusai/mixture/__init__.py +5 -104
  33. sonusai/mixture/audio.py +10 -39
  34. sonusai/mixture/class_balancing.py +103 -0
  35. sonusai/mixture/config.py +251 -271
  36. sonusai/mixture/constants.py +35 -39
  37. sonusai/mixture/data_io.py +25 -36
  38. sonusai/mixture/db_datatypes.py +58 -22
  39. sonusai/mixture/effects.py +386 -0
  40. sonusai/mixture/feature.py +7 -11
  41. sonusai/mixture/generation.py +484 -611
  42. sonusai/mixture/helpers.py +82 -184
  43. sonusai/mixture/ir_delay.py +3 -4
  44. sonusai/mixture/ir_effects.py +77 -0
  45. sonusai/mixture/log_duration_and_sizes.py +6 -12
  46. sonusai/mixture/mixdb.py +931 -669
  47. sonusai/mixture/pad_audio.py +35 -0
  48. sonusai/mixture/resample.py +7 -0
  49. sonusai/mixture/sox_effects.py +195 -0
  50. sonusai/mixture/sox_help.py +650 -0
  51. sonusai/mixture/spectral_mask.py +2 -2
  52. sonusai/mixture/truth.py +17 -15
  53. sonusai/mixture/truth_functions/crm.py +12 -12
  54. sonusai/mixture/truth_functions/energy.py +22 -22
  55. sonusai/mixture/truth_functions/file.py +5 -5
  56. sonusai/mixture/truth_functions/metadata.py +4 -4
  57. sonusai/mixture/truth_functions/metrics.py +4 -4
  58. sonusai/mixture/truth_functions/phoneme.py +3 -3
  59. sonusai/mixture/truth_functions/sed.py +11 -13
  60. sonusai/mixture/truth_functions/target.py +10 -10
  61. sonusai/mkwav.py +26 -29
  62. sonusai/onnx_predict.py +240 -88
  63. sonusai/queries/__init__.py +2 -2
  64. sonusai/queries/queries.py +38 -34
  65. sonusai/speech/librispeech.py +1 -1
  66. sonusai/speech/mcgill.py +1 -1
  67. sonusai/speech/timit.py +2 -2
  68. sonusai/summarize_metric_spenh.py +10 -17
  69. sonusai/utils/__init__.py +7 -1
  70. sonusai/utils/asl_p56.py +2 -2
  71. sonusai/utils/asr.py +2 -2
  72. sonusai/utils/asr_functions/aaware_whisper.py +4 -5
  73. sonusai/utils/choice.py +31 -0
  74. sonusai/utils/compress.py +1 -1
  75. sonusai/utils/dataclass_from_dict.py +19 -1
  76. sonusai/utils/energy_f.py +3 -3
  77. sonusai/utils/evaluate_random_rule.py +15 -0
  78. sonusai/utils/keyboard_interrupt.py +12 -0
  79. sonusai/utils/onnx_utils.py +3 -17
  80. sonusai/utils/print_mixture_details.py +21 -19
  81. sonusai/utils/{temp_seed.py → rand.py} +3 -3
  82. sonusai/utils/read_predict_data.py +2 -2
  83. sonusai/utils/reshape.py +3 -3
  84. sonusai/utils/stratified_shuffle_split.py +3 -3
  85. sonusai/{mixture → utils}/tokenized_shell_vars.py +1 -1
  86. sonusai/utils/write_audio.py +2 -2
  87. sonusai/vars.py +11 -4
  88. {sonusai-0.20.2.dist-info → sonusai-1.0.1.dist-info}/METADATA +4 -2
  89. sonusai-1.0.1.dist-info/RECORD +138 -0
  90. sonusai/mixture/augmentation.py +0 -444
  91. sonusai/mixture/class_count.py +0 -15
  92. sonusai/mixture/eq_rule_is_valid.py +0 -45
  93. sonusai/mixture/target_class_balancing.py +0 -107
  94. sonusai/mixture/targets.py +0 -175
  95. sonusai-0.20.2.dist-info/RECORD +0 -128
  96. {sonusai-0.20.2.dist-info → sonusai-1.0.1.dist-info}/WHEEL +0 -0
  97. {sonusai-0.20.2.dist-info → sonusai-1.0.1.dist-info}/entry_points.txt +0 -0
sonusai/__init__.py CHANGED
@@ -3,9 +3,6 @@ from importlib import metadata
3
3
  from os.path import dirname
4
4
 
5
5
  from rich.logging import RichHandler
6
- from rich.traceback import install
7
-
8
- install(show_locals=True)
9
6
 
10
7
  __version__ = metadata.version(__package__) # pyright: ignore [reportArgumentType]
11
8
  BASEDIR = dirname(__file__)
@@ -81,3 +78,19 @@ def commands_list(doc: str = commands_doc) -> list[str]:
81
78
  if command:
82
79
  commands.append(command)
83
80
  return commands
81
+
82
+
83
+ def exception_handler(e: Exception) -> None:
84
+ import sys
85
+
86
+ from rich.console import Console
87
+
88
+ logger.error(f"{type(e).__name__}: {e}")
89
+ handlers = [handler for handler in logger.handlers if isinstance(handler, logging.FileHandler)]
90
+ logger.error(f"See {', '.join(handler.baseFilename for handler in handlers)} for details")
91
+
92
+ console = Console(color_system=None)
93
+ with console.capture() as capture:
94
+ console.print_exception(show_locals=False)
95
+ logger.debug(capture.get())
96
+ sys.exit(1)
sonusai/audiofe.py CHANGED
@@ -1,17 +1,23 @@
1
1
  """sonusai audiofe
2
2
 
3
- usage: audiofe [-hvds] [--version] [-i INPUT] [-l LENGTH] [-m MODEL] [-a ASR] [-w WMODEL]
3
+ usage: audiofe [-hvdsp] [--version] [-i INPUT] [-l LENGTH] [-a ASR] [-n NOISEDB]
4
+ [-w WMODEL] [-o FEATURE] MODEL
4
5
 
5
6
  options:
6
7
  -h, --help
7
8
  -v, --verbose Be verbose.
8
9
  -d, --debug Write debug data to H5 file.
9
10
  -s, --show Display a list of available audio inputs.
10
- -i INPUT, --input INPUT Input audio.
11
+ -i INPUT, --input INPUT Audio source from ALSA or .wav file. See -s or arecord -L. [default: default]
11
12
  -l LENGTH, --length LENGTH Length of audio in seconds. [default: -1].
12
- -m MODEL, --model MODEL ONNX model.
13
+ -m MODEL, --model MODEL SonusAI ONNX model applied to the captured audio.
14
+ -n NOISEDB, --noiseadd NOISEDB Amount of noise to keep in clean audio output. [default: -30]
15
+ -p, --playback Enable playback of noisy audio, then the model prediction output audio
13
16
  -a ASR, --asr ASR ASR method to use.
14
17
  -w WMODEL, --whisper WMODEL Model used in whisper, aixplain_whisper and faster_whisper methods. [default: tiny].
18
+ -o FEATURE, --feature-overlap Run SonusAI model in overlap-streaming mode using FEATURE which is an 8-10 character
19
+ string specifying a stride-overlap feature of the same type as the model, i.e. a
20
+ model with default feature of hun00ns1 could use hun00nv80 or hun00nv128, etc.
15
21
 
16
22
  Aaware SonusAI Audio Front End.
17
23
 
@@ -35,68 +41,40 @@ audiofe_<TIMESTAMP>.h5.
35
41
 
36
42
  """
37
43
 
38
- import signal
39
-
40
44
  import numpy as np
41
45
 
42
46
  from sonusai.mixture import AudioT
43
47
 
44
48
 
45
- def signal_handler(_sig, _frame):
46
- import sys
47
-
48
- from sonusai import logger
49
-
50
- logger.info("Canceled due to keyboard interrupt")
51
- sys.exit(1)
52
-
53
-
54
- signal.signal(signal.SIGINT, signal_handler)
55
-
56
-
57
49
  def main() -> None:
58
50
  from docopt import docopt
59
51
 
60
- import sonusai
52
+ from sonusai import __version__ as sai_version
61
53
  from sonusai.utils import trim_docstring
62
54
 
63
- args = docopt(trim_docstring(__doc__), version=sonusai.__version__, options_first=True)
55
+ args = docopt(trim_docstring(__doc__), version=sai_version, options_first=True)
64
56
 
65
57
  verbose = args["--verbose"]
66
58
  length = float(args["--length"])
67
59
  input_name = args["--input"]
68
- model_name = args["--model"]
60
+ feature_ovr = args["--feature-overlap"]
69
61
  asr_name = args["--asr"]
70
62
  whisper_name = args["--whisper"]
71
63
  debug = args["--debug"]
72
64
  show = args["--show"]
65
+ playback = args["--playback"]
66
+ noiseadd = args["--noiseadd"]
67
+ model_name = args["MODEL"]
73
68
 
74
- from os.path import exists
75
-
76
- import h5py
77
69
  import pyaudio
78
70
 
79
71
  from sonusai import create_file_handler
80
72
  from sonusai import initial_log_messages
81
73
  from sonusai import logger
82
74
  from sonusai import update_console_handler
83
- from sonusai.mixture import SAMPLE_RATE
84
- from sonusai.mixture import get_audio_from_feature
85
- from sonusai.mixture import get_feature_from_audio
86
- from sonusai.utils import calc_asr
87
75
  from sonusai.utils import create_timestamp
88
76
  from sonusai.utils import get_input_devices
89
77
  from sonusai.utils import load_ort_session
90
- from sonusai.utils import write_audio
91
-
92
- ts = create_timestamp()
93
- capture_name = f"audiofe_capture_{ts}"
94
- capture_wav = capture_name + ".wav"
95
- capture_png = capture_name + ".png"
96
- predict_name = f"audiofe_predict_{ts}"
97
- predict_wav = predict_name + ".wav"
98
- predict_png = predict_name + ".png"
99
- h5_name = f"audiofe_{ts}.h5"
100
78
 
101
79
  # Setup logging file
102
80
  create_file_handler("audiofe.log")
@@ -111,7 +89,91 @@ def main() -> None:
111
89
  logger.info(f"{name}")
112
90
  logger.info("")
113
91
  p.terminate()
114
- return
92
+ # return
93
+
94
+ ts = create_timestamp()
95
+ capture_name = f"{ts}-noisy"
96
+ capture_wav = capture_name + ".wav"
97
+ capture_png = capture_name + ".png"
98
+ predict_name = f"{ts}-pred"
99
+ predict_wav = predict_name + ".wav"
100
+ predict_png = predict_name + ".png"
101
+ h5_name = f"{ts}-audiofe.h5"
102
+
103
+ if model_name is not None:
104
+ session, options, model_root, hparams, sess_inputs, sess_outputs = load_ort_session(model_name)
105
+ if hparams is None:
106
+ logger.error("Error: ONNX model does not have required SonusAI hyperparameters, cannot proceed.")
107
+ raise SystemExit(1)
108
+ feature_mode = hparams["feature"]
109
+ if feature_ovr is not None:
110
+ # TBD checks for match and valid feature_ovr
111
+ stride = int(feature_ovr[7:])
112
+ sov_type = feature_ovr[6] # v,e,f,t supported, need to calculate stride from tstep
113
+ if sov_type == "v":
114
+ feat_step = int(np.ceil(0.5 * stride))
115
+ elif sov_type == "e":
116
+ feat_step = int(np.ceil(4 * stride / 5))
117
+ elif sov_type == "f":
118
+ feat_step = int(np.ceil(3 * stride / 4))
119
+ elif sov_type == "t":
120
+ feat_step = int(np.ceil(2 * stride / 3))
121
+ else:
122
+ logger.error("Override feature does not have a supported overlap mode, exiting.")
123
+ raise SystemExit(1)
124
+ feature_orig = feature_mode
125
+ feature_mode = feature_ovr
126
+ logger.info(
127
+ f"Overriding feature with {feature_ovr} (was {feature_orig}), with stride={stride}, step={feat_step}."
128
+ )
129
+ else:
130
+ feat_step = 1
131
+
132
+ from pyaaware import FeatureGenerator
133
+
134
+ fg = FeatureGenerator(feature_mode=feature_mode)
135
+ ftn = fg.ftransform_length # feature transform length
136
+ ftr = fg.ftransform_overlap # forward transform samples per step (R)
137
+ fstride = fg.stride # feature stride
138
+ fsamples = fstride * ftr # total samples in feature
139
+
140
+ in0name = sess_inputs[0].name
141
+ in0type = sess_inputs[0].type
142
+ out_names = [n.name for n in session.get_outputs()]
143
+ if len(sess_inputs) != 1:
144
+ logger.error(f"Error: ONNX model does not have 1 input, but {len(sess_inputs)}. Exit due to unknown input.")
145
+ raise SystemExit(1)
146
+ if verbose:
147
+ logger.info(f"Read and compiled ONNX model from {model_name}.")
148
+ import onnx
149
+
150
+ omodel = onnx.load(model_name)
151
+ from sonusai.utils.onnx_utils import get_and_check_inputs
152
+ from sonusai.utils.onnx_utils import get_and_check_outputs
153
+
154
+ logger.info(f"Onnx model uses ir_version {omodel.ir_version}")
155
+ onnx_inputs, inshapes = get_and_check_inputs(omodel) # Note: logs warning if # inputs > 1
156
+ logger.info(f"Onnx model input has {len(inshapes[0])} dims with shape (0 means dynamic): {inshapes[0]}")
157
+ logger.info(f"Onnx model input has type: {in0type}")
158
+ onnx_outputs, oshapes = get_and_check_outputs(omodel)
159
+ logger.info(f"Onnx model output has {len(oshapes[0])} dims with shape (0 means dynamic): {oshapes[0]}")
160
+ import onnxruntime as ort
161
+
162
+ providers = ort.get_available_providers()
163
+ logger.info(f"ONNX runtime available providers: {providers}.")
164
+ else:
165
+ logger.error("No ONNX model provided, exiting.")
166
+ raise SystemExit(1)
167
+
168
+ from os.path import exists
169
+
170
+ import h5py
171
+
172
+ from sonusai.constants import SAMPLE_RATE
173
+ from sonusai.mixture import get_audio_from_feature
174
+ from sonusai.mixture import get_feature_from_audio
175
+ from sonusai.utils import calc_asr
176
+ from sonusai.utils import write_audio
115
177
 
116
178
  if input_name is not None and exists(input_name):
117
179
  capture_audio = get_frames_from_file(input_name, length)
@@ -123,8 +185,12 @@ def main() -> None:
123
185
  return
124
186
  # Only write if capture from device, not for file input
125
187
  write_audio(capture_wav, capture_audio, SAMPLE_RATE)
126
- logger.info("")
127
- logger.info(f"Wrote capture audio with shape {capture_audio.shape} to {capture_wav}")
188
+ logger.debug("")
189
+ logger.debug(f"Wrote capture audio with shape {capture_audio.shape} to {capture_wav}")
190
+
191
+ # Pad audio to transform step size
192
+ padlen_tf = int(np.ceil(len(capture_audio) / ftr)) * ftr - len(capture_audio)
193
+ capture_audio = np.pad(capture_audio, (0, padlen_tf), "constant", constant_values=(0, 0))
128
194
 
129
195
  if debug:
130
196
  with h5py.File(h5_name, "a") as f:
@@ -135,24 +201,16 @@ def main() -> None:
135
201
 
136
202
  if asr_name is not None:
137
203
  logger.info(f"Running ASR on captured audio with {asr_name} ...")
138
- capture_asr = calc_asr(capture_audio, engine=asr_name, whisper_model_name=whisper_name).text
139
- logger.info(f"Capture audio ASR: {capture_asr}")
204
+ capture_asr = calc_asr(capture_audio, engine=asr_name, model=whisper_name).text
205
+ logger.info(f"Noisy audio ASR: {capture_asr}")
140
206
 
141
207
  if model_name is not None:
142
- session, options, model_root, hparams, sess_inputs, sess_outputs = load_ort_session(model_name)
143
- if hparams is None:
144
- logger.error("Error: ONNX model does not have required SonusAI hyperparameters, cannot proceed.")
145
- raise SystemExit(1)
146
- feature_mode = hparams["feature"]
147
- in0name = sess_inputs[0].name
148
- in0type = sess_inputs[0].type
149
- out_names = [n.name for n in session.get_outputs()]
150
-
151
- # frames x stride x feat_params
152
- feature = get_feature_from_audio(audio=capture_audio, feature_mode=feature_mode)
153
- save_figure(capture_png, capture_audio, feature)
154
- logger.info(f"Wrote capture plots to {capture_png}")
208
+ # Pad audio to fill total feature stride * transform stride samples
209
+ padlen = int(np.ceil(len(capture_audio) / fsamples)) * fsamples - len(capture_audio)
210
+ capture_audio_p = np.pad(capture_audio, (0, padlen), "constant", constant_values=(0, 0))
155
211
 
212
+ # feature always frames x stride x feat_params, convert to always Batch x Tsteps x Bins
213
+ feature = get_feature_from_audio(audio=capture_audio_p, feature_mode=feature_mode)
156
214
  if debug:
157
215
  with h5py.File(h5_name, "a") as f:
158
216
  if "feature" in f:
@@ -160,25 +218,61 @@ def main() -> None:
160
218
  f.create_dataset("feature", data=feature)
161
219
  logger.info(f"Wrote feature with shape {feature.shape} to {h5_name}")
162
220
 
221
+ feat_nov = sov2nov(feature, feat_step) # remove overlap, output always Batch x Tsteps x Bins
222
+ # TBD remove padding of feature-stride
223
+ # if padlen > 0:
224
+ save_figure(capture_png, capture_audio, feat_nov)
225
+ logger.info(f"Wrote capture plots to {capture_png}")
226
+
227
+ if feature_ovr is not None:
228
+ test_audio = get_audio_from_feature(feature=feat_nov, feature_mode=feature_orig)
229
+ # write_audio(f'{ts}-noisy-itf.wav', test_audio, SAMPLE_RATE)
230
+ else:
231
+ # feature is frames x 1 x Bins, reshape to 1 x frames x Bins for model
232
+ feature = feature.transpose((1, 0, 2))
233
+
163
234
  if in0type.find("float16") != -1:
164
235
  logger.info("Detected input of float16, converting all feature inputs to that type.")
165
- feature = np.float16(feature) # type: ignore[assignment]
236
+ feature = np.float16(feature) # type: ignore
166
237
 
167
238
  # Run inference, ort session wants batch x timesteps x feat_params, outputs numpy BxTxFP or BxFP
168
239
  # Note full reshape not needed here since we assume speech enhancement type model, so a transpose suffices
169
- predict = np.transpose(
170
- session.run(out_names, {in0name: np.transpose(feature, (1, 0, 2))})[0],
171
- (1, 0, 2),
172
- )
240
+ logger.info(f"Running model on data with shape {feature.shape} ...")
241
+ if feature_ovr is None:
242
+ predict = session.run(out_names, {in0name: feature})[0] # standard mode (entire batch)
243
+ else:
244
+ predict = np.zeros(feature.shape)
245
+ for i in range(predict.shape[0]):
246
+ logger.debug(f"running batch: {i}")
247
+ predict[i, :, :] = session.run(out_names, {in0name: feature[i : i + 1, :, :]})[0]
173
248
 
174
249
  if debug:
175
250
  with h5py.File(h5_name, "a") as f:
176
251
  if "predict" in f:
177
252
  del f["predict"]
178
253
  f.create_dataset("predict", data=predict)
179
- logger.info(f"Wrote predict with shape {predict.shape} to {h5_name}")
254
+ logger.info(f"Wrote predict data with shape {predict.shape} to {h5_name}")
255
+
256
+ if feature_ovr is not None:
257
+ predict = sov2nov(predict, feat_step) # always returns batch x tsteps x feat_params
258
+ predict_audio = get_audio_from_feature(feature=predict, feature_mode=feature_orig)
259
+ else:
260
+ predict = predict.transpose((1, 0, 2)) # need transpose to frames x 1 x bins
261
+ predict_audio = get_audio_from_feature(feature=predict, feature_mode=feature_mode)
262
+
263
+ if predict_audio.shape[0] > capture_audio.shape[0]:
264
+ predict_audio = predict_audio[0 : (capture_audio.shape[0] - predict_audio.shape[0])]
265
+
266
+ if predict_audio.shape[0] < capture_audio.shape[0]:
267
+ capture_audio = capture_audio[0 : (predict_audio.shape[0] - capture_audio.shape[0])]
268
+
269
+ if noiseadd is not None:
270
+ ngain = np.power(10, min(float(noiseadd), 0.0) / 20.0) # limit to gain <1, convert to float
271
+ if ngain < 1.0: # don't apply if it's 1.0
272
+ logger.info(f"Adding back noise with gain of {ngain} = {noiseadd} db.")
273
+ noise = capture_audio - predict_audio
274
+ predict_audio = predict_audio + ngain * noise
180
275
 
181
- predict_audio = get_audio_from_feature(feature=predict, feature_mode=feature_mode)
182
276
  write_audio(predict_wav, predict_audio, SAMPLE_RATE)
183
277
  logger.info(f"Wrote predict audio with shape {predict_audio.shape} to {predict_wav}")
184
278
  if debug:
@@ -193,9 +287,35 @@ def main() -> None:
193
287
 
194
288
  if asr_name is not None:
195
289
  logger.info(f"Running ASR on model-enhanced audio with {asr_name} ...")
196
- predict_asr = calc_asr(predict_audio, engine=asr_name, whisper_model_name=whisper_name).text
290
+ predict_asr = calc_asr(predict_audio, engine=asr_name, model=whisper_name).text
197
291
  logger.info(f"Predict audio ASR: {predict_asr}")
198
292
 
293
+ plot_en = True
294
+ if plot_en is not None:
295
+ import subprocess
296
+
297
+ # Construct plot command using spgramd, start the process non-blocking (will leave matplot open)
298
+ command = ["python", "spgramd.py", capture_wav, predict_wav]
299
+ process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
300
+
301
+ if playback is not None:
302
+ import sh
303
+
304
+ sh.play(capture_wav)
305
+ sh.play(predict_wav)
306
+ flag_end = False
307
+ while not flag_end:
308
+ choice = input("Press 'r' to replay or 'q' to quit: ").strip().lower()
309
+ if choice == "q":
310
+ print("Quitting...")
311
+ flag_end = True
312
+ elif choice == "r":
313
+ print("Replaying...")
314
+ sh.play(capture_wav)
315
+ sh.play(predict_wav)
316
+ else:
317
+ print("Invalid input. Please try again.")
318
+
199
319
 
200
320
  def get_frames_from_device(input_name: str | None, length: float, chunk: int = 1024) -> AudioT:
201
321
  from select import select
@@ -204,8 +324,8 @@ def get_frames_from_device(input_name: str | None, length: float, chunk: int = 1
204
324
  import pyaudio
205
325
 
206
326
  from sonusai import logger
207
- from sonusai.mixture import CHANNEL_COUNT
208
- from sonusai.mixture import SAMPLE_RATE
327
+ from sonusai.constants import CHANNEL_COUNT
328
+ from sonusai.constants import SAMPLE_RATE
209
329
  from sonusai.utils import get_input_device_index_by_name
210
330
  from sonusai.utils import get_input_devices
211
331
 
@@ -220,20 +340,16 @@ def get_frames_from_device(input_name: str | None, length: float, chunk: int = 1
220
340
 
221
341
  try:
222
342
  device_index = get_input_device_index_by_name(p, input_name)
223
- except ValueError as e:
343
+ except ValueError as ex:
224
344
  msg = f"Could not find {input_name}\n"
225
345
  msg += "Available devices:\n"
226
346
  for input_device in input_devices:
227
347
  msg += f" {input_device}\n"
228
- raise ValueError(msg) from e
348
+ raise ValueError(msg) from ex
229
349
 
230
350
  logger.info(f"Capturing from {p.get_device_info_by_index(device_index).get('name')}")
231
351
  stream = p.open(
232
- format=pyaudio.paFloat32,
233
- channels=CHANNEL_COUNT,
234
- rate=SAMPLE_RATE,
235
- input=True,
236
- input_device_index=device_index,
352
+ format=pyaudio.paFloat32, channels=CHANNEL_COUNT, rate=SAMPLE_RATE, input=True, input_device_index=device_index
237
353
  )
238
354
  stream.start_stream()
239
355
 
@@ -269,7 +385,7 @@ def get_frames_from_device(input_name: str | None, length: float, chunk: int = 1
269
385
 
270
386
  def get_frames_from_file(input_name: str, length: float) -> AudioT:
271
387
  from sonusai import logger
272
- from sonusai.mixture import SAMPLE_RATE
388
+ from sonusai.constants import SAMPLE_RATE
273
389
  from sonusai.mixture import read_audio
274
390
 
275
391
  logger.info(f"Capturing from {input_name}")
@@ -281,14 +397,30 @@ def get_frames_from_file(input_name: str, length: float) -> AudioT:
281
397
  return frames
282
398
 
283
399
 
400
+ def sov2nov(feature: np.ndarray, step: int) -> np.ndarray:
401
+ """Convert stride-overlap batch x stride x bins to no overlap frames x 1 x bins"""
402
+
403
+ stride = feature.shape[1] # stride, tsteps is set to stride in sov mode
404
+ if stride == 1:
405
+ return feature # no reshape if stride is already 1
406
+ # else:
407
+ # hs = feature.shape[1]//2 # half of stride
408
+ # nb = feature.shape[0] # batches
409
+
410
+ nb = feature.shape[0]
411
+ fout = feature[:, (stride - step) :, :] # take last
412
+ fout = np.reshape(fout, [step * nb, 1, feature.shape[2]])
413
+ return fout # np.transpose(fout,[1,0,2])
414
+
415
+
284
416
  def save_figure(name: str, audio: np.ndarray, feature: np.ndarray) -> None:
285
417
  import matplotlib.pyplot as plt
286
418
  from scipy.interpolate import CubicSpline
287
419
 
288
- from sonusai.mixture import SAMPLE_RATE
420
+ from sonusai.constants import SAMPLE_RATE
289
421
  from sonusai.utils import unstack_complex
290
422
 
291
- spectrum = 20 * np.log(np.abs(np.squeeze(unstack_complex(feature)).transpose()))
423
+ spectrum = 20 * np.log(np.abs(np.squeeze(unstack_complex(feature)).transpose()) + 1e-7)
292
424
  frames = spectrum.shape[1]
293
425
  samples = (len(audio) // frames) * frames
294
426
  length_in_s = samples / SAMPLE_RATE
@@ -314,4 +446,36 @@ def save_figure(name: str, audio: np.ndarray, feature: np.ndarray) -> None:
314
446
 
315
447
 
316
448
  if __name__ == "__main__":
317
- main()
449
+ from sonusai import exception_handler
450
+ from sonusai.utils import register_keyboard_interrupt
451
+
452
+ register_keyboard_interrupt()
453
+ try:
454
+ main()
455
+ except Exception as e:
456
+ exception_handler(e)
457
+
458
+
459
+ # import subprocess
460
+ #
461
+ # # Define the arguments
462
+ # arg1 = "value1"
463
+ # arg2 = "value2"
464
+ #
465
+ # # Construct the command
466
+ # command = ["python", "script.py", arg1, arg2]
467
+ #
468
+ # # Start the process
469
+ # process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
470
+ #
471
+ # # Optionally, you can communicate with the process later if needed
472
+ # # For example, to wait for the process to finish and get the output
473
+ # stdout, stderr = process.communicate()
474
+ #
475
+ # # Check if the process was successful
476
+ # if process.returncode == 0:
477
+ # print("Process executed successfully:")
478
+ # print(stdout)
479
+ # else:
480
+ # print("Process failed:")
481
+ # print(stderr)