sonusai 0.17.0__py3-none-any.whl → 0.17.3__py3-none-any.whl

This diff shows the changes between publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registry.
Files changed (46)
  1. sonusai/audiofe.py +25 -54
  2. sonusai/calc_metric_spenh.py +212 -219
  3. sonusai/doc/doc.py +1 -1
  4. sonusai/mixture/__init__.py +2 -0
  5. sonusai/mixture/audio.py +12 -0
  6. sonusai/mixture/datatypes.py +11 -3
  7. sonusai/mixture/mixdb.py +100 -0
  8. sonusai/mixture/soundfile_audio.py +39 -0
  9. sonusai/mixture/sox_augmentation.py +3 -0
  10. sonusai/mixture/speaker_metadata.py +35 -0
  11. sonusai/mixture/torchaudio_audio.py +22 -0
  12. sonusai/mkmanifest.py +1 -1
  13. sonusai/mkwav.py +4 -4
  14. sonusai/onnx_predict.py +114 -410
  15. sonusai/post_spenh_targetf.py +2 -2
  16. sonusai/queries/queries.py +1 -1
  17. sonusai/speech/__init__.py +3 -0
  18. sonusai/speech/l2arctic.py +116 -0
  19. sonusai/speech/librispeech.py +99 -0
  20. sonusai/speech/mcgill.py +70 -0
  21. sonusai/speech/textgrid.py +100 -0
  22. sonusai/speech/timit.py +135 -0
  23. sonusai/speech/types.py +12 -0
  24. sonusai/speech/vctk.py +52 -0
  25. sonusai/speech/voxceleb.py +102 -0
  26. sonusai/utils/__init__.py +3 -2
  27. sonusai/utils/asr_functions/aaware_whisper.py +2 -2
  28. sonusai/utils/asr_manifest_functions/__init__.py +0 -1
  29. sonusai/utils/asr_manifest_functions/data.py +0 -8
  30. sonusai/utils/asr_manifest_functions/librispeech.py +1 -1
  31. sonusai/utils/asr_manifest_functions/mcgill_speech.py +1 -1
  32. sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +1 -1
  33. sonusai/utils/braced_glob.py +7 -3
  34. sonusai/utils/onnx_utils.py +110 -106
  35. sonusai/utils/path_info.py +7 -0
  36. sonusai/utils/{wave.py → write_audio.py} +2 -2
  37. {sonusai-0.17.0.dist-info → sonusai-0.17.3.dist-info}/METADATA +3 -1
  38. {sonusai-0.17.0.dist-info → sonusai-0.17.3.dist-info}/RECORD +40 -35
  39. {sonusai-0.17.0.dist-info → sonusai-0.17.3.dist-info}/WHEEL +1 -1
  40. sonusai/calc_metric_spenh-save.py +0 -1334
  41. sonusai/onnx_predict-old.py +0 -240
  42. sonusai/onnx_predict-save.py +0 -487
  43. sonusai/ovino_predict.py +0 -508
  44. sonusai/ovino_query_devices.py +0 -47
  45. sonusai/torchl_onnx-old.py +0 -216
  46. {sonusai-0.17.0.dist-info → sonusai-0.17.3.dist-info}/entry_points.txt +0 -0
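
Notable in this list: a new sonusai/speech subpackage (l2arctic, librispeech, mcgill, textgrid, timit, vctk, voxceleb), removal of the OpenVINO scripts and the -old/-save prediction variants, and the rename of sonusai/utils/wave.py to write_audio.py. The rename and the newly re-exported load_ort_session ripple through the audiofe.py diff below. A minimal caller-side sketch of the rename, assuming the signature (name, audio, sample rate) is unchanged, which is what the audiofe.py diff indicates:

import numpy as np

from sonusai.utils import write_audio  # 0.17.0: from sonusai.utils import write_wav

# One second of 16 kHz silence as stand-in audio for illustration
audio = np.zeros(16000, dtype=np.float32)
write_audio('capture.wav', audio, 16000)  # 0.17.0: write_wav('capture.wav', audio, 16000)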
sonusai/audiofe.py CHANGED
@@ -1,16 +1,15 @@
 """sonusai audiofe
 
-usage: audiofe [-hvds] [--version] [-i INPUT] [-l LENGTH] [-m MODEL] [-k CKPT] [-a ASR] [-w WMODEL]
+usage: audiofe [-hvds] [--version] [-i INPUT] [-l LENGTH] [-m MODEL] [-a ASR] [-w WMODEL]
 
 options:
     -h, --help
     -v, --verbose                   Be verbose.
     -d, --debug                     Write debug data to H5 file.
-    -s, --show                      Show a list of available audio inputs.
+    -s, --show                      Display a list of available audio inputs.
     -i INPUT, --input INPUT         Input audio.
     -l LENGTH, --length LENGTH      Length of audio in seconds. [default: -1].
-    -m MODEL, --model MODEL         PL model .py file path.
-    -k CKPT, --checkpoint CKPT      PL checkpoint file with weights.
+    -m MODEL, --model MODEL         ONNX model.
     -a ASR, --asr ASR               ASR method to use.
     -w WMODEL, --whisper WMODEL     Model used in whisper, aixplain_whisper and faster_whisper methods. [default: tiny].
 
@@ -66,7 +65,6 @@ def main() -> None:
     length = float(args['--length'])
     input_name = args['--input']
    model_name = args['--model']
-    ckpt_name = args['--checkpoint']
     asr_name = args['--asr']
     whisper_name = args['--whisper']
     debug = args['--debug']
@@ -76,10 +74,6 @@ def main() -> None:
 
     import h5py
     import pyaudio
-    import torch
-    from docopt import printable_usage
-    from sonusai_torchl.utils import load_torchl_ckpt_model
-    from sonusai.utils.onnx_utils import load_ort_session
 
     from sonusai import create_file_handler
     from sonusai import initial_log_messages
@@ -91,8 +85,8 @@ def main() -> None:
     from sonusai.utils import calc_asr
     from sonusai.utils import create_timestamp
     from sonusai.utils import get_input_devices
-    from sonusai.utils import trim_docstring
-    from sonusai.utils import write_wav
+    from sonusai.utils import load_ort_session
+    from sonusai.utils import write_audio
 
     ts = create_timestamp()
     capture_name = f'audiofe_capture_{ts}'
@@ -103,33 +97,6 @@ def main() -> None:
     predict_png = predict_name + '.png'
     h5_name = f'audiofe_{ts}.h5'
 
-    if model_name is not None:
-        from os.path import splitext
-        if splitext(model_name)[1] == '.onnx':
-            session, options, model_root, hparams, sess_inputs, sess_outputs = load_ort_session(model_name)
-            if hparams is None:
-                logger.error(f'Error: onnx model does not have required SonusAI hyper-parameters, can not proceed.')
-                raise SystemExit(1)
-            feature_mode = hparams["feature"]
-            model_is_onnx = True
-            in0name = sess_inputs[0].name
-            in0type = sess_inputs[0].type
-            out0name = sess_outputs[0].name
-            out_names = [n.name for n in session.get_outputs()]
-            if in0type.find('float16') != -1:
-                model_is_fp16 = True
-                logger.info(f'Detected input of float16, converting all feature inputs to that type.')
-            else:
-                model_is_fp16 = False
-        else:
-            model_is_onnx = False
-            if ckpt_name is None:
-                print(printable_usage(trim_docstring(__doc__)))
-                exit(1)
-            model = load_torchl_ckpt_model(model_name=model_name, ckpt_name=ckpt_name)
-            feature_mode = model.hparams.feature
-            model.eval()
-
     # Setup logging file
     create_file_handler('audiofe.log')
     update_console_handler(verbose)
@@ -153,8 +120,8 @@ def main() -> None:
         except ValueError as e:
            logger.exception(e)
            return
-        # Only write if capture, not for file input
-        write_wav(capture_wav, capture_audio, SAMPLE_RATE)
+        # Only write if capture from device, not for file input
+        write_audio(capture_wav, capture_audio, SAMPLE_RATE)
         logger.info('')
         logger.info(f'Wrote capture audio with shape {capture_audio.shape} to {capture_wav}')
 
@@ -171,7 +138,17 @@ def main() -> None:
         logger.info(f'Capture audio ASR: {capture_asr}')
 
     if model_name is not None:
-        feature = get_feature_from_audio(audio=capture_audio, feature_mode=feature_mode) #frames x stride x feat_params
+        session, options, model_root, hparams, sess_inputs, sess_outputs = load_ort_session(model_name)
+        if hparams is None:
+            logger.error(f'Error: ONNX model does not have required SonusAI hyperparameters, cannot proceed.')
+            raise SystemExit(1)
+        feature_mode = hparams.feature
+        in0name = sess_inputs[0].name
+        in0type = sess_inputs[0].type
+        out_names = [n.name for n in session.get_outputs()]
+
+        # frames x stride x feat_params
+        feature = get_feature_from_audio(audio=capture_audio, feature_mode=feature_mode)
         save_figure(capture_png, capture_audio, feature)
         logger.info(f'Wrote capture plots to {capture_png}')
 
@@ -182,19 +159,13 @@ def main() -> None:
                 f.create_dataset('feature', data=feature)
             logger.info(f'Wrote feature with shape {feature.shape} to {h5_name}')
 
-        if model_is_onnx:
-            # run ort session, wants i.e. batch x tsteps x feat_params, outputs numpy BxTxFP or BxFP
-            # Note full reshape not needed here since we assume speech enhanement type model, so a transpose suffices
-            if model_is_fp16:
-                feature = np.float16(feature)
-            # run inference, ort session wants i.e. batch x tsteps x feat_params, outputs numpy BxTxFP or BxFP
-            predict = np.transpose(session.run(out_names, {in0name: np.transpose(feature,(1,0,2))})[0],(1,0,2))
-        else:
-            with torch.no_grad():
-                # model wants batch x timesteps x feature_parameters
-                predict = model(torch.tensor(feature).permute((1, 0, 2))).permute(1, 0, 2).numpy()
-
+        if in0type.find('float16') != -1:
+            logger.info(f'Detected input of float16, converting all feature inputs to that type.')
+            feature = np.float16(feature)  # type: ignore
 
+        # Run inference, ort session wants batch x timesteps x feat_params, outputs numpy BxTxFP or BxFP
+        # Note full reshape not needed here since we assume speech enhancement type model, so a transpose suffices
+        predict = np.transpose(session.run(out_names, {in0name: np.transpose(feature, (1, 0, 2))})[0], (1, 0, 2))
 
 
         if debug:
@@ -204,7 +175,7 @@ def main() -> None:
            logger.info(f'Wrote predict with shape {predict.shape} to {h5_name}')
 
         predict_audio = get_audio_from_feature(feature=predict, feature_mode=feature_mode)
-        write_wav(predict_wav, predict_audio, SAMPLE_RATE)
+        write_audio(predict_wav, predict_audio, SAMPLE_RATE)
         logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {predict_wav}')
         if debug:
             with h5py.File(h5_name, 'a') as f:
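
Taken together, the audiofe.py changes replace the dual PyTorch-Lightning/ONNX path with a single ONNX Runtime path. A standalone sketch of the new inference flow, hedged: the enhance wrapper is illustrative, get_feature_from_audio/get_audio_from_feature are assumed to come from sonusai.mixture as in this file's unchanged imports, and logging plus the H5 debug writes are elided:

import numpy as np

from sonusai.mixture import get_audio_from_feature
from sonusai.mixture import get_feature_from_audio
from sonusai.utils import load_ort_session


def enhance(capture_audio: np.ndarray, model_name: str) -> np.ndarray:
    # load_ort_session returns the session plus the SonusAI hyperparameters
    # stored with the ONNX model; a model without them cannot be run here
    session, options, model_root, hparams, sess_inputs, sess_outputs = load_ort_session(model_name)
    if hparams is None:
        raise SystemExit('ONNX model does not have required SonusAI hyperparameters')
    feature_mode = hparams.feature
    in0name = sess_inputs[0].name
    in0type = sess_inputs[0].type
    out_names = [n.name for n in session.get_outputs()]

    # frames x stride x feat_params
    feature = get_feature_from_audio(audio=capture_audio, feature_mode=feature_mode)

    # Match a float16 model input
    if in0type.find('float16') != -1:
        feature = np.float16(feature)  # type: ignore

    # The session wants batch x timesteps x feat_params; for a speech enhancement
    # style model a transpose suffices instead of a full reshape
    predict = np.transpose(session.run(out_names, {in0name: np.transpose(feature, (1, 0, 2))})[0], (1, 0, 2))

    return get_audio_from_feature(feature=predict, feature_mode=feature_mode)

With the -k/--checkpoint option gone, a hypothetical invocation reduces to: audiofe -i capture.wav -m model.onnx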