sonusai 0.16.1__tar.gz → 0.17.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. {sonusai-0.16.1 → sonusai-0.17.0}/PKG-INFO +1 -1
  2. {sonusai-0.16.1 → sonusai-0.17.0}/pyproject.toml +1 -1
  3. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/audiofe.py +52 -17
  4. sonusai-0.17.0/sonusai/calc_metric_spenh-save.py +1334 -0
  5. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/calc_metric_spenh.py +1 -1
  6. sonusai-0.16.1/sonusai/onnx_predict.py → sonusai-0.17.0/sonusai/onnx_predict-old.py +6 -16
  7. sonusai-0.17.0/sonusai/onnx_predict-save.py +487 -0
  8. sonusai-0.17.0/sonusai/onnx_predict.py +504 -0
  9. sonusai-0.17.0/sonusai/ovino_predict.py +508 -0
  10. sonusai-0.17.0/sonusai/ovino_query_devices.py +47 -0
  11. sonusai-0.17.0/sonusai/torchl_onnx-old.py +216 -0
  12. sonusai-0.17.0/sonusai/utils/onnx_utils.py +154 -0
  13. sonusai-0.16.1/sonusai/utils/onnx_utils.py +0 -65
  14. {sonusai-0.16.1 → sonusai-0.17.0}/README.rst +0 -0
  15. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/__init__.py +0 -0
  16. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/aawscd_probwrite.py +0 -0
  17. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/data/__init__.py +0 -0
  18. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/data/genmixdb.yml +0 -0
  19. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/data/speech_ma01_01.wav +0 -0
  20. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/data/whitenoise.wav +0 -0
  21. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/doc/__init__.py +0 -0
  22. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/doc/doc.py +0 -0
  23. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/doc.py +0 -0
  24. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/genft.py +0 -0
  25. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/genmix.py +0 -0
  26. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/genmixdb.py +0 -0
  27. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/gentcst.py +0 -0
  28. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/lsdb.py +0 -0
  29. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/main.py +0 -0
  30. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/metrics/__init__.py +0 -0
  31. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/metrics/calc_class_weights.py +0 -0
  32. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/metrics/calc_optimal_thresholds.py +0 -0
  33. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/metrics/calc_pcm.py +0 -0
  34. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/metrics/calc_pesq.py +0 -0
  35. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/metrics/calc_sa_sdr.py +0 -0
  36. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/metrics/calc_sample_weights.py +0 -0
  37. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/metrics/calc_wer.py +0 -0
  38. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/metrics/calc_wsdr.py +0 -0
  39. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/metrics/class_summary.py +0 -0
  40. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/metrics/confusion_matrix_summary.py +0 -0
  41. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/metrics/one_hot.py +0 -0
  42. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/metrics/snr_summary.py +0 -0
  43. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/__init__.py +0 -0
  44. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/audio.py +0 -0
  45. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/augmentation.py +0 -0
  46. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/class_count.py +0 -0
  47. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/config.py +0 -0
  48. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/constants.py +0 -0
  49. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/datatypes.py +0 -0
  50. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/eq_rule_is_valid.py +0 -0
  51. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/feature.py +0 -0
  52. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/generation.py +0 -0
  53. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/helpers.py +0 -0
  54. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/log_duration_and_sizes.py +0 -0
  55. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/mapped_snr_f.py +0 -0
  56. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/mixdb.py +0 -0
  57. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/soundfile_audio.py +0 -0
  58. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/sox_audio.py +0 -0
  59. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/sox_augmentation.py +0 -0
  60. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/spectral_mask.py +0 -0
  61. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/target_class_balancing.py +0 -0
  62. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/targets.py +0 -0
  63. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/tokenized_shell_vars.py +0 -0
  64. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/torchaudio_audio.py +0 -0
  65. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/torchaudio_augmentation.py +0 -0
  66. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/truth.py +0 -0
  67. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/truth_functions/__init__.py +0 -0
  68. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/truth_functions/crm.py +0 -0
  69. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/truth_functions/data.py +0 -0
  70. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/truth_functions/energy.py +0 -0
  71. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/truth_functions/file.py +0 -0
  72. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/truth_functions/phoneme.py +0 -0
  73. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/truth_functions/sed.py +0 -0
  74. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mixture/truth_functions/target.py +0 -0
  75. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mkmanifest.py +0 -0
  76. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/mkwav.py +0 -0
  77. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/plot.py +0 -0
  78. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/post_spenh_targetf.py +0 -0
  79. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/queries/__init__.py +0 -0
  80. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/queries/queries.py +0 -0
  81. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/summarize_metric_spenh.py +0 -0
  82. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/tplot.py +0 -0
  83. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/__init__.py +0 -0
  84. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/asl_p56.py +0 -0
  85. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/asr.py +0 -0
  86. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/asr_functions/__init__.py +0 -0
  87. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/asr_functions/aaware_whisper.py +0 -0
  88. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/asr_manifest_functions/__init__.py +0 -0
  89. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/asr_manifest_functions/data.py +0 -0
  90. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/asr_manifest_functions/librispeech.py +0 -0
  91. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/asr_manifest_functions/mcgill_speech.py +0 -0
  92. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +0 -0
  93. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/audio_devices.py +0 -0
  94. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/braced_glob.py +0 -0
  95. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/calculate_input_shape.py +0 -0
  96. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/convert_string_to_number.py +0 -0
  97. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/create_timestamp.py +0 -0
  98. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/create_ts_name.py +0 -0
  99. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/dataclass_from_dict.py +0 -0
  100. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/db.py +0 -0
  101. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/docstring.py +0 -0
  102. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/energy_f.py +0 -0
  103. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/engineering_number.py +0 -0
  104. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/get_frames_per_batch.py +0 -0
  105. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/get_label_names.py +0 -0
  106. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/grouper.py +0 -0
  107. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/human_readable_size.py +0 -0
  108. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/max_text_width.py +0 -0
  109. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/model_utils.py +0 -0
  110. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/numeric_conversion.py +0 -0
  111. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/parallel.py +0 -0
  112. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/print_mixture_details.py +0 -0
  113. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/ranges.py +0 -0
  114. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/read_mixture_data.py +0 -0
  115. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/read_predict_data.py +0 -0
  116. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/reshape.py +0 -0
  117. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/seconds_to_hms.py +0 -0
  118. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/stacked_complex.py +0 -0
  119. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/stratified_shuffle_split.py +0 -0
  120. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/wave.py +0 -0
  121. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/utils/yes_or_no.py +0 -0
  122. {sonusai-0.16.1 → sonusai-0.17.0}/sonusai/vars.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sonusai
3
- Version: 0.16.1
3
+ Version: 0.17.0
4
4
  Summary: Framework for building deep neural network models for sound, speech, and voice AI
5
5
  Home-page: https://aaware.com
6
6
  License: GPL-3.0-only
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "sonusai"
3
- version = "0.16.1"
3
+ version = "0.17.0"
4
4
  description = "Framework for building deep neural network models for sound, speech, and voice AI"
5
5
  authors = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
6
6
  maintainers = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
@@ -12,7 +12,7 @@ options:
12
12
  -m MODEL, --model MODEL PL model .py file path.
13
13
  -k CKPT, --checkpoint CKPT PL checkpoint file with weights.
14
14
  -a ASR, --asr ASR ASR method to use.
15
- -w WMODEL, --whisper WMODEL Whisper model used in aixplain_whisper and whisper methods. [default: tiny].
15
+ -w WMODEL, --whisper WMODEL Model used in whisper, aixplain_whisper and faster_whisper methods. [default: tiny].
16
16
 
17
17
  Aaware SonusAI Audio Front End.
18
18
 
@@ -29,7 +29,7 @@ audiofe_capture_<TIMESTAMP>.png and predict data (time-domain signal and feature
29
29
  audiofe_predict_<TIMESTAMP>.png.
30
30
 
31
31
  If an ASR is specified, run ASR on the captured audio and print the results. In addition, if a model was also specified,
32
- run ASR on the predict audio and print the results.
32
+ run ASR on the predict audio and print the results. Examples: faster_whisper, google.
33
33
 
34
34
  If the debug option is enabled, write capture audio, feature, reconstruct audio, predict, and predict audio to
35
35
  audiofe_<TIMESTAMP>.h5.
@@ -79,6 +79,7 @@ def main() -> None:
79
79
  import torch
80
80
  from docopt import printable_usage
81
81
  from sonusai_torchl.utils import load_torchl_ckpt_model
82
+ from sonusai.utils.onnx_utils import load_ort_session
82
83
 
83
84
  from sonusai import create_file_handler
84
85
  from sonusai import initial_log_messages
@@ -102,9 +103,32 @@ def main() -> None:
102
103
  predict_png = predict_name + '.png'
103
104
  h5_name = f'audiofe_{ts}.h5'
104
105
 
105
- if model_name is not None and ckpt_name is None:
106
- print(printable_usage(trim_docstring(__doc__)))
107
- exit(1)
106
+ if model_name is not None:
107
+ from os.path import splitext
108
+ if splitext(model_name)[1] == '.onnx':
109
+ session, options, model_root, hparams, sess_inputs, sess_outputs = load_ort_session(model_name)
110
+ if hparams is None:
111
+ logger.error(f'Error: onnx model does not have required SonusAI hyper-parameters, can not proceed.')
112
+ raise SystemExit(1)
113
+ feature_mode = hparams["feature"]
114
+ model_is_onnx = True
115
+ in0name = sess_inputs[0].name
116
+ in0type = sess_inputs[0].type
117
+ out0name = sess_outputs[0].name
118
+ out_names = [n.name for n in session.get_outputs()]
119
+ if in0type.find('float16') != -1:
120
+ model_is_fp16 = True
121
+ logger.info(f'Detected input of float16, converting all feature inputs to that type.')
122
+ else:
123
+ model_is_fp16 = False
124
+ else:
125
+ model_is_onnx = False
126
+ if ckpt_name is None:
127
+ print(printable_usage(trim_docstring(__doc__)))
128
+ exit(1)
129
+ model = load_torchl_ckpt_model(model_name=model_name, ckpt_name=ckpt_name)
130
+ feature_mode = model.hparams.feature
131
+ model.eval()
108
132
 
109
133
  # Setup logging file
110
134
  create_file_handler('audiofe.log')
@@ -129,26 +153,25 @@ def main() -> None:
129
153
  except ValueError as e:
130
154
  logger.exception(e)
131
155
  return
156
+ # Only write if capture, not for file input
157
+ write_wav(capture_wav, capture_audio, SAMPLE_RATE)
158
+ logger.info('')
159
+ logger.info(f'Wrote capture audio with shape {capture_audio.shape} to {capture_wav}')
132
160
 
133
- write_wav(capture_wav, capture_audio, SAMPLE_RATE)
134
- logger.info('')
135
- logger.info(f'Wrote capture audio with shape {capture_audio.shape} to {capture_wav}')
136
161
  if debug:
137
162
  with h5py.File(h5_name, 'a') as f:
138
163
  if 'capture_audio' in f:
139
164
  del f['capture_audio']
140
165
  f.create_dataset('capture_audio', data=capture_audio)
141
- logger.info(f'Wrote capture audio with shape {capture_audio.shape} to {h5_name}')
166
+ logger.info(f'Wrote capture feature data with shape {capture_audio.shape} to {h5_name}')
142
167
 
143
168
  if asr_name is not None:
169
+ logger.info(f'Running ASR on captured audio with {asr_name} ...')
144
170
  capture_asr = calc_asr(capture_audio, engine=asr_name, whisper_model_name=whisper_name).text
145
171
  logger.info(f'Capture audio ASR: {capture_asr}')
146
172
 
147
173
  if model_name is not None:
148
- model = load_torchl_ckpt_model(model_name=model_name, ckpt_name=ckpt_name)
149
- model.eval()
150
-
151
- feature = get_feature_from_audio(audio=capture_audio, feature_mode=model.hparams.feature)
174
+ feature = get_feature_from_audio(audio=capture_audio, feature_mode=feature_mode) #frames x stride x feat_params
152
175
  save_figure(capture_png, capture_audio, feature)
153
176
  logger.info(f'Wrote capture plots to {capture_png}')
154
177
 
@@ -159,9 +182,20 @@ def main() -> None:
159
182
  f.create_dataset('feature', data=feature)
160
183
  logger.info(f'Wrote feature with shape {feature.shape} to {h5_name}')
161
184
 
162
- with torch.no_grad():
163
- # model wants batch x timesteps x feature_parameters
164
- predict = model(torch.tensor(feature).permute((1, 0, 2))).permute(1, 0, 2).numpy()
185
+ if model_is_onnx:
186
+ # run ort session, wants i.e. batch x tsteps x feat_params, outputs numpy BxTxFP or BxFP
187
+ # Note full reshape not needed here since we assume speech enhancement type model, so a transpose suffices
188
+ if model_is_fp16:
189
+ feature = np.float16(feature)
190
+ # run inference, ort session wants i.e. batch x tsteps x feat_params, outputs numpy BxTxFP or BxFP
191
+ predict = np.transpose(session.run(out_names, {in0name: np.transpose(feature,(1,0,2))})[0],(1,0,2))
192
+ else:
193
+ with torch.no_grad():
194
+ # model wants batch x timesteps x feature_parameters
195
+ predict = model(torch.tensor(feature).permute((1, 0, 2))).permute(1, 0, 2).numpy()
196
+
197
+
198
+
165
199
  if debug:
166
200
  with h5py.File(h5_name, 'a') as f:
167
201
  if 'predict' in f:
@@ -169,7 +203,7 @@ def main() -> None:
169
203
  f.create_dataset('predict', data=predict)
170
204
  logger.info(f'Wrote predict with shape {predict.shape} to {h5_name}')
171
205
 
172
- predict_audio = get_audio_from_feature(feature=predict, feature_mode=model.hparams.feature)
206
+ predict_audio = get_audio_from_feature(feature=predict, feature_mode=feature_mode)
173
207
  write_wav(predict_wav, predict_audio, SAMPLE_RATE)
174
208
  logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {predict_wav}')
175
209
  if debug:
@@ -183,6 +217,7 @@ def main() -> None:
183
217
  logger.info(f'Wrote predict plots to {predict_png}')
184
218
 
185
219
  if asr_name is not None:
220
+ logger.info(f'Running ASR on model-enhanced audio with {asr_name} ...')
186
221
  predict_asr = calc_asr(predict_audio, engine=asr_name, whisper_model_name=whisper_name).text
187
222
  logger.info(f'Predict audio ASR: {predict_asr}')
188
223