sonusai 0.17.0__py3-none-any.whl → 0.17.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. sonusai/audiofe.py +22 -51
  2. sonusai/calc_metric_spenh.py +206 -213
  3. sonusai/doc/doc.py +1 -1
  4. sonusai/mixture/__init__.py +2 -0
  5. sonusai/mixture/audio.py +12 -0
  6. sonusai/mixture/datatypes.py +11 -3
  7. sonusai/mixture/mixdb.py +101 -0
  8. sonusai/mixture/soundfile_audio.py +39 -0
  9. sonusai/mixture/speaker_metadata.py +35 -0
  10. sonusai/mixture/torchaudio_audio.py +22 -0
  11. sonusai/mkmanifest.py +1 -1
  12. sonusai/onnx_predict.py +114 -410
  13. sonusai/queries/queries.py +1 -1
  14. sonusai/speech/__init__.py +3 -0
  15. sonusai/speech/l2arctic.py +116 -0
  16. sonusai/speech/librispeech.py +99 -0
  17. sonusai/speech/mcgill.py +70 -0
  18. sonusai/speech/textgrid.py +100 -0
  19. sonusai/speech/timit.py +135 -0
  20. sonusai/speech/types.py +12 -0
  21. sonusai/speech/vctk.py +52 -0
  22. sonusai/speech/voxceleb2.py +86 -0
  23. sonusai/utils/__init__.py +2 -1
  24. sonusai/utils/asr_manifest_functions/__init__.py +0 -1
  25. sonusai/utils/asr_manifest_functions/data.py +0 -8
  26. sonusai/utils/asr_manifest_functions/librispeech.py +1 -1
  27. sonusai/utils/asr_manifest_functions/mcgill_speech.py +1 -1
  28. sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +1 -1
  29. sonusai/utils/braced_glob.py +7 -3
  30. sonusai/utils/onnx_utils.py +110 -106
  31. sonusai/utils/path_info.py +7 -0
  32. {sonusai-0.17.0.dist-info → sonusai-0.17.2.dist-info}/METADATA +2 -1
  33. {sonusai-0.17.0.dist-info → sonusai-0.17.2.dist-info}/RECORD +35 -30
  34. {sonusai-0.17.0.dist-info → sonusai-0.17.2.dist-info}/WHEEL +1 -1
  35. sonusai/calc_metric_spenh-save.py +0 -1334
  36. sonusai/onnx_predict-old.py +0 -240
  37. sonusai/onnx_predict-save.py +0 -487
  38. sonusai/ovino_predict.py +0 -508
  39. sonusai/ovino_query_devices.py +0 -47
  40. sonusai/torchl_onnx-old.py +0 -216
  41. {sonusai-0.17.0.dist-info → sonusai-0.17.2.dist-info}/entry_points.txt +0 -0
@@ -1,240 +0,0 @@
1
- """sonusai predict
2
-
3
- usage: predict [-hvr] [-i MIXID] (-m MODEL) INPUT
4
-
5
- options:
6
- -h, --help
7
- -v, --verbose Be verbose.
8
- -i MIXID, --mixid MIXID Mixture ID(s) to generate if input is a mixture database. [default: *].
9
- -m MODEL, --model MODEL Trained ONNX model file.
10
- -r, --reset Reset model between each file.
11
-
12
- Run prediction on a trained ONNX model using SonusAI genft or WAV data.
13
-
14
- Inputs:
15
- MODEL A SonusAI trained ONNX model file.
16
- INPUT The input data must be one of the following:
17
- * WAV
18
- Using the given model, generate feature data and run prediction. A model file must be
19
- provided. The MIXID is ignored.
20
-
21
- * directory
22
- Using the given SonusAI mixture database directory, generate feature and truth data if not found.
23
- Run prediction. The MIXID is required.
24
-
25
- Outputs the following to opredict-<TIMESTAMP> directory:
26
- <id>.h5
27
- dataset: predict
28
- onnx_predict.log
29
-
30
- """
31
-
32
- from sonusai import logger
33
- from sonusai.mixture import Feature
34
- from sonusai.mixture import Predict
35
- from sonusai.utils import SonusAIMetaData
36
-
37
-
38
- def main() -> None:
39
- from docopt import docopt
40
-
41
- import sonusai
42
- from sonusai.utils import trim_docstring
43
-
44
- args = docopt(trim_docstring(__doc__), version=sonusai.__version__, options_first=True)
45
-
46
- verbose = args['--verbose']
47
- mixids = args['--mixid']
48
- model_name = args['--model']
49
- reset = args['--reset']
50
- input_name = args['INPUT']
51
-
52
- from os import makedirs
53
- from os.path import isdir
54
- from os.path import join
55
- from os.path import splitext
56
-
57
- import h5py
58
- import onnxruntime as rt
59
- import numpy as np
60
-
61
- from sonusai import create_file_handler
62
- from sonusai import initial_log_messages
63
- from sonusai import update_console_handler
64
- from sonusai.mixture import MixtureDatabase
65
- from sonusai.mixture import get_feature_from_audio
66
- from sonusai.mixture import read_audio
67
- from sonusai.utils import create_ts_name
68
- from sonusai.utils import get_frames_per_batch
69
- from sonusai.utils import get_sonusai_metadata
70
-
71
- output_dir = create_ts_name('opredict')
72
- makedirs(output_dir, exist_ok=True)
73
-
74
- # Setup logging file
75
- create_file_handler(join(output_dir, 'onnx_predict.log'))
76
- update_console_handler(verbose)
77
- initial_log_messages('onnx_predict')
78
-
79
- model = rt.InferenceSession(model_name, providers=['CPUExecutionProvider'])
80
- model_metadata = get_sonusai_metadata(model)
81
-
82
- batch_size = model_metadata.input_shape[0]
83
- if model_metadata.timestep:
84
- timesteps = model_metadata.input_shape[1]
85
- else:
86
- timesteps = 0
87
- num_classes = model_metadata.output_shape[-1]
88
-
89
- frames_per_batch = get_frames_per_batch(batch_size, timesteps)
90
-
91
- logger.info('')
92
- logger.info(f'feature {model_metadata.feature}')
93
- logger.info(f'num_classes {num_classes}')
94
- logger.info(f'batch_size {batch_size}')
95
- logger.info(f'timesteps {timesteps}')
96
- logger.info(f'flatten {model_metadata.flattened}')
97
- logger.info(f'add1ch {model_metadata.channel}')
98
- logger.info(f'truth_mutex {model_metadata.mutex}')
99
- logger.info(f'input_shape {model_metadata.input_shape}')
100
- logger.info(f'output_shape {model_metadata.output_shape}')
101
- logger.info('')
102
-
103
- if splitext(input_name)[1] == '.wav':
104
- # Convert WAV to feature data
105
- logger.info('')
106
- logger.info(f'Run prediction on {input_name}')
107
- audio = read_audio(input_name)
108
- feature = get_feature_from_audio(audio=audio, feature_mode=model_metadata.feature)
109
-
110
- predict = pad_and_predict(feature=feature,
111
- model_name=model_name,
112
- model_metadata=model_metadata,
113
- frames_per_batch=frames_per_batch,
114
- batch_size=batch_size,
115
- timesteps=timesteps,
116
- reset=reset)
117
-
118
- output_name = splitext(input_name)[0] + '.h5'
119
- with h5py.File(output_name, 'a') as f:
120
- if 'feature' in f:
121
- del f['feature']
122
- f.create_dataset(name='feature', data=feature)
123
-
124
- if 'predict' in f:
125
- del f['predict']
126
- f.create_dataset(name='predict', data=predict)
127
-
128
- logger.info(f'Saved results to {output_name}')
129
- return
130
-
131
- if not isdir(input_name):
132
- logger.exception(f'Do not know how to process input from {input_name}')
133
- raise SystemExit(1)
134
-
135
- mixdb = MixtureDatabase(input_name)
136
-
137
- if mixdb.feature != model_metadata.feature:
138
- logger.exception(f'Feature in mixture database does not match feature in model')
139
- raise SystemExit(1)
140
-
141
- mixids = mixdb.mixids_to_list(mixids)
142
- if reset:
143
- # reset mode cycles through each file one at a time
144
- for mixid in mixids:
145
- feature, _ = mixdb.mixture_ft(mixid)
146
-
147
- predict = pad_and_predict(feature=feature,
148
- model_name=model_name,
149
- model_metadata=model_metadata,
150
- frames_per_batch=frames_per_batch,
151
- batch_size=batch_size,
152
- timesteps=timesteps,
153
- reset=reset)
154
-
155
- output_name = join(output_dir, mixdb.mixtures[mixid].name)
156
- with h5py.File(output_name, 'a') as f:
157
- if 'predict' in f:
158
- del f['predict']
159
- f.create_dataset(name='predict', data=predict)
160
- else:
161
- features: list[Feature] = []
162
- file_indices: list[slice] = []
163
- total_frames = 0
164
- for mixid in mixids:
165
- current_feature, _ = mixdb.mixture_ft(mixid)
166
- current_frames = current_feature.shape[0]
167
- features.append(current_feature)
168
- file_indices.append(slice(total_frames, total_frames + current_frames))
169
- total_frames += current_frames
170
- feature = np.vstack([features[i] for i in range(len(features))])
171
-
172
- predict = pad_and_predict(feature=feature,
173
- model_name=model_name,
174
- model_metadata=model_metadata,
175
- frames_per_batch=frames_per_batch,
176
- batch_size=batch_size,
177
- timesteps=timesteps,
178
- reset=reset)
179
-
180
- # Write data to separate files
181
- for idx, mixid in enumerate(mixids):
182
- output_name = join(output_dir, mixdb.mixtures[mixid].name)
183
- with h5py.File(output_name, 'a') as f:
184
- if 'predict' in f:
185
- del f['predict']
186
- f.create_dataset('predict', data=predict[file_indices[idx]])
187
-
188
- logger.info(f'Saved results to {output_dir}')
189
-
190
-
191
- def pad_and_predict(feature: Feature,
192
- model_name: str,
193
- model_metadata: SonusAIMetaData,
194
- frames_per_batch: int,
195
- batch_size: int,
196
- timesteps: int,
197
- reset: bool) -> Predict:
198
- import onnxruntime as rt
199
- import numpy as np
200
-
201
- from sonusai.utils import reshape_inputs
202
- from sonusai.utils import reshape_outputs
203
-
204
- frames = feature.shape[0]
205
- padding = frames_per_batch - frames % frames_per_batch
206
- feature = np.pad(array=feature, pad_width=((0, padding), (0, 0), (0, 0)))
207
- feature, _ = reshape_inputs(feature=feature,
208
- batch_size=batch_size,
209
- timesteps=timesteps,
210
- flatten=model_metadata.flattened,
211
- add1ch=model_metadata.channel)
212
- sequences = feature.shape[0] // model_metadata.input_shape[0]
213
- feature = np.reshape(feature, [sequences, *model_metadata.input_shape])
214
-
215
- model = rt.InferenceSession(model_name, providers=['CPUExecutionProvider'])
216
- output_names = [n.name for n in model.get_outputs()]
217
- input_names = [n.name for n in model.get_inputs()]
218
-
219
- predict = []
220
- for sequence in range(sequences):
221
- predict.append(model.run(output_names, {input_names[0]: feature[sequence]}))
222
- if reset:
223
- model = rt.InferenceSession(model_name, providers=['CPUExecutionProvider'])
224
-
225
- predict_arr = np.vstack(predict)
226
- # Combine [sequences, batch_size, ...] into [frames, ...]
227
- predict_shape = predict_arr.shape
228
- predict_arr = np.reshape(predict_arr, [predict_shape[0] * predict_shape[1], *predict_shape[2:]])
229
- predict_arr, _ = reshape_outputs(predict=predict_arr, timesteps=timesteps)
230
- predict_arr = predict_arr[:frames, :]
231
-
232
- return predict_arr
233
-
234
-
235
- if __name__ == '__main__':
236
- try:
237
- main()
238
- except KeyboardInterrupt:
239
- logger.info('Canceled due to keyboard interrupt')
240
- raise SystemExit(0)