sonusai 0.17.0__py3-none-any.whl → 0.17.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/audiofe.py +22 -51
- sonusai/calc_metric_spenh.py +206 -213
- sonusai/doc/doc.py +1 -1
- sonusai/mixture/__init__.py +2 -0
- sonusai/mixture/audio.py +12 -0
- sonusai/mixture/datatypes.py +11 -3
- sonusai/mixture/mixdb.py +101 -0
- sonusai/mixture/soundfile_audio.py +39 -0
- sonusai/mixture/speaker_metadata.py +35 -0
- sonusai/mixture/torchaudio_audio.py +22 -0
- sonusai/mkmanifest.py +1 -1
- sonusai/onnx_predict.py +114 -410
- sonusai/queries/queries.py +1 -1
- sonusai/speech/__init__.py +3 -0
- sonusai/speech/l2arctic.py +116 -0
- sonusai/speech/librispeech.py +99 -0
- sonusai/speech/mcgill.py +70 -0
- sonusai/speech/textgrid.py +100 -0
- sonusai/speech/timit.py +135 -0
- sonusai/speech/types.py +12 -0
- sonusai/speech/vctk.py +52 -0
- sonusai/speech/voxceleb2.py +86 -0
- sonusai/utils/__init__.py +2 -1
- sonusai/utils/asr_manifest_functions/__init__.py +0 -1
- sonusai/utils/asr_manifest_functions/data.py +0 -8
- sonusai/utils/asr_manifest_functions/librispeech.py +1 -1
- sonusai/utils/asr_manifest_functions/mcgill_speech.py +1 -1
- sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +1 -1
- sonusai/utils/braced_glob.py +7 -3
- sonusai/utils/onnx_utils.py +110 -106
- sonusai/utils/path_info.py +7 -0
- {sonusai-0.17.0.dist-info → sonusai-0.17.2.dist-info}/METADATA +2 -1
- {sonusai-0.17.0.dist-info → sonusai-0.17.2.dist-info}/RECORD +35 -30
- {sonusai-0.17.0.dist-info → sonusai-0.17.2.dist-info}/WHEEL +1 -1
- sonusai/calc_metric_spenh-save.py +0 -1334
- sonusai/onnx_predict-old.py +0 -240
- sonusai/onnx_predict-save.py +0 -487
- sonusai/ovino_predict.py +0 -508
- sonusai/ovino_query_devices.py +0 -47
- sonusai/torchl_onnx-old.py +0 -216
- {sonusai-0.17.0.dist-info → sonusai-0.17.2.dist-info}/entry_points.txt +0 -0
sonusai/onnx_predict-old.py
DELETED
@@ -1,240 +0,0 @@
-"""sonusai predict
-
-usage: predict [-hvr] [-i MIXID] (-m MODEL) INPUT
-
-options:
-    -h, --help
-    -v, --verbose                Be verbose.
-    -i MIXID, --mixid MIXID      Mixture ID(s) to generate if input is a mixture database. [default: *].
-    -m MODEL, --model MODEL      Trained ONNX model file.
-    -r, --reset                  Reset model between each file.
-
-Run prediction on a trained ONNX model using SonusAI genft or WAV data.
-
-Inputs:
-    MODEL       A SonusAI trained ONNX model file.
-    INPUT       The input data must be one of the following:
-                * WAV
-                  Using the given model, generate feature data and run prediction. A model file must be
-                  provided. The MIXID is ignored.
-
-                * directory
-                  Using the given SonusAI mixture database directory, generate feature and truth data if not found.
-                  Run prediction. The MIXID is required.
-
-Outputs the following to opredict-<TIMESTAMP> directory:
-    <id>.h5
-        dataset:    predict
-    onnx_predict.log
-
-"""
-
-from sonusai import logger
-from sonusai.mixture import Feature
-from sonusai.mixture import Predict
-from sonusai.utils import SonusAIMetaData
-
-
-def main() -> None:
-    from docopt import docopt
-
-    import sonusai
-    from sonusai.utils import trim_docstring
-
-    args = docopt(trim_docstring(__doc__), version=sonusai.__version__, options_first=True)
-
-    verbose = args['--verbose']
-    mixids = args['--mixid']
-    model_name = args['--model']
-    reset = args['--reset']
-    input_name = args['INPUT']
-
-    from os import makedirs
-    from os.path import isdir
-    from os.path import join
-    from os.path import splitext
-
-    import h5py
-    import onnxruntime as rt
-    import numpy as np
-
-    from sonusai import create_file_handler
-    from sonusai import initial_log_messages
-    from sonusai import update_console_handler
-    from sonusai.mixture import MixtureDatabase
-    from sonusai.mixture import get_feature_from_audio
-    from sonusai.mixture import read_audio
-    from sonusai.utils import create_ts_name
-    from sonusai.utils import get_frames_per_batch
-    from sonusai.utils import get_sonusai_metadata
-
-    output_dir = create_ts_name('opredict')
-    makedirs(output_dir, exist_ok=True)
-
-    # Setup logging file
-    create_file_handler(join(output_dir, 'onnx_predict.log'))
-    update_console_handler(verbose)
-    initial_log_messages('onnx_predict')
-
-    model = rt.InferenceSession(model_name, providers=['CPUExecutionProvider'])
-    model_metadata = get_sonusai_metadata(model)
-
-    batch_size = model_metadata.input_shape[0]
-    if model_metadata.timestep:
-        timesteps = model_metadata.input_shape[1]
-    else:
-        timesteps = 0
-    num_classes = model_metadata.output_shape[-1]
-
-    frames_per_batch = get_frames_per_batch(batch_size, timesteps)
-
-    logger.info('')
-    logger.info(f'feature       {model_metadata.feature}')
-    logger.info(f'num_classes   {num_classes}')
-    logger.info(f'batch_size    {batch_size}')
-    logger.info(f'timesteps     {timesteps}')
-    logger.info(f'flatten       {model_metadata.flattened}')
-    logger.info(f'add1ch        {model_metadata.channel}')
-    logger.info(f'truth_mutex   {model_metadata.mutex}')
-    logger.info(f'input_shape   {model_metadata.input_shape}')
-    logger.info(f'output_shape  {model_metadata.output_shape}')
-    logger.info('')
-
-    if splitext(input_name)[1] == '.wav':
-        # Convert WAV to feature data
-        logger.info('')
-        logger.info(f'Run prediction on {input_name}')
-        audio = read_audio(input_name)
-        feature = get_feature_from_audio(audio=audio, feature_mode=model_metadata.feature)
-
-        predict = pad_and_predict(feature=feature,
-                                  model_name=model_name,
-                                  model_metadata=model_metadata,
-                                  frames_per_batch=frames_per_batch,
-                                  batch_size=batch_size,
-                                  timesteps=timesteps,
-                                  reset=reset)
-
-        output_name = splitext(input_name)[0] + '.h5'
-        with h5py.File(output_name, 'a') as f:
-            if 'feature' in f:
-                del f['feature']
-            f.create_dataset(name='feature', data=feature)
-
-            if 'predict' in f:
-                del f['predict']
-            f.create_dataset(name='predict', data=predict)
-
-        logger.info(f'Saved results to {output_name}')
-        return
-
-    if not isdir(input_name):
-        logger.exception(f'Do not know how to process input from {input_name}')
-        raise SystemExit(1)
-
-    mixdb = MixtureDatabase(input_name)
-
-    if mixdb.feature != model_metadata.feature:
-        logger.exception(f'Feature in mixture database does not match feature in model')
-        raise SystemExit(1)
-
-    mixids = mixdb.mixids_to_list(mixids)
-    if reset:
-        # reset mode cycles through each file one at a time
-        for mixid in mixids:
-            feature, _ = mixdb.mixture_ft(mixid)
-
-            predict = pad_and_predict(feature=feature,
-                                      model_name=model_name,
-                                      model_metadata=model_metadata,
-                                      frames_per_batch=frames_per_batch,
-                                      batch_size=batch_size,
-                                      timesteps=timesteps,
-                                      reset=reset)
-
-            output_name = join(output_dir, mixdb.mixtures[mixid].name)
-            with h5py.File(output_name, 'a') as f:
-                if 'predict' in f:
-                    del f['predict']
-                f.create_dataset(name='predict', data=predict)
-    else:
-        features: list[Feature] = []
-        file_indices: list[slice] = []
-        total_frames = 0
-        for mixid in mixids:
-            current_feature, _ = mixdb.mixture_ft(mixid)
-            current_frames = current_feature.shape[0]
-            features.append(current_feature)
-            file_indices.append(slice(total_frames, total_frames + current_frames))
-            total_frames += current_frames
-        feature = np.vstack([features[i] for i in range(len(features))])
-
-        predict = pad_and_predict(feature=feature,
-                                  model_name=model_name,
-                                  model_metadata=model_metadata,
-                                  frames_per_batch=frames_per_batch,
-                                  batch_size=batch_size,
-                                  timesteps=timesteps,
-                                  reset=reset)
-
-        # Write data to separate files
-        for idx, mixid in enumerate(mixids):
-            output_name = join(output_dir, mixdb.mixtures[mixid].name)
-            with h5py.File(output_name, 'a') as f:
-                if 'predict' in f:
-                    del f['predict']
-                f.create_dataset('predict', data=predict[file_indices[idx]])
-
-    logger.info(f'Saved results to {output_dir}')
-
-
-def pad_and_predict(feature: Feature,
-                    model_name: str,
-                    model_metadata: SonusAIMetaData,
-                    frames_per_batch: int,
-                    batch_size: int,
-                    timesteps: int,
-                    reset: bool) -> Predict:
-    import onnxruntime as rt
-    import numpy as np
-
-    from sonusai.utils import reshape_inputs
-    from sonusai.utils import reshape_outputs
-
-    frames = feature.shape[0]
-    padding = frames_per_batch - frames % frames_per_batch
-    feature = np.pad(array=feature, pad_width=((0, padding), (0, 0), (0, 0)))
-    feature, _ = reshape_inputs(feature=feature,
-                                batch_size=batch_size,
-                                timesteps=timesteps,
-                                flatten=model_metadata.flattened,
-                                add1ch=model_metadata.channel)
-    sequences = feature.shape[0] // model_metadata.input_shape[0]
-    feature = np.reshape(feature, [sequences, *model_metadata.input_shape])
-
-    model = rt.InferenceSession(model_name, providers=['CPUExecutionProvider'])
-    output_names = [n.name for n in model.get_outputs()]
-    input_names = [n.name for n in model.get_inputs()]
-
-    predict = []
-    for sequence in range(sequences):
-        predict.append(model.run(output_names, {input_names[0]: feature[sequence]}))
-        if reset:
-            model = rt.InferenceSession(model_name, providers=['CPUExecutionProvider'])
-
-    predict_arr = np.vstack(predict)
-    # Combine [sequences, batch_size, ...] into [frames, ...]
-    predict_shape = predict_arr.shape
-    predict_arr = np.reshape(predict_arr, [predict_shape[0] * predict_shape[1], *predict_shape[2:]])
-    predict_arr, _ = reshape_outputs(predict=predict_arr, timesteps=timesteps)
-    predict_arr = predict_arr[:frames, :]
-
-    return predict_arr
-
-
-if __name__ == '__main__':
-    try:
-        main()
-    except KeyboardInterrupt:
-        logger.info('Canceled due to keyboard interrupt')
-        raise SystemExit(0)
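For reference, the core of the deleted `pad_and_predict` helper is plain array bookkeeping: pad the frame axis up to a whole number of batches, reshape into `[sequences, *input_shape]`, run each sequence, and trim the result back to the original frame count. Below is a minimal numpy sketch of that bookkeeping with hypothetical sizes; it assumes `get_frames_per_batch` amounts to `batch_size * timesteps` for a model with a timestep dimension, and it omits `reshape_inputs`/`reshape_outputs` and the ONNX session itself.

```python
import numpy as np

# Hypothetical sizes: a [frames, strides, bands] feature with 10 frames,
# consumed by a model expecting batch_size=2 sequences of timesteps=2 frames.
batch_size, timesteps = 2, 2
frames_per_batch = batch_size * timesteps  # assumed equivalent of get_frames_per_batch()
feature = np.arange(10 * 1 * 3, dtype=np.float32).reshape(10, 1, 3)

# Pad the frame axis up to a whole number of batches, as in pad_and_predict.
frames = feature.shape[0]
padding = frames_per_batch - frames % frames_per_batch  # 4 - 10 % 4 = 2
padded = np.pad(feature, ((0, padding), (0, 0), (0, 0)))

# Group padded frames into [sequences, batch_size, timesteps, ...];
# the real code reshapes to [sequences, *model_metadata.input_shape].
sequences = padded.shape[0] // frames_per_batch
batched = padded.reshape(sequences, batch_size, timesteps, *feature.shape[1:])

# After inference, flatten back to frames and drop the padding,
# mirroring predict_arr = predict_arr[:frames, :] in the original.
recovered = batched.reshape(-1, *feature.shape[1:])[:frames]
assert np.array_equal(recovered, feature)
```

One quirk worth noting: the padding formula pads a full extra batch when the frame count is already aligned (`frames % frames_per_batch == 0` gives `padding == frames_per_batch`); the trailing `[:frames]` trim makes this harmless in the saved output.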