sonusai 0.11.2__tar.gz → 0.11.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sonusai-0.11.2 → sonusai-0.11.4}/PKG-INFO +3 -4
- {sonusai-0.11.2 → sonusai-0.11.4}/pyproject.toml +3 -4
- {sonusai-0.11.2 → sonusai-0.11.4}/setup.py +3 -4
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/__init__.py +2 -3
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/data_generator/__init__.py +1 -0
- sonusai-0.11.4/sonusai/data_generator/dataset_from_mixdb.py +246 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/data_generator/keras_from_mixdb.py +26 -2
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/keras_train.py +38 -4
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/mixdb.py +17 -1
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/truth_functions/data.py +6 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/post_spenh_targetf.py +6 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/README.rst +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/aawscd_probwrite.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/calc_metric_spenh_targetf.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/data/genmixdb.yml +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/data/whitenoise.wav +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/evaluate.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/genft.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/genmix.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/genmixdb.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/gentcst.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/keras_onnx.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/keras_predict.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/lsdb.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/main.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/metrics/__init__.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/metrics/calc_class_weights.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/metrics/calc_optimal_thresholds.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/metrics/calc_pcm.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/metrics/calc_pesq.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/metrics/calc_sa_sdr.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/metrics/calc_sample_weights.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/metrics/calc_wer.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/metrics/class_summary.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/metrics/confusion_matrix_summary.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/metrics/one_hot.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/metrics/snr_summary.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/__init__.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/active_truth_class_balancing.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/audio.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/augmentation.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/balance.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/class_count.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/config.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/constants.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/feature.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/initialize.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/log_duration_and_sizes.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/mapped_snr_f.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/spectral_mask.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/target_class_balancing.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/targets.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/truth.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/truth_functions/__init__.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/truth_functions/crm.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/truth_functions/energy.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/truth_functions/file.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/truth_functions/phoneme.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/truth_functions/sed.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/truth_functions/target.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mixture/types.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/mkwav.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/onnx_predict.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/plot.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/queries/__init__.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/queries/queries.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/tplot.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/__init__.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/asl_p56.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/asr.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/asr_functions/__init__.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/asr_functions/data.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/asr_functions/deepgram.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/asr_functions/google.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/asr_functions/whisper.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/braced_glob.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/calculate_input_shape.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/create_ts_name.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/dataclass_from_dict.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/db.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/engineering_number.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/get_frames_per_batch.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/get_label_names.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/grouper.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/human_readable_size.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/keras_utils.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/numeric_conversion.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/onnx_utils.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/parallel.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/parallel_tqdm.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/print_mixture_details.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/ranges.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/read_mixture_data.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/read_predict_data.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/reshape.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/seconds_to_hms.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/stacked_complex.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/stratified_shuffle_split.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/trim_docstring.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/wave.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/utils/yes_or_no.py +0 -0
- {sonusai-0.11.2 → sonusai-0.11.4}/sonusai/vars.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sonusai
|
3
|
-
Version: 0.11.2
|
3
|
+
Version: 0.11.4
|
4
4
|
Summary: Framework for building deep neural network models for sound, speech, and voice AI
|
5
5
|
Home-page: https://aaware.com
|
6
6
|
License: GPL-3.0-only
|
@@ -22,14 +22,13 @@ Requires-Dist: jiwer (>=2.5.1,<3.0.0)
|
|
22
22
|
Requires-Dist: keras-tuner (>=1.1.3,<2.0.0)
|
23
23
|
Requires-Dist: matplotlib (>=3.6.1,<4.0.0)
|
24
24
|
Requires-Dist: onnxruntime-gpu (>=1.12.1,<2.0.0)
|
25
|
-
Requires-Dist: openai-whisper (>=
|
25
|
+
Requires-Dist: openai-whisper (>=20230308,<20230309)
|
26
26
|
Requires-Dist: paho-mqtt (>=1.6.1,<2.0.0)
|
27
27
|
Requires-Dist: pandas (>=1.5.1,<2.0.0)
|
28
28
|
Requires-Dist: pesq (>=0.0.4,<0.0.5)
|
29
|
-
Requires-Dist: pyaaware (>=1.4.
|
29
|
+
Requires-Dist: pyaaware (>=1.4.10,<2.0.0)
|
30
30
|
Requires-Dist: python-magic (>=0.4.27,<0.5.0)
|
31
31
|
Requires-Dist: scikit-learn (>=1.2.0,<2.0.0)
|
32
|
-
Requires-Dist: setuptools (>=67.0.0,<68.0.0)
|
33
32
|
Requires-Dist: sh (>=1.14.3,<2.0.0)
|
34
33
|
Requires-Dist: sox (>=1.4.1,<2.0.0)
|
35
34
|
Requires-Dist: speechrecognition (>=3.9.0,<4.0.0)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "sonusai"
|
3
|
-
version = "0.11.2"
|
3
|
+
version = "0.11.4"
|
4
4
|
description = "Framework for building deep neural network models for sound, speech, and voice AI"
|
5
5
|
authors = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
|
6
6
|
maintainers = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
|
@@ -22,15 +22,14 @@ jiwer = "^2.5.1"
|
|
22
22
|
keras-tuner = "^1.1.3"
|
23
23
|
matplotlib = "^3.6.1"
|
24
24
|
onnxruntime-gpu = "^1.12.1"
|
25
|
-
openai-whisper = "^
|
25
|
+
openai-whisper = "^20230308"
|
26
26
|
paho-mqtt = "^1.6.1"
|
27
27
|
pandas = "^1.5.1"
|
28
28
|
pesq = "^0.0.4"
|
29
|
-
pyaaware = "^1.4.
|
29
|
+
pyaaware = "^1.4.10"
|
30
30
|
python = ">=3.8,<3.11"
|
31
31
|
python-magic = "^0.4.27"
|
32
32
|
scikit-learn = "^1.2.0"
|
33
|
-
setuptools = "^67.0.0"
|
34
33
|
sh = "^1.14.3"
|
35
34
|
sox = "^1.4.1"
|
36
35
|
speechrecognition = "^3.9.0"
|
@@ -23,14 +23,13 @@ install_requires = \
|
|
23
23
|
'keras-tuner>=1.1.3,<2.0.0',
|
24
24
|
'matplotlib>=3.6.1,<4.0.0',
|
25
25
|
'onnxruntime-gpu>=1.12.1,<2.0.0',
|
26
|
-
'openai-whisper>=
|
26
|
+
'openai-whisper>=20230308,<20230309',
|
27
27
|
'paho-mqtt>=1.6.1,<2.0.0',
|
28
28
|
'pandas>=1.5.1,<2.0.0',
|
29
29
|
'pesq>=0.0.4,<0.0.5',
|
30
|
-
'pyaaware>=1.4.
|
30
|
+
'pyaaware>=1.4.10,<2.0.0',
|
31
31
|
'python-magic>=0.4.27,<0.5.0',
|
32
32
|
'scikit-learn>=1.2.0,<2.0.0',
|
33
|
-
'setuptools>=67.0.0,<68.0.0',
|
34
33
|
'sh>=1.14.3,<2.0.0',
|
35
34
|
'sox>=1.4.1,<2.0.0',
|
36
35
|
'speechrecognition>=3.9.0,<4.0.0',
|
@@ -45,7 +44,7 @@ entry_points = \
|
|
45
44
|
|
46
45
|
setup_kwargs = {
|
47
46
|
'name': 'sonusai',
|
48
|
-
'version': '0.11.2',
|
47
|
+
'version': '0.11.4',
|
49
48
|
'description': 'Framework for building deep neural network models for sound, speech, and voice AI',
|
50
49
|
'long_description': "Sonus AI: Framework for simplified creation of deep NN models for sound, speech, and voice AI\n\nSonus AI includes functions for pre-processing training and validation data and\ncreating performance metrics reports for key types of Keras models:\n- recurrent, convolutional, or a combination (i.e. RCNNs)\n- binary, multiclass single-label, multiclass multi-label, and regresssion\n- training with data augmentations: noise mixing, pitch and time stretch, etc.\n\nSonus AI python functions are used by:\n - Aaware Inc. sonusai executable: Easily create train/validation data, run prediction, evaluate model performance\n - Keras model scripts: User python scripts for keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for trainining rnn-based models like CRNN's, DSCRNN's, etc. in Keras\n",
|
51
50
|
'author': 'Chris Eddington',
|
@@ -1,9 +1,8 @@
|
|
1
1
|
import logging
|
2
|
+
from importlib import metadata
|
2
3
|
from os.path import dirname
|
3
4
|
|
4
|
-
|
5
|
-
|
6
|
-
__version__ = get_distribution('sonusai').version
|
5
|
+
__version__ = metadata.version('sonusai')
|
7
6
|
BASEDIR = dirname(__file__)
|
8
7
|
|
9
8
|
# create logger
|
@@ -0,0 +1,246 @@
|
|
1
|
+
import warnings
|
2
|
+
from dataclasses import dataclass
|
3
|
+
from typing import List
|
4
|
+
|
5
|
+
import numpy as np
|
6
|
+
import tensorflow as tf
|
7
|
+
|
8
|
+
from sonusai.mixture import GeneralizedIDs
|
9
|
+
from sonusai.mixture import MixtureDatabase
|
10
|
+
from sonusai.utils import get_frames_per_batch
|
11
|
+
|
12
|
+
|
13
|
+
def get_dataset_from_mixdb(mixdb: MixtureDatabase,
|
14
|
+
mixids: GeneralizedIDs,
|
15
|
+
batch_size: int,
|
16
|
+
timesteps: int,
|
17
|
+
flatten: bool,
|
18
|
+
add1ch: bool,
|
19
|
+
shuffle: bool = False) -> tf.data.Dataset:
|
20
|
+
@dataclass(frozen=True)
|
21
|
+
class BatchParams:
|
22
|
+
mixids: List[int]
|
23
|
+
offset: int
|
24
|
+
extra: int
|
25
|
+
padding: int
|
26
|
+
|
27
|
+
def _getitem(batch_index) -> (np.ndarray, np.ndarray):
|
28
|
+
"""Get one batch of data
|
29
|
+
"""
|
30
|
+
from sonusai.utils import reshape_inputs
|
31
|
+
|
32
|
+
batch_params = self.batch_params[batch_index]
|
33
|
+
|
34
|
+
result = [self.mixdb.mixture_ft(mixid) for mixid in batch_params.mixids]
|
35
|
+
feature = np.vstack([result[i][0] for i in range(len(result))])
|
36
|
+
truth = np.vstack([result[i][1] for i in range(len(result))])
|
37
|
+
|
38
|
+
pad_shape = list(feature.shape)
|
39
|
+
pad_shape[0] = batch_params.padding
|
40
|
+
feature = np.vstack([feature, np.zeros(pad_shape)])
|
41
|
+
|
42
|
+
pad_shape = list(truth.shape)
|
43
|
+
pad_shape[0] = batch_params.padding
|
44
|
+
truth = np.vstack([truth, np.zeros(pad_shape)])
|
45
|
+
|
46
|
+
if batch_params.extra > 0:
|
47
|
+
feature = feature[batch_params.offset:-batch_params.extra]
|
48
|
+
truth = truth[batch_params.offset:-batch_params.extra]
|
49
|
+
else:
|
50
|
+
feature = feature[batch_params.offset:]
|
51
|
+
truth = truth[batch_params.offset:]
|
52
|
+
|
53
|
+
feature, truth = reshape_inputs(feature=feature,
|
54
|
+
truth=truth,
|
55
|
+
batch_size=self.batch_size,
|
56
|
+
timesteps=self.timesteps,
|
57
|
+
flatten=self.flatten,
|
58
|
+
add1ch=self.add1ch)
|
59
|
+
|
60
|
+
return feature, truth
|
61
|
+
|
62
|
+
mixids = mixdb.mixids_to_list(mixids)
|
63
|
+
stride = mixdb.fg.stride
|
64
|
+
num_bands = mixdb.fg.num_bands
|
65
|
+
num_classes = mixdb.num_classes
|
66
|
+
mixture_frame_segments = None
|
67
|
+
batch_frame_segments = None
|
68
|
+
|
69
|
+
frames_per_batch = get_frames_per_batch(batch_size, timesteps)
|
70
|
+
# Always extend the number of batches to use all available data
|
71
|
+
# The last batch may need padding
|
72
|
+
total_batches = int(np.ceil(mixdb.total_feature_frames(mixids) / frames_per_batch))
|
73
|
+
|
74
|
+
# Compute mixid, offset, and extra for dataset
|
75
|
+
# offsets and extras are needed because mixtures are not guaranteed to fall on batch boundaries.
|
76
|
+
# When fetching a new index that starts in the middle of a sequence of mixtures, the
|
77
|
+
# previous feature frame offset must be maintained in order to preserve the correct
|
78
|
+
# data sequence. And the extra must be maintained in order to preserve the correct data length.
|
79
|
+
cumulative_frames = 0
|
80
|
+
start_mixture_index = 0
|
81
|
+
offset = 0
|
82
|
+
batch_params = []
|
83
|
+
file_indices = []
|
84
|
+
total_frames = 0
|
85
|
+
for idx, mixid in enumerate(mixids):
|
86
|
+
current_frames = mixdb.mixture_samples(mixid) // mixdb.feature_step_samples
|
87
|
+
file_indices.append(slice(total_frames, total_frames + current_frames))
|
88
|
+
total_frames += current_frames
|
89
|
+
cumulative_frames += current_frames
|
90
|
+
while cumulative_frames >= frames_per_batch:
|
91
|
+
extra = cumulative_frames - frames_per_batch
|
92
|
+
mixids = mixids[start_mixture_index:idx + 1]
|
93
|
+
batch_params.append(BatchParams(mixids=mixids, offset=offset, extra=extra, padding=0))
|
94
|
+
if extra == 0:
|
95
|
+
start_mixture_index = idx + 1
|
96
|
+
offset = 0
|
97
|
+
else:
|
98
|
+
start_mixture_index = idx
|
99
|
+
offset = current_frames - extra
|
100
|
+
cumulative_frames = extra
|
101
|
+
|
102
|
+
# If needed, add final batch with padding
|
103
|
+
needed_frames = total_batches * frames_per_batch
|
104
|
+
padding = needed_frames - total_frames
|
105
|
+
if padding != 0:
|
106
|
+
mixids = mixids[start_mixture_index:]
|
107
|
+
batch_params.append(BatchParams(mixids=mixids, offset=offset, extra=0, padding=padding))
|
108
|
+
|
109
|
+
dataset = tf.data.Dataset.from_generator()
|
110
|
+
return dataset
|
111
|
+
|
112
|
+
|
113
|
+
with warnings.catch_warnings():
|
114
|
+
warnings.simplefilter('ignore')
|
115
|
+
from keras.utils import Sequence
|
116
|
+
|
117
|
+
|
118
|
+
class DatasetFromMixtureDatabase(Sequence):
|
119
|
+
"""Generates data for Keras from a SonusAI mixture database
|
120
|
+
"""
|
121
|
+
from dataclasses import dataclass
|
122
|
+
|
123
|
+
@dataclass(frozen=True)
|
124
|
+
class BatchParams:
|
125
|
+
mixids: List[int]
|
126
|
+
offset: int
|
127
|
+
extra: int
|
128
|
+
padding: int
|
129
|
+
|
130
|
+
def __init__(self,
|
131
|
+
mixdb: MixtureDatabase,
|
132
|
+
mixids: GeneralizedIDs,
|
133
|
+
batch_size: int,
|
134
|
+
timesteps: int,
|
135
|
+
flatten: bool,
|
136
|
+
add1ch: bool,
|
137
|
+
shuffle: bool = False):
|
138
|
+
"""Initialization
|
139
|
+
"""
|
140
|
+
self.mixdb = mixdb
|
141
|
+
self.mixids = self.mixdb.mixids_to_list(mixids)
|
142
|
+
self.batch_size = batch_size
|
143
|
+
self.timesteps = timesteps
|
144
|
+
self.flatten = flatten
|
145
|
+
self.add1ch = add1ch
|
146
|
+
self.shuffle = shuffle
|
147
|
+
self.stride = self.mixdb.fg.stride
|
148
|
+
self.num_bands = self.mixdb.fg.num_bands
|
149
|
+
self.num_classes = self.mixdb.num_classes
|
150
|
+
self.mixture_frame_segments = None
|
151
|
+
self.batch_frame_segments = None
|
152
|
+
self.total_batches = None
|
153
|
+
|
154
|
+
self._initialize_mixtures()
|
155
|
+
|
156
|
+
def __len__(self) -> int:
|
157
|
+
"""Denotes the number of batches per epoch
|
158
|
+
"""
|
159
|
+
return self.total_batches
|
160
|
+
|
161
|
+
def __getitem__(self, batch_index: int) -> (np.ndarray, np.ndarray):
|
162
|
+
"""Get one batch of data
|
163
|
+
"""
|
164
|
+
from sonusai.utils import reshape_inputs
|
165
|
+
|
166
|
+
batch_params = self.batch_params[batch_index]
|
167
|
+
|
168
|
+
result = [self.mixdb.mixture_ft(mixid) for mixid in batch_params.mixids]
|
169
|
+
feature = np.vstack([result[i][0] for i in range(len(result))])
|
170
|
+
truth = np.vstack([result[i][1] for i in range(len(result))])
|
171
|
+
|
172
|
+
pad_shape = list(feature.shape)
|
173
|
+
pad_shape[0] = batch_params.padding
|
174
|
+
feature = np.vstack([feature, np.zeros(pad_shape)])
|
175
|
+
|
176
|
+
pad_shape = list(truth.shape)
|
177
|
+
pad_shape[0] = batch_params.padding
|
178
|
+
truth = np.vstack([truth, np.zeros(pad_shape)])
|
179
|
+
|
180
|
+
if batch_params.extra > 0:
|
181
|
+
feature = feature[batch_params.offset:-batch_params.extra]
|
182
|
+
truth = truth[batch_params.offset:-batch_params.extra]
|
183
|
+
else:
|
184
|
+
feature = feature[batch_params.offset:]
|
185
|
+
truth = truth[batch_params.offset:]
|
186
|
+
|
187
|
+
feature, truth = reshape_inputs(feature=feature,
|
188
|
+
truth=truth,
|
189
|
+
batch_size=self.batch_size,
|
190
|
+
timesteps=self.timesteps,
|
191
|
+
flatten=self.flatten,
|
192
|
+
add1ch=self.add1ch)
|
193
|
+
|
194
|
+
return feature, truth
|
195
|
+
|
196
|
+
def on_epoch_end(self) -> None:
|
197
|
+
"""Modification of dataset between epochs
|
198
|
+
"""
|
199
|
+
import random
|
200
|
+
|
201
|
+
if self.shuffle:
|
202
|
+
random.shuffle(self.mixids)
|
203
|
+
self._initialize_mixtures()
|
204
|
+
|
205
|
+
def _initialize_mixtures(self) -> None:
|
206
|
+
from sonusai.utils import get_frames_per_batch
|
207
|
+
|
208
|
+
frames_per_batch = get_frames_per_batch(self.batch_size, self.timesteps)
|
209
|
+
# Always extend the number of batches to use all available data
|
210
|
+
# The last batch may need padding
|
211
|
+
self.total_batches = int(np.ceil(self.mixdb.total_feature_frames(self.mixids) / frames_per_batch))
|
212
|
+
|
213
|
+
# Compute mixid, offset, and extra for dataset
|
214
|
+
# offsets and extras are needed because mixtures are not guaranteed to fall on batch boundaries.
|
215
|
+
# When fetching a new index that starts in the middle of a sequence of mixtures, the
|
216
|
+
# previous feature frame offset must be maintained in order to preserve the correct
|
217
|
+
# data sequence. And the extra must be maintained in order to preserve the correct data length.
|
218
|
+
cumulative_frames = 0
|
219
|
+
start_mixture_index = 0
|
220
|
+
offset = 0
|
221
|
+
self.batch_params = []
|
222
|
+
self.file_indices = []
|
223
|
+
total_frames = 0
|
224
|
+
for idx, mixid in enumerate(self.mixids):
|
225
|
+
current_frames = self.mixdb.mixture_samples(mixid) // self.mixdb.feature_step_samples
|
226
|
+
self.file_indices.append(slice(total_frames, total_frames + current_frames))
|
227
|
+
total_frames += current_frames
|
228
|
+
cumulative_frames += current_frames
|
229
|
+
while cumulative_frames >= frames_per_batch:
|
230
|
+
extra = cumulative_frames - frames_per_batch
|
231
|
+
mixids = self.mixids[start_mixture_index:idx + 1]
|
232
|
+
self.batch_params.append(self.BatchParams(mixids=mixids, offset=offset, extra=extra, padding=0))
|
233
|
+
if extra == 0:
|
234
|
+
start_mixture_index = idx + 1
|
235
|
+
offset = 0
|
236
|
+
else:
|
237
|
+
start_mixture_index = idx
|
238
|
+
offset = current_frames - extra
|
239
|
+
cumulative_frames = extra
|
240
|
+
|
241
|
+
# If needed, add final batch with padding
|
242
|
+
needed_frames = self.total_batches * frames_per_batch
|
243
|
+
padding = needed_frames - total_frames
|
244
|
+
if padding != 0:
|
245
|
+
mixids = self.mixids[start_mixture_index:]
|
246
|
+
self.batch_params.append(self.BatchParams(mixids=mixids, offset=offset, extra=0, padding=padding))
|
@@ -1,20 +1,40 @@
|
|
1
|
+
import multiprocessing as mp
|
1
2
|
import warnings
|
3
|
+
from dataclasses import dataclass
|
2
4
|
from typing import List
|
3
5
|
|
4
6
|
import numpy as np
|
5
7
|
|
8
|
+
from sonusai.mixture import Feature
|
6
9
|
from sonusai.mixture import GeneralizedIDs
|
7
10
|
from sonusai.mixture import MixtureDatabase
|
11
|
+
from sonusai.mixture import Truth
|
8
12
|
|
9
13
|
with warnings.catch_warnings():
|
10
14
|
warnings.simplefilter('ignore')
|
11
15
|
from keras.utils import Sequence
|
12
16
|
|
13
17
|
|
18
|
+
@dataclass
|
19
|
+
class MPGlobal:
|
20
|
+
mixdb: MixtureDatabase = None
|
21
|
+
|
22
|
+
|
23
|
+
MP_GLOBAL = MPGlobal()
|
24
|
+
|
25
|
+
|
26
|
+
def _pool_init(mixdb: MixtureDatabase) -> None:
|
27
|
+
MP_GLOBAL.mixdb = mixdb
|
28
|
+
|
29
|
+
|
30
|
+
def _pool_func(mixid: int) -> (Feature, Truth):
|
31
|
+
mixdb = MP_GLOBAL.mixdb
|
32
|
+
return mixdb.mixture_ft(mixid)
|
33
|
+
|
34
|
+
|
14
35
|
class KerasFromMixtureDatabase(Sequence):
|
15
36
|
"""Generates data for Keras from a SonusAI mixture database
|
16
37
|
"""
|
17
|
-
from dataclasses import dataclass
|
18
38
|
|
19
39
|
@dataclass(frozen=True)
|
20
40
|
class BatchParams:
|
@@ -49,6 +69,10 @@ class KerasFromMixtureDatabase(Sequence):
|
|
49
69
|
|
50
70
|
self._initialize_mixtures()
|
51
71
|
|
72
|
+
self.pool = mp.Pool(processes=mp.cpu_count(),
|
73
|
+
initializer=_pool_init,
|
74
|
+
initargs=[mixdb])
|
75
|
+
|
52
76
|
def __len__(self) -> int:
|
53
77
|
"""Denotes the number of batches per epoch
|
54
78
|
"""
|
@@ -61,7 +85,7 @@ class KerasFromMixtureDatabase(Sequence):
|
|
61
85
|
|
62
86
|
batch_params = self.batch_params[batch_index]
|
63
87
|
|
64
|
-
result =
|
88
|
+
result = self.pool.map(_pool_func, batch_params.mixids)
|
65
89
|
feature = np.vstack([result[i][0] for i in range(len(result))])
|
66
90
|
truth = np.vstack([result[i][1] for i in range(len(result))])
|
67
91
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
"""sonusai keras_train
|
2
2
|
|
3
|
-
usage: keras_train [-
|
3
|
+
usage: keras_train [-hgv] (-m MODEL) (-l VLOC) [-w KMODEL] [-e EPOCHS] [-b BATCH] [-t TSTEPS] [-p ESP] TLOC
|
4
4
|
|
5
5
|
options:
|
6
6
|
-h, --help
|
@@ -12,6 +12,7 @@ options:
|
|
12
12
|
-b BATCH, --batch BATCH Batch size.
|
13
13
|
-t TSTEPS, --tsteps TSTEPS Timesteps.
|
14
14
|
-p ESP, --patience ESP Early stopping patience.
|
15
|
+
-g, --loss-batch-log Enable per-batch loss log. [default: False]
|
15
16
|
|
16
17
|
Use Keras to train a model defined by a Python definition file and SonusAI genft data.
|
17
18
|
|
@@ -20,6 +21,7 @@ Inputs:
|
|
20
21
|
VLOC A SonusAI mixture database directory to use for validation data.
|
21
22
|
|
22
23
|
Results are written into subdirectory <MODEL>-<TIMESTAMP>.
|
24
|
+
Per-batch loss history, if enabled, is written to <basename>-history-lossb.npy
|
23
25
|
|
24
26
|
"""
|
25
27
|
import tensorflow as tf
|
@@ -27,6 +29,20 @@ import tensorflow as tf
|
|
27
29
|
from sonusai import logger
|
28
30
|
|
29
31
|
|
32
|
+
class LossBatchHistory(tf.keras.callbacks.Callback):
|
33
|
+
def __init__(self):
|
34
|
+
super().__init__()
|
35
|
+
self.history = None
|
36
|
+
|
37
|
+
def on_train_begin(self, logs=None):
|
38
|
+
self.history = {'loss': []}
|
39
|
+
|
40
|
+
def on_batch_end(self, batch, logs=None):
|
41
|
+
if logs is None:
|
42
|
+
logs = {}
|
43
|
+
self.history['loss'].append(logs.get('loss'))
|
44
|
+
|
45
|
+
|
30
46
|
class SonusAIModelCheckpoint(tf.keras.callbacks.ModelCheckpoint):
|
31
47
|
def __init__(self,
|
32
48
|
filepath,
|
@@ -80,6 +96,7 @@ def main():
|
|
80
96
|
batch_size = args['--batch']
|
81
97
|
timesteps = args['--tsteps']
|
82
98
|
esp = args['--patience']
|
99
|
+
loss_batch_log = args['--loss-batch-log']
|
83
100
|
t_name = args['TLOC']
|
84
101
|
|
85
102
|
import warnings
|
@@ -108,6 +125,7 @@ def main():
|
|
108
125
|
from sonusai.utils import import_keras_model
|
109
126
|
from sonusai.utils import stratified_shuffle_split_mixid
|
110
127
|
from sonusai.utils import reshape_outputs
|
128
|
+
from sonusai.utils import get_frames_per_batch
|
111
129
|
|
112
130
|
model_base = basename(model_name)
|
113
131
|
model_root = splitext(model_base)[0]
|
@@ -143,15 +161,18 @@ def main():
|
|
143
161
|
|
144
162
|
# Check overrides
|
145
163
|
timesteps = check_keras_overrides(model, t_mixdb.feature, t_mixdb.num_classes, timesteps, batch_size)
|
164
|
+
# Calculate batches per epoch, use ceiling as last batch is zero extended
|
165
|
+
frames_per_batch = get_frames_per_batch(batch_size, timesteps)
|
166
|
+
batches_per_epoch = int(np.ceil(t_mixdb.total_feature_frames('*') / frames_per_batch))
|
146
167
|
|
147
|
-
logger.info('Building model')
|
168
|
+
logger.info('Building and compiling model')
|
148
169
|
try:
|
149
170
|
hypermodel = model.MyHyperModel(feature=t_mixdb.feature,
|
150
171
|
num_classes=t_mixdb.num_classes,
|
151
172
|
timesteps=timesteps,
|
152
173
|
batch_size=batch_size)
|
153
174
|
built_model = hypermodel.build_model(kt.HyperParameters())
|
154
|
-
built_model = hypermodel.compile_default(built_model)
|
175
|
+
built_model = hypermodel.compile_default(built_model, batches_per_epoch)
|
155
176
|
except Exception as e:
|
156
177
|
logger.exception(f'Error: build_model() in {model_base} failed: {e}')
|
157
178
|
raise SystemExit(1)
|
@@ -225,6 +246,15 @@ def main():
|
|
225
246
|
feature=hypermodel.feature,
|
226
247
|
num_classes=hypermodel.num_classes)
|
227
248
|
|
249
|
+
csv_logger = tf.keras.callbacks.CSVLogger(base_name + '-history.csv')
|
250
|
+
callbacks = [es, ckpt_callback, csv_logger]
|
251
|
+
# loss_batch_log = True
|
252
|
+
loss_batchlogger = None
|
253
|
+
if loss_batch_log is True:
|
254
|
+
loss_batchlogger = LossBatchHistory()
|
255
|
+
callbacks.append(loss_batchlogger)
|
256
|
+
logger.info(f'Adding per batch loss logging to training')
|
257
|
+
|
228
258
|
if weights_name is not None:
|
229
259
|
logger.info(f'Loading weights from {weights_name}')
|
230
260
|
built_model.load_weights(weights_name)
|
@@ -240,13 +270,17 @@ def main():
|
|
240
270
|
epochs=epochs,
|
241
271
|
validation_data=v_datagen,
|
242
272
|
shuffle=False,
|
243
|
-
callbacks=
|
273
|
+
callbacks=callbacks)
|
244
274
|
|
245
275
|
# Save history into numpy file
|
246
276
|
history_name = base_name + '-history'
|
247
277
|
np.save(history_name, history.history)
|
248
278
|
# Note: Reload with history=np.load(history_name, allow_pickle='TRUE').item()
|
249
279
|
logger.info(f'Saved training history to numpy file {history_name}.npy')
|
280
|
+
if loss_batch_log is True:
|
281
|
+
his_batch_loss_name = base_name + '-history-lossb.npy'
|
282
|
+
np.save(his_batch_loss_name, loss_batchlogger.history)
|
283
|
+
logger.info(f'Saved per-batch loss history to numpy file {his_batch_loss_name}')
|
250
284
|
|
251
285
|
# Find checkpoint file and load weights for prediction and model save
|
252
286
|
checkpoint_name = None
|
@@ -88,6 +88,8 @@ class MixtureDatabaseConfig(DataClassSonusAIMixin):
|
|
88
88
|
class TransformConfig:
|
89
89
|
N: int
|
90
90
|
R: int
|
91
|
+
bin_start: int
|
92
|
+
bin_end: int
|
91
93
|
ttype: str
|
92
94
|
|
93
95
|
|
@@ -112,14 +114,20 @@ class MixtureDatabase:
|
|
112
114
|
|
113
115
|
self.ft_config = TransformConfig(N=self.fg.ftransform_N,
|
114
116
|
R=self.fg.ftransform_R,
|
117
|
+
bin_start=self.fg.bin_start,
|
118
|
+
bin_end=self.fg.bin_end,
|
115
119
|
ttype=self.fg.ftransform_ttype)
|
116
120
|
|
117
121
|
self.eft_config = TransformConfig(N=self.fg.eftransform_N,
|
118
122
|
R=self.fg.eftransform_R,
|
123
|
+
bin_start=self.fg.bin_start,
|
124
|
+
bin_end=self.fg.bin_end,
|
119
125
|
ttype=self.fg.eftransform_ttype)
|
120
126
|
|
121
127
|
self.it_config = TransformConfig(N=self.fg.itransform_N,
|
122
128
|
R=self.fg.itransform_R,
|
129
|
+
bin_start=self.fg.bin_start,
|
130
|
+
bin_end=self.fg.bin_end,
|
123
131
|
ttype=self.fg.itransform_ttype)
|
124
132
|
|
125
133
|
self.show_progress = show_progress
|
@@ -183,6 +191,8 @@ class MixtureDatabase:
|
|
183
191
|
return calculate_transform_from_audio(audio=audio,
|
184
192
|
transform=ForwardTransform(N=self.ft_config.N,
|
185
193
|
R=self.ft_config.R,
|
194
|
+
bin_start=self.ft_config.bin_start,
|
195
|
+
bin_end=self.ft_config.bin_end,
|
186
196
|
ttype=self.ft_config.ttype))
|
187
197
|
|
188
198
|
def inverse_transform(self, transform: AudioF, trim: bool = True) -> AudioT:
|
@@ -201,6 +211,8 @@ class MixtureDatabase:
|
|
201
211
|
return calculate_audio_from_transform(data=transform,
|
202
212
|
transform=InverseTransform(N=self.it_config.N,
|
203
213
|
R=self.it_config.R,
|
214
|
+
bin_start=self.it_config.bin_start,
|
215
|
+
bin_end=self.it_config.bin_end,
|
204
216
|
ttype=self.it_config.ttype),
|
205
217
|
trim=trim)
|
206
218
|
|
@@ -931,7 +943,11 @@ class MixtureDatabase:
|
|
931
943
|
if noise is None:
|
932
944
|
noise = self.mixture_noise(mixid=mixid)
|
933
945
|
|
934
|
-
fft = ForwardTransform(N=self.ft_config.N,
|
946
|
+
fft = ForwardTransform(N=self.ft_config.N,
|
947
|
+
R=self.ft_config.R,
|
948
|
+
bin_start=self.ft_config.bin_start,
|
949
|
+
bin_end=self.ft_config.bin_end,
|
950
|
+
ttype=self.ft_config.ttype)
|
935
951
|
|
936
952
|
segsnr_t = np.empty(self.mixture_samples(mixid), dtype=np.float32)
|
937
953
|
|
@@ -22,11 +22,17 @@ class Data:
|
|
22
22
|
self.zero_based_indices = [x - 1 for x in config.index]
|
23
23
|
self.target_fft = ForwardTransform(N=fg.ftransform_N,
|
24
24
|
R=fg.ftransform_R,
|
25
|
+
bin_start=fg.bin_start,
|
26
|
+
bin_end=fg.bin_end,
|
25
27
|
ttype=fg.ftransform_ttype)
|
26
28
|
self.noise_fft = ForwardTransform(N=fg.ftransform_N,
|
27
29
|
R=fg.ftransform_R,
|
30
|
+
bin_start=fg.bin_start,
|
31
|
+
bin_end=fg.bin_end,
|
28
32
|
ttype=fg.ftransform_ttype)
|
29
33
|
self.swin = InverseTransform(N=fg.itransform_N,
|
30
34
|
R=fg.itransform_R,
|
35
|
+
bin_start=fg.bin_start,
|
36
|
+
bin_end=fg.bin_end,
|
31
37
|
ttype=fg.itransform_ttype).W
|
32
38
|
self.truth = np.zeros((len(target_audio), config.num_classes), dtype=np.float32)
|
@@ -30,6 +30,8 @@ from sonusai import logger
|
|
30
30
|
class MPGlobal:
|
31
31
|
N: int = None
|
32
32
|
R: int = None
|
33
|
+
bin_start: int = None
|
34
|
+
bin_end: int = None
|
33
35
|
ttype: str = None
|
34
36
|
output_dir: str = None
|
35
37
|
|
@@ -86,6 +88,8 @@ def main():
|
|
86
88
|
|
87
89
|
MP_GLOBAL.N = fg.itransform_N
|
88
90
|
MP_GLOBAL.R = fg.itransform_R
|
91
|
+
MP_GLOBAL.bin_start = fg.bin_start
|
92
|
+
MP_GLOBAL.bin_end = fg.bin_end
|
89
93
|
MP_GLOBAL.ttype = fg.itransform_ttype
|
90
94
|
MP_GLOBAL.output_dir = output_dir
|
91
95
|
|
@@ -135,6 +139,8 @@ def _process(file: str) -> None:
|
|
135
139
|
audio = calculate_audio_from_transform(data=predict,
|
136
140
|
transform=InverseTransform(N=MP_GLOBAL.N,
|
137
141
|
R=MP_GLOBAL.R,
|
142
|
+
bin_start=MP_GLOBAL.bin_start,
|
143
|
+
bin_end=MP_GLOBAL.bin_end,
|
138
144
|
ttype=MP_GLOBAL.ttype))
|
139
145
|
write_wav(name=output_name, audio=float_to_int16(audio))
|
140
146
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|