sonusai 0.12.5__tar.gz → 0.12.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sonusai-0.12.5 → sonusai-0.12.7}/PKG-INFO +2 -2
- {sonusai-0.12.5 → sonusai-0.12.7}/pyproject.toml +2 -2
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/calc_metric_spenh.py +2 -2
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/data_generator/keras_from_mixdb.py +3 -3
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/evaluate.py +1 -1
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/genft.py +2 -2
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/genmix.py +2 -2
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/genmixdb.py +18 -33
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/gentcst.py +1 -1
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/keras_onnx.py +1 -1
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/keras_predict.py +1 -1
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/keras_train.py +4 -4
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/lsdb.py +1 -1
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/main.py +8 -6
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/__init__.py +1 -1
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/audio.py +71 -3
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/augmentation.py +10 -13
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/config.py +2 -2
- sonusai-0.12.7/sonusai/mixture/generate_mixtures.py +294 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/mixdb.py +77 -8
- sonusai-0.12.7/sonusai/mkmanifest.py +174 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mkwav.py +3 -3
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/onnx_predict.py +7 -5
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/plot.py +1 -1
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/post_spenh_targetf.py +2 -2
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/torchl_predict.py +1 -1
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/torchl_train.py +1 -1
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/tplot.py +1 -1
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/__init__.py +1 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/asr.py +6 -5
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/asr_functions/data.py +4 -3
- sonusai-0.12.7/sonusai/utils/asr_manifest_functions/__init__.py +6 -0
- sonusai-0.12.7/sonusai/utils/asr_manifest_functions/data.py +10 -0
- sonusai-0.12.7/sonusai/utils/asr_manifest_functions/librispeech.py +49 -0
- sonusai-0.12.7/sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +69 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/braced_glob.py +10 -3
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/parallel_tqdm.py +5 -4
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/vars.py +9 -7
- sonusai-0.12.5/sonusai/mixture/generate_mixtures.py +0 -328
- {sonusai-0.12.5 → sonusai-0.12.7}/README.rst +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/__init__.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/aawscd_probwrite.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/data/__init__.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/data/genmixdb.yml +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/data/speech_ma01_01.wav +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/data/whitenoise.wav +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/data_generator/__init__.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/data_generator/dataset_from_mixdb.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/data_generator/torch_from_mixdb.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/__init__.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/calc_class_weights.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/calc_optimal_thresholds.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/calc_pcm.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/calc_pesq.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/calc_sa_sdr.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/calc_sample_weights.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/calc_wer.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/calc_wsdr.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/class_summary.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/confusion_matrix_summary.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/one_hot.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/snr_summary.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/active_truth_class_balancing.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/balance.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/class_count.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/constants.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/feature.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/initialize.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/log_duration_and_sizes.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/mapped_snr_f.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/spectral_mask.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/target_class_balancing.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/targets.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/tokenized_shell_vars.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/truth.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/truth_functions/__init__.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/truth_functions/crm.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/truth_functions/data.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/truth_functions/energy.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/truth_functions/file.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/truth_functions/phoneme.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/truth_functions/sed.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/truth_functions/target.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/types.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/queries/__init__.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/queries/queries.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/asl_p56.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/asr_functions/__init__.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/asr_functions/aixplain_whisper.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/asr_functions/deepgram.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/asr_functions/google.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/asr_functions/whisper.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/calculate_input_shape.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/create_ts_name.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/dataclass_from_dict.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/db.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/energy_f.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/engineering_number.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/get_frames_per_batch.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/get_label_names.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/grouper.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/human_readable_size.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/keras_utils.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/max_text_width.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/numeric_conversion.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/onnx_utils.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/parallel.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/print_mixture_details.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/ranges.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/read_mixture_data.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/read_predict_data.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/reshape.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/seconds_to_hms.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/stacked_complex.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/stratified_shuffle_split.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/trim_docstring.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/wave.py +0 -0
- {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/yes_or_no.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sonusai
|
3
|
-
Version: 0.12.5
|
3
|
+
Version: 0.12.7
|
4
4
|
Summary: Framework for building deep neural network models for sound, speech, and voice AI
|
5
5
|
Home-page: https://aaware.com
|
6
6
|
License: GPL-3.0-only
|
@@ -29,7 +29,7 @@ Requires-Dist: paho-mqtt (>=1.6.1,<2.0.0)
|
|
29
29
|
Requires-Dist: pandas (>=2.0.3,<3.0.0)
|
30
30
|
Requires-Dist: pesq (>=0.0.4,<0.0.5)
|
31
31
|
Requires-Dist: protobuf (>=4.24.0,<5.0.0)
|
32
|
-
Requires-Dist: pyaaware (>=1.4.
|
32
|
+
Requires-Dist: pyaaware (>=1.4.19,<2.0.0)
|
33
33
|
Requires-Dist: python-magic (>=0.4.27,<0.5.0)
|
34
34
|
Requires-Dist: scikit-learn (>=1.3.0,<2.0.0)
|
35
35
|
Requires-Dist: sh (>=1.14.3,<2.0.0)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "sonusai"
|
3
|
-
version = "0.12.5"
|
3
|
+
version = "0.12.7"
|
4
4
|
description = "Framework for building deep neural network models for sound, speech, and voice AI"
|
5
5
|
authors = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
|
6
6
|
maintainers = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
|
@@ -31,7 +31,7 @@ paho-mqtt = "^1.6.1"
|
|
31
31
|
pandas = "^2.0.3"
|
32
32
|
pesq = "^0.0.4"
|
33
33
|
protobuf = "^4.24.0"
|
34
|
-
pyaaware = "^1.4.
|
34
|
+
pyaaware = "^1.4.19"
|
35
35
|
python = ">=3.8,<3.11"
|
36
36
|
python-magic = "^0.4.27"
|
37
37
|
scikit-learn = "^1.3.0"
|
@@ -710,7 +710,7 @@ def _process_mixture(mixid: int) -> Tuple[pd.DataFrame, pd.DataFrame]:
|
|
710
710
|
return all_metrics_table_1, all_metrics_table_2
|
711
711
|
|
712
712
|
|
713
|
-
def main():
|
713
|
+
def main() -> None:
|
714
714
|
from docopt import docopt
|
715
715
|
|
716
716
|
import sonusai
|
@@ -808,7 +808,7 @@ def main():
|
|
808
808
|
# Individual mixtures use pandas print, set precision to 2 decimal places
|
809
809
|
# pd.set_option('float_format', '{:.2f}'.format)
|
810
810
|
progress = tqdm(total=len(mixids))
|
811
|
-
all_metrics_tables = p_tqdm_map(_process_mixture, mixids, progress=progress)
|
811
|
+
all_metrics_tables = p_tqdm_map(_process_mixture, mixids, progress=progress, chunksize=10)
|
812
812
|
progress.close()
|
813
813
|
|
814
814
|
all_metrics_table_1 = pd.concat([item[0] for item in all_metrics_tables])
|
@@ -65,9 +65,9 @@ class KerasFromMixtureDatabase(Sequence):
|
|
65
65
|
self.stride = self.mixdb.fg.stride
|
66
66
|
self.num_bands = self.mixdb.fg.num_bands
|
67
67
|
self.num_classes = self.mixdb.num_classes
|
68
|
-
self.mixture_frame_segments = None
|
69
|
-
self.batch_frame_segments = None
|
70
|
-
self.total_batches: Optional[int]
|
68
|
+
self.mixture_frame_segments: Optional[int] = None
|
69
|
+
self.batch_frame_segments: Optional[int] = None
|
70
|
+
self.total_batches: Optional[int] = None
|
71
71
|
|
72
72
|
self._initialize_mixtures()
|
73
73
|
|
@@ -72,7 +72,7 @@ def genft(mixdb: MixtureDatabase,
|
|
72
72
|
results.append(_genft_kernel(mixid))
|
73
73
|
else:
|
74
74
|
progress = tqdm(total=len(mixids), disable=not show_progress)
|
75
|
-
results = p_tqdm_map(_genft_kernel, mixids, progress=progress)
|
75
|
+
results = p_tqdm_map(_genft_kernel, mixids, progress=progress, chunksize=10)
|
76
76
|
progress.close()
|
77
77
|
|
78
78
|
return results
|
@@ -100,7 +100,7 @@ def _genft_kernel(mixid: int) -> GenFTData:
|
|
100
100
|
return GenFTData(feature=feature, truth_f=truth_f, segsnr=segsnr)
|
101
101
|
|
102
102
|
|
103
|
-
def main():
|
103
|
+
def main() -> None:
|
104
104
|
from docopt import docopt
|
105
105
|
|
106
106
|
import sonusai
|
@@ -79,7 +79,7 @@ def genmix(mixdb: MixtureDatabase,
|
|
79
79
|
results.append(_genmix_kernel(mixid))
|
80
80
|
else:
|
81
81
|
progress = tqdm(total=len(mixids), disable=not show_progress)
|
82
|
-
results = p_tqdm_map(_genmix_kernel, mixids, progress=progress)
|
82
|
+
results = p_tqdm_map(_genmix_kernel, mixids, progress=progress, chunksize=10)
|
83
83
|
progress.close()
|
84
84
|
|
85
85
|
return results
|
@@ -128,7 +128,7 @@ def _genmix_kernel(mixid: int) -> GenMixData:
|
|
128
128
|
segsnr_t=segsnr_t)
|
129
129
|
|
130
130
|
|
131
|
-
def main():
|
131
|
+
def main() -> None:
|
132
132
|
from docopt import docopt
|
133
133
|
|
134
134
|
import sonusai
|
@@ -141,7 +141,6 @@ def genmixdb(location: Location,
|
|
141
141
|
show_progress: bool = False,
|
142
142
|
test_mode: bool = False,
|
143
143
|
use_cache: bool = True) -> MixtureDatabase:
|
144
|
-
import itertools
|
145
144
|
from random import seed
|
146
145
|
|
147
146
|
import yaml
|
@@ -155,7 +154,6 @@ def genmixdb(location: Location,
|
|
155
154
|
from sonusai.mixture import SAMPLE_RATE
|
156
155
|
from sonusai.mixture import TruthSettings
|
157
156
|
from sonusai.mixture import balance_targets
|
158
|
-
from sonusai.mixture import estimate_augmented_length_from_audio
|
159
157
|
from sonusai.mixture import generate_mixtures
|
160
158
|
from sonusai.mixture import get_augmentation_indices_for_mixup
|
161
159
|
from sonusai.mixture import get_augmentations
|
@@ -286,19 +284,11 @@ def genmixdb(location: Location,
|
|
286
284
|
|
287
285
|
augmented_targets = balance_targets(mixdb, augmented_targets)
|
288
286
|
|
289
|
-
augmented_noise_iter = list(itertools.product(*[range(len(mixdb.noises)), range(len(mixdb.noise_augmentations))]))
|
290
|
-
noise_audio_samples = sum([mixdb.augmented_noise_length(nfi, nai) for nfi, nai in augmented_noise_iter])
|
291
|
-
|
292
|
-
if logging:
|
293
|
-
logger.info('Generating mixtures first pass')
|
294
|
-
used_noise_files, used_noise_samples = generate_mixtures(mixdb=mixdb,
|
295
|
-
augmented_targets=augmented_targets,
|
296
|
-
noise_files=noise_files,
|
297
|
-
noise_augmentations=noise_augmentations,
|
298
|
-
mixups=mixups,
|
299
|
-
show_progress=show_progress)
|
300
|
-
|
301
287
|
total_noise_files = len(mixdb.noises) * len(mixdb.noise_augmentations)
|
288
|
+
aug_noise_audio_samples = mixdb.augmented_noise_samples
|
289
|
+
|
290
|
+
total_target_files = len(augmented_targets)
|
291
|
+
aug_target_audio_samples = mixdb.augmented_target_samples
|
302
292
|
|
303
293
|
if logging:
|
304
294
|
raw_target_audio_samples = sum([targets.samples for targets in mixdb.targets])
|
@@ -312,24 +302,19 @@ def genmixdb(location: Location,
|
|
312
302
|
f'{human_readable_size(raw_noise_audio_duration * SAMPLE_RATE * SAMPLE_BYTES, 1)}, '
|
313
303
|
f'{seconds_to_hms(seconds=raw_noise_audio_duration)}')
|
314
304
|
|
315
|
-
augmented_noise_audio_samples = float(sum([mixdb.augmented_noise_length(f, a) for f, a in
|
316
|
-
zip(range(len(mixdb.noises)),
|
317
|
-
range(len(mixdb.noise_augmentations)))]))
|
318
|
-
augmented_target_audio_samples = 0
|
319
|
-
for augmented_target in augmented_targets:
|
320
|
-
augmented_target_audio_samples += estimate_augmented_length_from_audio(
|
321
|
-
audio=mixdb.raw_target_audio(augmented_target.target_file_index),
|
322
|
-
augmentation=mixdb.target_augmentations[augmented_target.target_augmentation_index],
|
323
|
-
length_common_denominator=mixdb.feature_step_samples)
|
324
|
-
|
325
305
|
logger.info('')
|
326
|
-
logger.info(f'Augmented target audio: {
|
327
|
-
f'{human_readable_size(
|
328
|
-
f'{seconds_to_hms(seconds=
|
306
|
+
logger.info(f'Augmented target audio: {total_target_files} files, '
|
307
|
+
f'{human_readable_size(aug_target_audio_samples * SAMPLE_BYTES, 1)}, '
|
308
|
+
f'{seconds_to_hms(seconds=aug_target_audio_samples / SAMPLE_RATE)}')
|
329
309
|
logger.info(f'Augmented noise audio: {total_noise_files} files, '
|
330
|
-
f'{human_readable_size(
|
331
|
-
f'{seconds_to_hms(seconds=
|
310
|
+
f'{human_readable_size(aug_noise_audio_samples * SAMPLE_BYTES, 1)}, '
|
311
|
+
f'{seconds_to_hms(seconds=aug_noise_audio_samples / SAMPLE_RATE)}')
|
332
312
|
|
313
|
+
used_noise_files, used_noise_samples = generate_mixtures(mixdb=mixdb,
|
314
|
+
augmented_targets=augmented_targets,
|
315
|
+
noise_files=noise_files,
|
316
|
+
noise_augmentations=noise_augmentations,
|
317
|
+
mixups=mixups)
|
333
318
|
total_mixtures = len(mixdb.mixtures)
|
334
319
|
if logging:
|
335
320
|
logger.info('')
|
@@ -358,9 +343,9 @@ def genmixdb(location: Location,
|
|
358
343
|
MP_GLOBAL.save_segsnr = save_segsnr
|
359
344
|
|
360
345
|
if logging:
|
361
|
-
logger.info('Generating mixtures
|
346
|
+
logger.info('Generating mixtures')
|
362
347
|
progress = tqdm(total=total_mixtures, disable=not show_progress)
|
363
|
-
mixdb.mixtures = p_tqdm_map(_process_mixture, range(total_mixtures), progress=progress)
|
348
|
+
mixdb.mixtures = p_tqdm_map(_process_mixture, range(total_mixtures), progress=progress, chunksize=10)
|
364
349
|
progress.close()
|
365
350
|
|
366
351
|
total_samples = mixdb.total_samples()
|
@@ -374,7 +359,7 @@ def genmixdb(location: Location,
|
|
374
359
|
stride=mixdb.fg.stride,
|
375
360
|
desc='Actual')
|
376
361
|
noise_files_percent = (float(used_noise_files) / float(total_noise_files)) * 100
|
377
|
-
noise_samples_percent = (float(used_noise_samples) / float(
|
362
|
+
noise_samples_percent = (float(used_noise_samples) / float(aug_noise_audio_samples)) * 100
|
378
363
|
logger.info('')
|
379
364
|
logger.info(f'Used {noise_files_percent:,.0f}% of augmented noise files')
|
380
365
|
logger.info(f'Used {noise_samples_percent:,.0f}% of augmented noise audio')
|
@@ -431,7 +416,7 @@ def _process_mixture(mixid: int) -> MRecord:
|
|
431
416
|
return mrecord
|
432
417
|
|
433
418
|
|
434
|
-
def main():
|
419
|
+
def main() -> None:
|
435
420
|
from docopt import docopt
|
436
421
|
|
437
422
|
import sonusai
|
@@ -80,7 +80,7 @@ class SonusAIModelCheckpoint(tf.keras.callbacks.ModelCheckpoint):
|
|
80
80
|
f.attrs['sonusai_num_classes'] = str(self.num_classes)
|
81
81
|
|
82
82
|
|
83
|
-
def main():
|
83
|
+
def main() -> None:
|
84
84
|
from docopt import docopt
|
85
85
|
|
86
86
|
import sonusai
|
@@ -285,9 +285,9 @@ def main():
|
|
285
285
|
# Find checkpoint file and load weights for prediction and model save
|
286
286
|
checkpoint_name = None
|
287
287
|
for path, dirs, files in walk(output_dir):
|
288
|
-
for
|
289
|
-
if "ckpt" in
|
290
|
-
checkpoint_name =
|
288
|
+
for file in files:
|
289
|
+
if "ckpt" in file:
|
290
|
+
checkpoint_name = file
|
291
291
|
|
292
292
|
if checkpoint_name is not None:
|
293
293
|
logger.info('Using best checkpoint for prediction and model exports')
|
@@ -4,7 +4,6 @@ usage: sonusai [--version] [--help] <command> [<args>...]
|
|
4
4
|
|
5
5
|
The sonusai commands are:
|
6
6
|
calc_metric_spenh Run speech enhancement and analysis
|
7
|
-
calc_metric_spenh_targetf Run speech enhancement and analysis for targetf truth (deprecated)
|
8
7
|
evaluate Evaluate model performance
|
9
8
|
genft Generate feature and truth data
|
10
9
|
genmix Generate mixture and truth data
|
@@ -14,6 +13,7 @@ The sonusai commands are:
|
|
14
13
|
keras_train Train a model using Keras
|
15
14
|
keras_onnx Convert a trained Keras model to ONNX
|
16
15
|
lsdb List information about a mixture database
|
16
|
+
mkmanifest Make ASR manifest JSON file
|
17
17
|
mkwav Make WAV files from a mixture database
|
18
18
|
onnx_predict Run ONNX predict on a trained model
|
19
19
|
plot Plot mixture data
|
@@ -30,18 +30,14 @@ for more information on a specific command.
|
|
30
30
|
from sonusai import logger
|
31
31
|
|
32
32
|
|
33
|
-
def main():
|
34
|
-
from subprocess import call
|
35
|
-
|
33
|
+
def main() -> None:
|
36
34
|
from docopt import docopt
|
37
35
|
|
38
36
|
import sonusai
|
39
|
-
from sonusai import SonusAIError
|
40
37
|
from sonusai.utils import trim_docstring
|
41
38
|
|
42
39
|
commands = (
|
43
40
|
'calc_metric_spenh',
|
44
|
-
'calc_metric_spenh_targetf',
|
45
41
|
'evaluate',
|
46
42
|
'genft',
|
47
43
|
'genmix',
|
@@ -51,6 +47,7 @@ def main():
|
|
51
47
|
'keras_train',
|
52
48
|
'keras_onnx',
|
53
49
|
'lsdb',
|
50
|
+
'mkmanifest',
|
54
51
|
'mkwav',
|
55
52
|
'onnx_predict',
|
56
53
|
'plot',
|
@@ -66,6 +63,11 @@ def main():
|
|
66
63
|
command = args['<command>']
|
67
64
|
argv = args['<args>']
|
68
65
|
|
66
|
+
from subprocess import call
|
67
|
+
|
68
|
+
import sonusai
|
69
|
+
from sonusai import SonusAIError
|
70
|
+
|
69
71
|
if command == 'help':
|
70
72
|
if not argv:
|
71
73
|
exit(call(['sonusai', '-h']))
|
@@ -1,5 +1,5 @@
|
|
1
1
|
# SonusAI mixture utilities
|
2
|
-
from sonusai.mixture.audio import AawareSoxTransformer
|
2
|
+
from sonusai.mixture.audio import Transformer
|
3
3
|
from sonusai.mixture.audio import calculate_audio_from_transform
|
4
4
|
from sonusai.mixture.audio import calculate_transform_from_audio
|
5
5
|
from sonusai.mixture.audio import get_duration
|
@@ -97,7 +97,6 @@ def read_audio(name: Location) -> AudioT:
|
|
97
97
|
from typing import Any
|
98
98
|
|
99
99
|
import numpy as np
|
100
|
-
import sox
|
101
100
|
|
102
101
|
from sonusai import SonusAIError
|
103
102
|
from sonusai.mixture import BIT_DEPTH
|
@@ -196,10 +195,16 @@ def get_duration(audio: AudioT) -> float:
|
|
196
195
|
return len(audio) / SAMPLE_RATE
|
197
196
|
|
198
197
|
|
199
|
-
class AawareSoxTransformer(sox.Transformer):
|
198
|
+
class Transformer(sox.Transformer):
|
199
|
+
"""Override certain sox.Transformer methods
|
200
|
+
"""
|
201
|
+
|
200
202
|
def fir(self, coefficients):
|
201
203
|
"""Use SoX’s FFT convolution engine with given FIR filter coefficients.
|
202
204
|
|
205
|
+
The SonusAI override allows coefficients to be either a list of numbers
|
206
|
+
or a string containing a text file with the coefficients.
|
207
|
+
|
203
208
|
Parameters
|
204
209
|
----------
|
205
210
|
coefficients : list or str
|
@@ -225,6 +230,69 @@ class AawareSoxTransformer(sox.Transformer):
|
|
225
230
|
|
226
231
|
return self
|
227
232
|
|
233
|
+
def tempo(self, factor, audio_type=None, quick=False):
|
234
|
+
"""Time stretch audio without changing pitch.
|
235
|
+
|
236
|
+
This effect uses the WSOLA algorithm. The audio is chopped up into
|
237
|
+
segments which are then shifted in the time domain and overlapped
|
238
|
+
(cross-faded) at points where their waveforms are most similar as
|
239
|
+
determined by measurement of least squares.
|
240
|
+
|
241
|
+
The SonusAI override does not generate a warning for small factors.
|
242
|
+
The sox.Transformer's implementation of stretch does not invert
|
243
|
+
the factor even though it says that it does; this invalidates the
|
244
|
+
factor size check and produces the wrong result.
|
245
|
+
|
246
|
+
Parameters
|
247
|
+
----------
|
248
|
+
factor : float
|
249
|
+
The ratio of new tempo to the old tempo.
|
250
|
+
For ex. 1.1 speeds up the tempo by 10%; 0.9 slows it down by 10%.
|
251
|
+
audio_type : str
|
252
|
+
Type of audio, which optimizes algorithm parameters. One of:
|
253
|
+
* m : Music,
|
254
|
+
* s : Speech,
|
255
|
+
* l : Linear (useful when factor is close to 1),
|
256
|
+
quick : bool, default=False
|
257
|
+
If True, this effect will run faster but with lower sound quality.
|
258
|
+
|
259
|
+
See Also
|
260
|
+
--------
|
261
|
+
stretch, speed, pitch
|
262
|
+
|
263
|
+
"""
|
264
|
+
from sox.core import is_number
|
265
|
+
|
266
|
+
from sonusai import SonusAIError
|
267
|
+
from sonusai import logger
|
268
|
+
|
269
|
+
if not is_number(factor) or factor <= 0:
|
270
|
+
raise SonusAIError('factor must be a positive number')
|
271
|
+
|
272
|
+
if factor < 0.5 or factor > 2:
|
273
|
+
logger.warning('Using an extreme time stretching factor. Quality of results will be poor')
|
274
|
+
|
275
|
+
if audio_type not in [None, 'm', 's', 'l']:
|
276
|
+
raise SonusAIError("audio_type must be one of None, 'm', 's', or 'l'.")
|
277
|
+
|
278
|
+
if not isinstance(quick, bool):
|
279
|
+
raise SonusAIError('quick must be a boolean')
|
280
|
+
|
281
|
+
effect_args = ['tempo']
|
282
|
+
|
283
|
+
if quick:
|
284
|
+
effect_args.append('-q')
|
285
|
+
|
286
|
+
if audio_type is not None:
|
287
|
+
effect_args.append('-{}'.format(audio_type))
|
288
|
+
|
289
|
+
effect_args.append('{:f}'.format(factor))
|
290
|
+
|
291
|
+
self.effects.extend(effect_args)
|
292
|
+
self.effects_log.append('tempo')
|
293
|
+
|
294
|
+
return self
|
295
|
+
|
228
296
|
|
229
297
|
def validate_input_file(input_filepath: str) -> None:
|
230
298
|
from os.path import exists
|
@@ -239,4 +307,4 @@ def validate_input_file(input_filepath: str) -> None:
|
|
239
307
|
|
240
308
|
ext = splitext(input_filepath)[1][1:].lower()
|
241
309
|
if ext not in VALID_FORMATS:
|
242
|
-
raise SonusAIError(f'This installation of
|
310
|
+
raise SonusAIError(f'This installation of SoX cannot process .{ext} files')
|
@@ -173,17 +173,17 @@ def apply_augmentation(audio: AudioT, augmentation: Augmentation, length_common_
|
|
173
173
|
:param length_common_denominator: Pad resulting audio to be a multiple of this
|
174
174
|
:return: Augmented audio
|
175
175
|
"""
|
176
|
-
import sox
|
177
176
|
|
178
177
|
from sonusai import SonusAIError
|
179
178
|
from sonusai.mixture import BIT_DEPTH
|
180
179
|
from sonusai.mixture import CHANNEL_COUNT
|
181
180
|
from sonusai.mixture import ENCODING
|
182
181
|
from sonusai.mixture import SAMPLE_RATE
|
182
|
+
from sonusai.mixture import Transformer
|
183
183
|
|
184
184
|
try:
|
185
185
|
# Apply augmentations
|
186
|
-
tfm = sox.Transformer()
|
186
|
+
tfm = Transformer()
|
187
187
|
tfm.set_input_format(rate=SAMPLE_RATE, bits=BIT_DEPTH, channels=CHANNEL_COUNT, encoding=ENCODING)
|
188
188
|
tfm.set_output_format(rate=SAMPLE_RATE, bits=BIT_DEPTH, channels=CHANNEL_COUNT, encoding=ENCODING)
|
189
189
|
|
@@ -201,11 +201,7 @@ def apply_augmentation(audio: AudioT, augmentation: Augmentation, length_common_
|
|
201
201
|
tfm.pitch(n_semitones=float(augmentation.pitch) / 100)
|
202
202
|
|
203
203
|
if augmentation.tempo is not None:
|
204
|
-
factor = float(augmentation.tempo)
|
205
|
-
if abs(factor - 1.0) <= 0.1:
|
206
|
-
tfm.stretch(factor=factor)
|
207
|
-
else:
|
208
|
-
tfm.tempo(factor=factor, audio_type='s')
|
204
|
+
tfm.tempo(factor=float(augmentation.tempo), audio_type='s')
|
209
205
|
|
210
206
|
if augmentation.eq1 is not None:
|
211
207
|
tfm.equalizer(frequency=augmentation.eq1[0], width_q=augmentation.eq1[1],
|
@@ -240,11 +236,10 @@ def apply_ir(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
|
|
240
236
|
:return: Augmented audio
|
241
237
|
"""
|
242
238
|
import numpy as np
|
243
|
-
import sox
|
244
239
|
|
245
240
|
from sonusai import SonusAIError
|
246
|
-
from sonusai.mixture import AawareSoxTransformer
|
247
241
|
from sonusai.mixture import SAMPLE_RATE
|
242
|
+
from sonusai.mixture import Transformer
|
248
243
|
from sonusai.utils import linear_to_db
|
249
244
|
|
250
245
|
max_abs_audio = max(abs(audio))
|
@@ -257,7 +252,7 @@ def apply_ir(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
|
|
257
252
|
max_db = linear_to_db(max_abs_audio)
|
258
253
|
|
259
254
|
# Convert audio to IR sample rate and normalize to -20 dBFS to avoid clipping when applying IR
|
260
|
-
tfm =
|
255
|
+
tfm = Transformer()
|
261
256
|
tfm.set_output_format(rate=ir.sample_rate)
|
262
257
|
tfm.norm(db_level=-20)
|
263
258
|
audio_out = tfm.build_array(input_array=audio, sample_rate_in=SAMPLE_RATE)
|
@@ -267,7 +262,7 @@ def apply_ir(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
|
|
267
262
|
audio_out = np.pad(array=audio_out, pad_width=(pad, pad))
|
268
263
|
|
269
264
|
# Apply IR and convert back to global sample rate
|
270
|
-
tfm = AawareSoxTransformer()
|
265
|
+
tfm = Transformer()
|
271
266
|
tfm.set_output_format(rate=SAMPLE_RATE)
|
272
267
|
tfm.fir(coefficients=ir.coefficients_file)
|
273
268
|
try:
|
@@ -276,7 +271,7 @@ def apply_ir(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
|
|
276
271
|
raise SonusAIError(f'Error applying IR: {e}')
|
277
272
|
|
278
273
|
# Reset level to previous max value
|
279
|
-
tfm =
|
274
|
+
tfm = Transformer()
|
280
275
|
tfm.norm(db_level=max_db)
|
281
276
|
audio_out = tfm.build_array(input_array=audio_out, sample_rate_in=SAMPLE_RATE)
|
282
277
|
|
@@ -296,8 +291,10 @@ def estimate_augmented_length_from_length(length: int,
|
|
296
291
|
:param length_common_denominator: Pad resulting audio to be a multiple of this
|
297
292
|
:return: Estimated length of augmented audio
|
298
293
|
"""
|
294
|
+
import numpy as np
|
295
|
+
|
299
296
|
if augmentation.tempo is not None:
|
300
|
-
length = int(length
|
297
|
+
length = int(np.round(length / float(augmentation.tempo)))
|
301
298
|
|
302
299
|
length += get_pad_length(length, length_common_denominator)
|
303
300
|
|
@@ -250,7 +250,7 @@ def get_target_files(config: dict, show_progress: bool = False) -> TargetFiles:
|
|
250
250
|
for target in config['targets']]))
|
251
251
|
|
252
252
|
progress = tqdm(total=len(target_files), disable=not show_progress)
|
253
|
-
target_files = p_tqdm_map(_get_samples, target_files, progress=progress)
|
253
|
+
target_files = p_tqdm_map(_get_samples, target_files, progress=progress, chunksize=10)
|
254
254
|
progress.close()
|
255
255
|
|
256
256
|
max_class = get_max_class(config['num_classes'], config['truth_mode'] == 'mutex')
|
@@ -394,7 +394,7 @@ def get_noise_files(config: dict, show_progress: bool = False) -> NoiseFiles:
|
|
394
394
|
noise_files = list(chain.from_iterable([_append_noise_files(noise_file=noise) for noise in config['noises']]))
|
395
395
|
|
396
396
|
progress = tqdm(total=len(noise_files), disable=not show_progress)
|
397
|
-
noise_files = p_tqdm_map(_get_samples, noise_files, progress=progress)
|
397
|
+
noise_files = p_tqdm_map(_get_samples, noise_files, progress=progress, chunksize=10)
|
398
398
|
progress.close()
|
399
399
|
|
400
400
|
return dataclass_from_dict(NoiseFiles, noise_files)
|