sonusai 0.15.9__tar.gz → 0.16.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sonusai-0.15.9 → sonusai-0.16.1}/PKG-INFO +7 -25
- {sonusai-0.15.9 → sonusai-0.16.1}/README.rst +5 -5
- {sonusai-0.15.9 → sonusai-0.16.1}/pyproject.toml +5 -25
- sonusai-0.16.1/sonusai/__init__.py +87 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/audiofe.py +111 -106
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/calc_metric_spenh.py +38 -22
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/genft.py +15 -6
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/genmix.py +14 -6
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/genmixdb.py +15 -7
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/gentcst.py +13 -6
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/lsdb.py +15 -5
- sonusai-0.16.1/sonusai/main.py +90 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/__init__.py +1 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/config.py +1 -2
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mkmanifest.py +43 -8
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mkwav.py +15 -6
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/onnx_predict.py +16 -6
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/plot.py +16 -6
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/post_spenh_targetf.py +13 -6
- sonusai-0.16.1/sonusai/summarize_metric_spenh.py +71 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/tplot.py +14 -6
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/__init__.py +4 -7
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/asl_p56.py +3 -3
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/asr.py +35 -8
- sonusai-0.16.1/sonusai/utils/asr_functions/__init__.py +1 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/asr_functions/aaware_whisper.py +2 -2
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/asr_manifest_functions/__init__.py +1 -0
- sonusai-0.16.1/sonusai/utils/asr_manifest_functions/mcgill_speech.py +29 -0
- sonusai-0.15.9/sonusai/utils/trim_docstring.py → sonusai-0.16.1/sonusai/utils/docstring.py +20 -0
- sonusai-0.16.1/sonusai/utils/model_utils.py +30 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/onnx_utils.py +19 -45
- sonusai-0.15.9/sonusai/__init__.py +0 -55
- sonusai-0.15.9/sonusai/data_generator/__init__.py +0 -5
- sonusai-0.15.9/sonusai/data_generator/dataset_from_mixdb.py +0 -143
- sonusai-0.15.9/sonusai/data_generator/keras_from_mixdb.py +0 -169
- sonusai-0.15.9/sonusai/data_generator/torch_from_mixdb.py +0 -122
- sonusai-0.15.9/sonusai/keras_onnx.py +0 -86
- sonusai-0.15.9/sonusai/keras_predict.py +0 -231
- sonusai-0.15.9/sonusai/keras_train.py +0 -334
- sonusai-0.15.9/sonusai/main.py +0 -93
- sonusai-0.15.9/sonusai/torchl_onnx.py +0 -216
- sonusai-0.15.9/sonusai/torchl_predict.py +0 -542
- sonusai-0.15.9/sonusai/torchl_train.py +0 -223
- sonusai-0.15.9/sonusai/utils/asr_functions/__init__.py +0 -6
- sonusai-0.15.9/sonusai/utils/asr_functions/aixplain_whisper.py +0 -59
- sonusai-0.15.9/sonusai/utils/asr_functions/data.py +0 -16
- sonusai-0.15.9/sonusai/utils/asr_functions/deepgram.py +0 -97
- sonusai-0.15.9/sonusai/utils/asr_functions/fastwhisper.py +0 -90
- sonusai-0.15.9/sonusai/utils/asr_functions/google.py +0 -95
- sonusai-0.15.9/sonusai/utils/asr_functions/whisper.py +0 -49
- sonusai-0.15.9/sonusai/utils/keras_utils.py +0 -226
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/aawscd_probwrite.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/data/__init__.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/data/genmixdb.yml +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/data/speech_ma01_01.wav +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/data/whitenoise.wav +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/doc/__init__.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/doc/doc.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/doc.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/__init__.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/calc_class_weights.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/calc_optimal_thresholds.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/calc_pcm.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/calc_pesq.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/calc_sa_sdr.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/calc_sample_weights.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/calc_wer.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/calc_wsdr.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/class_summary.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/confusion_matrix_summary.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/one_hot.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/snr_summary.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/audio.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/augmentation.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/class_count.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/constants.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/datatypes.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/eq_rule_is_valid.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/feature.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/generation.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/helpers.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/log_duration_and_sizes.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/mapped_snr_f.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/mixdb.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/soundfile_audio.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/sox_audio.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/sox_augmentation.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/spectral_mask.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/target_class_balancing.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/targets.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/tokenized_shell_vars.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/torchaudio_audio.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/torchaudio_augmentation.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/truth.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/truth_functions/__init__.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/truth_functions/crm.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/truth_functions/data.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/truth_functions/energy.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/truth_functions/file.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/truth_functions/phoneme.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/truth_functions/sed.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/truth_functions/target.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/queries/__init__.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/queries/queries.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/asr_manifest_functions/data.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/asr_manifest_functions/librispeech.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/audio_devices.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/braced_glob.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/calculate_input_shape.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/convert_string_to_number.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/create_timestamp.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/create_ts_name.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/dataclass_from_dict.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/db.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/energy_f.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/engineering_number.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/get_frames_per_batch.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/get_label_names.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/grouper.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/human_readable_size.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/max_text_width.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/numeric_conversion.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/parallel.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/print_mixture_details.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/ranges.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/read_mixture_data.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/read_predict_data.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/reshape.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/seconds_to_hms.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/stacked_complex.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/stratified_shuffle_split.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/wave.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/yes_or_no.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/vars.py +0 -0
{sonusai-0.15.9 → sonusai-0.16.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sonusai
-Version: 0.15.9
+Version: 0.16.1
 Summary: Framework for building deep neural network models for sound, speech, and voice AI
 Home-page: https://aaware.com
 License: GPL-3.0-only
@@ -15,57 +15,39 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
-Requires-Dist: aixplain (>=0.2.6,<0.3.0)
-Requires-Dist: bitarray (>=2.9.2,<3.0.0)
-Requires-Dist: ctranslate2 (==4.1.0)
 Requires-Dist: dataclasses-json (>=0.6.1,<0.7.0)
-Requires-Dist: deepgram-sdk (>=3.0.0,<4.0.0)
 Requires-Dist: docopt (>=0.6.2,<0.7.0)
-Requires-Dist: einops (>=0.7.0,<0.8.0)
-Requires-Dist: faster-whisper (>=1.0.1,<2.0.0)
-Requires-Dist: geomloss (>=0.2.6,<0.3.0)
 Requires-Dist: h5py (>=3.11.0,<4.0.0)
-Requires-Dist: hydra-core (>=1.3.2,<2.0.0)
 Requires-Dist: jiwer (>=3.0.3,<4.0.0)
-Requires-Dist: keras (>=3.1.1,<4.0.0)
-Requires-Dist: keras-tuner (>=1.4.7,<2.0.0)
 Requires-Dist: librosa (>=0.10.1,<0.11.0)
-Requires-Dist: lightning (>=2.2,<2.3)
 Requires-Dist: matplotlib (>=3.8.0,<4.0.0)
-Requires-Dist: omegaconf (>=2.3.0,<3.0.0)
 Requires-Dist: onnx (>=1.14.1,<2.0.0)
 Requires-Dist: onnxruntime (>=1.16.1,<2.0.0)
 Requires-Dist: paho-mqtt (>=2.0.0,<3.0.0)
 Requires-Dist: pandas (>=2.1.1,<3.0.0)
 Requires-Dist: pesq (>=0.0.4,<0.0.5)
-Requires-Dist: pyaaware (>=1.5.
+Requires-Dist: pyaaware (>=1.5.7,<2.0.0)
 Requires-Dist: pyaudio (>=0.2.14,<0.3.0)
 Requires-Dist: pydub (>=0.25.1,<0.26.0)
 Requires-Dist: pystoi (>=0.4.0,<0.5.0)
-Requires-Dist: python-magic (>=0.4.27,<0.5.0)
 Requires-Dist: requests (>=2.31.0,<3.0.0)
-Requires-Dist: sacrebleu (>=2.4.2,<3.0.0)
 Requires-Dist: samplerate (>=0.2.1,<0.3.0)
 Requires-Dist: soundfile (>=0.12.1,<0.13.0)
 Requires-Dist: sox (>=1.4.1,<2.0.0)
-Requires-Dist: speechrecognition (>=3.10.1,<4.0.0)
-Requires-Dist: tensorflow (>=2.15.0,<3.0.0)
-Requires-Dist: tf2onnx (>=1.15.1,<2.0.0)
 Requires-Dist: torch (>=2.2,<2.3)
 Requires-Dist: torchaudio (>=2.2,<2.3)
-Requires-Dist: torchinfo (>=1.8.0,<2.0.0)
 Requires-Dist: tqdm (>=4.66.1,<5.0.0)
 Description-Content-Type: text/x-rst
 
-
+SonusAI: Framework for simplified creation of deep NN models for sound, speech, and voice AI
 
-
+SonusAI includes functions for pre-processing training and validation data and
 creating performance metrics reports for key types of Keras models:
 - recurrent, convolutional, or a combination (i.e. RCNNs)
 - binary, multiclass single-label, multiclass multi-label, and regression
 - training with data augmentations: noise mixing, pitch and time stretch, etc.
 
-
-- Aaware Inc. sonusai
-- Keras model scripts: User python scripts for
+SonusAI python functions are used by:
+- Aaware Inc. sonusai framework: Easily create train/validation data, run prediction, evaluate model performance
+- Keras model scripts: User python scripts for Keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for training rnn-based models like CRNN's, DSCRNN's, etc. in Keras.
 
{sonusai-0.15.9 → sonusai-0.16.1}/README.rst

@@ -1,11 +1,11 @@
-
+SonusAI: Framework for simplified creation of deep NN models for sound, speech, and voice AI
 
-
+SonusAI includes functions for pre-processing training and validation data and
 creating performance metrics reports for key types of Keras models:
 - recurrent, convolutional, or a combination (i.e. RCNNs)
 - binary, multiclass single-label, multiclass multi-label, and regression
 - training with data augmentations: noise mixing, pitch and time stretch, etc.
 
-
-- Aaware Inc. sonusai
-- Keras model scripts: User python scripts for
+SonusAI python functions are used by:
+- Aaware Inc. sonusai framework: Easily create train/validation data, run prediction, evaluate model performance
+- Keras model scripts: User python scripts for Keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for training rnn-based models like CRNN's, DSCRNN's, etc. in Keras.
{sonusai-0.15.9 → sonusai-0.16.1}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "sonusai"
-version = "0.15.9"
+version = "0.16.1"
 description = "Framework for building deep neural network models for sound, speech, and voice AI"
 authors = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
 maintainers = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
@@ -15,60 +15,40 @@ aawscd_probwrite = 'sonusai.aawscd_probwrite:main'
 
 [tool.poetry.dependencies]
 PyYAML = "^6.0.1"
-aixplain = "^0.2.6"
-bitarray = "^2.9.2"
-ctranslate2 = "4.1.0"
 dataclasses-json = "^0.6.1"
-deepgram-sdk = "^3.0.0"
 docopt = "^0.6.2"
-einops = "^0.7.0"
-faster-whisper = "^1.0.1"
-geomloss = "^0.2.6"
 h5py = "^3.11.0"
-hydra-core = "^1.3.2"
 jiwer = "^3.0.3"
-keras = "^3.1.1"
-keras-tuner = "^1.4.7"
 librosa = "^0.10.1"
-lightning = "~2.2"
 matplotlib = "^3.8.0"
-omegaconf = "^2.3.0"
 onnx = "^1.14.1"
-#onnxruntime-gpu = "^1.16.1"
 onnxruntime = "^1.16.1"
-#openai-whisper = "^20231117"
 paho-mqtt = "^2.0.0"
 pandas = "^2.1.1"
 pesq = "^0.0.4"
-pyaaware = "^1.5.
+pyaaware = "^1.5.7"
 pyaudio = "^0.2.14"
 pydub = "^0.25.1"
 pystoi = "^0.4.0"
 python = ">=3.9,<3.12"
-python-magic = "^0.4.27"
 requests = "^2.31.0"
-sacrebleu = "^2.4.2"
 samplerate = "^0.2.1"
 soundfile = "^0.12.1"
-speechrecognition = "^3.10.1"
 sox = "^1.4.1"
-tensorflow = "^2.15.0"
-tf2onnx = "^1.15.1"
 torch = "~2.2"
 torchaudio = "~2.2"
-torchinfo = "^1.8.0"
 tqdm = "^4.66.1"
 
 [tool.poetry.group.dev.dependencies]
+einops = "^0.8.0"
 icecream = "^2.1.3"
-ipython = "^8.16.1"
-jupyter = "^1.0.0"
 mypy = "^1.6.0"
 mypy-extensions = "^1.0.0"
 pytest = "^8.1.1"
+sonusai-asr-cloud = "^0.1.0"
+sonusai-torchl = "^0.1.0"
 types-pyyaml = "^6.0.12.12"
 types-requests = "^2.31.0.8"
-yappi = "^1.4.0"
 
 [tool.mypy]
 ignore_missing_imports = true
sonusai-0.16.1/sonusai/__init__.py (new file)

@@ -0,0 +1,87 @@
+import logging
+from importlib import metadata
+from os.path import dirname
+
+__version__ = metadata.version(__package__)
+BASEDIR = dirname(__file__)
+
+commands_doc = """
+   audiofe                  Audio front end
+   calc_metric_spenh        Run speech enhancement and analysis
+   doc                      Documentation
+   genft                    Generate feature and truth data
+   genmix                   Generate mixture and truth data
+   genmixdb                 Generate a mixture database
+   gentcst                  Generate target configuration from a subdirectory tree
+   lsdb                     List information about a mixture database
+   mkmanifest               Make ASR manifest JSON file
+   mkwav                    Make WAV files from a mixture database
+   onnx_predict             Run ONNX predict on a trained model
+   plot                     Plot mixture data
+   post_spenh_targetf       Run post-processing for speech enhancement targetf data
+   summarize_metric_spenh   Summarize speech enhancement and analysis results
+   tplot                    Plot truth data
+   vars                     List custom SonusAI variables
+"""
+
+# create logger
+logger = logging.getLogger('sonusai')
+logger.setLevel(logging.DEBUG)
+formatter = logging.Formatter('%(message)s')
+console_handler = logging.StreamHandler()
+console_handler.setLevel(logging.DEBUG)
+console_handler.setFormatter(formatter)
+logger.addHandler(console_handler)
+
+
+class SonusAIError(Exception):
+    def __init__(self, value):
+        logger.error(value)
+
+
+# create file handler
+def create_file_handler(filename: str) -> None:
+    fh = logging.FileHandler(filename=filename, mode='w')
+    fh.setLevel(logging.DEBUG)
+    fh.setFormatter(formatter)
+    logger.addHandler(fh)
+
+
+# update console handler
+def update_console_handler(verbose: bool) -> None:
+    if not verbose:
+        logger.removeHandler(console_handler)
+        console_handler.setLevel(logging.INFO)
+        logger.addHandler(console_handler)
+
+
+# write initial log message
+def initial_log_messages(name: str, subprocess: str = None) -> None:
+    from datetime import datetime
+    from getpass import getuser
+    from os import getcwd
+    from socket import gethostname
+    from sys import argv
+
+    if subprocess is None:
+        logger.info(f'SonusAI {__version__}')
+    else:
+        logger.info(f'SonusAI {subprocess}')
+    logger.info(f'{name}')
+    logger.info('')
+    logger.debug(f'Host: {gethostname()}')
+    logger.debug(f'User: {getuser()}')
+    logger.debug(f'Directory: {getcwd()}')
+    logger.debug(f'Date: {datetime.now()}')
+    logger.debug(f'Command: {" ".join(argv)}')
+    logger.debug('')
+
+
+def commands_list(doc: str = commands_doc) -> list[str]:
+    lines = doc.split('\n')
+    commands = []
+    for line in lines:
+        command = line.strip().split(' ').pop(0)
+        if command:
+            commands.append(command)
+    return commands
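Taken together, these additions centralize logging for every SonusAI command. A minimal usage sketch, assuming only what the new module above defines (the 'mycmd' names are illustrative, not taken from the diff):

    # Minimal sketch of a command script using the helpers from sonusai/__init__.py.
    # 'mycmd.log' and 'mycmd' are illustrative names.
    from sonusai import commands_list, create_file_handler, initial_log_messages, logger, update_console_handler

    def main(verbose: bool = False) -> None:
        create_file_handler('mycmd.log')  # mirror all DEBUG output to a log file
        update_console_handler(verbose)   # console stays at DEBUG only when verbose
        initial_log_messages('mycmd')     # version/host/user/command banner

        logger.info('Available commands: ' + ', '.join(commands_list()))

    if __name__ == '__main__':
        main(verbose=True)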
{sonusai-0.15.9 → sonusai-0.16.1}/sonusai/audiofe.py

@@ -24,6 +24,10 @@ audiofe_capture_<TIMESTAMP>.wav.
 If a model is specified, run prediction on audio data from this model. Then compute the inverse transform of the
 prediction result and save to audiofe_predict_<TIMESTAMP>.wav.
 
+Also, if a model is specified, save plots of the capture data (time-domain signal and feature) to
+audiofe_capture_<TIMESTAMP>.png and predict data (time-domain signal and feature) to
+audiofe_predict_<TIMESTAMP>.png.
+
 If an ASR is specified, run ASR on the captured audio and print the results. In addition, if a model was also specified,
 run ASR on the predict audio and print the results.
 
@@ -31,41 +35,32 @@ If the debug option is enabled, write capture audio, feature, reconstruct audio,
 audiofe_<TIMESTAMP>.h5.
 
 """
-
-from select import select
-from sys import stdin
-from typing import Any
+import signal
 
-import h5py
 import numpy as np
-
-import torch
-from docopt import docopt
-from docopt import printable_usage
-
-import sonusai
-from sonusai import create_file_handler
-from sonusai import initial_log_messages
-from sonusai import logger
-from sonusai import update_console_handler
+
 from sonusai.mixture import AudioT
-
-
-
-
-
-from sonusai
-
-
-
-
-
-
+
+
+def signal_handler(_sig, _frame):
+    import sys
+
+    from sonusai import logger
+
+    logger.info('Canceled due to keyboard interrupt')
+    sys.exit(1)
+
+
+signal.signal(signal.SIGINT, signal_handler)
 
 
 def main() -> None:
+    from docopt import docopt
+
+    import sonusai
+    from sonusai.utils import trim_docstring
+
     args = docopt(trim_docstring(__doc__), version=sonusai.__version__, options_first=True)
-    ts = create_timestamp()
 
     verbose = args['--verbose']
     length = float(args['--length'])
@@ -77,8 +72,34 @@ def main() -> None:
     debug = args['--debug']
     show = args['--show']
 
-
-
+    from os.path import exists
+
+    import h5py
+    import pyaudio
+    import torch
+    from docopt import printable_usage
+    from sonusai_torchl.utils import load_torchl_ckpt_model
+
+    from sonusai import create_file_handler
+    from sonusai import initial_log_messages
+    from sonusai import logger
+    from sonusai import update_console_handler
+    from sonusai.mixture import SAMPLE_RATE
+    from sonusai.mixture import get_audio_from_feature
+    from sonusai.mixture import get_feature_from_audio
+    from sonusai.utils import calc_asr
+    from sonusai.utils import create_timestamp
+    from sonusai.utils import get_input_devices
+    from sonusai.utils import trim_docstring
+    from sonusai.utils import write_wav
+
+    ts = create_timestamp()
+    capture_name = f'audiofe_capture_{ts}'
+    capture_wav = capture_name + '.wav'
+    capture_png = capture_name + '.png'
+    predict_name = f'audiofe_predict_{ts}'
+    predict_wav = predict_name + '.wav'
+    predict_png = predict_name + '.png'
     h5_name = f'audiofe_{ts}.h5'
 
     if model_name is not None and ckpt_name is None:
@@ -109,9 +130,9 @@ def main() -> None:
         logger.exception(e)
         return
 
-    write_wav(
+    write_wav(capture_wav, capture_audio, SAMPLE_RATE)
     logger.info('')
-    logger.info(f'Wrote capture audio with shape {capture_audio.shape} to {
+    logger.info(f'Wrote capture audio with shape {capture_audio.shape} to {capture_wav}')
     if debug:
         with h5py.File(h5_name, 'a') as f:
             if 'capture_audio' in f:
@@ -124,9 +145,13 @@ def main() -> None:
         logger.info(f'Capture audio ASR: {capture_asr}')
 
     if model_name is not None:
-        model =
+        model = load_torchl_ckpt_model(model_name=model_name, ckpt_name=ckpt_name)
+        model.eval()
 
         feature = get_feature_from_audio(audio=capture_audio, feature_mode=model.hparams.feature)
+        save_figure(capture_png, capture_audio, feature)
+        logger.info(f'Wrote capture plots to {capture_png}')
+
         if debug:
             with h5py.File(h5_name, 'a') as f:
                 if 'feature' in f:
@@ -134,22 +159,9 @@ def main() -> None:
                 f.create_dataset('feature', data=feature)
             logger.info(f'Wrote feature with shape {feature.shape} to {h5_name}')
 
-        # if debug:
-        #     reconstruct_name = f'audiofe_reconstruct_{ts}.wav'
-        #     reconstruct_audio = get_audio_from_feature(feature=feature, feature_mode=model.hparams.feature)
-        #     samples = min(len(capture_audio), len(reconstruct_audio))
-        #     max_err = np.max(np.abs(capture_audio[:samples] - reconstruct_audio[:samples]))
-        #     logger.info(f'Maximum error between capture and reconstruct: {max_err}')
-        #     write_wav(reconstruct_name, reconstruct_audio, SAMPLE_RATE)
-        #     logger.info(f'Wrote reconstruct audio with shape {reconstruct_audio.shape} to {reconstruct_name}')
-        #     with h5py.File(h5_name, 'a') as f:
-        #         if 'reconstruct_audio' in f:
-        #             del f['reconstruct_audio']
-        #         f.create_dataset('reconstruct_audio', data=reconstruct_audio)
-        #     logger.info(f'Wrote reconstruct audio with shape {reconstruct_audio.shape} to {h5_name}')
-
         with torch.no_grad():
-
+            # model wants batch x timesteps x feature_parameters
+            predict = model(torch.tensor(feature).permute((1, 0, 2))).permute(1, 0, 2).numpy()
         if debug:
             with h5py.File(h5_name, 'a') as f:
                 if 'predict' in f:
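The permute pair above is a shape round-trip. A toy sketch of the intent, assuming the feature tensor is laid out (timesteps, batch, feature_parameters) — an assumption inferred from the comment in the diff rather than stated by it:

    import torch

    # Toy shape round-trip for the predict line above; sizes are illustrative.
    feature = torch.zeros(100, 1, 64)    # (timesteps, batch, feature_parameters)
    batched = feature.permute(1, 0, 2)   # -> (1, 100, 64): batch x timesteps x params
    restored = batched.permute(1, 0, 2)  # undo after prediction -> (100, 1, 64)
    assert restored.shape == feature.shape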
@@ -157,9 +169,9 @@ def main() -> None:
                 f.create_dataset('predict', data=predict)
             logger.info(f'Wrote predict with shape {predict.shape} to {h5_name}')
 
-        predict_audio = get_audio_from_feature(feature=predict
-        write_wav(
-        logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {
+        predict_audio = get_audio_from_feature(feature=predict, feature_mode=model.hparams.feature)
+        write_wav(predict_wav, predict_audio, SAMPLE_RATE)
+        logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {predict_wav}')
         if debug:
             with h5py.File(h5_name, 'a') as f:
                 if 'predict_audio' in f:
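For reference, the debug datasets accumulated above ('capture_audio', 'feature', 'predict', 'predict_audio') can be read back with plain h5py; the timestamped file name below is illustrative:

    import h5py

    # Read back the debug datasets audiofe writes when the debug option is enabled.
    with h5py.File('audiofe_20240101120000.h5', 'r') as f:
        capture_audio = f['capture_audio'][:]
        feature = f['feature'][:]
        predict = f['predict'][:]
        predict_audio = f['predict_audio'][:]
    print(capture_audio.shape, feature.shape, predict.shape, predict_audio.shape)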
@@ -167,69 +179,26 @@ def main() -> None:
                 f.create_dataset('predict_audio', data=predict_audio)
             logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {h5_name}')
 
+        save_figure(predict_png, predict_audio, predict)
+        logger.info(f'Wrote predict plots to {predict_png}')
+
     if asr_name is not None:
         predict_asr = calc_asr(predict_audio, engine=asr_name, whisper_model_name=whisper_name).text
         logger.info(f'Predict audio ASR: {predict_asr}')
 
 
-def
-
-
-
-    # Load checkpoint first to get hparams if available
-    try:
-        checkpoint = torch.load(ckpt_name, map_location=lambda storage, loc: storage)
-    except Exception as e:
-        logger.exception(f'Error: could not load checkpoint from {ckpt_name}: {e}')
-        raise SystemExit(1)
-
-    # Import model definition file
-    logger.info(f'Importing {model_name}')
-    litemodule = import_keras_model(model_name)
-
-    if 'hyper_parameters' in checkpoint:
-        logger.info(f'Found checkpoint file with hyper-parameters')
-        hparams = checkpoint['hyper_parameters']
-        if hparams['batch_size'] != batch_size:
-            logger.info(
-                f'Overriding model default batch_size of {hparams["batch_size"]} with batch_size of {batch_size}')
-            hparams["batch_size"] = batch_size
-
-        if hparams['timesteps'] != 0 and timesteps == 0:
-            timesteps = hparams['timesteps']
-            logger.warning(f'Using model default timesteps of {timesteps}')
-
-        logger.info(f'Building model with {len(hparams)} total hparams')
-        try:
-            model = litemodule.MyHyperModel(**hparams)
-        except Exception as e:
-            logger.exception(f'Error: model build (MyHyperModel) in {model_name} failed: {e}')
-            raise SystemExit(1)
-    else:
-        logger.info(f'Found checkpoint file with no hyper-parameters')
-        logger.info(f'Building model with defaults')
-        try:
-            tmp = litemodule.MyHyperModel()
-        except Exception as e:
-            logger.exception(f'Error: model build (MyHyperModel) in {model_name} failed: {e}')
-            raise SystemExit(1)
-
-        if tmp.batch_size != batch_size:
-            logger.info(f'Overriding model default batch_size of {tmp.batch_size} with batch_size of {batch_size}')
-
-        if tmp.timesteps != 0 and timesteps == 0:
-            timesteps = tmp.timesteps
-            logger.warning(f'Using model default timesteps of {timesteps}')
-
-        model = litemodule.MyHyperModel(timesteps=timesteps, batch_size=batch_size)
+def get_frames_from_device(input_name: str | None, length: float, chunk: int = 1024) -> AudioT:
+    from select import select
+    from sys import stdin
 
-
-    model.load_state_dict(checkpoint["state_dict"])
-    model.eval()
-    return model
+    import pyaudio
 
+    from sonusai import logger
+    from sonusai.mixture import CHANNEL_COUNT
+    from sonusai.mixture import SAMPLE_RATE
+    from sonusai.utils import get_input_device_index_by_name
+    from sonusai.utils import get_input_devices
 
-def get_frames_from_device(input_name: str | None, length: float, chunk: int = 1024) -> AudioT:
     p = pyaudio.PyAudio()
 
     input_devices = get_input_devices(p)
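get_frames_from_device now resolves the capture device through get_input_devices()/get_input_device_index_by_name(). A rough sketch of the underlying PyAudio enumeration those helpers presumably wrap (their exact behavior is an assumption; the PyAudio calls themselves are standard):

    import pyaudio

    # List capture-capable devices, the kind of lookup the helpers perform.
    p = pyaudio.PyAudio()
    for index in range(p.get_device_count()):
        info = p.get_device_info_by_index(index)
        if int(info.get('maxInputChannels', 0)) > 0:
            print(index, info['name'])
    p.terminate()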
@@ -280,6 +249,10 @@ def get_frames_from_device(input_name: str | None, length: float, chunk: int = 1
 
 
 def get_frames_from_file(input_name: str, length: float) -> AudioT:
+    from sonusai import logger
+    from sonusai.mixture import SAMPLE_RATE
+    from sonusai.mixture import read_audio
+
     logger.info(f'Capturing from {input_name}')
     frames = read_audio(input_name)
     if length != -1:
@@ -289,5 +262,37 @@ def get_frames_from_file(input_name: str, length: float) -> AudioT:
     return frames
 
 
+def save_figure(name: str, audio: np.ndarray, feature: np.ndarray) -> None:
+    import matplotlib.pyplot as plt
+    from scipy.interpolate import CubicSpline
+
+    from sonusai.mixture import SAMPLE_RATE
+    from sonusai.utils import unstack_complex
+
+    spectrum = 20 * np.log(np.abs(np.squeeze(unstack_complex(feature)).transpose()))
+    frames = spectrum.shape[1]
+    samples = (len(audio) // frames) * frames
+    length_in_s = samples / SAMPLE_RATE
+    interp = samples // frames
+
+    ts = np.arange(0.0, length_in_s, interp / SAMPLE_RATE)
+    t = np.arange(0.0, length_in_s, 1 / SAMPLE_RATE)
+
+    spectrum = CubicSpline(ts, spectrum, axis=-1)(t)
+
+    fig, (ax1, ax2) = plt.subplots(nrows=2)
+    ax1.set_title(name)
+    ax1.plot(t, audio[:samples])
+    ax1.set_ylabel('Signal')
+    ax1.set_xlim(0, length_in_s)
+    ax1.set_ylim(-1, 1)
+
+    ax2.imshow(spectrum, origin='lower', aspect='auto')
+    ax2.set_xticks([])
+    ax2.set_ylabel('Feature')
+
+    plt.savefig(name, dpi=300)
+
+
 if __name__ == '__main__':
     main()
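The cubic-spline step in save_figure exists so the per-frame spectrum shares a time axis with the per-sample waveform above it. A toy version of that interpolation, with illustrative sizes:

    import numpy as np
    from scipy.interpolate import CubicSpline

    # Upsample a per-frame spectrum onto the per-sample time axis so the
    # spectrogram image aligns with the waveform plot.
    sample_rate = 16000
    frames, bins, samples_per_frame = 50, 32, 320
    samples = frames * samples_per_frame

    spectrum = np.random.rand(bins, frames)                   # one column per frame
    ts = np.arange(frames) * samples_per_frame / sample_rate  # frame start times
    t = np.arange(samples) / sample_rate                      # sample times

    dense = CubicSpline(ts, spectrum, axis=-1)(t)             # -> (bins, samples)
    assert dense.shape == (bins, samples)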