sonusai 0.15.8__tar.gz → 0.16.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sonusai-0.15.8 → sonusai-0.16.0}/PKG-INFO +8 -19
- {sonusai-0.15.8 → sonusai-0.16.0}/README.rst +5 -5
- {sonusai-0.15.8 → sonusai-0.16.0}/pyproject.toml +3 -19
- sonusai-0.16.0/sonusai/__init__.py +86 -0
- sonusai-0.16.0/sonusai/audiofe.py +237 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/calc_metric_spenh.py +21 -12
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/genft.py +2 -1
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/genmixdb.py +5 -5
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/lsdb.py +2 -2
- sonusai-0.16.0/sonusai/main.py +90 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/__init__.py +4 -2
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/audio.py +0 -34
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/config.py +1 -2
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/datatypes.py +1 -1
- sonusai-0.16.0/sonusai/mixture/feature.py +105 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/helpers.py +60 -30
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/log_duration_and_sizes.py +2 -2
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/mixdb.py +13 -10
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/spectral_mask.py +14 -14
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/truth_functions/data.py +1 -1
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/truth_functions/target.py +2 -2
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mkmanifest.py +29 -2
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/onnx_predict.py +1 -1
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/plot.py +4 -4
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/post_spenh_targetf.py +8 -8
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/__init__.py +8 -7
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/asl_p56.py +3 -3
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/asr.py +35 -8
- sonusai-0.16.0/sonusai/utils/asr_functions/__init__.py +1 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/asr_functions/aaware_whisper.py +2 -2
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/asr_manifest_functions/__init__.py +1 -0
- sonusai-0.16.0/sonusai/utils/asr_manifest_functions/mcgill_speech.py +29 -0
- sonusai-0.16.0/sonusai/utils/audio_devices.py +41 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/calculate_input_shape.py +3 -4
- sonusai-0.16.0/sonusai/utils/create_timestamp.py +5 -0
- sonusai-0.15.8/sonusai/utils/trim_docstring.py → sonusai-0.16.0/sonusai/utils/docstring.py +20 -0
- sonusai-0.16.0/sonusai/utils/model_utils.py +30 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/onnx_utils.py +19 -45
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/reshape.py +11 -11
- sonusai-0.16.0/sonusai/utils/wave.py +26 -0
- sonusai-0.15.8/sonusai/__init__.py +0 -55
- sonusai-0.15.8/sonusai/data_generator/__init__.py +0 -5
- sonusai-0.15.8/sonusai/data_generator/dataset_from_mixdb.py +0 -143
- sonusai-0.15.8/sonusai/data_generator/keras_from_mixdb.py +0 -169
- sonusai-0.15.8/sonusai/data_generator/torch_from_mixdb.py +0 -122
- sonusai-0.15.8/sonusai/evaluate.py +0 -245
- sonusai-0.15.8/sonusai/keras_onnx.py +0 -86
- sonusai-0.15.8/sonusai/keras_predict.py +0 -231
- sonusai-0.15.8/sonusai/keras_train.py +0 -334
- sonusai-0.15.8/sonusai/main.py +0 -93
- sonusai-0.15.8/sonusai/mixture/feature.py +0 -51
- sonusai-0.15.8/sonusai/torchl_onnx.py +0 -216
- sonusai-0.15.8/sonusai/torchl_predict.py +0 -547
- sonusai-0.15.8/sonusai/torchl_train.py +0 -223
- sonusai-0.15.8/sonusai/utils/asr_functions/__init__.py +0 -6
- sonusai-0.15.8/sonusai/utils/asr_functions/aixplain_whisper.py +0 -59
- sonusai-0.15.8/sonusai/utils/asr_functions/data.py +0 -16
- sonusai-0.15.8/sonusai/utils/asr_functions/deepgram.py +0 -97
- sonusai-0.15.8/sonusai/utils/asr_functions/fastwhisper.py +0 -90
- sonusai-0.15.8/sonusai/utils/asr_functions/google.py +0 -95
- sonusai-0.15.8/sonusai/utils/asr_functions/whisper.py +0 -49
- sonusai-0.15.8/sonusai/utils/keras_utils.py +0 -226
- sonusai-0.15.8/sonusai/utils/wave.py +0 -19
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/aawscd_probwrite.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/data/__init__.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/data/genmixdb.yml +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/data/speech_ma01_01.wav +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/data/whitenoise.wav +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/doc/__init__.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/doc/doc.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/doc.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/genmix.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/gentcst.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/__init__.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/calc_class_weights.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/calc_optimal_thresholds.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/calc_pcm.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/calc_pesq.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/calc_sa_sdr.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/calc_sample_weights.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/calc_wer.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/calc_wsdr.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/class_summary.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/confusion_matrix_summary.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/one_hot.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/snr_summary.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/augmentation.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/class_count.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/constants.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/eq_rule_is_valid.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/generation.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/mapped_snr_f.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/soundfile_audio.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/sox_audio.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/sox_augmentation.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/target_class_balancing.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/targets.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/tokenized_shell_vars.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/torchaudio_audio.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/torchaudio_augmentation.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/truth.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/truth_functions/__init__.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/truth_functions/crm.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/truth_functions/energy.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/truth_functions/file.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/truth_functions/phoneme.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/truth_functions/sed.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mkwav.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/queries/__init__.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/queries/queries.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/tplot.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/asr_manifest_functions/data.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/asr_manifest_functions/librispeech.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/braced_glob.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/convert_string_to_number.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/create_ts_name.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/dataclass_from_dict.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/db.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/energy_f.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/engineering_number.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/get_frames_per_batch.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/get_label_names.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/grouper.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/human_readable_size.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/max_text_width.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/numeric_conversion.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/parallel.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/print_mixture_details.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/ranges.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/read_mixture_data.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/read_predict_data.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/seconds_to_hms.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/stacked_complex.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/stratified_shuffle_split.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/yes_or_no.py +0 -0
- {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/vars.py +0 -0
{sonusai-0.15.8 → sonusai-0.16.0}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sonusai
-Version: 0.15.8
+Version: 0.16.0
 Summary: Framework for building deep neural network models for sound, speech, and voice AI
 Home-page: https://aaware.com
 License: GPL-3.0-only
@@ -15,50 +15,39 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
-Requires-Dist: aixplain (>=0.2.6,<0.3.0)
-Requires-Dist: ctranslate2 (==4.1.0)
 Requires-Dist: dataclasses-json (>=0.6.1,<0.7.0)
-Requires-Dist: deepgram-sdk (>=3.0.0,<4.0.0)
 Requires-Dist: docopt (>=0.6.2,<0.7.0)
-Requires-Dist: faster-whisper (>=1.0.1,<2.0.0)
 Requires-Dist: h5py (>=3.11.0,<4.0.0)
 Requires-Dist: jiwer (>=3.0.3,<4.0.0)
-Requires-Dist: keras (>=3.1.1,<4.0.0)
-Requires-Dist: keras-tuner (>=1.4.7,<2.0.0)
 Requires-Dist: librosa (>=0.10.1,<0.11.0)
-Requires-Dist: lightning (>=2.2,<2.3)
 Requires-Dist: matplotlib (>=3.8.0,<4.0.0)
 Requires-Dist: onnx (>=1.14.1,<2.0.0)
 Requires-Dist: onnxruntime (>=1.16.1,<2.0.0)
 Requires-Dist: paho-mqtt (>=2.0.0,<3.0.0)
 Requires-Dist: pandas (>=2.1.1,<3.0.0)
 Requires-Dist: pesq (>=0.0.4,<0.0.5)
-Requires-Dist: pyaaware (>=1.5.
+Requires-Dist: pyaaware (>=1.5.7,<2.0.0)
+Requires-Dist: pyaudio (>=0.2.14,<0.3.0)
 Requires-Dist: pydub (>=0.25.1,<0.26.0)
 Requires-Dist: pystoi (>=0.4.0,<0.5.0)
-Requires-Dist: python-magic (>=0.4.27,<0.5.0)
 Requires-Dist: requests (>=2.31.0,<3.0.0)
 Requires-Dist: samplerate (>=0.2.1,<0.3.0)
 Requires-Dist: soundfile (>=0.12.1,<0.13.0)
 Requires-Dist: sox (>=1.4.1,<2.0.0)
-Requires-Dist: speechrecognition (>=3.10.1,<4.0.0)
-Requires-Dist: tensorflow (>=2.15.0,<3.0.0)
-Requires-Dist: tf2onnx (>=1.15.1,<2.0.0)
 Requires-Dist: torch (>=2.2,<2.3)
 Requires-Dist: torchaudio (>=2.2,<2.3)
-Requires-Dist: torchinfo (>=1.8.0,<2.0.0)
 Requires-Dist: tqdm (>=4.66.1,<5.0.0)
 Description-Content-Type: text/x-rst
 
-
+SonusAI: Framework for simplified creation of deep NN models for sound, speech, and voice AI
 
-
+SonusAI includes functions for pre-processing training and validation data and
 creating performance metrics reports for key types of Keras models:
 - recurrent, convolutional, or a combination (i.e. RCNNs)
 - binary, multiclass single-label, multiclass multi-label, and regression
 - training with data augmentations: noise mixing, pitch and time stretch, etc.
 
-
-- Aaware Inc. sonusai
-- Keras model scripts: User python scripts for
+SonusAI python functions are used by:
+- Aaware Inc. sonusai framework: Easily create train/validation data, run prediction, evaluate model performance
+- Keras model scripts: User python scripts for Keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for training rnn-based models like CRNN's, DSCRNN's, etc. in Keras.
 
```
{sonusai-0.15.8 → sonusai-0.16.0}/README.rst

```diff
@@ -1,11 +1,11 @@
-
+SonusAI: Framework for simplified creation of deep NN models for sound, speech, and voice AI
 
-
+SonusAI includes functions for pre-processing training and validation data and
 creating performance metrics reports for key types of Keras models:
 - recurrent, convolutional, or a combination (i.e. RCNNs)
 - binary, multiclass single-label, multiclass multi-label, and regression
 - training with data augmentations: noise mixing, pitch and time stretch, etc.
 
-
-- Aaware Inc. sonusai
-- Keras model scripts: User python scripts for
+SonusAI python functions are used by:
+- Aaware Inc. sonusai framework: Easily create train/validation data, run prediction, evaluate model performance
+- Keras model scripts: User python scripts for Keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for training rnn-based models like CRNN's, DSCRNN's, etc. in Keras.
```
{sonusai-0.15.8 → sonusai-0.16.0}/pyproject.toml

```diff
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "sonusai"
-version = "0.15.8"
+version = "0.16.0"
 description = "Framework for building deep neural network models for sound, speech, and voice AI"
 authors = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
 maintainers = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
@@ -15,53 +15,37 @@ aawscd_probwrite = 'sonusai.aawscd_probwrite:main'
 
 [tool.poetry.dependencies]
 PyYAML = "^6.0.1"
-aixplain = "^0.2.6"
-ctranslate2 = "4.1.0"
 dataclasses-json = "^0.6.1"
-deepgram-sdk = "^3.0.0"
 docopt = "^0.6.2"
-faster-whisper = "^1.0.1"
 h5py = "^3.11.0"
 jiwer = "^3.0.3"
-keras = "^3.1.1"
-keras-tuner = "^1.4.7"
 librosa = "^0.10.1"
-lightning = "~2.2"
 matplotlib = "^3.8.0"
 onnx = "^1.14.1"
-#onnxruntime-gpu = "^1.16.1"
 onnxruntime = "^1.16.1"
-#openai-whisper = "^20231117"
 paho-mqtt = "^2.0.0"
 pandas = "^2.1.1"
 pesq = "^0.0.4"
-pyaaware = "^1.5.
+pyaaware = "^1.5.7"
+pyaudio = "^0.2.14"
 pydub = "^0.25.1"
 pystoi = "^0.4.0"
 python = ">=3.9,<3.12"
-python-magic = "^0.4.27"
 requests = "^2.31.0"
 samplerate = "^0.2.1"
 soundfile = "^0.12.1"
-speechrecognition = "^3.10.1"
 sox = "^1.4.1"
-tensorflow = "^2.15.0"
-tf2onnx = "^1.15.1"
 torch = "~2.2"
 torchaudio = "~2.2"
-torchinfo = "^1.8.0"
 tqdm = "^4.66.1"
 
 [tool.poetry.group.dev.dependencies]
 icecream = "^2.1.3"
-ipython = "^8.16.1"
-jupyter = "^1.0.0"
 mypy = "^1.6.0"
 mypy-extensions = "^1.0.0"
 pytest = "^8.1.1"
 types-pyyaml = "^6.0.12.12"
 types-requests = "^2.31.0.8"
-yappi = "^1.4.0"
 
 [tool.mypy]
 ignore_missing_imports = true
```
sonusai-0.16.0/sonusai/__init__.py

```diff
@@ -0,0 +1,86 @@
+import logging
+from importlib import metadata
+from os.path import dirname
+
+__version__ = metadata.version(__package__)
+BASEDIR = dirname(__file__)
+
+commands_doc = """
+   audiofe              Audio front end
+   calc_metric_spenh    Run speech enhancement and analysis
+   doc                  Documentation
+   genft                Generate feature and truth data
+   genmix               Generate mixture and truth data
+   genmixdb             Generate a mixture database
+   gentcst              Generate target configuration from a subdirectory tree
+   lsdb                 List information about a mixture database
+   mkmanifest           Make ASR manifest JSON file
+   mkwav                Make WAV files from a mixture database
+   onnx_predict         Run ONNX predict on a trained model
+   plot                 Plot mixture data
+   post_spenh_targetf   Run post-processing for speech enhancement targetf data
+   tplot                Plot truth data
+   vars                 List custom SonusAI variables
+"""
+
+# create logger
+logger = logging.getLogger('sonusai')
+logger.setLevel(logging.DEBUG)
+formatter = logging.Formatter('%(message)s')
+console_handler = logging.StreamHandler()
+console_handler.setLevel(logging.DEBUG)
+console_handler.setFormatter(formatter)
+logger.addHandler(console_handler)
+
+
+class SonusAIError(Exception):
+    def __init__(self, value):
+        logger.error(value)
+
+
+# create file handler
+def create_file_handler(filename: str) -> None:
+    fh = logging.FileHandler(filename=filename, mode='w')
+    fh.setLevel(logging.DEBUG)
+    fh.setFormatter(formatter)
+    logger.addHandler(fh)
+
+
+# update console handler
+def update_console_handler(verbose: bool) -> None:
+    if not verbose:
+        logger.removeHandler(console_handler)
+        console_handler.setLevel(logging.INFO)
+        logger.addHandler(console_handler)
+
+
+# write initial log message
+def initial_log_messages(name: str, subprocess: str = None) -> None:
+    from datetime import datetime
+    from getpass import getuser
+    from os import getcwd
+    from socket import gethostname
+    from sys import argv
+
+    if subprocess is None:
+        logger.info(f'SonusAI {__version__}')
+    else:
+        logger.info(f'SonusAI {subprocess}')
+    logger.info(f'{name}')
+    logger.info('')
+    logger.debug(f'Host: {gethostname()}')
+    logger.debug(f'User: {getuser()}')
+    logger.debug(f'Directory: {getcwd()}')
+    logger.debug(f'Date: {datetime.now()}')
+    logger.debug(f'Command: {" ".join(argv)}')
+    logger.debug('')
+
+
+def commands_list(doc: str = commands_doc) -> list[str]:
+    lines = doc.split('\n')
+    commands = []
+    for line in lines:
+        command = line.strip().split(' ').pop(0)
+        if command:
+            commands.append(command)
+    return commands
```
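
The new package init gives every SonusAI command a shared logger plus helpers for the file and console handlers. A minimal sketch of the calling pattern (the same sequence appears in audiofe.py below; `mytool` and its log filename are hypothetical):

```python
# Minimal sketch, assuming sonusai 0.16.0 is installed; 'mytool' is a hypothetical command.
from sonusai import create_file_handler
from sonusai import initial_log_messages
from sonusai import logger
from sonusai import update_console_handler

create_file_handler('mytool.log')      # DEBUG-level file handler, truncated on each run (mode='w')
update_console_handler(verbose=False)  # without --verbose, console output is reduced to INFO
initial_log_messages('mytool')         # version banner plus host/user/directory/command debug lines
logger.info('Hello from a SonusAI command')
```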
sonusai-0.16.0/sonusai/audiofe.py

```diff
@@ -0,0 +1,237 @@
+"""sonusai audiofe
+
+usage: audiofe [-hvds] [--version] [-i INPUT] [-l LENGTH] [-m MODEL] [-k CKPT] [-a ASR] [-w WMODEL]
+
+options:
+    -h, --help
+    -v, --verbose                Be verbose.
+    -d, --debug                  Write debug data to H5 file.
+    -s, --show                   Show a list of available audio inputs.
+    -i INPUT, --input INPUT      Input audio.
+    -l LENGTH, --length LENGTH   Length of audio in seconds. [default: -1].
+    -m MODEL, --model MODEL      PL model .py file path.
+    -k CKPT, --checkpoint CKPT   PL checkpoint file with weights.
+    -a ASR, --asr ASR            ASR method to use.
+    -w WMODEL, --whisper WMODEL  Whisper model used in aixplain_whisper and whisper methods. [default: tiny].
+
+Aaware SonusAI Audio Front End.
+
+Capture LENGTH seconds of audio from INPUT. If LENGTH is < 0, then capture until key is pressed. If INPUT is a valid
+audio file name, then use the audio data from the specified file. In this case, if LENGTH is < 0, process entire file;
+otherwise, process min(length(INPUT), LENGTH) seconds of audio from INPUT. Audio is saved to
+audiofe_capture_<TIMESTAMP>.wav.
+
+If a model is specified, run prediction on audio data from this model. Then compute the inverse transform of the
+prediction result and save to audiofe_predict_<TIMESTAMP>.wav.
+
+If an ASR is specified, run ASR on the captured audio and print the results. In addition, if a model was also specified,
+run ASR on the predict audio and print the results.
+
+If the debug option is enabled, write capture audio, feature, reconstruct audio, predict, and predict audio to
+audiofe_<TIMESTAMP>.h5.
+
+"""
+from os.path import exists
+from select import select
+from sys import stdin
+
+import h5py
+import numpy as np
+import pyaudio
+import torch
+from docopt import docopt
+from docopt import printable_usage
+
+import sonusai
+from sonusai import create_file_handler
+from sonusai import initial_log_messages
+from sonusai import logger
+from sonusai import update_console_handler
+from sonusai.mixture import AudioT
+from sonusai.mixture import CHANNEL_COUNT
+from sonusai.mixture import SAMPLE_RATE
+from sonusai.mixture import get_audio_from_feature
+from sonusai.mixture import get_feature_from_audio
+from sonusai.mixture import read_audio
+from sonusai.utils import calc_asr
+from sonusai.utils import create_timestamp
+from sonusai.utils import get_input_device_index_by_name
+from sonusai.utils import get_input_devices
+from sonusai.utils import load_torchl_ckpt_model
+from sonusai.utils import trim_docstring
+from sonusai.utils import write_wav
+
+
+def main() -> None:
+    args = docopt(trim_docstring(__doc__), version=sonusai.__version__, options_first=True)
+    ts = create_timestamp()
+
+    verbose = args['--verbose']
+    length = float(args['--length'])
+    input_name = args['--input']
+    model_name = args['--model']
+    ckpt_name = args['--checkpoint']
+    asr_name = args['--asr']
+    whisper_name = args['--whisper']
+    debug = args['--debug']
+    show = args['--show']
+
+    capture_name = f'audiofe_capture_{ts}.wav'
+    predict_name = f'audiofe_predict_{ts}.wav'
+    h5_name = f'audiofe_{ts}.h5'
+
+    if model_name is not None and ckpt_name is None:
+        print(printable_usage(trim_docstring(__doc__)))
+        exit(1)
+
+    # Setup logging file
+    create_file_handler('audiofe.log')
+    update_console_handler(verbose)
+    initial_log_messages('audiofe')
+
+    if show:
+        logger.info('List of available audio inputs:')
+        logger.info('')
+        p = pyaudio.PyAudio()
+        for name in get_input_devices(p):
+            logger.info(f'{name}')
+        logger.info('')
+        p.terminate()
+        return
+
+    if input_name is not None and exists(input_name):
+        capture_audio = get_frames_from_file(input_name, length)
+    else:
+        try:
+            capture_audio = get_frames_from_device(input_name, length)
+        except ValueError as e:
+            logger.exception(e)
+            return
+
+    write_wav(capture_name, capture_audio, SAMPLE_RATE)
+    logger.info('')
+    logger.info(f'Wrote capture audio with shape {capture_audio.shape} to {capture_name}')
+    if debug:
+        with h5py.File(h5_name, 'a') as f:
+            if 'capture_audio' in f:
+                del f['capture_audio']
+            f.create_dataset('capture_audio', data=capture_audio)
+        logger.info(f'Wrote capture audio with shape {capture_audio.shape} to {h5_name}')
+
+    if asr_name is not None:
+        capture_asr = calc_asr(capture_audio, engine=asr_name, whisper_model_name=whisper_name).text
+        logger.info(f'Capture audio ASR: {capture_asr}')
+
+    if model_name is not None:
+        model = load_torchl_ckpt_model(model_name=model_name, ckpt_name=ckpt_name)
+        model.eval()
+
+        feature = get_feature_from_audio(audio=capture_audio, feature_mode=model.hparams.feature)
+        if debug:
+            with h5py.File(h5_name, 'a') as f:
+                if 'feature' in f:
+                    del f['feature']
+                f.create_dataset('feature', data=feature)
+            logger.info(f'Wrote feature with shape {feature.shape} to {h5_name}')
+
+        # if debug:
+        #     reconstruct_name = f'audiofe_reconstruct_{ts}.wav'
+        #     reconstruct_audio = get_audio_from_feature(feature=feature, feature_mode=model.hparams.feature)
+        #     samples = min(len(capture_audio), len(reconstruct_audio))
+        #     max_err = np.max(np.abs(capture_audio[:samples] - reconstruct_audio[:samples]))
+        #     logger.info(f'Maximum error between capture and reconstruct: {max_err}')
+        #     write_wav(reconstruct_name, reconstruct_audio, SAMPLE_RATE)
+        #     logger.info(f'Wrote reconstruct audio with shape {reconstruct_audio.shape} to {reconstruct_name}')
+        #     with h5py.File(h5_name, 'a') as f:
+        #         if 'reconstruct_audio' in f:
+        #             del f['reconstruct_audio']
+        #         f.create_dataset('reconstruct_audio', data=reconstruct_audio)
+        #     logger.info(f'Wrote reconstruct audio with shape {reconstruct_audio.shape} to {h5_name}')
+
+        with torch.no_grad():
+            # model wants batch x timesteps x feature_parameters
+            predict = model(torch.tensor(feature).permute((1, 0, 2))).permute(1, 0, 2).numpy()
+        if debug:
+            with h5py.File(h5_name, 'a') as f:
+                if 'predict' in f:
+                    del f['predict']
+                f.create_dataset('predict', data=predict)
+            logger.info(f'Wrote predict with shape {predict.shape} to {h5_name}')
+
+        predict_audio = get_audio_from_feature(feature=predict, feature_mode=model.hparams.feature)
+        write_wav(predict_name, predict_audio, SAMPLE_RATE)
+        logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {predict_name}')
+        if debug:
+            with h5py.File(h5_name, 'a') as f:
+                if 'predict_audio' in f:
+                    del f['predict_audio']
+                f.create_dataset('predict_audio', data=predict_audio)
+            logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {h5_name}')
+
+        if asr_name is not None:
+            predict_asr = calc_asr(predict_audio, engine=asr_name, whisper_model_name=whisper_name).text
+            logger.info(f'Predict audio ASR: {predict_asr}')
+
+
+def get_frames_from_device(input_name: str | None, length: float, chunk: int = 1024) -> AudioT:
+    p = pyaudio.PyAudio()
+
+    input_devices = get_input_devices(p)
+    if not input_devices:
+        raise ValueError('No input audio devices found')
+
+    if input_name is None:
+        input_name = input_devices[0]
+
+    try:
+        device_index = get_input_device_index_by_name(p, input_name)
+    except ValueError:
+        msg = f'Could not find {input_name}\n'
+        msg += f'Available devices:\n'
+        for input_device in input_devices:
+            msg += f'  {input_device}\n'
+        raise ValueError(msg)
+
+    logger.info(f'Capturing from {p.get_device_info_by_index(device_index).get("name")}')
+    stream = p.open(format=pyaudio.paFloat32,
+                    channels=CHANNEL_COUNT,
+                    rate=SAMPLE_RATE,
+                    input=True,
+                    input_device_index=device_index)
+    stream.start_stream()
+
+    print()
+    print('+---------------------------------+')
+    print('| Press Enter to stop             |')
+    print('+---------------------------------+')
+    print()
+
+    elapsed = 0.0
+    seconds_per_chunk = float(chunk) / float(SAMPLE_RATE)
+    raw_frames = []
+    while elapsed < length or length == -1:
+        raw_frames.append(stream.read(num_frames=chunk, exception_on_overflow=False))
+        elapsed += seconds_per_chunk
+        if select([stdin, ], [], [], 0)[0]:
+            stdin.read(1)
+            length = elapsed
+
+    stream.stop_stream()
+    stream.close()
+    p.terminate()
+    frames = np.frombuffer(b''.join(raw_frames), dtype=np.float32)
+    return frames
+
+
+def get_frames_from_file(input_name: str, length: float) -> AudioT:
+    logger.info(f'Capturing from {input_name}')
+    frames = read_audio(input_name)
+    if length != -1:
+        num_frames = int(length * SAMPLE_RATE)
+        if len(frames) > num_frames:
+            frames = frames[:num_frames]
+    return frames
+
+
+if __name__ == '__main__':
+    main()
```
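
The predict step above hinges on the pair of `permute` calls around the model: `get_feature_from_audio` produces frame-major data, while the model wants batch x timesteps x feature_parameters. A shape-only sketch of that round trip, assuming (hypothetically) a stride-1 feature so the middle axis is a singleton:

```python
import torch

# Hypothetical shapes: 100 feature frames, stride 1, 64 feature parameters.
feature = torch.zeros(100, 1, 64)
batched = feature.permute(1, 0, 2)  # (1, 100, 64): batch x timesteps x feature_parameters
output = batched                    # stand-in for model(batched), same shape out as in
predict = output.permute(1, 0, 2)   # back to (100, 1, 64) for get_audio_from_feature
assert predict.shape == feature.shape
```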
{sonusai-0.15.8 → sonusai-0.16.0}/sonusai/calc_metric_spenh.py

```diff
@@ -758,13 +758,18 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
     predict = stack_complex(predict)
 
     # 2) Collect true target, noise, mixture data, trim to predict size if needed
-
-    target_f = mixdb.
-
-
-
+    tmp = mixdb.mixture_targets(mixid)  # targets is list of pre-IR and pre-specaugment targets
+    target_f = mixdb.mixture_targets_f(mixid, targets=tmp)[0]
+    target = tmp[0]
+    mixture = mixdb.mixture_mixture(mixid)  # note: gives full reverberated/distorted target, but no specaugment
+    # noise_wodist = mixdb.mixture_noise(mixid)  # noise without specaugment and distortion
+    # noise_wodist_f = mixdb.mixture_noise_f(mixid, noise=noise_wodist)
+    noise = mixture - target  # has time-domain distortion (ir,etc.) but does not have specaugment
+    # noise_f = mixdb.mixture_noise_f(mixid, noise=noise)
+    segsnr_f = mixdb.mixture_segsnr(mixid, target=target, noise=noise)  # note: uses pre-IR, pre-specaug audio
     mixture_f = mixdb.mixture_mixture_f(mixid, mixture=mixture)
-
+    noise_f = mixture_f - target_f  # true noise in freq domain includes specaugment and time-domain ir,distortions
+    # segsnr_f = mixdb.mixture_segsnr(mixid, target=target, noise=noise)
     segsnr_f[segsnr_f == inf] = 7.944e8  # 99db
     segsnr_f[segsnr_f == -inf] = 1.258e-10  # -99db
     # need to use inv-tf to match #samples & latency shift properties of predict inv tf
@@ -920,8 +925,9 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
                           'NLERR': lerr_n_frame,
                           'SPD': phd_frame})
     metr2 = metr2.describe()  # Use pandas stat function
-
-
+    # Change SSNR stats to dB, except count. SSNR is index 0, pandas requires using iloc
+    # metr2['SSNR'][1:] = metr2['SSNR'][1:].apply(lambda x: 10 * np.log10(x + 1.01e-10))
+    metr2.iloc[1:, 0] = metr2['SSNR'][1:].apply(lambda x: 10 * np.log10(x + 1.01e-10))
     # create a single row in multi-column header
     new_labels = pd.MultiIndex.from_product([metr2.columns,
                                              ['Avg', 'Min', 'Med', 'Max', 'Std']],
@@ -1166,7 +1172,7 @@ def main():
     # Individual mixtures use pandas print, set precision to 2 decimal places
     # pd.set_option('float_format', '{:.2f}'.format)
     progress = tqdm(total=len(mixids), desc='calc_metric_spenh')
-    all_metrics_tables = pp_tqdm_imap(_process_mixture, mixids, progress=progress, num_cpus=
+    all_metrics_tables = pp_tqdm_imap(_process_mixture, mixids, progress=progress, num_cpus=8)
     progress.close()
 
     all_metrics_table_1 = pd.concat([item[0] for item in all_metrics_tables])
@@ -1192,6 +1198,7 @@ def main():
         if ~np.isnan(tmp.iloc[0].to_numpy()[0]).any():
             mtab_snr_summary_em = pd.concat([mtab_snr_summary_em, tmp])
 
+    mtab_snr_summary = mtab_snr_summary.sort_values(by=['MXSNR'], ascending=False)
     # Correct percentages in snr summary table
     mtab_snr_summary['PESQi%'] = 100 * (mtab_snr_summary['PESQ'] - mtab_snr_summary['MXPESQ']) / np.maximum(
         mtab_snr_summary['MXPESQ'], 0.01)
@@ -1202,9 +1209,11 @@ def main():
             else:
                 mtab_snr_summary['WERi%'].iloc[i] = -999.0
         else:
-            mtab_snr_summary['
-
-
+            if ~np.isnan(mtab_snr_summary['WER'].iloc[i]) and ~np.isnan(mtab_snr_summary['MXWER'].iloc[i]):
+                # update WERi% in 6th col
+                mtab_snr_summary.iloc[i, 6] = 100 * (mtab_snr_summary['MXWER'].iloc[i] -
+                                                     mtab_snr_summary['WER'].iloc[i]) / \
+                                              mtab_snr_summary['MXWER'].iloc[i]
 
     # Calculate avg metrics over all mixtures except -99
     all_mtab1_sorted_nom99 = all_mtab1_sorted[all_mtab1_sorted.MXSNR != -99]
```
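
The new `metr2.iloc[1:, 0]` line replaces the commented-out chained assignment (`metr2['SSNR'][1:] = ...`), which pandas may apply to a temporary copy. A standalone sketch of the pattern with made-up numbers: `describe()` puts `count` in row 0, so rows 1 and onward (mean/std/min/quantiles/max) get converted to dB while the count is left alone:

```python
import numpy as np
import pandas as pd

# Made-up SSNR/PESQ frame values; only the indexing pattern matters here.
metr2 = pd.DataFrame({'SSNR': [1.0, 10.0, 100.0], 'PESQ': [1.1, 2.2, 3.3]}).describe()
# SSNR is column 0; writing through .iloc updates the frame in place,
# avoiding the chained-indexing copy that metr2['SSNR'][1:] = ... can hit.
metr2.iloc[1:, 0] = metr2['SSNR'][1:].apply(lambda x: 10 * np.log10(x + 1.01e-10))
print(metr2)  # SSNR stats now in dB; count row untouched
```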
{sonusai-0.15.8 → sonusai-0.16.0}/sonusai/genft.py

```diff
@@ -165,7 +165,8 @@ def main() -> None:
     logger.info(f'Wrote {len(mixids)} mixtures to {location}')
     logger.info('')
     logger.info(f'Duration: {seconds_to_hms(seconds=duration)}')
-    logger.info(
+    logger.info(
+        f'feature: {human_readable_size(total_feature_frames * mixdb.fg_stride * mixdb.feature_parameters * 4, 1)}')
     logger.info(f'truth_f: {human_readable_size(total_feature_frames * mixdb.num_classes * 4, 1)}')
     if compute_segsnr:
         logger.info(f'segsnr: {human_readable_size(total_transform_frames * 4, 1)}')
```
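
The reworked log line estimates feature size; the `* 4` in both expressions is the byte size of one float32 value. A worked sketch with hypothetical counts:

```python
# Hypothetical database sizes; mirrors the genft size estimates above.
total_feature_frames = 250_000
fg_stride = 6
feature_parameters = 64
num_classes = 3

feature_bytes = total_feature_frames * fg_stride * feature_parameters * 4  # 384,000,000 (~384 MB)
truth_f_bytes = total_feature_frames * num_classes * 4                     # 3,000,000 (~3 MB)
print(f'feature: {feature_bytes} bytes, truth_f: {truth_f_bytes} bytes')
```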
{sonusai-0.15.8 → sonusai-0.16.0}/sonusai/genmixdb.py

```diff
@@ -225,7 +225,7 @@ def genmixdb(location: str,
     if logging:
         logger.info('Collecting impulse responses')
 
-    impulse_response_files = get_impulse_response_files(config
+    impulse_response_files = get_impulse_response_files(config)
 
     populate_impulse_response_file_table(location, impulse_response_files, test)
 
@@ -337,12 +337,12 @@ def genmixdb(location: str,
         log_duration_and_sizes(total_duration=total_duration,
                                num_classes=mixdb.num_classes,
                                feature_step_samples=mixdb.feature_step_samples,
-
+                               feature_parameters=mixdb.feature_parameters,
                                stride=mixdb.fg_stride,
                                desc='Estimated')
         logger.info(f'Feature shape: '
-                    f'{mixdb.fg_stride} x {mixdb.
-                    f'({mixdb.fg_stride * mixdb.
+                    f'{mixdb.fg_stride} x {mixdb.feature_parameters} '
+                    f'({mixdb.fg_stride * mixdb.feature_parameters} total params)')
         logger.info(f'Feature samples: {mixdb.feature_samples} samples ({mixdb.feature_ms} ms)')
         logger.info(f'Feature step samples: {mixdb.feature_step_samples} samples ({mixdb.feature_step_ms} ms)')
         logger.info('')
@@ -371,7 +371,7 @@ def genmixdb(location: str,
         log_duration_and_sizes(total_duration=total_duration,
                                num_classes=mixdb.num_classes,
                                feature_step_samples=mixdb.feature_step_samples,
-
+                               feature_parameters=mixdb.feature_parameters,
                                stride=mixdb.fg_stride,
                                desc='Actual')
         logger.info('')
```
{sonusai-0.15.8 → sonusai-0.16.0}/sonusai/lsdb.py

```diff
@@ -48,8 +48,8 @@ def lsdb(mixdb: MixtureDatabase,
     logger.info(f'{"Targets":{desc_len}} {mixdb.num_target_files}')
     logger.info(f'{"Noises":{desc_len}} {mixdb.num_noise_files}')
     logger.info(f'{"Feature":{desc_len}} {mixdb.feature}')
-    logger.info(f'{"Feature shape":{desc_len}} {mixdb.fg_stride} x {mixdb.
-                f'({mixdb.fg_stride * mixdb.
+    logger.info(f'{"Feature shape":{desc_len}} {mixdb.fg_stride} x {mixdb.feature_parameters} '
+                f'({mixdb.fg_stride * mixdb.feature_parameters} total params)')
     logger.info(f'{"Feature samples":{desc_len}} {mixdb.feature_samples} samples ({mixdb.feature_ms} ms)')
     logger.info(f'{"Feature step samples":{desc_len}} {mixdb.feature_step_samples} samples '
                 f'({mixdb.feature_step_ms} ms)')
```
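
Read with hypothetical values, the renamed field makes the log line concrete: with an `mixdb.fg_stride` of 6 and `mixdb.feature_parameters` of 64, lsdb (and the matching genmixdb change above) would report `Feature shape: 6 x 64 (384 total params)`.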