sonusai 0.15.9__tar.gz → 0.16.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sonusai-0.15.9 → sonusai-0.16.0}/PKG-INFO +7 -25
- {sonusai-0.15.9 → sonusai-0.16.0}/README.rst +5 -5
- {sonusai-0.15.9 → sonusai-0.16.0}/pyproject.toml +2 -25
- sonusai-0.16.0/sonusai/__init__.py +86 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/audiofe.py +6 -62
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/calc_metric_spenh.py +24 -15
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/genmixdb.py +1 -1
- sonusai-0.16.0/sonusai/main.py +90 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/__init__.py +1 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/config.py +1 -2
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mkmanifest.py +29 -2
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/__init__.py +4 -7
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/asl_p56.py +3 -3
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/asr.py +35 -8
- sonusai-0.16.0/sonusai/utils/asr_functions/__init__.py +1 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/asr_functions/aaware_whisper.py +2 -2
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/asr_manifest_functions/__init__.py +1 -0
- sonusai-0.16.0/sonusai/utils/asr_manifest_functions/mcgill_speech.py +29 -0
- sonusai-0.15.9/sonusai/utils/trim_docstring.py → sonusai-0.16.0/sonusai/utils/docstring.py +20 -0
- sonusai-0.16.0/sonusai/utils/model_utils.py +30 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/onnx_utils.py +19 -45
- sonusai-0.15.9/sonusai/__init__.py +0 -55
- sonusai-0.15.9/sonusai/data_generator/__init__.py +0 -5
- sonusai-0.15.9/sonusai/data_generator/dataset_from_mixdb.py +0 -143
- sonusai-0.15.9/sonusai/data_generator/keras_from_mixdb.py +0 -169
- sonusai-0.15.9/sonusai/data_generator/torch_from_mixdb.py +0 -122
- sonusai-0.15.9/sonusai/keras_onnx.py +0 -86
- sonusai-0.15.9/sonusai/keras_predict.py +0 -231
- sonusai-0.15.9/sonusai/keras_train.py +0 -334
- sonusai-0.15.9/sonusai/main.py +0 -93
- sonusai-0.15.9/sonusai/torchl_onnx.py +0 -216
- sonusai-0.15.9/sonusai/torchl_predict.py +0 -542
- sonusai-0.15.9/sonusai/torchl_train.py +0 -223
- sonusai-0.15.9/sonusai/utils/asr_functions/__init__.py +0 -6
- sonusai-0.15.9/sonusai/utils/asr_functions/aixplain_whisper.py +0 -59
- sonusai-0.15.9/sonusai/utils/asr_functions/data.py +0 -16
- sonusai-0.15.9/sonusai/utils/asr_functions/deepgram.py +0 -97
- sonusai-0.15.9/sonusai/utils/asr_functions/fastwhisper.py +0 -90
- sonusai-0.15.9/sonusai/utils/asr_functions/google.py +0 -95
- sonusai-0.15.9/sonusai/utils/asr_functions/whisper.py +0 -49
- sonusai-0.15.9/sonusai/utils/keras_utils.py +0 -226
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/aawscd_probwrite.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/data/__init__.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/data/genmixdb.yml +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/data/speech_ma01_01.wav +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/data/whitenoise.wav +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/doc/__init__.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/doc/doc.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/doc.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/genft.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/genmix.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/gentcst.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/lsdb.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/__init__.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/calc_class_weights.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/calc_optimal_thresholds.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/calc_pcm.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/calc_pesq.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/calc_sa_sdr.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/calc_sample_weights.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/calc_wer.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/calc_wsdr.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/class_summary.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/confusion_matrix_summary.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/one_hot.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/snr_summary.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/audio.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/augmentation.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/class_count.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/constants.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/datatypes.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/eq_rule_is_valid.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/feature.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/generation.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/helpers.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/log_duration_and_sizes.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/mapped_snr_f.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/mixdb.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/soundfile_audio.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/sox_audio.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/sox_augmentation.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/spectral_mask.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/target_class_balancing.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/targets.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/tokenized_shell_vars.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/torchaudio_audio.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/torchaudio_augmentation.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/truth.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/truth_functions/__init__.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/truth_functions/crm.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/truth_functions/data.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/truth_functions/energy.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/truth_functions/file.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/truth_functions/phoneme.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/truth_functions/sed.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/truth_functions/target.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mkwav.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/onnx_predict.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/plot.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/post_spenh_targetf.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/queries/__init__.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/queries/queries.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/tplot.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/asr_manifest_functions/data.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/asr_manifest_functions/librispeech.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/audio_devices.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/braced_glob.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/calculate_input_shape.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/convert_string_to_number.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/create_timestamp.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/create_ts_name.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/dataclass_from_dict.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/db.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/energy_f.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/engineering_number.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/get_frames_per_batch.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/get_label_names.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/grouper.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/human_readable_size.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/max_text_width.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/numeric_conversion.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/parallel.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/print_mixture_details.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/ranges.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/read_mixture_data.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/read_predict_data.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/reshape.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/seconds_to_hms.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/stacked_complex.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/stratified_shuffle_split.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/wave.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/yes_or_no.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/vars.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sonusai
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.16.0
|
4
4
|
Summary: Framework for building deep neural network models for sound, speech, and voice AI
|
5
5
|
Home-page: https://aaware.com
|
6
6
|
License: GPL-3.0-only
|
@@ -15,57 +15,39 @@ Classifier: Programming Language :: Python :: 3.9
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.10
|
16
16
|
Classifier: Programming Language :: Python :: 3.11
|
17
17
|
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
18
|
-
Requires-Dist: aixplain (>=0.2.6,<0.3.0)
|
19
|
-
Requires-Dist: bitarray (>=2.9.2,<3.0.0)
|
20
|
-
Requires-Dist: ctranslate2 (==4.1.0)
|
21
18
|
Requires-Dist: dataclasses-json (>=0.6.1,<0.7.0)
|
22
|
-
Requires-Dist: deepgram-sdk (>=3.0.0,<4.0.0)
|
23
19
|
Requires-Dist: docopt (>=0.6.2,<0.7.0)
|
24
|
-
Requires-Dist: einops (>=0.7.0,<0.8.0)
|
25
|
-
Requires-Dist: faster-whisper (>=1.0.1,<2.0.0)
|
26
|
-
Requires-Dist: geomloss (>=0.2.6,<0.3.0)
|
27
20
|
Requires-Dist: h5py (>=3.11.0,<4.0.0)
|
28
|
-
Requires-Dist: hydra-core (>=1.3.2,<2.0.0)
|
29
21
|
Requires-Dist: jiwer (>=3.0.3,<4.0.0)
|
30
|
-
Requires-Dist: keras (>=3.1.1,<4.0.0)
|
31
|
-
Requires-Dist: keras-tuner (>=1.4.7,<2.0.0)
|
32
22
|
Requires-Dist: librosa (>=0.10.1,<0.11.0)
|
33
|
-
Requires-Dist: lightning (>=2.2,<2.3)
|
34
23
|
Requires-Dist: matplotlib (>=3.8.0,<4.0.0)
|
35
|
-
Requires-Dist: omegaconf (>=2.3.0,<3.0.0)
|
36
24
|
Requires-Dist: onnx (>=1.14.1,<2.0.0)
|
37
25
|
Requires-Dist: onnxruntime (>=1.16.1,<2.0.0)
|
38
26
|
Requires-Dist: paho-mqtt (>=2.0.0,<3.0.0)
|
39
27
|
Requires-Dist: pandas (>=2.1.1,<3.0.0)
|
40
28
|
Requires-Dist: pesq (>=0.0.4,<0.0.5)
|
41
|
-
Requires-Dist: pyaaware (>=1.5.
|
29
|
+
Requires-Dist: pyaaware (>=1.5.7,<2.0.0)
|
42
30
|
Requires-Dist: pyaudio (>=0.2.14,<0.3.0)
|
43
31
|
Requires-Dist: pydub (>=0.25.1,<0.26.0)
|
44
32
|
Requires-Dist: pystoi (>=0.4.0,<0.5.0)
|
45
|
-
Requires-Dist: python-magic (>=0.4.27,<0.5.0)
|
46
33
|
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
47
|
-
Requires-Dist: sacrebleu (>=2.4.2,<3.0.0)
|
48
34
|
Requires-Dist: samplerate (>=0.2.1,<0.3.0)
|
49
35
|
Requires-Dist: soundfile (>=0.12.1,<0.13.0)
|
50
36
|
Requires-Dist: sox (>=1.4.1,<2.0.0)
|
51
|
-
Requires-Dist: speechrecognition (>=3.10.1,<4.0.0)
|
52
|
-
Requires-Dist: tensorflow (>=2.15.0,<3.0.0)
|
53
|
-
Requires-Dist: tf2onnx (>=1.15.1,<2.0.0)
|
54
37
|
Requires-Dist: torch (>=2.2,<2.3)
|
55
38
|
Requires-Dist: torchaudio (>=2.2,<2.3)
|
56
|
-
Requires-Dist: torchinfo (>=1.8.0,<2.0.0)
|
57
39
|
Requires-Dist: tqdm (>=4.66.1,<5.0.0)
|
58
40
|
Description-Content-Type: text/x-rst
|
59
41
|
|
60
|
-
|
42
|
+
SonusAI: Framework for simplified creation of deep NN models for sound, speech, and voice AI
|
61
43
|
|
62
|
-
|
44
|
+
SonusAI includes functions for pre-processing training and validation data and
|
63
45
|
creating performance metrics reports for key types of Keras models:
|
64
46
|
- recurrent, convolutional, or a combination (i.e. RCNNs)
|
65
47
|
- binary, multiclass single-label, multiclass multi-label, and regression
|
66
48
|
- training with data augmentations: noise mixing, pitch and time stretch, etc.
|
67
49
|
|
68
|
-
|
69
|
-
- Aaware Inc. sonusai
|
70
|
-
- Keras model scripts: User python scripts for
|
50
|
+
SonusAI python functions are used by:
|
51
|
+
- Aaware Inc. sonusai framework: Easily create train/validation data, run prediction, evaluate model performance
|
52
|
+
- Keras model scripts: User python scripts for Keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for training rnn-based models like CRNN's, DSCRNN's, etc. in Keras.
|
71
53
|
|
@@ -1,11 +1,11 @@
|
|
1
|
-
|
1
|
+
SonusAI: Framework for simplified creation of deep NN models for sound, speech, and voice AI
|
2
2
|
|
3
|
-
|
3
|
+
SonusAI includes functions for pre-processing training and validation data and
|
4
4
|
creating performance metrics reports for key types of Keras models:
|
5
5
|
- recurrent, convolutional, or a combination (i.e. RCNNs)
|
6
6
|
- binary, multiclass single-label, multiclass multi-label, and regression
|
7
7
|
- training with data augmentations: noise mixing, pitch and time stretch, etc.
|
8
8
|
|
9
|
-
|
10
|
-
- Aaware Inc. sonusai
|
11
|
-
- Keras model scripts: User python scripts for
|
9
|
+
SonusAI python functions are used by:
|
10
|
+
- Aaware Inc. sonusai framework: Easily create train/validation data, run prediction, evaluate model performance
|
11
|
+
- Keras model scripts: User python scripts for Keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for training rnn-based models like CRNN's, DSCRNN's, etc. in Keras.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "sonusai"
|
3
|
-
version = "0.
|
3
|
+
version = "0.16.0"
|
4
4
|
description = "Framework for building deep neural network models for sound, speech, and voice AI"
|
5
5
|
authors = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
|
6
6
|
maintainers = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
|
@@ -15,60 +15,37 @@ aawscd_probwrite = 'sonusai.aawscd_probwrite:main'
|
|
15
15
|
|
16
16
|
[tool.poetry.dependencies]
|
17
17
|
PyYAML = "^6.0.1"
|
18
|
-
aixplain = "^0.2.6"
|
19
|
-
bitarray = "^2.9.2"
|
20
|
-
ctranslate2 = "4.1.0"
|
21
18
|
dataclasses-json = "^0.6.1"
|
22
|
-
deepgram-sdk = "^3.0.0"
|
23
19
|
docopt = "^0.6.2"
|
24
|
-
einops = "^0.7.0"
|
25
|
-
faster-whisper = "^1.0.1"
|
26
|
-
geomloss = "^0.2.6"
|
27
20
|
h5py = "^3.11.0"
|
28
|
-
hydra-core = "^1.3.2"
|
29
21
|
jiwer = "^3.0.3"
|
30
|
-
keras = "^3.1.1"
|
31
|
-
keras-tuner = "^1.4.7"
|
32
22
|
librosa = "^0.10.1"
|
33
|
-
lightning = "~2.2"
|
34
23
|
matplotlib = "^3.8.0"
|
35
|
-
omegaconf = "^2.3.0"
|
36
24
|
onnx = "^1.14.1"
|
37
|
-
#onnxruntime-gpu = "^1.16.1"
|
38
25
|
onnxruntime = "^1.16.1"
|
39
|
-
#openai-whisper = "^20231117"
|
40
26
|
paho-mqtt = "^2.0.0"
|
41
27
|
pandas = "^2.1.1"
|
42
28
|
pesq = "^0.0.4"
|
43
|
-
pyaaware = "^1.5.
|
29
|
+
pyaaware = "^1.5.7"
|
44
30
|
pyaudio = "^0.2.14"
|
45
31
|
pydub = "^0.25.1"
|
46
32
|
pystoi = "^0.4.0"
|
47
33
|
python = ">=3.9,<3.12"
|
48
|
-
python-magic = "^0.4.27"
|
49
34
|
requests = "^2.31.0"
|
50
|
-
sacrebleu = "^2.4.2"
|
51
35
|
samplerate = "^0.2.1"
|
52
36
|
soundfile = "^0.12.1"
|
53
|
-
speechrecognition = "^3.10.1"
|
54
37
|
sox = "^1.4.1"
|
55
|
-
tensorflow = "^2.15.0"
|
56
|
-
tf2onnx = "^1.15.1"
|
57
38
|
torch = "~2.2"
|
58
39
|
torchaudio = "~2.2"
|
59
|
-
torchinfo = "^1.8.0"
|
60
40
|
tqdm = "^4.66.1"
|
61
41
|
|
62
42
|
[tool.poetry.group.dev.dependencies]
|
63
43
|
icecream = "^2.1.3"
|
64
|
-
ipython = "^8.16.1"
|
65
|
-
jupyter = "^1.0.0"
|
66
44
|
mypy = "^1.6.0"
|
67
45
|
mypy-extensions = "^1.0.0"
|
68
46
|
pytest = "^8.1.1"
|
69
47
|
types-pyyaml = "^6.0.12.12"
|
70
48
|
types-requests = "^2.31.0.8"
|
71
|
-
yappi = "^1.4.0"
|
72
49
|
|
73
50
|
[tool.mypy]
|
74
51
|
ignore_missing_imports = true
|
@@ -0,0 +1,86 @@
|
|
1
|
+
import logging
|
2
|
+
from importlib import metadata
|
3
|
+
from os.path import dirname
|
4
|
+
|
5
|
+
__version__ = metadata.version(__package__)
|
6
|
+
BASEDIR = dirname(__file__)
|
7
|
+
|
8
|
+
commands_doc = """
|
9
|
+
audiofe Audio front end
|
10
|
+
calc_metric_spenh Run speech enhancement and analysis
|
11
|
+
doc Documentation
|
12
|
+
genft Generate feature and truth data
|
13
|
+
genmix Generate mixture and truth data
|
14
|
+
genmixdb Generate a mixture database
|
15
|
+
gentcst Generate target configuration from a subdirectory tree
|
16
|
+
lsdb List information about a mixture database
|
17
|
+
mkmanifest Make ASR manifest JSON file
|
18
|
+
mkwav Make WAV files from a mixture database
|
19
|
+
onnx_predict Run ONNX predict on a trained model
|
20
|
+
plot Plot mixture data
|
21
|
+
post_spenh_targetf Run post-processing for speech enhancement targetf data
|
22
|
+
tplot Plot truth data
|
23
|
+
vars List custom SonusAI variables
|
24
|
+
"""
|
25
|
+
|
26
|
+
# create logger
|
27
|
+
logger = logging.getLogger('sonusai')
|
28
|
+
logger.setLevel(logging.DEBUG)
|
29
|
+
formatter = logging.Formatter('%(message)s')
|
30
|
+
console_handler = logging.StreamHandler()
|
31
|
+
console_handler.setLevel(logging.DEBUG)
|
32
|
+
console_handler.setFormatter(formatter)
|
33
|
+
logger.addHandler(console_handler)
|
34
|
+
|
35
|
+
|
36
|
+
class SonusAIError(Exception):
|
37
|
+
def __init__(self, value):
|
38
|
+
logger.error(value)
|
39
|
+
|
40
|
+
|
41
|
+
# create file handler
|
42
|
+
def create_file_handler(filename: str) -> None:
|
43
|
+
fh = logging.FileHandler(filename=filename, mode='w')
|
44
|
+
fh.setLevel(logging.DEBUG)
|
45
|
+
fh.setFormatter(formatter)
|
46
|
+
logger.addHandler(fh)
|
47
|
+
|
48
|
+
|
49
|
+
# update console handler
|
50
|
+
def update_console_handler(verbose: bool) -> None:
|
51
|
+
if not verbose:
|
52
|
+
logger.removeHandler(console_handler)
|
53
|
+
console_handler.setLevel(logging.INFO)
|
54
|
+
logger.addHandler(console_handler)
|
55
|
+
|
56
|
+
|
57
|
+
# write initial log message
|
58
|
+
def initial_log_messages(name: str, subprocess: str = None) -> None:
|
59
|
+
from datetime import datetime
|
60
|
+
from getpass import getuser
|
61
|
+
from os import getcwd
|
62
|
+
from socket import gethostname
|
63
|
+
from sys import argv
|
64
|
+
|
65
|
+
if subprocess is None:
|
66
|
+
logger.info(f'SonusAI {__version__}')
|
67
|
+
else:
|
68
|
+
logger.info(f'SonusAI {subprocess}')
|
69
|
+
logger.info(f'{name}')
|
70
|
+
logger.info('')
|
71
|
+
logger.debug(f'Host: {gethostname()}')
|
72
|
+
logger.debug(f'User: {getuser()}')
|
73
|
+
logger.debug(f'Directory: {getcwd()}')
|
74
|
+
logger.debug(f'Date: {datetime.now()}')
|
75
|
+
logger.debug(f'Command: {" ".join(argv)}')
|
76
|
+
logger.debug('')
|
77
|
+
|
78
|
+
|
79
|
+
def commands_list(doc: str = commands_doc) -> list[str]:
|
80
|
+
lines = doc.split('\n')
|
81
|
+
commands = []
|
82
|
+
for line in lines:
|
83
|
+
command = line.strip().split(' ').pop(0)
|
84
|
+
if command:
|
85
|
+
commands.append(command)
|
86
|
+
return commands
|
@@ -34,7 +34,6 @@ audiofe_<TIMESTAMP>.h5.
|
|
34
34
|
from os.path import exists
|
35
35
|
from select import select
|
36
36
|
from sys import stdin
|
37
|
-
from typing import Any
|
38
37
|
|
39
38
|
import h5py
|
40
39
|
import numpy as np
|
@@ -58,7 +57,7 @@ from sonusai.utils import calc_asr
|
|
58
57
|
from sonusai.utils import create_timestamp
|
59
58
|
from sonusai.utils import get_input_device_index_by_name
|
60
59
|
from sonusai.utils import get_input_devices
|
61
|
-
from sonusai.utils import
|
60
|
+
from sonusai.utils import load_torchl_ckpt_model
|
62
61
|
from sonusai.utils import trim_docstring
|
63
62
|
from sonusai.utils import write_wav
|
64
63
|
|
@@ -124,7 +123,8 @@ def main() -> None:
|
|
124
123
|
logger.info(f'Capture audio ASR: {capture_asr}')
|
125
124
|
|
126
125
|
if model_name is not None:
|
127
|
-
model =
|
126
|
+
model = load_torchl_ckpt_model(model_name=model_name, ckpt_name=ckpt_name)
|
127
|
+
model.eval()
|
128
128
|
|
129
129
|
feature = get_feature_from_audio(audio=capture_audio, feature_mode=model.hparams.feature)
|
130
130
|
if debug:
|
@@ -149,7 +149,8 @@ def main() -> None:
|
|
149
149
|
# logger.info(f'Wrote reconstruct audio with shape {reconstruct_audio.shape} to {h5_name}')
|
150
150
|
|
151
151
|
with torch.no_grad():
|
152
|
-
|
152
|
+
# model wants batch x timesteps x feature_parameters
|
153
|
+
predict = model(torch.tensor(feature).permute((1, 0, 2))).permute(1, 0, 2).numpy()
|
153
154
|
if debug:
|
154
155
|
with h5py.File(h5_name, 'a') as f:
|
155
156
|
if 'predict' in f:
|
@@ -157,7 +158,7 @@ def main() -> None:
|
|
157
158
|
f.create_dataset('predict', data=predict)
|
158
159
|
logger.info(f'Wrote predict with shape {predict.shape} to {h5_name}')
|
159
160
|
|
160
|
-
predict_audio = get_audio_from_feature(feature=predict
|
161
|
+
predict_audio = get_audio_from_feature(feature=predict, feature_mode=model.hparams.feature)
|
161
162
|
write_wav(predict_name, predict_audio, SAMPLE_RATE)
|
162
163
|
logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {predict_name}')
|
163
164
|
if debug:
|
@@ -172,63 +173,6 @@ def main() -> None:
|
|
172
173
|
logger.info(f'Predict audio ASR: {predict_asr}')
|
173
174
|
|
174
175
|
|
175
|
-
def load_model(model_name: str, ckpt_name: str) -> Any:
|
176
|
-
batch_size = 1
|
177
|
-
timesteps = 0
|
178
|
-
|
179
|
-
# Load checkpoint first to get hparams if available
|
180
|
-
try:
|
181
|
-
checkpoint = torch.load(ckpt_name, map_location=lambda storage, loc: storage)
|
182
|
-
except Exception as e:
|
183
|
-
logger.exception(f'Error: could not load checkpoint from {ckpt_name}: {e}')
|
184
|
-
raise SystemExit(1)
|
185
|
-
|
186
|
-
# Import model definition file
|
187
|
-
logger.info(f'Importing {model_name}')
|
188
|
-
litemodule = import_keras_model(model_name)
|
189
|
-
|
190
|
-
if 'hyper_parameters' in checkpoint:
|
191
|
-
logger.info(f'Found checkpoint file with hyper-parameters')
|
192
|
-
hparams = checkpoint['hyper_parameters']
|
193
|
-
if hparams['batch_size'] != batch_size:
|
194
|
-
logger.info(
|
195
|
-
f'Overriding model default batch_size of {hparams["batch_size"]} with batch_size of {batch_size}')
|
196
|
-
hparams["batch_size"] = batch_size
|
197
|
-
|
198
|
-
if hparams['timesteps'] != 0 and timesteps == 0:
|
199
|
-
timesteps = hparams['timesteps']
|
200
|
-
logger.warning(f'Using model default timesteps of {timesteps}')
|
201
|
-
|
202
|
-
logger.info(f'Building model with {len(hparams)} total hparams')
|
203
|
-
try:
|
204
|
-
model = litemodule.MyHyperModel(**hparams)
|
205
|
-
except Exception as e:
|
206
|
-
logger.exception(f'Error: model build (MyHyperModel) in {model_name} failed: {e}')
|
207
|
-
raise SystemExit(1)
|
208
|
-
else:
|
209
|
-
logger.info(f'Found checkpoint file with no hyper-parameters')
|
210
|
-
logger.info(f'Building model with defaults')
|
211
|
-
try:
|
212
|
-
tmp = litemodule.MyHyperModel()
|
213
|
-
except Exception as e:
|
214
|
-
logger.exception(f'Error: model build (MyHyperModel) in {model_name} failed: {e}')
|
215
|
-
raise SystemExit(1)
|
216
|
-
|
217
|
-
if tmp.batch_size != batch_size:
|
218
|
-
logger.info(f'Overriding model default batch_size of {tmp.batch_size} with batch_size of {batch_size}')
|
219
|
-
|
220
|
-
if tmp.timesteps != 0 and timesteps == 0:
|
221
|
-
timesteps = tmp.timesteps
|
222
|
-
logger.warning(f'Using model default timesteps of {timesteps}')
|
223
|
-
|
224
|
-
model = litemodule.MyHyperModel(timesteps=timesteps, batch_size=batch_size)
|
225
|
-
|
226
|
-
logger.info(f'Loading weights from {ckpt_name}')
|
227
|
-
model.load_state_dict(checkpoint["state_dict"])
|
228
|
-
model.eval()
|
229
|
-
return model
|
230
|
-
|
231
|
-
|
232
176
|
def get_frames_from_device(input_name: str | None, length: float, chunk: int = 1024) -> AudioT:
|
233
177
|
p = pyaudio.PyAudio()
|
234
178
|
|
@@ -758,13 +758,18 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
|
|
758
758
|
predict = stack_complex(predict)
|
759
759
|
|
760
760
|
# 2) Collect true target, noise, mixture data, trim to predict size if needed
|
761
|
-
|
762
|
-
target_f = mixdb.
|
763
|
-
|
764
|
-
|
765
|
-
|
761
|
+
tmp = mixdb.mixture_targets(mixid) # targets is list of pre-IR and pre-specaugment targets
|
762
|
+
target_f = mixdb.mixture_targets_f(mixid, targets=tmp)[0]
|
763
|
+
target = tmp[0]
|
764
|
+
mixture = mixdb.mixture_mixture(mixid) # note: gives full reverberated/distorted target, but no specaugment
|
765
|
+
# noise_wodist = mixdb.mixture_noise(mixid) # noise without specaugment and distortion
|
766
|
+
# noise_wodist_f = mixdb.mixture_noise_f(mixid, noise=noise_wodist)
|
767
|
+
noise = mixture - target # has time-domain distortion (ir,etc.) but does not have specaugment
|
768
|
+
# noise_f = mixdb.mixture_noise_f(mixid, noise=noise)
|
769
|
+
segsnr_f = mixdb.mixture_segsnr(mixid, target=target, noise=noise) # note: uses pre-IR, pre-specaug audio
|
766
770
|
mixture_f = mixdb.mixture_mixture_f(mixid, mixture=mixture)
|
767
|
-
|
771
|
+
noise_f = mixture_f - target_f # true noise in freq domain includes specaugment and time-domain ir,distortions
|
772
|
+
# segsnr_f = mixdb.mixture_segsnr(mixid, target=target, noise=noise)
|
768
773
|
segsnr_f[segsnr_f == inf] = 7.944e8 # 99db
|
769
774
|
segsnr_f[segsnr_f == -inf] = 1.258e-10 # -99db
|
770
775
|
# need to use inv-tf to match #samples & latency shift properties of predict inv tf
|
@@ -920,8 +925,9 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
|
|
920
925
|
'NLERR': lerr_n_frame,
|
921
926
|
'SPD': phd_frame})
|
922
927
|
metr2 = metr2.describe() # Use pandas stat function
|
923
|
-
|
924
|
-
|
928
|
+
# Change SSNR stats to dB, except count. SSNR is index 0, pandas requires using iloc
|
929
|
+
# metr2['SSNR'][1:] = metr2['SSNR'][1:].apply(lambda x: 10 * np.log10(x + 1.01e-10))
|
930
|
+
metr2.iloc[1:, 0] = metr2['SSNR'][1:].apply(lambda x: 10 * np.log10(x + 1.01e-10))
|
925
931
|
# create a single row in multi-column header
|
926
932
|
new_labels = pd.MultiIndex.from_product([metr2.columns,
|
927
933
|
['Avg', 'Min', 'Med', 'Max', 'Std']],
|
@@ -978,11 +984,11 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
|
|
978
984
|
plot_fname = base_name + '_metric_spenh.pdf'
|
979
985
|
|
980
986
|
# Reshape feature to eliminate overlap redundancy for easier to understand spectrogram view
|
981
|
-
# Original size (frames, stride,
|
982
|
-
# Reshape to get frames*decimated_stride,
|
987
|
+
# Original size (frames, stride, num_bands), decimates in stride dimension only if step is > 1
|
988
|
+
# Reshape to get frames*decimated_stride, num_bands
|
983
989
|
step = int(mixdb.feature_samples / mixdb.feature_step_samples)
|
984
990
|
if feature.ndim != 3:
|
985
|
-
raise SonusAIError(f'feature does not have 3 dimensions: frames, stride,
|
991
|
+
raise SonusAIError(f'feature does not have 3 dimensions: frames, stride, num_bands')
|
986
992
|
|
987
993
|
# for feature cn*00n**
|
988
994
|
feat_sgram = unstack_complex(feature)
|
@@ -1166,7 +1172,7 @@ def main():
|
|
1166
1172
|
# Individual mixtures use pandas print, set precision to 2 decimal places
|
1167
1173
|
# pd.set_option('float_format', '{:.2f}'.format)
|
1168
1174
|
progress = tqdm(total=len(mixids), desc='calc_metric_spenh')
|
1169
|
-
all_metrics_tables = pp_tqdm_imap(_process_mixture, mixids, progress=progress, num_cpus=
|
1175
|
+
all_metrics_tables = pp_tqdm_imap(_process_mixture, mixids, progress=progress, num_cpus=8)
|
1170
1176
|
progress.close()
|
1171
1177
|
|
1172
1178
|
all_metrics_table_1 = pd.concat([item[0] for item in all_metrics_tables])
|
@@ -1192,6 +1198,7 @@ def main():
|
|
1192
1198
|
if ~np.isnan(tmp.iloc[0].to_numpy()[0]).any():
|
1193
1199
|
mtab_snr_summary_em = pd.concat([mtab_snr_summary_em, tmp])
|
1194
1200
|
|
1201
|
+
mtab_snr_summary = mtab_snr_summary.sort_values(by=['MXSNR'], ascending=False)
|
1195
1202
|
# Correct percentages in snr summary table
|
1196
1203
|
mtab_snr_summary['PESQi%'] = 100 * (mtab_snr_summary['PESQ'] - mtab_snr_summary['MXPESQ']) / np.maximum(
|
1197
1204
|
mtab_snr_summary['MXPESQ'], 0.01)
|
@@ -1202,9 +1209,11 @@ def main():
|
|
1202
1209
|
else:
|
1203
1210
|
mtab_snr_summary['WERi%'].iloc[i] = -999.0
|
1204
1211
|
else:
|
1205
|
-
mtab_snr_summary['
|
1206
|
-
|
1207
|
-
|
1212
|
+
if ~np.isnan(mtab_snr_summary['WER'].iloc[i]) and ~np.isnan(mtab_snr_summary['MXWER'].iloc[i]):
|
1213
|
+
# update WERi% in 6th col
|
1214
|
+
mtab_snr_summary.iloc[i, 6] = 100 * (mtab_snr_summary['MXWER'].iloc[i] -
|
1215
|
+
mtab_snr_summary['WER'].iloc[i]) / \
|
1216
|
+
mtab_snr_summary['MXWER'].iloc[i]
|
1208
1217
|
|
1209
1218
|
# Calculate avg metrics over all mixtures except -99
|
1210
1219
|
all_mtab1_sorted_nom99 = all_mtab1_sorted[all_mtab1_sorted.MXSNR != -99]
|
@@ -225,7 +225,7 @@ def genmixdb(location: str,
|
|
225
225
|
if logging:
|
226
226
|
logger.info('Collecting impulse responses')
|
227
227
|
|
228
|
-
impulse_response_files = get_impulse_response_files(config
|
228
|
+
impulse_response_files = get_impulse_response_files(config)
|
229
229
|
|
230
230
|
populate_impulse_response_file_table(location, impulse_response_files, test)
|
231
231
|
|
@@ -0,0 +1,90 @@
|
|
1
|
+
"""sonusai
|
2
|
+
|
3
|
+
usage: sonusai [--version] [--help] <command> [<args>...]
|
4
|
+
|
5
|
+
The sonusai commands are:
|
6
|
+
<This information is automatically generated.>
|
7
|
+
|
8
|
+
Aaware Sound and Voice Machine Learning Framework. See 'sonusai help <command>'
|
9
|
+
for more information on a specific command.
|
10
|
+
|
11
|
+
"""
|
12
|
+
import signal
|
13
|
+
|
14
|
+
|
15
|
+
def signal_handler(_sig, _frame):
|
16
|
+
import sys
|
17
|
+
|
18
|
+
from sonusai import logger
|
19
|
+
|
20
|
+
logger.info('Canceled due to keyboard interrupt')
|
21
|
+
sys.exit(1)
|
22
|
+
|
23
|
+
|
24
|
+
signal.signal(signal.SIGINT, signal_handler)
|
25
|
+
|
26
|
+
|
27
|
+
def main() -> None:
    """SonusAI command-line dispatcher.

    Discovers installed plugin packages (top-level modules named 'sonusai_*',
    excluding 'sonusai_asr_*'), merges their command documentation into the
    usage text, then dispatches <command> to the matching script in a
    subprocess, exiting with that subprocess's return code.
    """
    from importlib import import_module
    from pkgutil import iter_modules

    from sonusai import commands_list

    # Discover plugins: each plugin module exposes a commands_doc string and a
    # BASEDIR directory containing one script per command.
    plugins = {}
    plugin_docstrings = []
    for _, name, _ in iter_modules():
        if name.startswith('sonusai_') and not name.startswith('sonusai_asr_'):
            module = import_module(name)
            plugins[name] = {
                'commands': commands_list(module.commands_doc),
                'basedir': module.BASEDIR,
            }
            plugin_docstrings.append(module.commands_doc)

    from docopt import docopt

    from sonusai import __version__
    from sonusai.utils import add_commands_to_docstring
    from sonusai.utils import trim_docstring

    # options_first=True stops option parsing at <command> so the command's own
    # options are passed through untouched in <args>.
    args = docopt(trim_docstring(add_commands_to_docstring(__doc__, plugin_docstrings)),
                  version=__version__,
                  options_first=True)

    command = args['<command>']
    argv = args['<args>']

    import sys
    from os.path import join
    from subprocess import call

    import sonusai
    from sonusai import logger

    base_commands = sonusai.commands_list()

    if command == 'help':
        if not argv:
            sys.exit(call(['sonusai', '-h']))
        if argv[0] in base_commands:
            sys.exit(call(['python', f'{join(sonusai.BASEDIR, argv[0])}.py', '-h']))

        for data in plugins.values():
            if argv[0] in data['commands']:
                sys.exit(call(['python', f'{join(data["basedir"], argv[0])}.py', '-h']))

        logger.error(f"{argv[0]} is not a SonusAI command. See 'sonusai help'.")
        sys.exit(1)

    if command in base_commands:
        sys.exit(call(['python', f'{join(sonusai.BASEDIR, command)}.py'] + argv))

    for data in plugins.values():
        if command in data['commands']:
            sys.exit(call(['python', f'{join(data["basedir"], command)}.py'] + argv))

    logger.error(f"{command} is not a SonusAI command. See 'sonusai help'.")
    sys.exit(1)
|
87
|
+
|
88
|
+
|
89
|
+
if __name__ == '__main__':
|
90
|
+
main()
|
@@ -108,6 +108,7 @@ from .helpers import get_transform_from_audio
|
|
108
108
|
from .helpers import get_truth_t
|
109
109
|
from .helpers import inverse_transform
|
110
110
|
from .helpers import mixture_metadata
|
111
|
+
from .helpers import read_mixture_data
|
111
112
|
from .helpers import write_mixture_data
|
112
113
|
from .helpers import write_mixture_metadata
|
113
114
|
from .log_duration_and_sizes import log_duration_and_sizes
|
@@ -480,11 +480,10 @@ def append_noise_files(entry: dict | str, tokens: dict = None) -> list[dict]:
|
|
480
480
|
return noise_files
|
481
481
|
|
482
482
|
|
483
|
-
def get_impulse_response_files(config: dict
|
483
|
+
def get_impulse_response_files(config: dict) -> ImpulseResponseFiles:
|
484
484
|
"""Get the list of impulse response files from a config
|
485
485
|
|
486
486
|
:param config: Config dictionary
|
487
|
-
:param show_progress: Show progress bar
|
488
487
|
:return: List of impulse response files
|
489
488
|
"""
|
490
489
|
from itertools import chain
|
@@ -30,6 +30,8 @@ Inputs:
|
|
30
30
|
- 'librispeech'
|
31
31
|
- 'vctk_noisy_speech' expects subdirs named like <name>_wav/ and <name>_txt/ with files in
|
32
32
|
each using same basename, but with .wav and .txt respectively.
|
33
|
+
- 'mcgill-speech' expects audio data in basename/speakerid/speakerid-promptid.wav and
|
34
|
+
transcript data in Scripts/HarvardLists.dat
|
33
35
|
ADAT Audio data environment variable. All found files will be expanded to their full, absolute path and
|
34
36
|
then parts of the path that match the specified environment variable value will be replaced with
|
35
37
|
the variable. This accommodates portability across platforms where the sound datasets may in
|
@@ -42,11 +44,11 @@ Outputs the following to the current directory:
|
|
42
44
|
|
43
45
|
Example usage for LibriSpeech:
|
44
46
|
sonusai mkmanifest -mlibrispeech -eADAT -oasr_manifest.json --include='*.flac' train-clean-100
|
45
|
-
|
47
|
+
sonusai mkmanifest -m mcgill-speech -e ADAT -o asr_manifest_16k.json 16k-LP7/
|
46
48
|
"""
|
47
49
|
from sonusai import logger
|
48
50
|
|
49
|
-
VALID_METHOD = ['librispeech', 'vctk_noisy_speech']
|
51
|
+
VALID_METHOD = ['librispeech', 'vctk_noisy_speech', 'mcgill-speech']
|
50
52
|
|
51
53
|
|
52
54
|
def main() -> None:
|
@@ -88,6 +90,7 @@ def main() -> None:
|
|
88
90
|
from sonusai.utils.asr_manifest_functions import collect_vctk_noisy_speech_transcripts
|
89
91
|
from sonusai.utils.asr_manifest_functions import get_librispeech_manifest_entry
|
90
92
|
from sonusai.utils.asr_manifest_functions import get_vctk_noisy_speech_manifest_entry
|
93
|
+
from sonusai.utils.asr_manifest_functions import get_mcgill_speech_manifest_entry
|
91
94
|
|
92
95
|
start_time = time.monotonic()
|
93
96
|
|
@@ -160,6 +163,30 @@ def main() -> None:
|
|
160
163
|
for result in results:
|
161
164
|
f.write(json.dumps(result) + '\n')
|
162
165
|
|
166
|
+
if method == 'mcgill-speech':
|
167
|
+
logger.info(f'Found {len(entries)} Mcgill Speech files, opening prompt file ...')
|
168
|
+
# Note expecting only one path pointing to data subdir
|
169
|
+
if len(paths) != 1:
|
170
|
+
raise SonusAIError(f'mcgill-speech only support a single path')
|
171
|
+
prompt_fpath = join(join(realpath(abspath(paths[0]))), '../Scripts/HarvardList.dat')
|
172
|
+
with open(prompt_fpath, encoding='utf-8') as f:
|
173
|
+
lines = f.readlines()
|
174
|
+
|
175
|
+
logger.info(f'Found {len(lines) - 4} entries in prompt file.')
|
176
|
+
# First 4 lines are header stuff, can use remaining directly with simple lookup
|
177
|
+
# example line: '01_02:Glue the sheet ...\n' (paragraph 1, sentence 2)
|
178
|
+
# 11 entries per group, so getting line is 11*(p1-1)+(s2-1)
|
179
|
+
lines = lines[4:]
|
180
|
+
|
181
|
+
processing_func = partial(get_mcgill_speech_manifest_entry, transcript_data=lines)
|
182
|
+
progress = tqdm(total=len(entries), desc='Creating Mcgill Speech manifest data')
|
183
|
+
results = pp_tqdm_imap(processing_func, entries, progress=progress)
|
184
|
+
progress.close()
|
185
|
+
|
186
|
+
with open(output, 'w') as f:
|
187
|
+
for result in results:
|
188
|
+
f.write(json.dumps(result) + '\n')
|
189
|
+
|
163
190
|
end_time = time.monotonic()
|
164
191
|
logger.info('')
|
165
192
|
logger.info(f'Completed in {seconds_to_hms(seconds=end_time - start_time)}')
|
@@ -1,5 +1,6 @@
|
|
1
1
|
# SonusAI general utilities
|
2
2
|
from .asl_p56 import asl_p56
|
3
|
+
from .asr import ASRData
|
3
4
|
from .asr import ASRResult
|
4
5
|
from .asr import calc_asr
|
5
6
|
from .audio_devices import get_default_input_device
|
@@ -14,24 +15,21 @@ from .create_ts_name import create_ts_name
|
|
14
15
|
from .dataclass_from_dict import dataclass_from_dict
|
15
16
|
from .db import db_to_linear
|
16
17
|
from .db import linear_to_db
|
18
|
+
from .docstring import add_commands_to_docstring
|
19
|
+
from .docstring import trim_docstring
|
17
20
|
from .energy_f import compute_energy_f
|
18
21
|
from .engineering_number import EngineeringNumber
|
19
22
|
from .get_frames_per_batch import get_frames_per_batch
|
20
23
|
from .get_label_names import get_label_names
|
21
24
|
from .grouper import grouper
|
22
25
|
from .human_readable_size import human_readable_size
|
23
|
-
from .keras_utils import check_keras_overrides
|
24
|
-
from .keras_utils import create_onnx_from_keras
|
25
|
-
from .keras_utils import import_and_check_keras_model
|
26
|
-
from .keras_utils import import_keras_model
|
27
|
-
from .keras_utils import keras_onnx
|
28
26
|
from .max_text_width import max_text_width
|
27
|
+
from .model_utils import import_module
|
29
28
|
from .numeric_conversion import float_to_int16
|
30
29
|
from .numeric_conversion import int16_to_float
|
31
30
|
from .onnx_utils import SonusAIMetaData
|
32
31
|
from .onnx_utils import add_sonusai_metadata
|
33
32
|
from .onnx_utils import get_sonusai_metadata
|
34
|
-
from .onnx_utils import replace_stateful_grus
|
35
33
|
from .parallel import pp_imap
|
36
34
|
from .parallel import pp_tqdm_imap
|
37
35
|
from .print_mixture_details import print_class_count
|
@@ -50,6 +48,5 @@ from .stacked_complex import stacked_complex_imag
|
|
50
48
|
from .stacked_complex import stacked_complex_real
|
51
49
|
from .stacked_complex import unstack_complex
|
52
50
|
from .stratified_shuffle_split import stratified_shuffle_split_mixid
|
53
|
-
from .trim_docstring import trim_docstring
|
54
51
|
from .wave import write_wav
|
55
52
|
from .yes_or_no import yes_or_no
|