sonusai 0.12.6__tar.gz → 0.12.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sonusai-0.12.6 → sonusai-0.12.7}/PKG-INFO +1 -1
- {sonusai-0.12.6 → sonusai-0.12.7}/pyproject.toml +1 -1
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/calc_metric_spenh.py +1 -1
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/genft.py +1 -1
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/genmix.py +1 -1
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/genmixdb.py +1 -1
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/main.py +2 -2
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/config.py +2 -2
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/mixdb.py +1 -1
- sonusai-0.12.7/sonusai/mkmanifest.py +174 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mkwav.py +2 -2
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/post_spenh_targetf.py +1 -1
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/__init__.py +1 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/asr.py +6 -5
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/asr_functions/data.py +4 -3
- sonusai-0.12.7/sonusai/utils/asr_manifest_functions/__init__.py +6 -0
- sonusai-0.12.7/sonusai/utils/asr_manifest_functions/data.py +10 -0
- sonusai-0.12.7/sonusai/utils/asr_manifest_functions/librispeech.py +49 -0
- sonusai-0.12.7/sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +69 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/braced_glob.py +10 -3
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/parallel_tqdm.py +5 -4
- {sonusai-0.12.6 → sonusai-0.12.7}/README.rst +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/__init__.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/aawscd_probwrite.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/data/__init__.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/data/genmixdb.yml +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/data/speech_ma01_01.wav +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/data/whitenoise.wav +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/data_generator/__init__.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/data_generator/dataset_from_mixdb.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/data_generator/keras_from_mixdb.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/data_generator/torch_from_mixdb.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/evaluate.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/gentcst.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/keras_onnx.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/keras_predict.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/keras_train.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/lsdb.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/__init__.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/calc_class_weights.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/calc_optimal_thresholds.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/calc_pcm.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/calc_pesq.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/calc_sa_sdr.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/calc_sample_weights.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/calc_wer.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/calc_wsdr.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/class_summary.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/confusion_matrix_summary.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/one_hot.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/snr_summary.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/__init__.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/active_truth_class_balancing.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/audio.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/augmentation.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/balance.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/class_count.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/constants.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/feature.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/generate_mixtures.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/initialize.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/log_duration_and_sizes.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/mapped_snr_f.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/spectral_mask.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/target_class_balancing.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/targets.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/tokenized_shell_vars.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/truth.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/truth_functions/__init__.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/truth_functions/crm.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/truth_functions/data.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/truth_functions/energy.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/truth_functions/file.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/truth_functions/phoneme.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/truth_functions/sed.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/truth_functions/target.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/types.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/onnx_predict.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/plot.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/queries/__init__.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/queries/queries.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/torchl_predict.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/torchl_train.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/tplot.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/asl_p56.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/asr_functions/__init__.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/asr_functions/aixplain_whisper.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/asr_functions/deepgram.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/asr_functions/google.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/asr_functions/whisper.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/calculate_input_shape.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/create_ts_name.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/dataclass_from_dict.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/db.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/energy_f.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/engineering_number.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/get_frames_per_batch.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/get_label_names.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/grouper.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/human_readable_size.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/keras_utils.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/max_text_width.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/numeric_conversion.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/onnx_utils.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/parallel.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/print_mixture_details.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/ranges.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/read_mixture_data.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/read_predict_data.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/reshape.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/seconds_to_hms.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/stacked_complex.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/stratified_shuffle_split.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/trim_docstring.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/wave.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/yes_or_no.py +0 -0
- {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/vars.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "sonusai"
|
3
|
-
version = "0.12.6"
|
3
|
+
version = "0.12.7"
|
4
4
|
description = "Framework for building deep neural network models for sound, speech, and voice AI"
|
5
5
|
authors = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
|
6
6
|
maintainers = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
|
@@ -808,7 +808,7 @@ def main() -> None:
|
|
808
808
|
# Individual mixtures use pandas print, set precision to 2 decimal places
|
809
809
|
# pd.set_option('float_format', '{:.2f}'.format)
|
810
810
|
progress = tqdm(total=len(mixids))
|
811
|
-
all_metrics_tables = p_tqdm_map(_process_mixture, mixids, progress=progress)
|
811
|
+
all_metrics_tables = p_tqdm_map(_process_mixture, mixids, progress=progress, chunksize=10)
|
812
812
|
progress.close()
|
813
813
|
|
814
814
|
all_metrics_table_1 = pd.concat([item[0] for item in all_metrics_tables])
|
@@ -72,7 +72,7 @@ def genft(mixdb: MixtureDatabase,
|
|
72
72
|
results.append(_genft_kernel(mixid))
|
73
73
|
else:
|
74
74
|
progress = tqdm(total=len(mixids), disable=not show_progress)
|
75
|
-
results = p_tqdm_map(_genft_kernel, mixids, progress=progress)
|
75
|
+
results = p_tqdm_map(_genft_kernel, mixids, progress=progress, chunksize=10)
|
76
76
|
progress.close()
|
77
77
|
|
78
78
|
return results
|
@@ -79,7 +79,7 @@ def genmix(mixdb: MixtureDatabase,
|
|
79
79
|
results.append(_genmix_kernel(mixid))
|
80
80
|
else:
|
81
81
|
progress = tqdm(total=len(mixids), disable=not show_progress)
|
82
|
-
results = p_tqdm_map(_genmix_kernel, mixids, progress=progress)
|
82
|
+
results = p_tqdm_map(_genmix_kernel, mixids, progress=progress, chunksize=10)
|
83
83
|
progress.close()
|
84
84
|
|
85
85
|
return results
|
@@ -345,7 +345,7 @@ def genmixdb(location: Location,
|
|
345
345
|
if logging:
|
346
346
|
logger.info('Generating mixtures')
|
347
347
|
progress = tqdm(total=total_mixtures, disable=not show_progress)
|
348
|
-
mixdb.mixtures = p_tqdm_map(_process_mixture, range(total_mixtures), progress=progress)
|
348
|
+
mixdb.mixtures = p_tqdm_map(_process_mixture, range(total_mixtures), progress=progress, chunksize=10)
|
349
349
|
progress.close()
|
350
350
|
|
351
351
|
total_samples = mixdb.total_samples()
|
@@ -4,7 +4,6 @@ usage: sonusai [--version] [--help] <command> [<args>...]
|
|
4
4
|
|
5
5
|
The sonusai commands are:
|
6
6
|
calc_metric_spenh Run speech enhancement and analysis
|
7
|
-
calc_metric_spenh_targetf Run speech enhancement and analysis for targetf truth (deprecated)
|
8
7
|
evaluate Evaluate model performance
|
9
8
|
genft Generate feature and truth data
|
10
9
|
genmix Generate mixture and truth data
|
@@ -14,6 +13,7 @@ The sonusai commands are:
|
|
14
13
|
keras_train Train a model using Keras
|
15
14
|
keras_onnx Convert a trained Keras model to ONNX
|
16
15
|
lsdb List information about a mixture database
|
16
|
+
mkmanifest Make ASR manifest JSON file
|
17
17
|
mkwav Make WAV files from a mixture database
|
18
18
|
onnx_predict Run ONNX predict on a trained model
|
19
19
|
plot Plot mixture data
|
@@ -38,7 +38,6 @@ def main() -> None:
|
|
38
38
|
|
39
39
|
commands = (
|
40
40
|
'calc_metric_spenh',
|
41
|
-
'calc_metric_spenh_targetf',
|
42
41
|
'evaluate',
|
43
42
|
'genft',
|
44
43
|
'genmix',
|
@@ -48,6 +47,7 @@ def main() -> None:
|
|
48
47
|
'keras_train',
|
49
48
|
'keras_onnx',
|
50
49
|
'lsdb',
|
50
|
+
'mkmanifest',
|
51
51
|
'mkwav',
|
52
52
|
'onnx_predict',
|
53
53
|
'plot',
|
@@ -250,7 +250,7 @@ def get_target_files(config: dict, show_progress: bool = False) -> TargetFiles:
|
|
250
250
|
for target in config['targets']]))
|
251
251
|
|
252
252
|
progress = tqdm(total=len(target_files), disable=not show_progress)
|
253
|
-
target_files = p_tqdm_map(_get_samples, target_files, progress=progress)
|
253
|
+
target_files = p_tqdm_map(_get_samples, target_files, progress=progress, chunksize=10)
|
254
254
|
progress.close()
|
255
255
|
|
256
256
|
max_class = get_max_class(config['num_classes'], config['truth_mode'] == 'mutex')
|
@@ -394,7 +394,7 @@ def get_noise_files(config: dict, show_progress: bool = False) -> NoiseFiles:
|
|
394
394
|
noise_files = list(chain.from_iterable([_append_noise_files(noise_file=noise) for noise in config['noises']]))
|
395
395
|
|
396
396
|
progress = tqdm(total=len(noise_files), disable=not show_progress)
|
397
|
-
noise_files = p_tqdm_map(_get_samples, noise_files, progress=progress)
|
397
|
+
noise_files = p_tqdm_map(_get_samples, noise_files, progress=progress, chunksize=10)
|
398
398
|
progress.close()
|
399
399
|
|
400
400
|
return dataclass_from_dict(NoiseFiles, noise_files)
|
@@ -1065,7 +1065,7 @@ class MixtureDatabase:
|
|
1065
1065
|
:param mixid: Mixture ID
|
1066
1066
|
:param targets: List of augmented target audio data (one per target in the mixup) for the given mixid
|
1067
1067
|
:param noise: Augmented noise audio data for the given mixid
|
1068
|
-
:param force: Force computing data from original sources regardless of whether
|
1068
|
+
:param force: Force computing data from original sources regardless of whether cached data exists
|
1069
1069
|
:return: truth_t data
|
1070
1070
|
"""
|
1071
1071
|
import numpy as np
|
@@ -0,0 +1,174 @@
|
|
1
|
+
"""mkmanifest
|
2
|
+
|
3
|
+
usage: mkmanifest [-hvn] [--include GLOB] [-m METHOD] [-e ADAT] [-o OUTPUT] PATH ...
|
4
|
+
|
5
|
+
options:
|
6
|
+
-h, --help
|
7
|
+
-v, --verbose Be verbose: list all files found.
|
8
|
+
-n, --dry-run Collect files, but exit without processing and writing manifest file.
|
9
|
+
--include GLOB Search only files whose base name matches GLOB. [default: *.{wav,flac}].
|
10
|
+
-m METHOD, --method METHOD Method for getting the true speech text of the audio files. [default: librispeech].
|
11
|
+
-e ADAT, --audio-env ADAT Environment variable pointing to all audio data.
|
12
|
+
-o OUTPUT, --output OUTPUT Output file name. [default: asr_manifest.json].
|
13
|
+
|
14
|
+
Make a speech recognition (ASR) .json manifest file of all audio files under PATHS following the NVIDIA NeMo format.
|
15
|
+
An example of manifest entries:
|
16
|
+
|
17
|
+
{"audio_filepath": "<absolute_path_to>/1355-39947-0000.wav", "duration": 11.3, "text": "psychotherapy ..."}
|
18
|
+
{"audio_filepath": "<absolute_path_to>/1355-39947-0001.wav", "duration": 15.905, "text": "it is an ..."}
|
19
|
+
|
20
|
+
See the NVIDIA NeMo docs for more information:
|
21
|
+
https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/datasets.html
|
22
|
+
|
23
|
+
Inputs:
|
24
|
+
PATH A relative path name or list of paths containing audio files. Each will be
|
25
|
+
recursively searched for files matching the pattern GLOB.
|
26
|
+
GLOB Match the pattern GLOB using wildcard matching.
|
27
|
+
Example: '*.{wav,flac}' matches all .wav and .flac files.
|
28
|
+
METHOD The method to use for fetching the true speech of the audio files.
|
29
|
+
Supported methods:
|
30
|
+
- 'librispeech'
|
31
|
+
- 'vctk_noisy_speech'
|
32
|
+
ADAT Audio data environment variable. All found files will be expanded to their full, absolute path and
|
33
|
+
then parts of the path that match the specified environment variable value will be replaced with
|
34
|
+
the variable. This accommodates portability across platforms where the sound datasets may be in
|
35
|
+
different locations.
|
36
|
+
OUTPUT Name of output file. Default is asr_manifest.json.
|
37
|
+
|
38
|
+
Outputs the following to the current directory:
|
39
|
+
<OUTPUT>
|
40
|
+
mkmanifest.log
|
41
|
+
|
42
|
+
Example usage for LibriSpeech:
|
43
|
+
sonusai mkmanifest -mlibrispeech -eADAT -oasr_manifest.json --include='*.flac' train-clean-100
|
44
|
+
|
45
|
+
"""
|
46
|
+
from sonusai import logger
|
47
|
+
|
48
|
+
VALID_METHOD = ['librispeech', 'vctk_noisy_speech']
|
49
|
+
|
50
|
+
|
51
|
+
def main() -> None:
|
52
|
+
from docopt import docopt
|
53
|
+
|
54
|
+
import sonusai
|
55
|
+
from sonusai.utils import trim_docstring
|
56
|
+
|
57
|
+
args = docopt(trim_docstring(__doc__), version=sonusai.__version__, options_first=True)
|
58
|
+
|
59
|
+
verbose = args['--verbose']
|
60
|
+
dry_run = args['--dry-run']
|
61
|
+
include = args['--include']
|
62
|
+
method = args['--method']
|
63
|
+
audio_env = args['--audio-env']
|
64
|
+
output = args['--output']
|
65
|
+
paths = args['PATH']
|
66
|
+
|
67
|
+
import json
|
68
|
+
from functools import partial
|
69
|
+
import time
|
70
|
+
from os import environ
|
71
|
+
from os.path import abspath
|
72
|
+
from os.path import join
|
73
|
+
from os.path import realpath
|
74
|
+
from typing import List
|
75
|
+
|
76
|
+
from tqdm import tqdm
|
77
|
+
|
78
|
+
from sonusai import SonusAIError
|
79
|
+
from sonusai import create_file_handler
|
80
|
+
from sonusai import initial_log_messages
|
81
|
+
from sonusai import logger
|
82
|
+
from sonusai import update_console_handler
|
83
|
+
from sonusai.utils import braced_iglob
|
84
|
+
from sonusai.utils import p_tqdm_map
|
85
|
+
from sonusai.utils import seconds_to_hms
|
86
|
+
from sonusai.utils.asr_manifest_functions import PathInfo
|
87
|
+
from sonusai.utils.asr_manifest_functions import collect_librispeech_transcripts
|
88
|
+
from sonusai.utils.asr_manifest_functions import collect_vctk_noisy_speech_transcripts
|
89
|
+
from sonusai.utils.asr_manifest_functions import get_librispeech_manifest_entry
|
90
|
+
from sonusai.utils.asr_manifest_functions import get_vctk_noisy_speech_manifest_entry
|
91
|
+
|
92
|
+
start_time = time.monotonic()
|
93
|
+
|
94
|
+
create_file_handler('mkmanifest.log')
|
95
|
+
update_console_handler(verbose)
|
96
|
+
initial_log_messages('mkmanifest')
|
97
|
+
|
98
|
+
if method not in VALID_METHOD:
|
99
|
+
raise SonusAIError(f'Unknown method: {method}')
|
100
|
+
|
101
|
+
audio_dir = None
|
102
|
+
if audio_env is not None:
|
103
|
+
audio_dir = realpath(environ[audio_env])
|
104
|
+
if audio_dir is None:
|
105
|
+
raise SonusAIError(f'Unknown environment variable: {audio_env}')
|
106
|
+
|
107
|
+
if audio_env:
|
108
|
+
for p in paths:
|
109
|
+
if not realpath(abspath(p)).startswith(audio_dir):
|
110
|
+
logger.warning(f'Specified directory, {p}, is not part of the provided audio environment: '
|
111
|
+
f'${audio_env}={audio_dir}')
|
112
|
+
|
113
|
+
logger.info('')
|
114
|
+
logger.info(f'Searching {len(paths)} provided director{"ies" if len(paths) > 1 else "y"}...')
|
115
|
+
|
116
|
+
entries: List[PathInfo] = []
|
117
|
+
for p in paths:
|
118
|
+
location = join(realpath(abspath(p)), '**', include)
|
119
|
+
logger.debug(f'Processing {location}')
|
120
|
+
for file in braced_iglob(pathname=location, recursive=True):
|
121
|
+
name = file
|
122
|
+
if audio_env is not None:
|
123
|
+
name = name.replace(audio_dir, f'${audio_env}')
|
124
|
+
entries.append(PathInfo(abs_path=file, audio_filepath=name))
|
125
|
+
logger.debug('')
|
126
|
+
|
127
|
+
logger.info(f'Found {len(entries)} audio file{"s" if len(entries) != 1 else ""}')
|
128
|
+
|
129
|
+
if dry_run:
|
130
|
+
logger.info('')
|
131
|
+
logger.info('Dry run')
|
132
|
+
logger.info('')
|
133
|
+
for entry in entries:
|
134
|
+
logger.info(f' - {entry.audio_filepath}')
|
135
|
+
return
|
136
|
+
|
137
|
+
if method == 'librispeech':
|
138
|
+
logger.info('Collecting LibriSpeech transcript data')
|
139
|
+
transcript_data = collect_librispeech_transcripts(paths=paths)
|
140
|
+
|
141
|
+
processing_func = partial(get_librispeech_manifest_entry, transcript_data=transcript_data)
|
142
|
+
progress = tqdm(total=len(entries), desc='Creating LibriSpeech manifest data')
|
143
|
+
results = p_tqdm_map(processing_func, entries, progress=progress, chunksize=10)
|
144
|
+
progress.close()
|
145
|
+
|
146
|
+
with open(output, 'w') as f:
|
147
|
+
for result in results:
|
148
|
+
f.write(json.dumps(result) + '\n')
|
149
|
+
|
150
|
+
if method == 'vctk_noisy_speech':
|
151
|
+
logger.info('Collecting VCTK Noisy Speech transcript data')
|
152
|
+
transcript_data = collect_vctk_noisy_speech_transcripts(paths=paths)
|
153
|
+
|
154
|
+
processing_func = partial(get_vctk_noisy_speech_manifest_entry, transcript_data=transcript_data)
|
155
|
+
progress = tqdm(total=len(entries), desc='Creating VCTK Noisy Speech manifest data')
|
156
|
+
results = p_tqdm_map(processing_func, entries, progress=progress, chunksize=10)
|
157
|
+
progress.close()
|
158
|
+
|
159
|
+
with open(output, 'w') as f:
|
160
|
+
for result in results:
|
161
|
+
f.write(json.dumps(result) + '\n')
|
162
|
+
|
163
|
+
end_time = time.monotonic()
|
164
|
+
logger.info('')
|
165
|
+
logger.info(f'Completed in {seconds_to_hms(seconds=end_time - start_time)}')
|
166
|
+
logger.info('')
|
167
|
+
|
168
|
+
|
169
|
+
if __name__ == '__main__':
|
170
|
+
try:
|
171
|
+
main()
|
172
|
+
except KeyboardInterrupt:
|
173
|
+
logger.info('Canceled due to keyboard interrupt')
|
174
|
+
raise SystemExit(0)
|
@@ -85,7 +85,7 @@ def _process_mixture(mixid: int) -> None:
|
|
85
85
|
with h5py.File(mixture_filename, 'r') as f:
|
86
86
|
mixture = np.array(f['mixture'])
|
87
87
|
if MP_GLOBAL.write_target:
|
88
|
-
target = sum(np.array(f['targets']))
|
88
|
+
target = np.sum(np.array(f['targets']), axis=0)
|
89
89
|
if MP_GLOBAL.write_noise:
|
90
90
|
noise = np.array(f['noise'])
|
91
91
|
|
@@ -148,7 +148,7 @@ def main() -> None:
|
|
148
148
|
MP_GLOBAL.write_noise = write_noise
|
149
149
|
|
150
150
|
progress = tqdm(total=len(mixid))
|
151
|
-
p_tqdm_map(_process_mixture, mixid, progress=progress)
|
151
|
+
p_tqdm_map(_process_mixture, mixid, progress=progress, chunksize=10)
|
152
152
|
progress.close()
|
153
153
|
|
154
154
|
logger.info(f'Wrote {len(mixid)} mixtures to {location}')
|
@@ -101,7 +101,7 @@ def main() -> None:
|
|
101
101
|
logger.info(f'Found {len(input_name):,} files to process')
|
102
102
|
|
103
103
|
progress = tqdm(total=len(input_name))
|
104
|
-
p_tqdm_map(_process, input_name, progress=progress)
|
104
|
+
p_tqdm_map(_process, input_name, progress=progress, chunksize=10)
|
105
105
|
progress.close()
|
106
106
|
|
107
107
|
logger.info(f'Wrote {len(input_name)} mixtures to {output_dir}')
|
@@ -3,6 +3,7 @@ from sonusai.utils.asl_p56 import asl_p56
|
|
3
3
|
from sonusai.utils.asr import ASRResult
|
4
4
|
from sonusai.utils.asr import calc_asr
|
5
5
|
from sonusai.utils.braced_glob import braced_glob
|
6
|
+
from sonusai.utils.braced_glob import braced_iglob
|
6
7
|
from sonusai.utils.calculate_input_shape import calculate_input_shape
|
7
8
|
from sonusai.utils.create_ts_name import create_ts_name
|
8
9
|
from sonusai.utils.dataclass_from_dict import dataclass_from_dict
|
@@ -1,5 +1,6 @@
|
|
1
1
|
from dataclasses import dataclass
|
2
2
|
from typing import Any
|
3
|
+
from typing import Optional
|
3
4
|
from typing import Union
|
4
5
|
|
5
6
|
from sonusai.mixture import AudioT
|
@@ -9,14 +10,14 @@ from sonusai.mixture import Location
|
|
9
10
|
@dataclass(frozen=True)
|
10
11
|
class ASRResult:
|
11
12
|
text: str
|
12
|
-
confidence: float = None
|
13
|
+
confidence: Optional[float] = None
|
13
14
|
|
14
15
|
|
15
16
|
def calc_asr(audio: Union[AudioT, Location],
|
16
|
-
engine: str = 'deepgram',
|
17
|
-
whisper_model: Any = None,
|
18
|
-
whisper_model_name: str = 'base.en',
|
19
|
-
device: str = None) -> ASRResult:
|
17
|
+
engine: Optional[str] = 'deepgram',
|
18
|
+
whisper_model: Optional[Any] = None,
|
19
|
+
whisper_model_name: Optional[str] = 'base.en',
|
20
|
+
device: Optional[str] = None) -> ASRResult:
|
20
21
|
"""Run ASR on audio
|
21
22
|
|
22
23
|
:param audio: Numpy array of audio samples or location of an audio file
|
@@ -1,5 +1,6 @@
|
|
1
1
|
from dataclasses import dataclass
|
2
2
|
from typing import Any
|
3
|
+
from typing import Optional
|
3
4
|
|
4
5
|
from sonusai.mixture.types import AudioT
|
5
6
|
|
@@ -7,6 +8,6 @@ from sonusai.mixture.types import AudioT
|
|
7
8
|
@dataclass(frozen=True)
|
8
9
|
class Data:
|
9
10
|
audio: AudioT
|
10
|
-
whisper_model: Any = None
|
11
|
-
whisper_model_name: str = None
|
12
|
-
device: str = None
|
11
|
+
whisper_model: Optional[Any] = None
|
12
|
+
whisper_model_name: Optional[str] = None
|
13
|
+
device: Optional[str] = None
|
@@ -0,0 +1,6 @@
|
|
1
|
+
from sonusai.utils.asr_manifest_functions.data import PathInfo
|
2
|
+
from sonusai.utils.asr_manifest_functions.data import TranscriptData
|
3
|
+
from sonusai.utils.asr_manifest_functions.librispeech import collect_librispeech_transcripts
|
4
|
+
from sonusai.utils.asr_manifest_functions.librispeech import get_librispeech_manifest_entry
|
5
|
+
from sonusai.utils.asr_manifest_functions.vctk_noisy_speech import collect_vctk_noisy_speech_transcripts
|
6
|
+
from sonusai.utils.asr_manifest_functions.vctk_noisy_speech import get_vctk_noisy_speech_manifest_entry
|
@@ -0,0 +1,49 @@
|
|
1
|
+
from typing import List
|
2
|
+
from typing import Union
|
3
|
+
|
4
|
+
from sonusai.utils.asr_manifest_functions import PathInfo
|
5
|
+
from sonusai.utils.asr_manifest_functions import TranscriptData
|
6
|
+
|
7
|
+
|
8
|
+
def collect_librispeech_transcripts(paths: Union[List[str], str]) -> TranscriptData:
|
9
|
+
from glob import iglob
|
10
|
+
from os.path import abspath
|
11
|
+
from os.path import dirname
|
12
|
+
from os.path import join
|
13
|
+
|
14
|
+
from sonusai import SonusAIError
|
15
|
+
|
16
|
+
entries: TranscriptData = {}
|
17
|
+
if not isinstance(paths, list):
|
18
|
+
paths = [paths]
|
19
|
+
|
20
|
+
for p in paths:
|
21
|
+
location = join(abspath(p), '**', '*.trans.txt')
|
22
|
+
for file in iglob(pathname=location, recursive=True):
|
23
|
+
root = dirname(file)
|
24
|
+
with open(file, encoding='utf-8') as f:
|
25
|
+
for line in f:
|
26
|
+
name, text = line[: line.index(' ')], line[line.index(' ') + 1:]
|
27
|
+
name = join(root, name)
|
28
|
+
if name in entries:
|
29
|
+
raise SonusAIError(f'{name} already exists in transcript data')
|
30
|
+
entries[name] = text.lower().strip()
|
31
|
+
return entries
|
32
|
+
|
33
|
+
|
34
|
+
def get_librispeech_manifest_entry(entry: PathInfo, transcript_data: TranscriptData) -> dict:
|
35
|
+
from os.path import splitext
|
36
|
+
from subprocess import check_output
|
37
|
+
|
38
|
+
from sonusai import SonusAIError
|
39
|
+
|
40
|
+
name = splitext(entry.abs_path)[0]
|
41
|
+
duration = float(check_output(f'soxi -D {entry.abs_path}', shell=True))
|
42
|
+
if name not in transcript_data.keys():
|
43
|
+
raise SonusAIError(f'Could not find {name} in transcript data')
|
44
|
+
|
45
|
+
return {
|
46
|
+
'audio_filepath': entry.audio_filepath,
|
47
|
+
'text': transcript_data[name],
|
48
|
+
'duration': duration,
|
49
|
+
}
|
@@ -0,0 +1,69 @@
|
|
1
|
+
from typing import List
|
2
|
+
from typing import Union
|
3
|
+
|
4
|
+
from sonusai.utils.asr_manifest_functions import PathInfo
|
5
|
+
from sonusai.utils.asr_manifest_functions import TranscriptData
|
6
|
+
|
7
|
+
|
8
|
+
def collect_vctk_noisy_speech_transcripts(paths: Union[List[str], str]) -> TranscriptData:
|
9
|
+
from glob import iglob
|
10
|
+
from os import listdir
|
11
|
+
from os.path import abspath
|
12
|
+
from os.path import basename
|
13
|
+
from os.path import join
|
14
|
+
from os.path import split
|
15
|
+
from os.path import splitext
|
16
|
+
|
17
|
+
from sonusai import SonusAIError
|
18
|
+
|
19
|
+
entries: TranscriptData = {}
|
20
|
+
if not isinstance(paths, list):
|
21
|
+
paths = [paths]
|
22
|
+
|
23
|
+
for p in paths:
|
24
|
+
abs_p = abspath(p)
|
25
|
+
head, tail = split(abs_p)
|
26
|
+
|
27
|
+
dirs = listdir(head)
|
28
|
+
tail = tail.replace('wav', 'txt')
|
29
|
+
|
30
|
+
location = None
|
31
|
+
for d in dirs:
|
32
|
+
if tail.endswith(d):
|
33
|
+
location = join(head, d, '*.txt')
|
34
|
+
break
|
35
|
+
if location is None:
|
36
|
+
raise SonusAIError(f'Could not find VCTK Noisy Speech transcript data for {p}')
|
37
|
+
|
38
|
+
for file in iglob(pathname=location, recursive=True):
|
39
|
+
with open(file, encoding='utf-8') as f:
|
40
|
+
lines = f.readlines()
|
41
|
+
if len(lines) != 1:
|
42
|
+
raise SonusAIError(f'Ill-formed VCTK Noisy Speech transcript file: {file}')
|
43
|
+
|
44
|
+
name = join(abs_p, splitext(basename(file))[0])
|
45
|
+
text = lines[0].lower().strip()
|
46
|
+
|
47
|
+
if name in entries:
|
48
|
+
raise SonusAIError(f'{name} already exists in transcript data')
|
49
|
+
entries[name] = text.lower().strip()
|
50
|
+
|
51
|
+
return entries
|
52
|
+
|
53
|
+
|
54
|
+
def get_vctk_noisy_speech_manifest_entry(entry: PathInfo, transcript_data: TranscriptData) -> dict:
|
55
|
+
from os.path import splitext
|
56
|
+
from subprocess import check_output
|
57
|
+
|
58
|
+
from sonusai import SonusAIError
|
59
|
+
|
60
|
+
name = splitext(entry.abs_path)[0]
|
61
|
+
duration = float(check_output(f'soxi -D {entry.abs_path}', shell=True))
|
62
|
+
if name not in transcript_data.keys():
|
63
|
+
raise SonusAIError(f'Could not find {name} in transcript data')
|
64
|
+
|
65
|
+
return {
|
66
|
+
'audio_filepath': entry.audio_filepath,
|
67
|
+
'text': transcript_data[name],
|
68
|
+
'duration': duration,
|
69
|
+
}
|
@@ -32,11 +32,18 @@ def expand_braces(text: str, seen: Optional[Set[str]] = None) -> Generator[str,
|
|
32
32
|
yield from expand_braces(''.join(replaced), seen)
|
33
33
|
|
34
34
|
|
35
|
-
def braced_glob(
|
35
|
+
def braced_glob(pathname: str, recursive: bool = False) -> List[str]:
|
36
36
|
from glob import glob
|
37
37
|
|
38
38
|
result = []
|
39
|
-
for
|
40
|
-
result.extend(glob(
|
39
|
+
for expanded_path in expand_braces(pathname):
|
40
|
+
result.extend(glob(expanded_path, recursive=recursive))
|
41
41
|
|
42
42
|
return result
|
43
|
+
|
44
|
+
|
45
|
+
def braced_iglob(pathname: str, recursive: bool = False) -> Generator[str, None, None]:
|
46
|
+
from glob import iglob
|
47
|
+
|
48
|
+
for expanded_path in expand_braces(pathname):
|
49
|
+
yield from iglob(expanded_path, recursive=recursive)
|
@@ -11,7 +11,7 @@ from typing import Iterable
|
|
11
11
|
from typing import List
|
12
12
|
|
13
13
|
|
14
|
-
def _parallel(ordered: bool, function: Callable, *iterables: Iterable, **kwargs: Any) -> Generator:
|
14
|
+
def __parallel(ordered: bool, function: Callable, *iterables: Iterable, **kwargs: Any) -> Generator:
|
15
15
|
"""Returns a generator for a parallel map.
|
16
16
|
|
17
17
|
Arguments:
|
@@ -43,6 +43,7 @@ def _parallel(ordered: bool, function: Callable, *iterables: Iterable, **kwargs:
|
|
43
43
|
|
44
44
|
# Extract num_cpus
|
45
45
|
num_cpus = kwargs.pop('num_cpus', None)
|
46
|
+
chunksize = kwargs.pop('chunksize', 1)
|
46
47
|
|
47
48
|
# Determine num_cpus
|
48
49
|
if num_cpus is None:
|
@@ -57,7 +58,7 @@ def _parallel(ordered: bool, function: Callable, *iterables: Iterable, **kwargs:
|
|
57
58
|
with mp.Pool(num_cpus, initializer=initializer, initargs=initargs) as pool:
|
58
59
|
map_func = getattr(pool, map_type)
|
59
60
|
|
60
|
-
for item in map_func(function, *iterables):
|
61
|
+
for item in map_func(function, *iterables, chunksize=chunksize):
|
61
62
|
yield item
|
62
63
|
progress.update()
|
63
64
|
|
@@ -67,9 +68,9 @@ def _parallel(ordered: bool, function: Callable, *iterables: Iterable, **kwargs:
|
|
67
68
|
|
68
69
|
def p_tqdm_map(function: Callable, *iterables: Iterable, **kwargs: Any) -> List[Any]:
|
69
70
|
"""Performs a parallel ordered map."""
|
70
|
-
return list(_parallel(True, function, *iterables, **kwargs))
|
71
|
+
return list(__parallel(True, function, *iterables, **kwargs))
|
71
72
|
|
72
73
|
|
73
74
|
def p_tqdm_umap(function: Callable, *iterables: Iterable, **kwargs: Any) -> List[Any]:
|
74
75
|
"""Performs a parallel unordered map."""
|
75
|
-
return list(_parallel(False, function, *iterables, **kwargs))
|
76
|
+
return list(__parallel(False, function, *iterables, **kwargs))
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|