sonusai 0.12.6__tar.gz → 0.12.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. {sonusai-0.12.6 → sonusai-0.12.7}/PKG-INFO +1 -1
  2. {sonusai-0.12.6 → sonusai-0.12.7}/pyproject.toml +1 -1
  3. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/calc_metric_spenh.py +1 -1
  4. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/genft.py +1 -1
  5. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/genmix.py +1 -1
  6. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/genmixdb.py +1 -1
  7. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/main.py +2 -2
  8. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/config.py +2 -2
  9. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/mixdb.py +1 -1
  10. sonusai-0.12.7/sonusai/mkmanifest.py +174 -0
  11. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mkwav.py +2 -2
  12. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/post_spenh_targetf.py +1 -1
  13. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/__init__.py +1 -0
  14. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/asr.py +6 -5
  15. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/asr_functions/data.py +4 -3
  16. sonusai-0.12.7/sonusai/utils/asr_manifest_functions/__init__.py +6 -0
  17. sonusai-0.12.7/sonusai/utils/asr_manifest_functions/data.py +10 -0
  18. sonusai-0.12.7/sonusai/utils/asr_manifest_functions/librispeech.py +49 -0
  19. sonusai-0.12.7/sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +69 -0
  20. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/braced_glob.py +10 -3
  21. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/parallel_tqdm.py +5 -4
  22. {sonusai-0.12.6 → sonusai-0.12.7}/README.rst +0 -0
  23. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/__init__.py +0 -0
  24. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/aawscd_probwrite.py +0 -0
  25. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/data/__init__.py +0 -0
  26. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/data/genmixdb.yml +0 -0
  27. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/data/speech_ma01_01.wav +0 -0
  28. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/data/whitenoise.wav +0 -0
  29. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/data_generator/__init__.py +0 -0
  30. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/data_generator/dataset_from_mixdb.py +0 -0
  31. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/data_generator/keras_from_mixdb.py +0 -0
  32. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/data_generator/torch_from_mixdb.py +0 -0
  33. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/evaluate.py +0 -0
  34. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/gentcst.py +0 -0
  35. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/keras_onnx.py +0 -0
  36. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/keras_predict.py +0 -0
  37. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/keras_train.py +0 -0
  38. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/lsdb.py +0 -0
  39. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/__init__.py +0 -0
  40. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/calc_class_weights.py +0 -0
  41. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/calc_optimal_thresholds.py +0 -0
  42. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/calc_pcm.py +0 -0
  43. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/calc_pesq.py +0 -0
  44. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/calc_sa_sdr.py +0 -0
  45. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/calc_sample_weights.py +0 -0
  46. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/calc_wer.py +0 -0
  47. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/calc_wsdr.py +0 -0
  48. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/class_summary.py +0 -0
  49. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/confusion_matrix_summary.py +0 -0
  50. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/one_hot.py +0 -0
  51. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/metrics/snr_summary.py +0 -0
  52. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/__init__.py +0 -0
  53. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/active_truth_class_balancing.py +0 -0
  54. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/audio.py +0 -0
  55. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/augmentation.py +0 -0
  56. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/balance.py +0 -0
  57. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/class_count.py +0 -0
  58. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/constants.py +0 -0
  59. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/feature.py +0 -0
  60. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/generate_mixtures.py +0 -0
  61. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/initialize.py +0 -0
  62. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/log_duration_and_sizes.py +0 -0
  63. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/mapped_snr_f.py +0 -0
  64. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/spectral_mask.py +0 -0
  65. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/target_class_balancing.py +0 -0
  66. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/targets.py +0 -0
  67. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/tokenized_shell_vars.py +0 -0
  68. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/truth.py +0 -0
  69. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/truth_functions/__init__.py +0 -0
  70. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/truth_functions/crm.py +0 -0
  71. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/truth_functions/data.py +0 -0
  72. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/truth_functions/energy.py +0 -0
  73. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/truth_functions/file.py +0 -0
  74. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/truth_functions/phoneme.py +0 -0
  75. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/truth_functions/sed.py +0 -0
  76. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/truth_functions/target.py +0 -0
  77. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/mixture/types.py +0 -0
  78. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/onnx_predict.py +0 -0
  79. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/plot.py +0 -0
  80. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/queries/__init__.py +0 -0
  81. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/queries/queries.py +0 -0
  82. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/torchl_predict.py +0 -0
  83. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/torchl_train.py +0 -0
  84. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/tplot.py +0 -0
  85. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/asl_p56.py +0 -0
  86. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/asr_functions/__init__.py +0 -0
  87. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/asr_functions/aixplain_whisper.py +0 -0
  88. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/asr_functions/deepgram.py +0 -0
  89. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/asr_functions/google.py +0 -0
  90. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/asr_functions/whisper.py +0 -0
  91. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/calculate_input_shape.py +0 -0
  92. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/create_ts_name.py +0 -0
  93. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/dataclass_from_dict.py +0 -0
  94. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/db.py +0 -0
  95. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/energy_f.py +0 -0
  96. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/engineering_number.py +0 -0
  97. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/get_frames_per_batch.py +0 -0
  98. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/get_label_names.py +0 -0
  99. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/grouper.py +0 -0
  100. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/human_readable_size.py +0 -0
  101. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/keras_utils.py +0 -0
  102. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/max_text_width.py +0 -0
  103. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/numeric_conversion.py +0 -0
  104. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/onnx_utils.py +0 -0
  105. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/parallel.py +0 -0
  106. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/print_mixture_details.py +0 -0
  107. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/ranges.py +0 -0
  108. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/read_mixture_data.py +0 -0
  109. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/read_predict_data.py +0 -0
  110. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/reshape.py +0 -0
  111. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/seconds_to_hms.py +0 -0
  112. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/stacked_complex.py +0 -0
  113. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/stratified_shuffle_split.py +0 -0
  114. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/trim_docstring.py +0 -0
  115. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/wave.py +0 -0
  116. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/utils/yes_or_no.py +0 -0
  117. {sonusai-0.12.6 → sonusai-0.12.7}/sonusai/vars.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sonusai
3
- Version: 0.12.6
3
+ Version: 0.12.7
4
4
  Summary: Framework for building deep neural network models for sound, speech, and voice AI
5
5
  Home-page: https://aaware.com
6
6
  License: GPL-3.0-only
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "sonusai"
3
- version = "0.12.6"
3
+ version = "0.12.7"
4
4
  description = "Framework for building deep neural network models for sound, speech, and voice AI"
5
5
  authors = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
6
6
  maintainers = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
@@ -808,7 +808,7 @@ def main() -> None:
808
808
  # Individual mixtures use pandas print, set precision to 2 decimal places
809
809
  # pd.set_option('float_format', '{:.2f}'.format)
810
810
  progress = tqdm(total=len(mixids))
811
- all_metrics_tables = p_tqdm_map(_process_mixture, mixids, progress=progress)
811
+ all_metrics_tables = p_tqdm_map(_process_mixture, mixids, progress=progress, chunksize=10)
812
812
  progress.close()
813
813
 
814
814
  all_metrics_table_1 = pd.concat([item[0] for item in all_metrics_tables])
@@ -72,7 +72,7 @@ def genft(mixdb: MixtureDatabase,
72
72
  results.append(_genft_kernel(mixid))
73
73
  else:
74
74
  progress = tqdm(total=len(mixids), disable=not show_progress)
75
- results = p_tqdm_map(_genft_kernel, mixids, progress=progress)
75
+ results = p_tqdm_map(_genft_kernel, mixids, progress=progress, chunksize=10)
76
76
  progress.close()
77
77
 
78
78
  return results
@@ -79,7 +79,7 @@ def genmix(mixdb: MixtureDatabase,
79
79
  results.append(_genmix_kernel(mixid))
80
80
  else:
81
81
  progress = tqdm(total=len(mixids), disable=not show_progress)
82
- results = p_tqdm_map(_genmix_kernel, mixids, progress=progress)
82
+ results = p_tqdm_map(_genmix_kernel, mixids, progress=progress, chunksize=10)
83
83
  progress.close()
84
84
 
85
85
  return results
@@ -345,7 +345,7 @@ def genmixdb(location: Location,
345
345
  if logging:
346
346
  logger.info('Generating mixtures')
347
347
  progress = tqdm(total=total_mixtures, disable=not show_progress)
348
- mixdb.mixtures = p_tqdm_map(_process_mixture, range(total_mixtures), progress=progress)
348
+ mixdb.mixtures = p_tqdm_map(_process_mixture, range(total_mixtures), progress=progress, chunksize=10)
349
349
  progress.close()
350
350
 
351
351
  total_samples = mixdb.total_samples()
@@ -4,7 +4,6 @@ usage: sonusai [--version] [--help] <command> [<args>...]
4
4
 
5
5
  The sonusai commands are:
6
6
  calc_metric_spenh Run speech enhancement and analysis
7
- calc_metric_spenh_targetf Run speech enhancement and analysis for targetf truth (deprecated)
8
7
  evaluate Evaluate model performance
9
8
  genft Generate feature and truth data
10
9
  genmix Generate mixture and truth data
@@ -14,6 +13,7 @@ The sonusai commands are:
14
13
  keras_train Train a model using Keras
15
14
  keras_onnx Convert a trained Keras model to ONNX
16
15
  lsdb List information about a mixture database
16
+ mkmanifest Make ASR manifest JSON file
17
17
  mkwav Make WAV files from a mixture database
18
18
  onnx_predict Run ONNX predict on a trained model
19
19
  plot Plot mixture data
@@ -38,7 +38,6 @@ def main() -> None:
38
38
 
39
39
  commands = (
40
40
  'calc_metric_spenh',
41
- 'calc_metric_spenh_targetf',
42
41
  'evaluate',
43
42
  'genft',
44
43
  'genmix',
@@ -48,6 +47,7 @@ def main() -> None:
48
47
  'keras_train',
49
48
  'keras_onnx',
50
49
  'lsdb',
50
+ 'mkmanifest',
51
51
  'mkwav',
52
52
  'onnx_predict',
53
53
  'plot',
@@ -250,7 +250,7 @@ def get_target_files(config: dict, show_progress: bool = False) -> TargetFiles:
250
250
  for target in config['targets']]))
251
251
 
252
252
  progress = tqdm(total=len(target_files), disable=not show_progress)
253
- target_files = p_tqdm_map(_get_samples, target_files, progress=progress)
253
+ target_files = p_tqdm_map(_get_samples, target_files, progress=progress, chunksize=10)
254
254
  progress.close()
255
255
 
256
256
  max_class = get_max_class(config['num_classes'], config['truth_mode'] == 'mutex')
@@ -394,7 +394,7 @@ def get_noise_files(config: dict, show_progress: bool = False) -> NoiseFiles:
394
394
  noise_files = list(chain.from_iterable([_append_noise_files(noise_file=noise) for noise in config['noises']]))
395
395
 
396
396
  progress = tqdm(total=len(noise_files), disable=not show_progress)
397
- noise_files = p_tqdm_map(_get_samples, noise_files, progress=progress)
397
+ noise_files = p_tqdm_map(_get_samples, noise_files, progress=progress, chunksize=10)
398
398
  progress.close()
399
399
 
400
400
  return dataclass_from_dict(NoiseFiles, noise_files)
@@ -1065,7 +1065,7 @@ class MixtureDatabase:
1065
1065
  :param mixid: Mixture ID
1066
1066
  :param targets: List of augmented target audio data (one per target in the mixup) for the given mixid
1067
1067
  :param noise: Augmented noise audio data for the given mixid
1068
- :param force: Force computing data from original sources regardless of whether or not cached data exists
1068
+ :param force: Force computing data from original sources regardless of whether cached data exists
1069
1069
  :return: truth_t data
1070
1070
  """
1071
1071
  import numpy as np
@@ -0,0 +1,174 @@
1
+ """mkmanifest
2
+
3
+ usage: mkmanifest [-hvn] [--include GLOB] [-m METHOD] [-e ADAT] [-o OUTPUT] PATH ...
4
+
5
+ options:
6
+ -h, --help
7
+ -v, --verbose Be verbose: list all files found.
8
+ -n, --dry-run Collect files, but exit without processing and writing manifest file.
9
+ --include GLOB Search only files whose base name matches GLOB. [default: *.{wav,flac}].
10
+ -m METHOD, --method METHOD Method for getting the true speech text of the audio files. [default: librispeech].
11
+ -e ADAT, --audio-env ADAT Environment variable pointing to all audio data.
12
+ -o OUTPUT, --output OUTPUT Output file name. [default: asr_manifest.json].
13
+
14
+ Make a speech recognition (ASR) .json manifest file of all audio files under PATHS following the NVIDIA NeMo format.
15
+ An example of manifest entries:
16
+
17
+ {"audio_filepath": "<absolute_path_to>/1355-39947-0000.wav", "duration": 11.3, "text": "psychotherapy ..."}
18
+ {"audio_filepath": "<absolute_path_to>/1355-39947-0001.wav", "duration": 15.905, "text": "it is an ..."}
19
+
20
+ See the NVIDIA NeMo docs for more information:
21
+ https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/datasets.html
22
+
23
+ Inputs:
24
+ PATH A relative path name or list of paths containing audio files. Each will be
25
+ recursively searched for files matching the pattern GLOB.
26
+ GLOB Match the pattern GLOB using wildcard matching.
27
+ Example: '*.{wav,flac}' matches all .wav and .flac files.
28
+ METHOD The method to use for fetching the true speech of the audio files.
29
+ Supported methods:
30
+ - 'librispeech'
31
+ - 'vctk_noisy_speech'
32
+ ADAT Audio data environment variable. All found files will be expanded to their full, absolute path and
33
+ then parts of the path that match the specified environment variable value will be replaced with
34
+ the variable. This accommodates portability across platforms where the sound datasets may be in
35
+ different locations.
36
+ OUTPUT Name of output file. Default is asr_manifest.json.
37
+
38
+ Outputs the following to the current directory:
39
+ <OUTPUT>
40
+ mkmanifest.log
41
+
42
+ Example usage for LibriSpeech:
43
+ sonusai mkmanifest -mlibrispeech -eADAT -oasr_manifest.json --include='*.flac' train-clean-100
44
+
45
+ """
46
+ from sonusai import logger
47
+
48
+ VALID_METHOD = ['librispeech', 'vctk_noisy_speech']
49
+
50
+
51
+ def main() -> None:
52
+ from docopt import docopt
53
+
54
+ import sonusai
55
+ from sonusai.utils import trim_docstring
56
+
57
+ args = docopt(trim_docstring(__doc__), version=sonusai.__version__, options_first=True)
58
+
59
+ verbose = args['--verbose']
60
+ dry_run = args['--dry-run']
61
+ include = args['--include']
62
+ method = args['--method']
63
+ audio_env = args['--audio-env']
64
+ output = args['--output']
65
+ paths = args['PATH']
66
+
67
+ import json
68
+ from functools import partial
69
+ import time
70
+ from os import environ
71
+ from os.path import abspath
72
+ from os.path import join
73
+ from os.path import realpath
74
+ from typing import List
75
+
76
+ from tqdm import tqdm
77
+
78
+ from sonusai import SonusAIError
79
+ from sonusai import create_file_handler
80
+ from sonusai import initial_log_messages
81
+ from sonusai import logger
82
+ from sonusai import update_console_handler
83
+ from sonusai.utils import braced_iglob
84
+ from sonusai.utils import p_tqdm_map
85
+ from sonusai.utils import seconds_to_hms
86
+ from sonusai.utils.asr_manifest_functions import PathInfo
87
+ from sonusai.utils.asr_manifest_functions import collect_librispeech_transcripts
88
+ from sonusai.utils.asr_manifest_functions import collect_vctk_noisy_speech_transcripts
89
+ from sonusai.utils.asr_manifest_functions import get_librispeech_manifest_entry
90
+ from sonusai.utils.asr_manifest_functions import get_vctk_noisy_speech_manifest_entry
91
+
92
+ start_time = time.monotonic()
93
+
94
+ create_file_handler('mkmanifest.log')
95
+ update_console_handler(verbose)
96
+ initial_log_messages('mkmanifest')
97
+
98
+ if method not in VALID_METHOD:
99
+ raise SonusAIError(f'Unknown method: {method}')
100
+
101
+ audio_dir = None
102
+ if audio_env is not None:
103
+ audio_dir = realpath(environ[audio_env])
104
+ if audio_dir is None:
105
+ raise SonusAIError(f'Unknown environment variable: {audio_env}')
106
+
107
+ if audio_env:
108
+ for p in paths:
109
+ if not realpath(abspath(p)).startswith(audio_dir):
110
+ logger.warning(f'Specified directory, {p}, is not part of the provided audio environment: '
111
+ f'${audio_env}={audio_dir}')
112
+
113
+ logger.info('')
114
+ logger.info(f'Searching {len(paths)} provided director{"ies" if len(paths) > 1 else "y"}...')
115
+
116
+ entries: List[PathInfo] = []
117
+ for p in paths:
118
+ location = join(realpath(abspath(p)), '**', include)
119
+ logger.debug(f'Processing {location}')
120
+ for file in braced_iglob(pathname=location, recursive=True):
121
+ name = file
122
+ if audio_env is not None:
123
+ name = name.replace(audio_dir, f'${audio_env}')
124
+ entries.append(PathInfo(abs_path=file, audio_filepath=name))
125
+ logger.debug('')
126
+
127
+ logger.info(f'Found {len(entries)} audio file{"s" if len(entries) != 1 else ""}')
128
+
129
+ if dry_run:
130
+ logger.info('')
131
+ logger.info('Dry run')
132
+ logger.info('')
133
+ for entry in entries:
134
+ logger.info(f' - {entry.audio_filepath}')
135
+ return
136
+
137
+ if method == 'librispeech':
138
+ logger.info('Collecting LibriSpeech transcript data')
139
+ transcript_data = collect_librispeech_transcripts(paths=paths)
140
+
141
+ processing_func = partial(get_librispeech_manifest_entry, transcript_data=transcript_data)
142
+ progress = tqdm(total=len(entries), desc='Creating LibriSpeech manifest data')
143
+ results = p_tqdm_map(processing_func, entries, progress=progress, chunksize=10)
144
+ progress.close()
145
+
146
+ with open(output, 'w') as f:
147
+ for result in results:
148
+ f.write(json.dumps(result) + '\n')
149
+
150
+ if method == 'vctk_noisy_speech':
151
+ logger.info('Collecting VCTK Noisy Speech transcript data')
152
+ transcript_data = collect_vctk_noisy_speech_transcripts(paths=paths)
153
+
154
+ processing_func = partial(get_vctk_noisy_speech_manifest_entry, transcript_data=transcript_data)
155
+ progress = tqdm(total=len(entries), desc='Creating VCTK Noisy Speech manifest data')
156
+ results = p_tqdm_map(processing_func, entries, progress=progress, chunksize=10)
157
+ progress.close()
158
+
159
+ with open(output, 'w') as f:
160
+ for result in results:
161
+ f.write(json.dumps(result) + '\n')
162
+
163
+ end_time = time.monotonic()
164
+ logger.info('')
165
+ logger.info(f'Completed in {seconds_to_hms(seconds=end_time - start_time)}')
166
+ logger.info('')
167
+
168
+
169
+ if __name__ == '__main__':
170
+ try:
171
+ main()
172
+ except KeyboardInterrupt:
173
+ logger.info('Canceled due to keyboard interrupt')
174
+ raise SystemExit(0)
@@ -85,7 +85,7 @@ def _process_mixture(mixid: int) -> None:
85
85
  with h5py.File(mixture_filename, 'r') as f:
86
86
  mixture = np.array(f['mixture'])
87
87
  if MP_GLOBAL.write_target:
88
- target = sum(np.array(f['targets']))
88
+ target = np.sum(np.array(f['targets']), axis=0)
89
89
  if MP_GLOBAL.write_noise:
90
90
  noise = np.array(f['noise'])
91
91
 
@@ -148,7 +148,7 @@ def main() -> None:
148
148
  MP_GLOBAL.write_noise = write_noise
149
149
 
150
150
  progress = tqdm(total=len(mixid))
151
- p_tqdm_map(_process_mixture, mixid, progress=progress)
151
+ p_tqdm_map(_process_mixture, mixid, progress=progress, chunksize=10)
152
152
  progress.close()
153
153
 
154
154
  logger.info(f'Wrote {len(mixid)} mixtures to {location}')
@@ -101,7 +101,7 @@ def main() -> None:
101
101
  logger.info(f'Found {len(input_name):,} files to process')
102
102
 
103
103
  progress = tqdm(total=len(input_name))
104
- p_tqdm_map(_process, input_name, progress=progress)
104
+ p_tqdm_map(_process, input_name, progress=progress, chunksize=10)
105
105
  progress.close()
106
106
 
107
107
  logger.info(f'Wrote {len(input_name)} mixtures to {output_dir}')
@@ -3,6 +3,7 @@ from sonusai.utils.asl_p56 import asl_p56
3
3
  from sonusai.utils.asr import ASRResult
4
4
  from sonusai.utils.asr import calc_asr
5
5
  from sonusai.utils.braced_glob import braced_glob
6
+ from sonusai.utils.braced_glob import braced_iglob
6
7
  from sonusai.utils.calculate_input_shape import calculate_input_shape
7
8
  from sonusai.utils.create_ts_name import create_ts_name
8
9
  from sonusai.utils.dataclass_from_dict import dataclass_from_dict
@@ -1,5 +1,6 @@
1
1
  from dataclasses import dataclass
2
2
  from typing import Any
3
+ from typing import Optional
3
4
  from typing import Union
4
5
 
5
6
  from sonusai.mixture import AudioT
@@ -9,14 +10,14 @@ from sonusai.mixture import Location
9
10
  @dataclass(frozen=True)
10
11
  class ASRResult:
11
12
  text: str
12
- confidence: float = None
13
+ confidence: Optional[float] = None
13
14
 
14
15
 
15
16
  def calc_asr(audio: Union[AudioT, Location],
16
- engine: str = 'deepgram',
17
- whisper_model: Any = None,
18
- whisper_model_name: str = 'base.en',
19
- device: str = None) -> ASRResult:
17
+ engine: Optional[str] = 'deepgram',
18
+ whisper_model: Optional[Any] = None,
19
+ whisper_model_name: Optional[str] = 'base.en',
20
+ device: Optional[str] = None) -> ASRResult:
20
21
  """Run ASR on audio
21
22
 
22
23
  :param audio: Numpy array of audio samples or location of an audio file
@@ -1,5 +1,6 @@
1
1
  from dataclasses import dataclass
2
2
  from typing import Any
3
+ from typing import Optional
3
4
 
4
5
  from sonusai.mixture.types import AudioT
5
6
 
@@ -7,6 +8,6 @@ from sonusai.mixture.types import AudioT
7
8
  @dataclass(frozen=True)
8
9
  class Data:
9
10
  audio: AudioT
10
- whisper_model: Any = None
11
- whisper_model_name: str = None
12
- device: str = None
11
+ whisper_model: Optional[Any] = None
12
+ whisper_model_name: Optional[str] = None
13
+ device: Optional[str] = None
@@ -0,0 +1,6 @@
1
+ from sonusai.utils.asr_manifest_functions.data import PathInfo
2
+ from sonusai.utils.asr_manifest_functions.data import TranscriptData
3
+ from sonusai.utils.asr_manifest_functions.librispeech import collect_librispeech_transcripts
4
+ from sonusai.utils.asr_manifest_functions.librispeech import get_librispeech_manifest_entry
5
+ from sonusai.utils.asr_manifest_functions.vctk_noisy_speech import collect_vctk_noisy_speech_transcripts
6
+ from sonusai.utils.asr_manifest_functions.vctk_noisy_speech import get_vctk_noisy_speech_manifest_entry
@@ -0,0 +1,10 @@
1
+ from dataclasses import dataclass
2
+ from typing import Dict
3
+
4
+ TranscriptData = Dict[str, str]
5
+
6
+
7
+ @dataclass(frozen=True)
8
+ class PathInfo:
9
+ abs_path: str
10
+ audio_filepath: str
@@ -0,0 +1,49 @@
1
+ from typing import List
2
+ from typing import Union
3
+
4
+ from sonusai.utils.asr_manifest_functions import PathInfo
5
+ from sonusai.utils.asr_manifest_functions import TranscriptData
6
+
7
+
8
+ def collect_librispeech_transcripts(paths: Union[List[str], str]) -> TranscriptData:
9
+ from glob import iglob
10
+ from os.path import abspath
11
+ from os.path import dirname
12
+ from os.path import join
13
+
14
+ from sonusai import SonusAIError
15
+
16
+ entries: TranscriptData = {}
17
+ if not isinstance(paths, list):
18
+ paths = [paths]
19
+
20
+ for p in paths:
21
+ location = join(abspath(p), '**', '*.trans.txt')
22
+ for file in iglob(pathname=location, recursive=True):
23
+ root = dirname(file)
24
+ with open(file, encoding='utf-8') as f:
25
+ for line in f:
26
+ name, text = line[: line.index(' ')], line[line.index(' ') + 1:]
27
+ name = join(root, name)
28
+ if name in entries:
29
+ raise SonusAIError(f'{name} already exists in transcript data')
30
+ entries[name] = text.lower().strip()
31
+ return entries
32
+
33
+
34
+ def get_librispeech_manifest_entry(entry: PathInfo, transcript_data: TranscriptData) -> dict:
35
+ from os.path import splitext
36
+ from subprocess import check_output
37
+
38
+ from sonusai import SonusAIError
39
+
40
+ name = splitext(entry.abs_path)[0]
41
+ duration = float(check_output(f'soxi -D {entry.abs_path}', shell=True))
42
+ if name not in transcript_data.keys():
43
+ raise SonusAIError(f'Could not find {name} in transcript data')
44
+
45
+ return {
46
+ 'audio_filepath': entry.audio_filepath,
47
+ 'text': transcript_data[name],
48
+ 'duration': duration,
49
+ }
@@ -0,0 +1,69 @@
1
+ from typing import List
2
+ from typing import Union
3
+
4
+ from sonusai.utils.asr_manifest_functions import PathInfo
5
+ from sonusai.utils.asr_manifest_functions import TranscriptData
6
+
7
+
8
+ def collect_vctk_noisy_speech_transcripts(paths: Union[List[str], str]) -> TranscriptData:
9
+ from glob import iglob
10
+ from os import listdir
11
+ from os.path import abspath
12
+ from os.path import basename
13
+ from os.path import join
14
+ from os.path import split
15
+ from os.path import splitext
16
+
17
+ from sonusai import SonusAIError
18
+
19
+ entries: TranscriptData = {}
20
+ if not isinstance(paths, list):
21
+ paths = [paths]
22
+
23
+ for p in paths:
24
+ abs_p = abspath(p)
25
+ head, tail = split(abs_p)
26
+
27
+ dirs = listdir(head)
28
+ tail = tail.replace('wav', 'txt')
29
+
30
+ location = None
31
+ for d in dirs:
32
+ if tail.endswith(d):
33
+ location = join(head, d, '*.txt')
34
+ break
35
+ if location is None:
36
+ raise SonusAIError(f'Could not find VCTK Noisy Speech transcript data for {p}')
37
+
38
+ for file in iglob(pathname=location, recursive=True):
39
+ with open(file, encoding='utf-8') as f:
40
+ lines = f.readlines()
41
+ if len(lines) != 1:
42
+ raise SonusAIError(f'Ill-formed VCTK Noisy Speech transcript file: {file}')
43
+
44
+ name = join(abs_p, splitext(basename(file))[0])
45
+ text = lines[0].lower().strip()
46
+
47
+ if name in entries:
48
+ raise SonusAIError(f'{name} already exists in transcript data')
49
+ entries[name] = text.lower().strip()
50
+
51
+ return entries
52
+
53
+
54
+ def get_vctk_noisy_speech_manifest_entry(entry: PathInfo, transcript_data: TranscriptData) -> dict:
55
+ from os.path import splitext
56
+ from subprocess import check_output
57
+
58
+ from sonusai import SonusAIError
59
+
60
+ name = splitext(entry.abs_path)[0]
61
+ duration = float(check_output(f'soxi -D {entry.abs_path}', shell=True))
62
+ if name not in transcript_data.keys():
63
+ raise SonusAIError(f'Could not find {name} in transcript data')
64
+
65
+ return {
66
+ 'audio_filepath': entry.audio_filepath,
67
+ 'text': transcript_data[name],
68
+ 'duration': duration,
69
+ }
@@ -32,11 +32,18 @@ def expand_braces(text: str, seen: Optional[Set[str]] = None) -> Generator[str,
32
32
  yield from expand_braces(''.join(replaced), seen)
33
33
 
34
34
 
35
- def braced_glob(path: str) -> List[str]:
35
+ def braced_glob(pathname: str, recursive: bool = False) -> List[str]:
36
36
  from glob import glob
37
37
 
38
38
  result = []
39
- for x in expand_braces(path):
40
- result.extend(glob(x))
39
+ for expanded_path in expand_braces(pathname):
40
+ result.extend(glob(expanded_path, recursive=recursive))
41
41
 
42
42
  return result
43
+
44
+
45
+ def braced_iglob(pathname: str, recursive: bool = False) -> Generator[str, None, None]:
46
+ from glob import iglob
47
+
48
+ for expanded_path in expand_braces(pathname):
49
+ yield from iglob(expanded_path, recursive=recursive)
@@ -11,7 +11,7 @@ from typing import Iterable
11
11
  from typing import List
12
12
 
13
13
 
14
- def _parallel(ordered: bool, function: Callable, *iterables: Iterable, **kwargs: Any) -> Generator:
14
+ def __parallel(ordered: bool, function: Callable, *iterables: Iterable, **kwargs: Any) -> Generator:
15
15
  """Returns a generator for a parallel map.
16
16
 
17
17
  Arguments:
@@ -43,6 +43,7 @@ def _parallel(ordered: bool, function: Callable, *iterables: Iterable, **kwargs:
43
43
 
44
44
  # Extract num_cpus
45
45
  num_cpus = kwargs.pop('num_cpus', None)
46
+ chunksize = kwargs.pop('chunksize', 1)
46
47
 
47
48
  # Determine num_cpus
48
49
  if num_cpus is None:
@@ -57,7 +58,7 @@ def _parallel(ordered: bool, function: Callable, *iterables: Iterable, **kwargs:
57
58
  with mp.Pool(num_cpus, initializer=initializer, initargs=initargs) as pool:
58
59
  map_func = getattr(pool, map_type)
59
60
 
60
- for item in map_func(function, *iterables):
61
+ for item in map_func(function, *iterables, chunksize=chunksize):
61
62
  yield item
62
63
  progress.update()
63
64
 
@@ -67,9 +68,9 @@ def _parallel(ordered: bool, function: Callable, *iterables: Iterable, **kwargs:
67
68
 
68
69
  def p_tqdm_map(function: Callable, *iterables: Iterable, **kwargs: Any) -> List[Any]:
69
70
  """Performs a parallel ordered map."""
70
- return list(_parallel(True, function, *iterables, **kwargs))
71
+ return list(__parallel(True, function, *iterables, **kwargs))
71
72
 
72
73
 
73
74
  def p_tqdm_umap(function: Callable, *iterables: Iterable, **kwargs: Any) -> List[Any]:
74
75
  """Performs a parallel unordered map."""
75
- return list(_parallel(False, function, *iterables, **kwargs))
76
+ return list(__parallel(False, function, *iterables, **kwargs))
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes