sonusai 0.12.5__tar.gz → 0.12.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. {sonusai-0.12.5 → sonusai-0.12.7}/PKG-INFO +2 -2
  2. {sonusai-0.12.5 → sonusai-0.12.7}/pyproject.toml +2 -2
  3. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/calc_metric_spenh.py +2 -2
  4. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/data_generator/keras_from_mixdb.py +3 -3
  5. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/evaluate.py +1 -1
  6. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/genft.py +2 -2
  7. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/genmix.py +2 -2
  8. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/genmixdb.py +18 -33
  9. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/gentcst.py +1 -1
  10. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/keras_onnx.py +1 -1
  11. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/keras_predict.py +1 -1
  12. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/keras_train.py +4 -4
  13. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/lsdb.py +1 -1
  14. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/main.py +8 -6
  15. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/__init__.py +1 -1
  16. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/audio.py +71 -3
  17. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/augmentation.py +10 -13
  18. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/config.py +2 -2
  19. sonusai-0.12.7/sonusai/mixture/generate_mixtures.py +294 -0
  20. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/mixdb.py +77 -8
  21. sonusai-0.12.7/sonusai/mkmanifest.py +174 -0
  22. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mkwav.py +3 -3
  23. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/onnx_predict.py +7 -5
  24. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/plot.py +1 -1
  25. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/post_spenh_targetf.py +2 -2
  26. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/torchl_predict.py +1 -1
  27. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/torchl_train.py +1 -1
  28. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/tplot.py +1 -1
  29. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/__init__.py +1 -0
  30. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/asr.py +6 -5
  31. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/asr_functions/data.py +4 -3
  32. sonusai-0.12.7/sonusai/utils/asr_manifest_functions/__init__.py +6 -0
  33. sonusai-0.12.7/sonusai/utils/asr_manifest_functions/data.py +10 -0
  34. sonusai-0.12.7/sonusai/utils/asr_manifest_functions/librispeech.py +49 -0
  35. sonusai-0.12.7/sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +69 -0
  36. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/braced_glob.py +10 -3
  37. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/parallel_tqdm.py +5 -4
  38. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/vars.py +9 -7
  39. sonusai-0.12.5/sonusai/mixture/generate_mixtures.py +0 -328
  40. {sonusai-0.12.5 → sonusai-0.12.7}/README.rst +0 -0
  41. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/__init__.py +0 -0
  42. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/aawscd_probwrite.py +0 -0
  43. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/data/__init__.py +0 -0
  44. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/data/genmixdb.yml +0 -0
  45. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/data/speech_ma01_01.wav +0 -0
  46. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/data/whitenoise.wav +0 -0
  47. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/data_generator/__init__.py +0 -0
  48. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/data_generator/dataset_from_mixdb.py +0 -0
  49. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/data_generator/torch_from_mixdb.py +0 -0
  50. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/__init__.py +0 -0
  51. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/calc_class_weights.py +0 -0
  52. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/calc_optimal_thresholds.py +0 -0
  53. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/calc_pcm.py +0 -0
  54. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/calc_pesq.py +0 -0
  55. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/calc_sa_sdr.py +0 -0
  56. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/calc_sample_weights.py +0 -0
  57. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/calc_wer.py +0 -0
  58. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/calc_wsdr.py +0 -0
  59. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/class_summary.py +0 -0
  60. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/confusion_matrix_summary.py +0 -0
  61. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/one_hot.py +0 -0
  62. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/metrics/snr_summary.py +0 -0
  63. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/active_truth_class_balancing.py +0 -0
  64. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/balance.py +0 -0
  65. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/class_count.py +0 -0
  66. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/constants.py +0 -0
  67. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/feature.py +0 -0
  68. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/initialize.py +0 -0
  69. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/log_duration_and_sizes.py +0 -0
  70. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/mapped_snr_f.py +0 -0
  71. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/spectral_mask.py +0 -0
  72. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/target_class_balancing.py +0 -0
  73. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/targets.py +0 -0
  74. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/tokenized_shell_vars.py +0 -0
  75. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/truth.py +0 -0
  76. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/truth_functions/__init__.py +0 -0
  77. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/truth_functions/crm.py +0 -0
  78. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/truth_functions/data.py +0 -0
  79. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/truth_functions/energy.py +0 -0
  80. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/truth_functions/file.py +0 -0
  81. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/truth_functions/phoneme.py +0 -0
  82. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/truth_functions/sed.py +0 -0
  83. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/truth_functions/target.py +0 -0
  84. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/types.py +0 -0
  85. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/queries/__init__.py +0 -0
  86. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/queries/queries.py +0 -0
  87. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/asl_p56.py +0 -0
  88. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/asr_functions/__init__.py +0 -0
  89. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/asr_functions/aixplain_whisper.py +0 -0
  90. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/asr_functions/deepgram.py +0 -0
  91. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/asr_functions/google.py +0 -0
  92. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/asr_functions/whisper.py +0 -0
  93. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/calculate_input_shape.py +0 -0
  94. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/create_ts_name.py +0 -0
  95. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/dataclass_from_dict.py +0 -0
  96. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/db.py +0 -0
  97. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/energy_f.py +0 -0
  98. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/engineering_number.py +0 -0
  99. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/get_frames_per_batch.py +0 -0
  100. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/get_label_names.py +0 -0
  101. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/grouper.py +0 -0
  102. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/human_readable_size.py +0 -0
  103. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/keras_utils.py +0 -0
  104. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/max_text_width.py +0 -0
  105. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/numeric_conversion.py +0 -0
  106. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/onnx_utils.py +0 -0
  107. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/parallel.py +0 -0
  108. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/print_mixture_details.py +0 -0
  109. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/ranges.py +0 -0
  110. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/read_mixture_data.py +0 -0
  111. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/read_predict_data.py +0 -0
  112. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/reshape.py +0 -0
  113. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/seconds_to_hms.py +0 -0
  114. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/stacked_complex.py +0 -0
  115. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/stratified_shuffle_split.py +0 -0
  116. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/trim_docstring.py +0 -0
  117. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/wave.py +0 -0
  118. {sonusai-0.12.5 → sonusai-0.12.7}/sonusai/utils/yes_or_no.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sonusai
3
- Version: 0.12.5
3
+ Version: 0.12.7
4
4
  Summary: Framework for building deep neural network models for sound, speech, and voice AI
5
5
  Home-page: https://aaware.com
6
6
  License: GPL-3.0-only
@@ -29,7 +29,7 @@ Requires-Dist: paho-mqtt (>=1.6.1,<2.0.0)
29
29
  Requires-Dist: pandas (>=2.0.3,<3.0.0)
30
30
  Requires-Dist: pesq (>=0.0.4,<0.0.5)
31
31
  Requires-Dist: protobuf (>=4.24.0,<5.0.0)
32
- Requires-Dist: pyaaware (>=1.4.18,<2.0.0)
32
+ Requires-Dist: pyaaware (>=1.4.19,<2.0.0)
33
33
  Requires-Dist: python-magic (>=0.4.27,<0.5.0)
34
34
  Requires-Dist: scikit-learn (>=1.3.0,<2.0.0)
35
35
  Requires-Dist: sh (>=1.14.3,<2.0.0)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "sonusai"
3
- version = "0.12.5"
3
+ version = "0.12.7"
4
4
  description = "Framework for building deep neural network models for sound, speech, and voice AI"
5
5
  authors = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
6
6
  maintainers = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
@@ -31,7 +31,7 @@ paho-mqtt = "^1.6.1"
31
31
  pandas = "^2.0.3"
32
32
  pesq = "^0.0.4"
33
33
  protobuf = "^4.24.0"
34
- pyaaware = "^1.4.18"
34
+ pyaaware = "^1.4.19"
35
35
  python = ">=3.8,<3.11"
36
36
  python-magic = "^0.4.27"
37
37
  scikit-learn = "^1.3.0"
@@ -710,7 +710,7 @@ def _process_mixture(mixid: int) -> Tuple[pd.DataFrame, pd.DataFrame]:
710
710
  return all_metrics_table_1, all_metrics_table_2
711
711
 
712
712
 
713
- def main():
713
+ def main() -> None:
714
714
  from docopt import docopt
715
715
 
716
716
  import sonusai
@@ -808,7 +808,7 @@ def main():
808
808
  # Individual mixtures use pandas print, set precision to 2 decimal places
809
809
  # pd.set_option('float_format', '{:.2f}'.format)
810
810
  progress = tqdm(total=len(mixids))
811
- all_metrics_tables = p_tqdm_map(_process_mixture, mixids, progress=progress)
811
+ all_metrics_tables = p_tqdm_map(_process_mixture, mixids, progress=progress, chunksize=10)
812
812
  progress.close()
813
813
 
814
814
  all_metrics_table_1 = pd.concat([item[0] for item in all_metrics_tables])
@@ -65,9 +65,9 @@ class KerasFromMixtureDatabase(Sequence):
65
65
  self.stride = self.mixdb.fg.stride
66
66
  self.num_bands = self.mixdb.fg.num_bands
67
67
  self.num_classes = self.mixdb.num_classes
68
- self.mixture_frame_segments = None
69
- self.batch_frame_segments = None
70
- self.total_batches: Optional[int]
68
+ self.mixture_frame_segments: Optional[int] = None
69
+ self.batch_frame_segments: Optional[int] = None
70
+ self.total_batches: Optional[int] = None
71
71
 
72
72
  self._initialize_mixtures()
73
73
 
@@ -186,7 +186,7 @@ def evaluate(mixdb: MixtureDatabase,
186
186
  classdf.round(3).to_csv(join(output_dir, f'class_snr{snri}.csv'))
187
187
 
188
188
 
189
- def main():
189
+ def main() -> None:
190
190
  from datetime import datetime
191
191
  from os import mkdir
192
192
  from os.path import join
@@ -72,7 +72,7 @@ def genft(mixdb: MixtureDatabase,
72
72
  results.append(_genft_kernel(mixid))
73
73
  else:
74
74
  progress = tqdm(total=len(mixids), disable=not show_progress)
75
- results = p_tqdm_map(_genft_kernel, mixids, progress=progress)
75
+ results = p_tqdm_map(_genft_kernel, mixids, progress=progress, chunksize=10)
76
76
  progress.close()
77
77
 
78
78
  return results
@@ -100,7 +100,7 @@ def _genft_kernel(mixid: int) -> GenFTData:
100
100
  return GenFTData(feature=feature, truth_f=truth_f, segsnr=segsnr)
101
101
 
102
102
 
103
- def main():
103
+ def main() -> None:
104
104
  from docopt import docopt
105
105
 
106
106
  import sonusai
@@ -79,7 +79,7 @@ def genmix(mixdb: MixtureDatabase,
79
79
  results.append(_genmix_kernel(mixid))
80
80
  else:
81
81
  progress = tqdm(total=len(mixids), disable=not show_progress)
82
- results = p_tqdm_map(_genmix_kernel, mixids, progress=progress)
82
+ results = p_tqdm_map(_genmix_kernel, mixids, progress=progress, chunksize=10)
83
83
  progress.close()
84
84
 
85
85
  return results
@@ -128,7 +128,7 @@ def _genmix_kernel(mixid: int) -> GenMixData:
128
128
  segsnr_t=segsnr_t)
129
129
 
130
130
 
131
- def main():
131
+ def main() -> None:
132
132
  from docopt import docopt
133
133
 
134
134
  import sonusai
@@ -141,7 +141,6 @@ def genmixdb(location: Location,
141
141
  show_progress: bool = False,
142
142
  test_mode: bool = False,
143
143
  use_cache: bool = True) -> MixtureDatabase:
144
- import itertools
145
144
  from random import seed
146
145
 
147
146
  import yaml
@@ -155,7 +154,6 @@ def genmixdb(location: Location,
155
154
  from sonusai.mixture import SAMPLE_RATE
156
155
  from sonusai.mixture import TruthSettings
157
156
  from sonusai.mixture import balance_targets
158
- from sonusai.mixture import estimate_augmented_length_from_audio
159
157
  from sonusai.mixture import generate_mixtures
160
158
  from sonusai.mixture import get_augmentation_indices_for_mixup
161
159
  from sonusai.mixture import get_augmentations
@@ -286,19 +284,11 @@ def genmixdb(location: Location,
286
284
 
287
285
  augmented_targets = balance_targets(mixdb, augmented_targets)
288
286
 
289
- augmented_noise_iter = list(itertools.product(*[range(len(mixdb.noises)), range(len(mixdb.noise_augmentations))]))
290
- noise_audio_samples = sum([mixdb.augmented_noise_length(nfi, nai) for nfi, nai in augmented_noise_iter])
291
-
292
- if logging:
293
- logger.info('Generating mixtures first pass')
294
- used_noise_files, used_noise_samples = generate_mixtures(mixdb=mixdb,
295
- augmented_targets=augmented_targets,
296
- noise_files=noise_files,
297
- noise_augmentations=noise_augmentations,
298
- mixups=mixups,
299
- show_progress=show_progress)
300
-
301
287
  total_noise_files = len(mixdb.noises) * len(mixdb.noise_augmentations)
288
+ aug_noise_audio_samples = mixdb.augmented_noise_samples
289
+
290
+ total_target_files = len(augmented_targets)
291
+ aug_target_audio_samples = mixdb.augmented_target_samples
302
292
 
303
293
  if logging:
304
294
  raw_target_audio_samples = sum([targets.samples for targets in mixdb.targets])
@@ -312,24 +302,19 @@ def genmixdb(location: Location,
312
302
  f'{human_readable_size(raw_noise_audio_duration * SAMPLE_RATE * SAMPLE_BYTES, 1)}, '
313
303
  f'{seconds_to_hms(seconds=raw_noise_audio_duration)}')
314
304
 
315
- augmented_noise_audio_samples = float(sum([mixdb.augmented_noise_length(f, a) for f, a in
316
- zip(range(len(mixdb.noises)),
317
- range(len(mixdb.noise_augmentations)))]))
318
- augmented_target_audio_samples = 0
319
- for augmented_target in augmented_targets:
320
- augmented_target_audio_samples += estimate_augmented_length_from_audio(
321
- audio=mixdb.raw_target_audio(augmented_target.target_file_index),
322
- augmentation=mixdb.target_augmentations[augmented_target.target_augmentation_index],
323
- length_common_denominator=mixdb.feature_step_samples)
324
-
325
305
  logger.info('')
326
- logger.info(f'Augmented target audio: {len(augmented_targets)} files, '
327
- f'{human_readable_size(augmented_target_audio_samples * SAMPLE_BYTES, 1)}, '
328
- f'{seconds_to_hms(seconds=augmented_target_audio_samples / SAMPLE_RATE)}')
306
+ logger.info(f'Augmented target audio: {total_target_files} files, '
307
+ f'{human_readable_size(aug_target_audio_samples * SAMPLE_BYTES, 1)}, '
308
+ f'{seconds_to_hms(seconds=aug_target_audio_samples / SAMPLE_RATE)}')
329
309
  logger.info(f'Augmented noise audio: {total_noise_files} files, '
330
- f'{human_readable_size(augmented_noise_audio_samples * SAMPLE_BYTES, 1)}, '
331
- f'{seconds_to_hms(seconds=augmented_noise_audio_samples / SAMPLE_RATE)}')
310
+ f'{human_readable_size(aug_noise_audio_samples * SAMPLE_BYTES, 1)}, '
311
+ f'{seconds_to_hms(seconds=aug_noise_audio_samples / SAMPLE_RATE)}')
332
312
 
313
+ used_noise_files, used_noise_samples = generate_mixtures(mixdb=mixdb,
314
+ augmented_targets=augmented_targets,
315
+ noise_files=noise_files,
316
+ noise_augmentations=noise_augmentations,
317
+ mixups=mixups)
333
318
  total_mixtures = len(mixdb.mixtures)
334
319
  if logging:
335
320
  logger.info('')
@@ -358,9 +343,9 @@ def genmixdb(location: Location,
358
343
  MP_GLOBAL.save_segsnr = save_segsnr
359
344
 
360
345
  if logging:
361
- logger.info('Generating mixtures second pass')
346
+ logger.info('Generating mixtures')
362
347
  progress = tqdm(total=total_mixtures, disable=not show_progress)
363
- mixdb.mixtures = p_tqdm_map(_process_mixture, range(total_mixtures), progress=progress)
348
+ mixdb.mixtures = p_tqdm_map(_process_mixture, range(total_mixtures), progress=progress, chunksize=10)
364
349
  progress.close()
365
350
 
366
351
  total_samples = mixdb.total_samples()
@@ -374,7 +359,7 @@ def genmixdb(location: Location,
374
359
  stride=mixdb.fg.stride,
375
360
  desc='Actual')
376
361
  noise_files_percent = (float(used_noise_files) / float(total_noise_files)) * 100
377
- noise_samples_percent = (float(used_noise_samples) / float(noise_audio_samples)) * 100
362
+ noise_samples_percent = (float(used_noise_samples) / float(aug_noise_audio_samples)) * 100
378
363
  logger.info('')
379
364
  logger.info(f'Used {noise_files_percent:,.0f}% of augmented noise files')
380
365
  logger.info(f'Used {noise_samples_percent:,.0f}% of augmented noise audio')
@@ -431,7 +416,7 @@ def _process_mixture(mixid: int) -> MRecord:
431
416
  return mrecord
432
417
 
433
418
 
434
- def main():
419
+ def main() -> None:
435
420
  from docopt import docopt
436
421
 
437
422
  import sonusai
@@ -579,7 +579,7 @@ def report_leaf_fold_data_usage(all_files: List[FileInfo], use_files: List[FileI
579
579
  logger.warning('')
580
580
 
581
581
 
582
- def main():
582
+ def main() -> None:
583
583
  from docopt import docopt
584
584
 
585
585
  import sonusai
@@ -30,7 +30,7 @@ Results are written into subdirectory <MODEL>-<TIMESTAMP> unless OUTPUT is speci
30
30
  from sonusai import logger
31
31
 
32
32
 
33
- def main():
33
+ def main() -> None:
34
34
  from docopt import docopt
35
35
 
36
36
  import sonusai
@@ -40,7 +40,7 @@ from sonusai.mixture import Feature
40
40
  from sonusai.mixture import Predict
41
41
 
42
42
 
43
- def main():
43
+ def main() -> None:
44
44
  from docopt import docopt
45
45
 
46
46
  import sonusai
@@ -80,7 +80,7 @@ class SonusAIModelCheckpoint(tf.keras.callbacks.ModelCheckpoint):
80
80
  f.attrs['sonusai_num_classes'] = str(self.num_classes)
81
81
 
82
82
 
83
- def main():
83
+ def main() -> None:
84
84
  from docopt import docopt
85
85
 
86
86
  import sonusai
@@ -285,9 +285,9 @@ def main():
285
285
  # Find checkpoint file and load weights for prediction and model save
286
286
  checkpoint_name = None
287
287
  for path, dirs, files in walk(output_dir):
288
- for f in files:
289
- if "ckpt" in f:
290
- checkpoint_name = f
288
+ for file in files:
289
+ if "ckpt" in file:
290
+ checkpoint_name = file
291
291
 
292
292
  if checkpoint_name is not None:
293
293
  logger.info('Using best checkpoint for prediction and model exports')
@@ -109,7 +109,7 @@ def lsdb(mixdb: MixtureDatabase,
109
109
  logger.info(f' {snr_mean[c]:8.2f} {snr_std[c]:8.2f} {snr_db_mean[c]:8.2f} {snr_db_std[c]:8.2f}')
110
110
 
111
111
 
112
- def main():
112
+ def main() -> None:
113
113
  from docopt import docopt
114
114
 
115
115
  import sonusai
@@ -4,7 +4,6 @@ usage: sonusai [--version] [--help] <command> [<args>...]
4
4
 
5
5
  The sonusai commands are:
6
6
  calc_metric_spenh Run speech enhancement and analysis
7
- calc_metric_spenh_targetf Run speech enhancement and analysis for targetf truth (deprecated)
8
7
  evaluate Evaluate model performance
9
8
  genft Generate feature and truth data
10
9
  genmix Generate mixture and truth data
@@ -14,6 +13,7 @@ The sonusai commands are:
14
13
  keras_train Train a model using Keras
15
14
  keras_onnx Convert a trained Keras model to ONNX
16
15
  lsdb List information about a mixture database
16
+ mkmanifest Make ASR manifest JSON file
17
17
  mkwav Make WAV files from a mixture database
18
18
  onnx_predict Run ONNX predict on a trained model
19
19
  plot Plot mixture data
@@ -30,18 +30,14 @@ for more information on a specific command.
30
30
  from sonusai import logger
31
31
 
32
32
 
33
- def main():
34
- from subprocess import call
35
-
33
+ def main() -> None:
36
34
  from docopt import docopt
37
35
 
38
36
  import sonusai
39
- from sonusai import SonusAIError
40
37
  from sonusai.utils import trim_docstring
41
38
 
42
39
  commands = (
43
40
  'calc_metric_spenh',
44
- 'calc_metric_spenh_targetf',
45
41
  'evaluate',
46
42
  'genft',
47
43
  'genmix',
@@ -51,6 +47,7 @@ def main():
51
47
  'keras_train',
52
48
  'keras_onnx',
53
49
  'lsdb',
50
+ 'mkmanifest',
54
51
  'mkwav',
55
52
  'onnx_predict',
56
53
  'plot',
@@ -66,6 +63,11 @@ def main():
66
63
  command = args['<command>']
67
64
  argv = args['<args>']
68
65
 
66
+ from subprocess import call
67
+
68
+ import sonusai
69
+ from sonusai import SonusAIError
70
+
69
71
  if command == 'help':
70
72
  if not argv:
71
73
  exit(call(['sonusai', '-h']))
@@ -1,5 +1,5 @@
1
1
  # SonusAI mixture utilities
2
- from sonusai.mixture.audio import AawareSoxTransformer
2
+ from sonusai.mixture.audio import Transformer
3
3
  from sonusai.mixture.audio import calculate_audio_from_transform
4
4
  from sonusai.mixture.audio import calculate_transform_from_audio
5
5
  from sonusai.mixture.audio import get_duration
@@ -97,7 +97,6 @@ def read_audio(name: Location) -> AudioT:
97
97
  from typing import Any
98
98
 
99
99
  import numpy as np
100
- import sox
101
100
 
102
101
  from sonusai import SonusAIError
103
102
  from sonusai.mixture import BIT_DEPTH
@@ -196,10 +195,16 @@ def get_duration(audio: AudioT) -> float:
196
195
  return len(audio) / SAMPLE_RATE
197
196
 
198
197
 
199
- class AawareSoxTransformer(sox.Transformer):
198
+ class Transformer(sox.Transformer):
199
+ """Override certain sox.Transformer methods
200
+ """
201
+
200
202
  def fir(self, coefficients):
201
203
  """Use SoX’s FFT convolution engine with given FIR filter coefficients.
202
204
 
205
+ The SonusAI override allows coefficients to be either a list of numbers
206
+ or a string containing a text file with the coefficients.
207
+
203
208
  Parameters
204
209
  ----------
205
210
  coefficients : list or str
@@ -225,6 +230,69 @@ class AawareSoxTransformer(sox.Transformer):
225
230
 
226
231
  return self
227
232
 
233
+ def tempo(self, factor, audio_type=None, quick=False):
234
+ """Time stretch audio without changing pitch.
235
+
236
+ This effect uses the WSOLA algorithm. The audio is chopped up into
237
+ segments which are then shifted in the time domain and overlapped
238
+ (cross-faded) at points where their waveforms are most similar as
239
+ determined by measurement of least squares.
240
+
241
+ The SonusAI override does not generate a warning for small factors.
242
+ The sox.Transformer's implementation of stretch does not invert
243
+ the factor even though it says that it does; this invalidates the
244
+ factor size check and produces the wrong result.
245
+
246
+ Parameters
247
+ ----------
248
+ factor : float
249
+ The ratio of new tempo to the old tempo.
250
+ For ex. 1.1 speeds up the tempo by 10%; 0.9 slows it down by 10%.
251
+ audio_type : str
252
+ Type of audio, which optimizes algorithm parameters. One of:
253
+ * m : Music,
254
+ * s : Speech,
255
+ * l : Linear (useful when factor is close to 1),
256
+ quick : bool, default=False
257
+ If True, this effect will run faster but with lower sound quality.
258
+
259
+ See Also
260
+ --------
261
+ stretch, speed, pitch
262
+
263
+ """
264
+ from sox.core import is_number
265
+
266
+ from sonusai import SonusAIError
267
+ from sonusai import logger
268
+
269
+ if not is_number(factor) or factor <= 0:
270
+ raise SonusAIError('factor must be a positive number')
271
+
272
+ if factor < 0.5 or factor > 2:
273
+ logger.warning('Using an extreme time stretching factor. Quality of results will be poor')
274
+
275
+ if audio_type not in [None, 'm', 's', 'l']:
276
+ raise SonusAIError("audio_type must be one of None, 'm', 's', or 'l'.")
277
+
278
+ if not isinstance(quick, bool):
279
+ raise SonusAIError('quick must be a boolean')
280
+
281
+ effect_args = ['tempo']
282
+
283
+ if quick:
284
+ effect_args.append('-q')
285
+
286
+ if audio_type is not None:
287
+ effect_args.append('-{}'.format(audio_type))
288
+
289
+ effect_args.append('{:f}'.format(factor))
290
+
291
+ self.effects.extend(effect_args)
292
+ self.effects_log.append('tempo')
293
+
294
+ return self
295
+
228
296
 
229
297
  def validate_input_file(input_filepath: str) -> None:
230
298
  from os.path import exists
@@ -239,4 +307,4 @@ def validate_input_file(input_filepath: str) -> None:
239
307
 
240
308
  ext = splitext(input_filepath)[1][1:].lower()
241
309
  if ext not in VALID_FORMATS:
242
- raise SonusAIError(f'This installation of Sox cannot process .{ext} files')
310
+ raise SonusAIError(f'This installation of SoX cannot process .{ext} files')
@@ -173,17 +173,17 @@ def apply_augmentation(audio: AudioT, augmentation: Augmentation, length_common_
173
173
  :param length_common_denominator: Pad resulting audio to be a multiple of this
174
174
  :return: Augmented audio
175
175
  """
176
- import sox
177
176
 
178
177
  from sonusai import SonusAIError
179
178
  from sonusai.mixture import BIT_DEPTH
180
179
  from sonusai.mixture import CHANNEL_COUNT
181
180
  from sonusai.mixture import ENCODING
182
181
  from sonusai.mixture import SAMPLE_RATE
182
+ from sonusai.mixture import Transformer
183
183
 
184
184
  try:
185
185
  # Apply augmentations
186
- tfm = sox.Transformer()
186
+ tfm = Transformer()
187
187
  tfm.set_input_format(rate=SAMPLE_RATE, bits=BIT_DEPTH, channels=CHANNEL_COUNT, encoding=ENCODING)
188
188
  tfm.set_output_format(rate=SAMPLE_RATE, bits=BIT_DEPTH, channels=CHANNEL_COUNT, encoding=ENCODING)
189
189
 
@@ -201,11 +201,7 @@ def apply_augmentation(audio: AudioT, augmentation: Augmentation, length_common_
201
201
  tfm.pitch(n_semitones=float(augmentation.pitch) / 100)
202
202
 
203
203
  if augmentation.tempo is not None:
204
- factor = float(augmentation.tempo)
205
- if abs(factor - 1.0) <= 0.1:
206
- tfm.stretch(factor=factor)
207
- else:
208
- tfm.tempo(factor=factor, audio_type='s')
204
+ tfm.tempo(factor=float(augmentation.tempo), audio_type='s')
209
205
 
210
206
  if augmentation.eq1 is not None:
211
207
  tfm.equalizer(frequency=augmentation.eq1[0], width_q=augmentation.eq1[1],
@@ -240,11 +236,10 @@ def apply_ir(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
240
236
  :return: Augmented audio
241
237
  """
242
238
  import numpy as np
243
- import sox
244
239
 
245
240
  from sonusai import SonusAIError
246
- from sonusai.mixture import AawareSoxTransformer
247
241
  from sonusai.mixture import SAMPLE_RATE
242
+ from sonusai.mixture import Transformer
248
243
  from sonusai.utils import linear_to_db
249
244
 
250
245
  max_abs_audio = max(abs(audio))
@@ -257,7 +252,7 @@ def apply_ir(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
257
252
  max_db = linear_to_db(max_abs_audio)
258
253
 
259
254
  # Convert audio to IR sample rate and normalize to -20 dBFS to avoid clipping when applying IR
260
- tfm = sox.Transformer()
255
+ tfm = Transformer()
261
256
  tfm.set_output_format(rate=ir.sample_rate)
262
257
  tfm.norm(db_level=-20)
263
258
  audio_out = tfm.build_array(input_array=audio, sample_rate_in=SAMPLE_RATE)
@@ -267,7 +262,7 @@ def apply_ir(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
267
262
  audio_out = np.pad(array=audio_out, pad_width=(pad, pad))
268
263
 
269
264
  # Apply IR and convert back to global sample rate
270
- tfm = AawareSoxTransformer()
265
+ tfm = Transformer()
271
266
  tfm.set_output_format(rate=SAMPLE_RATE)
272
267
  tfm.fir(coefficients=ir.coefficients_file)
273
268
  try:
@@ -276,7 +271,7 @@ def apply_ir(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
276
271
  raise SonusAIError(f'Error applying IR: {e}')
277
272
 
278
273
  # Reset level to previous max value
279
- tfm = sox.Transformer()
274
+ tfm = Transformer()
280
275
  tfm.norm(db_level=max_db)
281
276
  audio_out = tfm.build_array(input_array=audio_out, sample_rate_in=SAMPLE_RATE)
282
277
 
@@ -296,8 +291,10 @@ def estimate_augmented_length_from_length(length: int,
296
291
  :param length_common_denominator: Pad resulting audio to be a multiple of this
297
292
  :return: Estimated length of augmented audio
298
293
  """
294
+ import numpy as np
295
+
299
296
  if augmentation.tempo is not None:
300
- length = int(length // float(augmentation.tempo))
297
+ length = int(np.round(length / float(augmentation.tempo)))
301
298
 
302
299
  length += get_pad_length(length, length_common_denominator)
303
300
 
@@ -250,7 +250,7 @@ def get_target_files(config: dict, show_progress: bool = False) -> TargetFiles:
250
250
  for target in config['targets']]))
251
251
 
252
252
  progress = tqdm(total=len(target_files), disable=not show_progress)
253
- target_files = p_tqdm_map(_get_samples, target_files, progress=progress)
253
+ target_files = p_tqdm_map(_get_samples, target_files, progress=progress, chunksize=10)
254
254
  progress.close()
255
255
 
256
256
  max_class = get_max_class(config['num_classes'], config['truth_mode'] == 'mutex')
@@ -394,7 +394,7 @@ def get_noise_files(config: dict, show_progress: bool = False) -> NoiseFiles:
394
394
  noise_files = list(chain.from_iterable([_append_noise_files(noise_file=noise) for noise in config['noises']]))
395
395
 
396
396
  progress = tqdm(total=len(noise_files), disable=not show_progress)
397
- noise_files = p_tqdm_map(_get_samples, noise_files, progress=progress)
397
+ noise_files = p_tqdm_map(_get_samples, noise_files, progress=progress, chunksize=10)
398
398
  progress.close()
399
399
 
400
400
  return dataclass_from_dict(NoiseFiles, noise_files)