sonusai-0.15.8.tar.gz → sonusai-0.16.0.tar.gz

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (137)
  1. {sonusai-0.15.8 → sonusai-0.16.0}/PKG-INFO +8 -19
  2. {sonusai-0.15.8 → sonusai-0.16.0}/README.rst +5 -5
  3. {sonusai-0.15.8 → sonusai-0.16.0}/pyproject.toml +3 -19
  4. sonusai-0.16.0/sonusai/__init__.py +86 -0
  5. sonusai-0.16.0/sonusai/audiofe.py +237 -0
  6. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/calc_metric_spenh.py +21 -12
  7. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/genft.py +2 -1
  8. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/genmixdb.py +5 -5
  9. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/lsdb.py +2 -2
  10. sonusai-0.16.0/sonusai/main.py +90 -0
  11. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/__init__.py +4 -2
  12. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/audio.py +0 -34
  13. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/config.py +1 -2
  14. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/datatypes.py +1 -1
  15. sonusai-0.16.0/sonusai/mixture/feature.py +105 -0
  16. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/helpers.py +60 -30
  17. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/log_duration_and_sizes.py +2 -2
  18. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/mixdb.py +13 -10
  19. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/spectral_mask.py +14 -14
  20. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/truth_functions/data.py +1 -1
  21. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/truth_functions/target.py +2 -2
  22. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mkmanifest.py +29 -2
  23. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/onnx_predict.py +1 -1
  24. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/plot.py +4 -4
  25. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/post_spenh_targetf.py +8 -8
  26. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/__init__.py +8 -7
  27. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/asl_p56.py +3 -3
  28. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/asr.py +35 -8
  29. sonusai-0.16.0/sonusai/utils/asr_functions/__init__.py +1 -0
  30. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/asr_functions/aaware_whisper.py +2 -2
  31. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/asr_manifest_functions/__init__.py +1 -0
  32. sonusai-0.16.0/sonusai/utils/asr_manifest_functions/mcgill_speech.py +29 -0
  33. sonusai-0.16.0/sonusai/utils/audio_devices.py +41 -0
  34. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/calculate_input_shape.py +3 -4
  35. sonusai-0.16.0/sonusai/utils/create_timestamp.py +5 -0
  36. sonusai-0.15.8/sonusai/utils/trim_docstring.py → sonusai-0.16.0/sonusai/utils/docstring.py +20 -0
  37. sonusai-0.16.0/sonusai/utils/model_utils.py +30 -0
  38. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/onnx_utils.py +19 -45
  39. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/reshape.py +11 -11
  40. sonusai-0.16.0/sonusai/utils/wave.py +26 -0
  41. sonusai-0.15.8/sonusai/__init__.py +0 -55
  42. sonusai-0.15.8/sonusai/data_generator/__init__.py +0 -5
  43. sonusai-0.15.8/sonusai/data_generator/dataset_from_mixdb.py +0 -143
  44. sonusai-0.15.8/sonusai/data_generator/keras_from_mixdb.py +0 -169
  45. sonusai-0.15.8/sonusai/data_generator/torch_from_mixdb.py +0 -122
  46. sonusai-0.15.8/sonusai/evaluate.py +0 -245
  47. sonusai-0.15.8/sonusai/keras_onnx.py +0 -86
  48. sonusai-0.15.8/sonusai/keras_predict.py +0 -231
  49. sonusai-0.15.8/sonusai/keras_train.py +0 -334
  50. sonusai-0.15.8/sonusai/main.py +0 -93
  51. sonusai-0.15.8/sonusai/mixture/feature.py +0 -51
  52. sonusai-0.15.8/sonusai/torchl_onnx.py +0 -216
  53. sonusai-0.15.8/sonusai/torchl_predict.py +0 -547
  54. sonusai-0.15.8/sonusai/torchl_train.py +0 -223
  55. sonusai-0.15.8/sonusai/utils/asr_functions/__init__.py +0 -6
  56. sonusai-0.15.8/sonusai/utils/asr_functions/aixplain_whisper.py +0 -59
  57. sonusai-0.15.8/sonusai/utils/asr_functions/data.py +0 -16
  58. sonusai-0.15.8/sonusai/utils/asr_functions/deepgram.py +0 -97
  59. sonusai-0.15.8/sonusai/utils/asr_functions/fastwhisper.py +0 -90
  60. sonusai-0.15.8/sonusai/utils/asr_functions/google.py +0 -95
  61. sonusai-0.15.8/sonusai/utils/asr_functions/whisper.py +0 -49
  62. sonusai-0.15.8/sonusai/utils/keras_utils.py +0 -226
  63. sonusai-0.15.8/sonusai/utils/wave.py +0 -19
  64. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/aawscd_probwrite.py +0 -0
  65. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/data/__init__.py +0 -0
  66. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/data/genmixdb.yml +0 -0
  67. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/data/speech_ma01_01.wav +0 -0
  68. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/data/whitenoise.wav +0 -0
  69. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/doc/__init__.py +0 -0
  70. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/doc/doc.py +0 -0
  71. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/doc.py +0 -0
  72. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/genmix.py +0 -0
  73. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/gentcst.py +0 -0
  74. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/__init__.py +0 -0
  75. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/calc_class_weights.py +0 -0
  76. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/calc_optimal_thresholds.py +0 -0
  77. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/calc_pcm.py +0 -0
  78. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/calc_pesq.py +0 -0
  79. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/calc_sa_sdr.py +0 -0
  80. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/calc_sample_weights.py +0 -0
  81. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/calc_wer.py +0 -0
  82. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/calc_wsdr.py +0 -0
  83. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/class_summary.py +0 -0
  84. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/confusion_matrix_summary.py +0 -0
  85. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/one_hot.py +0 -0
  86. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/metrics/snr_summary.py +0 -0
  87. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/augmentation.py +0 -0
  88. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/class_count.py +0 -0
  89. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/constants.py +0 -0
  90. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/eq_rule_is_valid.py +0 -0
  91. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/generation.py +0 -0
  92. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/mapped_snr_f.py +0 -0
  93. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/soundfile_audio.py +0 -0
  94. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/sox_audio.py +0 -0
  95. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/sox_augmentation.py +0 -0
  96. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/target_class_balancing.py +0 -0
  97. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/targets.py +0 -0
  98. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/tokenized_shell_vars.py +0 -0
  99. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/torchaudio_audio.py +0 -0
  100. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/torchaudio_augmentation.py +0 -0
  101. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/truth.py +0 -0
  102. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/truth_functions/__init__.py +0 -0
  103. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/truth_functions/crm.py +0 -0
  104. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/truth_functions/energy.py +0 -0
  105. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/truth_functions/file.py +0 -0
  106. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/truth_functions/phoneme.py +0 -0
  107. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mixture/truth_functions/sed.py +0 -0
  108. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/mkwav.py +0 -0
  109. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/queries/__init__.py +0 -0
  110. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/queries/queries.py +0 -0
  111. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/tplot.py +0 -0
  112. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/asr_manifest_functions/data.py +0 -0
  113. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/asr_manifest_functions/librispeech.py +0 -0
  114. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +0 -0
  115. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/braced_glob.py +0 -0
  116. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/convert_string_to_number.py +0 -0
  117. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/create_ts_name.py +0 -0
  118. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/dataclass_from_dict.py +0 -0
  119. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/db.py +0 -0
  120. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/energy_f.py +0 -0
  121. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/engineering_number.py +0 -0
  122. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/get_frames_per_batch.py +0 -0
  123. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/get_label_names.py +0 -0
  124. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/grouper.py +0 -0
  125. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/human_readable_size.py +0 -0
  126. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/max_text_width.py +0 -0
  127. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/numeric_conversion.py +0 -0
  128. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/parallel.py +0 -0
  129. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/print_mixture_details.py +0 -0
  130. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/ranges.py +0 -0
  131. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/read_mixture_data.py +0 -0
  132. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/read_predict_data.py +0 -0
  133. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/seconds_to_hms.py +0 -0
  134. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/stacked_complex.py +0 -0
  135. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/stratified_shuffle_split.py +0 -0
  136. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/utils/yes_or_no.py +0 -0
  137. {sonusai-0.15.8 → sonusai-0.16.0}/sonusai/vars.py +0 -0
--- sonusai-0.15.8/PKG-INFO
+++ sonusai-0.16.0/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sonusai
- Version: 0.15.8
+ Version: 0.16.0
  Summary: Framework for building deep neural network models for sound, speech, and voice AI
  Home-page: https://aaware.com
  License: GPL-3.0-only
@@ -15,50 +15,39 @@ Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
- Requires-Dist: aixplain (>=0.2.6,<0.3.0)
- Requires-Dist: ctranslate2 (==4.1.0)
  Requires-Dist: dataclasses-json (>=0.6.1,<0.7.0)
- Requires-Dist: deepgram-sdk (>=3.0.0,<4.0.0)
  Requires-Dist: docopt (>=0.6.2,<0.7.0)
- Requires-Dist: faster-whisper (>=1.0.1,<2.0.0)
  Requires-Dist: h5py (>=3.11.0,<4.0.0)
  Requires-Dist: jiwer (>=3.0.3,<4.0.0)
- Requires-Dist: keras (>=3.1.1,<4.0.0)
- Requires-Dist: keras-tuner (>=1.4.7,<2.0.0)
  Requires-Dist: librosa (>=0.10.1,<0.11.0)
- Requires-Dist: lightning (>=2.2,<2.3)
  Requires-Dist: matplotlib (>=3.8.0,<4.0.0)
  Requires-Dist: onnx (>=1.14.1,<2.0.0)
  Requires-Dist: onnxruntime (>=1.16.1,<2.0.0)
  Requires-Dist: paho-mqtt (>=2.0.0,<3.0.0)
  Requires-Dist: pandas (>=2.1.1,<3.0.0)
  Requires-Dist: pesq (>=0.0.4,<0.0.5)
- Requires-Dist: pyaaware (>=1.5.3,<2.0.0)
+ Requires-Dist: pyaaware (>=1.5.7,<2.0.0)
+ Requires-Dist: pyaudio (>=0.2.14,<0.3.0)
  Requires-Dist: pydub (>=0.25.1,<0.26.0)
  Requires-Dist: pystoi (>=0.4.0,<0.5.0)
- Requires-Dist: python-magic (>=0.4.27,<0.5.0)
  Requires-Dist: requests (>=2.31.0,<3.0.0)
  Requires-Dist: samplerate (>=0.2.1,<0.3.0)
  Requires-Dist: soundfile (>=0.12.1,<0.13.0)
  Requires-Dist: sox (>=1.4.1,<2.0.0)
- Requires-Dist: speechrecognition (>=3.10.1,<4.0.0)
- Requires-Dist: tensorflow (>=2.15.0,<3.0.0)
- Requires-Dist: tf2onnx (>=1.15.1,<2.0.0)
  Requires-Dist: torch (>=2.2,<2.3)
  Requires-Dist: torchaudio (>=2.2,<2.3)
- Requires-Dist: torchinfo (>=1.8.0,<2.0.0)
  Requires-Dist: tqdm (>=4.66.1,<5.0.0)
  Description-Content-Type: text/x-rst

- Sonus AI: Framework for simplified creation of deep NN models for sound, speech, and voice AI
+ SonusAI: Framework for simplified creation of deep NN models for sound, speech, and voice AI

- Sonus AI includes functions for pre-processing training and validation data and
+ SonusAI includes functions for pre-processing training and validation data and
  creating performance metrics reports for key types of Keras models:
  - recurrent, convolutional, or a combination (i.e. RCNNs)
  - binary, multiclass single-label, multiclass multi-label, and regression
  - training with data augmentations: noise mixing, pitch and time stretch, etc.

- Sonus AI python functions are used by:
- - Aaware Inc. sonusai executable: Easily create train/validation data, run prediction, evaluate model performance
- - Keras model scripts: User python scripts for keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for trainining rnn-based models like CRNN's, DSCRNN's, etc. in Keras
+ SonusAI python functions are used by:
+ - Aaware Inc. sonusai framework: Easily create train/validation data, run prediction, evaluate model performance
+ - Keras model scripts: User python scripts for Keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for training rnn-based models like CRNN's, DSCRNN's, etc. in Keras.

--- sonusai-0.15.8/README.rst
+++ sonusai-0.16.0/README.rst
@@ -1,11 +1,11 @@
- Sonus AI: Framework for simplified creation of deep NN models for sound, speech, and voice AI
+ SonusAI: Framework for simplified creation of deep NN models for sound, speech, and voice AI

- Sonus AI includes functions for pre-processing training and validation data and
+ SonusAI includes functions for pre-processing training and validation data and
  creating performance metrics reports for key types of Keras models:
  - recurrent, convolutional, or a combination (i.e. RCNNs)
  - binary, multiclass single-label, multiclass multi-label, and regression
  - training with data augmentations: noise mixing, pitch and time stretch, etc.

- Sonus AI python functions are used by:
- - Aaware Inc. sonusai executable: Easily create train/validation data, run prediction, evaluate model performance
- - Keras model scripts: User python scripts for keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for trainining rnn-based models like CRNN's, DSCRNN's, etc. in Keras
+ SonusAI python functions are used by:
+ - Aaware Inc. sonusai framework: Easily create train/validation data, run prediction, evaluate model performance
+ - Keras model scripts: User python scripts for Keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for training rnn-based models like CRNN's, DSCRNN's, etc. in Keras.
--- sonusai-0.15.8/pyproject.toml
+++ sonusai-0.16.0/pyproject.toml
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "sonusai"
- version = "0.15.8"
+ version = "0.16.0"
  description = "Framework for building deep neural network models for sound, speech, and voice AI"
  authors = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
  maintainers = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
@@ -15,53 +15,37 @@ aawscd_probwrite = 'sonusai.aawscd_probwrite:main'

  [tool.poetry.dependencies]
  PyYAML = "^6.0.1"
- aixplain = "^0.2.6"
- ctranslate2 = "4.1.0"
  dataclasses-json = "^0.6.1"
- deepgram-sdk = "^3.0.0"
  docopt = "^0.6.2"
- faster-whisper = "^1.0.1"
  h5py = "^3.11.0"
  jiwer = "^3.0.3"
- keras = "^3.1.1"
- keras-tuner = "^1.4.7"
  librosa = "^0.10.1"
- lightning = "~2.2"
  matplotlib = "^3.8.0"
  onnx = "^1.14.1"
- #onnxruntime-gpu = "^1.16.1"
  onnxruntime = "^1.16.1"
- #openai-whisper = "^20231117"
  paho-mqtt = "^2.0.0"
  pandas = "^2.1.1"
  pesq = "^0.0.4"
- pyaaware = "^1.5.3"
+ pyaaware = "^1.5.7"
+ pyaudio = "^0.2.14"
  pydub = "^0.25.1"
  pystoi = "^0.4.0"
  python = ">=3.9,<3.12"
- python-magic = "^0.4.27"
  requests = "^2.31.0"
  samplerate = "^0.2.1"
  soundfile = "^0.12.1"
- speechrecognition = "^3.10.1"
  sox = "^1.4.1"
- tensorflow = "^2.15.0"
- tf2onnx = "^1.15.1"
  torch = "~2.2"
  torchaudio = "~2.2"
- torchinfo = "^1.8.0"
  tqdm = "^4.66.1"

  [tool.poetry.group.dev.dependencies]
  icecream = "^2.1.3"
- ipython = "^8.16.1"
- jupyter = "^1.0.0"
  mypy = "^1.6.0"
  mypy-extensions = "^1.0.0"
  pytest = "^8.1.1"
  types-pyyaml = "^6.0.12.12"
  types-requests = "^2.31.0.8"
- yappi = "^1.4.0"

  [tool.mypy]
  ignore_missing_imports = true
--- /dev/null
+++ sonusai-0.16.0/sonusai/__init__.py
@@ -0,0 +1,86 @@
+ import logging
+ from importlib import metadata
+ from os.path import dirname
+ 
+ __version__ = metadata.version(__package__)
+ BASEDIR = dirname(__file__)
+ 
+ commands_doc = """
+    audiofe              Audio front end
+    calc_metric_spenh    Run speech enhancement and analysis
+    doc                  Documentation
+    genft                Generate feature and truth data
+    genmix               Generate mixture and truth data
+    genmixdb             Generate a mixture database
+    gentcst              Generate target configuration from a subdirectory tree
+    lsdb                 List information about a mixture database
+    mkmanifest           Make ASR manifest JSON file
+    mkwav                Make WAV files from a mixture database
+    onnx_predict         Run ONNX predict on a trained model
+    plot                 Plot mixture data
+    post_spenh_targetf   Run post-processing for speech enhancement targetf data
+    tplot                Plot truth data
+    vars                 List custom SonusAI variables
+ """
+ 
+ # create logger
+ logger = logging.getLogger('sonusai')
+ logger.setLevel(logging.DEBUG)
+ formatter = logging.Formatter('%(message)s')
+ console_handler = logging.StreamHandler()
+ console_handler.setLevel(logging.DEBUG)
+ console_handler.setFormatter(formatter)
+ logger.addHandler(console_handler)
+ 
+ 
+ class SonusAIError(Exception):
+     def __init__(self, value):
+         logger.error(value)
+ 
+ 
+ # create file handler
+ def create_file_handler(filename: str) -> None:
+     fh = logging.FileHandler(filename=filename, mode='w')
+     fh.setLevel(logging.DEBUG)
+     fh.setFormatter(formatter)
+     logger.addHandler(fh)
+ 
+ 
+ # update console handler
+ def update_console_handler(verbose: bool) -> None:
+     if not verbose:
+         logger.removeHandler(console_handler)
+         console_handler.setLevel(logging.INFO)
+         logger.addHandler(console_handler)
+ 
+ 
+ # write initial log message
+ def initial_log_messages(name: str, subprocess: str = None) -> None:
+     from datetime import datetime
+     from getpass import getuser
+     from os import getcwd
+     from socket import gethostname
+     from sys import argv
+ 
+     if subprocess is None:
+         logger.info(f'SonusAI {__version__}')
+     else:
+         logger.info(f'SonusAI {subprocess}')
+     logger.info(f'{name}')
+     logger.info('')
+     logger.debug(f'Host: {gethostname()}')
+     logger.debug(f'User: {getuser()}')
+     logger.debug(f'Directory: {getcwd()}')
+     logger.debug(f'Date: {datetime.now()}')
+     logger.debug(f'Command: {" ".join(argv)}')
+     logger.debug('')
+ 
+ 
+ def commands_list(doc: str = commands_doc) -> list[str]:
+     lines = doc.split('\n')
+     commands = []
+     for line in lines:
+         command = line.strip().split(' ').pop(0)
+         if command:
+             commands.append(command)
+     return commands
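For orientation, these helpers are meant to be called together at tool startup. A minimal sketch using only what the hunk above defines (the tool name and log file name are illustrative, not part of the package):

    from sonusai import commands_list, create_file_handler, initial_log_messages, logger, update_console_handler

    create_file_handler('mytool.log')      # add a DEBUG-level file log
    update_console_handler(verbose=False)  # console drops to INFO unless verbose
    initial_log_messages('mytool')         # version/host/user/argv banner
    logger.info(f'Available commands: {commands_list()}')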
--- /dev/null
+++ sonusai-0.16.0/sonusai/audiofe.py
@@ -0,0 +1,237 @@
+ """sonusai audiofe
+ 
+ usage: audiofe [-hvds] [--version] [-i INPUT] [-l LENGTH] [-m MODEL] [-k CKPT] [-a ASR] [-w WMODEL]
+ 
+ options:
+     -h, --help
+     -v, --verbose                Be verbose.
+     -d, --debug                  Write debug data to H5 file.
+     -s, --show                   Show a list of available audio inputs.
+     -i INPUT, --input INPUT      Input audio.
+     -l LENGTH, --length LENGTH   Length of audio in seconds. [default: -1].
+     -m MODEL, --model MODEL      PL model .py file path.
+     -k CKPT, --checkpoint CKPT   PL checkpoint file with weights.
+     -a ASR, --asr ASR            ASR method to use.
+     -w WMODEL, --whisper WMODEL  Whisper model used in aixplain_whisper and whisper methods. [default: tiny].
+ 
+ Aaware SonusAI Audio Front End.
+ 
+ Capture LENGTH seconds of audio from INPUT. If LENGTH is < 0, then capture until key is pressed. If INPUT is a valid
+ audio file name, then use the audio data from the specified file. In this case, if LENGTH is < 0, process entire file;
+ otherwise, process min(length(INPUT), LENGTH) seconds of audio from INPUT. Audio is saved to
+ audiofe_capture_<TIMESTAMP>.wav.
+ 
+ If a model is specified, run prediction on audio data from this model. Then compute the inverse transform of the
+ prediction result and save to audiofe_predict_<TIMESTAMP>.wav.
+ 
+ If an ASR is specified, run ASR on the captured audio and print the results. In addition, if a model was also specified,
+ run ASR on the predict audio and print the results.
+ 
+ If the debug option is enabled, write capture audio, feature, reconstruct audio, predict, and predict audio to
+ audiofe_<TIMESTAMP>.h5.
+ 
+ """
+ from os.path import exists
+ from select import select
+ from sys import stdin
+ 
+ import h5py
+ import numpy as np
+ import pyaudio
+ import torch
+ from docopt import docopt
+ from docopt import printable_usage
+ 
+ import sonusai
+ from sonusai import create_file_handler
+ from sonusai import initial_log_messages
+ from sonusai import logger
+ from sonusai import update_console_handler
+ from sonusai.mixture import AudioT
+ from sonusai.mixture import CHANNEL_COUNT
+ from sonusai.mixture import SAMPLE_RATE
+ from sonusai.mixture import get_audio_from_feature
+ from sonusai.mixture import get_feature_from_audio
+ from sonusai.mixture import read_audio
+ from sonusai.utils import calc_asr
+ from sonusai.utils import create_timestamp
+ from sonusai.utils import get_input_device_index_by_name
+ from sonusai.utils import get_input_devices
+ from sonusai.utils import load_torchl_ckpt_model
+ from sonusai.utils import trim_docstring
+ from sonusai.utils import write_wav
+ 
+ 
+ def main() -> None:
+     args = docopt(trim_docstring(__doc__), version=sonusai.__version__, options_first=True)
+     ts = create_timestamp()
+ 
+     verbose = args['--verbose']
+     length = float(args['--length'])
+     input_name = args['--input']
+     model_name = args['--model']
+     ckpt_name = args['--checkpoint']
+     asr_name = args['--asr']
+     whisper_name = args['--whisper']
+     debug = args['--debug']
+     show = args['--show']
+ 
+     capture_name = f'audiofe_capture_{ts}.wav'
+     predict_name = f'audiofe_predict_{ts}.wav'
+     h5_name = f'audiofe_{ts}.h5'
+ 
+     if model_name is not None and ckpt_name is None:
+         print(printable_usage(trim_docstring(__doc__)))
+         exit(1)
+ 
+     # Setup logging file
+     create_file_handler('audiofe.log')
+     update_console_handler(verbose)
+     initial_log_messages('audiofe')
+ 
+     if show:
+         logger.info('List of available audio inputs:')
+         logger.info('')
+         p = pyaudio.PyAudio()
+         for name in get_input_devices(p):
+             logger.info(f'{name}')
+         logger.info('')
+         p.terminate()
+         return
+ 
+     if input_name is not None and exists(input_name):
+         capture_audio = get_frames_from_file(input_name, length)
+     else:
+         try:
+             capture_audio = get_frames_from_device(input_name, length)
+         except ValueError as e:
+             logger.exception(e)
+             return
+ 
+     write_wav(capture_name, capture_audio, SAMPLE_RATE)
+     logger.info('')
+     logger.info(f'Wrote capture audio with shape {capture_audio.shape} to {capture_name}')
+     if debug:
+         with h5py.File(h5_name, 'a') as f:
+             if 'capture_audio' in f:
+                 del f['capture_audio']
+             f.create_dataset('capture_audio', data=capture_audio)
+         logger.info(f'Wrote capture audio with shape {capture_audio.shape} to {h5_name}')
+ 
+     if asr_name is not None:
+         capture_asr = calc_asr(capture_audio, engine=asr_name, whisper_model_name=whisper_name).text
+         logger.info(f'Capture audio ASR: {capture_asr}')
+ 
+     if model_name is not None:
+         model = load_torchl_ckpt_model(model_name=model_name, ckpt_name=ckpt_name)
+         model.eval()
+ 
+         feature = get_feature_from_audio(audio=capture_audio, feature_mode=model.hparams.feature)
+         if debug:
+             with h5py.File(h5_name, 'a') as f:
+                 if 'feature' in f:
+                     del f['feature']
+                 f.create_dataset('feature', data=feature)
+             logger.info(f'Wrote feature with shape {feature.shape} to {h5_name}')
+ 
+         # if debug:
+         #     reconstruct_name = f'audiofe_reconstruct_{ts}.wav'
+         #     reconstruct_audio = get_audio_from_feature(feature=feature, feature_mode=model.hparams.feature)
+         #     samples = min(len(capture_audio), len(reconstruct_audio))
+         #     max_err = np.max(np.abs(capture_audio[:samples] - reconstruct_audio[:samples]))
+         #     logger.info(f'Maximum error between capture and reconstruct: {max_err}')
+         #     write_wav(reconstruct_name, reconstruct_audio, SAMPLE_RATE)
+         #     logger.info(f'Wrote reconstruct audio with shape {reconstruct_audio.shape} to {reconstruct_name}')
+         #     with h5py.File(h5_name, 'a') as f:
+         #         if 'reconstruct_audio' in f:
+         #             del f['reconstruct_audio']
+         #         f.create_dataset('reconstruct_audio', data=reconstruct_audio)
+         #     logger.info(f'Wrote reconstruct audio with shape {reconstruct_audio.shape} to {h5_name}')
+ 
+         with torch.no_grad():
+             # model wants batch x timesteps x feature_parameters
+             predict = model(torch.tensor(feature).permute((1, 0, 2))).permute(1, 0, 2).numpy()
+         if debug:
+             with h5py.File(h5_name, 'a') as f:
+                 if 'predict' in f:
+                     del f['predict']
+                 f.create_dataset('predict', data=predict)
+             logger.info(f'Wrote predict with shape {predict.shape} to {h5_name}')
+ 
+         predict_audio = get_audio_from_feature(feature=predict, feature_mode=model.hparams.feature)
+         write_wav(predict_name, predict_audio, SAMPLE_RATE)
+         logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {predict_name}')
+         if debug:
+             with h5py.File(h5_name, 'a') as f:
+                 if 'predict_audio' in f:
+                     del f['predict_audio']
+                 f.create_dataset('predict_audio', data=predict_audio)
+             logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {h5_name}')
+ 
+         if asr_name is not None:
+             predict_asr = calc_asr(predict_audio, engine=asr_name, whisper_model_name=whisper_name).text
+             logger.info(f'Predict audio ASR: {predict_asr}')
+ 
+ 
+ def get_frames_from_device(input_name: str | None, length: float, chunk: int = 1024) -> AudioT:
+     p = pyaudio.PyAudio()
+ 
+     input_devices = get_input_devices(p)
+     if not input_devices:
+         raise ValueError('No input audio devices found')
+ 
+     if input_name is None:
+         input_name = input_devices[0]
+ 
+     try:
+         device_index = get_input_device_index_by_name(p, input_name)
+     except ValueError:
+         msg = f'Could not find {input_name}\n'
+         msg += f'Available devices:\n'
+         for input_device in input_devices:
+             msg += f'  {input_device}\n'
+         raise ValueError(msg)
+ 
+     logger.info(f'Capturing from {p.get_device_info_by_index(device_index).get("name")}')
+     stream = p.open(format=pyaudio.paFloat32,
+                     channels=CHANNEL_COUNT,
+                     rate=SAMPLE_RATE,
+                     input=True,
+                     input_device_index=device_index)
+     stream.start_stream()
+ 
+     print()
+     print('+---------------------------------+')
+     print('| Press Enter to stop             |')
+     print('+---------------------------------+')
+     print()
+ 
+     elapsed = 0.0
+     seconds_per_chunk = float(chunk) / float(SAMPLE_RATE)
+     raw_frames = []
+     while elapsed < length or length == -1:
+         raw_frames.append(stream.read(num_frames=chunk, exception_on_overflow=False))
+         elapsed += seconds_per_chunk
+         if select([stdin, ], [], [], 0)[0]:
+             stdin.read(1)
+             length = elapsed
+ 
+     stream.stop_stream()
+     stream.close()
+     p.terminate()
+     frames = np.frombuffer(b''.join(raw_frames), dtype=np.float32)
+     return frames
+ 
+ 
+ def get_frames_from_file(input_name: str, length: float) -> AudioT:
+     logger.info(f'Capturing from {input_name}')
+     frames = read_audio(input_name)
+     if length != -1:
+         num_frames = int(length * SAMPLE_RATE)
+         if len(frames) > num_frames:
+             frames = frames[:num_frames]
+     return frames
+ 
+ 
+ if __name__ == '__main__':
+     main()
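Given the usage string above, typical invocations look like this (illustrative only; the ASR engine name is a placeholder for whatever the installed asr_functions provide):

    audiofe -s                             # list available capture devices
    audiofe -l 5 -a <ASR>                  # capture 5 seconds, then run ASR on it
    audiofe -i speech.wav -m model.py -k model.ckpt -d
                                           # predict on a file, write debug data to audiofe_<TIMESTAMP>.h5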
--- sonusai-0.15.8/sonusai/calc_metric_spenh.py
+++ sonusai-0.16.0/sonusai/calc_metric_spenh.py
@@ -758,13 +758,18 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
      predict = stack_complex(predict)

      # 2) Collect true target, noise, mixture data, trim to predict size if needed
-     target = mixdb.mixture_target(mixid)
-     target_f = mixdb.mixture_target_f(mixid, target=target)
-     noise = mixdb.mixture_noise(mixid)
-     noise_f = mixdb.mixture_noise_f(mixid, noise=noise)
-     mixture = mixdb.mixture_mixture(mixid, target=target, noise=noise)
+     tmp = mixdb.mixture_targets(mixid)  # targets is list of pre-IR and pre-specaugment targets
+     target_f = mixdb.mixture_targets_f(mixid, targets=tmp)[0]
+     target = tmp[0]
+     mixture = mixdb.mixture_mixture(mixid)  # note: gives full reverberated/distorted target, but no specaugment
+     # noise_wodist = mixdb.mixture_noise(mixid)  # noise without specaugment and distortion
+     # noise_wodist_f = mixdb.mixture_noise_f(mixid, noise=noise_wodist)
+     noise = mixture - target  # has time-domain distortion (ir, etc.) but does not have specaugment
+     # noise_f = mixdb.mixture_noise_f(mixid, noise=noise)
+     segsnr_f = mixdb.mixture_segsnr(mixid, target=target, noise=noise)  # note: uses pre-IR, pre-specaug audio
      mixture_f = mixdb.mixture_mixture_f(mixid, mixture=mixture)
-     segsnr_f = mixdb.mixture_segsnr(mixid, target=target, noise=noise)
+     noise_f = mixture_f - target_f  # true noise in freq domain includes specaugment and time-domain ir, distortions
+     # segsnr_f = mixdb.mixture_segsnr(mixid, target=target, noise=noise)
      segsnr_f[segsnr_f == inf] = 7.944e8  # 99db
      segsnr_f[segsnr_f == -inf] = 1.258e-10  # -99db
      # need to use inv-tf to match #samples & latency shift properties of predict inv tf
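The rewritten block computes noise as a residual rather than querying the database, so reverb/IR distortion applied to the target stays in the noise estimate. A minimal numpy sketch of the identity being used (random stand-ins for the audio; np.fft.rfft stands in for the mixdb forward transform):

    import numpy as np

    mixture = np.random.randn(16000).astype(np.float32)  # stand-in mixture audio
    target = np.random.randn(16000).astype(np.float32)   # stand-in reverberated target
    noise = mixture - target                             # time-domain residual keeps IR/distortion

    # the same identity per frequency bin:
    noise_f = np.fft.rfft(mixture) - np.fft.rfft(target)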
@@ -920,8 +925,9 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
                             'NLERR': lerr_n_frame,
                             'SPD': phd_frame})
      metr2 = metr2.describe()  # Use pandas stat function
-     metr2['SSNR'][1:] = metr2['SSNR'][1:].apply(
-         lambda x: 10 * np.log10(x + 1.01e-10))  # Change SSNR stats to dB, except count
+     # Change SSNR stats to dB, except count. SSNR is index 0, pandas requires using iloc
+     # metr2['SSNR'][1:] = metr2['SSNR'][1:].apply(lambda x: 10 * np.log10(x + 1.01e-10))
+     metr2.iloc[1:, 0] = metr2['SSNR'][1:].apply(lambda x: 10 * np.log10(x + 1.01e-10))
      # create a single row in multi-column header
      new_labels = pd.MultiIndex.from_product([metr2.columns,
                                               ['Avg', 'Min', 'Med', 'Max', 'Std']],
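The iloc change above avoids pandas chained assignment. A self-contained sketch of the difference, with a toy frame standing in for metr2 after .describe() (column 0 is SSNR, row 0 is count):

    import numpy as np
    import pandas as pd

    metr2 = pd.DataFrame({'SSNR': [8.0, 100.0, 1.0, 10.0]},
                         index=['count', 'mean', 'min', 'max'])

    # metr2['SSNR'][1:] = ... assigns through a temporary Series and can fail to
    # write back (SettingWithCopyWarning); .iloc writes directly into the frame.
    metr2.iloc[1:, 0] = metr2['SSNR'][1:].apply(lambda x: 10 * np.log10(x + 1.01e-10))
    print(metr2)  # count stays 8.0; mean/min/max become ~20, 0, 10 dB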
@@ -1166,7 +1172,7 @@ def main():
      # Individual mixtures use pandas print, set precision to 2 decimal places
      # pd.set_option('float_format', '{:.2f}'.format)
      progress = tqdm(total=len(mixids), desc='calc_metric_spenh')
-     all_metrics_tables = pp_tqdm_imap(_process_mixture, mixids, progress=progress, num_cpus=None)
+     all_metrics_tables = pp_tqdm_imap(_process_mixture, mixids, progress=progress, num_cpus=8)
      progress.close()

      all_metrics_table_1 = pd.concat([item[0] for item in all_metrics_tables])
@@ -1192,6 +1198,7 @@ def main():
          if ~np.isnan(tmp.iloc[0].to_numpy()[0]).any():
              mtab_snr_summary_em = pd.concat([mtab_snr_summary_em, tmp])

+     mtab_snr_summary = mtab_snr_summary.sort_values(by=['MXSNR'], ascending=False)
      # Correct percentages in snr summary table
      mtab_snr_summary['PESQi%'] = 100 * (mtab_snr_summary['PESQ'] - mtab_snr_summary['MXPESQ']) / np.maximum(
          mtab_snr_summary['MXPESQ'], 0.01)
@@ -1202,9 +1209,11 @@ def main():
              else:
                  mtab_snr_summary['WERi%'].iloc[i] = -999.0
          else:
-             mtab_snr_summary['WERi%'].iloc[i] = 100 * (mtab_snr_summary['MXWER'].iloc[i] -
-                                                        mtab_snr_summary['WER'].iloc[i]) / \
-                                                 mtab_snr_summary['MXWER'].iloc[i]
+             if ~np.isnan(mtab_snr_summary['WER'].iloc[i]) and ~np.isnan(mtab_snr_summary['MXWER'].iloc[i]):
+                 # update WERi% in 6th col
+                 mtab_snr_summary.iloc[i, 6] = 100 * (mtab_snr_summary['MXWER'].iloc[i] -
+                                                      mtab_snr_summary['WER'].iloc[i]) / \
+                                               mtab_snr_summary['MXWER'].iloc[i]

      # Calculate avg metrics over all mixtures except -99
      all_mtab1_sorted_nom99 = all_mtab1_sorted[all_mtab1_sorted.MXSNR != -99]
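The added guard only computes WERi% when both WER and MXWER are present. A hypothetical helper (not part of the package) mirroring the same logic in isolation; as in the surrounding code, callers are expected to have handled MXWER == 0 separately:

    import numpy as np

    def wer_improvement_pct(mxwer: float, wer: float) -> float:
        # percent WER improvement over the mixture baseline; NaN passes through
        if np.isnan(mxwer) or np.isnan(wer):
            return np.nan
        return 100 * (mxwer - wer) / mxwer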
--- sonusai-0.15.8/sonusai/genft.py
+++ sonusai-0.16.0/sonusai/genft.py
@@ -165,7 +165,8 @@ def main() -> None:
      logger.info(f'Wrote {len(mixids)} mixtures to {location}')
      logger.info('')
      logger.info(f'Duration: {seconds_to_hms(seconds=duration)}')
-     logger.info(f'feature: {human_readable_size(total_feature_frames * mixdb.fg_stride * mixdb.fg_num_bands * 4, 1)}')
+     logger.info(
+         f'feature: {human_readable_size(total_feature_frames * mixdb.fg_stride * mixdb.feature_parameters * 4, 1)}')
      logger.info(f'truth_f: {human_readable_size(total_feature_frames * mixdb.num_classes * 4, 1)}')
      if compute_segsnr:
          logger.info(f'segsnr: {human_readable_size(total_transform_frames * 4, 1)}')
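The renamed field feeds the same size arithmetic: feature bytes are frames × stride × feature_parameters × 4 (float32). With illustrative numbers (not taken from any real database):

    total_feature_frames = 100_000
    fg_stride, feature_parameters = 6, 245  # hypothetical feature geometry
    feature_bytes = total_feature_frames * fg_stride * feature_parameters * 4
    print(feature_bytes)                    # 588000000 bytes ≈ 0.55 GiB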
--- sonusai-0.15.8/sonusai/genmixdb.py
+++ sonusai-0.16.0/sonusai/genmixdb.py
@@ -225,7 +225,7 @@ def genmixdb(location: str,
      if logging:
          logger.info('Collecting impulse responses')

-     impulse_response_files = get_impulse_response_files(config, show_progress=show_progress)
+     impulse_response_files = get_impulse_response_files(config)

      populate_impulse_response_file_table(location, impulse_response_files, test)

@@ -337,12 +337,12 @@ def genmixdb(location: str,
          log_duration_and_sizes(total_duration=total_duration,
                                 num_classes=mixdb.num_classes,
                                 feature_step_samples=mixdb.feature_step_samples,
-                                num_bands=mixdb.fg_num_bands,
+                                feature_parameters=mixdb.feature_parameters,
                                 stride=mixdb.fg_stride,
                                 desc='Estimated')
          logger.info(f'Feature shape: '
-                     f'{mixdb.fg_stride} x {mixdb.fg_num_bands} '
-                     f'({mixdb.fg_stride * mixdb.fg_num_bands} total params)')
+                     f'{mixdb.fg_stride} x {mixdb.feature_parameters} '
+                     f'({mixdb.fg_stride * mixdb.feature_parameters} total params)')
          logger.info(f'Feature samples: {mixdb.feature_samples} samples ({mixdb.feature_ms} ms)')
          logger.info(f'Feature step samples: {mixdb.feature_step_samples} samples ({mixdb.feature_step_ms} ms)')
          logger.info('')
@@ -371,7 +371,7 @@ def genmixdb(location: str,
          log_duration_and_sizes(total_duration=total_duration,
                                 num_classes=mixdb.num_classes,
                                 feature_step_samples=mixdb.feature_step_samples,
-                                num_bands=mixdb.fg_num_bands,
+                                feature_parameters=mixdb.feature_parameters,
                                 stride=mixdb.fg_stride,
                                 desc='Actual')
          logger.info('')
--- sonusai-0.15.8/sonusai/lsdb.py
+++ sonusai-0.16.0/sonusai/lsdb.py
@@ -48,8 +48,8 @@ def lsdb(mixdb: MixtureDatabase,
      logger.info(f'{"Targets":{desc_len}} {mixdb.num_target_files}')
      logger.info(f'{"Noises":{desc_len}} {mixdb.num_noise_files}')
      logger.info(f'{"Feature":{desc_len}} {mixdb.feature}')
-     logger.info(f'{"Feature shape":{desc_len}} {mixdb.fg_stride} x {mixdb.fg_num_bands} '
-                 f'({mixdb.fg_stride * mixdb.fg_num_bands} total params)')
+     logger.info(f'{"Feature shape":{desc_len}} {mixdb.fg_stride} x {mixdb.feature_parameters} '
+                 f'({mixdb.fg_stride * mixdb.feature_parameters} total params)')
      logger.info(f'{"Feature samples":{desc_len}} {mixdb.feature_samples} samples ({mixdb.feature_ms} ms)')
      logger.info(f'{"Feature step samples":{desc_len}} {mixdb.feature_step_samples} samples '
                  f'({mixdb.feature_step_ms} ms)')