sonusai 0.15.9__tar.gz → 0.16.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. {sonusai-0.15.9 → sonusai-0.16.1}/PKG-INFO +7 -25
  2. {sonusai-0.15.9 → sonusai-0.16.1}/README.rst +5 -5
  3. {sonusai-0.15.9 → sonusai-0.16.1}/pyproject.toml +5 -25
  4. sonusai-0.16.1/sonusai/__init__.py +87 -0
  5. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/audiofe.py +111 -106
  6. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/calc_metric_spenh.py +38 -22
  7. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/genft.py +15 -6
  8. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/genmix.py +14 -6
  9. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/genmixdb.py +15 -7
  10. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/gentcst.py +13 -6
  11. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/lsdb.py +15 -5
  12. sonusai-0.16.1/sonusai/main.py +90 -0
  13. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/__init__.py +1 -0
  14. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/config.py +1 -2
  15. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mkmanifest.py +43 -8
  16. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mkwav.py +15 -6
  17. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/onnx_predict.py +16 -6
  18. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/plot.py +16 -6
  19. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/post_spenh_targetf.py +13 -6
  20. sonusai-0.16.1/sonusai/summarize_metric_spenh.py +71 -0
  21. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/tplot.py +14 -6
  22. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/__init__.py +4 -7
  23. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/asl_p56.py +3 -3
  24. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/asr.py +35 -8
  25. sonusai-0.16.1/sonusai/utils/asr_functions/__init__.py +1 -0
  26. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/asr_functions/aaware_whisper.py +2 -2
  27. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/asr_manifest_functions/__init__.py +1 -0
  28. sonusai-0.16.1/sonusai/utils/asr_manifest_functions/mcgill_speech.py +29 -0
  29. sonusai-0.15.9/sonusai/utils/trim_docstring.py → sonusai-0.16.1/sonusai/utils/docstring.py +20 -0
  30. sonusai-0.16.1/sonusai/utils/model_utils.py +30 -0
  31. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/onnx_utils.py +19 -45
  32. sonusai-0.15.9/sonusai/__init__.py +0 -55
  33. sonusai-0.15.9/sonusai/data_generator/__init__.py +0 -5
  34. sonusai-0.15.9/sonusai/data_generator/dataset_from_mixdb.py +0 -143
  35. sonusai-0.15.9/sonusai/data_generator/keras_from_mixdb.py +0 -169
  36. sonusai-0.15.9/sonusai/data_generator/torch_from_mixdb.py +0 -122
  37. sonusai-0.15.9/sonusai/keras_onnx.py +0 -86
  38. sonusai-0.15.9/sonusai/keras_predict.py +0 -231
  39. sonusai-0.15.9/sonusai/keras_train.py +0 -334
  40. sonusai-0.15.9/sonusai/main.py +0 -93
  41. sonusai-0.15.9/sonusai/torchl_onnx.py +0 -216
  42. sonusai-0.15.9/sonusai/torchl_predict.py +0 -542
  43. sonusai-0.15.9/sonusai/torchl_train.py +0 -223
  44. sonusai-0.15.9/sonusai/utils/asr_functions/__init__.py +0 -6
  45. sonusai-0.15.9/sonusai/utils/asr_functions/aixplain_whisper.py +0 -59
  46. sonusai-0.15.9/sonusai/utils/asr_functions/data.py +0 -16
  47. sonusai-0.15.9/sonusai/utils/asr_functions/deepgram.py +0 -97
  48. sonusai-0.15.9/sonusai/utils/asr_functions/fastwhisper.py +0 -90
  49. sonusai-0.15.9/sonusai/utils/asr_functions/google.py +0 -95
  50. sonusai-0.15.9/sonusai/utils/asr_functions/whisper.py +0 -49
  51. sonusai-0.15.9/sonusai/utils/keras_utils.py +0 -226
  52. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/aawscd_probwrite.py +0 -0
  53. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/data/__init__.py +0 -0
  54. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/data/genmixdb.yml +0 -0
  55. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/data/speech_ma01_01.wav +0 -0
  56. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/data/whitenoise.wav +0 -0
  57. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/doc/__init__.py +0 -0
  58. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/doc/doc.py +0 -0
  59. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/doc.py +0 -0
  60. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/__init__.py +0 -0
  61. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/calc_class_weights.py +0 -0
  62. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/calc_optimal_thresholds.py +0 -0
  63. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/calc_pcm.py +0 -0
  64. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/calc_pesq.py +0 -0
  65. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/calc_sa_sdr.py +0 -0
  66. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/calc_sample_weights.py +0 -0
  67. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/calc_wer.py +0 -0
  68. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/calc_wsdr.py +0 -0
  69. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/class_summary.py +0 -0
  70. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/confusion_matrix_summary.py +0 -0
  71. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/one_hot.py +0 -0
  72. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/metrics/snr_summary.py +0 -0
  73. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/audio.py +0 -0
  74. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/augmentation.py +0 -0
  75. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/class_count.py +0 -0
  76. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/constants.py +0 -0
  77. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/datatypes.py +0 -0
  78. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/eq_rule_is_valid.py +0 -0
  79. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/feature.py +0 -0
  80. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/generation.py +0 -0
  81. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/helpers.py +0 -0
  82. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/log_duration_and_sizes.py +0 -0
  83. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/mapped_snr_f.py +0 -0
  84. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/mixdb.py +0 -0
  85. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/soundfile_audio.py +0 -0
  86. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/sox_audio.py +0 -0
  87. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/sox_augmentation.py +0 -0
  88. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/spectral_mask.py +0 -0
  89. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/target_class_balancing.py +0 -0
  90. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/targets.py +0 -0
  91. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/tokenized_shell_vars.py +0 -0
  92. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/torchaudio_audio.py +0 -0
  93. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/torchaudio_augmentation.py +0 -0
  94. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/truth.py +0 -0
  95. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/truth_functions/__init__.py +0 -0
  96. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/truth_functions/crm.py +0 -0
  97. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/truth_functions/data.py +0 -0
  98. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/truth_functions/energy.py +0 -0
  99. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/truth_functions/file.py +0 -0
  100. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/truth_functions/phoneme.py +0 -0
  101. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/truth_functions/sed.py +0 -0
  102. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/mixture/truth_functions/target.py +0 -0
  103. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/queries/__init__.py +0 -0
  104. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/queries/queries.py +0 -0
  105. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/asr_manifest_functions/data.py +0 -0
  106. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/asr_manifest_functions/librispeech.py +0 -0
  107. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +0 -0
  108. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/audio_devices.py +0 -0
  109. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/braced_glob.py +0 -0
  110. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/calculate_input_shape.py +0 -0
  111. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/convert_string_to_number.py +0 -0
  112. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/create_timestamp.py +0 -0
  113. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/create_ts_name.py +0 -0
  114. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/dataclass_from_dict.py +0 -0
  115. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/db.py +0 -0
  116. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/energy_f.py +0 -0
  117. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/engineering_number.py +0 -0
  118. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/get_frames_per_batch.py +0 -0
  119. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/get_label_names.py +0 -0
  120. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/grouper.py +0 -0
  121. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/human_readable_size.py +0 -0
  122. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/max_text_width.py +0 -0
  123. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/numeric_conversion.py +0 -0
  124. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/parallel.py +0 -0
  125. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/print_mixture_details.py +0 -0
  126. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/ranges.py +0 -0
  127. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/read_mixture_data.py +0 -0
  128. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/read_predict_data.py +0 -0
  129. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/reshape.py +0 -0
  130. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/seconds_to_hms.py +0 -0
  131. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/stacked_complex.py +0 -0
  132. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/stratified_shuffle_split.py +0 -0
  133. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/wave.py +0 -0
  134. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/utils/yes_or_no.py +0 -0
  135. {sonusai-0.15.9 → sonusai-0.16.1}/sonusai/vars.py +0 -0
{sonusai-0.15.9 → sonusai-0.16.1}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sonusai
- Version: 0.15.9
+ Version: 0.16.1
  Summary: Framework for building deep neural network models for sound, speech, and voice AI
  Home-page: https://aaware.com
  License: GPL-3.0-only
@@ -15,57 +15,39 @@ Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
- Requires-Dist: aixplain (>=0.2.6,<0.3.0)
- Requires-Dist: bitarray (>=2.9.2,<3.0.0)
- Requires-Dist: ctranslate2 (==4.1.0)
  Requires-Dist: dataclasses-json (>=0.6.1,<0.7.0)
- Requires-Dist: deepgram-sdk (>=3.0.0,<4.0.0)
  Requires-Dist: docopt (>=0.6.2,<0.7.0)
- Requires-Dist: einops (>=0.7.0,<0.8.0)
- Requires-Dist: faster-whisper (>=1.0.1,<2.0.0)
- Requires-Dist: geomloss (>=0.2.6,<0.3.0)
  Requires-Dist: h5py (>=3.11.0,<4.0.0)
- Requires-Dist: hydra-core (>=1.3.2,<2.0.0)
  Requires-Dist: jiwer (>=3.0.3,<4.0.0)
- Requires-Dist: keras (>=3.1.1,<4.0.0)
- Requires-Dist: keras-tuner (>=1.4.7,<2.0.0)
  Requires-Dist: librosa (>=0.10.1,<0.11.0)
- Requires-Dist: lightning (>=2.2,<2.3)
  Requires-Dist: matplotlib (>=3.8.0,<4.0.0)
- Requires-Dist: omegaconf (>=2.3.0,<3.0.0)
  Requires-Dist: onnx (>=1.14.1,<2.0.0)
  Requires-Dist: onnxruntime (>=1.16.1,<2.0.0)
  Requires-Dist: paho-mqtt (>=2.0.0,<3.0.0)
  Requires-Dist: pandas (>=2.1.1,<3.0.0)
  Requires-Dist: pesq (>=0.0.4,<0.0.5)
- Requires-Dist: pyaaware (>=1.5.3,<2.0.0)
+ Requires-Dist: pyaaware (>=1.5.7,<2.0.0)
  Requires-Dist: pyaudio (>=0.2.14,<0.3.0)
  Requires-Dist: pydub (>=0.25.1,<0.26.0)
  Requires-Dist: pystoi (>=0.4.0,<0.5.0)
- Requires-Dist: python-magic (>=0.4.27,<0.5.0)
  Requires-Dist: requests (>=2.31.0,<3.0.0)
- Requires-Dist: sacrebleu (>=2.4.2,<3.0.0)
  Requires-Dist: samplerate (>=0.2.1,<0.3.0)
  Requires-Dist: soundfile (>=0.12.1,<0.13.0)
  Requires-Dist: sox (>=1.4.1,<2.0.0)
- Requires-Dist: speechrecognition (>=3.10.1,<4.0.0)
- Requires-Dist: tensorflow (>=2.15.0,<3.0.0)
- Requires-Dist: tf2onnx (>=1.15.1,<2.0.0)
  Requires-Dist: torch (>=2.2,<2.3)
  Requires-Dist: torchaudio (>=2.2,<2.3)
- Requires-Dist: torchinfo (>=1.8.0,<2.0.0)
  Requires-Dist: tqdm (>=4.66.1,<5.0.0)
  Description-Content-Type: text/x-rst

- Sonus AI: Framework for simplified creation of deep NN models for sound, speech, and voice AI
+ SonusAI: Framework for simplified creation of deep NN models for sound, speech, and voice AI

- Sonus AI includes functions for pre-processing training and validation data and
+ SonusAI includes functions for pre-processing training and validation data and
  creating performance metrics reports for key types of Keras models:
  - recurrent, convolutional, or a combination (i.e. RCNNs)
  - binary, multiclass single-label, multiclass multi-label, and regression
  - training with data augmentations: noise mixing, pitch and time stretch, etc.

- Sonus AI python functions are used by:
- - Aaware Inc. sonusai executable: Easily create train/validation data, run prediction, evaluate model performance
- - Keras model scripts: User python scripts for keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for trainining rnn-based models like CRNN's, DSCRNN's, etc. in Keras
+ SonusAI python functions are used by:
+ - Aaware Inc. sonusai framework: Easily create train/validation data, run prediction, evaluate model performance
+ - Keras model scripts: User python scripts for Keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for training rnn-based models like CRNN's, DSCRNN's, etc. in Keras.

{sonusai-0.15.9 → sonusai-0.16.1}/README.rst

@@ -1,11 +1,11 @@
- Sonus AI: Framework for simplified creation of deep NN models for sound, speech, and voice AI
+ SonusAI: Framework for simplified creation of deep NN models for sound, speech, and voice AI

- Sonus AI includes functions for pre-processing training and validation data and
+ SonusAI includes functions for pre-processing training and validation data and
  creating performance metrics reports for key types of Keras models:
  - recurrent, convolutional, or a combination (i.e. RCNNs)
  - binary, multiclass single-label, multiclass multi-label, and regression
  - training with data augmentations: noise mixing, pitch and time stretch, etc.

- Sonus AI python functions are used by:
- - Aaware Inc. sonusai executable: Easily create train/validation data, run prediction, evaluate model performance
- - Keras model scripts: User python scripts for keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for trainining rnn-based models like CRNN's, DSCRNN's, etc. in Keras
+ SonusAI python functions are used by:
+ - Aaware Inc. sonusai framework: Easily create train/validation data, run prediction, evaluate model performance
+ - Keras model scripts: User python scripts for Keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for training rnn-based models like CRNN's, DSCRNN's, etc. in Keras.
{sonusai-0.15.9 → sonusai-0.16.1}/pyproject.toml

@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "sonusai"
- version = "0.15.9"
+ version = "0.16.1"
  description = "Framework for building deep neural network models for sound, speech, and voice AI"
  authors = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
  maintainers = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
@@ -15,60 +15,40 @@ aawscd_probwrite = 'sonusai.aawscd_probwrite:main'

  [tool.poetry.dependencies]
  PyYAML = "^6.0.1"
- aixplain = "^0.2.6"
- bitarray = "^2.9.2"
- ctranslate2 = "4.1.0"
  dataclasses-json = "^0.6.1"
- deepgram-sdk = "^3.0.0"
  docopt = "^0.6.2"
- einops = "^0.7.0"
- faster-whisper = "^1.0.1"
- geomloss = "^0.2.6"
  h5py = "^3.11.0"
- hydra-core = "^1.3.2"
  jiwer = "^3.0.3"
- keras = "^3.1.1"
- keras-tuner = "^1.4.7"
  librosa = "^0.10.1"
- lightning = "~2.2"
  matplotlib = "^3.8.0"
- omegaconf = "^2.3.0"
  onnx = "^1.14.1"
- #onnxruntime-gpu = "^1.16.1"
  onnxruntime = "^1.16.1"
- #openai-whisper = "^20231117"
  paho-mqtt = "^2.0.0"
  pandas = "^2.1.1"
  pesq = "^0.0.4"
- pyaaware = "^1.5.3"
+ pyaaware = "^1.5.7"
  pyaudio = "^0.2.14"
  pydub = "^0.25.1"
  pystoi = "^0.4.0"
  python = ">=3.9,<3.12"
- python-magic = "^0.4.27"
  requests = "^2.31.0"
- sacrebleu = "^2.4.2"
  samplerate = "^0.2.1"
  soundfile = "^0.12.1"
- speechrecognition = "^3.10.1"
  sox = "^1.4.1"
- tensorflow = "^2.15.0"
- tf2onnx = "^1.15.1"
  torch = "~2.2"
  torchaudio = "~2.2"
- torchinfo = "^1.8.0"
  tqdm = "^4.66.1"

  [tool.poetry.group.dev.dependencies]
+ einops = "^0.8.0"
  icecream = "^2.1.3"
- ipython = "^8.16.1"
- jupyter = "^1.0.0"
  mypy = "^1.6.0"
  mypy-extensions = "^1.0.0"
  pytest = "^8.1.1"
+ sonusai-asr-cloud = "^0.1.0"
+ sonusai-torchl = "^0.1.0"
  types-pyyaml = "^6.0.12.12"
  types-requests = "^2.31.0.8"
- yappi = "^1.4.0"

  [tool.mypy]
  ignore_missing_imports = true
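
Note on the dependency changes above: the training and cloud-ASR stacks (Keras, TensorFlow, Lightning, aixplain, deepgram-sdk, faster-whisper, etc.) are no longer runtime dependencies; the companion packages sonusai-torchl and sonusai-asr-cloud appear only under dev dependencies. A minimal sketch of how downstream code might guard the now-separate torch-lightning helper that audiofe.py imports below (the SystemExit message is illustrative, not from the package):

# Sketch: guard the optional sonusai-torchl dependency.
# load_torchl_ckpt_model is the helper audiofe.py imports below;
# the error message here is hypothetical.
try:
    from sonusai_torchl.utils import load_torchl_ckpt_model
except ImportError as e:
    raise SystemExit(
        'audiofe model loading requires the separate sonusai-torchl package'
    ) from e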
sonusai-0.16.1/sonusai/__init__.py (new file)

@@ -0,0 +1,87 @@
+ import logging
+ from importlib import metadata
+ from os.path import dirname
+
+ __version__ = metadata.version(__package__)
+ BASEDIR = dirname(__file__)
+
+ commands_doc = """
+ audiofe                 Audio front end
+ calc_metric_spenh       Run speech enhancement and analysis
+ doc                     Documentation
+ genft                   Generate feature and truth data
+ genmix                  Generate mixture and truth data
+ genmixdb                Generate a mixture database
+ gentcst                 Generate target configuration from a subdirectory tree
+ lsdb                    List information about a mixture database
+ mkmanifest              Make ASR manifest JSON file
+ mkwav                   Make WAV files from a mixture database
+ onnx_predict            Run ONNX predict on a trained model
+ plot                    Plot mixture data
+ post_spenh_targetf      Run post-processing for speech enhancement targetf data
+ summarize_metric_spenh  Summarize speech enhancement and analysis results
+ tplot                   Plot truth data
+ vars                    List custom SonusAI variables
+ """
+
+ # create logger
+ logger = logging.getLogger('sonusai')
+ logger.setLevel(logging.DEBUG)
+ formatter = logging.Formatter('%(message)s')
+ console_handler = logging.StreamHandler()
+ console_handler.setLevel(logging.DEBUG)
+ console_handler.setFormatter(formatter)
+ logger.addHandler(console_handler)
+
+
+ class SonusAIError(Exception):
+     def __init__(self, value):
+         logger.error(value)
+
+
+ # create file handler
+ def create_file_handler(filename: str) -> None:
+     fh = logging.FileHandler(filename=filename, mode='w')
+     fh.setLevel(logging.DEBUG)
+     fh.setFormatter(formatter)
+     logger.addHandler(fh)
+
+
+ # update console handler
+ def update_console_handler(verbose: bool) -> None:
+     if not verbose:
+         logger.removeHandler(console_handler)
+         console_handler.setLevel(logging.INFO)
+         logger.addHandler(console_handler)
+
+
+ # write initial log message
+ def initial_log_messages(name: str, subprocess: str = None) -> None:
+     from datetime import datetime
+     from getpass import getuser
+     from os import getcwd
+     from socket import gethostname
+     from sys import argv
+
+     if subprocess is None:
+         logger.info(f'SonusAI {__version__}')
+     else:
+         logger.info(f'SonusAI {subprocess}')
+     logger.info(f'{name}')
+     logger.info('')
+     logger.debug(f'Host: {gethostname()}')
+     logger.debug(f'User: {getuser()}')
+     logger.debug(f'Directory: {getcwd()}')
+     logger.debug(f'Date: {datetime.now()}')
+     logger.debug(f'Command: {" ".join(argv)}')
+     logger.debug('')
+
+
+ def commands_list(doc: str = commands_doc) -> list[str]:
+     lines = doc.split('\n')
+     commands = []
+     for line in lines:
+         command = line.strip().split(' ').pop(0)
+         if command:
+             commands.append(command)
+     return commands
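
For reference, a minimal sketch of how these new package-level helpers are wired together by a command entry point (the log file name is hypothetical; 'genmix' is one of the commands listed above):

# Sketch: typical use of the new sonusai logging helpers and commands_list.
from sonusai import commands_list
from sonusai import create_file_handler
from sonusai import initial_log_messages
from sonusai import update_console_handler

create_file_handler('genmix.log')      # hypothetical log file name
update_console_handler(verbose=False)  # drop console output to INFO level
initial_log_messages('genmix')         # version/host/user/command banner
print(commands_list())                 # ['audiofe', 'calc_metric_spenh', 'doc', ...]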
{sonusai-0.15.9 → sonusai-0.16.1}/sonusai/audiofe.py

@@ -24,6 +24,10 @@ audiofe_capture_<TIMESTAMP>.wav.
  If a model is specified, run prediction on audio data from this model. Then compute the inverse transform of the
  prediction result and save to audiofe_predict_<TIMESTAMP>.wav.

+ Also, if a model is specified, save plots of the capture data (time-domain signal and feature) to
+ audiofe_capture_<TIMESTAMP>.png and predict data (time-domain signal and feature) to
+ audiofe_predict_<TIMESTAMP>.png.
+
  If an ASR is specified, run ASR on the captured audio and print the results. In addition, if a model was also specified,
  run ASR on the predict audio and print the results.

@@ -31,41 +35,32 @@ If the debug option is enabled, write capture audio, feature, reconstruct audio,
  audiofe_<TIMESTAMP>.h5.

  """
- from os.path import exists
- from select import select
- from sys import stdin
- from typing import Any
+ import signal

- import h5py
  import numpy as np
- import pyaudio
- import torch
- from docopt import docopt
- from docopt import printable_usage
-
- import sonusai
- from sonusai import create_file_handler
- from sonusai import initial_log_messages
- from sonusai import logger
- from sonusai import update_console_handler
+
  from sonusai.mixture import AudioT
- from sonusai.mixture import CHANNEL_COUNT
- from sonusai.mixture import SAMPLE_RATE
- from sonusai.mixture import get_audio_from_feature
- from sonusai.mixture import get_feature_from_audio
- from sonusai.mixture import read_audio
- from sonusai.utils import calc_asr
- from sonusai.utils import create_timestamp
- from sonusai.utils import get_input_device_index_by_name
- from sonusai.utils import get_input_devices
- from sonusai.utils import import_keras_model
- from sonusai.utils import trim_docstring
- from sonusai.utils import write_wav
+
+
+ def signal_handler(_sig, _frame):
+     import sys
+
+     from sonusai import logger
+
+     logger.info('Canceled due to keyboard interrupt')
+     sys.exit(1)
+
+
+ signal.signal(signal.SIGINT, signal_handler)


  def main() -> None:
+     from docopt import docopt
+
+     import sonusai
+     from sonusai.utils import trim_docstring
+
      args = docopt(trim_docstring(__doc__), version=sonusai.__version__, options_first=True)
-     ts = create_timestamp()

      verbose = args['--verbose']
      length = float(args['--length'])
@@ -77,8 +72,34 @@ def main() -> None:
      debug = args['--debug']
      show = args['--show']

-     capture_name = f'audiofe_capture_{ts}.wav'
-     predict_name = f'audiofe_predict_{ts}.wav'
+     from os.path import exists
+
+     import h5py
+     import pyaudio
+     import torch
+     from docopt import printable_usage
+     from sonusai_torchl.utils import load_torchl_ckpt_model
+
+     from sonusai import create_file_handler
+     from sonusai import initial_log_messages
+     from sonusai import logger
+     from sonusai import update_console_handler
+     from sonusai.mixture import SAMPLE_RATE
+     from sonusai.mixture import get_audio_from_feature
+     from sonusai.mixture import get_feature_from_audio
+     from sonusai.utils import calc_asr
+     from sonusai.utils import create_timestamp
+     from sonusai.utils import get_input_devices
+     from sonusai.utils import trim_docstring
+     from sonusai.utils import write_wav
+
+     ts = create_timestamp()
+     capture_name = f'audiofe_capture_{ts}'
+     capture_wav = capture_name + '.wav'
+     capture_png = capture_name + '.png'
+     predict_name = f'audiofe_predict_{ts}'
+     predict_wav = predict_name + '.wav'
+     predict_png = predict_name + '.png'
      h5_name = f'audiofe_{ts}.h5'

      if model_name is not None and ckpt_name is None:
@@ -109,9 +130,9 @@ def main() -> None:
          logger.exception(e)
          return

-     write_wav(capture_name, capture_audio, SAMPLE_RATE)
+     write_wav(capture_wav, capture_audio, SAMPLE_RATE)
      logger.info('')
-     logger.info(f'Wrote capture audio with shape {capture_audio.shape} to {capture_name}')
+     logger.info(f'Wrote capture audio with shape {capture_audio.shape} to {capture_wav}')
      if debug:
          with h5py.File(h5_name, 'a') as f:
              if 'capture_audio' in f:
@@ -124,9 +145,13 @@ def main() -> None:
          logger.info(f'Capture audio ASR: {capture_asr}')

      if model_name is not None:
-         model = load_model(model_name=model_name, ckpt_name=ckpt_name)
+         model = load_torchl_ckpt_model(model_name=model_name, ckpt_name=ckpt_name)
+         model.eval()

          feature = get_feature_from_audio(audio=capture_audio, feature_mode=model.hparams.feature)
+         save_figure(capture_png, capture_audio, feature)
+         logger.info(f'Wrote capture plots to {capture_png}')
+
          if debug:
              with h5py.File(h5_name, 'a') as f:
                  if 'feature' in f:
@@ -134,22 +159,9 @@ def main() -> None:
                  f.create_dataset('feature', data=feature)
              logger.info(f'Wrote feature with shape {feature.shape} to {h5_name}')

-         # if debug:
-         #     reconstruct_name = f'audiofe_reconstruct_{ts}.wav'
-         #     reconstruct_audio = get_audio_from_feature(feature=feature, feature_mode=model.hparams.feature)
-         #     samples = min(len(capture_audio), len(reconstruct_audio))
-         #     max_err = np.max(np.abs(capture_audio[:samples] - reconstruct_audio[:samples]))
-         #     logger.info(f'Maximum error between capture and reconstruct: {max_err}')
-         #     write_wav(reconstruct_name, reconstruct_audio, SAMPLE_RATE)
-         #     logger.info(f'Wrote reconstruct audio with shape {reconstruct_audio.shape} to {reconstruct_name}')
-         #     with h5py.File(h5_name, 'a') as f:
-         #         if 'reconstruct_audio' in f:
-         #             del f['reconstruct_audio']
-         #         f.create_dataset('reconstruct_audio', data=reconstruct_audio)
-         #     logger.info(f'Wrote reconstruct audio with shape {reconstruct_audio.shape} to {h5_name}')
-
          with torch.no_grad():
-             predict = model(torch.tensor(feature))
+             # model wants batch x timesteps x feature_parameters
+             predict = model(torch.tensor(feature).permute((1, 0, 2))).permute(1, 0, 2).numpy()
          if debug:
              with h5py.File(h5_name, 'a') as f:
                  if 'predict' in f:
@@ -157,9 +169,9 @@ def main() -> None:
                  f.create_dataset('predict', data=predict)
              logger.info(f'Wrote predict with shape {predict.shape} to {h5_name}')

-         predict_audio = get_audio_from_feature(feature=predict.numpy(), feature_mode=model.hparams.feature)
-         write_wav(predict_name, predict_audio, SAMPLE_RATE)
-         logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {predict_name}')
+         predict_audio = get_audio_from_feature(feature=predict, feature_mode=model.hparams.feature)
+         write_wav(predict_wav, predict_audio, SAMPLE_RATE)
+         logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {predict_wav}')
          if debug:
              with h5py.File(h5_name, 'a') as f:
                  if 'predict_audio' in f:
@@ -167,69 +179,26 @@ def main() -> None:
                  f.create_dataset('predict_audio', data=predict_audio)
              logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {h5_name}')

+         save_figure(predict_png, predict_audio, predict)
+         logger.info(f'Wrote predict plots to {predict_png}')
+
          if asr_name is not None:
              predict_asr = calc_asr(predict_audio, engine=asr_name, whisper_model_name=whisper_name).text
              logger.info(f'Predict audio ASR: {predict_asr}')


- def load_model(model_name: str, ckpt_name: str) -> Any:
-     batch_size = 1
-     timesteps = 0
-
-     # Load checkpoint first to get hparams if available
-     try:
-         checkpoint = torch.load(ckpt_name, map_location=lambda storage, loc: storage)
-     except Exception as e:
-         logger.exception(f'Error: could not load checkpoint from {ckpt_name}: {e}')
-         raise SystemExit(1)
-
-     # Import model definition file
-     logger.info(f'Importing {model_name}')
-     litemodule = import_keras_model(model_name)
-
-     if 'hyper_parameters' in checkpoint:
-         logger.info(f'Found checkpoint file with hyper-parameters')
-         hparams = checkpoint['hyper_parameters']
-         if hparams['batch_size'] != batch_size:
-             logger.info(
-                 f'Overriding model default batch_size of {hparams["batch_size"]} with batch_size of {batch_size}')
-             hparams["batch_size"] = batch_size
-
-         if hparams['timesteps'] != 0 and timesteps == 0:
-             timesteps = hparams['timesteps']
-             logger.warning(f'Using model default timesteps of {timesteps}')
-
-         logger.info(f'Building model with {len(hparams)} total hparams')
-         try:
-             model = litemodule.MyHyperModel(**hparams)
-         except Exception as e:
-             logger.exception(f'Error: model build (MyHyperModel) in {model_name} failed: {e}')
-             raise SystemExit(1)
-     else:
-         logger.info(f'Found checkpoint file with no hyper-parameters')
-         logger.info(f'Building model with defaults')
-         try:
-             tmp = litemodule.MyHyperModel()
-         except Exception as e:
-             logger.exception(f'Error: model build (MyHyperModel) in {model_name} failed: {e}')
-             raise SystemExit(1)
-
-         if tmp.batch_size != batch_size:
-             logger.info(f'Overriding model default batch_size of {tmp.batch_size} with batch_size of {batch_size}')
-
-         if tmp.timesteps != 0 and timesteps == 0:
-             timesteps = tmp.timesteps
-             logger.warning(f'Using model default timesteps of {timesteps}')
-
-         model = litemodule.MyHyperModel(timesteps=timesteps, batch_size=batch_size)
+ def get_frames_from_device(input_name: str | None, length: float, chunk: int = 1024) -> AudioT:
+     from select import select
+     from sys import stdin

-     logger.info(f'Loading weights from {ckpt_name}')
-     model.load_state_dict(checkpoint["state_dict"])
-     model.eval()
-     return model
+     import pyaudio

+     from sonusai import logger
+     from sonusai.mixture import CHANNEL_COUNT
+     from sonusai.mixture import SAMPLE_RATE
+     from sonusai.utils import get_input_device_index_by_name
+     from sonusai.utils import get_input_devices

- def get_frames_from_device(input_name: str | None, length: float, chunk: int = 1024) -> AudioT:
      p = pyaudio.PyAudio()

      input_devices = get_input_devices(p)
@@ -280,6 +249,10 @@ def get_frames_from_device(input_name: str | None, length: float, chunk: int = 1024) -> AudioT:


  def get_frames_from_file(input_name: str, length: float) -> AudioT:
+     from sonusai import logger
+     from sonusai.mixture import SAMPLE_RATE
+     from sonusai.mixture import read_audio
+
      logger.info(f'Capturing from {input_name}')
      frames = read_audio(input_name)
      if length != -1:
@@ -289,5 +262,37 @@ def get_frames_from_file(input_name: str, length: float) -> AudioT:
      return frames


+ def save_figure(name: str, audio: np.ndarray, feature: np.ndarray) -> None:
+     import matplotlib.pyplot as plt
+     from scipy.interpolate import CubicSpline
+
+     from sonusai.mixture import SAMPLE_RATE
+     from sonusai.utils import unstack_complex
+
+     spectrum = 20 * np.log(np.abs(np.squeeze(unstack_complex(feature)).transpose()))
+     frames = spectrum.shape[1]
+     samples = (len(audio) // frames) * frames
+     length_in_s = samples / SAMPLE_RATE
+     interp = samples // frames
+
+     ts = np.arange(0.0, length_in_s, interp / SAMPLE_RATE)
+     t = np.arange(0.0, length_in_s, 1 / SAMPLE_RATE)
+
+     spectrum = CubicSpline(ts, spectrum, axis=-1)(t)
+
+     fig, (ax1, ax2) = plt.subplots(nrows=2)
+     ax1.set_title(name)
+     ax1.plot(t, audio[:samples])
+     ax1.set_ylabel('Signal')
+     ax1.set_xlim(0, length_in_s)
+     ax1.set_ylim(-1, 1)
+
+     ax2.imshow(spectrum, origin='lower', aspect='auto')
+     ax2.set_xticks([])
+     ax2.set_ylabel('Feature')
+
+     plt.savefig(name, dpi=300)
+
+
  if __name__ == '__main__':
      main()
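
A note on the reshaping in the new predict path above: judging from the permute calls and the in-line comment, get_feature_from_audio returns the feature with its batch-like axis second, while the model consumes batch x timesteps x feature_parameters, so the tensor is transposed going in and transposed back coming out. A standalone sketch of that shape round-trip (the dimensions and the identity stand-in for the loaded model are illustrative, not from the package):

# Sketch: the permute round-trip used by audiofe.py's predict call.
import numpy as np
import torch

feature = np.random.rand(100, 1, 64).astype(np.float32)  # hypothetical (timesteps, batch, parameters)
model = torch.nn.Identity()                               # stand-in for load_torchl_ckpt_model(...)

with torch.no_grad():
    x = torch.tensor(feature).permute((1, 0, 2))          # -> (1, 100, 64): batch x timesteps x parameters
    predict = model(x).permute(1, 0, 2).numpy()           # back to (100, 1, 64) for get_audio_from_feature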