sonusai 0.20.2__tar.gz → 1.0.1__tar.gz

Files changed (152)
  1. {sonusai-0.20.2 → sonusai-1.0.1}/PKG-INFO +4 -2
  2. {sonusai-0.20.2 → sonusai-1.0.1}/pyproject.toml +5 -3
  3. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/__init__.py +16 -3
  4. sonusai-1.0.1/sonusai/audiofe.py +481 -0
  5. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/calc_metric_spenh.py +71 -73
  6. sonusai-1.0.1/sonusai/config/__init__.py +3 -0
  7. sonusai-1.0.1/sonusai/config/config.py +61 -0
  8. sonusai-1.0.1/sonusai/config/config.yml +20 -0
  9. sonusai-1.0.1/sonusai/config/constants.py +8 -0
  10. sonusai-1.0.1/sonusai/constants.py +11 -0
  11. sonusai-1.0.1/sonusai/data/genmixdb.yml +47 -0
  12. {sonusai-0.20.2/sonusai/mixture → sonusai-1.0.1/sonusai}/datatypes.py +91 -130
  13. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/deprecated/plot.py +4 -5
  14. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/doc/doc.py +4 -4
  15. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/doc.py +11 -4
  16. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/genft.py +43 -45
  17. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/genmetrics.py +23 -19
  18. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/genmix.py +54 -82
  19. sonusai-1.0.1/sonusai/genmixdb.py +343 -0
  20. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/ir_metric.py +30 -34
  21. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/lsdb.py +41 -48
  22. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/main.py +15 -22
  23. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/metrics/calc_audio_stats.py +4 -17
  24. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/metrics/calc_class_weights.py +4 -4
  25. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/metrics/calc_optimal_thresholds.py +8 -5
  26. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/metrics/calc_pesq.py +2 -2
  27. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/metrics/calc_segsnr_f.py +4 -4
  28. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/metrics/calc_speech.py +25 -13
  29. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/metrics/class_summary.py +7 -7
  30. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/metrics/confusion_matrix_summary.py +5 -5
  31. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/metrics/one_hot.py +4 -4
  32. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/metrics/snr_summary.py +7 -7
  33. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/metrics_summary.py +38 -45
  34. sonusai-1.0.1/sonusai/mixture/__init__.py +32 -0
  35. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mixture/audio.py +10 -39
  36. sonusai-1.0.1/sonusai/mixture/class_balancing.py +103 -0
  37. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mixture/config.py +251 -271
  38. sonusai-1.0.1/sonusai/mixture/constants.py +55 -0
  39. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mixture/data_io.py +25 -36
  40. sonusai-1.0.1/sonusai/mixture/db_datatypes.py +92 -0
  41. sonusai-1.0.1/sonusai/mixture/effects.py +386 -0
  42. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mixture/feature.py +7 -11
  43. sonusai-1.0.1/sonusai/mixture/generation.py +975 -0
  44. sonusai-1.0.1/sonusai/mixture/helpers.py +351 -0
  45. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mixture/ir_delay.py +3 -4
  46. sonusai-1.0.1/sonusai/mixture/ir_effects.py +77 -0
  47. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mixture/log_duration_and_sizes.py +6 -12
  48. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mixture/mixdb.py +931 -669
  49. sonusai-1.0.1/sonusai/mixture/pad_audio.py +35 -0
  50. sonusai-1.0.1/sonusai/mixture/resample.py +7 -0
  51. sonusai-1.0.1/sonusai/mixture/sox_effects.py +195 -0
  52. sonusai-1.0.1/sonusai/mixture/sox_help.py +650 -0
  53. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mixture/spectral_mask.py +2 -2
  54. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mixture/truth.py +17 -15
  55. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mixture/truth_functions/crm.py +12 -12
  56. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mixture/truth_functions/energy.py +22 -22
  57. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mixture/truth_functions/file.py +5 -5
  58. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mixture/truth_functions/metadata.py +4 -4
  59. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mixture/truth_functions/metrics.py +4 -4
  60. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mixture/truth_functions/phoneme.py +3 -3
  61. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mixture/truth_functions/sed.py +11 -13
  62. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mixture/truth_functions/target.py +10 -10
  63. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mkwav.py +26 -29
  64. sonusai-1.0.1/sonusai/onnx_predict.py +363 -0
  65. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/queries/__init__.py +2 -2
  66. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/queries/queries.py +38 -34
  67. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/speech/librispeech.py +1 -1
  68. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/speech/mcgill.py +1 -1
  69. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/speech/timit.py +2 -2
  70. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/summarize_metric_spenh.py +10 -17
  71. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/__init__.py +7 -1
  72. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/asl_p56.py +2 -2
  73. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/asr.py +2 -2
  74. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/asr_functions/aaware_whisper.py +4 -5
  75. sonusai-1.0.1/sonusai/utils/choice.py +31 -0
  76. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/compress.py +1 -1
  77. sonusai-1.0.1/sonusai/utils/dataclass_from_dict.py +27 -0
  78. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/energy_f.py +3 -3
  79. sonusai-1.0.1/sonusai/utils/evaluate_random_rule.py +15 -0
  80. sonusai-1.0.1/sonusai/utils/keyboard_interrupt.py +12 -0
  81. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/onnx_utils.py +3 -17
  82. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/print_mixture_details.py +21 -19
  83. sonusai-0.20.2/sonusai/utils/temp_seed.py → sonusai-1.0.1/sonusai/utils/rand.py +3 -3
  84. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/read_predict_data.py +2 -2
  85. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/reshape.py +3 -3
  86. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/stratified_shuffle_split.py +3 -3
  87. {sonusai-0.20.2/sonusai/mixture → sonusai-1.0.1/sonusai/utils}/tokenized_shell_vars.py +1 -1
  88. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/write_audio.py +2 -2
  89. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/vars.py +11 -4
  90. sonusai-0.20.2/sonusai/audiofe.py +0 -317
  91. sonusai-0.20.2/sonusai/data/genmixdb.yml +0 -62
  92. sonusai-0.20.2/sonusai/genmixdb.py +0 -519
  93. sonusai-0.20.2/sonusai/mixture/__init__.py +0 -131
  94. sonusai-0.20.2/sonusai/mixture/augmentation.py +0 -444
  95. sonusai-0.20.2/sonusai/mixture/class_count.py +0 -15
  96. sonusai-0.20.2/sonusai/mixture/constants.py +0 -59
  97. sonusai-0.20.2/sonusai/mixture/db_datatypes.py +0 -56
  98. sonusai-0.20.2/sonusai/mixture/eq_rule_is_valid.py +0 -45
  99. sonusai-0.20.2/sonusai/mixture/generation.py +0 -1102
  100. sonusai-0.20.2/sonusai/mixture/helpers.py +0 -453
  101. sonusai-0.20.2/sonusai/mixture/target_class_balancing.py +0 -107
  102. sonusai-0.20.2/sonusai/mixture/targets.py +0 -175
  103. sonusai-0.20.2/sonusai/onnx_predict.py +0 -211
  104. sonusai-0.20.2/sonusai/utils/dataclass_from_dict.py +0 -9
  105. {sonusai-0.20.2 → sonusai-1.0.1}/README.rst +0 -0
  106. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/aawscd_probwrite.py +0 -0
  107. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/data/__init__.py +0 -0
  108. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/data/silero_vad_v5.1.jit +0 -0
  109. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/data/silero_vad_v5.1.onnx +0 -0
  110. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/data/speech_ma01_01.wav +0 -0
  111. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/data/whitenoise.wav +0 -0
  112. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/deprecated/gentcst.py +0 -0
  113. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/deprecated/tplot.py +0 -0
  114. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/doc/__init__.py +0 -0
  115. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/metrics/__init__.py +0 -0
  116. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/metrics/calc_pcm.py +0 -0
  117. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/metrics/calc_phase_distance.py +0 -0
  118. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/metrics/calc_sa_sdr.py +0 -0
  119. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/metrics/calc_sample_weights.py +0 -0
  120. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/metrics/calc_wer.py +0 -0
  121. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/metrics/calc_wsdr.py +0 -0
  122. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/mixture/truth_functions/__init__.py +0 -0
  123. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/speech/__init__.py +0 -0
  124. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/speech/l2arctic.py +0 -0
  125. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/speech/textgrid.py +0 -0
  126. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/speech/types.py +0 -0
  127. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/speech/vctk.py +0 -0
  128. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/speech/voxceleb.py +0 -0
  129. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/asr_functions/__init__.py +0 -0
  130. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/audio_devices.py +0 -0
  131. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/braced_glob.py +0 -0
  132. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/calculate_input_shape.py +0 -0
  133. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/convert_string_to_number.py +0 -0
  134. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/create_timestamp.py +0 -0
  135. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/create_ts_name.py +0 -0
  136. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/db.py +0 -0
  137. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/docstring.py +0 -0
  138. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/engineering_number.py +0 -0
  139. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/get_frames_per_batch.py +0 -0
  140. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/get_label_names.py +0 -0
  141. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/grouper.py +0 -0
  142. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/human_readable_size.py +0 -0
  143. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/load_object.py +0 -0
  144. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/max_text_width.py +0 -0
  145. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/model_utils.py +0 -0
  146. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/numeric_conversion.py +0 -0
  147. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/parallel.py +0 -0
  148. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/path_info.py +0 -0
  149. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/ranges.py +0 -0
  150. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/seconds_to_hms.py +0 -0
  151. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/stacked_complex.py +0 -0
  152. {sonusai-0.20.2 → sonusai-1.0.1}/sonusai/utils/yes_or_no.py +0 -0
--- sonusai-0.20.2/PKG-INFO
+++ sonusai-1.0.1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: sonusai
-Version: 0.20.2
+Version: 1.0.1
 Summary: Framework for building deep neural network models for sound, speech, and voice AI
 Home-page: https://aaware.com
 License: GPL-3.0-only
@@ -25,9 +25,10 @@ Requires-Dist: onnxruntime (>=1.19.2,<2.0.0)
 Requires-Dist: paho-mqtt (>=2.1.0,<3.0.0)
 Requires-Dist: pandas (>=2.2.3,<3.0.0)
 Requires-Dist: pesq (>=0.0.4,<0.0.5)
+Requires-Dist: pgzip (>=0.3.5,<0.4.0)
 Requires-Dist: praatio (>=6.2.0,<7.0.0)
 Requires-Dist: psutil (>=6.0.0,<7.0.0)
-Requires-Dist: pyaaware (>=1.6.3,<2.0.0)
+Requires-Dist: pyaaware (>=2.0.0,<3.0.0)
 Requires-Dist: pyaudio (>=0.2.14,<0.3.0)
 Requires-Dist: pydub (>=0.25.1,<0.26.0)
 Requires-Dist: pystoi (>=0.4.1,<0.5.0)
@@ -35,6 +36,7 @@ Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
 Requires-Dist: requests (>=2.32.3,<3.0.0)
 Requires-Dist: rich (>=13.9.4,<14.0.0)
 Requires-Dist: samplerate (>=0.2.1,<0.3.0)
+Requires-Dist: sh (>=2.2.2,<3.0.0)
 Requires-Dist: soundfile (>=0.12.1,<0.13.0)
 Requires-Dist: sox (>=1.5.0,<2.0.0)
 Requires-Dist: torch (>=2.2,<2.3)
--- sonusai-0.20.2/pyproject.toml
+++ sonusai-1.0.1/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "sonusai"
-version = "0.20.2"
+version = "1.0.1"
 description = "Framework for building deep neural network models for sound, speech, and voice AI"
 authors = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
 maintainers = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
@@ -30,9 +30,10 @@ onnxruntime = "^1.19.2"
 paho-mqtt = "^2.1.0"
 pandas = "^2.2.3"
 pesq = "^0.0.4"
+pgzip = "^0.3.5"
 praatio = "^6.2.0"
 psutil = "^6.0.0"
-pyaaware = "^1.6.3"
+pyaaware = "^2.0.0"
 pyaudio = "^0.2.14"
 pydub = "^0.25.1"
 pystoi = "^0.4.1"
@@ -41,6 +42,7 @@ pyyaml = "^6.0.2"
 requests = "^2.32.3"
 rich = "^13.9.4"
 samplerate = "^0.2.1"
+sh = "^2.2.2"
 soundfile = "^0.12.1"
 sox = "^1.5.0"
 torch = "~2.2"
@@ -55,7 +57,7 @@ pytest = "^8.3.3"
 sonusai-asr-cloud = "^0.1.4"
 sonusai-asr-faster-whisper = "^0.1.1"
 sonusai-asr-sensory = "^0.1.1"
-sonusai-torchl = "^0.3.3"
+#sonusai-torchl = "^1.0.0"
 types-pyyaml = "^6.0.12.20240917"
 types-requests = "^2.32.0.20240914"
 tuna = "^0.5.11"
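
The dependency changes above are the substance of the major-version bump: pyaaware crosses a major version (^1.6.3 to ^2.0.0), two runtime dependencies are added (pgzip, a multi-threaded gzip implementation, and sh, a subprocess wrapper), and the sonusai-torchl dev dependency is commented out rather than updated. Note that Poetry's caret constraints are what expand into the (>=x.y.z,<next-major) ranges seen in the PKG-INFO hunk above. As a quick sketch of why sh enters the picture: the rewritten audiofe.py later in this diff uses it for audio playback. sh resolves attribute access to executables on PATH, so the call below shells out to SoX's play binary and blocks until playback finishes (assumes SoX is installed; the file name is a placeholder for illustration):

    import sh

    # `sh.play(...)` resolves to the `play` executable (from SoX) on PATH.
    sh.play("capture.wav")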
--- sonusai-0.20.2/sonusai/__init__.py
+++ sonusai-1.0.1/sonusai/__init__.py
@@ -3,9 +3,6 @@ from importlib import metadata
 from os.path import dirname
 
 from rich.logging import RichHandler
-from rich.traceback import install
-
-install(show_locals=True)
 
 __version__ = metadata.version(__package__)  # pyright: ignore [reportArgumentType]
 BASEDIR = dirname(__file__)
@@ -81,3 +78,19 @@ def commands_list(doc: str = commands_doc) -> list[str]:
         if command:
             commands.append(command)
     return commands
+
+
+def exception_handler(e: Exception) -> None:
+    import sys
+
+    from rich.console import Console
+
+    logger.error(f"{type(e).__name__}: {e}")
+    handlers = [handler for handler in logger.handlers if isinstance(handler, logging.FileHandler)]
+    logger.error(f"See {', '.join(handler.baseFilename for handler in handlers)} for details")
+
+    console = Console(color_system=None)
+    with console.capture() as capture:
+        console.print_exception(show_locals=False)
+    logger.debug(capture.get())
+    sys.exit(1)
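
With the global rich.traceback install(show_locals=True) hook removed, each command is now expected to catch its own top-level exceptions and hand them to the new exception_handler, which logs a one-line error to the console, captures the full traceback as plain text via Console(color_system=None), writes it to the log file, and exits non-zero. The intended wiring, mirroring the __main__ block of the new audiofe.py in the next hunk:

    from sonusai import exception_handler
    from sonusai.utils import register_keyboard_interrupt


    def main() -> None:
        ...  # a command's real entry point goes here


    if __name__ == "__main__":
        register_keyboard_interrupt()
        try:
            main()
        except Exception as e:
            exception_handler(e)  # logs the error, writes the traceback to the log file, exits 1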
--- /dev/null
+++ sonusai-1.0.1/sonusai/audiofe.py
@@ -0,0 +1,481 @@
+"""sonusai audiofe
+
+usage: audiofe [-hvdsp] [--version] [-i INPUT] [-l LENGTH] [-a ASR] [-n NOISEDB]
+               [-w WMODEL] [-o FEATURE] MODEL
+
+options:
+    -h, --help
+    -v, --verbose                   Be verbose.
+    -d, --debug                     Write debug data to H5 file.
+    -s, --show                      Display a list of available audio inputs.
+    -i INPUT, --input INPUT         Audio source from ALSA or .wav file. See -s or arecord -L. [default: default]
+    -l LENGTH, --length LENGTH      Length of audio in seconds. [default: -1]
+    MODEL                           SonusAI ONNX model applied to the captured audio.
+    -n NOISEDB, --noiseadd NOISEDB  Amount of noise to keep in the clean audio output. [default: -30]
+    -p, --playback                  Enable playback of the noisy audio, then the model prediction output audio.
+    -a ASR, --asr ASR               ASR method to use.
+    -w WMODEL, --whisper WMODEL     Model used in whisper, aixplain_whisper, and faster_whisper methods. [default: tiny]
+    -o FEATURE, --feature-overlap   Run the SonusAI model in overlap-streaming mode using FEATURE, an 8-10 character
+                                    string specifying a stride-overlap feature of the same type as the model, i.e. a
+                                    model with a default feature of hun00ns1 could use hun00nv80 or hun00nv128, etc.
+
+Aaware SonusAI Audio Front End.
+
+Capture LENGTH seconds of audio from INPUT. If LENGTH is < 0, capture until a key is pressed. If INPUT is a valid
+audio file name, use the audio data from the specified file. In this case, if LENGTH is < 0, process the entire file;
+otherwise, process min(length(INPUT), LENGTH) seconds of audio from INPUT. Audio is saved to
+audiofe_capture_<TIMESTAMP>.wav.
+
+If a model is specified, run prediction on the audio data with that model, then compute the inverse transform of the
+prediction result and save it to audiofe_predict_<TIMESTAMP>.wav.
+
+Also, if a model is specified, save plots of the capture data (time-domain signal and feature) to
+audiofe_capture_<TIMESTAMP>.png and of the predict data (time-domain signal and feature) to
+audiofe_predict_<TIMESTAMP>.png.
+
+If an ASR is specified, run ASR on the captured audio and print the results. In addition, if a model was also
+specified, run ASR on the predict audio and print the results. Examples: faster_whisper, google.
+
+If the debug option is enabled, write capture audio, feature, reconstruct audio, predict, and predict audio to
+audiofe_<TIMESTAMP>.h5.
+
+"""
+
+import numpy as np
+
+from sonusai.mixture import AudioT
+
+
+def main() -> None:
+    from docopt import docopt
+
+    from sonusai import __version__ as sai_version
+    from sonusai.utils import trim_docstring
+
+    args = docopt(trim_docstring(__doc__), version=sai_version, options_first=True)
+
+    verbose = args["--verbose"]
+    length = float(args["--length"])
+    input_name = args["--input"]
+    feature_ovr = args["--feature-overlap"]
+    asr_name = args["--asr"]
+    whisper_name = args["--whisper"]
+    debug = args["--debug"]
+    show = args["--show"]
+    playback = args["--playback"]
+    noiseadd = args["--noiseadd"]
+    model_name = args["MODEL"]
+
+    import pyaudio
+
+    from sonusai import create_file_handler
+    from sonusai import initial_log_messages
+    from sonusai import logger
+    from sonusai import update_console_handler
+    from sonusai.utils import create_timestamp
+    from sonusai.utils import get_input_devices
+    from sonusai.utils import load_ort_session
+
+    # Set up logging file
+    create_file_handler("audiofe.log")
+    update_console_handler(verbose)
+    initial_log_messages("audiofe")
+
+    if show:
+        logger.info("List of available audio inputs:")
+        logger.info("")
+        p = pyaudio.PyAudio()
+        for name in get_input_devices(p):
+            logger.info(f"{name}")
+        logger.info("")
+        p.terminate()
+        # return
+
+    ts = create_timestamp()
+    capture_name = f"{ts}-noisy"
+    capture_wav = capture_name + ".wav"
+    capture_png = capture_name + ".png"
+    predict_name = f"{ts}-pred"
+    predict_wav = predict_name + ".wav"
+    predict_png = predict_name + ".png"
+    h5_name = f"{ts}-audiofe.h5"
+
+    if model_name is not None:
+        session, options, model_root, hparams, sess_inputs, sess_outputs = load_ort_session(model_name)
+        if hparams is None:
+            logger.error("Error: ONNX model does not have required SonusAI hyperparameters, cannot proceed.")
+            raise SystemExit(1)
+        feature_mode = hparams["feature"]
+        if feature_ovr is not None:
+            # TBD: checks for match and valid feature_ovr
+            stride = int(feature_ovr[7:])
+            sov_type = feature_ovr[6]  # v, e, f, t supported; need to calculate the step from the stride
+            if sov_type == "v":
+                feat_step = int(np.ceil(0.5 * stride))
+            elif sov_type == "e":
+                feat_step = int(np.ceil(4 * stride / 5))
+            elif sov_type == "f":
+                feat_step = int(np.ceil(3 * stride / 4))
+            elif sov_type == "t":
+                feat_step = int(np.ceil(2 * stride / 3))
+            else:
+                logger.error("Override feature does not have a supported overlap mode, exiting.")
+                raise SystemExit(1)
+            feature_orig = feature_mode
+            feature_mode = feature_ovr
+            logger.info(
+                f"Overriding feature with {feature_ovr} (was {feature_orig}), with stride={stride}, step={feat_step}."
+            )
+        else:
+            feat_step = 1
+
+        from pyaaware import FeatureGenerator
+
+        fg = FeatureGenerator(feature_mode=feature_mode)
+        ftn = fg.ftransform_length  # feature transform length
+        ftr = fg.ftransform_overlap  # forward transform samples per step (R)
+        fstride = fg.stride  # feature stride
+        fsamples = fstride * ftr  # total samples in feature
+
+        in0name = sess_inputs[0].name
+        in0type = sess_inputs[0].type
+        out_names = [n.name for n in session.get_outputs()]
+        if len(sess_inputs) != 1:
+            logger.error(f"Error: ONNX model does not have 1 input, but {len(sess_inputs)}. Exit due to unknown input.")
+            raise SystemExit(1)
+        if verbose:
+            logger.info(f"Read and compiled ONNX model from {model_name}.")
+            import onnx
+
+            omodel = onnx.load(model_name)
+            from sonusai.utils.onnx_utils import get_and_check_inputs
+            from sonusai.utils.onnx_utils import get_and_check_outputs
+
+            logger.info(f"Onnx model uses ir_version {omodel.ir_version}")
+            onnx_inputs, inshapes = get_and_check_inputs(omodel)  # note: logs a warning if # inputs > 1
+            logger.info(f"Onnx model input has {len(inshapes[0])} dims with shape (0 means dynamic): {inshapes[0]}")
+            logger.info(f"Onnx model input has type: {in0type}")
+            onnx_outputs, oshapes = get_and_check_outputs(omodel)
+            logger.info(f"Onnx model output has {len(oshapes[0])} dims with shape (0 means dynamic): {oshapes[0]}")
+            import onnxruntime as ort
+
+            providers = ort.get_available_providers()
+            logger.info(f"ONNX runtime available providers: {providers}.")
+    else:
+        logger.error("No ONNX model provided, exiting.")
+        raise SystemExit(1)
+
+    from os.path import exists
+
+    import h5py
+
+    from sonusai.constants import SAMPLE_RATE
+    from sonusai.mixture import get_audio_from_feature
+    from sonusai.mixture import get_feature_from_audio
+    from sonusai.utils import calc_asr
+    from sonusai.utils import write_audio
+
+    if input_name is not None and exists(input_name):
+        capture_audio = get_frames_from_file(input_name, length)
+    else:
+        try:
+            capture_audio = get_frames_from_device(input_name, length)
+        except ValueError as e:
+            logger.exception(e)
+            return
+        # Only write if captured from a device, not for file input
+        write_audio(capture_wav, capture_audio, SAMPLE_RATE)
+        logger.debug("")
+        logger.debug(f"Wrote capture audio with shape {capture_audio.shape} to {capture_wav}")
+
+    # Pad audio to the transform step size
+    padlen_tf = int(np.ceil(len(capture_audio) / ftr)) * ftr - len(capture_audio)
+    capture_audio = np.pad(capture_audio, (0, padlen_tf), "constant", constant_values=(0, 0))
+
+    if debug:
+        with h5py.File(h5_name, "a") as f:
+            if "capture_audio" in f:
+                del f["capture_audio"]
+            f.create_dataset("capture_audio", data=capture_audio)
+        logger.info(f"Wrote capture audio data with shape {capture_audio.shape} to {h5_name}")
+
+    if asr_name is not None:
+        logger.info(f"Running ASR on captured audio with {asr_name} ...")
+        capture_asr = calc_asr(capture_audio, engine=asr_name, model=whisper_name).text
+        logger.info(f"Noisy audio ASR: {capture_asr}")
+
+    if model_name is not None:
+        # Pad audio to fill total feature stride * transform stride samples
+        padlen = int(np.ceil(len(capture_audio) / fsamples)) * fsamples - len(capture_audio)
+        capture_audio_p = np.pad(capture_audio, (0, padlen), "constant", constant_values=(0, 0))
+
+        # feature is always frames x stride x feat_params; convert to Batch x Tsteps x Bins
+        feature = get_feature_from_audio(audio=capture_audio_p, feature_mode=feature_mode)
+        if debug:
+            with h5py.File(h5_name, "a") as f:
+                if "feature" in f:
+                    del f["feature"]
+                f.create_dataset("feature", data=feature)
+            logger.info(f"Wrote feature with shape {feature.shape} to {h5_name}")
+
+        feat_nov = sov2nov(feature, feat_step)  # remove overlap; output is always Batch x Tsteps x Bins
+        # TBD: remove padding of feature-stride
+        # if padlen > 0:
+        save_figure(capture_png, capture_audio, feat_nov)
+        logger.info(f"Wrote capture plots to {capture_png}")
+
+        if feature_ovr is not None:
+            test_audio = get_audio_from_feature(feature=feat_nov, feature_mode=feature_orig)
+            # write_audio(f'{ts}-noisy-itf.wav', test_audio, SAMPLE_RATE)
+        else:
+            # feature is frames x 1 x Bins; reshape to 1 x frames x Bins for the model
+            feature = feature.transpose((1, 0, 2))
+
+        if in0type.find("float16") != -1:
+            logger.info("Detected input of float16, converting all feature inputs to that type.")
+            feature = np.float16(feature)  # type: ignore
+
+        # Run inference; the ort session wants batch x timesteps x feat_params and outputs numpy BxTxFP or BxFP
+        # Note: a full reshape is not needed here since we assume a speech enhancement type model, so a transpose suffices
+        logger.info(f"Running model on data with shape {feature.shape} ...")
+        if feature_ovr is None:
+            predict = session.run(out_names, {in0name: feature})[0]  # standard mode (entire batch)
+        else:
+            predict = np.zeros(feature.shape)
+            for i in range(predict.shape[0]):
+                logger.debug(f"running batch: {i}")
+                predict[i, :, :] = session.run(out_names, {in0name: feature[i : i + 1, :, :]})[0]
+
+        if debug:
+            with h5py.File(h5_name, "a") as f:
+                if "predict" in f:
+                    del f["predict"]
+                f.create_dataset("predict", data=predict)
+            logger.info(f"Wrote predict data with shape {predict.shape} to {h5_name}")
+
+        if feature_ovr is not None:
+            predict = sov2nov(predict, feat_step)  # always returns batch x tsteps x feat_params
+            predict_audio = get_audio_from_feature(feature=predict, feature_mode=feature_orig)
+        else:
+            predict = predict.transpose((1, 0, 2))  # need transpose to frames x 1 x bins
+            predict_audio = get_audio_from_feature(feature=predict, feature_mode=feature_mode)
+
+        if predict_audio.shape[0] > capture_audio.shape[0]:
+            predict_audio = predict_audio[0 : (capture_audio.shape[0] - predict_audio.shape[0])]
+
+        if predict_audio.shape[0] < capture_audio.shape[0]:
+            capture_audio = capture_audio[0 : (predict_audio.shape[0] - capture_audio.shape[0])]
+
+        if noiseadd is not None:
+            ngain = np.power(10, min(float(noiseadd), 0.0) / 20.0)  # limit to gain < 1, convert to float
+            if ngain < 1.0:  # don't apply if it's 1.0
+                logger.info(f"Adding back noise with gain of {ngain} = {noiseadd} dB.")
+                noise = capture_audio - predict_audio
+                predict_audio = predict_audio + ngain * noise
+
+        write_audio(predict_wav, predict_audio, SAMPLE_RATE)
+        logger.info(f"Wrote predict audio with shape {predict_audio.shape} to {predict_wav}")
+        if debug:
+            with h5py.File(h5_name, "a") as f:
+                if "predict_audio" in f:
+                    del f["predict_audio"]
+                f.create_dataset("predict_audio", data=predict_audio)
+            logger.info(f"Wrote predict audio with shape {predict_audio.shape} to {h5_name}")
+
+        save_figure(predict_png, predict_audio, predict)
+        logger.info(f"Wrote predict plots to {predict_png}")
+
+        if asr_name is not None:
+            logger.info(f"Running ASR on model-enhanced audio with {asr_name} ...")
+            predict_asr = calc_asr(predict_audio, engine=asr_name, model=whisper_name).text
+            logger.info(f"Predict audio ASR: {predict_asr}")
+
+    plot_en = True
+    if plot_en is not None:
+        import subprocess
+
+        # Construct the plot command using spgramd and start the process non-blocking (will leave matplotlib open)
+        command = ["python", "spgramd.py", capture_wav, predict_wav]
+        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+
+    if playback is not None:
+        import sh
+
+        sh.play(capture_wav)
+        sh.play(predict_wav)
+        flag_end = False
+        while not flag_end:
+            choice = input("Press 'r' to replay or 'q' to quit: ").strip().lower()
+            if choice == "q":
+                print("Quitting...")
+                flag_end = True
+            elif choice == "r":
+                print("Replaying...")
+                sh.play(capture_wav)
+                sh.play(predict_wav)
+            else:
+                print("Invalid input. Please try again.")
+
+
+def get_frames_from_device(input_name: str | None, length: float, chunk: int = 1024) -> AudioT:
+    from select import select
+    from sys import stdin
+
+    import pyaudio
+
+    from sonusai import logger
+    from sonusai.constants import CHANNEL_COUNT
+    from sonusai.constants import SAMPLE_RATE
+    from sonusai.utils import get_input_device_index_by_name
+    from sonusai.utils import get_input_devices
+
+    p = pyaudio.PyAudio()
+
+    input_devices = get_input_devices(p)
+    if not input_devices:
+        raise ValueError("No input audio devices found")
+
+    if input_name is None:
+        input_name = input_devices[0]
+
+    try:
+        device_index = get_input_device_index_by_name(p, input_name)
+    except ValueError as ex:
+        msg = f"Could not find {input_name}\n"
+        msg += "Available devices:\n"
+        for input_device in input_devices:
+            msg += f"  {input_device}\n"
+        raise ValueError(msg) from ex
+
+    logger.info(f"Capturing from {p.get_device_info_by_index(device_index).get('name')}")
+    stream = p.open(
+        format=pyaudio.paFloat32, channels=CHANNEL_COUNT, rate=SAMPLE_RATE, input=True, input_device_index=device_index
+    )
+    stream.start_stream()
+
+    print()
+    print("+---------------------------------+")
+    print("|       Press Enter to stop       |")
+    print("+---------------------------------+")
+    print()
+
+    elapsed = 0.0
+    seconds_per_chunk = float(chunk) / float(SAMPLE_RATE)
+    raw_frames = []
+    while elapsed < length or length == -1:
+        raw_frames.append(stream.read(num_frames=chunk, exception_on_overflow=False))
+        elapsed += seconds_per_chunk
+        if select(
+            [
+                stdin,
+            ],
+            [],
+            [],
+            0,
+        )[0]:
+            stdin.read(1)
+            length = elapsed
+
+    stream.stop_stream()
+    stream.close()
+    p.terminate()
+    frames = np.frombuffer(b"".join(raw_frames), dtype=np.float32)
+    return frames
+
+
+def get_frames_from_file(input_name: str, length: float) -> AudioT:
+    from sonusai import logger
+    from sonusai.constants import SAMPLE_RATE
+    from sonusai.mixture import read_audio
+
+    logger.info(f"Capturing from {input_name}")
+    frames = read_audio(input_name)
+    if length != -1:
+        num_frames = int(length * SAMPLE_RATE)
+        if len(frames) > num_frames:
+            frames = frames[:num_frames]
+    return frames
+
+
+def sov2nov(feature: np.ndarray, step: int) -> np.ndarray:
+    """Convert stride-overlap batch x stride x bins to no-overlap frames x 1 x bins"""
+
+    stride = feature.shape[1]  # stride; tsteps is set to stride in sov mode
+    if stride == 1:
+        return feature  # no reshape needed if stride is already 1
+    # else:
+    #     hs = feature.shape[1] // 2  # half of stride
+    #     nb = feature.shape[0]  # batches
+
+    nb = feature.shape[0]
+    fout = feature[:, (stride - step) :, :]  # take the last step frames of each stride
+    fout = np.reshape(fout, [step * nb, 1, feature.shape[2]])
+    return fout  # np.transpose(fout, [1, 0, 2])
+
+
+def save_figure(name: str, audio: np.ndarray, feature: np.ndarray) -> None:
+    import matplotlib.pyplot as plt
+    from scipy.interpolate import CubicSpline
+
+    from sonusai.constants import SAMPLE_RATE
+    from sonusai.utils import unstack_complex
+
+    spectrum = 20 * np.log(np.abs(np.squeeze(unstack_complex(feature)).transpose()) + 1e-7)
+    frames = spectrum.shape[1]
+    samples = (len(audio) // frames) * frames
+    length_in_s = samples / SAMPLE_RATE
+    interp = samples // frames
+
+    ts = np.arange(0.0, length_in_s, interp / SAMPLE_RATE)
+    t = np.arange(0.0, length_in_s, 1 / SAMPLE_RATE)
+
+    spectrum = CubicSpline(ts, spectrum, axis=-1)(t)
+
+    fig, (ax1, ax2) = plt.subplots(nrows=2)
+    ax1.set_title(name)
+    ax1.plot(t, audio[:samples])
+    ax1.set_ylabel("Signal")
+    ax1.set_xlim(0, length_in_s)
+    ax1.set_ylim(-1, 1)
+
+    ax2.imshow(spectrum, origin="lower", aspect="auto")
+    ax2.set_xticks([])
+    ax2.set_ylabel("Feature")
+
+    plt.savefig(name, dpi=300)
+
+
+if __name__ == "__main__":
+    from sonusai import exception_handler
+    from sonusai.utils import register_keyboard_interrupt
+
+    register_keyboard_interrupt()
+    try:
+        main()
+    except Exception as e:
+        exception_handler(e)
+
+
+# import subprocess
+#
+# # Define the arguments
+# arg1 = "value1"
+# arg2 = "value2"
+#
+# # Construct the command
+# command = ["python", "script.py", arg1, arg2]
+#
+# # Start the process
+# process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+#
+# # Optionally, you can communicate with the process later if needed
+# # For example, to wait for the process to finish and get the output
+# stdout, stderr = process.communicate()
+#
+# # Check if the process was successful
+# if process.returncode == 0:
+#     print("Process executed successfully:")
+#     print(stdout)
+# else:
+#     print("Process failed:")
+#     print(stderr)
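
To make the overlap-streaming arithmetic above concrete: main() takes the overlap mode from the 7th character of the FEATURE name and the stride from its trailing digits, then derives feat_step, the number of new frames each stride contributes; sov2nov() later keeps the last feat_step frames of every stride and flattens the result back to non-overlapped frames x 1 x bins. A small worked sketch of that dispatch, assuming the feature-name layout the code relies on (hun00nv80 is the example from the docstring):

    import numpy as np

    def overlap_step(feature_ovr: str) -> int:
        # Mirrors the sov_type dispatch in main(): 'v' keeps 1/2 of each
        # stride, 'e' keeps 4/5, 'f' keeps 3/4, and 't' keeps 2/3.
        stride = int(feature_ovr[7:])
        ratio = {"v": 1 / 2, "e": 4 / 5, "f": 3 / 4, "t": 2 / 3}[feature_ovr[6]]
        return int(np.ceil(ratio * stride))

    print(overlap_step("hun00nv80"))   # 40: a stride-80 'v' feature advances 40 frames
    print(overlap_step("hun00nv128"))  # 64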