sonusai 0.20.3__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. sonusai/__init__.py +16 -3
  2. sonusai/audiofe.py +241 -77
  3. sonusai/calc_metric_spenh.py +71 -73
  4. sonusai/config/__init__.py +3 -0
  5. sonusai/config/config.py +61 -0
  6. sonusai/config/config.yml +20 -0
  7. sonusai/config/constants.py +8 -0
  8. sonusai/constants.py +11 -0
  9. sonusai/data/genmixdb.yml +21 -36
  10. sonusai/{mixture/datatypes.py → datatypes.py} +91 -130
  11. sonusai/deprecated/plot.py +4 -5
  12. sonusai/doc/doc.py +4 -4
  13. sonusai/doc.py +11 -4
  14. sonusai/genft.py +43 -45
  15. sonusai/genmetrics.py +25 -19
  16. sonusai/genmix.py +54 -82
  17. sonusai/genmixdb.py +88 -264
  18. sonusai/ir_metric.py +30 -34
  19. sonusai/lsdb.py +41 -48
  20. sonusai/main.py +15 -22
  21. sonusai/metrics/calc_audio_stats.py +4 -293
  22. sonusai/metrics/calc_class_weights.py +4 -4
  23. sonusai/metrics/calc_optimal_thresholds.py +8 -5
  24. sonusai/metrics/calc_pesq.py +2 -2
  25. sonusai/metrics/calc_segsnr_f.py +4 -4
  26. sonusai/metrics/calc_speech.py +25 -13
  27. sonusai/metrics/class_summary.py +7 -7
  28. sonusai/metrics/confusion_matrix_summary.py +5 -5
  29. sonusai/metrics/one_hot.py +4 -4
  30. sonusai/metrics/snr_summary.py +7 -7
  31. sonusai/metrics_summary.py +38 -45
  32. sonusai/mixture/__init__.py +4 -104
  33. sonusai/mixture/audio.py +10 -39
  34. sonusai/mixture/class_balancing.py +103 -0
  35. sonusai/mixture/config.py +251 -271
  36. sonusai/mixture/constants.py +35 -39
  37. sonusai/mixture/data_io.py +25 -36
  38. sonusai/mixture/db_datatypes.py +58 -22
  39. sonusai/mixture/effects.py +386 -0
  40. sonusai/mixture/feature.py +7 -11
  41. sonusai/mixture/generation.py +478 -628
  42. sonusai/mixture/helpers.py +82 -184
  43. sonusai/mixture/ir_delay.py +3 -4
  44. sonusai/mixture/ir_effects.py +77 -0
  45. sonusai/mixture/log_duration_and_sizes.py +6 -12
  46. sonusai/mixture/mixdb.py +910 -729
  47. sonusai/mixture/pad_audio.py +35 -0
  48. sonusai/mixture/resample.py +7 -0
  49. sonusai/mixture/sox_effects.py +195 -0
  50. sonusai/mixture/sox_help.py +650 -0
  51. sonusai/mixture/spectral_mask.py +2 -2
  52. sonusai/mixture/truth.py +17 -15
  53. sonusai/mixture/truth_functions/crm.py +12 -12
  54. sonusai/mixture/truth_functions/energy.py +22 -22
  55. sonusai/mixture/truth_functions/file.py +5 -5
  56. sonusai/mixture/truth_functions/metadata.py +4 -4
  57. sonusai/mixture/truth_functions/metrics.py +4 -4
  58. sonusai/mixture/truth_functions/phoneme.py +3 -3
  59. sonusai/mixture/truth_functions/sed.py +11 -13
  60. sonusai/mixture/truth_functions/target.py +10 -10
  61. sonusai/mkwav.py +26 -29
  62. sonusai/onnx_predict.py +240 -88
  63. sonusai/queries/__init__.py +2 -2
  64. sonusai/queries/queries.py +38 -34
  65. sonusai/speech/librispeech.py +1 -1
  66. sonusai/speech/mcgill.py +1 -1
  67. sonusai/speech/timit.py +2 -2
  68. sonusai/summarize_metric_spenh.py +10 -17
  69. sonusai/utils/__init__.py +7 -1
  70. sonusai/utils/asl_p56.py +2 -2
  71. sonusai/utils/asr.py +2 -2
  72. sonusai/utils/asr_functions/aaware_whisper.py +4 -5
  73. sonusai/utils/choice.py +31 -0
  74. sonusai/utils/compress.py +1 -1
  75. sonusai/utils/dataclass_from_dict.py +19 -1
  76. sonusai/utils/energy_f.py +3 -3
  77. sonusai/utils/evaluate_random_rule.py +15 -0
  78. sonusai/utils/keyboard_interrupt.py +12 -0
  79. sonusai/utils/onnx_utils.py +3 -17
  80. sonusai/utils/print_mixture_details.py +21 -19
  81. sonusai/utils/{temp_seed.py → rand.py} +3 -3
  82. sonusai/utils/read_predict_data.py +2 -2
  83. sonusai/utils/reshape.py +3 -3
  84. sonusai/utils/stratified_shuffle_split.py +3 -3
  85. sonusai/{mixture → utils}/tokenized_shell_vars.py +1 -1
  86. sonusai/utils/write_audio.py +2 -2
  87. sonusai/vars.py +11 -4
  88. {sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/METADATA +4 -2
  89. sonusai-1.0.2.dist-info/RECORD +138 -0
  90. sonusai/mixture/augmentation.py +0 -444
  91. sonusai/mixture/class_count.py +0 -15
  92. sonusai/mixture/eq_rule_is_valid.py +0 -45
  93. sonusai/mixture/target_class_balancing.py +0 -107
  94. sonusai/mixture/targets.py +0 -175
  95. sonusai-0.20.3.dist-info/RECORD +0 -128
  96. {sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/WHEEL +0 -0
  97. {sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/entry_points.txt +0 -0
sonusai/genmixdb.py CHANGED
@@ -5,7 +5,7 @@ usage: genmixdb [-hvmdjn] LOC
5
5
  options:
6
6
  -h, --help
7
7
  -v, --verbose Be verbose.
8
- -m, --mix ave mixture data. [default: False].
8
+ -m, --mix Save mixture data. [default: False].
9
9
  -d, --dryrun Perform a dry run showing the processed config. [default: False].
10
10
  -j, --json Save JSON version of database. [default: False].
11
11
  -n, --nopar Do not run in parallel. [default: False].
@@ -16,120 +16,14 @@ genmixdb creates a database of training and evaluation feature and truth data ge
16
16
  choice of audio neural-network feature types that are supported by the Aaware real-time front-end and truth data that is
17
17
  synchronized frame-by-frame with the feature data.
18
18
 
19
- Here are some examples:
20
-
21
- #### Adding target data
22
- Suppose you have an audio file which is an example, or target, of what you want to recognize or detect. Of course, for
23
- training a NN you also need truth data for that file (also called parameters/labels/classes). If you don't already have
24
- it, genmixdb can create truth using a variety of generation functions on each frame of the feature data. You can also
25
- select different feature types. Here's an example:
26
-
27
- genmixdb target_gfr32ts2
28
-
29
- where target_gfr32ts2 contains config.yml with the following inside:
30
- ---
31
- feature: gfr32ts2
32
-
33
- targets:
34
- - name: data/target.wav
35
-
36
- target_augmentations:
37
- - normalize: -3.5
38
- ...
39
-
40
- The mixture database is written to a SQLite file (mixdb.db) in the same directory that contains the config.yml file.
41
-
42
- #### Target data mix with noise and augmentation
43
-
44
- genmixdb mix_gfr32ts2.yml
45
-
46
- where mix_gfr32ts2.yml contains:
47
- ---
48
- feature: gfr32ts2
49
-
50
- targets:
51
- - name: data/target.wav
52
-
53
- target_augmentations:
54
- - normalize: -3.5
55
- pitch: [-3, 0, 3]
56
- tempo: [0.8, 1, 1.2]
57
-
58
- noises:
59
- - name: data/noise.wav
60
-
61
- noise_augmentations:
62
- - normalize: -3.5
63
-
64
- snrs:
65
- - 20
66
- ...
67
-
68
- In this example a time-domain mixture is created and feature data is calculated as specified by 'feature: gfr32ts2'.
69
- Various feature types are available which vary in spectral and temporal resolution (4 ms or higher), and other feature
70
- algorithm parameters. The total feature size, dimension, and #frames for mixture is reported in the log file (the log
71
- file name is genmixdb.log).
72
-
73
- Truth (parameters/labels/classes) can be automatically created per feature output frame based on a variety of truth
74
- generation functions. By default, these are included with the feature data in a single HDF5 output file. By default,
75
- truth generation is turned on with default settings (see truth section) and a single class, i.e., detecting a single
76
- type of sound. The truth format is a single float per class representing the probability of activity/presence, and
77
- multi-class truth is possible by specifying the number of classes and either a scalar index or a vector of indices in
78
- which to put the truth result. For example, 'num_class: 3' and 'class_indices: [ 2 ]' adds a 1x3 vector to the feature
79
- data with truth put in index 2 (others would be 0) for data/target.wav being an audio clip from sound type of class 2.
80
-
81
- The mixture is created with potential data augmentation functions in the following way:
82
- 1. apply noise augmentation rule
83
- 2. apply target augmentation rule to each target in the mixture (multiple targets may be used in mixup)
84
- 3. adjust noise and target gains for specified SNR
85
- 4. add augmented noise to augmented target(s)
86
-
87
- Note: If an impulse response is part of the target augmentation, truth generation is performed on the targets before
88
- applying the IRs. In this way, the truth is not impacted by the IR.
89
-
90
- The mixture length is the length of the longest target in the mixture, and the noise signal is repeated if it is
91
- shorter, or trimmed if longer.
92
-
93
- #### Target and noise using path lists
94
-
95
- Target and noise audio is specified as a list containing text files, audio files, and file globs. Text files are
96
- processed with items on each line where each item can be a text file, an audio file, or a file glob. Each item will be
97
- searched for audio files which can be WAV, MP3, FLAC, AIFF, or OGG format with any sample rate, bit depth, or channel
98
- count. All audio files will be converted to 16 kHz, float32, single channel (only the first channel is used) format
99
- before processing.
100
-
101
- For example,
102
-
103
- genmixdb dog-bark.yml
104
-
105
- where dog-bark.yml contains:
106
- ---
107
- targets:
108
- - name: slib/dog-outside/*.wav
109
- - name: slib/dog-inside/*.wav
110
-
111
- will find all .wav files in the specified directories and process them as targets.
19
+ For details, see sonusai doc.
112
20
 
113
21
  """
114
22
 
115
- import signal
116
-
117
- from sonusai.mixture import Mixture
23
+ from sonusai.datatypes import Mixture
118
24
  from sonusai.mixture import MixtureDatabase
119
25
 
120
26
 
121
- def signal_handler(_sig, _frame):
122
- import sys
123
-
124
- from sonusai import logger
125
-
126
- logger.info("Canceled due to keyboard interrupt")
127
- sys.exit(1)
128
-
129
-
130
- signal.signal(signal.SIGINT, signal_handler)
131
-
132
-
133
27
  def genmixdb(
134
28
  location: str,
135
29
  save_mix: bool = False,
@@ -142,23 +36,17 @@ def genmixdb(
142
36
  from functools import partial
143
37
  from random import seed
144
38
 
39
+ import pandas as pd
145
40
  import yaml
146
41
 
147
42
  from sonusai import logger
148
- from sonusai.mixture import SAMPLE_BYTES
149
- from sonusai.mixture import SAMPLE_RATE
150
- from sonusai.mixture import AugmentationRule
43
+ from sonusai.constants import SAMPLE_BYTES
44
+ from sonusai.constants import SAMPLE_RATE
151
45
  from sonusai.mixture import MixtureDatabase
152
- from sonusai.mixture import balance_targets
153
46
  from sonusai.mixture import generate_mixtures
154
- from sonusai.mixture import get_all_snrs_from_config
155
- from sonusai.mixture import get_augmentation_rules
156
- from sonusai.mixture import get_augmented_targets
157
- from sonusai.mixture import get_impulse_response_files
158
- from sonusai.mixture import get_mixups
159
- from sonusai.mixture import get_noise_files
160
- from sonusai.mixture import get_target_augmentations_for_mixup
161
- from sonusai.mixture import get_target_files
47
+ from sonusai.mixture import get_effect_rules
48
+ from sonusai.mixture import get_ir_files
49
+ from sonusai.mixture import get_source_files
162
50
  from sonusai.mixture import initialize_db
163
51
  from sonusai.mixture import load_config
164
52
  from sonusai.mixture import log_duration_and_sizes
@@ -166,22 +54,20 @@ def genmixdb(
166
54
  from sonusai.mixture import populate_class_weights_threshold_table
167
55
  from sonusai.mixture import populate_impulse_response_file_table
168
56
  from sonusai.mixture import populate_mixture_table
169
- from sonusai.mixture import populate_noise_file_table
57
+ from sonusai.mixture import populate_source_file_table
170
58
  from sonusai.mixture import populate_spectral_mask_table
171
- from sonusai.mixture import populate_target_file_table
172
59
  from sonusai.mixture import populate_top_table
173
60
  from sonusai.mixture import populate_truth_parameters_table
174
61
  from sonusai.mixture import update_mixid_width
175
- from sonusai.utils import dataclass_from_dict
176
62
  from sonusai.utils import human_readable_size
177
63
  from sonusai.utils import par_track
178
64
  from sonusai.utils import seconds_to_hms
179
65
  from sonusai.utils import track
180
66
 
181
67
  config = load_config(location)
182
- initialize_db(location=location, test=test)
68
+ initialize_db(location, test)
183
69
 
184
- mixdb = MixtureDatabase(location=location, test=test)
70
+ mixdb = MixtureDatabase(location, test)
185
71
 
186
72
  populate_top_table(location, config, test)
187
73
  populate_class_label_table(location, config, test)
@@ -197,148 +83,94 @@ def genmixdb(
197
83
  logger.debug(yaml.dump(config))
198
84
 
199
85
  if logging:
200
- logger.info("Collecting targets")
86
+ logger.info("Collecting sources")
201
87
 
202
- target_files = get_target_files(config, show_progress=show_progress)
203
-
204
- if len(target_files) == 0:
205
- raise RuntimeError("Canceled due to no targets")
88
+ source_files = get_source_files(config, show_progress)
89
+ logger.info("")
206
90
 
207
- populate_target_file_table(location, target_files, test)
91
+ if len([file for file in source_files if file.category == "primary"]) == 0:
92
+ raise RuntimeError("Canceled due to no primary sources")
208
93
 
209
- if logging:
210
- logger.debug("List of targets:")
211
- logger.debug(yaml.dump([target.name for target in mixdb.target_files], default_flow_style=False))
212
- logger.debug("")
94
+ populate_source_file_table(location, source_files, test)
213
95
 
214
96
  if logging:
215
- logger.info("Collecting noises")
216
-
217
- noise_files = get_noise_files(config, show_progress=show_progress)
218
-
219
- populate_noise_file_table(location, noise_files, test)
97
+ logger.info("Sources summary")
98
+ data = {
99
+ "category": [],
100
+ "files": [],
101
+ "size": [],
102
+ "duration": [],
103
+ }
104
+ for category, source_files in mixdb.source_files.items():
105
+ audio_samples = sum([source.samples for source in source_files])
106
+ audio_duration = audio_samples / SAMPLE_RATE
107
+ data["category"].append(category)
108
+ data["files"].append(mixdb.num_source_files(category))
109
+ data["size"].append(human_readable_size(audio_samples * SAMPLE_BYTES, 1))
110
+ data["duration"].append(seconds_to_hms(seconds=audio_duration))
111
+
112
+ df = pd.DataFrame(data)
113
+ logger.info(df.to_string(index=False, header=False))
114
+ logger.info("")
220
115
 
221
- if logging:
222
- logger.debug("List of noises:")
223
- logger.debug(yaml.dump([noise.name for noise in mixdb.noise_files], default_flow_style=False))
224
- logger.debug("")
116
+ for category, files in mixdb.source_files.items():
117
+ logger.debug(f"List of {category} sources:")
118
+ logger.debug(yaml.dump([file.name for file in files], default_flow_style=False))
225
119
 
226
120
  if logging:
227
121
  logger.info("Collecting impulse responses")
228
122
 
229
- impulse_response_files = get_impulse_response_files(config)
123
+ ir_files = get_ir_files(config, show_progress=show_progress)
124
+ logger.info("")
230
125
 
231
- populate_impulse_response_file_table(location, impulse_response_files, test)
126
+ populate_impulse_response_file_table(location, ir_files, test)
232
127
 
233
128
  if logging:
234
129
  logger.debug("List of impulse responses:")
235
- logger.debug(
236
- yaml.dump(
237
- [entry.file for entry in mixdb.impulse_response_files],
238
- default_flow_style=False,
239
- )
240
- )
130
+ for idx, file in enumerate(ir_files):
131
+ logger.debug(f"id: {idx}, name:{file.name}, delay: {file.delay}, tags: [{', '.join(file.tags)}]")
241
132
  logger.debug("")
242
133
 
243
134
  if logging:
244
- logger.info("Collecting target augmentations")
245
-
246
- target_augmentations = get_augmentation_rules(
247
- rules=config["target_augmentations"], num_ir=mixdb.num_impulse_response_files
248
- )
249
- mixups = get_mixups(target_augmentations)
135
+ logger.info("Collecting effects")
250
136
 
251
- if logging:
252
- for mixup in mixups:
253
- logger.debug(f"Expanded list of target augmentation rules for mixup of {mixup}:")
254
- for target_augmentation in get_target_augmentations_for_mixup(target_augmentations, mixup):
255
- ta_dict = target_augmentation.to_dict()
256
- del ta_dict["mixup"]
257
- logger.debug(f"- {ta_dict}")
258
- logger.debug("")
137
+ rules = get_effect_rules(location, config, test)
259
138
 
260
139
  if logging:
261
- logger.info("Collecting noise augmentations")
262
-
263
- noise_augmentations = get_augmentation_rules(
264
- rules=config["noise_augmentations"], num_ir=mixdb.num_impulse_response_files
265
- )
140
+ logger.info("")
141
+ for category, effect in rules.items():
142
+ logger.debug(f"List of {category} rules:")
143
+ logger.debug(yaml.dump([entry.to_dict() for entry in effect], default_flow_style=False))
266
144
 
267
145
  if logging:
268
- logger.debug("Expanded list of noise augmentations:")
269
- for noise_augmentation in noise_augmentations:
270
- na_dict = noise_augmentation.to_dict()
271
- del na_dict["mixup"]
272
- logger.debug(f"- {na_dict}")
146
+ logger.debug("SNRS:")
147
+ for category, source in config["sources"].items():
148
+ if category != "primary":
149
+ logger.debug(f" {category}")
150
+ for snr in source["snrs"]:
151
+ logger.debug(f" - {snr}")
152
+ logger.debug("")
153
+ logger.debug("Mix Rules:")
154
+ for category, source in config["sources"].items():
155
+ if category != "primary":
156
+ logger.debug(f" {category}")
157
+ for mix_rule in source["mix_rules"]:
158
+ logger.debug(f" - {mix_rule}")
273
159
  logger.debug("")
274
-
275
- if logging:
276
- logger.debug(f"SNRs: {config['snrs']}\n")
277
- logger.debug(f"Random SNRs: {config['random_snrs']}\n")
278
- logger.debug(f"Noise mix mode: {mixdb.noise_mix_mode}\n")
279
160
  logger.debug("Spectral masks:")
280
161
  for spectral_mask in mixdb.spectral_masks:
281
162
  logger.debug(f"- {spectral_mask}")
282
163
  logger.debug("")
283
164
 
284
- if logging:
285
- logger.info("Collecting augmented targets")
286
-
287
- augmented_targets = get_augmented_targets(target_files, target_augmentations, mixups)
288
-
289
- if config["class_balancing"]:
290
- class_balancing_augmentation = dataclass_from_dict(AugmentationRule, config["class_balancing_augmentation"])
291
- augmented_targets, target_augmentations = balance_targets(
292
- augmented_targets=augmented_targets,
293
- targets=target_files,
294
- target_augmentations=target_augmentations,
295
- class_balancing_augmentation=class_balancing_augmentation, # pyright: ignore [reportArgumentType]
296
- num_classes=mixdb.num_classes,
297
- num_ir=mixdb.num_impulse_response_files,
298
- mixups=mixups,
299
- )
300
-
301
- target_audio_samples = sum([targets.samples for targets in mixdb.target_files])
302
- target_audio_duration = target_audio_samples / SAMPLE_RATE
303
- noise_audio_duration = sum([noises.duration for noises in mixdb.noise_files])
304
- noise_audio_samples = noise_audio_duration * SAMPLE_RATE
305
-
306
- if logging:
307
- logger.info("")
308
- logger.info(
309
- f"Target audio: {mixdb.num_target_files} files, "
310
- f"{human_readable_size(target_audio_samples * SAMPLE_BYTES, 1)}, "
311
- f"{seconds_to_hms(seconds=target_audio_duration)}"
312
- )
313
- logger.info(
314
- f"Noise audio: {mixdb.num_noise_files} files, "
315
- f"{human_readable_size(noise_audio_samples * SAMPLE_BYTES, 1)}, "
316
- f"{seconds_to_hms(seconds=noise_audio_duration)}"
317
- )
318
-
319
165
  if logging:
320
166
  logger.info("Generating mixtures")
321
167
 
322
- used_noise_files, used_noise_samples, mixtures = generate_mixtures(
323
- noise_mix_mode=mixdb.noise_mix_mode,
324
- augmented_targets=augmented_targets,
325
- target_files=target_files,
326
- target_augmentations=target_augmentations,
327
- noise_files=noise_files,
328
- noise_augmentations=noise_augmentations,
329
- spectral_masks=mixdb.spectral_masks,
330
- all_snrs=get_all_snrs_from_config(config),
331
- mixups=mixups,
332
- num_classes=mixdb.num_classes,
333
- feature_step_samples=mixdb.feature_step_samples,
334
- num_ir=mixdb.num_impulse_response_files,
335
- )
168
+ mixtures = generate_mixtures(location, config, rules, test)
336
169
 
337
170
  num_mixtures = len(mixtures)
338
171
  update_mixid_width(location, num_mixtures, test)
339
172
 
340
173
  if logging:
341
- logger.info("")
342
174
  logger.info(f"Found {num_mixtures:,} mixtures to process")
343
175
 
344
176
  total_duration = float(sum([mixture.samples for mixture in mixtures])) / SAMPLE_RATE
@@ -346,7 +178,6 @@ def genmixdb(
346
178
  if logging:
347
179
  log_duration_and_sizes(
348
180
  total_duration=total_duration,
349
- num_classes=mixdb.num_classes,
350
181
  feature_step_samples=mixdb.feature_step_samples,
351
182
  feature_parameters=mixdb.feature_parameters,
352
183
  stride=mixdb.fg_stride,
@@ -386,27 +217,17 @@ def genmixdb(
386
217
  show_progress=show_progress,
387
218
  )
388
219
 
389
- total_noise_files = len(noise_files)
390
-
391
- total_samples = mixdb.total_samples()
392
- total_duration = float(total_samples / SAMPLE_RATE)
393
-
394
- noise_files_percent = (float(used_noise_files) / float(total_noise_files)) * 100
395
- noise_samples_percent = (float(used_noise_samples) / float(noise_audio_samples)) * 100
220
+ total_duration = float(mixdb.total_samples() / SAMPLE_RATE)
396
221
 
397
222
  if logging:
398
223
  log_duration_and_sizes(
399
224
  total_duration=total_duration,
400
- num_classes=mixdb.num_classes,
401
225
  feature_step_samples=mixdb.feature_step_samples,
402
226
  feature_parameters=mixdb.feature_parameters,
403
227
  stride=mixdb.fg_stride,
404
228
  desc="Actual",
405
229
  )
406
230
  logger.info("")
407
- logger.info(f"Used {noise_files_percent:,.0f}% of noise files")
408
- logger.info(f"Used {noise_samples_percent:,.0f}% of noise audio")
409
- logger.info("")
410
231
 
411
232
  if not test and save_json:
412
233
  if logging:
@@ -434,12 +255,12 @@ def _process_mixture(
434
255
 
435
256
  if save_mix:
436
257
  write(
437
- items=[
438
- ("targets", genmix_data.targets),
439
- ("target", genmix_data.target),
440
- ("noise", genmix_data.noise),
441
- ("mixture", genmix_data.mixture),
442
- ]
258
+ items={
259
+ "sources": genmix_data.sources,
260
+ "source": genmix_data.source,
261
+ "noise": genmix_data.noise,
262
+ "mixture": genmix_data.mixture,
263
+ }
443
264
  )
444
265
 
445
266
  write_mixture_metadata(mixdb, mixture=mixture)
@@ -450,10 +271,10 @@ def _process_mixture(
450
271
  def main() -> None:
451
272
  from docopt import docopt
452
273
 
453
- import sonusai
274
+ from sonusai import __version__ as sai_version
454
275
  from sonusai.utils import trim_docstring
455
276
 
456
- args = docopt(trim_docstring(__doc__), version=sonusai.__version__, options_first=True)
277
+ args = docopt(trim_docstring(__doc__), version=sai_version, options_first=True)
457
278
 
458
279
  import time
459
280
  from os import makedirs
@@ -498,17 +319,13 @@ def main() -> None:
498
319
  logger.info(f"Creating mixture database for {location}")
499
320
  logger.info("")
500
321
 
501
- try:
502
- genmixdb(
503
- location=location,
504
- save_mix=save_mix,
505
- show_progress=True,
506
- save_json=save_json,
507
- no_par=no_par,
508
- )
509
- except Exception as e:
510
- logger.debug(e)
511
- raise
322
+ genmixdb(
323
+ location=location,
324
+ save_mix=save_mix,
325
+ show_progress=True,
326
+ save_json=save_json,
327
+ no_par=no_par,
328
+ )
512
329
 
513
330
  end_time = time.monotonic()
514
331
  logger.info(f"Completed in {seconds_to_hms(seconds=end_time - start_time)}")
@@ -516,4 +333,11 @@ def main() -> None:
516
333
 
517
334
 
518
335
  if __name__ == "__main__":
519
- main()
336
+ from sonusai import exception_handler
337
+ from sonusai.utils import register_keyboard_interrupt
338
+
339
+ register_keyboard_interrupt()
340
+ try:
341
+ main()
342
+ except Exception as e:
343
+ exception_handler(e)
sonusai/ir_metric.py CHANGED
@@ -1,10 +1,9 @@
1
1
  """sonusai ir_metric
2
2
 
3
- usage: ir_metric [-hv] [-n NCPU] IRLOC
3
+ usage: ir_metric [-h] [-n NCPU] IRLOC
4
4
 
5
5
  options:
6
6
  -h, --help
7
- -v, --verbose Be verbose.
8
7
  -n, --num_process NCPU Number of parallel processes to use [default: auto]
9
8
 
10
9
  Calculate delay and gain metrics of impulse response (IR) files <filename>.wav in IRLOC.
@@ -22,7 +21,6 @@ IRLOC directory containing impulse response data in audio files (.wav, .flac, e
22
21
  """
23
22
 
24
23
  import glob
25
- import signal
26
24
  from os.path import abspath
27
25
  from os.path import basename
28
26
  from os.path import commonprefix
@@ -42,18 +40,6 @@ from numpy import fft
42
40
  from sonusai.utils import braced_iglob
43
41
 
44
42
 
45
- def signal_handler(_sig, _frame):
46
- import sys
47
-
48
- from sonusai import logger
49
-
50
- logger.info("Canceled due to keyboard interrupt")
51
- sys.exit(1)
52
-
53
-
54
- signal.signal(signal.SIGINT, signal_handler)
55
-
56
-
57
43
  def tdoa(signal, reference, interp=1, phat=False, fs=1, t_max=None):
58
44
  """
59
45
  Estimates the shift of array signal with respect to reference
@@ -263,7 +249,7 @@ def measure_rt60(h, fs=1, decay_db=60, energy_thres=1.0, plot=False, rt60_tgt=No
263
249
  t60_decay = 3 * i_decay20db / fs
264
250
  rt60 = t60_decay - t_5db
265
251
 
266
- # # extropolate to compute the rt60 decay time from decay_db decay time
252
+ # # extrapolate to compute the rt60 decay time from decay_db decay time
267
253
  # decay_time = t_decay - t_5db
268
254
  # est_rt60 = (60 / decay_db) * decay_time
269
255
 
@@ -297,27 +283,30 @@ def measure_rt60(h, fs=1, decay_db=60, energy_thres=1.0, plot=False, rt60_tgt=No
297
283
  return rt60, edt, rt10, rt20, floor
298
284
 
299
285
 
300
- def process_path(path, extlist=[".wav", ".WAV", ".flac", ".FLAC", ".mp3", ".aac"]):
286
+ def process_path(path: str, extensions: list[str] | None = None) -> tuple[list, str | None]:
301
287
  """
302
288
  Check path, which can be a single file, a directory, or a glob pattern
303
289
  return:
304
290
  - a list of files whose extension matches any of the provided extensions (e.g. ['.wav', '.mp3', '.aac'])
305
291
  - the base directory of the path, if one could be determined
306
292
  """
293
+ if extensions is None:
294
+ extensions = [".wav", ".WAV", ".flac", ".FLAC", ".mp3", ".aac"]
295
+
307
296
  # Check if the path is a single file, and return it as a list with the dirname
308
297
  if isfile(path):
309
- if any(path.endswith(ext) for ext in extlist):
298
+ if any(path.endswith(ext) for ext in extensions):
310
299
  basedir = dirname(path) # base directory
311
300
  if not basedir:
312
301
  basedir = "./"
313
302
  return [path], basedir
314
- else:
315
- return [], []
303
+
304
+ return [], None
316
305
 
317
306
  # Check if the path is a dir, recursively find all files any of the specified extensions, return file list and dir
318
307
  if isdir(path):
319
308
  matching_files = []
320
- for ext in extlist:
309
+ for ext in extensions:
321
310
  matching_files.extend(glob.glob(join(path, "**/*" + ext), recursive=True))
322
311
  return matching_files, path
323
312
 
@@ -326,11 +315,12 @@ def process_path(path, extlist=[".wav", ".WAV", ".flac", ".FLAC", ".mp3", ".aac"
326
315
  matching_files = []
327
316
  for file in braced_iglob(pathname=apath, recursive=True):
328
317
  matching_files.append(file)
318
+
329
319
  if matching_files:
330
320
  basedir = commonprefix(matching_files) # Find basedir
331
321
  return matching_files, basedir
332
- else:
333
- return [], []
322
+
323
+ return [], None
334
324
 
335
325
 
336
326
  def _process_ir(pfile: str, irtab_col: list, basedir: str) -> pd.DataFrame:
@@ -424,20 +414,19 @@ def _process_ir(pfile: str, irtab_col: list, basedir: str) -> pd.DataFrame:
424
414
  def main():
425
415
  from docopt import docopt
426
416
 
427
- import sonusai
428
- from sonusai.utils import trim_docstring
417
+ from . import __version__ as sai_version
418
+ from .utils.docstring import trim_docstring
429
419
 
430
- args = docopt(trim_docstring(__doc__), version=sonusai.__version__, options_first=True)
420
+ args = docopt(trim_docstring(__doc__), version=sai_version, options_first=True)
431
421
 
432
- verbose = args["--verbose"]
433
422
  ir_location = args["IRLOC"]
434
423
  num_proc = args["--num_process"]
435
424
 
436
425
  import psutil
437
426
 
438
- from sonusai.utils import create_timestamp
439
- from sonusai.utils import par_track
440
- from sonusai.utils import track
427
+ from .utils.create_timestamp import create_timestamp
428
+ from .utils.parallel import par_track
429
+ from .utils.parallel import track
441
430
 
442
431
  # Check location, default ext are ['.wav', '.WAV', '.flac', '.FLAC', '.mp3', '.aac']
443
432
  pfiles, basedir = process_path(ir_location)
@@ -446,15 +435,15 @@ def main():
446
435
  if pfiles is None or len(pfiles) < 1:
447
436
  print(f"No IR audio files found in {ir_location}, exiting ...")
448
437
  raise SystemExit(1)
449
- elif len(pfiles) == 1:
438
+ if len(pfiles) == 1:
450
439
  print(f"Found single IR audio file {ir_location} , writing to *-irmetric.txt ...")
451
440
  fbase, ext = splitext(basename(pfiles[0]))
452
441
  wlcsv_name = None
453
442
  txt_fname = str(join(basedir, fbase + "-irmetric.txt"))
454
- elif len(pfiles) > 1:
443
+ else:
455
444
  print(f"Found {len(pfiles)} files under {basedir} for impulse response metric calculations")
456
- txt_fname = str(join(basedir, "ir_metric_summary.txt"))
457
445
  wlcsv_name = str(join(basedir, "ir_metric_list.csv"))
446
+ txt_fname = str(join(basedir, "ir_metric_summary.txt"))
458
447
 
459
448
  num_cpu = psutil.cpu_count()
460
449
  cpu_percent = psutil.cpu_percent(interval=1)
@@ -552,4 +541,11 @@ def main():
552
541
 
553
542
 
554
543
  if __name__ == "__main__":
555
- main()
544
+ from sonusai import exception_handler
545
+ from sonusai.utils import register_keyboard_interrupt
546
+
547
+ register_keyboard_interrupt()
548
+ try:
549
+ main()
550
+ except Exception as e:
551
+ exception_handler(e)