sonusai 0.18.9__py3-none-any.whl → 0.19.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. sonusai/__init__.py +20 -29
  2. sonusai/aawscd_probwrite.py +18 -18
  3. sonusai/audiofe.py +93 -80
  4. sonusai/calc_metric_spenh.py +395 -321
  5. sonusai/data/genmixdb.yml +5 -11
  6. sonusai/{gentcst.py → deprecated/gentcst.py} +146 -149
  7. sonusai/{plot.py → deprecated/plot.py} +177 -131
  8. sonusai/{tplot.py → deprecated/tplot.py} +124 -102
  9. sonusai/doc/__init__.py +1 -1
  10. sonusai/doc/doc.py +112 -177
  11. sonusai/doc.py +10 -10
  12. sonusai/genft.py +93 -77
  13. sonusai/genmetrics.py +59 -46
  14. sonusai/genmix.py +116 -104
  15. sonusai/genmixdb.py +194 -153
  16. sonusai/lsdb.py +56 -66
  17. sonusai/main.py +23 -20
  18. sonusai/metrics/__init__.py +2 -0
  19. sonusai/metrics/calc_audio_stats.py +29 -24
  20. sonusai/metrics/calc_class_weights.py +7 -7
  21. sonusai/metrics/calc_optimal_thresholds.py +5 -7
  22. sonusai/metrics/calc_pcm.py +3 -3
  23. sonusai/metrics/calc_pesq.py +10 -7
  24. sonusai/metrics/calc_phase_distance.py +3 -3
  25. sonusai/metrics/calc_sa_sdr.py +10 -8
  26. sonusai/metrics/calc_segsnr_f.py +15 -17
  27. sonusai/metrics/calc_speech.py +105 -47
  28. sonusai/metrics/calc_wer.py +35 -32
  29. sonusai/metrics/calc_wsdr.py +10 -7
  30. sonusai/metrics/class_summary.py +30 -27
  31. sonusai/metrics/confusion_matrix_summary.py +25 -22
  32. sonusai/metrics/one_hot.py +91 -57
  33. sonusai/metrics/snr_summary.py +53 -46
  34. sonusai/mixture/__init__.py +19 -14
  35. sonusai/mixture/audio.py +4 -6
  36. sonusai/mixture/augmentation.py +37 -43
  37. sonusai/mixture/class_count.py +5 -14
  38. sonusai/mixture/config.py +292 -225
  39. sonusai/mixture/constants.py +41 -30
  40. sonusai/mixture/data_io.py +155 -0
  41. sonusai/mixture/datatypes.py +111 -108
  42. sonusai/mixture/db_datatypes.py +54 -70
  43. sonusai/mixture/eq_rule_is_valid.py +6 -9
  44. sonusai/mixture/feature.py +40 -38
  45. sonusai/mixture/generation.py +522 -389
  46. sonusai/mixture/helpers.py +217 -272
  47. sonusai/mixture/log_duration_and_sizes.py +16 -13
  48. sonusai/mixture/mixdb.py +669 -477
  49. sonusai/mixture/soundfile_audio.py +12 -17
  50. sonusai/mixture/sox_audio.py +91 -112
  51. sonusai/mixture/sox_augmentation.py +8 -9
  52. sonusai/mixture/spectral_mask.py +4 -6
  53. sonusai/mixture/target_class_balancing.py +41 -36
  54. sonusai/mixture/targets.py +69 -67
  55. sonusai/mixture/tokenized_shell_vars.py +23 -23
  56. sonusai/mixture/torchaudio_audio.py +14 -15
  57. sonusai/mixture/torchaudio_augmentation.py +23 -27
  58. sonusai/mixture/truth.py +48 -26
  59. sonusai/mixture/truth_functions/__init__.py +26 -0
  60. sonusai/mixture/truth_functions/crm.py +56 -38
  61. sonusai/mixture/truth_functions/datatypes.py +37 -0
  62. sonusai/mixture/truth_functions/energy.py +85 -59
  63. sonusai/mixture/truth_functions/file.py +30 -30
  64. sonusai/mixture/truth_functions/phoneme.py +14 -7
  65. sonusai/mixture/truth_functions/sed.py +71 -45
  66. sonusai/mixture/truth_functions/target.py +69 -106
  67. sonusai/mkwav.py +52 -85
  68. sonusai/onnx_predict.py +46 -43
  69. sonusai/queries/__init__.py +3 -1
  70. sonusai/queries/queries.py +100 -59
  71. sonusai/speech/__init__.py +2 -0
  72. sonusai/speech/l2arctic.py +24 -23
  73. sonusai/speech/librispeech.py +16 -17
  74. sonusai/speech/mcgill.py +22 -21
  75. sonusai/speech/textgrid.py +32 -25
  76. sonusai/speech/timit.py +45 -42
  77. sonusai/speech/vctk.py +14 -13
  78. sonusai/speech/voxceleb.py +26 -20
  79. sonusai/summarize_metric_spenh.py +11 -10
  80. sonusai/utils/__init__.py +4 -3
  81. sonusai/utils/asl_p56.py +1 -1
  82. sonusai/utils/asr.py +37 -17
  83. sonusai/utils/asr_functions/__init__.py +2 -0
  84. sonusai/utils/asr_functions/aaware_whisper.py +18 -12
  85. sonusai/utils/audio_devices.py +12 -12
  86. sonusai/utils/braced_glob.py +6 -8
  87. sonusai/utils/calculate_input_shape.py +1 -4
  88. sonusai/utils/compress.py +2 -2
  89. sonusai/utils/convert_string_to_number.py +1 -3
  90. sonusai/utils/create_timestamp.py +1 -1
  91. sonusai/utils/create_ts_name.py +2 -2
  92. sonusai/utils/dataclass_from_dict.py +1 -1
  93. sonusai/utils/docstring.py +6 -6
  94. sonusai/utils/energy_f.py +9 -7
  95. sonusai/utils/engineering_number.py +56 -54
  96. sonusai/utils/get_label_names.py +8 -10
  97. sonusai/utils/human_readable_size.py +2 -2
  98. sonusai/utils/model_utils.py +3 -5
  99. sonusai/utils/numeric_conversion.py +2 -4
  100. sonusai/utils/onnx_utils.py +43 -32
  101. sonusai/utils/parallel.py +40 -27
  102. sonusai/utils/print_mixture_details.py +25 -22
  103. sonusai/utils/ranges.py +12 -12
  104. sonusai/utils/read_predict_data.py +11 -9
  105. sonusai/utils/reshape.py +19 -26
  106. sonusai/utils/seconds_to_hms.py +1 -1
  107. sonusai/utils/stacked_complex.py +8 -16
  108. sonusai/utils/stratified_shuffle_split.py +29 -27
  109. sonusai/utils/write_audio.py +2 -2
  110. sonusai/utils/yes_or_no.py +3 -3
  111. sonusai/vars.py +14 -14
  112. {sonusai-0.18.9.dist-info → sonusai-0.19.5.dist-info}/METADATA +20 -21
  113. sonusai-0.19.5.dist-info/RECORD +125 -0
  114. {sonusai-0.18.9.dist-info → sonusai-0.19.5.dist-info}/WHEEL +1 -1
  115. sonusai/mixture/truth_functions/data.py +0 -58
  116. sonusai/utils/read_mixture_data.py +0 -14
  117. sonusai-0.18.9.dist-info/RECORD +0 -125
  118. {sonusai-0.18.9.dist-info → sonusai-0.19.5.dist-info}/entry_points.txt +0 -0
@@ -9,29 +9,28 @@ def _raw_read(name: str | Path) -> tuple[AudioT, int]:
9
9
  import soundfile
10
10
  from pydub import AudioSegment
11
11
 
12
- from sonusai import SonusAIError
13
12
  from .tokenized_shell_vars import tokenized_expand
14
13
 
15
14
  expanded_name, _ = tokenized_expand(name)
16
15
 
17
16
  try:
18
- if expanded_name.endswith('.mp3'):
17
+ if expanded_name.endswith(".mp3"):
19
18
  sound = AudioSegment.from_mp3(expanded_name)
20
19
  raw = np.array(sound.get_array_of_samples()).astype(np.float32).reshape((-1, sound.channels))
21
20
  raw = raw / 2 ** (sound.sample_width * 8 - 1)
22
21
  sample_rate = sound.frame_rate
23
- elif expanded_name.endswith('.m4a'):
22
+ elif expanded_name.endswith(".m4a"):
24
23
  sound = AudioSegment.from_file(expanded_name)
25
24
  raw = np.array(sound.get_array_of_samples()).astype(np.float32).reshape((-1, sound.channels))
26
25
  raw = raw / 2 ** (sound.sample_width * 8 - 1)
27
26
  sample_rate = sound.frame_rate
28
27
  else:
29
- raw, sample_rate = soundfile.read(expanded_name, always_2d=True, dtype='float32')
28
+ raw, sample_rate = soundfile.read(expanded_name, always_2d=True, dtype="float32")
30
29
  except Exception as e:
31
30
  if name != expanded_name:
32
- raise SonusAIError(f'Error reading {name} (expanded: {expanded_name}): {e}')
31
+ raise OSError(f"Error reading {name} (expanded: {expanded_name}): {e}") from e
33
32
  else:
34
- raise SonusAIError(f'Error reading {name}: {e}')
33
+ raise OSError(f"Error reading {name}: {e}") from e
35
34
 
36
35
  return np.squeeze(raw[:, 0]), sample_rate
37
36
 
@@ -45,24 +44,23 @@ def get_sample_rate(name: str | Path) -> int:
45
44
  import soundfile
46
45
  from pydub import AudioSegment
47
46
 
48
- from sonusai import SonusAIError
49
47
  from .tokenized_shell_vars import tokenized_expand
50
48
 
51
49
  expanded_name, _ = tokenized_expand(name)
52
50
 
53
51
  try:
54
- if expanded_name.endswith('.mp3'):
52
+ if expanded_name.endswith(".mp3"):
55
53
  return AudioSegment.from_mp3(expanded_name).frame_rate
56
54
 
57
- if expanded_name.endswith('.m4a'):
55
+ if expanded_name.endswith(".m4a"):
58
56
  return AudioSegment.from_file(expanded_name).frame_rate
59
57
 
60
58
  return soundfile.info(expanded_name).samplerate
61
59
  except Exception as e:
62
60
  if name != expanded_name:
63
- raise SonusAIError(f'Error reading {name} (expanded: {expanded_name}): {e}')
61
+ raise OSError(f"Error reading {name} (expanded: {expanded_name}): {e}") from e
64
62
  else:
65
- raise SonusAIError(f'Error reading {name}: {e}')
63
+ raise OSError(f"Error reading {name}: {e}") from e
66
64
 
67
65
 
68
66
  def read_ir(name: str | Path) -> ImpulseResponseData:
@@ -95,10 +93,7 @@ def read_audio(name: str | Path) -> AudioT:
95
93
  from .constants import SAMPLE_RATE
96
94
 
97
95
  out, sample_rate = _raw_read(name)
98
- out = librosa.resample(out,
99
- orig_sr=sample_rate,
100
- target_sr=SAMPLE_RATE,
101
- res_type='soxr_hq')
96
+ out = librosa.resample(out, orig_sr=sample_rate, target_sr=SAMPLE_RATE, res_type="soxr_hq")
102
97
 
103
98
  return out
104
99
 
@@ -119,11 +114,11 @@ def get_num_samples(name: str | Path) -> int:
119
114
 
120
115
  expanded_name, _ = tokenized_expand(name)
121
116
 
122
- if expanded_name.endswith('.mp3'):
117
+ if expanded_name.endswith(".mp3"):
123
118
  sound = AudioSegment.from_mp3(expanded_name)
124
119
  samples = sound.frame_count()
125
120
  sample_rate = sound.frame_rate
126
- elif expanded_name.endswith('.m4a'):
121
+ elif expanded_name.endswith(".m4a"):
127
122
  sound = AudioSegment.from_file(expanded_name)
128
123
  samples = sound.frame_count()
129
124
  sample_rate = sound.frame_rate
@@ -1,5 +1,4 @@
1
1
  from pathlib import Path
2
- from typing import Optional
3
2
 
4
3
  import numpy as np
5
4
  from sox import Transformer as SoxTransformer
@@ -16,7 +15,6 @@ def read_impulse_response(name: str | Path) -> ImpulseResponseData:
16
15
  """
17
16
  from scipy.io import wavfile
18
17
 
19
- from sonusai import SonusAIError
20
18
  from .datatypes import ImpulseResponseData
21
19
  from .tokenized_shell_vars import tokenized_expand
22
20
 
@@ -27,9 +25,9 @@ def read_impulse_response(name: str | Path) -> ImpulseResponseData:
27
25
  sample_rate, data = wavfile.read(expanded_name)
28
26
  except Exception as e:
29
27
  if name != expanded_name:
30
- raise SonusAIError(f'Error reading {name} (expanded: {expanded_name}): {e}')
28
+ raise OSError(f"Error reading {name} (expanded: {expanded_name}): {e}") from e
31
29
  else:
32
- raise SonusAIError(f'Error reading {name}: {e}')
30
+ raise OSError(f"Error reading {name}: {e}") from e
33
31
 
34
32
  data = data.astype(np.float32)
35
33
  offset = np.argmax(data)
@@ -49,7 +47,6 @@ def read_audio(name: str | Path) -> AudioT:
49
47
 
50
48
  from sox.core import sox
51
49
 
52
- from sonusai import SonusAIError
53
50
  from .constants import BIT_DEPTH
54
51
  from .constants import CHANNEL_COUNT
55
52
  from .constants import ENCODING
@@ -57,7 +54,6 @@ def read_audio(name: str | Path) -> AudioT:
57
54
  from .tokenized_shell_vars import tokenized_expand
58
55
 
59
56
  def encode_output(buffer: Any) -> np.ndarray:
60
- from sonusai import SonusAIError
61
57
  from .constants import BIT_DEPTH
62
58
  from .constants import ENCODING
63
59
 
@@ -71,14 +67,14 @@ def read_audio(name: str | Path) -> AudioT:
71
67
  return np.frombuffer(buffer, dtype=np.int32)
72
68
 
73
69
  if BIT_DEPTH == 32:
74
- if ENCODING == 'floating-point':
70
+ if ENCODING == "floating-point":
75
71
  return np.frombuffer(buffer, dtype=np.float32)
76
72
  return np.frombuffer(buffer, dtype=np.int32)
77
73
 
78
74
  if BIT_DEPTH == 64:
79
75
  return np.frombuffer(buffer, dtype=np.float64)
80
76
 
81
- raise SonusAIError(f'Invalid BIT_DEPTH {BIT_DEPTH}')
77
+ raise ValueError(f"Invalid BIT_DEPTH {BIT_DEPTH}")
82
78
 
83
79
  expanded_name, _ = tokenized_expand(name)
84
80
 
@@ -86,36 +82,41 @@ def read_audio(name: str | Path) -> AudioT:
86
82
  # Read in and convert to desired format
87
83
  # NOTE: pysox format transformations do not handle encoding properly; need to use direct call to sox instead
88
84
  args = [
89
- '-D',
90
- '-G',
85
+ "-D",
86
+ "-G",
91
87
  expanded_name,
92
- '-t', 'raw',
93
- '-r', str(SAMPLE_RATE),
94
- '-b', str(BIT_DEPTH),
95
- '-c', str(CHANNEL_COUNT),
96
- '-e', ENCODING,
97
- '-',
98
- 'remix', '1',
88
+ "-t",
89
+ "raw",
90
+ "-r",
91
+ str(SAMPLE_RATE),
92
+ "-b",
93
+ str(BIT_DEPTH),
94
+ "-c",
95
+ str(CHANNEL_COUNT),
96
+ "-e",
97
+ ENCODING,
98
+ "-",
99
+ "remix",
100
+ "1",
99
101
  ]
100
102
  status, out, err = sox(args, None, False)
101
103
  if status != 0:
102
- raise SonusAIError(f'sox stdout: {out}\nsox stderr: {err}')
104
+ raise RuntimeError(f"sox stdout: {out}\nsox stderr: {err}") # noqa: TRY301
103
105
 
104
106
  return encode_output(out)
105
107
 
106
108
  except Exception as e:
107
109
  if name != expanded_name:
108
- raise SonusAIError(f'Error reading {name} (expanded: {expanded_name}):\n{e}')
110
+ raise OSError(f"Error reading {name} (expanded: {expanded_name}):\n{e}") from e
109
111
  else:
110
- raise SonusAIError(f'Error reading {name}:\n{e}')
112
+ raise OSError(f"Error reading {name}:\n{e}") from e
111
113
 
112
114
 
113
115
  class Transformer(SoxTransformer):
114
- """Override certain sox.Transformer methods
115
- """
116
+ """Override certain sox.Transformer methods"""
116
117
 
117
118
  def fir(self, coefficients):
118
- """Use SoXs FFT convolution engine with given FIR filter coefficients.
119
+ """Use SoX's FFT convolution engine with given FIR filter coefficients.
119
120
 
120
121
  The SonusAI override allows coefficients to be either a list of numbers
121
122
  or a string containing a text file with the coefficients.
@@ -128,22 +129,20 @@ class Transformer(SoxTransformer):
128
129
  """
129
130
  from sox.core import is_number
130
131
 
131
- from sonusai import SonusAIError
132
-
133
132
  if not isinstance(coefficients, list) and not isinstance(coefficients, str):
134
- raise SonusAIError("coefficients must be a list or a str.")
133
+ raise TypeError("coefficients must be a list or a str.")
135
134
 
136
- if isinstance(coefficients, list) and not all([is_number(c) for c in coefficients]):
137
- raise SonusAIError("coefficients list must be numbers.")
135
+ if isinstance(coefficients, list) and not all(is_number(c) for c in coefficients):
136
+ raise TypeError("coefficients list must be numbers.")
138
137
 
139
- effect_args = ['fir']
138
+ effect_args = ["fir"]
140
139
  if isinstance(coefficients, list):
141
- effect_args.extend(['{:f}'.format(c) for c in coefficients])
140
+ effect_args.extend([f"{c:f}" for c in coefficients])
142
141
  else:
143
142
  effect_args.append(coefficients)
144
143
 
145
144
  self.effects.extend(effect_args)
146
- self.effects_log.append('fir')
145
+ self.effects_log.append("fir")
147
146
 
148
147
  return self
149
148
 
@@ -181,42 +180,42 @@ class Transformer(SoxTransformer):
181
180
  from sox.core import is_number
182
181
  from sox.log import logger
183
182
 
184
- from sonusai import SonusAIError
185
-
186
183
  if not is_number(factor) or factor <= 0:
187
- raise SonusAIError('factor must be a positive number')
184
+ raise ValueError("factor must be a positive number")
188
185
 
189
186
  if factor < 0.5 or factor > 2:
190
- logger.warning('Using an extreme time stretching factor. Quality of results will be poor')
187
+ logger.warning("Using an extreme time stretching factor. Quality of results will be poor")
191
188
 
192
- if audio_type not in [None, 'm', 's', 'l']:
193
- raise SonusAIError("audio_type must be one of None, 'm', 's', or 'l'.")
189
+ if audio_type not in [None, "m", "s", "l"]:
190
+ raise ValueError("audio_type must be one of None, 'm', 's', or 'l'.")
194
191
 
195
192
  if not isinstance(quick, bool):
196
- raise SonusAIError('quick must be a boolean')
193
+ raise TypeError("quick must be a boolean")
197
194
 
198
- effect_args = ['tempo']
195
+ effect_args = ["tempo"]
199
196
 
200
197
  if quick:
201
- effect_args.append('-q')
198
+ effect_args.append("-q")
202
199
 
203
200
  if audio_type is not None:
204
- effect_args.append('-{}'.format(audio_type))
201
+ effect_args.append(f"-{audio_type}")
205
202
 
206
- effect_args.append('{:f}'.format(factor))
203
+ effect_args.append(f"{factor:f}")
207
204
 
208
205
  self.effects.extend(effect_args)
209
- self.effects_log.append('tempo')
206
+ self.effects_log.append("tempo")
210
207
 
211
208
  return self
212
209
 
213
- def build(self,
214
- input_filepath: Optional[str | Path] = None,
215
- output_filepath: Optional[str | Path] = None,
216
- input_array: Optional[np.ndarray] = None,
217
- sample_rate_in: Optional[float] = None,
218
- extra_args: Optional[list[str]] = None,
219
- return_output: bool = False) -> tuple[bool, Optional[str], Optional[str]]:
210
+ def build(
211
+ self,
212
+ input_filepath: str | Path | None = None,
213
+ output_filepath: str | Path | None = None,
214
+ input_array: np.ndarray | None = None,
215
+ sample_rate_in: float | None = None,
216
+ extra_args: list[str] | None = None,
217
+ return_output: bool = False,
218
+ ) -> tuple[bool, str | None, str | None]:
220
219
  """Given an input file or array, creates an output_file on disk by
221
220
  executing the current set of commands. This function returns True on
222
221
  success. If return_output is True, this function returns a triple of
@@ -291,18 +290,14 @@ class Transformer(SoxTransformer):
291
290
  from sox.core import sox
292
291
  from sox.log import logger
293
292
 
294
- input_format, input_filepath = self._parse_inputs(
295
- input_filepath, input_array, sample_rate_in
296
- )
293
+ input_format, input_filepath = self._parse_inputs(input_filepath, input_array, sample_rate_in)
297
294
 
298
295
  if output_filepath is None:
299
296
  raise ValueError("output_filepath is not specified!")
300
297
 
301
298
  # set output parameters
302
299
  if input_filepath == output_filepath:
303
- raise ValueError(
304
- "input_filepath must be different from output_filepath."
305
- )
300
+ raise ValueError("input_filepath must be different from output_filepath.")
306
301
  file_info.validate_output_file(output_filepath)
307
302
 
308
303
  args = []
@@ -320,26 +315,22 @@ class Transformer(SoxTransformer):
320
315
 
321
316
  status, out, err = sox(args, input_array, True)
322
317
  if status != 0:
323
- raise SoxError(
324
- f"Stdout: {out}\nStderr: {err}"
325
- )
318
+ raise SoxError(f"Stdout: {out}\nStderr: {err}")
326
319
 
327
- logger.info(
328
- "Created %s with effects: %s",
329
- output_filepath,
330
- " ".join(self.effects_log)
331
- )
320
+ logger.info("Created %s with effects: %s", output_filepath, " ".join(self.effects_log))
332
321
 
333
322
  if return_output:
334
323
  return status, out, err
335
324
 
336
325
  return True, None, None
337
326
 
338
- def build_array(self,
339
- input_filepath: Optional[str | Path] = None,
340
- input_array: Optional[np.ndarray] = None,
341
- sample_rate_in: Optional[int] = None,
342
- extra_args: Optional[list[str]] = None) -> np.ndarray:
327
+ def build_array(
328
+ self,
329
+ input_filepath: str | Path | None = None,
330
+ input_array: np.ndarray | None = None,
331
+ sample_rate_in: int | None = None,
332
+ extra_args: list[str] | None = None,
333
+ ) -> np.ndarray:
343
334
  """Given an input file or array, returns the output as a numpy array
344
335
  by executing the current set of commands. By default, the array will
345
336
  have the same sample rate as the input file unless otherwise specified
@@ -405,62 +396,57 @@ class Transformer(SoxTransformer):
405
396
  from sox.log import logger
406
397
  from sox.transform import ENCODINGS_MAPPING
407
398
 
408
- input_format, input_filepath = self._parse_inputs(
409
- input_filepath, input_array, sample_rate_in
410
- )
399
+ input_format, input_filepath = self._parse_inputs(input_filepath, input_array, sample_rate_in)
411
400
 
412
401
  # check if any of the below commands are part of the effects chain
413
- ignored_commands = ['channels', 'convert']
402
+ ignored_commands = ["channels", "convert"]
414
403
  if set(ignored_commands) & set(self.effects_log):
415
404
  logger.warning(
416
- "When outputting to an array, channels and convert " +
417
- "effects may be ignored. Use set_output_format() to " +
418
- "specify output formats."
405
+ "When outputting to an array, channels and convert "
406
+ + "effects may be ignored. Use set_output_format() to "
407
+ + "specify output formats."
419
408
  )
420
409
 
421
- output_filepath = '-'
410
+ output_filepath = "-"
422
411
 
423
- if input_format.get('file_type') is None:
412
+ if input_format.get("file_type") is None:
424
413
  encoding_out = np.int16
425
414
  else:
426
- encoding_out = [
427
- k for k, v in ENCODINGS_MAPPING.items()
428
- if input_format['file_type'] == v
429
- ][0]
415
+ encoding_out = next(k for k, v in ENCODINGS_MAPPING.items() if input_format["file_type"] == v)
430
416
 
431
417
  n_bits = np.dtype(encoding_out).itemsize * 8
432
418
 
433
419
  output_format = {
434
- 'file_type': 'raw',
435
- 'rate': sample_rate_in,
436
- 'bits': n_bits,
437
- 'channels': input_format['channels'],
438
- 'encoding': None,
439
- 'comments': None,
440
- 'append_comments': True,
420
+ "file_type": "raw",
421
+ "rate": sample_rate_in,
422
+ "bits": n_bits,
423
+ "channels": input_format["channels"],
424
+ "encoding": None,
425
+ "comments": None,
426
+ "append_comments": True,
441
427
  }
442
428
 
443
- if self.output_format.get('rate') is not None:
444
- output_format['rate'] = self.output_format['rate']
429
+ if self.output_format.get("rate") is not None:
430
+ output_format["rate"] = self.output_format["rate"]
445
431
 
446
- if self.output_format.get('channels') is not None:
447
- output_format['channels'] = self.output_format['channels']
432
+ if self.output_format.get("channels") is not None:
433
+ output_format["channels"] = self.output_format["channels"]
448
434
 
449
- if self.output_format.get('bits') is not None:
450
- n_bits = self.output_format['bits']
451
- output_format['bits'] = n_bits
435
+ if self.output_format.get("bits") is not None:
436
+ n_bits = self.output_format["bits"]
437
+ output_format["bits"] = n_bits
452
438
 
453
439
  match n_bits:
454
440
  case 8:
455
- encoding_out = np.int8 # type: ignore
441
+ encoding_out = np.int8 # type: ignore[assignment]
456
442
  case 16:
457
443
  encoding_out = np.int16
458
444
  case 32:
459
- encoding_out = np.float32 # type: ignore
445
+ encoding_out = np.float32 # type: ignore[assignment]
460
446
  case 64:
461
- encoding_out = np.float64 # type: ignore
447
+ encoding_out = np.float64 # type: ignore[assignment]
462
448
  case _:
463
- raise ValueError("invalid n_bits {}".format(n_bits))
449
+ raise ValueError(f"invalid n_bits {n_bits}")
464
450
 
465
451
  args = []
466
452
  args.extend(self.globals)
@@ -477,21 +463,14 @@ class Transformer(SoxTransformer):
477
463
 
478
464
  status, out, err = sox(args, input_array, False)
479
465
  if status != 0:
480
- raise SoxError(
481
- "Stdout: {}\nStderr: {}".format(out, err)
482
- )
466
+ raise SoxError(f"Stdout: {out}\nStderr: {err}")
483
467
 
484
468
  out = np.frombuffer(out, dtype=encoding_out)
485
- if output_format['channels'] > 1:
469
+ if output_format["channels"] > 1:
486
470
  out = out.reshape(
487
- (
488
- output_format['channels'],
489
- int(len(out) / output_format['channels'])
490
- ), order='F'
471
+ (output_format["channels"], int(len(out) / output_format["channels"])),
472
+ order="F",
491
473
  ).T
492
- logger.info(
493
- "Created array with effects: %s",
494
- " ".join(self.effects_log)
495
- )
474
+ logger.info("Created array with effects: %s", " ".join(self.effects_log))
496
475
 
497
476
  return out
@@ -11,7 +11,6 @@ def apply_augmentation(audio: AudioT, augmentation: Augmentation, frame_length:
11
11
  :param frame_length: Pad resulting audio to be a multiple of this
12
12
  :return: Augmented audio
13
13
  """
14
- from sonusai import SonusAIError
15
14
  from .augmentation import pad_audio_to_frame
16
15
  from .constants import BIT_DEPTH
17
16
  from .constants import CHANNEL_COUNT
@@ -45,7 +44,7 @@ def apply_augmentation(audio: AudioT, augmentation: Augmentation, frame_length:
45
44
  has_effects = True
46
45
 
47
46
  if augmentation.tempo is not None:
48
- tfm.tempo(factor=float(augmentation.tempo), audio_type='s')
47
+ tfm.tempo(factor=float(augmentation.tempo), audio_type="s")
49
48
  has_effects = True
50
49
 
51
50
  if augmentation.eq1 is not None:
@@ -70,7 +69,7 @@ def apply_augmentation(audio: AudioT, augmentation: Augmentation, frame_length:
70
69
  audio_out = audio
71
70
 
72
71
  except Exception as e:
73
- raise SonusAIError(f'Error applying {augmentation}: {e}')
72
+ raise RuntimeError(f"Error applying {augmentation}: {e}") from e
74
73
 
75
74
  # make sure length is multiple of frame_length
76
75
  return pad_audio_to_frame(audio=audio_out, frame_length=frame_length)
@@ -84,13 +83,13 @@ def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
84
83
  :return: Augmented audio
85
84
  """
86
85
  import math
87
- from pathlib import Path
88
86
  import tempfile
87
+ from pathlib import Path
89
88
 
90
89
  import numpy as np
91
90
 
92
- from sonusai import SonusAIError
93
91
  from sonusai.utils import linear_to_db
92
+
94
93
  from .constants import SAMPLE_RATE
95
94
  from .sox_audio import Transformer
96
95
 
@@ -111,9 +110,9 @@ def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
111
110
  audio_out = np.pad(array=audio_out, pad_width=(pad, pad))
112
111
 
113
112
  # Write coefficients to temporary file
114
- temp = tempfile.NamedTemporaryFile(mode='w+t')
113
+ temp = tempfile.NamedTemporaryFile(mode="w+t")
115
114
  for d in ir.data:
116
- temp.write(f'{d:f}\n')
115
+ temp.write(f"{d:f}\n")
117
116
  temp.seek(0)
118
117
 
119
118
  # Apply IR and convert back to global sample rate
@@ -123,7 +122,7 @@ def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
123
122
  try:
124
123
  audio_out = tfm.build_array(input_array=audio_out, sample_rate_in=ir.sample_rate)
125
124
  except Exception as e:
126
- raise SonusAIError(f'Error applying IR: {e}')
125
+ raise RuntimeError(f"Error applying IR: {e}") from e
127
126
 
128
127
  path = Path(temp.name)
129
128
  temp.close()
@@ -134,4 +133,4 @@ def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
134
133
  tfm.norm(db_level=max_db)
135
134
  audio_out = tfm.build_array(input_array=audio_out, sample_rate_in=SAMPLE_RATE)
136
135
 
137
- return audio_out[:len(audio)]
136
+ return audio_out[: len(audio)]
@@ -2,7 +2,7 @@ from sonusai.mixture.datatypes import AudioF
2
2
  from sonusai.mixture.datatypes import SpectralMask
3
3
 
4
4
 
5
- def apply_spectral_mask(audio_f: AudioF, spectral_mask: SpectralMask, seed: int = None) -> AudioF:
5
+ def apply_spectral_mask(audio_f: AudioF, spectral_mask: SpectralMask, seed: int | None = None) -> AudioF:
6
6
  """Apply frequency and time masking
7
7
 
8
8
  Implementation of SpecAugment: A Simple Data Augmentation Method for Automatic Speech Recognition
@@ -24,10 +24,8 @@ def apply_spectral_mask(audio_f: AudioF, spectral_mask: SpectralMask, seed: int
24
24
  """
25
25
  import numpy as np
26
26
 
27
- from sonusai import SonusAIError
28
-
29
27
  if audio_f.ndim != 2:
30
- raise SonusAIError('feature input must have three dimensions [frames, bins]')
28
+ raise ValueError("feature input must have three dimensions [frames, bins]")
31
29
 
32
30
  frames, bins = audio_f.shape
33
31
 
@@ -41,13 +39,13 @@ def apply_spectral_mask(audio_f: AudioF, spectral_mask: SpectralMask, seed: int
41
39
  for _ in range(spectral_mask.f_num):
42
40
  f_width = int(rng.uniform(0, f_max_width))
43
41
  f_start = rng.integers(0, bins - f_width, endpoint=True)
44
- audio_f[:, f_start:f_start + f_width] = 0
42
+ audio_f[:, f_start : f_start + f_width] = 0
45
43
 
46
44
  # apply t_num time masks to the feature
47
45
  t_upper_bound = int(spectral_mask.t_max_percent / 100 * frames)
48
46
  for _ in range(spectral_mask.t_num):
49
47
  t_width = min(int(rng.uniform(0, spectral_mask.t_max_width)), t_upper_bound)
50
48
  t_start = rng.integers(0, frames - t_width, endpoint=True)
51
- audio_f[t_start:t_start + t_width, :] = 0
49
+ audio_f[t_start : t_start + t_width, :] = 0
52
50
 
53
51
  return audio_f