sonusai 0.20.2__py3-none-any.whl → 0.20.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,8 @@
1
+ from pathlib import Path
2
+
3
+ import numpy as np
4
+ from sox import Transformer as SoxTransformer
5
+
1
6
  from sonusai.mixture.datatypes import AudioStatsMetrics
2
7
  from sonusai.mixture.datatypes import AudioT
3
8
 
@@ -12,7 +17,6 @@ def _convert_str_with_factors_to_int(x: str) -> int:
12
17
 
13
18
  def calc_audio_stats(audio: AudioT, win_len: float | None = None) -> AudioStatsMetrics:
14
19
  from sonusai.mixture import SAMPLE_RATE
15
- from sonusai.mixture import Transformer
16
20
 
17
21
  args = ["stats"]
18
22
  if win_len is not None:
@@ -53,3 +57,275 @@ def calc_audio_stats(audio: AudioT, win_len: float | None = None) -> AudioStatsM
53
57
  fl=float(stats["Flat factor"]),
54
58
  pkc=_convert_str_with_factors_to_int(stats["Pk count"]),
55
59
  )
60
+
61
+
62
+ class Transformer(SoxTransformer):
63
+ """Override certain sox.Transformer methods"""
64
+
65
+ def build( # pyright: ignore [reportIncompatibleMethodOverride]
66
+ self,
67
+ input_filepath: str | Path | None = None,
68
+ output_filepath: str | Path | None = None,
69
+ input_array: np.ndarray | None = None,
70
+ sample_rate_in: float | None = None,
71
+ extra_args: list[str] | None = None,
72
+ return_output: bool = False,
73
+ ) -> tuple[bool, str | None, str | None]:
74
+ """Given an input file or array, creates an output_file on disk by
75
+ executing the current set of commands. This function always returns a
76
+ triple of (status, out, err). If return_output is True, the triple holds
77
+ the status reported by sox (0 on success) along with its stdout and
78
+ stderr; otherwise it is (True, None, None).
79
+
80
+ Parameters
81
+ ----------
82
+ input_filepath : str, Path or None
83
+ Either path to input audio file or None for array input.
84
+ output_filepath : str
85
+ Path to desired output file. If a file already exists at
86
+ the given path, the file will be overwritten.
87
+ If '-n', no file is created.
88
+ input_array : np.ndarray or None
89
+ An np.ndarray of a waveform with shape (n_samples, n_channels).
90
+ sample_rate_in must also be provided.
91
+ If None, input_filepath must be specified.
92
+ sample_rate_in : int
93
+ Sample rate of input_array.
94
+ This argument is ignored if input_array is None.
95
+ extra_args : list or None, default=None
96
+ If a list is given, these additional arguments are passed to SoX
97
+ at the end of the list of effects.
98
+ Don't use this argument unless you know exactly what you're doing!
99
+ return_output : bool, default=False
100
+ If True, returns the status and information sent to stderr and
101
+ stdout as a tuple (status, stdout, stderr).
102
+ If False, out and err are returned as None, i.e. the result is
103
+ (True, None, None). A missing output_filepath raises ValueError.
104
+
105
+ Returns
106
+ -------
107
+ status : bool
108
+ True on success.
109
+ out : str (optional)
110
+ This is None unless return_output is True.
111
+ When returned, captures the stdout produced by sox.
112
+ err : str (optional)
113
+ This is None unless return_output is True.
114
+ When returned, captures the stderr produced by sox.
115
+
116
+ Examples
117
+ --------
118
+ > import numpy as np
119
+ > import sox
120
+ > tfm = sox.Transformer()
121
+ > sample_rate = 44100
122
+ > y = np.sin(2 * np.pi * 440.0 * np.arange(sample_rate * 1.0) / sample_rate)
123
+
124
+ file in, file out - basic usage
125
+
126
+ > status = tfm.build('path/to/input.wav', 'path/to/output.mp3')
127
+
128
+ file in, file out - equivalent usage
129
+
130
+ > status = tfm.build(
131
+ input_filepath='path/to/input.wav',
132
+ output_filepath='path/to/output.mp3'
133
+ )
134
+
135
+ array in, file out
136
+
137
+ > status = tfm.build(
138
+ input_array=y, sample_rate_in=sample_rate,
139
+ output_filepath='path/to/output.mp3'
140
+ )
141
+
142
+ """
143
+ from sox import file_info
144
+ from sox.core import SoxError
145
+ from sox.core import sox
146
+ from sox.log import logger
147
+
148
+ input_format, input_filepath = self._parse_inputs(input_filepath, input_array, sample_rate_in)
149
+
150
+ if output_filepath is None:
151
+ raise ValueError("output_filepath is not specified!")
152
+
153
+ # set output parameters
154
+ if input_filepath == output_filepath:
155
+ raise ValueError("input_filepath must be different from output_filepath.")
156
+ file_info.validate_output_file(output_filepath)
157
+
158
+ args = []
159
+ args.extend(self.globals)
160
+ args.extend(self._input_format_args(input_format))
161
+ args.append(input_filepath)
162
+ args.extend(self._output_format_args(self.output_format))
163
+ args.append(output_filepath)
164
+ args.extend(self.effects)
165
+
166
+ if extra_args is not None:
167
+ if not isinstance(extra_args, list):
168
+ raise ValueError("extra_args must be a list.")
169
+ args.extend(extra_args)
170
+
171
+ status, out, err = sox(args, input_array, True)
172
+ if status != 0:
173
+ raise SoxError(f"Stdout: {out}\nStderr: {err}")
174
+
175
+ logger.info("Created %s with effects: %s", output_filepath, " ".join(self.effects_log))
176
+
177
+ if return_output:
178
+ return status, out, err # pyright: ignore [reportReturnType]
179
+
180
+ return True, None, None
181
+
182
+ def build_array( # pyright: ignore [reportIncompatibleMethodOverride]
183
+ self,
184
+ input_filepath: str | Path | None = None,
185
+ input_array: np.ndarray | None = None,
186
+ sample_rate_in: int | None = None,
187
+ extra_args: list[str] | None = None,
188
+ ) -> np.ndarray:
189
+ """Given an input file or array, returns the output as a numpy array
190
+ by executing the current set of commands. By default, the array will
191
+ have the same sample rate as the input file unless otherwise specified
192
+ using set_output_format. Functions such as channels and convert
193
+ will be ignored!
194
+
195
+ The SonusAI override does not generate a warning for rate transforms.
196
+
197
+ Parameters
198
+ ----------
199
+ input_filepath : str, Path or None
200
+ Either path to input audio file or None.
201
+ input_array : np.ndarray or None
202
+ A np.ndarray of a waveform with shape (n_samples, n_channels).
203
+ If this argument is passed, sample_rate_in must also be provided.
204
+ If None, input_filepath must be specified.
205
+ sample_rate_in : int
206
+ Sample rate of input_array.
207
+ This argument is ignored if input_array is None.
208
+ extra_args : list or None, default=None
209
+ If a list is given, these additional arguments are passed to SoX
210
+ at the end of the list of effects.
211
+ Don't use this argument unless you know exactly what you're doing!
212
+
213
+ Returns
214
+ -------
215
+ output_array : np.ndarray
216
+ Output audio as a numpy array
217
+
218
+ Examples
219
+ --------
220
+
221
+ > import numpy as np
222
+ > import sox
223
+ > tfm = sox.Transformer()
224
+ > sample_rate = 44100
225
+ > y = np.sin(2 * np.pi * 440.0 * np.arange(sample_rate * 1.0) / sample_rate)
226
+
227
+ file in, array out
228
+
229
+ > output_array = tfm.build_array(input_filepath='path/to/input.wav')
230
+
231
+ array in, array out
232
+
233
+ > output_array = tfm.build_array(input_array=y, sample_rate_in=sample_rate)
234
+
235
+ specifying the output sample rate
236
+
237
+ > tfm.set_output_format(rate=8000)
238
+ > output_array = tfm.build_array(input_array=y, sample_rate_in=sample_rate)
239
+
240
+ if an effect changes the number of channels, you must explicitly
241
+ specify the number of output channels
242
+
243
+ > tfm.remix(remix_dictionary={1: [1], 2: [1], 3: [1]})
244
+ > tfm.set_output_format(channels=3)
245
+ > output_array = tfm.build_array(input_array=y, sample_rate_in=sample_rate)
246
+
247
+
248
+ """
249
+ from sox.core import SoxError
250
+ from sox.core import sox
251
+ from sox.log import logger
252
+ from sox.transform import ENCODINGS_MAPPING
253
+
254
+ input_format, input_filepath = self._parse_inputs(input_filepath, input_array, sample_rate_in)
255
+
256
+ # check if any of the below commands are part of the effects chain
257
+ ignored_commands = ["channels", "convert"]
258
+ if set(ignored_commands) & set(self.effects_log):
259
+ logger.warning(
260
+ "When outputting to an array, channels and convert "
261
+ + "effects may be ignored. Use set_output_format() to "
262
+ + "specify output formats."
263
+ )
264
+
265
+ output_filepath = "-"
266
+
267
+ if input_format.get("file_type") is None:
268
+ encoding_out = np.int16
269
+ else:
270
+ encoding_out = next(k for k, v in ENCODINGS_MAPPING.items() if input_format["file_type"] == v)
271
+
272
+ n_bits = np.dtype(encoding_out).itemsize * 8
273
+
274
+ output_format = {
275
+ "file_type": "raw",
276
+ "rate": sample_rate_in,
277
+ "bits": n_bits,
278
+ "channels": input_format["channels"],
279
+ "encoding": None,
280
+ "comments": None,
281
+ "append_comments": True,
282
+ }
283
+
284
+ if self.output_format.get("rate") is not None:
285
+ output_format["rate"] = self.output_format["rate"]
286
+
287
+ if self.output_format.get("channels") is not None:
288
+ output_format["channels"] = self.output_format["channels"]
289
+
290
+ if self.output_format.get("bits") is not None:
291
+ n_bits = self.output_format["bits"]
292
+ output_format["bits"] = n_bits
293
+
294
+ match n_bits:
295
+ case 8:
296
+ encoding_out = np.int8 # type: ignore[assignment]
297
+ case 16:
298
+ encoding_out = np.int16
299
+ case 32:
300
+ encoding_out = np.float32 # type: ignore[assignment]
301
+ case 64:
302
+ encoding_out = np.float64 # type: ignore[assignment]
303
+ case _:
304
+ raise ValueError(f"invalid n_bits {n_bits}")
305
+
306
+ args = []
307
+ args.extend(self.globals)
308
+ args.extend(self._input_format_args(input_format))
309
+ args.append(input_filepath)
310
+ args.extend(self._output_format_args(output_format))
311
+ args.append(output_filepath)
312
+ args.extend(self.effects)
313
+
314
+ if extra_args is not None:
315
+ if not isinstance(extra_args, list):
316
+ raise ValueError("extra_args must be a list.")
317
+ args.extend(extra_args)
318
+
319
+ status, out, err = sox(args, input_array, False)
320
+ if status != 0:
321
+ raise SoxError(f"Stdout: {out}\nStderr: {err}")
322
+
323
+ out = np.frombuffer(out, dtype=encoding_out) # pyright: ignore [reportArgumentType, reportCallIssue]
324
+ if output_format["channels"] > 1:
325
+ out = out.reshape(
326
+ (output_format["channels"], int(len(out) / output_format["channels"])),
327
+ order="F",
328
+ ).T
329
+ logger.info("Created array with effects: %s", " ".join(self.effects_log))
330
+
331
+ return out
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: sonusai
3
- Version: 0.20.2
3
+ Version: 0.20.3
4
4
  Summary: Framework for building deep neural network models for sound, speech, and voice AI
5
5
  Home-page: https://aaware.com
6
6
  License: GPL-3.0-only
@@ -22,7 +22,7 @@ sonusai/ir_metric.py,sha256=n35_RssAk2jjqm1iXJ6euMtK00LV4qohdBfDAZZpNlU,19581
22
22
  sonusai/lsdb.py,sha256=0HOGDDndB3LT9cz9AaxKIpt9vslAoSP4F239gply4Xg,5149
23
23
  sonusai/main.py,sha256=HbnEia1B1-Z-mlHkLfojH8aj9GIpL1Btw3oH60T_CCQ,2590
24
24
  sonusai/metrics/__init__.py,sha256=ssV6JEK_oklRSocsp6HMcG-GtJvV8IkRQtdKhHHmwU8,878
25
- sonusai/metrics/calc_audio_stats.py,sha256=IHgYEPaob_Nw35SaH3tyHp7Wwju4f-2-BJZ99JyeLmc,1572
25
+ sonusai/metrics/calc_audio_stats.py,sha256=x3poP4_EzBLS8f1bnP2VzqI2nTjqEKbU1661Hngcuak,11846
26
26
  sonusai/metrics/calc_class_weights.py,sha256=SUOCdM4w03rFpyxAriPnPwCtEEFsAH3WxpK9N_fupwo,3637
27
27
  sonusai/metrics/calc_optimal_thresholds.py,sha256=0JOqU__doeOpNtgEZgeO1Kg7pttJRpITTVmqLU6TadY,3513
28
28
  sonusai/metrics/calc_pcm.py,sha256=yBQV9UJ1GK5f4c_8TNABMtZR-xyStKJCsSTT0FQGa50,1886
@@ -122,7 +122,7 @@ sonusai/utils/temp_seed.py,sha256=Ava5TCGpvDBtaRx2l-40CuGIjhgLevu1KFfZsgr38qM,21
122
122
  sonusai/utils/write_audio.py,sha256=0lKdaX57N6H-UWdioqmXCJMjwT1eBz5B-bSGqDvloAc,838
123
123
  sonusai/utils/yes_or_no.py,sha256=0h1okjXmDNbJp7rZJFR2V-HFU1GJDm3YFTUVmYExkOU,263
124
124
  sonusai/vars.py,sha256=kBBzuvC8szmdIZEEDA7XXmD765addZKdM2aFipeGO1w,933
125
- sonusai-0.20.2.dist-info/METADATA,sha256=CT_z1tJfku142nH0PL22DT7EEc8bFfJj9qyWcR7v6FU,2535
126
- sonusai-0.20.2.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
127
- sonusai-0.20.2.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
128
- sonusai-0.20.2.dist-info/RECORD,,
125
+ sonusai-0.20.3.dist-info/METADATA,sha256=b1BfQSo-D0xBiozUy426X-iRkK0uY9_LlZkg1ZXaSl8,2535
126
+ sonusai-0.20.3.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
127
+ sonusai-0.20.3.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
128
+ sonusai-0.20.3.dist-info/RECORD,,