sonusai 0.18.8__py3-none-any.whl → 0.19.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/__init__.py +20 -29
- sonusai/aawscd_probwrite.py +18 -18
- sonusai/audiofe.py +93 -80
- sonusai/calc_metric_spenh.py +395 -321
- sonusai/data/genmixdb.yml +5 -11
- sonusai/{gentcst.py → deprecated/gentcst.py} +146 -149
- sonusai/{plot.py → deprecated/plot.py} +177 -131
- sonusai/{tplot.py → deprecated/tplot.py} +124 -102
- sonusai/doc/__init__.py +1 -1
- sonusai/doc/doc.py +112 -177
- sonusai/doc.py +10 -10
- sonusai/genft.py +93 -77
- sonusai/genmetrics.py +59 -46
- sonusai/genmix.py +116 -104
- sonusai/genmixdb.py +194 -153
- sonusai/lsdb.py +56 -66
- sonusai/main.py +23 -20
- sonusai/metrics/__init__.py +2 -0
- sonusai/metrics/calc_audio_stats.py +29 -24
- sonusai/metrics/calc_class_weights.py +7 -7
- sonusai/metrics/calc_optimal_thresholds.py +5 -7
- sonusai/metrics/calc_pcm.py +3 -3
- sonusai/metrics/calc_pesq.py +10 -7
- sonusai/metrics/calc_phase_distance.py +3 -3
- sonusai/metrics/calc_sa_sdr.py +10 -8
- sonusai/metrics/calc_segsnr_f.py +15 -17
- sonusai/metrics/calc_speech.py +105 -47
- sonusai/metrics/calc_wer.py +35 -32
- sonusai/metrics/calc_wsdr.py +10 -7
- sonusai/metrics/class_summary.py +30 -27
- sonusai/metrics/confusion_matrix_summary.py +25 -22
- sonusai/metrics/one_hot.py +91 -57
- sonusai/metrics/snr_summary.py +53 -46
- sonusai/mixture/__init__.py +19 -14
- sonusai/mixture/audio.py +4 -6
- sonusai/mixture/augmentation.py +37 -43
- sonusai/mixture/class_count.py +5 -14
- sonusai/mixture/config.py +292 -225
- sonusai/mixture/constants.py +41 -30
- sonusai/mixture/data_io.py +155 -0
- sonusai/mixture/datatypes.py +111 -108
- sonusai/mixture/db_datatypes.py +54 -70
- sonusai/mixture/eq_rule_is_valid.py +6 -9
- sonusai/mixture/feature.py +50 -46
- sonusai/mixture/generation.py +522 -389
- sonusai/mixture/helpers.py +217 -272
- sonusai/mixture/log_duration_and_sizes.py +16 -13
- sonusai/mixture/mixdb.py +677 -473
- sonusai/mixture/soundfile_audio.py +12 -17
- sonusai/mixture/sox_audio.py +91 -112
- sonusai/mixture/sox_augmentation.py +8 -9
- sonusai/mixture/spectral_mask.py +4 -6
- sonusai/mixture/target_class_balancing.py +41 -36
- sonusai/mixture/targets.py +69 -67
- sonusai/mixture/tokenized_shell_vars.py +23 -23
- sonusai/mixture/torchaudio_audio.py +14 -15
- sonusai/mixture/torchaudio_augmentation.py +23 -27
- sonusai/mixture/truth.py +48 -26
- sonusai/mixture/truth_functions/__init__.py +26 -0
- sonusai/mixture/truth_functions/crm.py +56 -38
- sonusai/mixture/truth_functions/datatypes.py +37 -0
- sonusai/mixture/truth_functions/energy.py +85 -59
- sonusai/mixture/truth_functions/file.py +30 -30
- sonusai/mixture/truth_functions/phoneme.py +14 -7
- sonusai/mixture/truth_functions/sed.py +71 -45
- sonusai/mixture/truth_functions/target.py +69 -106
- sonusai/mkwav.py +52 -85
- sonusai/onnx_predict.py +46 -43
- sonusai/queries/__init__.py +3 -1
- sonusai/queries/queries.py +100 -59
- sonusai/speech/__init__.py +2 -0
- sonusai/speech/l2arctic.py +24 -23
- sonusai/speech/librispeech.py +16 -17
- sonusai/speech/mcgill.py +22 -21
- sonusai/speech/textgrid.py +32 -25
- sonusai/speech/timit.py +45 -42
- sonusai/speech/vctk.py +14 -13
- sonusai/speech/voxceleb.py +26 -20
- sonusai/summarize_metric_spenh.py +11 -10
- sonusai/utils/__init__.py +4 -3
- sonusai/utils/asl_p56.py +1 -1
- sonusai/utils/asr.py +37 -17
- sonusai/utils/asr_functions/__init__.py +2 -0
- sonusai/utils/asr_functions/aaware_whisper.py +18 -12
- sonusai/utils/audio_devices.py +12 -12
- sonusai/utils/braced_glob.py +6 -8
- sonusai/utils/calculate_input_shape.py +1 -4
- sonusai/utils/compress.py +2 -2
- sonusai/utils/convert_string_to_number.py +1 -3
- sonusai/utils/create_timestamp.py +1 -1
- sonusai/utils/create_ts_name.py +2 -2
- sonusai/utils/dataclass_from_dict.py +1 -1
- sonusai/utils/docstring.py +6 -6
- sonusai/utils/energy_f.py +9 -7
- sonusai/utils/engineering_number.py +56 -54
- sonusai/utils/get_label_names.py +8 -10
- sonusai/utils/human_readable_size.py +2 -2
- sonusai/utils/model_utils.py +3 -5
- sonusai/utils/numeric_conversion.py +2 -4
- sonusai/utils/onnx_utils.py +43 -32
- sonusai/utils/parallel.py +40 -27
- sonusai/utils/print_mixture_details.py +25 -22
- sonusai/utils/ranges.py +12 -12
- sonusai/utils/read_predict_data.py +11 -9
- sonusai/utils/reshape.py +19 -26
- sonusai/utils/seconds_to_hms.py +1 -1
- sonusai/utils/stacked_complex.py +8 -16
- sonusai/utils/stratified_shuffle_split.py +29 -27
- sonusai/utils/write_audio.py +2 -2
- sonusai/utils/yes_or_no.py +3 -3
- sonusai/vars.py +14 -14
- {sonusai-0.18.8.dist-info → sonusai-0.19.5.dist-info}/METADATA +20 -21
- sonusai-0.19.5.dist-info/RECORD +125 -0
- {sonusai-0.18.8.dist-info → sonusai-0.19.5.dist-info}/WHEEL +1 -1
- sonusai/mixture/truth_functions/data.py +0 -58
- sonusai/utils/read_mixture_data.py +0 -14
- sonusai-0.18.8.dist-info/RECORD +0 -125
- {sonusai-0.18.8.dist-info → sonusai-0.19.5.dist-info}/entry_points.txt +0 -0
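Two mechanical changes recur throughout the hunks below: string literals are normalized from single to double quotes, and the package-wide `SonusAIError` is retired in favor of built-in exceptions (`OSError`, `ValueError`, `TypeError`, `RuntimeError`) raised with explicit `from e` chaining. A minimal sketch of the new error-handling style (illustrative names only, not code from the package):

```python
from pathlib import Path


def read_samples(name: str) -> bytes:
    """Hypothetical reader demonstrating the 0.19.x error-handling pattern."""
    try:
        return Path(name).read_bytes()
    except Exception as e:
        # "raise ... from e" preserves the original exception as __cause__,
        # so tracebacks show both the low-level failure and this OSError.
        raise OSError(f"Error reading {name}: {e}") from e
```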
```diff
@@ -9,29 +9,28 @@ def _raw_read(name: str | Path) -> tuple[AudioT, int]:
     import soundfile
     from pydub import AudioSegment
 
-    from sonusai import SonusAIError
     from .tokenized_shell_vars import tokenized_expand
 
     expanded_name, _ = tokenized_expand(name)
 
     try:
-        if expanded_name.endswith('.mp3'):
+        if expanded_name.endswith(".mp3"):
             sound = AudioSegment.from_mp3(expanded_name)
             raw = np.array(sound.get_array_of_samples()).astype(np.float32).reshape((-1, sound.channels))
             raw = raw / 2 ** (sound.sample_width * 8 - 1)
             sample_rate = sound.frame_rate
-        elif expanded_name.endswith('.m4a'):
+        elif expanded_name.endswith(".m4a"):
             sound = AudioSegment.from_file(expanded_name)
             raw = np.array(sound.get_array_of_samples()).astype(np.float32).reshape((-1, sound.channels))
             raw = raw / 2 ** (sound.sample_width * 8 - 1)
             sample_rate = sound.frame_rate
         else:
-            raw, sample_rate = soundfile.read(expanded_name, always_2d=True, dtype='float32')
+            raw, sample_rate = soundfile.read(expanded_name, always_2d=True, dtype="float32")
     except Exception as e:
         if name != expanded_name:
-            raise SonusAIError(f'Error reading {name} (expanded: {expanded_name}): {e}')
+            raise OSError(f"Error reading {name} (expanded: {expanded_name}): {e}") from e
         else:
-            raise SonusAIError(f'Error reading {name}: {e}')
+            raise OSError(f"Error reading {name}: {e}") from e
 
     return np.squeeze(raw[:, 0]), sample_rate
 
```
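For reference, the MP3 branch of `_raw_read` above condenses to the following standalone sketch (the input path is hypothetical): pydub decodes to interleaved integers, which are scaled by the sample width to float32.

```python
import numpy as np
from pydub import AudioSegment

sound = AudioSegment.from_mp3("speech.mp3")  # hypothetical input file
raw = np.array(sound.get_array_of_samples()).astype(np.float32).reshape((-1, sound.channels))
raw = raw / 2 ** (sound.sample_width * 8 - 1)  # scale full-scale ints into [-1, 1)
mono = np.squeeze(raw[:, 0])  # keep the first channel, as _raw_read does
```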
```diff
@@ -45,24 +44,23 @@ def get_sample_rate(name: str | Path) -> int:
     import soundfile
     from pydub import AudioSegment
 
-    from sonusai import SonusAIError
     from .tokenized_shell_vars import tokenized_expand
 
     expanded_name, _ = tokenized_expand(name)
 
     try:
-        if expanded_name.endswith('.mp3'):
+        if expanded_name.endswith(".mp3"):
             return AudioSegment.from_mp3(expanded_name).frame_rate
 
-        if expanded_name.endswith('.m4a'):
+        if expanded_name.endswith(".m4a"):
             return AudioSegment.from_file(expanded_name).frame_rate
 
         return soundfile.info(expanded_name).samplerate
     except Exception as e:
         if name != expanded_name:
-            raise SonusAIError(f'Error reading {name} (expanded: {expanded_name}): {e}')
+            raise OSError(f"Error reading {name} (expanded: {expanded_name}): {e}") from e
         else:
-            raise SonusAIError(f'Error reading {name}: {e}')
+            raise OSError(f"Error reading {name}: {e}") from e
 
 
 def read_ir(name: str | Path) -> ImpulseResponseData:
```
```diff
@@ -95,10 +93,7 @@ def read_audio(name: str | Path) -> AudioT:
     from .constants import SAMPLE_RATE
 
     out, sample_rate = _raw_read(name)
-    out = librosa.resample(out,
-                           orig_sr=sample_rate,
-                           target_sr=SAMPLE_RATE,
-                           res_type='soxr_hq')
+    out = librosa.resample(out, orig_sr=sample_rate, target_sr=SAMPLE_RATE, res_type="soxr_hq")
 
     return out
 
```
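The collapsed call is plain librosa; a usage sketch, with 16000 standing in for sonusai's `SAMPLE_RATE` constant (an assumed value here):

```python
import librosa
import numpy as np

audio = np.zeros(44100, dtype=np.float32)  # one second at 44.1 kHz
# soxr_hq selects the high-quality SoX resampler backend
resampled = librosa.resample(audio, orig_sr=44100, target_sr=16000, res_type="soxr_hq")
```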
```diff
@@ -119,11 +114,11 @@ def get_num_samples(name: str | Path) -> int:
 
     expanded_name, _ = tokenized_expand(name)
 
-    if expanded_name.endswith('.mp3'):
+    if expanded_name.endswith(".mp3"):
        sound = AudioSegment.from_mp3(expanded_name)
        samples = sound.frame_count()
        sample_rate = sound.frame_rate
-    elif expanded_name.endswith('.m4a'):
+    elif expanded_name.endswith(".m4a"):
        sound = AudioSegment.from_file(expanded_name)
        samples = sound.frame_count()
        sample_rate = sound.frame_rate
```
sonusai/mixture/sox_audio.py
CHANGED
```diff
@@ -1,5 +1,4 @@
 from pathlib import Path
-from typing import Optional
 
 import numpy as np
 from sox import Transformer as SoxTransformer
```
```diff
@@ -16,7 +15,6 @@ def read_impulse_response(name: str | Path) -> ImpulseResponseData:
     """
     from scipy.io import wavfile
 
-    from sonusai import SonusAIError
     from .datatypes import ImpulseResponseData
     from .tokenized_shell_vars import tokenized_expand
 
```
```diff
@@ -27,9 +25,9 @@ def read_impulse_response(name: str | Path) -> ImpulseResponseData:
         sample_rate, data = wavfile.read(expanded_name)
     except Exception as e:
         if name != expanded_name:
-            raise SonusAIError(f'Error reading {name} (expanded: {expanded_name}): {e}')
+            raise OSError(f"Error reading {name} (expanded: {expanded_name}): {e}") from e
         else:
-            raise SonusAIError(f'Error reading {name}: {e}')
+            raise OSError(f"Error reading {name}: {e}") from e
 
     data = data.astype(np.float32)
     offset = np.argmax(data)
```
```diff
@@ -49,7 +47,6 @@ def read_audio(name: str | Path) -> AudioT:
 
     from sox.core import sox
 
-    from sonusai import SonusAIError
     from .constants import BIT_DEPTH
     from .constants import CHANNEL_COUNT
     from .constants import ENCODING
```
```diff
@@ -57,7 +54,6 @@ def read_audio(name: str | Path) -> AudioT:
     from .tokenized_shell_vars import tokenized_expand
 
     def encode_output(buffer: Any) -> np.ndarray:
-        from sonusai import SonusAIError
         from .constants import BIT_DEPTH
         from .constants import ENCODING
 
```
```diff
@@ -71,14 +67,14 @@ def read_audio(name: str | Path) -> AudioT:
             return np.frombuffer(buffer, dtype=np.int32)
 
         if BIT_DEPTH == 32:
-            if ENCODING == 'floating-point':
+            if ENCODING == "floating-point":
                 return np.frombuffer(buffer, dtype=np.float32)
             return np.frombuffer(buffer, dtype=np.int32)
 
         if BIT_DEPTH == 64:
             return np.frombuffer(buffer, dtype=np.float64)
 
-        raise SonusAIError(f'Invalid BIT_DEPTH {BIT_DEPTH}')
+        raise ValueError(f"Invalid BIT_DEPTH {BIT_DEPTH}")
 
     expanded_name, _ = tokenized_expand(name)
 
```
```diff
@@ -86,36 +82,41 @@ def read_audio(name: str | Path) -> AudioT:
         # Read in and convert to desired format
         # NOTE: pysox format transformations do not handle encoding properly; need to use direct call to sox instead
         args = [
-            '-D',
-            '-G',
+            "-D",
+            "-G",
             expanded_name,
-            '-t', 'raw',
-            '-r', str(SAMPLE_RATE),
-            '-b', str(BIT_DEPTH),
-            '-c', str(CHANNEL_COUNT),
-            '-e', ENCODING,
-            '-',
-            'remix', '1',
+            "-t",
+            "raw",
+            "-r",
+            str(SAMPLE_RATE),
+            "-b",
+            str(BIT_DEPTH),
+            "-c",
+            str(CHANNEL_COUNT),
+            "-e",
+            ENCODING,
+            "-",
+            "remix",
+            "1",
         ]
         status, out, err = sox(args, None, False)
         if status != 0:
-            raise SonusAIError(f'sox stdout: {out}\nsox stderr: {err}')
+            raise RuntimeError(f"sox stdout: {out}\nsox stderr: {err}")  # noqa: TRY301
 
         return encode_output(out)
 
     except Exception as e:
         if name != expanded_name:
-            raise SonusAIError(f'Error reading {name} (expanded: {expanded_name}):\n{e}')
+            raise OSError(f"Error reading {name} (expanded: {expanded_name}):\n{e}") from e
         else:
-            raise SonusAIError(f'Error reading {name}:\n{e}')
+            raise OSError(f"Error reading {name}:\n{e}") from e
 
 
 class Transformer(SoxTransformer):
-    """Override certain sox.Transformer methods
-    """
+    """Override certain sox.Transformer methods"""
 
     def fir(self, coefficients):
         """Use SoX's FFT convolution engine with given FIR filter coefficients.
 
         The SonusAI override allows coefficients to be either a list of numbers
         or a string containing a text file with the coefficients.
```
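The reformatted `args` list in `read_audio` maps one-to-one onto a sox command line. A sketch of the equivalent direct invocation; the input path and the values 16000/16/1/`signed-integer` stand in for sonusai's constants and are assumptions for illustration:

```python
import subprocess

cmd = [
    "sox",
    "-D",  # no automatic dither
    "-G",  # gain guard against clipping
    "input.wav",  # hypothetical input file
    "-t", "raw",  # raw samples to stdout
    "-r", "16000",
    "-b", "16",
    "-c", "1",
    "-e", "signed-integer",
    "-",
    "remix", "1",  # keep only the first channel
]
raw_bytes = subprocess.run(cmd, capture_output=True, check=True).stdout
```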
```diff
@@ -128,22 +129,20 @@ class Transformer(SoxTransformer):
         """
         from sox.core import is_number
 
-        from sonusai import SonusAIError
-
         if not isinstance(coefficients, list) and not isinstance(coefficients, str):
-            raise SonusAIError('coefficients must be a list or a str.')
+            raise TypeError("coefficients must be a list or a str.")
 
-        if isinstance(coefficients, list) and not all([is_number(c) for c in coefficients]):
-            raise SonusAIError('coefficients list must be numbers.')
+        if isinstance(coefficients, list) and not all(is_number(c) for c in coefficients):
+            raise TypeError("coefficients list must be numbers.")
 
-        effect_args = ['fir']
+        effect_args = ["fir"]
         if isinstance(coefficients, list):
-            effect_args.extend(['{:f}'.format(c) for c in coefficients])
+            effect_args.extend([f"{c:f}" for c in coefficients])
         else:
             effect_args.append(coefficients)
 
         self.effects.extend(effect_args)
-        self.effects_log.append('fir')
+        self.effects_log.append("fir")
 
         return self
 
```
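A usage sketch of the overridden `fir()`, per its docstring above (the coefficients file path is hypothetical):

```python
from sonusai.mixture.sox_audio import Transformer

tfm = Transformer()
tfm.fir([0.1, 0.2, 0.4, 0.2, 0.1])  # inline coefficients, as in stock pysox

tfm_from_file = Transformer()
tfm_from_file.fir("fir_coefficients.txt")  # SonusAI extension: path to a coefficients file
```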
```diff
@@ -181,42 +180,42 @@ class Transformer(SoxTransformer):
         from sox.core import is_number
         from sox.log import logger
 
-        from sonusai import SonusAIError
-
         if not is_number(factor) or factor <= 0:
-            raise SonusAIError('factor must be a positive number')
+            raise ValueError("factor must be a positive number")
 
         if factor < 0.5 or factor > 2:
-            logger.warning('Using an extreme time stretching factor. Quality of results will be poor')
+            logger.warning("Using an extreme time stretching factor. Quality of results will be poor")
 
-        if audio_type not in [None, 'm', 's', 'l']:
-            raise SonusAIError("audio_type must be one of None, 'm', 's', or 'l'.")
+        if audio_type not in [None, "m", "s", "l"]:
+            raise ValueError("audio_type must be one of None, 'm', 's', or 'l'.")
 
         if not isinstance(quick, bool):
-            raise SonusAIError('quick must be a boolean')
+            raise TypeError("quick must be a boolean")
 
-        effect_args = ['tempo']
+        effect_args = ["tempo"]
 
         if quick:
-            effect_args.append('-q')
+            effect_args.append("-q")
 
         if audio_type is not None:
-            effect_args.append('-{}'.format(audio_type))
+            effect_args.append(f"-{audio_type}")
 
-        effect_args.append('{:f}'.format(factor))
+        effect_args.append(f"{factor:f}")
 
         self.effects.extend(effect_args)
-        self.effects_log.append('tempo')
+        self.effects_log.append("tempo")
 
         return self
 
-    def build(self,
-              input_filepath: Optional[str | Path] = None,
-              output_filepath: Optional[str | Path] = None,
-              input_array: Optional[np.ndarray] = None,
-              sample_rate_in: Optional[float] = None,
-              extra_args: Optional[list[str]] = None,
-              return_output: bool = False) -> tuple[bool, Optional[str], Optional[str]]:
+    def build(
+        self,
+        input_filepath: str | Path | None = None,
+        output_filepath: str | Path | None = None,
+        input_array: np.ndarray | None = None,
+        sample_rate_in: float | None = None,
+        extra_args: list[str] | None = None,
+        return_output: bool = False,
+    ) -> tuple[bool, str | None, str | None]:
         """Given an input file or array, creates an output_file on disk by
         executing the current set of commands. This function returns True on
         success. If return_output is True, this function returns a triple of
```
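A usage sketch combining the overridden `tempo()` with the modernized `build()` signature (file paths are hypothetical):

```python
from sonusai.mixture.sox_audio import Transformer

tfm = Transformer()
tfm.tempo(factor=0.9, audio_type="s")  # 10% slow-down using the speech algorithm
tfm.build(input_filepath="in.wav", output_filepath="out.wav")
```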
```diff
@@ -291,18 +290,14 @@ class Transformer(SoxTransformer):
         from sox.core import sox
         from sox.log import logger
 
-        input_format, input_filepath = self._parse_inputs(
-            input_filepath, input_array, sample_rate_in
-        )
+        input_format, input_filepath = self._parse_inputs(input_filepath, input_array, sample_rate_in)
 
         if output_filepath is None:
             raise ValueError("output_filepath is not specified!")
 
         # set output parameters
         if input_filepath == output_filepath:
-            raise ValueError(
-                "input_filepath must be different from output_filepath."
-            )
+            raise ValueError("input_filepath must be different from output_filepath.")
         file_info.validate_output_file(output_filepath)
 
         args = []
```
```diff
@@ -320,26 +315,22 @@ class Transformer(SoxTransformer):
 
         status, out, err = sox(args, input_array, True)
         if status != 0:
-            raise SoxError(
-                f"Stdout: {out}\nStderr: {err}"
-            )
+            raise SoxError(f"Stdout: {out}\nStderr: {err}")
 
-        logger.info(
-            "Created %s with effects: %s",
-            output_filepath,
-            " ".join(self.effects_log)
-        )
+        logger.info("Created %s with effects: %s", output_filepath, " ".join(self.effects_log))
 
         if return_output:
             return status, out, err
 
         return True, None, None
 
-    def build_array(self,
-                    input_filepath: Optional[str | Path] = None,
-                    input_array: Optional[np.ndarray] = None,
-                    sample_rate_in: Optional[int] = None,
-                    extra_args: Optional[list[str]] = None) -> np.ndarray:
+    def build_array(
+        self,
+        input_filepath: str | Path | None = None,
+        input_array: np.ndarray | None = None,
+        sample_rate_in: int | None = None,
+        extra_args: list[str] | None = None,
+    ) -> np.ndarray:
         """Given an input file or array, returns the output as a numpy array
         by executing the current set of commands. By default, the array will
         have the same sample rate as the input file unless otherwise specified
```
```diff
@@ -405,62 +396,57 @@ class Transformer(SoxTransformer):
         from sox.log import logger
         from sox.transform import ENCODINGS_MAPPING
 
-        input_format, input_filepath = self._parse_inputs(
-            input_filepath, input_array, sample_rate_in
-        )
+        input_format, input_filepath = self._parse_inputs(input_filepath, input_array, sample_rate_in)
 
         # check if any of the below commands are part of the effects chain
-        ignored_commands = ['channels', 'convert']
+        ignored_commands = ["channels", "convert"]
         if set(ignored_commands) & set(self.effects_log):
             logger.warning(
-                "When outputting to an array, channels and convert "
-                "effects may be ignored. Use set_output_format() to "
-                "specify output formats."
+                "When outputting to an array, channels and convert "
+                + "effects may be ignored. Use set_output_format() to "
+                + "specify output formats."
             )
 
-        output_filepath = '-'
+        output_filepath = "-"
 
-        if input_format.get('file_type') is None:
+        if input_format.get("file_type") is None:
             encoding_out = np.int16
         else:
-            encoding_out = [
-                k for k, v in ENCODINGS_MAPPING.items()
-                if input_format['file_type'] == v
-            ][0]
+            encoding_out = next(k for k, v in ENCODINGS_MAPPING.items() if input_format["file_type"] == v)
 
         n_bits = np.dtype(encoding_out).itemsize * 8
 
         output_format = {
-            'file_type': 'raw',
-            'rate': sample_rate_in,
-            'bits': n_bits,
-            'channels': input_format['channels'],
-            'encoding': None,
-            'comments': None,
-            'append_comments': True,
+            "file_type": "raw",
+            "rate": sample_rate_in,
+            "bits": n_bits,
+            "channels": input_format["channels"],
+            "encoding": None,
+            "comments": None,
+            "append_comments": True,
         }
 
-        if self.output_format.get('rate') is not None:
-            output_format['rate'] = self.output_format['rate']
+        if self.output_format.get("rate") is not None:
+            output_format["rate"] = self.output_format["rate"]
 
-        if self.output_format.get('channels') is not None:
-            output_format['channels'] = self.output_format['channels']
+        if self.output_format.get("channels") is not None:
+            output_format["channels"] = self.output_format["channels"]
 
-        if self.output_format.get('bits') is not None:
-            n_bits = self.output_format['bits']
-            output_format['bits'] = n_bits
+        if self.output_format.get("bits") is not None:
+            n_bits = self.output_format["bits"]
+            output_format["bits"] = n_bits
 
         match n_bits:
             case 8:
-                encoding_out = np.int8  # type: ignore
+                encoding_out = np.int8  # type: ignore[assignment]
             case 16:
                 encoding_out = np.int16
             case 32:
-                encoding_out = np.float32  # type: ignore
+                encoding_out = np.float32  # type: ignore[assignment]
             case 64:
-                encoding_out = np.float64  # type: ignore
+                encoding_out = np.float64  # type: ignore[assignment]
             case _:
-                raise ValueError("invalid n_bits {}".format(n_bits))
+                raise ValueError(f"invalid n_bits {n_bits}")
 
         args = []
         args.extend(self.globals)
```
```diff
@@ -477,21 +463,14 @@ class Transformer(SoxTransformer):
 
         status, out, err = sox(args, input_array, False)
         if status != 0:
-            raise SoxError(
-                "Stdout: {}\nStderr: {}".format(out, err)
-            )
+            raise SoxError(f"Stdout: {out}\nStderr: {err}")
 
         out = np.frombuffer(out, dtype=encoding_out)
-        if output_format['channels'] > 1:
+        if output_format["channels"] > 1:
             out = out.reshape(
-                (
-                    output_format['channels'],
-                    int(len(out) / output_format['channels'])
-                ), order='F'
+                (output_format["channels"], int(len(out) / output_format["channels"])),
+                order="F",
             ).T
-        logger.info(
-            "Created array with effects: %s",
-            " ".join(self.effects_log)
-        )
+        logger.info("Created array with effects: %s", " ".join(self.effects_log))
 
         return out
```
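And a usage sketch of `build_array()`, which runs the effects chain on an in-memory signal and decodes sox's raw output back into numpy (the sample rate value is illustrative):

```python
import numpy as np

from sonusai.mixture.sox_audio import Transformer

x = np.zeros(16000, dtype=np.float32)  # one second of silence at 16 kHz
tfm = Transformer()
tfm.tempo(factor=1.1, audio_type="s")
y = tfm.build_array(input_array=x, sample_rate_in=16000)
```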
sonusai/mixture/sox_augmentation.py
CHANGED
```diff
@@ -11,7 +11,6 @@ def apply_augmentation(audio: AudioT, augmentation: Augmentation, frame_length:
     :param frame_length: Pad resulting audio to be a multiple of this
     :return: Augmented audio
     """
-    from sonusai import SonusAIError
     from .augmentation import pad_audio_to_frame
     from .constants import BIT_DEPTH
     from .constants import CHANNEL_COUNT
```
```diff
@@ -45,7 +44,7 @@ def apply_augmentation(audio: AudioT, augmentation: Augmentation, frame_length:
             has_effects = True
 
         if augmentation.tempo is not None:
-            tfm.tempo(factor=float(augmentation.tempo), audio_type='s')
+            tfm.tempo(factor=float(augmentation.tempo), audio_type="s")
             has_effects = True
 
         if augmentation.eq1 is not None:
```
```diff
@@ -70,7 +69,7 @@ def apply_augmentation(audio: AudioT, augmentation: Augmentation, frame_length:
             audio_out = audio
 
     except Exception as e:
-        raise SonusAIError(f'Error applying {augmentation}: {e}')
+        raise RuntimeError(f"Error applying {augmentation}: {e}") from e
 
     # make sure length is multiple of frame_length
     return pad_audio_to_frame(audio=audio_out, frame_length=frame_length)
```
```diff
@@ -84,13 +83,13 @@ def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
     :return: Augmented audio
     """
     import math
-    from pathlib import Path
     import tempfile
+    from pathlib import Path
 
     import numpy as np
 
-    from sonusai import SonusAIError
     from sonusai.utils import linear_to_db
+
     from .constants import SAMPLE_RATE
     from .sox_audio import Transformer
 
```
```diff
@@ -111,9 +110,9 @@ def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
     audio_out = np.pad(array=audio_out, pad_width=(pad, pad))
 
     # Write coefficients to temporary file
-    temp = tempfile.NamedTemporaryFile(mode='w+t')
+    temp = tempfile.NamedTemporaryFile(mode="w+t")
     for d in ir.data:
-        temp.write(f'{d:f}\n')
+        temp.write(f"{d:f}\n")
     temp.seek(0)
 
     # Apply IR and convert back to global sample rate
```
```diff
@@ -123,7 +122,7 @@ def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
     try:
         audio_out = tfm.build_array(input_array=audio_out, sample_rate_in=ir.sample_rate)
     except Exception as e:
-        raise SonusAIError(f'Error applying IR: {e}')
+        raise RuntimeError(f"Error applying IR: {e}") from e
 
     path = Path(temp.name)
     temp.close()
```
```diff
@@ -134,4 +133,4 @@ def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
     tfm.norm(db_level=max_db)
     audio_out = tfm.build_array(input_array=audio_out, sample_rate_in=SAMPLE_RATE)
 
-    return audio_out[:len(audio)]
+    return audio_out[: len(audio)]
```
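The IR recipe above (measure the input's level, convolve with the impulse response, renormalize, trim) reduces to the following standalone numpy sketch; `np.convolve` stands in for sox's `fir` effect and the resampling steps are omitted:

```python
import numpy as np


def apply_ir_sketch(audio: np.ndarray, ir: np.ndarray) -> np.ndarray:
    peak = max(np.abs(audio).max(), 1e-9)  # remember the input level
    out = np.convolve(audio, ir)  # stand-in for the sox fir effect
    out *= peak / max(np.abs(out).max(), 1e-9)  # renormalize to that level
    return out[: len(audio)]  # trim back to the original length
```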
sonusai/mixture/spectral_mask.py
CHANGED
```diff
@@ -2,7 +2,7 @@ from sonusai.mixture.datatypes import AudioF
 from sonusai.mixture.datatypes import SpectralMask
 
 
-def apply_spectral_mask(audio_f: AudioF, spectral_mask: SpectralMask, seed: int = None) -> AudioF:
+def apply_spectral_mask(audio_f: AudioF, spectral_mask: SpectralMask, seed: int | None = None) -> AudioF:
     """Apply frequency and time masking
 
     Implementation of SpecAugment: A Simple Data Augmentation Method for Automatic Speech Recognition
```
```diff
@@ -24,10 +24,8 @@ def apply_spectral_mask(audio_f: AudioF, spectral_mask: SpectralMask, seed: int
     """
     import numpy as np
 
-    from sonusai import SonusAIError
-
     if audio_f.ndim != 2:
-        raise SonusAIError('feature input must have three dimensions [frames, bins]')
+        raise ValueError("feature input must have three dimensions [frames, bins]")
 
     frames, bins = audio_f.shape
 
```
```diff
@@ -41,13 +39,13 @@ def apply_spectral_mask(audio_f: AudioF, spectral_mask: SpectralMask, seed: int
     for _ in range(spectral_mask.f_num):
         f_width = int(rng.uniform(0, f_max_width))
         f_start = rng.integers(0, bins - f_width, endpoint=True)
-        audio_f[:, f_start:f_start + f_width] = 0
+        audio_f[:, f_start : f_start + f_width] = 0
 
     # apply t_num time masks to the feature
     t_upper_bound = int(spectral_mask.t_max_percent / 100 * frames)
     for _ in range(spectral_mask.t_num):
         t_width = min(int(rng.uniform(0, spectral_mask.t_max_width)), t_upper_bound)
         t_start = rng.integers(0, frames - t_width, endpoint=True)
-        audio_f[t_start:t_start + t_width, :] = 0
+        audio_f[t_start : t_start + t_width, :] = 0
 
     return audio_f
```
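For reference, the masking loops implement standard SpecAugment; a standalone sketch on a random [frames, bins] feature (mask widths and counts here are illustrative):

```python
import numpy as np

rng = np.random.default_rng(1234)
audio_f = rng.normal(size=(100, 64)).astype(np.float32)
frames, bins = audio_f.shape

# one frequency mask: zero a random band of bins across all frames
f_width = int(rng.uniform(0, 8))
f_start = rng.integers(0, bins - f_width, endpoint=True)
audio_f[:, f_start : f_start + f_width] = 0

# one time mask: zero a random span of frames across all bins
t_width = min(int(rng.uniform(0, 10)), frames // 2)
t_start = rng.integers(0, frames - t_width, endpoint=True)
audio_f[t_start : t_start + t_width, :] = 0
```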